logdetective 2.0.1__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. logdetective/extractors.py +134 -23
  2. logdetective/logdetective.py +39 -23
  3. logdetective/models.py +26 -0
  4. logdetective/prompts-summary-first.yml +0 -2
  5. logdetective/prompts.yml +0 -3
  6. logdetective/server/compressors.py +7 -10
  7. logdetective/server/config.py +3 -2
  8. logdetective/server/database/base.py +31 -26
  9. logdetective/server/database/models/__init__.py +2 -2
  10. logdetective/server/database/models/exceptions.py +4 -0
  11. logdetective/server/database/models/koji.py +47 -30
  12. logdetective/server/database/models/merge_request_jobs.py +205 -186
  13. logdetective/server/database/models/metrics.py +87 -61
  14. logdetective/server/emoji.py +57 -55
  15. logdetective/server/exceptions.py +4 -0
  16. logdetective/server/gitlab.py +18 -11
  17. logdetective/server/llm.py +19 -10
  18. logdetective/server/metric.py +18 -13
  19. logdetective/server/models.py +65 -48
  20. logdetective/server/plot.py +13 -11
  21. logdetective/server/server.py +52 -30
  22. logdetective/server/templates/base_response.html.j2 +59 -0
  23. logdetective/server/templates/gitlab_full_comment.md.j2 +58 -53
  24. logdetective/server/templates/gitlab_short_comment.md.j2 +52 -47
  25. logdetective/server/utils.py +15 -27
  26. logdetective/utils.py +115 -49
  27. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/METADATA +95 -21
  28. logdetective-2.11.0.dist-info/RECORD +40 -0
  29. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  30. logdetective-2.0.1.dist-info/RECORD +0 -39
  31. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  32. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/templates/base_response.html.j2 ADDED
@@ -0,0 +1,59 @@
+ <b>Log Detective report
+ {% if project %}for {{ project }}{% endif %}
+ </b>
+ <p>
+ Analysis:
+ <br>
+ {{ explanation }}
+ <br>
+ <details>
+ <summary>Analysis details</summary>
+ <ul>
+ {% for snippet in snippets %}
+ <li>
+ <b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
+ {{ snippet.explanation.text }}
+ </li>
+ {% endfor %}
+ </ul>
+ </details>
+ </p>
+ <br>
+ <details>
+ <summary>Help</summary>
+ <p>
+ Log Detective analyses logs using template mining algorithm and LLMs.
+ Providing overview of the primary issue, while highlighting potentially important log lines.
+ </p>
+ <p>
+ Log Detective may offer potential solution to problem.
+ However, it is designed to do so only when sufficiently confident in the response.
+ <br>
+ Even so, as with any LLM based AI system, recommendations of Log Detective must be taken with caution.
+ Before acting on any recommendations of Log Detective, consider following:
+ </p>
+ <ul>
+ <li>
+ <b>Do you understand how does the proposed solution work? And why should it solve your problem?</b>
+ </li>
+ <li>Did Log Detective work with sufficient information to make this suggestion?</li>
+ <li>Are the suggested actions safe in the context of your work?</li>
+ <li>Do results of snippet analysis align with results of final analysis?</li>
+ <li>Are there any potentially adverse outcomes from implementing proposed solution?</li>
+ </ul>
+ </details>
+ <br>
+ {% render_contacts %}
+ <details>
+ <summary>Contacts</summary>
+ <p>Don't hesitate to reach out with your questions or suggestions.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Source</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ </ul>
+ </details>
+ {% endif %}
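
The context this new base template expects can be read off the variables it references: `project`, `explanation`, and a list of `snippets` carrying `line_number`, `text`, and a nested `explanation.text`. The sketch below is not package code; it renders a trimmed-down inline version of the snippet loop with Jinja2 and hypothetical data, just to illustrate that shape (the real file also uses a custom `{% render_contacts %}` tag that plain Jinja2 would not recognize).

```python
from types import SimpleNamespace
from jinja2 import Environment

# Trimmed-down stand-in for the snippet loop in base_response.html.j2.
TEMPLATE = """\
<b>Log Detective report{% if project %} for {{ project }}{% endif %}</b>
<p>Analysis:<br>{{ explanation }}</p>
<ul>
{% for snippet in snippets %}
<li><b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
{{ snippet.explanation.text }}</li>
{% endfor %}
</ul>
"""

# Hypothetical objects shaped like the attributes the template accesses.
snippets = [
    SimpleNamespace(
        line_number=128,
        text="error: linker command failed with exit code 1",
        explanation=SimpleNamespace(text="The link step failed; a library is likely missing."),
    )
]

html = Environment().from_string(TEMPLATE).render(
    project="example/package",
    explanation="The build failed during linking.",
    snippets=snippets,
)
print(html)
```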
logdetective/server/templates/gitlab_full_comment.md.j2 CHANGED
@@ -1,68 +1,73 @@
- The package {{ package }} failed to build, here is a possible explanation why.
-
+ The package {{ package }} build has experienced an issue.
  Please know that the explanation was provided by AI and may be incorrect.
  {% if certainty > 0 %}
- In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+ In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
  {% endif %}

- {{ explanation }}
-
+ <details open>
+ <summary>Description</summary>
+ {{ explanation }}
+ </details>
  <details>
+ {#
+ Formatted so that we don't trigger GitLab markdown
+ #}
+ <summary>Snippets</summary>
  <ul>
- {% for snippet in snippets %}
- <li>
- <b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
- {{ snippet.explanation.text }}
- </li>
- {% endfor %}
+ {% for snippet in snippets -%}
+ <li><div><b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text | e }}</code><br>{{ snippet.explanation.text | e }}</div></li>
+ {%- endfor %}
  </ul>
  </details>
-
  <details>
- <summary>Logs</summary>
- <p>
- Log Detective analyzed the following logs files to provide an explanation:
- </p>
-
- <ul>
- <li><a href="{{ log_url }}">{{ log_url }}</a></li>
- </ul>
-
- <p>
- Additional logs are available from:
+ <summary>Logs</summary>
+ <p>Log Detective analyzed the following logs files to provide an explanation:</p>
  <ul>
- <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
- </ul>
- </p>
-
- <p>
- Please know that these log files are automatically removed after some
- time, so you might need a backup.
- </p>
+ <li>
+ <a href="{{ log_url }}">{{ log_url }}</a>
+ </li>
+ </ul>
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li>
+ <a href="{{ artifacts_url }}">artifacts.zip</a>
+ </li>
+ </ul>
+ </p>
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
  </details>
-
  <details>
- <summary>Help</summary>
- <p>Don't hesitate to reach out.</p>
-
- <ul>
- <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
- <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
- <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
- <li><a href="https://log-detective.com/documentation">Documentation</a></li>
- </ul>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ <li>
+ <a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+ </li>
+ <li>
+ <a href="https://log-detective.com/documentation">Documentation</a>
+ </li>
+ </ul>
  </details>

+ <hr>

- ---
- This comment was created by [Log Detective][log-detective].
-
- Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
- or :thumbsdown: to help us improve.<br>
-
- <i>If this Log Detective report contains harmful content, please use the
- [Gitlab reporting feature for harmful content](https://docs.gitlab.com/user/report_abuse/)
- and contact the [Log Detective developers](https://github.com/fedora-copr/logdetective/issues).</i>
-
- [log-detective]: https://log-detective.com/
- [contact]: https://github.com/fedora-copr
+ This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+ Always review AI generated content prior to use.
+ Was the provided feedback accurate and helpful?
+ <br>
+ Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.
+ <br>
+ <i>If this Log Detective report contains harmful content,
+ please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+ and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
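
Two things stand out in the new snippet loop: the whitespace-control markers (`-%}` / `{%-`) keep each `<li>` on a single line so GitLab's markdown renderer does not mistake indented template output for a code block (as the in-template comment suggests), and `snippet.text` / `snippet.explanation.text` are now passed through Jinja's built-in `e` filter. A small standalone sketch, not from the package, of what that filter does to a raw log line:

```python
from jinja2 import Environment

env = Environment()
# Same escaping the template now applies via `{{ snippet.text | e }}`.
rendered = env.from_string("<code>{{ text | e }}</code>").render(
    text="#include <missing/header.h> not found"
)
print(rendered)
# <code>#include &lt;missing/header.h&gt; not found</code>
```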
logdetective/server/templates/gitlab_short_comment.md.j2 CHANGED
@@ -1,57 +1,62 @@
- The package {{ package }} failed to build, here is a possible explanation why.
-
+ The package {{ package }} build has experienced an issue.
  Please know that the explanation was provided by AI and may be incorrect.
  {% if certainty > 0 %}
- In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+ In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
  {% endif %}

- {{ explanation }}
-
+ <details open>
+ <summary>Description</summary>
+ {{ explanation }}
+ </details>
  <details>
- <summary>Logs</summary>
- <p>
- Log Detective analyzed the following logs files to provide an explanation:
- </p>
-
- <ul>
- <li><a href="{{ log_url }}">{{ log_url }}</a></li>
- </ul>
-
- <p>
- Additional logs are available from:
+ <summary>Logs</summary>
+ <p>Log Detective analyzed the following logs files to provide an explanation:</p>
  <ul>
- <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
- </ul>
- </p>
-
- <p>
- Please know that these log files are automatically removed after some
- time, so you might need a backup.
- </p>
+ <li>
+ <a href="{{ log_url }}">{{ log_url }}</a>
+ </li>
+ </ul>
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li>
+ <a href="{{ artifacts_url }}">artifacts.zip</a>
+ </li>
+ </ul>
+ </p>
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
  </details>
-
  <details>
- <summary>Help</summary>
- <p>Don't hesitate to reach out.</p>
-
- <ul>
- <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
- <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
- <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
- <li><a href="https://log-detective.com/documentation">Documentation</a></li>
- </ul>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ <li>
+ <a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+ </li>
+ <li>
+ <a href="https://log-detective.com/documentation">Documentation</a>
+ </li>
+ </ul>
  </details>

-
- ---
- This comment was created by [Log Detective][log-detective].
-
- Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
- or :thumbsdown: to help us improve.<br>
-
- <i>If this Log Detective report contains harmful content, please use the
- [Gitlab reporting feature for harmful content](https://docs.gitlab.com/user/report_abuse/)
- and contact the [Log Detective developers](https://github.com/fedora-copr/logdetective/issues).</i>
-
- [log-detective]: https://log-detective.com/
- [contact]: https://github.com/fedora-copr
+ <hr>
+
+ This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+ Always review AI generated content prior to use.
+ Was the provided feedback accurate and helpful?
+ <br>
+ Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.
+ <br>
+ <i>If this Log Detective report contains harmful content,
+ please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+ and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
logdetective/server/utils.py CHANGED
@@ -1,15 +1,11 @@
- from typing import List, Tuple
+ from typing import List
+ from importlib.metadata import version

  import aiohttp
  from fastapi import HTTPException

  from logdetective.constants import SNIPPET_DELIMITER
- from logdetective.extractors import DrainExtractor
- from logdetective.server.config import (
- LOG,
- SERVER_CONFIG,
- SKIP_SNIPPETS_CONFIG,
- )
+ from logdetective.server.config import LOG
  from logdetective.server.exceptions import LogDetectiveConnectionError
  from logdetective.server.models import AnalyzedSnippet, RatedSnippetAnalysis

@@ -22,26 +18,6 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
  return summary


- def mine_logs(log: str) -> List[Tuple[int, str]]:
- """Extract snippets from log text"""
- extractor = DrainExtractor(
- verbose=True,
- context=True,
- max_clusters=SERVER_CONFIG.extractor.max_clusters,
- skip_snippets=SKIP_SNIPPETS_CONFIG,
- max_snippet_len=SERVER_CONFIG.extractor.max_snippet_len
- )
-
- LOG.info("Getting summary")
- log_summary = extractor(log)
-
- ratio = len(log_summary) / len(log.split("\n"))
- LOG.debug("Log summary: \n %s", log_summary)
- LOG.info("Compression ratio: %s", ratio)
-
- return log_summary
-
-
  def connection_error_giveup(details: dict) -> None:
  """Too many connection errors, give up.
  """
@@ -120,3 +96,15 @@ def filter_snippets(
  processed_snippets = sorted(processed_snippets, key=select_line_number)

  return processed_snippets
+
+
+ def construct_final_prompt(formatted_snippets: str, prompt_template: str) -> str:
+ """Create final prompt from processed snippets and csgrep output, if it is available."""
+
+ final_prompt = prompt_template.format(formatted_snippets)
+ return final_prompt
+
+
+ def get_version() -> str:
+ """Obtain the version number using importlib"""
+ return version('logdetective')
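
Both helpers added at the bottom of `server/utils.py` are thin wrappers, so their behaviour is easy to illustrate; the prompt template and snippet text below are made up for the example, not the package's shipped prompts.

```python
from importlib.metadata import PackageNotFoundError, version

# get_version() defers to importlib.metadata; outside an environment where the
# wheel is installed this raises PackageNotFoundError, so a caller might guard it.
try:
    print(version("logdetective"))  # e.g. "2.11.0"
except PackageNotFoundError:
    print("logdetective is not installed")

# construct_final_prompt() is a plain str.format() call.
prompt_template = "Explain the failure indicated by these snippets:\n{}"  # hypothetical template
formatted_snippets = "Snippet No. 0 at line #42:\n\nerror: package foo not found"
print(prompt_template.format(formatted_snippets))
```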
logdetective/utils.py CHANGED
@@ -1,5 +1,6 @@
  import logging
  import os
+ import subprocess as sp
  from typing import Iterator, List, Dict, Tuple, Generator
  from urllib.parse import urlparse

@@ -7,60 +8,73 @@ import aiohttp
  import numpy as np
  import yaml

- from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
+ from llama_cpp import (
+ Llama,
+ CreateChatCompletionResponse,
+ CreateChatCompletionStreamResponse,
+ )
+ from logdetective.constants import SNIPPET_DELIMITER
  from logdetective.models import PromptConfig, SkipSnippets
  from logdetective.remote_log import RemoteLog

-
  LOG = logging.getLogger("logdetective")


- def chunk_continues(text: str, index: int) -> bool:
+ def new_message(text: str) -> bool:
  """Set of heuristics for determining whether or not
  does the current chunk of log text continue on next line.

  Following rules are checked, in order:
- * is the next character is whitespace
- * is the previous character backslash '\\'
- * is the previous character colon ':'
-
+ * is the first character is whitespace
+ * is the first character backslash '|'
  """
  conditionals = [
- lambda i, string: string[i + 1].isspace(),
- lambda i, string: string[i - 1] == "\\",
- lambda i, string: string[i - 1] == ":",
+ lambda string: string[0].isspace(),
+ lambda string: string[0] == "|",
  ]

  for c in conditionals:
- y = c(index, text)
+ y = c(text)
  if y:
- return True
+ return False

- return False
+ return True


- def get_chunks(text: str, max_len: int = 2000) -> Generator[Tuple[int, str], None, None]:
+ def get_chunks(
+ text: str, max_chunk_len: int = 2000
+ ) -> Generator[Tuple[int, str], None, None]:
  """Split log into chunks according to heuristic
  based on whitespace and backslash presence.
  """
- text_len = len(text)
- i = 0
+ lines = text.splitlines()
+
+ # Chunk we will be yielding
  chunk = ""
- # Keep track of the original and next line number
- # every `\n` hit increases the next_line_number by one.
- original_line_number = 0
- next_line_number = 0
- while i < text_len:
- chunk += text[i]
- if text[i] == "\n":
- next_line_number += 1
- if i + 1 < text_len and chunk_continues(text, i) and i + 1 < max_len:
- i += 1
- continue
- yield (original_line_number, chunk)
- original_line_number = next_line_number + 1
- chunk = ""
- i += 1
+ # Number of line where the message started
+ original_line = 0
+ for i, line in enumerate(lines):
+ if len(line) == 0:
+ continue
+ if new_message(line):
+ # Yield chunk if we have it
+ if len(chunk) > 0:
+ yield (original_line, chunk)
+ original_line = i
+ chunk = line
+ else:
+ chunk += "\n" + line
+ if len(chunk) > max_chunk_len:
+ # If the chunk is too long, keep splitting into smaller chunks
+ # until we reach manageable size
+ while len(chunk) > max_chunk_len:
+ remainder = chunk[max_chunk_len:]
+ chunk = chunk[:max_chunk_len]
+ yield (original_line, chunk)
+ chunk = remainder
+
+ # if we still have some text left over
+ yield (original_line, chunk)


  def initialize_model(
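
The rewritten chunking logic is line-oriented: `new_message` decides whether a line starts a new log message (anything not beginning with whitespace or `|`), and `get_chunks` folds continuation lines into the previous chunk, splitting oversized chunks at `max_chunk_len`. A toy log, invented for illustration, shows the effect, assuming logdetective 2.11.0 is installed so `get_chunks` can be imported:

```python
# Assumes logdetective >= 2.11.0 is installed, where get_chunks lives in
# logdetective.utils as shown in the diff above.
from logdetective.utils import get_chunks

SAMPLE_LOG = "\n".join([
    "Building target platforms: x86_64",                          # new message
    "error: Bad exit status from /var/tmp/rpm-tmp.123 (%build)",  # new message
    "    RPM build errors:",                                      # leading whitespace -> continuation
    "| child exited with status 1",                               # leading '|' -> continuation
])

for start_line, chunk in get_chunks(SAMPLE_LOG):
    print(start_line, repr(chunk))
# Expected per the heuristic: two chunks, the second starting at line 1 and
# carrying both continuation lines with it.
```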
@@ -113,8 +127,8 @@ def compute_certainty(probs: List[Dict]) -> float:


  def process_log(
- log: str, model: Llama, stream: bool, prompt_template: str, temperature: float
- ) -> CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]:
+ log: str, model: Llama, stream: bool, prompt_templates: PromptConfig, temperature: float
+ ) -> CreateChatCompletionResponse | Iterator[CreateChatCompletionStreamResponse]:
  """Processes a given log using the provided language model and returns its summary.

  Args:
@@ -126,11 +140,23 @@
  Returns:
  str: The summary of the given log generated by the language model.
  """
- response = model(
- prompt=prompt_template.format(log),
+ messages = [
+ {
+ "role": "system",
+ "content": prompt_templates.default_system_prompt
+ },
+ {
+ "role": "user",
+ "content": prompt_templates.prompt_template.format(log)
+ },
+ ]
+
+ response = model.create_chat_completion(
+ messages=messages,
  stream=stream,
  max_tokens=0,
- logprobs=1,
+ logprobs=True,
+ top_logprobs=1,
  temperature=temperature,
  )

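
`process_log` now goes through llama-cpp-python's chat-completion API instead of a raw completion call, with the system and user prompts taken from a `PromptConfig`. The sketch below mirrors the arguments visible in the diff using plain strings; the model path and prompt text are placeholders, not the package's defaults.

```python
from llama_cpp import Llama

# Placeholder model path; any local GGUF model would do for this sketch.
llm = Llama(model_path="/path/to/model.gguf")

messages = [
    {"role": "system", "content": "You are an assistant that explains build failures."},
    {"role": "user", "content": "Explain this log excerpt:\n{}".format("error: missing dependency foo")},
]

response = llm.create_chat_completion(
    messages=messages,
    stream=False,
    max_tokens=0,    # same settings as in the diff above
    logprobs=True,   # token log-probabilities feed the certainty estimate
    top_logprobs=1,
    temperature=0.8,
)
print(response["choices"][0]["message"]["content"])
```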
@@ -166,22 +192,21 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
  Line number must be first element in the tuple. Mixed format of snippets
  is permitted, but may have impact on inference.
  """
- summary = ""
+ summary = "\n"
  for i, s in enumerate(snippets):
  if isinstance(s, tuple):
- summary += f"""
- Snippet No. {i} at line #{s[0]}:
-
- {s[1]}
- ================
- """
+ line_number, snippet_content = s
+ header = f"Snippet No. {i} at line #{line_number}:"
  else:
- summary += f"""
- Snippet No. {i}:
-
- {s}
- ================
- """
+ header = f"Snippet No. {i}:"
+ snippet_content = s
+ summary += (
+ f"{header}\n"
+ "\n"
+ f"{snippet_content}\n"
+ f"{SNIPPET_DELIMITER}\n"
+ f"\n"
+ )
  return summary


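
The snippet summary is now built from plain f-strings joined by `SNIPPET_DELIMITER`, which avoids the indentation the old triple-quoted blocks carried into the prompt. A standalone rendition of the loop, with a stand-in delimiter since the real value lives in `logdetective.constants`:

```python
SNIPPET_DELIMITER = "================"  # stand-in; the package imports its own constant

def format_snippets_demo(snippets):
    """Mirror of the new format_snippets() loop, for illustration only."""
    summary = "\n"
    for i, s in enumerate(snippets):
        if isinstance(s, tuple):
            line_number, snippet_content = s
            header = f"Snippet No. {i} at line #{line_number}:"
        else:
            header = f"Snippet No. {i}:"
            snippet_content = s
        summary += f"{header}\n\n{snippet_content}\n{SNIPPET_DELIMITER}\n\n"
    return summary

print(format_snippets_demo([(12, "error: missing dependency"), "make: *** [all] Error 2"]))
```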
@@ -247,3 +272,44 @@ def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
  raise e

  return SkipSnippets({})
+
+
+ def check_csgrep() -> bool:
+ """Verifies presence of csgrep in path"""
+ try:
+ result = sp.run(
+ ["csgrep", "--version"],
+ text=True,
+ check=True,
+ shell=False,
+ capture_output=True,
+ timeout=1.0,
+ )
+ except (FileNotFoundError, sp.TimeoutExpired, sp.CalledProcessError) as ex:
+ LOG.error("Required binary `csgrep` was not found in path: %s", ex)
+ return False
+ if result.returncode == 0:
+ return True
+ LOG.error("Issue was encountered while calling `csgrep`: `%s`", result.stderr)
+
+ return False
+
+
+ def mine_logs(log: str, extractors: list) -> List[Tuple[int, str]]:
+ """Extract snippets from log text using extractors provided.
+ Each extractor is applied in turn on original log.
+ Depending on characteristics of extractors used, there may be
+ an overlap in snippets extracted."""
+
+ log_summary = []
+
+ LOG.info("Getting summary")
+
+ for extractor in extractors:
+ log_summary.extend(extractor(log))
+
+ ratio = len("\n".join([text for _, text in log_summary])) / len(log)
+ LOG.debug("Log summary: \n %s", log_summary)
+ LOG.info("Snippets: %s Compression ratio: %s", len(log_summary), ratio)
+
+ return log_summary
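
`mine_logs` has moved here from `logdetective/server/utils.py` and now accepts a list of extractor callables, concatenating whatever `(line_number, text)` pairs each one returns. Any callable with that shape can be passed; the toy regex extractor below is invented for illustration and is not part of the package.

```python
import re
from typing import List, Tuple

def error_line_extractor(log: str) -> List[Tuple[int, str]]:
    """Minimal extractor-like callable: pick out lines mentioning 'error'."""
    return [
        (i, line)
        for i, line in enumerate(log.splitlines())
        if re.search(r"\berror\b", line, re.IGNORECASE)
    ]

sample_log = "configure: ok\nerror: header foo.h not found\nmake: *** Error 1"

# With logdetective >= 2.11.0 installed, the real function could be called as:
#   from logdetective.utils import mine_logs
#   snippets = mine_logs(sample_log, [error_line_extractor])
print(error_line_extractor(sample_log))
# [(1, 'error: header foo.h not found'), (2, 'make: *** Error 1')]
```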