logdetective 2.0.1__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/extractors.py +134 -23
- logdetective/logdetective.py +39 -23
- logdetective/models.py +26 -0
- logdetective/prompts-summary-first.yml +0 -2
- logdetective/prompts.yml +0 -3
- logdetective/server/compressors.py +7 -10
- logdetective/server/config.py +3 -2
- logdetective/server/database/base.py +31 -26
- logdetective/server/database/models/__init__.py +2 -2
- logdetective/server/database/models/exceptions.py +4 -0
- logdetective/server/database/models/koji.py +47 -30
- logdetective/server/database/models/merge_request_jobs.py +205 -186
- logdetective/server/database/models/metrics.py +87 -61
- logdetective/server/emoji.py +57 -55
- logdetective/server/exceptions.py +4 -0
- logdetective/server/gitlab.py +18 -11
- logdetective/server/llm.py +19 -10
- logdetective/server/metric.py +18 -13
- logdetective/server/models.py +65 -48
- logdetective/server/plot.py +13 -11
- logdetective/server/server.py +52 -30
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +58 -53
- logdetective/server/templates/gitlab_short_comment.md.j2 +52 -47
- logdetective/server/utils.py +15 -27
- logdetective/utils.py +115 -49
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/METADATA +95 -21
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective-2.0.1.dist-info/RECORD +0 -39
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/templates/base_response.html.j2
ADDED

@@ -0,0 +1,59 @@
+<b>Log Detective report
+{% if project %}for {{ project }}{% endif %}
+</b>
+<p>
+Analysis:
+<br>
+{{ explanation }}
+<br>
+<details>
+<summary>Analysis details</summary>
+<ul>
+{% for snippet in snippets %}
+<li>
+<b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
+{{ snippet.explanation.text }}
+</li>
+{% endfor %}
+</ul>
+</details>
+</p>
+<br>
+<details>
+<summary>Help</summary>
+<p>
+Log Detective analyses logs using template mining algorithm and LLMs.
+Providing overview of the primary issue, while highlighting potentially important log lines.
+</p>
+<p>
+Log Detective may offer potential solution to problem.
+However, it is designed to do so only when sufficiently confident in the response.
+<br>
+Even so, as with any LLM based AI system, recommendations of Log Detective must be taken with caution.
+Before acting on any recommendations of Log Detective, consider following:
+</p>
+<ul>
+<li>
+<b>Do you understand how does the proposed solution work? And why should it solve your problem?</b>
+</li>
+<li>Did Log Detective work with sufficient information to make this suggestion?</li>
+<li>Are the suggested actions safe in the context of your work?</li>
+<li>Do results of snippet analysis align with results of final analysis?</li>
+<li>Are there any potentially adverse outcomes from implementing proposed solution?</li>
+</ul>
+</details>
+<br>
+{% if render_contacts %}
+<details>
+<summary>Contacts</summary>
+<p>Don't hesitate to reach out with your questions or suggestions.</p>
+<ul>
+<li>
+<a href="https://github.com/fedora-copr/logdetective">Source</a>
+</li>
+<li>
+<a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+</li>
+</ul>
+</details>
+{% endif %}
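Judging from the template above, `base_response.html.j2` expects a `project` name, an `explanation` string, a list of `snippets` whose items expose `line_number`, `text`, and `explanation.text`, and a `render_contacts` flag. A minimal rendering sketch with plain Jinja2; the loader path and all values below are illustrative, not taken from the package:

```python
from types import SimpleNamespace

from jinja2 import Environment, FileSystemLoader, select_autoescape

# Assumption: templates are read from the installed package directory;
# every value below is made up for illustration.
env = Environment(
    loader=FileSystemLoader("logdetective/server/templates"),
    autoescape=select_autoescape(["html", "j2"]),
)
template = env.get_template("base_response.html.j2")

snippets = [
    SimpleNamespace(
        line_number=42,
        text="error: linker command failed with exit code 1",
        explanation=SimpleNamespace(text="The link step failed; a library is likely missing."),
    )
]

html = template.render(
    project="example-package",
    explanation="The build failed during the link step.",
    snippets=snippets,
    render_contacts=True,
)
print(html)
```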
logdetective/server/templates/gitlab_full_comment.md.j2
CHANGED

@@ -1,68 +1,73 @@
-The package {{ package }}
-
+The package {{ package }} build has experienced an issue.
 Please know that the explanation was provided by AI and may be incorrect.
 {% if certainty > 0 %}
-In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
 {% endif %}
 
-
-
+<details open>
+<summary>Description</summary>
+{{ explanation }}
+</details>
 <details>
+{#
+Formatted so that we don't trigger GitLab markdown
+#}
+<summary>Snippets</summary>
 <ul>
-{% for snippet in snippets
-<li>
-
-{{ snippet.explanation.text }}
-</li>
-{% endfor %}
+{% for snippet in snippets -%}
+<li><div><b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text | e }}</code><br>{{ snippet.explanation.text | e }}</div></li>
+{%- endfor %}
 </ul>
 </details>
-
 <details>
-
-
-Log Detective analyzed the following logs files to provide an explanation:
-</p>
-
-<ul>
-<li><a href="{{ log_url }}">{{ log_url }}</a></li>
-</ul>
-
-<p>
-Additional logs are available from:
+<summary>Logs</summary>
+<p>Log Detective analyzed the following logs files to provide an explanation:</p>
 <ul>
-
-
-
-
-
-
-
-
+<li>
+<a href="{{ log_url }}">{{ log_url }}</a>
+</li>
+</ul>
+<p>
+Additional logs are available from:
+<ul>
+<li>
+<a href="{{ artifacts_url }}">artifacts.zip</a>
+</li>
+</ul>
+</p>
+<p>
+Please know that these log files are automatically removed after some
+time, so you might need a backup.
+</p>
 </details>
-
 <details>
-
-
-
-
-
-
-
-
-
+<summary>Help</summary>
+<p>Don't hesitate to reach out.</p>
+<ul>
+<li>
+<a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+</li>
+<li>
+<a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+</li>
+<li>
+<a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+</li>
+<li>
+<a href="https://log-detective.com/documentation">Documentation</a>
+</li>
+</ul>
 </details>
 
+<hr>
 
-
-
-
-
-
-
-<
-
-
-
-[log-detective]: https://log-detective.com/
-[contact]: https://github.com/fedora-copr
+This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+Always review AI generated content prior to use.
+Was the provided feedback accurate and helpful?
+<br>
+Please vote with :thumbsup:
+or :thumbsdown: to help us improve.
+<br>
+<i>If this Log Detective report contains harmful content,
+please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
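One notable change in the snippet loop above is that both the log line and its explanation are now piped through Jinja's `e` (escape) filter, so raw log text cannot break the surrounding HTML or be interpreted as GitLab markup. A small sketch of the effect (values invented):

```python
from jinja2 import Environment

env = Environment()
# Same construct as the template: escape the snippet before embedding it in <code>.
tmpl = env.from_string(
    "<li><b>Line {{ snippet.line_number }}:</b> "
    "<code>{{ snippet.text | e }}</code></li>"
)

snippet = {"line_number": 7, "text": "<command-line>: fatal error: foo.h: No such file"}
print(tmpl.render(snippet=snippet))
# <li><b>Line 7:</b> <code>&lt;command-line&gt;: fatal error: foo.h: No such file</code></li>
```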
logdetective/server/templates/gitlab_short_comment.md.j2
CHANGED

@@ -1,57 +1,62 @@
-The package {{ package }}
-
+The package {{ package }} build has experienced an issue.
 Please know that the explanation was provided by AI and may be incorrect.
 {% if certainty > 0 %}
-In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
 {% endif %}
 
-
-
+<details open>
+<summary>Description</summary>
+{{ explanation }}
+</details>
 <details>
-
-
-Log Detective analyzed the following logs files to provide an explanation:
-</p>
-
-<ul>
-<li><a href="{{ log_url }}">{{ log_url }}</a></li>
-</ul>
-
-<p>
-Additional logs are available from:
+<summary>Logs</summary>
+<p>Log Detective analyzed the following logs files to provide an explanation:</p>
 <ul>
-
-
-
-
-
-
-
-
+<li>
+<a href="{{ log_url }}">{{ log_url }}</a>
+</li>
+</ul>
+<p>
+Additional logs are available from:
+<ul>
+<li>
+<a href="{{ artifacts_url }}">artifacts.zip</a>
+</li>
+</ul>
+</p>
+<p>
+Please know that these log files are automatically removed after some
+time, so you might need a backup.
+</p>
 </details>
-
 <details>
-
-
-
-
-
-
-
-
-
+<summary>Help</summary>
+<p>Don't hesitate to reach out.</p>
+<ul>
+<li>
+<a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+</li>
+<li>
+<a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+</li>
+<li>
+<a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+</li>
+<li>
+<a href="https://log-detective.com/documentation">Documentation</a>
+</li>
+</ul>
 </details>
 
-
-
-This
-
-Was the provided feedback accurate and helpful?
-
-
-
-
-
-
-
-[contact]: https://github.com/fedora-copr
+<hr>
+
+This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+Always review AI generated content prior to use.
+Was the provided feedback accurate and helpful?
+<br>
+Please vote with :thumbsup:
+or :thumbsdown: to help us improve.
+<br>
+<i>If this Log Detective report contains harmful content,
+please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
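Taken together, both GitLab comment templates now render from a context containing `package`, `certainty`, `emoji_face`, `explanation`, `snippets`, `log_url`, and `artifacts_url`. A hedged sketch of assembling that context; the loader and the wrapper function are assumptions, not the server's actual code:

```python
from jinja2 import Environment, PackageLoader

def build_comment(context: dict, short: bool = False) -> str:
    """Render a GitLab comment body from the templates shipped in the wheel.

    Assumption: the templates are reachable via PackageLoader; the real server
    may load and post them differently.
    """
    env = Environment(loader=PackageLoader("logdetective.server", "templates"))
    name = "gitlab_short_comment.md.j2" if short else "gitlab_full_comment.md.j2"
    return env.get_template(name).render(**context)

comment = build_comment(
    {
        "package": "python-example",          # illustrative values below
        "certainty": 87.5,
        "emoji_face": ":slight_smile:",
        "explanation": "The %check phase failed because one test timed out.",
        "snippets": [],                        # the full template also iterates these
        "log_url": "https://example.com/build.log",
        "artifacts_url": "https://example.com/artifacts.zip",
    },
    short=True,
)
```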
logdetective/server/utils.py
CHANGED
@@ -1,15 +1,11 @@
-from typing import List
+from typing import List
+from importlib.metadata import version
 
 import aiohttp
 from fastapi import HTTPException
 
 from logdetective.constants import SNIPPET_DELIMITER
-from logdetective.
-from logdetective.server.config import (
-    LOG,
-    SERVER_CONFIG,
-    SKIP_SNIPPETS_CONFIG,
-)
+from logdetective.server.config import LOG
 from logdetective.server.exceptions import LogDetectiveConnectionError
 from logdetective.server.models import AnalyzedSnippet, RatedSnippetAnalysis
 
@@ -22,26 +18,6 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
     return summary
 
 
-def mine_logs(log: str) -> List[Tuple[int, str]]:
-    """Extract snippets from log text"""
-    extractor = DrainExtractor(
-        verbose=True,
-        context=True,
-        max_clusters=SERVER_CONFIG.extractor.max_clusters,
-        skip_snippets=SKIP_SNIPPETS_CONFIG,
-        max_snippet_len=SERVER_CONFIG.extractor.max_snippet_len
-    )
-
-    LOG.info("Getting summary")
-    log_summary = extractor(log)
-
-    ratio = len(log_summary) / len(log.split("\n"))
-    LOG.debug("Log summary: \n %s", log_summary)
-    LOG.info("Compression ratio: %s", ratio)
-
-    return log_summary
-
-
 def connection_error_giveup(details: dict) -> None:
     """Too many connection errors, give up.
     """
@@ -120,3 +96,15 @@ def filter_snippets(
     processed_snippets = sorted(processed_snippets, key=select_line_number)
 
     return processed_snippets
+
+
+def construct_final_prompt(formatted_snippets: str, prompt_template: str) -> str:
+    """Create final prompt from processed snippets and csgrep output, if it is available."""
+
+    final_prompt = prompt_template.format(formatted_snippets)
+    return final_prompt
+
+
+def get_version() -> str:
+    """Obtain the version number using importlib"""
+    return version('logdetective')
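The two helpers appended to `server/utils.py` are small: `construct_final_prompt` interpolates the formatted snippets into a prompt template, and `get_version` reads the installed distribution version via `importlib.metadata`. A quick usage sketch, assuming the server modules import cleanly in your environment and using a made-up template string:

```python
from logdetective.server.utils import construct_final_prompt, get_version

# Hypothetical prompt template with a single positional placeholder, matching
# prompt_template.format(formatted_snippets) inside construct_final_prompt.
template = "Explain the following log snippets:\n{}\nAnswer concisely."
formatted = "Snippet No. 0 at line #12:\n\nerror: build failed\n"

print(construct_final_prompt(formatted, template))
print(get_version())  # version of the installed wheel, e.g. "2.11.0"
```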
logdetective/utils.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import subprocess as sp
 from typing import Iterator, List, Dict, Tuple, Generator
 from urllib.parse import urlparse
 
@@ -7,60 +8,73 @@ import aiohttp
 import numpy as np
 import yaml
 
-from llama_cpp import
+from llama_cpp import (
+    Llama,
+    CreateChatCompletionResponse,
+    CreateChatCompletionStreamResponse,
+)
+from logdetective.constants import SNIPPET_DELIMITER
 from logdetective.models import PromptConfig, SkipSnippets
 from logdetective.remote_log import RemoteLog
 
-
 LOG = logging.getLogger("logdetective")
 
 
-def
+def new_message(text: str) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
 
     Following rules are checked, in order:
-    * is the
-    * is the
-    * is the previous character colon ':'
-
+    * is the first character is whitespace
+    * is the first character backslash '|'
     """
     conditionals = [
-        lambda
-        lambda
-        lambda i, string: string[i - 1] == ":",
+        lambda string: string[0].isspace(),
+        lambda string: string[0] == "|",
     ]
 
     for c in conditionals:
-        y = c(
+        y = c(text)
         if y:
-            return
+            return False
 
-    return
+    return True
 
 
-def get_chunks(
+def get_chunks(
+    text: str, max_chunk_len: int = 2000
+) -> Generator[Tuple[int, str], None, None]:
     """Split log into chunks according to heuristic
     based on whitespace and backslash presence.
     """
-
-
+    lines = text.splitlines()
+
+    # Chunk we will be yielding
     chunk = ""
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Number of line where the message started
+    original_line = 0
+    for i, line in enumerate(lines):
+        if len(line) == 0:
+            continue
+        if new_message(line):
+            # Yield chunk if we have it
+            if len(chunk) > 0:
+                yield (original_line, chunk)
+            original_line = i
+            chunk = line
+        else:
+            chunk += "\n" + line
+        if len(chunk) > max_chunk_len:
+            # If the chunk is too long, keep splitting into smaller chunks
+            # until we reach manageable size
+            while len(chunk) > max_chunk_len:
+                remainder = chunk[max_chunk_len:]
+                chunk = chunk[:max_chunk_len]
+                yield (original_line, chunk)
+                chunk = remainder
+
+    # if we still have some text left over
+    yield (original_line, chunk)
 
 
 def initialize_model(
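The reworked `new_message`/`get_chunks` pair now works per line: a line starting with whitespace or `|` is treated as a continuation of the current chunk, any other line starts a new chunk, and chunks longer than `max_chunk_len` are split further. A small behavioural sketch with an invented log:

```python
from logdetective.utils import get_chunks

# Invented sample: one standalone line, then an error followed by two
# indented continuation lines that should be merged into the same chunk.
sample_log = (
    "Building target platforms: x86_64\n"
    "error: Bad exit status from /var/tmp/rpm-tmp.12345 (%build)\n"
    "    RPM build errors:\n"
    "    Bad exit status from /var/tmp/rpm-tmp.12345 (%build)\n"
)

for line_number, chunk in get_chunks(sample_log, max_chunk_len=2000):
    print(line_number, repr(chunk))
# Expected: (0, 'Building target platforms: x86_64'), then the error line with
# both indented continuation lines appended, reported as starting at line 1.
```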
@@ -113,8 +127,8 @@ def compute_certainty(probs: List[Dict]) -> float:
 
 
 def process_log(
-    log: str, model: Llama, stream: bool,
-) ->
+    log: str, model: Llama, stream: bool, prompt_templates: PromptConfig, temperature: float
+) -> CreateChatCompletionResponse | Iterator[CreateChatCompletionStreamResponse]:
     """Processes a given log using the provided language model and returns its summary.
 
     Args:
@@ -126,11 +140,23 @@ def process_log(
     Returns:
         str: The summary of the given log generated by the language model.
     """
-
-
+    messages = [
+        {
+            "role": "system",
+            "content": prompt_templates.default_system_prompt
+        },
+        {
+            "role": "user",
+            "content": prompt_templates.prompt_template.format(log)
+        },
+    ]
+
+    response = model.create_chat_completion(
+        messages=messages,
         stream=stream,
         max_tokens=0,
-        logprobs=
+        logprobs=True,
+        top_logprobs=1,
         temperature=temperature,
     )
 
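`process_log` now builds a system/user chat message pair from the `PromptConfig` and requests token log-probabilities (`logprobs=True, top_logprobs=1`), which the module's `compute_certainty` helper can consume. A hedged calling sketch; the model path and the way `PromptConfig` is constructed here are assumptions, since the CLI normally builds both from its configuration and prompts files:

```python
from llama_cpp import Llama

from logdetective.models import PromptConfig
from logdetective.utils import process_log

# Assumptions: the GGUF path is illustrative, and PromptConfig is shown being
# built directly even though the CLI normally loads it from its prompts YAML.
model = Llama(model_path="models/mistral-7b-instruct.Q4_K_M.gguf", logits_all=True)
prompts = PromptConfig(
    default_system_prompt="You are a log analysis assistant.",
    prompt_template="Explain why this build failed:\n{}",
)

response = process_log(
    "error: linker command failed with exit code 1",
    model,
    stream=False,
    prompt_templates=prompts,
    temperature=0.8,
)
print(response["choices"][0]["message"]["content"])
```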
@@ -166,22 +192,21 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
     Line number must be first element in the tuple. Mixed format of snippets
     is permitted, but may have impact on inference.
     """
-    summary = ""
+    summary = "\n"
     for i, s in enumerate(snippets):
         if isinstance(s, tuple):
-
-            Snippet No. {i} at line #{
-
-            {s[1]}
-            ================
-            """
+            line_number, snippet_content = s
+            header = f"Snippet No. {i} at line #{line_number}:"
         else:
-
-
-
-            {
-
-            ""
+            header = f"Snippet No. {i}:"
+            snippet_content = s
+        summary += (
+            f"{header}\n"
+            "\n"
+            f"{snippet_content}\n"
+            f"{SNIPPET_DELIMITER}\n"
+            f"\n"
+        )
     return summary
 
 
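After this change `format_snippets` emits a plain header, a blank line, the snippet body, and `SNIPPET_DELIMITER` for each snippet, replacing the old indented f-string block. Roughly, the produced prompt body looks like the sketch below (the delimiter's actual value lives in `logdetective.constants` and is not shown in this diff):

```python
from logdetective.utils import format_snippets

snippets = [
    (12, "error: No such file or directory"),
    "collect2: error: ld returned 1 exit status",
]
print(format_snippets(snippets))
# Roughly (with <DELIMITER> standing in for SNIPPET_DELIMITER):
#
# Snippet No. 0 at line #12:
#
# error: No such file or directory
# <DELIMITER>
#
# Snippet No. 1:
#
# collect2: error: ld returned 1 exit status
# <DELIMITER>
```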
@@ -247,3 +272,44 @@ def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
             raise e
 
     return SkipSnippets({})
+
+
+def check_csgrep() -> bool:
+    """Verifies presence of csgrep in path"""
+    try:
+        result = sp.run(
+            ["csgrep", "--version"],
+            text=True,
+            check=True,
+            shell=False,
+            capture_output=True,
+            timeout=1.0,
+        )
+    except (FileNotFoundError, sp.TimeoutExpired, sp.CalledProcessError) as ex:
+        LOG.error("Required binary `csgrep` was not found in path: %s", ex)
+        return False
+    if result.returncode == 0:
+        return True
+    LOG.error("Issue was encountered while calling `csgrep`: `%s`", result.stderr)
+
+    return False
+
+
+def mine_logs(log: str, extractors: list) -> List[Tuple[int, str]]:
+    """Extract snippets from log text using extractors provided.
+    Each extractor is applied in turn on original log.
+    Depending on characteristics of extractors used, there may be
+    an overlap in snippets extracted."""
+
+    log_summary = []
+
+    LOG.info("Getting summary")
+
+    for extractor in extractors:
+        log_summary.extend(extractor(log))
+
+    ratio = len("\n".join([text for _, text in log_summary])) / len(log)
+    LOG.debug("Log summary: \n %s", log_summary)
+    LOG.info("Snippets: %s Compression ratio: %s", len(log_summary), ratio)
+
+    return log_summary
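`check_csgrep` simply shells out to `csgrep --version` to confirm the binary is on the PATH, and `mine_logs` now accepts a list of extractor callables and concatenates their output instead of constructing a `DrainExtractor` itself. A minimal sketch with a toy extractor, so no assumptions about the updated `DrainExtractor` constructor are needed:

```python
from typing import List, Tuple

from logdetective.utils import check_csgrep, mine_logs

def error_line_extractor(log: str) -> List[Tuple[int, str]]:
    """Toy extractor: every line mentioning 'error', with its line number."""
    return [
        (number, line)
        for number, line in enumerate(log.splitlines())
        if "error" in line.lower()
    ]

log_text = (
    "checking for a BSD-compatible install... yes\n"
    "error: missing header foo.h\n"
    "make: *** [all] Error 2\n"
)

print(check_csgrep())                         # False unless csgrep is installed
print(mine_logs(log_text, [error_line_extractor]))
# [(1, 'error: missing header foo.h'), (2, 'make: *** [all] Error 2')]
```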