logdetective 1.4.0.tar.gz → 1.5.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-1.4.0 → logdetective-1.5.0}/PKG-INFO +7 -4
- {logdetective-1.4.0 → logdetective-1.5.0}/README.md +5 -2
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/constants.py +0 -11
- logdetective-1.5.0/logdetective/extractors.py +42 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/logdetective.py +19 -22
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/models.py +0 -5
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/prompts.yml +0 -11
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/remote_log.py +1 -3
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/config.py +3 -4
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/emoji.py +3 -1
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/plot.py +1 -1
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/utils.py +9 -13
- {logdetective-1.4.0 → logdetective-1.5.0}/pyproject.toml +2 -2
- logdetective-1.4.0/logdetective/extractors.py +0 -105
- {logdetective-1.4.0 → logdetective-1.5.0}/LICENSE +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/__init__.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/drain3.ini +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/prompts-summary-first.yml +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/prompts-summary-only.yml +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/__init__.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/compressors.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/database/__init__.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/database/base.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/database/models/__init__.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/database/models/merge_request_jobs.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/database/models/metrics.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/gitlab.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/llm.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/metric.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/models.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/server.py +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
- {logdetective-1.4.0 → logdetective-1.5.0}/logdetective.1.asciidoc +0 -0
{logdetective-1.4.0 → logdetective-1.5.0}/PKG-INFO

````diff
@@ -1,12 +1,12 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 1.4.0
+Version: 1.5.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
 Author-email: jpodivin@gmail.com
 Requires-Python: >=3.11,<4.0
-Classifier: Development Status ::
+Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
@@ -87,8 +87,8 @@ Usage
 To analyze a log file, run the script with the following command line arguments:
 - `url` (required): The URL of the log file to be analyzed.
 - `--model` (optional, default: "Mistral-7B-Instruct-v0.2-GGUF"): The path or URL of the language model for analysis. As we are using LLama.cpp we want this to be in the `gguf` format. You can include the download link to the model here. If the model is already on your machine it will skip the download.
-- `--summarizer` (optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
-- `--n_lines` (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
+- `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
+- `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
 
 Example usage:
@@ -376,6 +376,9 @@ HTTPS certificate generated through:
 certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 ```
 
+Certificates need to be be placed into location specified by the`LOGDETECTIVE_CERTDIR`
+env var and the service should be restarted.
+
 Querying statistics
 -------------------
 
````
{logdetective-1.4.0 → logdetective-1.5.0}/README.md

````diff
@@ -43,8 +43,8 @@ Usage
 To analyze a log file, run the script with the following command line arguments:
 - `url` (required): The URL of the log file to be analyzed.
 - `--model` (optional, default: "Mistral-7B-Instruct-v0.2-GGUF"): The path or URL of the language model for analysis. As we are using LLama.cpp we want this to be in the `gguf` format. You can include the download link to the model here. If the model is already on your machine it will skip the download.
-- `--summarizer` (optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
-- `--n_lines` (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
+- `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
+- `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
 
 Example usage:
@@ -332,6 +332,9 @@ HTTPS certificate generated through:
 certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 ```
 
+Certificates need to be be placed into location specified by the`LOGDETECTIVE_CERTDIR`
+env var and the service should be restarted.
+
 Querying statistics
 -------------------
 
````
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/constants.py

```diff
@@ -26,17 +26,6 @@ Analysis:
 
 """
 
-SUMMARIZATION_PROMPT_TEMPLATE = """
-Does following log contain error or issue?
-
-Log:
-
-{}
-
-Answer:
-
-"""
-
 SNIPPET_PROMPT_TEMPLATE = """
 Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
```
logdetective-1.5.0/logdetective/extractors.py (new file)

```diff
@@ -0,0 +1,42 @@
+import os
+import logging
+from typing import Tuple
+
+import drain3
+from drain3.template_miner_config import TemplateMinerConfig
+
+from logdetective.utils import get_chunks
+
+LOG = logging.getLogger("logdetective")
+
+
+class DrainExtractor:
+    """A class that extracts information from logs using a template miner algorithm."""
+
+    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
+        config = TemplateMinerConfig()
+        config.load(f"{os.path.dirname(__file__)}/drain3.ini")
+        config.profiling_enabled = verbose
+        config.drain_max_clusters = max_clusters
+        self.miner = drain3.TemplateMiner(config=config)
+        self.verbose = verbose
+        self.context = context
+
+    def __call__(self, log: str) -> list[Tuple[int, str]]:
+        out = []
+        # First pass create clusters
+        for _, chunk in get_chunks(log):
+            processed_chunk = self.miner.add_log_message(chunk)
+            LOG.debug(processed_chunk)
+        # Sort found clusters by size, descending order
+        sorted_clusters = sorted(
+            self.miner.drain.clusters, key=lambda it: it.size, reverse=True
+        )
+        # Second pass, only matching lines with clusters,
+        # to recover original text
+        for chunk_start, chunk in get_chunks(log):
+            cluster = self.miner.match(chunk, "always")
+            if cluster in sorted_clusters:
+                out.append((chunk_start, chunk))
+                sorted_clusters.remove(cluster)
+        return out
```
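The new `DrainExtractor` works in two passes: the first pass feeds every chunk to the Drain template miner to build clusters, and the second pass re-matches chunks against the clusters (largest first) to recover one original snippet per cluster together with its starting position. A minimal usage sketch; the log content here is made up for illustration:

```python
from logdetective.extractors import DrainExtractor

# Hypothetical failing build log; any multi-line string works.
log_text = "\n".join(
    [
        "Installing dependencies",
        "gcc -O2 -c foo.c",
        "foo.c:12: error: expected ';' before 'return'",
        "make: *** [Makefile:10: foo.o] Error 1",
    ]
)

# max_clusters caps how many distinct log templates (and therefore
# how many snippets) are kept; verbose=True would enable drain3 profiling.
extractor = DrainExtractor(verbose=False, context=True, max_clusters=8)

# The extractor is callable and yields (chunk_start, chunk) tuples.
for chunk_start, chunk in extractor(log_text):
    print(chunk_start, chunk)
```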
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/logdetective.py

```diff
@@ -15,7 +15,7 @@ from logdetective.utils import (
     compute_certainty,
     load_prompts,
 )
-from logdetective.extractors import
+from logdetective.extractors import DrainExtractor
 
 LOG = logging.getLogger("logdetective")
 
@@ -49,16 +49,16 @@ def setup_args():
         "--summarizer",
         type=str,
         default="drain",
-        help="
-
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-N",
         "--n_lines",
         type=int,
-        default=
-        help="
-
+        default=None,
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-C",
@@ -74,13 +74,13 @@ def setup_args():
         "--prompts",
         type=str,
         default=f"{os.path.dirname(__file__)}/prompts.yml",
-        help="Path to prompt configuration file."
+        help="Path to prompt configuration file.",
     )
     parser.add_argument(
         "--temperature",
         type=float,
         default=DEFAULT_TEMPERATURE,
-        help="Temperature for inference."
+        help="Temperature for inference.",
     )
     return parser.parse_args()
 
@@ -93,6 +93,10 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals
         sys.stderr.write("Error: --quiet and --verbose is mutually exclusive.\n")
         sys.exit(2)
 
+    # Emit warning about use of discontinued args
+    if args.n_lines or args.summarizer != "drain":
+        LOG.warning("LLM based summarization was removed. Drain will be used instead.")
+
     # Logging facility setup
     log_level = logging.INFO
     if args.verbose >= 1:
@@ -116,18 +120,10 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals
         LOG.error("You likely do not have enough memory to load the AI model")
         sys.exit(3)
 
-    # Log file summarizer
-
-
-
-        )
-    else:
-        summarizer_model = initialize_model(args.summarizer, verbose=args.verbose > 2)
-        extractor = LLMExtractor(
-            summarizer_model,
-            args.verbose > 1,
-            prompts_configuration.summarization_prompt_template,
-        )
+    # Log file summarizer initialization
+    extractor = DrainExtractor(
+        args.verbose > 1, context=True, max_clusters=args.n_clusters
+    )
 
     LOG.info("Getting summary")
 
@@ -151,7 +147,8 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals
 
     prompt = (
         f"{prompts_configuration.default_system_prompt}\n"
-        f"{prompts_configuration.prompt_template}"
+        f"{prompts_configuration.prompt_template}"
+    )
 
     stream = True
     if args.no_stream:
@@ -191,7 +188,7 @@ async def run():  # pylint: disable=too-many-statements,too-many-locals
 
 
 def main():
-    """
+    """Evaluate logdetective program and wait for it to finish"""
     asyncio.run(run())
 
 
```
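With the `LLMExtractor` branch gone, `run()` has a single summarization path. A condensed, hypothetical sketch of the new wiring (`format_snippets` is the helper from `logdetective.utils`; downloading the log and calling the model are omitted here):

```python
from logdetective.extractors import DrainExtractor
from logdetective.utils import format_snippets


def summarize_log(log_text: str, n_clusters: int = 8, verbose: bool = False) -> str:
    # Mirrors the extractor initialization shown in the diff above.
    extractor = DrainExtractor(verbose, context=True, max_clusters=n_clusters)
    snippets = extractor(log_text)  # list of (line_number, chunk) tuples
    # The snippets are then formatted for embedding into the final prompt.
    return format_snippets(snippets)
```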
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/models.py

```diff
@@ -4,7 +4,6 @@ from pydantic import BaseModel
 from logdetective.constants import (
     PROMPT_TEMPLATE,
     PROMPT_TEMPLATE_STAGED,
-    SUMMARIZATION_PROMPT_TEMPLATE,
     SNIPPET_PROMPT_TEMPLATE,
     DEFAULT_SYSTEM_PROMPT,
 )
@@ -14,7 +13,6 @@ class PromptConfig(BaseModel):
     """Configuration for basic log detective prompts."""
 
     prompt_template: str = PROMPT_TEMPLATE
-    summarization_prompt_template: str = SUMMARIZATION_PROMPT_TEMPLATE
     snippet_prompt_template: str = SNIPPET_PROMPT_TEMPLATE
     prompt_template_staged: str = PROMPT_TEMPLATE_STAGED
 
@@ -27,9 +25,6 @@ class PromptConfig(BaseModel):
         if data is None:
             return
         self.prompt_template = data.get("prompt_template", PROMPT_TEMPLATE)
-        self.summarization_prompt_template = data.get(
-            "summarization_prompt_template", SUMMARIZATION_PROMPT_TEMPLATE
-        )
         self.snippet_prompt_template = data.get(
             "snippet_prompt_template", SNIPPET_PROMPT_TEMPLATE
         )
```
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/prompts.yml

```diff
@@ -21,17 +21,6 @@ prompt_template: |
 
   Analysis:
 
-
-summarization_prompt_template: |
-  Does following log contain error or issue?
-
-  Log:
-
-  {}
-
-  Answer:
-
-
 snippet_prompt_template: |
   Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
```
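With `summarization_prompt_template` gone from both `PromptConfig` and the shipped `prompts.yml`, a custom prompt file only needs the remaining keys; anything missing falls back to the defaults in `logdetective.constants`. A sketch with a made-up minimal file (not the shipped templates):

```python
import yaml

from logdetective.utils import load_prompts

# Hypothetical minimal prompt file using only keys that remain in 1.5.0.
with open("my-prompts.yml", "w", encoding="utf-8") as f:
    yaml.safe_dump(
        {
            "prompt_template": "Explain this log:\n\n{}\n\nAnalysis:",
            "snippet_prompt_template": "Describe this snippet:\n\n{}",
        },
        f,
    )

prompts = load_prompts("my-prompts.yml")  # returns a PromptConfig
print(prompts.prompt_template)
```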
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/config.py

```diff
@@ -52,11 +52,10 @@ def get_log(config: Config):
 
 
 def get_openai_api_client(ineference_config: InferenceConfig):
-    """Set up AsyncOpenAI client with default configuration.
-    """
+    """Set up AsyncOpenAI client with default configuration."""
     return AsyncOpenAI(
-        api_key=ineference_config.api_token,
-
+        api_key=ineference_config.api_token, base_url=ineference_config.url
+    )
 
 
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
```
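The reformatted `get_openai_api_client` still just constructs an `AsyncOpenAI` client from the inference config's token and URL. For reference, a sketch of how such a client is typically used with the standard `openai` package; the endpoint, token, and model name are placeholders, not values from logdetective:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Placeholder values standing in for InferenceConfig.api_token / .url.
    client = AsyncOpenAI(api_key="dummy-token", base_url="http://localhost:8000/v1")
    response = await client.chat.completions.create(
        model="placeholder-model",
        messages=[{"role": "user", "content": "Why did this build fail?"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())
```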
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/emoji.py

```diff
@@ -51,7 +51,9 @@ async def _handle_gitlab_operation(func: Callable, *args):
         else:
             LOG.exception(log_msg)
     except Exception as e:  # pylint: disable=broad-exception-caught
-        LOG.exception(
+        LOG.exception(
+            "Unexpected error during GitLab operation %s(%s): %s", func, args, e
+        )
 
 
 async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
```
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/server/plot.py

```diff
@@ -340,7 +340,7 @@ def _plot_emoji_data(  # pylint: disable=too-many-locals
     )
     all_counts.extend(counts)
 
-    colors = [cm.viridis(i) for i in numpy.linspace(0, 1, len(reactions_values_dict))]
+    colors = [cm.viridis(i) for i in numpy.linspace(0, 1, len(reactions_values_dict))]  # pylint: disable=no-member
 
     first_emoji = True
     for i, (emoji, dict_counts) in enumerate(reactions_values_dict.items()):
```
{logdetective-1.4.0 → logdetective-1.5.0}/logdetective/utils.py

```diff
@@ -179,7 +179,7 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
         summary += f"""
         Snippet No. {i}:
 
-        {s
+        {s}
         ================
         """
     return summary
@@ -198,8 +198,11 @@ def load_prompts(path: str | None) -> PromptConfig:
 
 
 def prompt_to_messages(
-
-
+    user_message: str,
+    system_prompt: str | None = None,
+    system_role: str = "developer",
+    user_role: str = "user",
+) -> List[Dict[str, str]]:
     """Turn prompt into list of message dictionaries.
     If `system_role` and `user_role` are the same, only a single message is created,
     as concatenation of `user_message` and `system_prompt`. This is useful for models which
@@ -208,22 +211,15 @@ def prompt_to_messages(
     """
     if system_role == user_role:
         messages = [
-            {
-                "role": system_role,
-                "content": f"{system_prompt}\n{user_message}"
-            }
+            {"role": system_role, "content": f"{system_prompt}\n{user_message}"}
         ]
     else:
-
         messages = [
-            {
-                "role": system_role,
-                "content": system_prompt
-            },
+            {"role": system_role, "content": system_prompt},
             {
                 "role": user_role,
                 "content": user_message,
-            }
+            },
         ]
 
     return messages
```
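The multi-line `prompt_to_messages` signature shown above makes both message shapes explicit. A small sketch using the defaults from the diff:

```python
from logdetective.utils import prompt_to_messages

# Distinct roles: system prompt and user message stay separate.
print(prompt_to_messages("Explain this failure.", system_prompt="Be terse."))
# [{'role': 'developer', 'content': 'Be terse.'},
#  {'role': 'user', 'content': 'Explain this failure.'}]

# Identical roles: both parts are concatenated into a single message,
# useful for models without a dedicated system/developer role.
print(
    prompt_to_messages(
        "Explain this failure.",
        system_prompt="Be terse.",
        system_role="user",
    )
)
# [{'role': 'user', 'content': 'Be terse.\nExplain this failure.'}]
```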
{logdetective-1.4.0 → logdetective-1.5.0}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "1.4.0"
+version = "1.5.0"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -15,7 +15,7 @@ packages = [
     { include = "logdetective" }
 ]
 classifiers = [
-    "Development Status ::
+    "Development Status :: 5 - Production/Stable",
     "Environment :: Console",
     "Intended Audience :: Developers",
     "License :: OSI Approved :: Apache Software License",
```
logdetective-1.4.0/logdetective/extractors.py (removed)

```diff
@@ -1,105 +0,0 @@
-import os
-import logging
-from typing import Tuple
-
-import drain3
-from drain3.template_miner_config import TemplateMinerConfig
-from llama_cpp import Llama, LlamaGrammar
-
-from logdetective.constants import SUMMARIZATION_PROMPT_TEMPLATE
-from logdetective.utils import get_chunks
-
-LOG = logging.getLogger("logdetective")
-
-
-class LLMExtractor:
-    """
-    A class that extracts relevant information from logs using a language model.
-    """
-
-    def __init__(
-        self,
-        model: Llama,
-        n_lines: int = 2,
-        prompt: str = SUMMARIZATION_PROMPT_TEMPLATE,
-    ):
-        self.model = model
-        self.n_lines = n_lines
-        self.grammar = LlamaGrammar.from_string(
-            'root ::= ("Yes" | "No")', verbose=False
-        )
-        self.prompt = prompt
-
-    def __call__(
-        self, log: str, n_lines: int = 2, neighbors: bool = False
-    ) -> list[str]:
-        chunks = self.rate_chunks(log)
-        out = self.create_extract(chunks, neighbors)
-        return out
-
-    def rate_chunks(self, log: str) -> list[tuple]:
-        """Scan log by the model and store results.
-
-        :param log: log file content
-        """
-        results = []
-        log_lines = log.split("\n")
-
-        for i in range(0, len(log_lines), self.n_lines):
-            block = "\n".join(log_lines[i: i + self.n_lines])
-            prompt = self.prompt.format(log)
-            out = self.model(prompt, max_tokens=7, grammar=self.grammar)
-            out = f"{out['choices'][0]['text']}\n"
-            results.append((block, out))
-
-        return results
-
-    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> list[str]:
-        """Extract interesting chunks from the model processing."""
-        interesting = []
-        summary = []
-        # pylint: disable=consider-using-enumerate
-        for i in range(len(chunks)):
-            if chunks[i][1].startswith("Yes"):
-                interesting.append(i)
-                if neighbors:
-                    interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)])
-
-        interesting = set(interesting)
-
-        for i in interesting:
-            summary.append(chunks[i][0])
-
-        return summary
-
-
-class DrainExtractor:
-    """A class that extracts information from logs using a template miner algorithm."""
-
-    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
-        config = TemplateMinerConfig()
-        config.load(f"{os.path.dirname(__file__)}/drain3.ini")
-        config.profiling_enabled = verbose
-        config.drain_max_clusters = max_clusters
-        self.miner = drain3.TemplateMiner(config=config)
-        self.verbose = verbose
-        self.context = context
-
-    def __call__(self, log: str) -> list[Tuple[int, str]]:
-        out = []
-        # First pass create clusters
-        for _, chunk in get_chunks(log):
-            processed_chunk = self.miner.add_log_message(chunk)
-            LOG.debug(processed_chunk)
-        # Sort found clusters by size, descending order
-        sorted_clusters = sorted(
-            self.miner.drain.clusters, key=lambda it: it.size, reverse=True
-        )
-        # Second pass, only matching lines with clusters,
-        # to recover original text
-        for chunk_start, chunk in get_chunks(log):
-            cluster = self.miner.match(chunk, "always")
-            if cluster in sorted_clusters:
-                out.append((chunk_start, chunk))
-                sorted_clusters.remove(cluster)
-        return out
```