logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
- logdetective/constants.py +33 -12
- logdetective/extractors.py +137 -68
- logdetective/logdetective.py +102 -33
- logdetective/models.py +99 -0
- logdetective/prompts-summary-first.yml +20 -0
- logdetective/prompts-summary-only.yml +13 -0
- logdetective/prompts.yml +90 -0
- logdetective/remote_log.py +67 -0
- logdetective/server/compressors.py +186 -0
- logdetective/server/config.py +78 -0
- logdetective/server/database/base.py +34 -26
- logdetective/server/database/models/__init__.py +33 -0
- logdetective/server/database/models/exceptions.py +17 -0
- logdetective/server/database/models/koji.py +143 -0
- logdetective/server/database/models/merge_request_jobs.py +623 -0
- logdetective/server/database/models/metrics.py +427 -0
- logdetective/server/emoji.py +148 -0
- logdetective/server/exceptions.py +37 -0
- logdetective/server/gitlab.py +451 -0
- logdetective/server/koji.py +159 -0
- logdetective/server/llm.py +309 -0
- logdetective/server/metric.py +75 -30
- logdetective/server/models.py +426 -23
- logdetective/server/plot.py +432 -0
- logdetective/server/server.py +580 -468
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
- logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
- logdetective/server/utils.py +98 -32
- logdetective/skip_snippets.yml +12 -0
- logdetective/utils.py +187 -73
- logdetective-2.11.0.dist-info/METADATA +568 -0
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective/server/database/models.py +0 -88
- logdetective-0.4.0.dist-info/METADATA +0 -333
- logdetective-0.4.0.dist-info/RECORD +0 -19
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/constants.py
CHANGED
@@ -1,5 +1,10 @@
+"""This file contains various constants to be used as a fallback
+in case other values are not specified. Prompt templates should be modified
+in prompts.yaml instead.
+"""
+
 # pylint: disable=line-too-long
-DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.
+DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.3-GGUF"
 
 PROMPT_TEMPLATE = """
 Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
@@ -11,6 +16,8 @@ Snippets are delimited with '================'.
 
 Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -19,20 +26,11 @@ Analysis:
 
 """
 
-SUMMARIZE_PROMPT_TEMPLATE = """
-Does following log contain error or issue?
-
-Log:
-
-{}
-
-Answer:
-
-"""
-
 SNIPPET_PROMPT_TEMPLATE = """
 Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
+Your analysis must be as concise as possible, while keeping relevant information intact.
+
 Snippet:
 
 {}
@@ -50,6 +48,8 @@ Snippets are delimited with '================'.
 
 Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -58,4 +58,25 @@ Analysis:
 
 """
 
+DEFAULT_SYSTEM_PROMPT = """
+You are a highly capable large language model based expert system specialized in
+packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+RPM build failures, identifying root causes and proposing solutions if possible.
+You are truthful, concise, and helpful.
+
+You never speculate about package being built or fabricate information.
+If you do not know the answer, you acknowledge the fact and end your response.
+Your responses must be as short as possible.
+"""
+
 SNIPPET_DELIMITER = "================"
+
+DEFAULT_TEMPERATURE = 0.8
+
+# Tuning for LLM-as-a-Service
+LLM_DEFAULT_MAX_QUEUE_SIZE = 50
+LLM_DEFAULT_REQUESTS_PER_MINUTE = 60
+
+# Roles for chat API
+SYSTEM_ROLE_DEFAULT = "developer"
+USER_ROLE_DEFAULT = "user"
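The new chat-role constants and DEFAULT_SYSTEM_PROMPT suggest prompts are now sent through a chat-style completion API (the CLI changes below read response["choices"][0]["message"]["content"], which matches that). A minimal sketch of how these constants could be assembled into a message list; the build_messages helper is illustrative, not part of the package:

from logdetective.constants import (
    DEFAULT_SYSTEM_PROMPT,
    PROMPT_TEMPLATE,
    SYSTEM_ROLE_DEFAULT,
    USER_ROLE_DEFAULT,
)

def build_messages(snippets_text: str) -> list[dict]:
    # Hypothetical helper: pair the system prompt with a user prompt built
    # from PROMPT_TEMPLATE, whose single `{}` placeholder takes the snippets.
    return [
        {"role": SYSTEM_ROLE_DEFAULT, "content": DEFAULT_SYSTEM_PROMPT},
        {"role": USER_ROLE_DEFAULT, "content": PROMPT_TEMPLATE.format(snippets_text)},
    ]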
logdetective/extractors.py
CHANGED
@@ -1,99 +1,168 @@
 import os
 import logging
+import subprocess as sp
 from typing import Tuple
 
 import drain3
 from drain3.template_miner_config import TemplateMinerConfig
-from
+from pydantic import ValidationError
 
-from logdetective.
-from logdetective.
+from logdetective.utils import get_chunks, filter_snippet_patterns
+from logdetective.models import SkipSnippets, CSGrepOutput
 
 LOG = logging.getLogger("logdetective")
 
 
-class
-    """
-    A class that extracts relevant information from logs using a language model.
-    """
+class Extractor:
+    """Base extractor class."""
 
-    def __init__(
-        self
-
-
-
-
-
-
-        self
-    ) -> list[str]:
-        chunks = self.rate_chunks(log)
-        out = self.create_extract(chunks, neighbors)
-        return out
-
-    def rate_chunks(self, log: str) -> list[tuple]:
-        """Scan log by the model and store results.
-
-        :param log: log file content
-        """
-        results = []
-        log_lines = log.split("\n")
-
-        for i in range(0, len(log_lines), self.n_lines):
-            block = "\n".join(log_lines[i: i + self.n_lines])
-            prompt = SUMMARIZE_PROMPT_TEMPLATE.format(log)
-            out = self.model(prompt, max_tokens=7, grammar=self.grammar)
-            out = f"{out['choices'][0]['text']}\n"
-            results.append((block, out))
-
-        return results
-
-    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> list[str]:
-        """Extract interesting chunks from the model processing."""
-        interesting = []
-        summary = []
-        # pylint: disable=consider-using-enumerate
-        for i in range(len(chunks)):
-            if chunks[i][1].startswith("Yes"):
-                interesting.append(i)
-            if neighbors:
-                interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)])
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+    ):
+        self.verbose = verbose
+        self.skip_snippets = skip_snippets
+        self.max_snippet_len = max_snippet_len
 
-
+        if self.verbose:
+            LOG.setLevel(logging.DEBUG)
 
-
-
+    def __call__(self, log: str) -> list[Tuple[int, str]]:
+        raise NotImplementedError
 
-
+    def filter_snippet_patterns(
+        self, chunks: list[tuple[int, str]]
+    ) -> list[tuple[int, str]]:
+        """Keep only chunks that don't match any of the excluded patterns"""
+        chunks = [
+            (_, chunk)
+            for _, chunk in chunks
+            if not filter_snippet_patterns(chunk, self.skip_snippets)
+        ]
+        return chunks
 
 
-class DrainExtractor:
+class DrainExtractor(Extractor):
     """A class that extracts information from logs using a template miner algorithm."""
 
-
+    _clusters: list
+
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+        max_clusters: int = 8,
+    ):
+        super().__init__(verbose, skip_snippets, max_snippet_len)
         config = TemplateMinerConfig()
         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
         config.profiling_enabled = verbose
         config.drain_max_clusters = max_clusters
         self.miner = drain3.TemplateMiner(config=config)
-        self.verbose = verbose
-        self.context = context
 
     def __call__(self, log: str) -> list[Tuple[int, str]]:
-
-
-
-
-
-        #
-
-
-        )
+        # Create chunks
+        chunks = list(get_chunks(log, self.max_snippet_len))
+
+        chunks = self.filter_snippet_patterns(chunks)
+
+        # First pass to create clusters
+        self._create_clusters(chunks=chunks)
+
         # Second pass, only matching lines with clusters,
         # to recover original text
-
+        snippets = self._extract_messages(chunks=chunks)
+        return snippets
+
+    def _create_clusters(self, chunks: list[tuple[int, str]]):
+        """First pass to create clusters"""
+        for _, chunk in chunks:
+            processed_chunk = self.miner.add_log_message(chunk)
+            LOG.debug(processed_chunk)
+        self._clusters = list(self.miner.drain.clusters)
+
+    def _extract_messages(self, chunks: list[tuple[int, str]]) -> list[tuple[int, str]]:
+        """Second pass with drain using patterns from the first,
+        to extract matching lines and their numbers."""
+        out = []
+
+        for chunk_start, chunk in chunks:
             cluster = self.miner.match(chunk, "always")
-            if cluster in
+            if cluster in self._clusters:
                 out.append((chunk_start, chunk))
-
+                self._clusters.remove(cluster)
         return out
+
+
+class CSGrepExtractor(DrainExtractor):
+    """Extract messages using csgrep
+    This extractor is only effective at retrieving messages from GCC
+    compiler and associated utilities, it is not capable of safely
+    extracting other messages from the logs. Therefore, it must only
+    be used together with the Drain based extractor."""
+
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+        max_clusters: int = 8,
+    ):
+        super().__init__(verbose, skip_snippets, max_snippet_len, max_clusters)
+
+    def __call__(self, log: str) -> list[Tuple[int, str]]:
+        """Extract error messages from log using csgrep"""
+        chunks = []
+        try:
+            # We are not running binary in check mode, since csgrep
+            # can produce many errors due to log file syntax
+            result = sp.run(
+                [
+                    "csgrep",
+                    "--event=error",
+                    "--remove-duplicates",
+                    "--mode=json",
+                    "--quiet",
+                ],
+                input=log,
+                shell=False,
+                check=False,
+                capture_output=True,
+                text=True,
+                timeout=1.0,
+            )
+        except sp.TimeoutExpired as ex:
+            LOG.exception("Exception encountered while parsing log with csgrep %s", ex)
+            raise ex
+        if result.returncode != 0:
+            # This can happen even if `csgrep` managed to extract useful info.
+            # Most commonly, when it encountered unexpected syntax in the log.
+            LOG.warning("csgrep call resulted in an error")
+            LOG.debug("csgrep error: `%s`", result.stderr)
+        if not result.stdout:
+            return []
+
+        # Parse JSON output from csgrep
+        try:
+            report = CSGrepOutput.model_validate_json(result.stdout)
+        except ValidationError as ex:
+            LOG.exception("Exception encountered while parsing csgrpe output %s", ex)
+            raise ex
+        for defect in report.defects:
+            # Single original error message can be split across multiple events
+            # before returning, we will turn them back into single string.
+            # We must also extract the original line number.
+            # Line number is NOT location of message in the log, but location of
+            # the issue in source, we can't really mix the two, so we'll set it to `0`.
+
+            chunks.append((0, "\n".join([event.message for event in defect.events])))
+
+        chunks = self.filter_snippet_patterns(chunks)
+        LOG.info("Total %d messages extracted with csgrep", len(chunks))
+        self._create_clusters(chunks=chunks)
+        snippets = self._extract_messages(chunks=chunks)
+
+        return snippets
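Going by the signatures above, a minimal usage sketch of the reworked extractor API (the log content here is made up):

from logdetective.extractors import DrainExtractor
from logdetective.models import SkipSnippets

# All arguments mirror the defaults declared in the diff above.
extractor = DrainExtractor(
    verbose=False,
    skip_snippets=SkipSnippets({}),  # no skip patterns
    max_snippet_len=2000,
    max_clusters=8,
)

log = "checking for gcc... yes\nerror: linker command failed with exit code 1\n"
# __call__ now returns (chunk_start, chunk) tuples instead of the old list[str].
for chunk_start, chunk in extractor(log):
    print(chunk_start, chunk)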
logdetective/logdetective.py
CHANGED
@@ -1,16 +1,24 @@
 import argparse
+import asyncio
 import logging
 import sys
+import os
 
-
+import aiohttp
+
+from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
 from logdetective.utils import (
     process_log,
     initialize_model,
     retrieve_log_content,
     format_snippets,
     compute_certainty,
+    load_prompts,
+    load_skip_snippet_patterns,
+    check_csgrep,
+    mine_logs,
 )
-from logdetective.extractors import
+from logdetective.extractors import DrainExtractor, CSGrepExtractor
 
 LOG = logging.getLogger("logdetective")
 
@@ -36,7 +44,7 @@ def setup_args():
         "--filename_suffix",
         help="Suffix of the model file name to be retrieved from Hugging Face.\
             Makes sense only if the model is specified with Hugging Face name.",
-        default="
+        default="Q4_K.gguf",
     )
     parser.add_argument("-n", "--no-stream", action="store_true")
     parser.add_argument(
@@ -44,16 +52,16 @@ def setup_args():
         "--summarizer",
         type=str,
         default="drain",
-        help="
-
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-N",
         "--n_lines",
         type=int,
-        default=
-        help="
-
+        default=None,
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-C",
@@ -65,10 +73,31 @@ def setup_args():
     )
     parser.add_argument("-v", "--verbose", action="count", default=0)
     parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument(
+        "--prompts",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/prompts.yml",
+        help="Path to prompt configuration file.",
+    )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=DEFAULT_TEMPERATURE,
+        help="Temperature for inference.",
+    )
+    parser.add_argument(
+        "--skip_snippets",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
+        help="Path to patterns for skipping snippets.",
+    )
+    parser.add_argument(
+        "--csgrep", action="store_true", help="Use csgrep to process the log."
+    )
     return parser.parse_args()
 
 
-def
+async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
     """Main execution function."""
     args = setup_args()
 
@@ -76,6 +105,10 @@ def main(): # pylint: disable=too-many-statements
         sys.stderr.write("Error: --quiet and --verbose is mutually exclusive.\n")
         sys.exit(2)
 
+    # Emit warning about use of discontinued args
+    if args.n_lines or args.summarizer != "drain":
+        LOG.warning("LLM based summarization was removed. Drain will be used instead.")
+
     # Logging facility setup
     log_level = logging.INFO
     if args.verbose >= 1:
@@ -83,6 +116,9 @@ def main(): # pylint: disable=too-many-statements
     if args.quiet:
         log_level = 0
 
+    # Get prompts configuration
+    prompts_configuration = load_prompts(args.prompts)
+
     logging.basicConfig(stream=sys.stdout)
     LOG.setLevel(log_level)
 
@@ -96,29 +132,43 @@ def main(): # pylint: disable=too-many-statements
         LOG.error("You likely do not have enough memory to load the AI model")
         sys.exit(3)
 
-    # Log file summarizer selection and initialization
-    if args.summarizer == "drain":
-        extractor = DrainExtractor(
-            args.verbose > 1, context=True, max_clusters=args.n_clusters
-        )
-    else:
-        summarizer_model = initialize_model(args.summarizer, verbose=args.verbose > 2)
-        extractor = LLMExtractor(summarizer_model, args.verbose > 1)
-
-    LOG.info("Getting summary")
-
     try:
-
-    except
-        # file does not exists
+        skip_snippets = load_skip_snippet_patterns(args.skip_snippets)
+    except OSError as e:
         LOG.error(e)
-        sys.exit(
-
+        sys.exit(5)
+
+    # Log file summarizer initialization
+    extractors = []
+    extractors.append(
+        DrainExtractor(
+            args.verbose > 1,
+            max_clusters=args.n_clusters,
+            skip_snippets=skip_snippets,
+        )
+    )
 
-
+    if args.csgrep:
+        if not check_csgrep():
+            LOG.error(
+                "You have requested use of `csgrep` when it isn't available on your system."
+            )
+            sys.exit(6)
+        extractors.append(
+            CSGrepExtractor(args.verbose > 1, skip_snippets=skip_snippets)
+        )
+
+    LOG.info("Getting summary")
 
-
+    async with aiohttp.ClientSession() as http:
+        try:
+            log = await retrieve_log_content(http, args.file)
+        except ValueError as e:
+            # file does not exist
+            LOG.error(e)
+            sys.exit(4)
 
+    log_summary = mine_logs(log=log, extractors=extractors)
     LOG.info("Analyzing the text")
 
     log_summary = format_snippets(log_summary)
@@ -127,33 +177,52 @@ def main(): # pylint: disable=too-many-statements
     stream = True
     if args.no_stream:
         stream = False
-    response = process_log(
+    response = process_log(
+        log_summary,
+        model,
+        stream,
+        prompt_templates=prompts_configuration,
+        temperature=args.temperature,
+    )
    probs = []
     print("Explanation:")
     # We need to extract top token probability from the response
-    #
+    # CreateChatCompletionResponse structure of llama-cpp-python.
     # `compute_certainty` function expects list of dictionaries with form
     # { 'logprob': <float> } as expected from the OpenAI API.
 
     if args.no_stream:
-        print(response["choices"][0]["
+        print(response["choices"][0]["message"]["content"])
         probs = [
-            {"logprob": e} for e in response["choices"][0]["logprobs"]["
+            {"logprob": e["logprob"]} for e in response["choices"][0]["logprobs"]["content"]
         ]
 
     else:
         # Stream the output
         for chunk in response:
+            # What might happen, is that first (or possibly any other) chunk may not contain
+            # fields choices[0].delta.content or choices[0].logprobs -> if so, we just skip it
+            if any([
+                'content' not in chunk["choices"][0]["delta"],
+                'logprobs' not in chunk["choices"][0]
+            ]):
+                continue
+
             if isinstance(chunk["choices"][0]["logprobs"], dict):
                 probs.append(
-                    {"logprob": chunk["choices"][0]["logprobs"]["
+                    {"logprob": chunk["choices"][0]["logprobs"]["content"][0]["logprob"]}
                 )
-            delta = chunk["choices"][0]["
+            delta = chunk["choices"][0]["delta"]["content"]
            print(delta, end="", flush=True)
     certainty = compute_certainty(probs)
 
     print(f"\nResponse certainty: {certainty:.2f}%\n")
 
 
+def main():
+    """Evaluate logdetective program and wait for it to finish"""
+    asyncio.run(run())
+
+
 if __name__ == "__main__":
     main()
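Pieced together from the calls run() makes above, the library-level pipeline now looks roughly like this; a sketch only, assuming retrieve_log_content accepts a local path or URL (the file name is made up):

import asyncio
import aiohttp

from logdetective.extractors import DrainExtractor
from logdetective.utils import retrieve_log_content, mine_logs, format_snippets

async def summarize(path: str) -> str:
    # Fetch the log over a shared aiohttp session, mine snippets with the
    # configured extractors, then format them for the prompt template.
    async with aiohttp.ClientSession() as http:
        log = await retrieve_log_content(http, path)
    snippets = mine_logs(log=log, extractors=[DrainExtractor()])
    return format_snippets(snippets)

print(asyncio.run(summarize("builder-live.log")))  # hypothetical file name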
logdetective/models.py
ADDED
@@ -0,0 +1,99 @@
+import re
+from typing import Optional
+from pydantic import BaseModel, model_validator
+
+from logdetective.constants import (
+    PROMPT_TEMPLATE,
+    PROMPT_TEMPLATE_STAGED,
+    SNIPPET_PROMPT_TEMPLATE,
+    DEFAULT_SYSTEM_PROMPT,
+)
+
+
+class PromptConfig(BaseModel):
+    """Configuration for basic log detective prompts."""
+
+    prompt_template: str = PROMPT_TEMPLATE
+    snippet_prompt_template: str = SNIPPET_PROMPT_TEMPLATE
+    prompt_template_staged: str = PROMPT_TEMPLATE_STAGED
+
+    default_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+    snippet_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+    staged_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+        self.prompt_template = data.get("prompt_template", PROMPT_TEMPLATE)
+        self.snippet_prompt_template = data.get(
+            "snippet_prompt_template", SNIPPET_PROMPT_TEMPLATE
+        )
+        self.prompt_template_staged = data.get(
+            "prompt_template_staged", PROMPT_TEMPLATE_STAGED
+        )
+        self.default_system_prompt = data.get(
+            "default_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+        self.snippet_system_prompt = data.get(
+            "snippet_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+        self.staged_system_prompt = data.get(
+            "staged_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+
+
+class SkipSnippets(BaseModel):
+    """Regular expressions defining snippets we should not analyze"""
+
+    snippet_patterns: dict[str, re.Pattern] = {}
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__(data=data)
+        if data is None:
+            return
+        self.snippet_patterns = {
+            key: re.compile(pattern) for key, pattern in data.items()
+        }
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_patterns(cls, data: dict):
+        """Check if all supplied patterns are valid regular expressions.
+        Techically replicating what is done in __init__ but with nicer error message."""
+        patterns = data["data"]
+        for key, pattern in patterns.items():
+            try:
+                re.compile(pattern=pattern)
+            except (TypeError, re.error) as ex:
+                raise ValueError(
+                    f"Invalid pattern `{pattern}` with name `{key}` supplied for skipping in logs."
+                ) from ex
+
+        return data
+
+
+class CSGrepEvent(BaseModel):
+    """`csgrep` splits error and warning messages into individual events."""
+
+    file_name: str
+    line: int
+    event: str
+    message: str
+    verbosity_level: int
+
+
+class CSGrepDefect(BaseModel):
+    """Defects detected by `csgrep`"""
+
+    checker: str
+    language: str
+    tool: str
+    key_event_idx: int
+    events: list[CSGrepEvent]
+
+
+class CSGrepOutput(BaseModel):
+    """Parsed output of `gsgrep`"""
+
+    defects: list[CSGrepDefect]
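A short sketch exercising the new models; both the regex and the JSON payload are illustrative, shaped only to match the field definitions above:

from logdetective.models import CSGrepOutput, SkipSnippets

# Pattern name and regex are made up; SkipSnippets compiles each value.
skip = SkipSnippets({"dnf_noise": r"^DEBUG util\.py"})
assert skip.snippet_patterns["dnf_noise"].match("DEBUG util.py:442 - base command")

# Fabricated csgrep-style JSON matching CSGrepOutput/CSGrepDefect/CSGrepEvent.
payload = """{"defects": [{"checker": "COMPILER_WARNING", "language": "c/c++",
  "tool": "gcc", "key_event_idx": 0,
  "events": [{"file_name": "main.c", "line": 3, "event": "error",
              "message": "expected ';' before '}' token", "verbosity_level": 0}]}]}"""
report = CSGrepOutput.model_validate_json(payload)
print(report.defects[0].events[0].message)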
logdetective/prompts-summary-first.yml
ADDED

@@ -0,0 +1,20 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
+
+  Please start with concise, one sentence long, summary describing the problem and recommend solution to fix it. And then follow with analysis.
+
+  Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+  Snippets themselves must not be altered in any way whatsoever.
+
+  Snippets are delimited with '================'.
+
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
+  Snippets:
+
+  {}
logdetective/prompts-summary-only.yml
ADDED

@@ -0,0 +1,13 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
+
+  Provide concise, one paragraph long, summary describing the problem of most probable culprit and recommend solution to fix it.
+
+  Snippets:
+
+  {}
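load_prompts itself is not shown in this diff, but given PromptConfig above, loading one of these files plausibly reduces to plain YAML parsing; a sketch under that assumption:

import yaml

from logdetective.models import PromptConfig

# Keys present in the file override the defaults from constants.py;
# everything else (e.g. default_system_prompt) falls back silently.
with open("logdetective/prompts-summary-only.yml", encoding="utf-8") as fp:
    config = PromptConfig(yaml.safe_load(fp))

print(config.prompt_template)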