logdetective 0.2.9__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-0.2.9 → logdetective-0.2.11}/PKG-INFO +3 -3
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/extractors.py +1 -1
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/logdetective.py +1 -0
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/server.py +24 -21
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/utils.py +13 -3
- {logdetective-0.2.9 → logdetective-0.2.11}/pyproject.toml +4 -2
- {logdetective-0.2.9 → logdetective-0.2.11}/LICENSE +0 -0
- {logdetective-0.2.9 → logdetective-0.2.11}/README.md +0 -0
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/__init__.py +0 -0
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/constants.py +0 -0
- {logdetective-0.2.9 → logdetective-0.2.11}/logdetective/drain3.ini +0 -0
{logdetective-0.2.9 → logdetective-0.2.11}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: logdetective
-Version: 0.2.9
+Version: 0.2.11
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -22,7 +22,7 @@ Provides-Extra: server
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
-Requires-Dist: numpy (>=1.26.0
+Requires-Dist: numpy (>=1.26.0)
 Requires-Dist: requests (>0.2.31)
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
{logdetective-0.2.9 → logdetective-0.2.11}/logdetective/extractors.py

@@ -16,7 +16,7 @@ class LLMExtractor:
     A class that extracts relevant information from logs using a language model.
     """
     def __init__(self, model: Llama, n_lines: int = 2):
-        self.model =
+        self.model = model
         self.n_lines = n_lines
         self.grammar = LlamaGrammar.from_string(
            "root ::= (\"Yes\" | \"No\")", verbose=False)
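The only functional change here completes the constructor assignment. The surrounding context shows the more interesting detail: LLMExtractor constrains the model to a Yes/No grammar. Below is a minimal, illustrative sketch of that technique with llama-cpp-python; the model path and the prompt are placeholders, not taken from the package.

```python
from llama_cpp import Llama, LlamaGrammar

# Placeholder model path; any local GGUF model would do.
model = Llama(model_path="model.gguf", verbose=False)

# Same grammar string as in LLMExtractor: the model may only emit "Yes" or "No".
grammar = LlamaGrammar.from_string('root ::= ("Yes" | "No")', verbose=False)

out = model(
    "Is the following log line relevant to the build failure?\n"
    "error: linker command failed with exit code 1\n"
    "Answer:",
    grammar=grammar,
    max_tokens=2,
)
print(out["choices"][0]["text"])  # either "Yes" or "No"
```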
{logdetective-0.2.9 → logdetective-0.2.11}/logdetective/server.py

@@ -16,6 +16,7 @@ from logdetective.constants import (
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import validate_url, compute_certainty
 
+
 class BuildLog(BaseModel):
     """Model of data submitted to API.
     """
@@ -29,7 +30,7 @@ class Response(BaseModel):
     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
     response_certainty: float
     """
-    explanation:
+    explanation: Dict
     response_certainty: float
 
 
@@ -43,10 +44,10 @@ class StagedResponse(Response):
     snippets:
         list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
     """
-    snippets: List[Dict[str, str |
+    snippets: List[Dict[str, str | Dict]]
 
-LOG = logging.getLogger("logdetective")
 
+LOG = logging.getLogger("logdetective")
 
 LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
 LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
@@ -55,6 +56,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
 
+
 def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
     """
     FastAPI Depend function that expects a header named Authentication
@@ -82,6 +84,7 @@ def requires_token_when_set(authentication: Annotated[str | None, Header()] = No
             API_TOKEN, token)
     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
 
+
 app = FastAPI(dependencies=[Depends(requires_token_when_set)])
 
 
@@ -99,7 +102,7 @@ def process_url(url: str) -> str:
         if not log_request.ok:
             raise HTTPException(status_code=400,
                                 detail="Something went wrong while getting the logs: "
-
+                                       f"[{log_request.status_code}] {log_request.text}")
     else:
         LOG.error("Invalid URL received ")
         raise HTTPException(status_code=400,
@@ -111,7 +114,7 @@ def process_url(url: str) -> str:
 def mine_logs(log: str) -> List[str]:
     """Extract snippets from log text
     """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
@@ -120,10 +123,10 @@ def mine_logs(log: str) -> List[str]:
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
 
-
     return log_summary
 
-
+
+async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
                       model: str = "default-model"):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
@@ -131,17 +134,17 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
     """
     LOG.info("Analyzing the text")
     data = {
-
-
-
-
-
+        "prompt": text,
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
+        "stream": stream,
+        "model": model}
 
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
         response = requests.post(
             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-            headers={"Content-Type":"application/json"},
+            headers={"Content-Type": "application/json"},
             data=json.dumps(data),
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
             stream=stream)
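The rewritten data dictionary is the JSON payload that submit_text posts to the llama.cpp server's OpenAI-compatible /v1/completions endpoint. A standalone sketch of an equivalent request follows; the address and port are assumptions (in server.py they come from the LLAMA_CPP_HOST and LLAMA_CPP_SERVER_PORT environment variables), and the prompt is a placeholder.

```python
import json
import requests

# Assumed local llama.cpp server address; not part of the package defaults.
url = "http://localhost:8000/v1/completions"

data = {
    "prompt": "Explain this snippet: error: linker command failed",
    "max_tokens": -1,   # new default in submit_text; llama.cpp treats this as "until EOS"
    "logprobs": 1,      # request token log-probabilities so compute_certainty has input
    "stream": False,
    "model": "default-model",
}

response = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    data=json.dumps(data),
    timeout=600,
)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```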
@@ -154,7 +157,7 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
         raise HTTPException(
             status_code=400,
             detail="Something went wrong while getting a response from the llama server: "
-
+                   f"[{response.status_code}] {response.text}")
     try:
         response = json.loads(response.text)
     except UnicodeDecodeError as ex:
@@ -184,13 +187,13 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                       f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                       f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
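Both certainty computations now read top_logprobs from logprobs["content"][0] instead of directly from logprobs. The sketch below shows the response shape this new access path assumes (a nested, OpenAI-style logprobs structure); the literal values are invented and only the nesting matters.

```python
# Illustrative completion-response shape assumed by the 0.2.11 code path.
response = {
    "choices": [
        {
            "text": "The linker step failed because ...",
            "logprobs": {
                "content": [
                    {"top_logprobs": [{"token": "The", "logprob": -0.12}]},
                ],
            },
        }
    ]
}

# Old access path (0.2.9):
#   response["choices"][0]["logprobs"]["top_logprobs"]
# New access path (0.2.11):
top_logprobs = response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
print(top_logprobs)
```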
@@ -211,27 +214,27 @@ async def analyze_log_staged(build_log: BuildLog):
         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
 
     analyzed_snippets = [
-        {"snippet":e[0], "comment":e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         f"\n{SNIPPET_DELIMITER}\n".join([
             f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-
+            for e in analyzed_snippets]))
 
     final_analysis = await submit_text(final_prompt)
-
+    print(final_analysis)
     certainty = 0
 
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                       f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                       f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return StagedResponse(
         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
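analyze_log_staged analyzes every extracted snippet concurrently and then asks the model for a final summary built from the per-snippet answers. The simplified sketch below shows that flow; analyze() is a stand-in for submit_text, and the two prompt strings are placeholders for SNIPPET_PROMPT_TEMPLATE and PROMPT_TEMPLATE_STAGED.

```python
import asyncio

SNIPPET_PROMPT = "Explain this log snippet:\n{}"   # placeholder template
FINAL_PROMPT = "Summarize the failure given:\n{}"  # placeholder template

async def analyze(prompt: str) -> dict:
    """Stand-in for submit_text(); the real code POSTs to the llama.cpp server."""
    return {"choices": [{"text": f"analysis of: {prompt[:30]}..."}]}

async def staged(log_summary: list) -> dict:
    # Analyze every snippet concurrently, as analyze_log_staged does.
    analyzed = await asyncio.gather(
        *[analyze(SNIPPET_PROMPT.format(s)) for s in log_summary])
    snippets = [{"snippet": s, "comment": c} for s, c in zip(log_summary, analyzed)]
    # Fold the per-snippet answers into one final prompt.
    final_prompt = FINAL_PROMPT.format(
        "\n".join(f"[{e['snippet']}] : [{e['comment']['choices'][0]['text']}]"
                  for e in snippets))
    return await analyze(final_prompt)

print(asyncio.run(staged(["error: linker command failed", "make: *** [all] Error 2"])))
```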
{logdetective-0.2.9 → logdetective-0.2.11}/logdetective/utils.py

@@ -15,10 +15,17 @@ LOG = logging.getLogger("logdetective")
 def chunk_continues(text: str, index: int) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
+
+    Following rules are checked, in order:
+    * is the next character is whitespace
+    * is the previous character backslash '\\'
+    * is the previous character colon ':'
+
     """
     conditionals = [
         lambda i, string: string[i + 1].isspace(),
-        lambda i, string: string[i - 1] == "\\"
+        lambda i, string: string[i - 1] == "\\",
+        lambda i, string: string[i - 1] == ":"
     ]
 
     for c in conditionals:
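The new docstring spells out the heuristics, and the diff adds the colon rule. A self-contained version of chunk_continues, reconstructed from the diffed lines, is below; the loop body after the conditionals list is not shown in the hunk, so its exact form is an assumption.

```python
def chunk_continues(text: str, index: int) -> bool:
    """Heuristics for deciding whether the chunk continues on the next line:
    the next character is whitespace, or the previous character is '\\' or ':'."""
    conditionals = [
        lambda i, string: string[i + 1].isspace(),
        lambda i, string: string[i - 1] == "\\",
        lambda i, string: string[i - 1] == ":",
    ]
    # Assumed completion of the loop; the hunk only shows its first line.
    for check in conditionals:
        if check(index, text):
            return True
    return False

# The character after index 11 is the newline, i.e. whitespace, so the chunk continues.
print(chunk_continues("gcc -o foo \\\n    foo.c", 11))  # True
```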
@@ -74,16 +81,19 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
     return model
 
 
-def compute_certainty(probs: List[Dict
+def compute_certainty(probs: List[Dict]) -> float:
     """Compute certainty of repsponse based on average logit probability.
     Log probability is log(p), isn't really readable for most people, especially in compound.
     In this case it's just a matter of applying inverse operation exp.
     Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
     Simply put, this is the most straightforward way to get the numbers out.
+
+    This function is used in the server codebase.
     """
 
     top_logprobs = [
-        np.exp(
+        np.exp(e["logprob"]) * 100 for e in probs]
+
     certainty = np.median(top_logprobs, axis=0)
     if np.isnan(certainty):
         raise ValueError("NaN certainty of answer")
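After this change compute_certainty expects a list of dictionaries that each carry a logprob value, converts every log-probability back to a probability with exp, scales it to a percentage, and reports the median. A runnable sketch with invented log-probabilities follows; the final return statement is not visible in the hunk and is an assumed completion.

```python
from typing import Dict, List

import numpy as np

def compute_certainty(probs: List[Dict]) -> float:
    """Median of per-token probabilities, expressed as a percentage."""
    top_logprobs = [np.exp(e["logprob"]) * 100 for e in probs]
    certainty = np.median(top_logprobs, axis=0)
    if np.isnan(certainty):
        raise ValueError("NaN certainty of answer")
    return certainty  # assumed completion; not shown in the hunk

# Made-up log-probabilities: exp(-0.1)≈0.905, exp(-0.5)≈0.607, exp(-2.3)≈0.100
print(compute_certainty([
    {"logprob": -0.1}, {"logprob": -0.5}, {"logprob": -2.3},
]))  # ≈ 60.65
```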
{logdetective-0.2.9 → logdetective-0.2.11}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.2.9"
+version = "0.2.11"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -31,7 +31,9 @@ requests = ">0.2.31"
 llama-cpp-python = ">0.2.56,!=0.2.86"
 drain3 = "^0.9.11"
 huggingface-hub = ">0.23.2"
-numpy
+# rawhide has numpy 2, F40 and F41 are still on 1.26
+# we need to support both versions
+numpy = ">=1.26.0"
 
 [build-system]
 requires = ["poetry-core"]
The remaining files (LICENSE, README.md, logdetective/__init__.py, logdetective/constants.py, logdetective/drain3.ini) are unchanged between 0.2.9 and 0.2.11.