logdetective 0.2.10__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-0.2.10 → logdetective-0.2.12}/PKG-INFO +1 -1
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/logdetective.py +8 -2
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/server.py +14 -13
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/utils.py +11 -3
- {logdetective-0.2.10 → logdetective-0.2.12}/pyproject.toml +1 -1
- {logdetective-0.2.10 → logdetective-0.2.12}/LICENSE +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/README.md +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/__init__.py +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/constants.py +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/drain3.ini +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/extractors.py +0 -0

logdetective/logdetective.py

```diff
@@ -96,14 +96,20 @@ def main():
     response = process_log(log_summary, model, stream)
     probs = []
     print("Explanation:")
+    # We need to extract top token probability from the response
+    # CreateCompletionResponse structure of llama-cpp-python.
+    # `compute_certainty` function expects list of dictionaries with form
+    # { 'logprob': <float> } as expected from the OpenAI API.
+
     if args.no_stream:
         print(response["choices"][0]["text"])
-        probs = response[
+        probs = [{'logprob': e} for e in response['choices'][0]['logprobs']['token_logprobs']]
+
     else:
         # Stream the output
         for chunk in response:
             if isinstance(chunk["choices"][0]["logprobs"], dict):
-                probs.
+                probs.append({'logprob': chunk["choices"][0]["logprobs"]['token_logprobs'][0]})
             delta = chunk['choices'][0]['text']
             print(delta, end='', flush=True)
     certainty = compute_certainty(probs)
```
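The logdetective.py change reshapes llama-cpp-python log-probabilities into the list-of-dicts form that `compute_certainty` consumes. Below is a minimal sketch of that conversion, using hypothetical sample data in the CreateCompletionResponse shape referenced by the new comments; it is illustrative, not code from the package.

```python
# Illustrative sketch: convert llama-cpp-python completion logprobs into the
# [{'logprob': <float>}, ...] shape expected by compute_certainty.
# The sample data below is hypothetical.
from typing import Dict, List


def logprobs_from_completion(response: Dict) -> List[Dict[str, float]]:
    """Non-streaming case: all token logprobs arrive in one response."""
    token_logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    return [{"logprob": lp} for lp in token_logprobs]


def logprobs_from_chunk(chunk: Dict) -> List[Dict[str, float]]:
    """Streaming case: each chunk carries the logprob of a single token."""
    logprobs = chunk["choices"][0]["logprobs"]
    if isinstance(logprobs, dict):
        return [{"logprob": logprobs["token_logprobs"][0]}]
    return []


# Hypothetical sample in the CreateCompletionResponse shape.
sample = {"choices": [{"text": "ok", "logprobs": {"token_logprobs": [-0.12, -0.80]}}]}
print(logprobs_from_completion(sample))  # [{'logprob': -0.12}, {'logprob': -0.8}]
```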
logdetective/server.py

```diff
@@ -30,7 +30,7 @@ class Response(BaseModel):
     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
     response_certainty: float
     """
-    explanation:
+    explanation: Dict
     response_certainty: float
 
 
```
```diff
@@ -44,7 +44,8 @@ class StagedResponse(Response):
     snippets:
     list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
     """
-    snippets: List[Dict[str, str |
+    snippets: List[Dict[str, str | Dict]]
+
 
 LOG = logging.getLogger("logdetective")
 
```
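For context, here is a minimal sketch of what the updated server models look like after these two hunks, assembled only from the fields visible in the diff and assuming pydantic's `BaseModel` (the real classes in logdetective/server.py carry fuller docstrings):

```python
# Sketch assembled from the diff; not the package's full source.
from typing import Dict, List

from pydantic import BaseModel


class Response(BaseModel):
    # `explanation` now holds a whole CreateCompletionResponse-style dict
    # rather than a plain string.
    explanation: Dict
    response_certainty: float


class StagedResponse(Response):
    # Each snippet entry pairs the original text with the per-snippet
    # completion dict, hence the widened value type `str | Dict`.
    snippets: List[Dict[str, str | Dict]]
```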
```diff
@@ -113,7 +114,7 @@ def process_url(url: str) -> str:
 def mine_logs(log: str) -> List[str]:
     """Extract snippets from log text
     """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
```
```diff
@@ -125,7 +126,7 @@ def mine_logs(log: str) -> List[str]:
     return log_summary
 
 
-async def submit_text(text: str, max_tokens: int =
+async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
                       model: str = "default-model"):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
```
```diff
@@ -134,8 +135,8 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
     LOG.info("Analyzing the text")
     data = {
         "prompt": text,
-        "max_tokens":
-        "logprobs":
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
         "stream": stream,
         "model": model}
 
```
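The payload now passes `max_tokens` and `log_probs` through instead of hard-coding them. A sketch of how such a payload might be posted to an OpenAI-compatible completion endpoint follows; the URL and the use of aiohttp are assumptions, only the dictionary keys come from the diff.

```python
# Sketch only: endpoint URL and HTTP client are assumptions.
import asyncio

import aiohttp

LLM_URL = "http://localhost:8000/v1/completions"  # hypothetical inference server


async def submit_text_sketch(text: str, max_tokens: int = -1, log_probs: int = 1,
                             stream: bool = False, model: str = "default-model"):
    # Same payload keys as in the diff; max_tokens=-1 is passed through
    # to mean "generate until EOS / context limit".
    data = {
        "prompt": text,
        "max_tokens": max_tokens,
        "logprobs": log_probs,
        "stream": stream,
        "model": model,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(LLM_URL, json=data) as resp:
            return await resp.json()


# asyncio.run(submit_text_sketch("Explain this build failure: ..."))
```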
```diff
@@ -186,13 +187,13 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
```
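The lookup gains a `["content"][0]` step because log-probabilities are now nested one list deeper. A hypothetical response fragment illustrating the new key path (only the path comes from the diff, the values are made up):

```python
# Hypothetical fragment showing why the lookup gained ["content"][0].
response = {
    "choices": [{
        "logprobs": {
            "content": [{
                "top_logprobs": [{"logprob": -0.05}, {"logprob": -3.2}],
            }],
        },
    }],
}

top_logprobs = response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
print(top_logprobs)  # the list handed to compute_certainty
```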
```diff
@@ -213,27 +214,27 @@ async def analyze_log_staged(build_log: BuildLog):
         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
 
     analyzed_snippets = [
-        {"snippet":e[0], "comment":e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         f"\n{SNIPPET_DELIMITER}\n".join([
             f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-
+            for e in analyzed_snippets]))
 
     final_analysis = await submit_text(final_prompt)
-
+    print(final_analysis)
     certainty = 0
 
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return StagedResponse(
         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
```
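The staged endpoint pairs each mined snippet with its per-snippet completion and joins the pairs into one final prompt. A runnable sketch of that assembly follows; the template and delimiter strings below are placeholders, the real values live in logdetective/constants.py and are not shown in this diff.

```python
# Sketch of the staged-prompt assembly; templates and delimiter are placeholders.
SNIPPET_DELIMITER = "================"  # placeholder
PROMPT_TEMPLATE_STAGED = "Summarise these snippet analyses:\n{}"  # placeholder

log_summary = ["error: linker failed", "warning: deprecated API"]
analyzed_snippets_raw = [  # stand-ins for per-snippet CreateCompletionResponse dicts
    {"choices": [{"text": "The link step failed."}]},
    {"choices": [{"text": "A deprecated call was used."}]},
]

# Pair each mined snippet with the LLM comment produced for it.
analyzed_snippets = [
    {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets_raw)]

# Join "[snippet] : [comment]" pairs with the delimiter, then wrap in the template.
final_prompt = PROMPT_TEMPLATE_STAGED.format(
    f"\n{SNIPPET_DELIMITER}\n".join([
        f"[{e['snippet']}] : [{e['comment']['choices'][0]['text']}]"
        for e in analyzed_snippets]))

print(final_prompt)
```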
logdetective/utils.py

```diff
@@ -15,10 +15,17 @@ LOG = logging.getLogger("logdetective")
 def chunk_continues(text: str, index: int) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
+
+    Following rules are checked, in order:
+    * is the next character is whitespace
+    * is the previous character backslash '\\'
+    * is the previous character colon ':'
+
     """
     conditionals = [
         lambda i, string: string[i + 1].isspace(),
-        lambda i, string: string[i - 1] == "\\"
+        lambda i, string: string[i - 1] == "\\",
+        lambda i, string: string[i - 1] == ":"
     ]
 
     for c in conditionals:
```
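Only the conditionals list and the loop header of `chunk_continues` are visible here, so the following reconstruction of how the heuristics might be applied is an assumption (the early return and the IndexError handling in particular):

```python
# Hedged reconstruction: only the conditionals and the loop header come from
# the diff; the return logic and IndexError handling are assumptions.
def chunk_continues(text: str, index: int) -> bool:
    conditionals = [
        lambda i, string: string[i + 1].isspace(),  # next character is whitespace
        lambda i, string: string[i - 1] == "\\",    # previous character is a backslash
        lambda i, string: string[i - 1] == ":",     # previous character is a colon
    ]
    for c in conditionals:
        try:
            if c(index, text):
                return True
        except IndexError:
            return False
    return False


print(chunk_continues("export FOO=bar \\\nbaz", 16))  # True: previous character is '\'
```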
```diff
@@ -74,7 +81,7 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
     return model
 
 
-def compute_certainty(probs: List[Dict
+def compute_certainty(probs: List[Dict]) -> float:
     """Compute certainty of repsponse based on average logit probability.
     Log probability is log(p), isn't really readable for most people, especially in compound.
     In this case it's just a matter of applying inverse operation exp.
```
```diff
@@ -85,7 +92,8 @@ def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
     """
 
     top_logprobs = [
-        np.exp(
+        np.exp(e["logprob"]) * 100 for e in probs]
+
     certainty = np.median(top_logprobs, axis=0)
     if np.isnan(certainty):
         raise ValueError("NaN certainty of answer")
```
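Putting the utils.py hunks together, here is a self-contained version of `compute_certainty` with a worked value; the trailing `return` is assumed, everything else follows the diff. `exp(logprob)` recovers the token probability, `* 100` turns it into a percentage, and the median over tokens is reported.

```python
# Assembled from the diff; the final return statement is an assumption.
from typing import Dict, List

import numpy as np


def compute_certainty(probs: List[Dict]) -> float:
    top_logprobs = [
        np.exp(e["logprob"]) * 100 for e in probs]

    certainty = np.median(top_logprobs, axis=0)
    if np.isnan(certainty):
        raise ValueError("NaN certainty of answer")
    return certainty


# exp(-0.1) ~ 0.905 and exp(-2.3) ~ 0.100 -> median of [90.5, 10.0] ~ 50.3
print(compute_certainty([{"logprob": -0.1}, {"logprob": -2.3}]))
```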