logdetective 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: logdetective
-Version: 0.2.9
+Version: 0.2.11
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -22,7 +22,7 @@ Provides-Extra: server
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
-Requires-Dist: numpy (>=1.26.0,<2.0.0)
+Requires-Dist: numpy (>=1.26.0)
 Requires-Dist: requests (>0.2.31)
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -16,7 +16,7 @@ class LLMExtractor:
     A class that extracts relevant information from logs using a language model.
     """
     def __init__(self, model: Llama, n_lines: int = 2):
-        self.model = model
+        self.model = model
         self.n_lines = n_lines
         self.grammar = LlamaGrammar.from_string(
             "root ::= (\"Yes\" | \"No\")", verbose=False)
@@ -9,6 +9,7 @@ from logdetective.extractors import LLMExtractor, DrainExtractor
 
 LOG = logging.getLogger("logdetective")
 
+
 def setup_args():
     """ Setup argument parser and return arguments. """
     parser = argparse.ArgumentParser("logdetective")
@@ -16,6 +16,7 @@ from logdetective.constants import (
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import validate_url, compute_certainty
 
+
 class BuildLog(BaseModel):
     """Model of data submitted to API.
     """
@@ -29,7 +30,7 @@ class Response(BaseModel):
     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
     response_certainty: float
     """
-    explanation: CreateCompletionResponse
+    explanation: Dict
     response_certainty: float
 
 
@@ -43,10 +44,10 @@ class StagedResponse(Response):
     snippets:
     list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
     """
-    snippets: List[Dict[str, str | CreateCompletionResponse]]
+    snippets: List[Dict[str, str | Dict]]
 
-LOG = logging.getLogger("logdetective")
 
+LOG = logging.getLogger("logdetective")
 
 LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
 LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
@@ -55,6 +56,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
 
+
 def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
     """
     FastAPI Depend function that expects a header named Authentication
@@ -82,6 +84,7 @@ def requires_token_when_set(authentication: Annotated[str | None, Header()] = No
         API_TOKEN, token)
     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
 
+
 app = FastAPI(dependencies=[Depends(requires_token_when_set)])
 
 
@@ -99,7 +102,7 @@ def process_url(url: str) -> str:
         if not log_request.ok:
             raise HTTPException(status_code=400,
                                 detail="Something went wrong while getting the logs: "
-                                f"[{log_request.status_code}] {log_request.text}")
+                                f"[{log_request.status_code}] {log_request.text}")
     else:
         LOG.error("Invalid URL received ")
         raise HTTPException(status_code=400,
@@ -111,7 +114,7 @@ def process_url(url: str) -> str:
 def mine_logs(log: str) -> List[str]:
     """Extract snippets from log text
     """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
@@ -120,10 +123,10 @@ def mine_logs(log: str) -> List[str]:
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
 
-
     return log_summary
 
-async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream: bool = False,
+
+async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
                       model: str = "default-model"):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
@@ -131,17 +134,17 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
     """
     LOG.info("Analyzing the text")
     data = {
-        "prompt": text,
-        "max_tokens": str(max_tokens),
-        "logprobs": str(log_probs),
-        "stream": stream,
-        "model": model}
+        "prompt": text,
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
+        "stream": stream,
+        "model": model}
 
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
         response = requests.post(
             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-            headers={"Content-Type":"application/json"},
+            headers={"Content-Type": "application/json"},
             data=json.dumps(data),
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
             stream=stream)
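
The hunk above switches the completion payload to native JSON types (integers and booleans instead of strings) and changes the default max_tokens from 0 to -1. A minimal standalone sketch of an equivalent request against a llama.cpp-style /v1/completions endpoint is shown below; the URL, prompt, timeout, and the reading of -1 as "no fixed token limit" are assumptions for illustration, not values or guarantees taken from the package:

```python
import json

import requests

# Hypothetical endpoint; the real service builds it from LLAMA_CPP_HOST/PORT env vars.
LLAMA_CPP_URL = "http://localhost:8000/v1/completions"

payload = {
    "prompt": "Explain this build failure: ...",
    "max_tokens": -1,      # new default in the diff above; assumed to mean "generate until EOS"
    "logprobs": 1,         # ask the server to return token log-probabilities
    "stream": False,
    "model": "default-model",
}

# Values are sent as native JSON types (ints, bools), not strings,
# matching the change in the hunk above.
response = requests.post(
    LLAMA_CPP_URL,
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
    timeout=600,
)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```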
@@ -154,7 +157,7 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
         raise HTTPException(
             status_code=400,
             detail="Something went wrong while getting a response from the llama server: "
-            f"[{response.status_code}] {response.text}")
+            f"[{response.status_code}] {response.text}")
     try:
         response = json.loads(response.text)
     except UnicodeDecodeError as ex:
@@ -184,13 +187,13 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
@@ -211,27 +214,27 @@ async def analyze_log_staged(build_log: BuildLog):
         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
 
     analyzed_snippets = [
-        {"snippet":e[0], "comment":e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         f"\n{SNIPPET_DELIMITER}\n".join([
             f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-            for e in analyzed_snippets]))
+            for e in analyzed_snippets]))
 
     final_analysis = await submit_text(final_prompt)
-
+    print(final_analysis)
     certainty = 0
 
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return StagedResponse(
         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
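
Both analyze endpoints above now read the top log-probabilities from response["choices"][0]["logprobs"]["content"][0]["top_logprobs"] rather than response["choices"][0]["logprobs"]["top_logprobs"], i.e. they assume a nested layout where each entry is a dict carrying a logprob field. A small illustrative sketch of pulling that list out defensively follows; the helper name and the sample payload are hypothetical, not part of the package or of any documented llama.cpp schema:

```python
from typing import Dict, List


def extract_top_logprobs(completion: Dict) -> List[Dict]:
    """Return the top-logprob dicts for the first generated token,
    using the nested layout the server code now expects."""
    choice = completion["choices"][0]
    logprobs = choice.get("logprobs") or {}
    content = logprobs.get("content") or []
    if not content:
        return []
    return content[0].get("top_logprobs", [])


# Illustrative response fragment with the assumed shape.
sample = {
    "choices": [{
        "text": "Yes",
        "logprobs": {
            "content": [
                {"token": "Yes", "logprob": -0.05,
                 "top_logprobs": [{"token": "Yes", "logprob": -0.05}]}
            ]
        },
    }]
}

print(extract_top_logprobs(sample))  # [{'token': 'Yes', 'logprob': -0.05}]
```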
@@ -15,10 +15,17 @@ LOG = logging.getLogger("logdetective")
 def chunk_continues(text: str, index: int) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
+
+    Following rules are checked, in order:
+    * is the next character is whitespace
+    * is the previous character backslash '\\'
+    * is the previous character colon ':'
+
     """
     conditionals = [
         lambda i, string: string[i + 1].isspace(),
-        lambda i, string: string[i - 1] == "\\"
+        lambda i, string: string[i - 1] == "\\",
+        lambda i, string: string[i - 1] == ":"
     ]
 
     for c in conditionals:
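
The hunk above adds a third continuation heuristic (previous character is a colon) and documents the order in which the rules are checked. Below is a simplified standalone copy of those heuristics, shown only to illustrate how the three rules fire on sample log lines; it assumes index is not the last character of the text and is not the packaged implementation:

```python
def chunk_continues(text: str, index: int) -> bool:
    """Simplified copy for illustration: the chunk continues if the next
    character is whitespace, or the previous character is '\\' or ':'."""
    conditionals = [
        lambda i, string: string[i + 1].isspace(),
        lambda i, string: string[i - 1] == "\\",
        lambda i, string: string[i - 1] == ":",
    ]
    return any(c(index, text) for c in conditionals)


line1 = "export FLAGS=-O2 \\\nCC=gcc"
# The newline is preceded by a backslash, so the chunk continues.
print(chunk_continues(line1, line1.index("\n")))  # True

line2 = "error:\nno such file"
# The newline follows a colon, so the new heuristic also reports continuation.
print(chunk_continues(line2, line2.index("\n")))  # True

line3 = "make: *** [all] Error 1\ngcc exited"
# Next char is 'g', previous is '1': no rule matches, the chunk ends here.
print(chunk_continues(line3, line3.index("\n")))  # False
```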
@@ -74,16 +81,19 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
     return model
 
 
-def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
+def compute_certainty(probs: List[Dict]) -> float:
     """Compute certainty of repsponse based on average logit probability.
     Log probability is log(p), isn't really readable for most people, especially in compound.
     In this case it's just a matter of applying inverse operation exp.
     Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
     Simply put, this is the most straightforward way to get the numbers out.
+
+    This function is used in the server codebase.
     """
 
     top_logprobs = [
-        np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
+        np.exp(e["logprob"]) * 100 for e in probs]
+
     certainty = np.median(top_logprobs, axis=0)
     if np.isnan(certainty):
         raise ValueError("NaN certainty of answer")
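
With the new signature, compute_certainty expects a flat list of dicts that each carry a logprob key (the shape produced by the top_logprobs path shown earlier), converts every log-probability back to a percentage with exp(logprob) * 100, and reports the median. A short numeric sketch of that calculation; the token names and log-probability values are made up for illustration:

```python
import numpy as np

# Hypothetical top_logprobs entries, one dict per candidate token.
probs = [
    {"token": "Yes", "logprob": -0.105},    # exp(-0.105) * 100 ~ 90.0
    {"token": "No", "logprob": -2.303},     # exp(-2.303) * 100 ~ 10.0
    {"token": "Maybe", "logprob": -0.511},  # exp(-0.511) * 100 ~ 60.0
]

# Same arithmetic as the new list comprehension in the hunk above.
top_logprobs = [np.exp(e["logprob"]) * 100 for e in probs]
certainty = np.median(top_logprobs, axis=0)
print(round(float(certainty), 1))  # 60.0, the median of ~90.0, ~10.0, ~60.0
```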
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.2.9"
+version = "0.2.11"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -31,7 +31,9 @@ requests = ">0.2.31"
 llama-cpp-python = ">0.2.56,!=0.2.86"
 drain3 = "^0.9.11"
 huggingface-hub = ">0.23.2"
-numpy = "^1.26.0"
+# rawhide has numpy 2, F40 and F41 are still on 1.26
+# we need to support both versions
+numpy = ">=1.26.0"
 
 [build-system]
 requires = ["poetry-core"]