logdetective 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logdetective/constants.py CHANGED
@@ -30,3 +30,16 @@ Log:
30
30
  Answer:
31
31
 
32
32
  """
33
+
34
+ SNIPPET_PROMPT_TEMPLATE = """
35
+ Analyse following RPM build log snippet.
36
+ Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
37
+ Snippets themselves must not be altered in any way whatsoever.
38
+
39
+ Snippet:
40
+
41
+ {}
42
+
43
+ Analysis:
44
+
45
+ """
@@ -3,7 +3,8 @@ import logging
3
3
  import sys
4
4
 
5
5
  from logdetective.constants import DEFAULT_ADVISOR
6
- from logdetective.utils import process_log, initialize_model, retrieve_log_content, format_snippets
6
+ from logdetective.utils import (
7
+ process_log, initialize_model, retrieve_log_content, format_snippets, compute_certainty)
7
8
  from logdetective.extractors import LLMExtractor, DrainExtractor
8
9
 
9
10
  LOG = logging.getLogger("logdetective")
@@ -92,15 +93,21 @@ def main():
92
93
  if args.no_stream:
93
94
  stream = False
94
95
  response = process_log(log_summary, model, stream)
96
+ probs = []
95
97
  print("Explanation:")
96
98
  if args.no_stream:
97
99
  print(response["choices"][0]["text"])
100
+ probs = response["choices"][0]["logprobs"]["top_logprobs"]
98
101
  else:
99
102
  # Stream the output
100
103
  for chunk in response:
104
+ if isinstance(chunk["choices"][0]["logprobs"], dict):
105
+ probs.extend(chunk["choices"][0]["logprobs"]["top_logprobs"])
101
106
  delta = chunk['choices'][0]['text']
102
107
  print(delta, end='', flush=True)
103
- print()
108
+ certainty = compute_certainty(probs)
109
+
110
+ print(f"\nResponse certainty: {certainty:.2f}%\n")
104
111
 
105
112
 
106
113
  if __name__ == "__main__":
logdetective/server.py CHANGED
@@ -1,21 +1,47 @@
1
+ import json
1
2
  import logging
2
3
  import os
3
- import json
4
+ from typing import List
4
5
 
6
+ from llama_cpp import CreateCompletionResponse
5
7
  from fastapi import FastAPI, HTTPException
6
8
  from pydantic import BaseModel
7
9
 
8
10
  import requests
9
11
 
10
- from logdetective.constants import PROMPT_TEMPLATE
12
+ from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_PROMPT_TEMPLATE
11
13
  from logdetective.extractors import DrainExtractor
12
- from logdetective.utils import validate_url
14
+ from logdetective.utils import validate_url, compute_certainty
13
15
 
14
16
  class BuildLog(BaseModel):
15
17
  """Model of data submitted to API.
16
18
  """
17
19
  url: str
18
20
 
21
+
22
class Response(BaseModel):
    """Payload returned by the Log Detective API for a single analysis.

    explanation: CreateCompletionResponse
        Completion produced by the LLM, see
        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
    response_certainty: float
        Certainty value reported alongside the explanation.
    """
    explanation: CreateCompletionResponse
    response_certainty: float
31
+
32
+
33
class StagedResponse(Response):
    """Payload returned by the Log Detective API when a staged analysis is requested.

    Extends Response with the list of responses to the prompts
    submitted for the individual snippets.

    explanation: CreateCompletionResponse
        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
    response_certainty: float
    snippets: list of CreateCompletionResponse
    """
    snippets: List[CreateCompletionResponse]
43
+
44
+
19
45
  LOG = logging.getLogger("logdetective")
20
46
 
21
47
  app = FastAPI()
@@ -26,21 +52,13 @@ LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
26
52
  LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
27
53
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
28
54
 
29
- @app.post("/analyze", )
30
- async def analyze_log(build_log: BuildLog):
31
- """Provide endpoint for log file submission and analysis.
32
- Request must be in form {"url":"<YOUR_URL_HERE>"}.
33
- URL must be valid for the request to be passed to the LLM server.
34
- Meaning that it must contain appropriate scheme, path and netloc,
35
- while lacking result, params or query fields.
36
- """
37
- extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
38
55
 
39
- LOG.info("Getting summary")
40
- # Perform basic validation of the URL
41
- if validate_url(url=build_log.url):
56
+ def process_url(url: str) -> str:
57
+ """Validate log URL and return log text.
58
+ """
59
+ if validate_url(url=url):
42
60
  try:
43
- log_request = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
61
+ log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
44
62
  except requests.RequestException as ex:
45
63
  raise HTTPException(
46
64
  status_code=400,
@@ -53,19 +71,36 @@ async def analyze_log(build_log: BuildLog):
53
71
  else:
54
72
  LOG.error("Invalid URL received ")
55
73
  raise HTTPException(status_code=400,
56
- detail=f"Invalid log URL: {build_log.url}")
74
+ detail=f"Invalid log URL: {url}")
75
+
76
+ return log_request.text
77
+
78
+
79
def mine_logs(log: str) -> List[str]:
    """Run the Drain extractor over the log text and return the extracted snippets.

    The ratio between the number of extracted snippets and the number
    of lines in the log is logged for reference.
    """
    drain = DrainExtractor(verbose=True, context=True, max_clusters=8)

    LOG.info("Getting summary")
    snippets = drain(log)

    line_count = len(log.split('\n'))
    compression = len(snippets) / line_count
    LOG.debug("Log summary: \n %s", snippets)
    LOG.info("Compression ratio: %s", compression)

    return snippets
93
+
94
+ def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1):
95
+ """Submit prompt to LLM.
96
+ max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
97
+ log_probs: number of token choices to produce log probs for
98
+ """
65
99
  LOG.info("Analyzing the text")
66
100
  data = {
67
- "prompt": PROMPT_TEMPLATE.format(log_summary),
68
- "max_tokens": "0"}
101
+ "prompt": text,
102
+ "max_tokens": str(max_tokens),
103
+ "logprobs": str(log_probs)}
69
104
 
70
105
  try:
71
106
  # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
@@ -79,9 +114,79 @@ async def analyze_log(build_log: BuildLog):
79
114
  status_code=400,
80
115
  detail=f"Llama-cpp query failed: {ex}") from ex
81
116
 
82
- if not log_request.ok:
117
+ if not response.ok:
83
118
  raise HTTPException(
84
119
  status_code=400,
85
120
  detail="Something went wrong while getting a response from the llama server: "
86
- f"[{log_request.status_code}] {log_request.text}")
87
- return response.text
121
+ f"[{response.status_code}] {response.text}")
122
+ try:
123
+ response = json.loads(response.text)
124
+ except UnicodeDecodeError as ex:
125
+ LOG.error("Error encountered while parsing llama server response: %s", ex)
126
+ raise HTTPException(
127
+ status_code=400,
128
+ detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}") from ex
129
+
130
+ return CreateCompletionResponse(response)
131
+
132
+
133
@app.post("/analyze", response_model=Response)
async def analyze_log(build_log: BuildLog):
    """Provide endpoint for log file submission and analysis.
    Request must be in form {"url":"<YOUR_URL_HERE>"}.
    URL must be valid for the request to be passed to the LLM server.
    Meaning that it must contain appropriate scheme, path and netloc,
    while lacking result, params or query fields.

    Returns a Response holding the LLM completion and its certainty.
    Certainty is 0 when the completion carries no log probabilities.
    Raises HTTPException (400) when certainty can't be computed.
    """
    log_text = process_url(build_log.url)
    log_summary = mine_logs(log_text)
    response = submit_text(PROMPT_TEMPLATE.format(log_summary))

    # Default keeps `certainty` bound when no log probabilities were returned,
    # same as the /analyze/staged endpoint does.
    certainty = 0
    # `logprobs` may be missing or null; only a dict carries `top_logprobs`.
    logprobs = response["choices"][0].get("logprobs")
    if isinstance(logprobs, dict):
        try:
            certainty = compute_certainty(logprobs["top_logprobs"])
        except ValueError as ex:
            LOG.error("Error encountered while computing certainty: %s", ex)
            # Single quotes inside the replacement field keep the f-string
            # valid on interpreters older than Python 3.12.
            raise HTTPException(
                status_code=400,
                detail=f"Couldn't compute certainty with data:\n"
                f"{logprobs['top_logprobs']}") from ex

    return Response(explanation=response, response_certainty=certainty)
157
+
158
+
159
@app.post("/analyze/staged", response_model=StagedResponse)
async def analyze_log_staged(build_log: BuildLog):
    """Provide endpoint for log file submission and staged analysis.
    Request must be in form {"url":"<YOUR_URL_HERE>"}.
    URL must be valid for the request to be passed to the LLM server.
    Meaning that it must contain appropriate scheme, path and netloc,
    while lacking result, params or query fields.

    Each extracted snippet is first submitted to the LLM on its own,
    then the texts of those per-snippet answers are submitted together
    for the final analysis.

    Returns a StagedResponse with the final completion, its certainty
    and the list of per-snippet completions.
    Certainty is 0 when the final completion carries no log probabilities.
    Raises HTTPException (400) when certainty can't be computed.
    """
    log_text = process_url(build_log.url)
    log_summary = mine_logs(log_text)

    # One LLM call per snippet; responses are kept whole for the `snippets` field.
    analyzed_snippets = [
        submit_text(SNIPPET_PROMPT_TEMPLATE.format(snippet))
        for snippet in log_summary]

    final_analysis = submit_text(
        PROMPT_TEMPLATE.format([e["choices"][0]["text"] for e in analyzed_snippets]))

    certainty = 0
    # `logprobs` may be missing or null; only a dict carries `top_logprobs`.
    logprobs = final_analysis["choices"][0].get("logprobs")
    if isinstance(logprobs, dict):
        try:
            certainty = compute_certainty(logprobs["top_logprobs"])
        except ValueError as ex:
            LOG.error("Error encountered while computing certainty: %s", ex)
            # Single quotes inside the replacement field keep the f-string
            # valid on interpreters older than Python 3.12.
            raise HTTPException(
                status_code=400,
                detail=f"Couldn't compute certainty with data:\n"
                f"{logprobs['top_logprobs']}") from ex

    return StagedResponse(
        explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
logdetective/utils.py CHANGED
@@ -1,10 +1,11 @@
1
1
  import logging
2
2
  import os
3
+ from typing import Iterator, List, Dict
3
4
  from urllib.parse import urlparse
4
-
5
+ import numpy as np
5
6
  import requests
6
7
 
7
- from llama_cpp import Llama
8
+ from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
8
9
  from logdetective.constants import PROMPT_TEMPLATE
9
10
 
10
11
 
@@ -60,20 +61,38 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
60
61
  model = Llama(
61
62
  model_path=model_pth,
62
63
  n_ctx=0, # Maximum context for the model
63
- verbose=verbose)
64
+ verbose=verbose,
65
+ logits_all=True)
64
66
  else:
65
67
  model = Llama.from_pretrained(
66
68
  model_pth,
67
69
  f"*{filename_suffix}",
68
70
  n_ctx=0, # Maximum context for the model
69
- verbose=verbose)
71
+ verbose=verbose,
72
+ logits_all=True)
70
73
 
71
74
  return model
72
75
 
73
76
 
74
- def process_log(log: str, model: Llama, stream: bool) -> str:
77
+ def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
78
+ """Compute certainty of repsponse based on average logit probability.
79
+ Log probability is log(p), isn't really readable for most people, especially in compound.
80
+ In this case it's just a matter of applying inverse operation exp.
81
+ Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
82
+ Simply put, this is the most straightforward way to get the numbers out.
75
83
  """
76
- Processes a given log using the provided language model and returns its summary.
84
+
85
+ top_logprobs = [
86
+ np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
87
+ certainty = np.median(top_logprobs, axis=0)
88
+ if np.isnan(certainty):
89
+ raise ValueError("NaN certainty of answer")
90
+ return certainty
91
+
92
+
93
+ def process_log(log: str, model: Llama, stream: bool) -> (
94
+ CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]):
95
+ """Processes a given log using the provided language model and returns its summary.
77
96
 
78
97
  Args:
79
98
  log (str): The input log to be processed.
@@ -82,10 +101,13 @@ def process_log(log: str, model: Llama, stream: bool) -> str:
82
101
  Returns:
83
102
  str: The summary of the given log generated by the language model.
84
103
  """
85
- return model(
104
+ response = model(
86
105
  prompt=PROMPT_TEMPLATE.format(log),
87
106
  stream=stream,
88
- max_tokens=0)
107
+ max_tokens=0,
108
+ logprobs=1)
109
+
110
+ return response
89
111
 
90
112
 
91
113
  def retrieve_log_content(log_path: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: logdetective
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
5
5
  License: Apache-2.0
6
6
  Author: Jiri Podivin
@@ -21,6 +21,7 @@ Provides-Extra: server
21
21
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
22
22
  Requires-Dist: huggingface-hub (>0.23.2)
23
23
  Requires-Dist: llama-cpp-python (>=0.2.56,<0.3.0,!=0.2.86)
24
+ Requires-Dist: numpy (>=1.26.0,<2.0.0)
24
25
  Requires-Dist: requests (>=2.31.0,<3.0.0)
25
26
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
26
27
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -38,7 +39,7 @@ A Python tool to analyze logs using a Language Model (LLM) and Drain template mi
38
39
  Installation
39
40
  ------------
40
41
 
41
- ** Fedora 40+ **
42
+ **Fedora 40+**
42
43
 
43
44
  dnf install logdetective
44
45
 
@@ -0,0 +1,12 @@
1
+ logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ logdetective/constants.py,sha256=1Ls2VJXb7NwSgi_HmTOA1c52K16SZIeDYBXlvBJ07zU,991
3
+ logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
4
+ logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
5
+ logdetective/logdetective.py,sha256=f7ASCJg_Yt6VBFieXBYgQYdenfXjC60ZdLHhzQHideI,4372
6
+ logdetective/server.py,sha256=m0NPtk9tAUzyu9O8jIAfgEzynZ-WCHqVvCJkHOm08Ks,7073
7
+ logdetective/utils.py,sha256=nTbaDVEfbHVQPTZe58T04HHZ6JWUJ1PonRRnzGX8hY0,4794
8
+ logdetective-0.2.7.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
9
+ logdetective-0.2.7.dist-info/METADATA,sha256=3iqnKnVJy6aTaAqP77btyqSGqCpjT8_PQqpWaNwLKHg,9100
10
+ logdetective-0.2.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
11
+ logdetective-0.2.7.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
12
+ logdetective-0.2.7.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- logdetective/constants.py,sha256=2DlzXvqWgKca5fPXGPCxREYRfg0eHW0b8TATKoaqb54,711
3
- logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
4
- logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
5
- logdetective/logdetective.py,sha256=ee7et1mKyI33HaqIr7dR-o7AX1rijwPANw5s6fkDb-Q,4039
6
- logdetective/server.py,sha256=jb7TuC5xzzlTR6cUrSaZa8vHPZZwH0ei30b5N0iNVX8,3176
7
- logdetective/utils.py,sha256=UT3st9rbFXS8m-d0-3W39ENdVLhIraSH_K6vlKlDZ5w,3759
8
- logdetective-0.2.6.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
9
- logdetective-0.2.6.dist-info/METADATA,sha256=h9NS3BpBPHo3cu93xcPNOrgiq2De8DJTPr2Hm5nZhns,9063
10
- logdetective-0.2.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
11
- logdetective-0.2.6.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
12
- logdetective-0.2.6.dist-info/RECORD,,