logdetective 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- /dev/null
+++ logdetective/server/models.py
@@ -0,0 +1,82 @@
+from typing import List, Dict, Optional
+from pydantic import BaseModel
+
+
+class BuildLog(BaseModel):
+    """Model of data submitted to API."""
+
+    url: str
+
+
+class Response(BaseModel):
+    """Model of data returned by Log Detective API
+
+    explanation: CreateCompletionResponse
+        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    response_certainty: float
+    """
+
+    explanation: Dict
+    response_certainty: float
+
+
+class StagedResponse(Response):
+    """Model of data returned by Log Detective API when a staged response
+    is requested. Contains list of responses to prompts for individual snippets.
+
+    explanation: CreateCompletionResponse
+        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    response_certainty: float
+    snippets:
+        list of dictionaries { 'snippet': '<original_text>', 'comment': CreateCompletionResponse }
+    """
+
+    snippets: List[Dict[str, str | Dict]]
+
+
+class InferenceConfig(BaseModel):
+    """Model for inference configuration of logdetective server."""
+
+    max_tokens: int = -1
+    log_probs: int = 1
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+
+        self.max_tokens = data.get("max_tokens", -1)
+        self.log_probs = data.get("log_probs", 1)
+
+
+class ExtractorConfig(BaseModel):
+    """Model for extractor configuration of logdetective server."""
+
+    context: bool = True
+    max_clusters: int = 8
+    verbose: bool = False
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+
+        self.context = data.get("context", True)
+        self.max_clusters = data.get("max_clusters", 8)
+        self.verbose = data.get("verbose", False)
+
+
+class Config(BaseModel):
+    """Model for configuration of logdetective server."""
+
+    inference: InferenceConfig = InferenceConfig()
+    extractor: ExtractorConfig = ExtractorConfig()
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+
+        if data is None:
+            return
+
+        self.inference = InferenceConfig(data.get("inference"))
+        self.extractor = ExtractorConfig(data.get("extractor"))
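The Config tree above is built to tolerate partial input: each overridden __init__ accepts the (possibly absent) dict for its section and falls back to the field defaults. A minimal sketch of how these models compose, with illustrative values only:

    from logdetective.server.models import Config

    # Only the extractor section is supplied; everything else keeps its default.
    config = Config({"extractor": {"max_clusters": 16}})
    assert config.extractor.max_clusters == 16
    assert config.inference.max_tokens == -1  # InferenceConfig default

    # No data at all yields a fully default configuration.
    assert Config().extractor.context is True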
--- logdetective/server.py
+++ logdetective/server/server.py
@@ -2,50 +2,23 @@ import asyncio
 import json
 import logging
 import os
-from typing import List, Annotated, Dict
+from typing import List, Annotated
 
 from llama_cpp import CreateCompletionResponse
 from fastapi import FastAPI, HTTPException, Depends, Header
 from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
 import requests
 
 from logdetective.constants import (
-    PROMPT_TEMPLATE, SNIPPET_PROMPT_TEMPLATE,
-    PROMPT_TEMPLATE_STAGED, SNIPPET_DELIMITER)
+    PROMPT_TEMPLATE,
+    SNIPPET_PROMPT_TEMPLATE,
+    PROMPT_TEMPLATE_STAGED,
+    SNIPPET_DELIMITER,
+)
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import validate_url, compute_certainty
-
-
-class BuildLog(BaseModel):
-    """Model of data submitted to API.
-    """
-    url: str
-
-
-class Response(BaseModel):
-    """Model of data returned by Log Detective API
-
-    explanation: CreateCompletionResponse
-        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
-    response_certainty: float
-    """
-    explanation: Dict
-    response_certainty: float
-
-
-class StagedResponse(Response):
-    """Model of data returned by Log Detective API when called when staged response
-    is requested. Contains list of reponses to prompts for individual snippets.
-
-    explanation: CreateCompletionResponse
-        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
-    response_certainty: float
-    snippets:
-        list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
-    """
-    snippets: List[Dict[str, str | Dict]]
-
+from logdetective.server.models import BuildLog, Response, StagedResponse
+from logdetective.server.utils import load_server_config
 
 LOG = logging.getLogger("logdetective")
 
@@ -55,6 +28,10 @@ LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
+SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
+LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)
+
+SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 
 
 def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
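Note that SERVER_CONFIG is resolved once, at module import time, so the environment has to be prepared before the server module is loaded. A hedged sketch (the path and token values are hypothetical):

    import os

    # Must happen before logdetective.server.server is imported.
    os.environ["LOGDETECTIVE_SERVER_CONF"] = "/etc/logdetective/server.yaml"  # hypothetical path
    os.environ["LLM_API_TOKEN"] = "example-token"  # illustrative value only

    from logdetective.server import server  # SERVER_CONFIG is now loaded from the YAML file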
@@ -75,13 +52,15 @@ def requires_token_when_set(authentication: Annotated[str | None, Header()] = No
     except (ValueError, IndexError):
         LOG.warning(
             "Authentication header has invalid structure (%s), it should be 'Bearer TOKEN'",
-            authentication)
+            authentication,
+        )
         # eat the exception and raise 401 below
         token = None
     if token == API_TOKEN:
         return
-    LOG.info("LOGDETECTIVE_TOKEN env var is set (%s), clien token = %s",
-             API_TOKEN, token)
+    LOG.info(
+        "LOGDETECTIVE_TOKEN env var is set (%s), client token = %s", API_TOKEN, token
+    )
     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
 
 
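Because the authentication parameter is declared via FastAPI's Header(), clients supply the token in an "Authentication: Bearer <token>" header. A hedged client-side sketch, assuming the server listens on localhost:8080 and that analyze_log is mounted at /analyze (the route decorator is outside this diff):

    import requests

    resp = requests.post(
        "http://localhost:8080/analyze",  # assumed route for analyze_log
        json={"url": "https://example.com/build.log"},  # BuildLog payload
        headers={"Authentication": "Bearer <token>"},
        timeout=600,
    )
    print(resp.json()["response_certainty"])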
@@ -89,45 +68,51 @@ app = FastAPI(dependencies=[Depends(requires_token_when_set)])
 
 
 def process_url(url: str) -> str:
-    """Validate log URL and return log text.
-    """
+    """Validate log URL and return log text."""
     if validate_url(url=url):
         try:
             log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
         except requests.RequestException as ex:
             raise HTTPException(
-                status_code=400,
-                detail=f"We couldn't obtain the logs: {ex}") from ex
+                status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+            ) from ex
 
         if not log_request.ok:
-            raise HTTPException(status_code=400,
-                                detail="Something went wrong while getting the logs: "
-                                f"[{log_request.status_code}] {log_request.text}")
+            raise HTTPException(
+                status_code=400,
+                detail="Something went wrong while getting the logs: "
+                f"[{log_request.status_code}] {log_request.text}",
+            )
     else:
         LOG.error("Invalid URL received ")
-        raise HTTPException(status_code=400,
-                            detail=f"Invalid log URL: {url}")
+        raise HTTPException(status_code=400, detail=f"Invalid log URL: {url}")
 
     return log_request.text
 
 
 def mine_logs(log: str) -> List[str]:
-    """Extract snippets from log text
-    """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
+    """Extract snippets from log text"""
+    extractor = DrainExtractor(
+        verbose=True, context=True, max_clusters=SERVER_CONFIG.extractor.max_clusters
+    )
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
 
-    ratio = len(log_summary) / len(log.split('\n'))
+    ratio = len(log_summary) / len(log.split("\n"))
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
 
     return log_summary
 
 
-async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
-                      model: str = "default-model"):
+async def submit_text(
+    text: str,
+    max_tokens: int = -1,
+    log_probs: int = 1,
+    stream: bool = False,
+    model: str = "default-model",
+):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
     log_probs: number of token choices to produce log probs for
@@ -138,33 +123,42 @@ async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, strea
         "max_tokens": max_tokens,
         "logprobs": log_probs,
         "stream": stream,
-        "model": model}
+        "model": model,
+    }
+
+    headers = {"Content-Type": "application/json"}
+
+    if LLM_API_TOKEN:
+        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
 
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
        response = requests.post(
             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-            headers={"Content-Type": "application/json"},
+            headers=headers,
             data=json.dumps(data),
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
-            stream=stream)
+            stream=stream,
+        )
     except requests.RequestException as ex:
         raise HTTPException(
-            status_code=400,
-            detail=f"Llama-cpp query failed: {ex}") from ex
+            status_code=400, detail=f"Llama-cpp query failed: {ex}"
+        ) from ex
     if not stream:
         if not response.ok:
             raise HTTPException(
                 status_code=400,
                 detail="Something went wrong while getting a response from the llama server: "
-                f"[{response.status_code}] {response.text}")
+                f"[{response.status_code}] {response.text}",
+            )
         try:
             response = json.loads(response.text)
         except UnicodeDecodeError as ex:
             LOG.error("Error encountered while parsing llama server response: %s", ex)
             raise HTTPException(
                 status_code=400,
-                detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}") from ex
+                detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
+            ) from ex
     else:
         return response
 
@@ -187,13 +181,15 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
+            )
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
+                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}",
+            ) from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
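compute_certainty itself lives in logdetective.utils and is untouched by this diff; what the handler passes it is the top_logprobs list for the completion's first token. Purely as an illustration of the kind of computation involved (not the package's actual implementation):

    import math

    # Illustrative only: mean probability mass of the reported top
    # choices, as a percentage. The real compute_certainty may differ.
    def certainty_sketch(top_logprobs: list[dict]) -> float:
        if not top_logprobs:
            raise ValueError("no logprobs supplied")
        probs = [math.exp(entry["logprob"]) for entry in top_logprobs]
        return 100 * sum(probs) / len(probs)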
@@ -211,15 +207,21 @@ async def analyze_log_staged(build_log: BuildLog):
 
     # Process snippets asynchronously
     analyzed_snippets = await asyncio.gather(
-        *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
+        *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary]
+    )
 
     analyzed_snippets = [
-        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)
+    ]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
-        f"\n{SNIPPET_DELIMITER}\n".join([
-            f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-            for e in analyzed_snippets]))
+        f"\n{SNIPPET_DELIMITER}\n".join(
+            [
+                f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
+                for e in analyzed_snippets
+            ]
+        )
+    )
 
     final_analysis = await submit_text(final_prompt)
     print(final_analysis)
@@ -228,16 +230,21 @@ async def analyze_log_staged(build_log: BuildLog):
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
+            )
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
+                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}",
+            ) from ex
 
     return StagedResponse(
-        explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
+        explanation=final_analysis,
+        snippets=analyzed_snippets,
+        response_certainty=certainty,
+    )
 
 
 @app.post("/analyze/stream", response_class=StreamingResponse)
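The staged flow therefore makes N+1 LLM calls: one per extracted snippet, then one over the joined snippet analyses. A hedged sketch of consuming the resulting StagedResponse, assuming the handler is mounted at /analyze/staged (its route decorator is outside this diff):

    import requests

    body = requests.post(
        "http://localhost:8080/analyze/staged",  # assumed route for analyze_log_staged
        json={"url": "https://example.com/build.log"},
    ).json()

    print(body["response_certainty"])
    for item in body["snippets"]:
        print(item["snippet"], "->", item["comment"]["choices"][0]["text"])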
--- /dev/null
+++ logdetective/server/utils.py
@@ -0,0 +1,15 @@
+import yaml
+from logdetective.server.models import Config
+
+
+def load_server_config(path: str | None) -> Config:
+    """Load configuration file for logdetective server.
+    If no path was provided, or if the file doesn't exist, return defaults.
+    """
+    if path is not None:
+        try:
+            with open(path, "r") as config_file:
+                return Config(yaml.safe_load(config_file))
+        except FileNotFoundError:
+            pass
+    return Config()
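load_server_config deliberately swallows FileNotFoundError so a stale LOGDETECTIVE_SERVER_CONF still yields a usable default Config. A sketch of a YAML file these models would accept, with illustrative values (shown as comments), followed by loading it:

    # server.yaml (hypothetical):
    #   inference:
    #     max_tokens: 512
    #     log_probs: 1
    #   extractor:
    #     max_clusters: 16
    #     verbose: true

    from logdetective.server.utils import load_server_config

    config = load_server_config("server.yaml")
    print(config.extractor.max_clusters)  # 16 if the file exists, 8 (default) otherwise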
--- logdetective-0.2.12.dist-info/METADATA
+++ logdetective-0.2.14.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.2.12
+Version: 0.2.14
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -18,7 +18,6 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Internet :: Log Analysis
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Debuggers
-Provides-Extra: server
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
--- /dev/null
+++ logdetective-0.2.14.dist-info/RECORD
@@ -0,0 +1,15 @@
+logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/constants.py,sha256=6XekuU7sbkY1Pmu4NJajgFbJ0no8PQ3DxQm8NeLKtjE,1383
+logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+logdetective/extractors.py,sha256=xfan_dbGCrLH4cguJ2F6W6UkxXMz24Vob39r5-GsNV8,3102
+logdetective/logdetective.py,sha256=03dDCZOx0PRl8KQ5axq5YE90erjoFtcn1tjTuggItco,4684
+logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/server/models.py,sha256=vFFOWg7HoI7_6XCty3Fa5AQPbK6g-HuRCEnaqlKXnWw,2333
+logdetective/server/server.py,sha256=3HOwIXsnas5GvyRCm3Y3-ogxa8g_IomOpfxX-KG_yM8,9240
+logdetective/server/utils.py,sha256=-SB49orES2zU83XJODU_1O9pVQg3CtEisaIm3oEiALA,469
+logdetective/utils.py,sha256=j3u_JruoM57q_7dX3enV04t6WGEg3YNWbu5wmEGmP-I,5019
+logdetective-0.2.14.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.2.14.dist-info/METADATA,sha256=COm3Y0ToL6WAWzvY5HHAV9T8BezNTDoOrLqsV5UoKZk,9768
+logdetective-0.2.14.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+logdetective-0.2.14.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.2.14.dist-info/RECORD,,
--- logdetective-0.2.12.dist-info/RECORD
+++ /dev/null
@@ -1,12 +0,0 @@
-logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/constants.py,sha256=6XekuU7sbkY1Pmu4NJajgFbJ0no8PQ3DxQm8NeLKtjE,1383
-logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
-logdetective/extractors.py,sha256=xfan_dbGCrLH4cguJ2F6W6UkxXMz24Vob39r5-GsNV8,3102
-logdetective/logdetective.py,sha256=03dDCZOx0PRl8KQ5axq5YE90erjoFtcn1tjTuggItco,4684
-logdetective/server.py,sha256=lgWvsY41rGNixxg5OS7f339Nt2WHWTIOyBG28Jd-zVI,9636
-logdetective/utils.py,sha256=j3u_JruoM57q_7dX3enV04t6WGEg3YNWbu5wmEGmP-I,5019
-logdetective-0.2.12.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-logdetective-0.2.12.dist-info/METADATA,sha256=4Bj7UHQLhtqXRKRKiKSZXuchz1-jGmckFjXMa2zdcj0,9791
-logdetective-0.2.12.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-logdetective-0.2.12.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
-logdetective-0.2.12.dist-info/RECORD,,