logdetective 0.3.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logdetective/__init__.py CHANGED
@@ -0,0 +1,3 @@
+ import logging
+
+ logger = logging.getLogger("logdetective")
@@ -0,0 +1,63 @@
+ from os import getenv
+ from contextlib import contextmanager
+ from sqlalchemy import create_engine
+ from sqlalchemy.orm import sessionmaker, declarative_base
+
+ from logdetective import logger
+
+
+ def get_pg_url() -> str:
+     """create postgresql connection string"""
+     return (
+         f"postgresql+psycopg2://{getenv('POSTGRESQL_USER')}"
+         f":{getenv('POSTGRESQL_PASSWORD')}@{getenv('POSTGRESQL_HOST', 'postgres')}"
+         f":{getenv('POSTGRESQL_PORT', '5432')}/{getenv('POSTGRESQL_DATABASE')}"
+     )
+
+
+ # To log SQL statements, set SQLALCHEMY_ECHO env. var. to True|T|Yes|Y|1
+ sqlalchemy_echo = getenv("SQLALCHEMY_ECHO", "False").lower() in (
+     "true",
+     "t",
+     "yes",
+     "y",
+     "1",
+ )
+ engine = create_engine(get_pg_url(), echo=sqlalchemy_echo)
+ SessionFactory = sessionmaker(autoflush=True, bind=engine)
+ Base = declarative_base()
+
+
+ @contextmanager
+ def transaction(commit: bool = False):
+     """
+     Context manager for 'framing' a db transaction.
+
+     Args:
+         commit: Whether to call `Session.commit()` upon exiting the context. Should be set to True
+             if any changes are made within the context. Defaults to False.
+     """
+
+     session = SessionFactory()
+     try:
+         yield session
+         if commit:
+             session.commit()
+     except Exception as ex:
+         logger.warning("Exception while working with database: %s", str(ex))
+         session.rollback()
+         raise
+     finally:
+         session.close()
+
+
+ def init():
+     """Init db"""
+     Base.metadata.create_all(engine)
+     logger.debug("Database initialized")
+
+
+ def destroy():
+     """Destroy db"""
+     Base.metadata.drop_all(engine)
+     logger.warning("Database cleaned")
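
For orientation, the new `transaction()` helper above yields a session and commits only when `commit=True`; the metrics model in the next hunk relies on exactly this pattern. Below is a minimal usage sketch, assuming the `POSTGRESQL_*` environment variables point at a reachable database; the `StoredItem` model is hypothetical and not part of the package.

```python
from sqlalchemy import Column, Integer, String

from logdetective.server.database.base import Base, init, transaction


class StoredItem(Base):
    """Hypothetical example model, not shipped in the package."""

    __tablename__ = "stored_item"
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)


init()  # create tables for every model registered on Base

# Writes need commit=True; the context manager rolls back and re-raises on errors.
with transaction(commit=True) as session:
    session.add(StoredItem(name="example"))

# Read-only work can omit commit.
with transaction() as session:
    count = session.query(StoredItem).count()
```
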
@@ -0,0 +1,88 @@
+ import enum
+ import datetime
+
+ from typing import Optional
+ from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
+
+ from logdetective.server.database.base import Base, transaction
+
+
+ class EndpointType(enum.Enum):
+     """Different analyze endpoints"""
+
+     ANALYZE = "analyze_log"
+     ANALYZE_STAGED = "analyze_log_staged"
+     ANALYZE_STREAM = "analyze_log_stream"
+
+
+ class AnalyzeRequestMetrics(Base):
+     """Store data related to received requests and given responses"""
+
+     __tablename__ = "analyze_request_metrics"
+
+     id = Column(Integer, primary_key=True)
+     endpoint = Column(
+         Enum(EndpointType),
+         nullable=False,
+         index=True,
+         comment="The service endpoint that was called",
+     )
+     request_received_at = Column(
+         DateTime,
+         nullable=False,
+         index=True,
+         default=datetime.datetime.now(datetime.timezone.utc),
+         comment="Timestamp when the request was received",
+     )
+     log_url = Column(
+         String,
+         nullable=False,
+         index=False,
+         comment="Log url for which analysis was requested",
+     )
+     response_sent_at = Column(
+         DateTime, nullable=True, comment="Timestamp when the response was sent back"
+     )
+     response_length = Column(
+         Integer, nullable=True, comment="Length of the response in chars"
+     )
+     response_certainty = Column(
+         Float, nullable=True, comment="Certainty for generated response"
+     )
+
+     @classmethod
+     def create(
+         cls,
+         endpoint: EndpointType,
+         log_url: str,
+         request_received_at: Optional[datetime.datetime] = None,
+     ) -> int:
+         """Create AnalyzeRequestMetrics new line
+         with data related to a received request"""
+         with transaction(commit=True) as session:
+             metrics = AnalyzeRequestMetrics()
+             metrics.endpoint = endpoint
+             metrics.request_received_at = request_received_at or datetime.datetime.now(
+                 datetime.timezone.utc
+             )
+             metrics.log_url = log_url
+             session.add(metrics)
+             session.flush()
+             return metrics.id
+
+     @classmethod
+     def update(
+         cls,
+         id_: int,
+         response_sent_at: datetime,
+         response_length: int,
+         response_certainty: float,
+     ) -> None:
+         """Update an AnalyzeRequestMetrics line
+         with data related to the given response"""
+         with transaction(commit=True) as session:
+             metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+             metrics.response_sent_at = response_sent_at
+             metrics.response_length = response_length
+             metrics.response_certainty = response_certainty
+             session.add(metrics)
@@ -0,0 +1,82 @@
+ import datetime
+ import inspect
+ from typing import Union
+ from functools import wraps
+
+ from starlette.responses import StreamingResponse
+ from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+ from logdetective.server import models
+
+
+ def add_new_metrics(
+     api_name: str, build_log: models.BuildLog, received_at: datetime.datetime = None
+ ) -> int:
+     """Add a new database entry for a received request.
+
+     This will store the time when this function is called,
+     the endpoint from where the request was received,
+     and the log for which analysis is requested.
+     """
+     return AnalyzeRequestMetrics.create(
+         endpoint=EndpointType(api_name),
+         log_url=build_log.url,
+         request_received_at=received_at
+         if received_at
+         else datetime.datetime.now(datetime.timezone.utc),
+     )
+
+
+ def update_metrics(
+     metrics_id: int,
+     response: Union[models.Response, models.StagedResponse, StreamingResponse],
+     sent_at: datetime.datetime = None,
+ ) -> None:
+     """Update a database metric entry for a received request,
+     filling data for the given response.
+
+     This will add to the database entry the time when the response was sent,
+     the length of the created response and the certainty for it.
+     """
+     response_sent_at = (
+         sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
+     )
+     response_length = None
+     if hasattr(response, "explanation") and "choices" in response.explanation:
+         response_length = sum(
+             len(choice["text"])
+             for choice in response.explanation["choices"]
+             if "text" in choice
+         )
+     response_certainty = (
+         response.response_certainty if hasattr(response, "response_certainty") else None
+     )
+     AnalyzeRequestMetrics.update(
+         metrics_id, response_sent_at, response_length, response_certainty
+     )
+
+
+ def track_request():
+     """
+     Decorator to track requests metrics
+     """
+
+     def decorator(f):
+         @wraps(f)
+         async def async_decorated_function(*args, **kwargs):
+             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             response = await f(*args, **kwargs)
+             update_metrics(metrics_id, response)
+             return response
+
+         @wraps(f)
+         def sync_decorated_function(*args, **kwargs):
+             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             response = f(*args, **kwargs)
+             update_metrics(metrics_id, response)
+             return response
+
+         if inspect.iscoroutinefunction(f):
+             return async_decorated_function
+         return sync_decorated_function
+
+     return decorator
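
The `track_request()` decorator above is what the server endpoints later in this diff are wrapped with. It picks the async or sync wrapper based on `inspect.iscoroutinefunction`, creates a metrics row before calling the endpoint, and fills in response data afterwards; it reads `kwargs["build_log"]`, so the tracked argument must be passed by keyword. A minimal sketch, assuming a configured metrics database; the endpoint body here is a placeholder:

```python
from logdetective.server import models
from logdetective.server.metric import track_request


@track_request()
async def analyze_log(build_log: models.BuildLog):
    # Placeholder body; the real endpoint submits the log to the LLM.
    return models.Response(
        explanation=models.Explanation(text="..."), response_certainty=0.0
    )


# The wrapper reads kwargs["build_log"], so call it with a keyword argument, e.g.:
# response = await analyze_log(build_log=models.BuildLog(url="https://example.org/build.log"))
```
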
@@ -1,5 +1,5 @@
  from logging import BASIC_FORMAT
- from typing import List, Dict, Optional
+ from typing import List, Dict, Optional, Literal
  from pydantic import BaseModel, Field
 
 
@@ -21,7 +21,7 @@ class JobHook(BaseModel):
 
      # The identifier of the job. We only care about 'build_rpm' and
      # 'build_centos_stream_rpm' jobs.
-     build_name: str = Field(pattern=r"^build(_.*)?_rpm$")
+     build_name: str = Field(pattern=r"^build.*rpm$")
 
      # A string representing the job status. We only care about 'failed' jobs.
      build_status: str = Field(pattern=r"^failed$")
@@ -37,15 +37,36 @@ class JobHook(BaseModel):
      project_id: int
 
 
+ class Explanation(BaseModel):
+     """Model of snippet or general log explanation from Log Detective"""
+
+     text: str
+     logprobs: Optional[List[Dict]] = None
+
+     def __str__(self):
+         return self.text
+
+
+ class AnalyzedSnippet(BaseModel):
+     """Model for snippets already processed by Log Detective.
+
+     explanation: LLM output in form of plain text and logprobs dictionary
+     text: original snippet text
+     line_number: location of snippet in original log
+     """
+     explanation: Explanation
+     text: str
+     line_number: int
+
+
  class Response(BaseModel):
      """Model of data returned by Log Detective API
 
-     explanation: CreateCompletionResponse
-     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     explanation: Explanation
      response_certainty: float
      """
 
-     explanation: Dict
+     explanation: Explanation
      response_certainty: float
 
 
@@ -53,17 +74,12 @@ class StagedResponse(Response):
      """Model of data returned by Log Detective API when called when staged response
      is requested. Contains list of reponses to prompts for individual snippets.
 
-     explanation: CreateCompletionResponse
-     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     explanation: Explanation
      response_certainty: float
-     snippets:
-     list of dictionaries {
-     'snippet' : '<original_text>,
-     'comment': CreateCompletionResponse,
-     'line_number': '<location_in_log>' }
+     snippets: list of AnalyzedSnippet objects
      """
 
-     snippets: List[Dict[str, str | Dict | int]]
+     snippets: List[AnalyzedSnippet]
 
 
  class InferenceConfig(BaseModel):
@@ -71,6 +87,11 @@ class InferenceConfig(BaseModel):
 
      max_tokens: int = -1
      log_probs: int = 1
+     api_endpoint: Optional[Literal["/chat/completions", "/completions"]] = (
+         "/chat/completions"
+     )
+     url: str = ""
+     api_token: str = ""
 
      def __init__(self, data: Optional[dict] = None):
          super().__init__()
@@ -79,6 +100,9 @@ class InferenceConfig(BaseModel):
 
          self.max_tokens = data.get("max_tokens", -1)
          self.log_probs = data.get("log_probs", 1)
+         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
+         self.url = data.get("url", "")
+         self.api_token = data.get("api_token", "")
 
 
  class ExtractorConfig(BaseModel):
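
The new `api_endpoint`, `url`, and `api_token` fields above are populated from the dictionary handed to `InferenceConfig`, which in a deployment comes from the server config file loaded by `load_server_config`. A minimal sketch of constructing the model directly, with placeholder values:

```python
from logdetective.server.models import InferenceConfig

# Placeholder values; keys mirror the data.get() calls in InferenceConfig.__init__.
inference = InferenceConfig(
    {
        "url": "http://localhost:8000",  # base URL of the OpenAI-compatible server
        "api_endpoint": "/completions",  # or the default "/chat/completions"
        "api_token": "",                 # sent as a Bearer token when non-empty
        "max_tokens": -1,
        "log_probs": 1,
    }
)

assert inference.api_endpoint == "/completions"
assert inference.url == "http://localhost:8000"
```
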
@@ -3,16 +3,19 @@ import json
  import os
  import re
  import zipfile
- from pathlib import PurePath
+ from pathlib import Path, PurePath
  from tempfile import TemporaryFile
- from typing import List, Annotated, Tuple
+ from typing import List, Annotated, Tuple, Dict, Any
 
 
- from llama_cpp import CreateCompletionResponse
  from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
+
  from fastapi.responses import StreamingResponse
  from fastapi.responses import Response as BasicResponse
  import gitlab
+ import gitlab.v4
+ import gitlab.v4.objects
+ import jinja2
  import requests
 
  from logdetective.constants import (
@@ -27,18 +30,21 @@ from logdetective.utils import (
      format_snippets,
      format_analyzed_snippets,
  )
- from logdetective.server.models import BuildLog, JobHook, Response, StagedResponse
  from logdetective.server.utils import load_server_config, get_log
+ from logdetective.server.metric import track_request
+ from logdetective.server.models import (
+     BuildLog,
+     JobHook,
+     Response,
+     StagedResponse,
+     Explanation,
+     AnalyzedSnippet,
+ )
 
-
- LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
- LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
- LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
  LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
  API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
  SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
- LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)
 
  SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 
@@ -123,35 +129,21 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
      return log_summary
 
 
- async def submit_text(
-     text: str,
-     max_tokens: int = -1,
-     log_probs: int = 1,
-     stream: bool = False,
-     model: str = "default-model",
- ):
-     """Submit prompt to LLM.
-     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-     log_probs: number of token choices to produce log probs for
-     """
-     LOG.info("Analyzing the text")
-     data = {
-         "prompt": text,
-         "max_tokens": max_tokens,
-         "logprobs": log_probs,
-         "stream": stream,
-         "model": model,
-     }
-
-     headers = {"Content-Type": "application/json"}
-
-     if LLM_API_TOKEN:
-         headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+ async def submit_to_llm_endpoint(
+     url: str, data: Dict[str, Any], headers: Dict[str, str], stream: bool
+ ) -> Any:
+     """Send request to selected API endpoint. Verifying successful request unless
+     the using the stream response.
 
+     url:
+     data:
+     headers:
+     stream:
+     """
      try:
          # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
          response = requests.post(
-             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+             url,
              headers=headers,
              data=json.dumps(data),
              timeout=int(LLM_CPP_SERVER_TIMEOUT),
@@ -177,13 +169,118 @@ async def submit_text(
              status_code=400,
              detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
          ) from ex
-     else:
-         return response
 
-     return CreateCompletionResponse(response)
+     return response
+
+
+ async def submit_text(  # pylint: disable=R0913,R0917
+     text: str,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+     api_endpoint: str = "/chat/completions",
+ ) -> Explanation:
+     """Submit prompt to LLM using a selected endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Analyzing the text")
+
+     headers = {"Content-Type": "application/json"}
+
+     if SERVER_CONFIG.inference.api_token:
+         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
+
+     if api_endpoint == "/chat/completions":
+         return await submit_text_chat_completions(
+             text, headers, max_tokens, log_probs > 0, stream, model
+         )
+     return await submit_text_completions(
+         text, headers, max_tokens, log_probs, stream, model
+     )
+
+
+ async def submit_text_completions(  # pylint: disable=R0913,R0917
+     text: str,
+     headers: dict,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+ ) -> Explanation:
+     """Submit prompt to OpenAI API completions endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Submitting to /v1/completions endpoint")
+     data = {
+         "prompt": text,
+         "max_tokens": max_tokens,
+         "logprobs": log_probs,
+         "stream": stream,
+         "model": model,
+     }
+
+     response = await submit_to_llm_endpoint(
+         f"{SERVER_CONFIG.inference.url}/v1/completions",
+         data,
+         headers,
+         stream,
+     )
+
+     return Explanation(
+         text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
+     )
+
+
+ async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
+     text: str,
+     headers: dict,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+ ) -> Explanation:
+     """Submit prompt to OpenAI API /chat/completions endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Submitting to /v1/chat/completions endpoint")
+
+     data = {
+         "messages": [
+             {
+                 "role": "user",
+                 "content": text,
+             }
+         ],
+         "max_tokens": max_tokens,
+         "logprobs": log_probs,
+         "stream": stream,
+         "model": model,
+     }
+
+     response = await submit_to_llm_endpoint(
+         f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
+         data,
+         headers,
+         stream,
+     )
+
+     if stream:
+         return Explanation(
+             text=response["choices"][0]["delta"]["content"],
+             logprobs=response["choices"][0]["logprobs"]["content"],
+         )
+     return Explanation(
+         text=response["choices"][0]["message"]["content"],
+         logprobs=response["choices"][0]["logprobs"]["content"],
+     )
 
 
  @app.post("/analyze", response_model=Response)
+ @track_request()
  async def analyze_log(build_log: BuildLog):
      """Provide endpoint for log file submission and analysis.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
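
Of the JSON returned by the OpenAI-style endpoints, the new code above only reads `choices[0]["message"]["content"]` (or `["delta"]["content"]` when streaming) and `choices[0]["logprobs"]["content"]`, wrapping them in the new `Explanation` model. A minimal sketch of that mapping with a hand-written sample payload (real `/v1/chat/completions` responses carry more keys):

```python
from logdetective.server.models import Explanation

# Hand-written sample shaped like the fields read above; not a real server response.
sample = {
    "choices": [
        {
            "message": {"content": "The build failed because of a missing dependency."},
            "logprobs": {"content": [{"token": "The", "logprob": -0.01}]},
        }
    ]
}

explanation = Explanation(
    text=sample["choices"][0]["message"]["content"],
    logprobs=sample["choices"][0]["logprobs"]["content"],
)

print(str(explanation))  # Explanation.__str__ returns just the text
```
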
@@ -194,26 +291,27 @@ async def analyze_log(build_log: BuildLog):
      log_text = process_url(build_log.url)
      log_summary = mine_logs(log_text)
      log_summary = format_snippets(log_summary)
-     response = await submit_text(PROMPT_TEMPLATE.format(log_summary))
+     response = await submit_text(
+         PROMPT_TEMPLATE.format(log_summary),
+         api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+     )
      certainty = 0
 
-     if "logprobs" in response["choices"][0]:
+     if response.logprobs is not None:
          try:
-             certainty = compute_certainty(
-                 response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-             )
+             certainty = compute_certainty(response.logprobs)
          except ValueError as ex:
              LOG.error("Error encountered while computing certainty: %s", ex)
              raise HTTPException(
                  status_code=400,
-                 detail=f"Couldn't compute certainty with data:\n"
-                 f"{response['choices'][0]['logprobs']['content'][0]['top_logprobs']}",
+                 detail=f"Couldn't compute certainty with data:\n{response.logprobs}",
              ) from ex
 
      return Response(explanation=response, response_certainty=certainty)
 
 
  @app.post("/analyze/staged", response_model=StagedResponse)
+ @track_request()
  async def analyze_log_staged(build_log: BuildLog):
      """Provide endpoint for log file submission and analysis.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -222,36 +320,48 @@ async def analyze_log_staged(build_log: BuildLog):
      while lacking result, params or query fields.
      """
      log_text = process_url(build_log.url)
+
+     return await perform_staged_analysis(log_text=log_text)
+
+
+ async def perform_staged_analysis(log_text: str) -> StagedResponse:
+     """Submit the log file snippets to the LLM and retrieve their results"""
      log_summary = mine_logs(log_text)
 
      # Process snippets asynchronously
      analyzed_snippets = await asyncio.gather(
-         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s[1])) for s in log_summary]
+         *[
+             submit_text(
+                 SNIPPET_PROMPT_TEMPLATE.format(s),
+                 api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+             )
+             for s in log_summary
+         ]
      )
 
      analyzed_snippets = [
-         {"snippet": e[0][1], "line_number": e[0][0], "comment": e[1]}
+         AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
          for e in zip(log_summary, analyzed_snippets)
      ]
      final_prompt = PROMPT_TEMPLATE_STAGED.format(
          format_analyzed_snippets(analyzed_snippets)
      )
 
-     final_analysis = await submit_text(final_prompt)
-     print(final_analysis)
+     final_analysis = await submit_text(
+         final_prompt, api_endpoint=SERVER_CONFIG.inference.api_endpoint
+     )
+
      certainty = 0
 
-     if "logprobs" in final_analysis["choices"][0]:
+     if final_analysis.logprobs:
          try:
-             certainty = compute_certainty(
-                 final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-             )
+             certainty = compute_certainty(final_analysis.logprobs)
          except ValueError as ex:
              LOG.error("Error encountered while computing certainty: %s", ex)
              raise HTTPException(
                  status_code=400,
                  detail=f"Couldn't compute certainty with data:\n"
-                 f"{final_analysis['choices'][0]['logprobs']['content'][0]['top_logprobs']}",
+                 f"{final_analysis.logprobs}",
              ) from ex
 
      return StagedResponse(
@@ -262,6 +372,7 @@ async def analyze_log_staged(build_log: BuildLog):
 
 
  @app.post("/analyze/stream", response_class=StreamingResponse)
+ @track_request()
  async def analyze_log_stream(build_log: BuildLog):
      """Stream response endpoint for Logdetective.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -272,7 +383,14 @@ async def analyze_log_stream(build_log: BuildLog):
      log_text = process_url(build_log.url)
      log_summary = mine_logs(log_text)
      log_summary = format_snippets(log_summary)
-     stream = await submit_text(PROMPT_TEMPLATE.format(log_summary), stream=True)
+     headers = {"Content-Type": "application/json"}
+
+     if SERVER_CONFIG.inference.api_token:
+         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
+
+     stream = await submit_text_chat_completions(
+         PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
+     )
 
      return StreamingResponse(stream)
 
@@ -309,6 +427,11 @@ async def process_gitlab_job_event(job_hook):
      # Retrieve data about the job from the GitLab API
      job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)
 
+     # For easy retrieval later, we'll add project_name and project_url to the
+     # job object
+     job.project_name = project.name
+     job.project_url = project.web_url
+
      # Retrieve the pipeline that started this job
      pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)
 
@@ -324,37 +447,38 @@ async def process_gitlab_job_event(job_hook):
              "Pipeline source is merge_request_event but no merge request ID was provided."
          )
          return
-     merge_request_id = int(match.group(1))
+     merge_request_iid = int(match.group(1))
 
      LOG.debug("Retrieving log artifacts")
      # Retrieve the build logs from the merge request artifacts and preprocess them
      try:
-         preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
+         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
      except LogsTooLargeError:
          LOG.error("Could not retrieve logs. Too large.")
          raise
 
      # Submit log to Log Detective and await the results.
-     response = await submit_log_to_llm(preprocessed_log)
+     log_text = preprocessed_log.read().decode(encoding="utf-8")
+     staged_response = await perform_staged_analysis(log_text=log_text)
      preprocessed_log.close()
 
      # Add the Log Detective response as a comment to the merge request
-     await comment_on_mr(merge_request_id, response)
+     await comment_on_mr(project, merge_request_iid, job, log_url, staged_response)
 
 
  class LogsTooLargeError(RuntimeError):
      """The log archive exceeds the configured maximum size"""
 
 
- async def retrieve_and_preprocess_koji_logs(job):
+ async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
      """Download logs from the merge request artifacts
 
      This function will retrieve the build logs and do some minimal
      preprocessing to determine which log is relevant for analysis.
 
-     returns: An open, file-like object containing the log contents to be sent
-     for processing by Log Detective. The calling function is responsible for
-     closing this object."""
+     returns: The URL pointing to the selected log file and an open, file-like
+     object containing the log contents to be sent for processing by Log
+     Detective. The calling function is responsible for closing this object."""
 
      # Make sure the file isn't too large to process.
      if not await check_artifacts_file_size(job):
@@ -437,11 +561,13 @@ async def retrieve_and_preprocess_koji_logs(job):
 
      LOG.debug("Failed architecture: %s", failed_arch)
 
-     log_path = failed_arches[failed_arch]
-     LOG.debug("Returning contents of %s", log_path)
+     log_path = failed_arches[failed_arch].as_posix()
+
+     log_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}"  # pylint: disable=line-too-long
+     LOG.debug("Returning contents of %s", log_url)
 
      # Return the log as a file-like object with .read() function
-     return artifacts_zip.open(log_path.as_posix())
+     return log_url, artifacts_zip.open(log_path)
 
 
  async def check_artifacts_file_size(job):
@@ -468,15 +594,62 @@ async def check_artifacts_file_size(job):
      return content_length <= SERVER_CONFIG.gitlab.max_artifact_size
 
 
- async def submit_log_to_llm(log):
-     """Stream the log to the LLM for processing"""
-     # TODO: query the LLM with the log contents  # pylint: disable=fixme
-     # This function will be implemented later; right now it does nothing.
-     LOG.debug("Log contents:\n%s", log.read())
-     return ""
+ async def comment_on_mr(
+     project: gitlab.v4.objects.Project,
+     merge_request_iid: int,
+     job: gitlab.v4.objects.ProjectJob,
+     log_url: str,
+     response: StagedResponse,
+ ):
+     """Add the Log Detective response as a comment to the merge request"""
+     LOG.debug(
+         "Primary Explanation for %s MR %d: %s",
+         project.name,
+         merge_request_iid,
+         response.explanation.text,
+     )
 
+     # Get the formatted comment.
+     comment = await generate_mr_comment(job, log_url, response)
 
- async def comment_on_mr(merge_request_id: int, response: str):  # pylint: disable=unused-argument
-     """Add the Log Detective response as a comment to the merge request"""
-     # TODO: Implement this  # pylint: disable=fixme
-     pass  # pylint: disable=unnecessary-pass
+     # Look up the merge request
+     merge_request = await asyncio.to_thread(
+         project.mergerequests.get, merge_request_iid
+     )
+
+     # Submit a new comment to the Merge Request using the Gitlab API
+     await asyncio.to_thread(merge_request.discussions.create, {"body": comment})
+
+
+ async def generate_mr_comment(
+     job: gitlab.v4.objects.ProjectJob, log_url: str, response: StagedResponse
+ ) -> str:
+     """Use a template to generate a comment string to submit to Gitlab"""
+
+     # Locate and load the comment template
+     script_path = Path(__file__).resolve().parent
+     template_path = Path(script_path, "templates")
+     jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
+     tpl = jinja_env.get_template("gitlab_comment.md.j2")
+
+     artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
+
+     if response.response_certainty >= 90:
+         emoji_face = ":slight_smile:"
+     elif response.response_certainty >= 70:
+         emoji_face = ":neutral_face:"
+     else:
+         emoji_face = ":frowning2:"
+
+     # Generate the comment from the template
+     content = tpl.render(
+         package=job.project_name,
+         explanation=response.explanation.text,
+         certainty=f"{response.response_certainty:.2f}",
+         emoji_face=emoji_face,
+         snippets=response.snippets,
+         log_url=log_url,
+         artifacts_url=artifacts_url,
+     )
+
+     return content
@@ -0,0 +1,66 @@
+ The package {{ package }} failed to build, here is a possible explanation why.
+
+ Please know that the explanation was provided by AI and may be incorrect.
+ In this case, we are {{ certainty }}% certain of the response {{ emoji_face }}.
+
+ {{ explanation }}
+
+ <details>
+ <ul>
+ {% for snippet in snippets %}
+ <li>
+ <code>
+ Line {{ snippet.line_number }}: {{ snippet.text }}
+ </code>
+ {{ snippet.explanation }}
+ </li>
+ {% endfor %}
+ </ul>
+ </details>
+
+ <details>
+ <summary>Logs</summary>
+ <p>
+ Log Detective analyzed the following logs files to provide an explanation:
+ </p>
+
+ <ul>
+ <li><a href="{{ log_url }}">{{ log_url }}</a></li>
+ </ul>
+
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
+ </ul>
+ </p>
+
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
+ </details>
+
+ <details>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+
+ <ul>
+ <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
+ <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
+ <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
+ <li><a href="https://log-detective.com/documentation">Documentation</a></li>
+ </ul>
+ </details>
+
+
+ ---
+ This comment was created by [Log Detective][log-detective].
+
+ Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.<br>
+
+
+
+ [log-detective]: https://log-detective.com/
+ [contact]: https://github.com/fedora-copr
@@ -12,7 +12,8 @@ def load_server_config(path: str | None) -> Config:
          with open(path, "r") as config_file:
              return Config(yaml.safe_load(config_file))
      except FileNotFoundError:
-         pass
+         # This is not an error, we will fall back to default
+         print("Unable to find server config file, using default then.")
      return Config()
 
 
logdetective/utils.py CHANGED
@@ -7,7 +7,7 @@ import requests
 
  from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
  from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_DELIMITER
-
+ from logdetective.server.models import AnalyzedSnippet
 
  LOG = logging.getLogger("logdetective")
 
@@ -175,11 +175,11 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
      return summary
 
 
- def format_analyzed_snippets(snippets: list[Dict]) -> str:
+ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
      """Format snippets for submission into staged prompt."""
      summary = f"\n{SNIPPET_DELIMITER}\n".join(
          [
-             f"[{e['snippet']}] at line [{e["line_number"]}]: [{e['comment']['choices'][0]['text']}]"
+             f"[{e.text}] at line [{e.line_number}]: [{e.explanation.text}]"
              for e in snippets
          ]
      )
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 0.3.3
+ Version: 0.5.0
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -19,15 +19,18 @@ Classifier: Topic :: Internet :: Log Analysis
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
  Provides-Extra: server
+ Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
  Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
  Requires-Dist: huggingface-hub (>0.23.2)
  Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
  Requires-Dist: numpy (>=1.26.0)
+ Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
  Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
  Requires-Dist: python-gitlab (>=4.4.0)
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
  Requires-Dist: requests (>0.2.31)
+ Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
  Description-Content-Type: text/markdown
@@ -188,6 +191,71 @@ or
 
  tox run -e lint # to run pylint
 
+ Visual Studio Code testing with podman/docker-compose
+ -----------------------------------------------------
+
+ - In `Containerfile`, add `debugpy` as a dependency
+
+ ```diff
+ -RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
+ +RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context debugpy\
+ ```
+
+ - Rebuild server image with new dependencies
+
+ ```
+ make rebuild-server
+ ```
+
+ - Forward debugging port in `docker-compose.yaml` for `server` service.
+
+ ```diff
+  ports:
+    - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
+ +  - "${VSCODE_DEBUG_PORT:-5678}:${VSCODE_DEBUG_PORT:-5678}"
+ ```
+
+ - Add `debugpy` code in a logdetective file where you want to stop at first.
+
+ ```diff
+ +import debugpy
+ +debugpy.listen(("0.0.0.0", 5678))
+ +debugpy.wait_for_client()
+ ```
+
+ - Prepare `.vscode/lunch.json` configuration for Visual Studio Code (at least the following configuration is needed)
+
+ ```json
+ {
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "Python Debugger: Remote Attach",
+             "type": "debugpy",
+             "request": "attach",
+             "connect": {
+                 "host": "localhost",
+                 "port": 5678
+             },
+             "pathMappings": [
+                 {
+                     "localRoot": "${workspaceFolder}",
+                     "remoteRoot": "/src"
+                 }
+             ]
+         }
+     ]
+ }
+ ```
+
+ - Run the server
+
+ ```
+ podman-compose up server
+ ```
+
+ - Run Visual Stdio Code debug configuration named *Python Debug: Remote Attach*
+
  Server
  ------
 
@@ -220,6 +288,19 @@ Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-c
  $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
  ```
 
+ Generate a new database revision with alembic
+ ---------------------------------------------
+
+ Modify the database models (`logdetective/server/database/model.py).
+
+ Generate a new database revision with the command:
+
+ **Warning**: this command will start up a new server
+ and shut it down when the operation completes.
+
+ ```
+ CHANGE="A change comment" make alembic-generate-revision
+ ```
 
  Our production instance
  -----------------------
@@ -0,0 +1,20 @@
+ logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
+ logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
+ logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+ logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
+ logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
+ logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
+ logdetective/server/database/models.py,sha256=8jW4k03Kny_3ld35214hcjYoJqlBvQIr4LH9mfQukXw,2750
+ logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
+ logdetective/server/models.py,sha256=f42yMMMMfTdTN4KWpPUfaEoaiE9rhqltA0dQNKGOB2w,5660
+ logdetective/server/server.py,sha256=lDdXO3s1larmHvuQDasvutEvcOpa3Rv5Cd_btyiqHdU,23118
+ logdetective/server/templates/gitlab_comment.md.j2,sha256=kheTkhQ-LfuFkr8av-Mw2a-9VYEUbDTLwaa-CKI6OkI,1622
+ logdetective/server/utils.py,sha256=OFvhttjv3yp8kfim5_s4mNG8ly21qyILxE0o3DcVVKg,1340
+ logdetective/utils.py,sha256=eudens1_T6iTtYhyzoYCpwuWgFHUMDSt6eWnrAB-mAI,6188
+ logdetective-0.5.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ logdetective-0.5.0.dist-info/METADATA,sha256=420Qn9rAheVSNDYmHjUYSB5AojmY58lUCNt3RNzwFC4,12714
+ logdetective-0.5.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ logdetective-0.5.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+ logdetective-0.5.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.1
+ Generator: poetry-core 2.1.2
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,15 +0,0 @@
- logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
- logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
- logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
- logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
- logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- logdetective/server/models.py,sha256=9QURaw0u9yZKywXwHzv6_rS6XhRBA2UHV5u4b9xkWqc,5196
- logdetective/server/server.py,sha256=o2s4ezQE-a1XY7RFK0vLDFQO_wj9ZgG58SEV0hErLd8,18237
- logdetective/server/utils.py,sha256=osW5-VXxJAxRt7Wd3t1wF7PyW89FE9g4gSZLZCShlLc,1216
- logdetective/utils.py,sha256=59jq7F45Wk8pldzDt4gkh47Hny0T3fy1ggJFjSXDSGo,6148
- logdetective-0.3.3.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- logdetective-0.3.3.dist-info/METADATA,sha256=cO2ZL03HeNe5lASpa-4Wea-SESxQgSVaoQh5ry_EYCY,10691
- logdetective-0.3.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
- logdetective-0.3.3.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
- logdetective-0.3.3.dist-info/RECORD,,