logdetective 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.4.0
+Version: 0.5.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -21,7 +21,7 @@ class JobHook(BaseModel):

     # The identifier of the job. We only care about 'build_rpm' and
     # 'build_centos_stream_rpm' jobs.
-    build_name: str = Field(pattern=r"^build(_.*)?_rpm$")
+    build_name: str = Field(pattern=r"^build.*rpm$")

     # A string representing the job status. We only care about 'failed' jobs.
     build_status: str = Field(pattern=r"^failed$")
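The `build_name` pattern is relaxed here from `^build(_.*)?_rpm$` to `^build.*rpm$`: any job name that starts with `build` and ends with `rpm` now passes, not just names ending in `_rpm`. A quick sketch with hypothetical job names illustrates the difference:

```python
import re

OLD = r"^build(_.*)?_rpm$"
NEW = r"^build.*rpm$"

# Hypothetical job names, purely to illustrate the relaxed pattern.
for name in ("build_rpm", "build_centos_stream_rpm", "build_srpm", "rpm_lint"):
    print(name, bool(re.match(OLD, name)), bool(re.match(NEW, name)))

# build_rpm                True  True
# build_centos_stream_rpm  True  True
# build_srpm               False True   <- only the new pattern accepts it
# rpm_lint                 False False  <- still rejected by both
```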
@@ -90,6 +90,8 @@ class InferenceConfig(BaseModel):
     api_endpoint: Optional[Literal["/chat/completions", "/completions"]] = (
         "/chat/completions"
     )
+    url: str = ""
+    api_token: str = ""

     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -99,6 +101,8 @@ class InferenceConfig(BaseModel):
         self.max_tokens = data.get("max_tokens", -1)
         self.log_probs = data.get("log_probs", 1)
         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
+        self.url = data.get("url", "")
+        self.api_token = data.get("api_token", "")


 class ExtractorConfig(BaseModel):
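These two hunks add `url` and `api_token` fields to `InferenceConfig`, populated from the parsed config dict in `__init__`. A minimal sketch of how the new fields end up set (placeholder values; the import path is assumed from the `logdetective.server.models` imports shown later in this diff):

```python
# Sketch only: placeholder values, passed the same way the __init__ above
# reads them with data.get(). The import path is an assumption.
from logdetective.server.models import InferenceConfig

inference = InferenceConfig(
    {
        "max_tokens": -1,
        "log_probs": 1,
        "api_endpoint": "/chat/completions",
        "url": "http://localhost:8000",
        "api_token": "example-token",
    }
)
assert inference.url == "http://localhost:8000"
assert inference.api_token == "example-token"
```

Later hunks in this diff replace the `LLAMA_CPP_HOST`, `LLAMA_CPP_SERVER_PORT`, and `LLM_API_TOKEN` environment variables with these two config fields.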
@@ -3,7 +3,7 @@ import json
 import os
 import re
 import zipfile
-from pathlib import PurePath
+from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 from typing import List, Annotated, Tuple, Dict, Any

@@ -13,6 +13,9 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
 from fastapi.responses import StreamingResponse
 from fastapi.responses import Response as BasicResponse
 import gitlab
+import gitlab.v4
+import gitlab.v4.objects
+import jinja2
 import requests

 from logdetective.constants import (
@@ -38,14 +41,10 @@ from logdetective.server.models import (
     AnalyzedSnippet,
 )

-LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
-LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
-LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
-LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)

 SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)

@@ -190,8 +189,8 @@ async def submit_text( # pylint: disable=R0913,R0917

     headers = {"Content-Type": "application/json"}

-    if LLM_API_TOKEN:
-        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+    if SERVER_CONFIG.inference.api_token:
+        headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"

     if api_endpoint == "/chat/completions":
         return await submit_text_chat_completions(
@@ -224,7 +223,7 @@ async def submit_text_completions( # pylint: disable=R0913,R0917
     }

     response = await submit_to_llm_endpoint(
-        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+        f"{SERVER_CONFIG.inference.url}/v1/completions",
         data,
         headers,
         stream,
@@ -263,7 +262,7 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
     }

     response = await submit_to_llm_endpoint(
-        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/chat/completions",
+        f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
         data,
         headers,
         stream,
@@ -305,8 +304,7 @@ async def analyze_log(build_log: BuildLog):
         LOG.error("Error encountered while computing certainty: %s", ex)
         raise HTTPException(
             status_code=400,
-            detail=f"Couldn't compute certainty with data:\n"
-            f"{response.logprobs}",
+            detail=f"Couldn't compute certainty with data:\n{response.logprobs}",
         ) from ex

     return Response(explanation=response, response_certainty=certainty)
@@ -322,6 +320,12 @@ async def analyze_log_staged(build_log: BuildLog):
     while lacking result, params or query fields.
     """
     log_text = process_url(build_log.url)
+
+    return await perform_staged_analysis(log_text=log_text)
+
+
+async def perform_staged_analysis(log_text: str) -> StagedResponse:
+    """Submit the log file snippets to the LLM and retrieve their results"""
     log_summary = mine_logs(log_text)

     # Process snippets asynchronously
@@ -381,8 +385,8 @@ async def analyze_log_stream(build_log: BuildLog):
     log_summary = format_snippets(log_summary)
     headers = {"Content-Type": "application/json"}

-    if LLM_API_TOKEN:
-        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+    if SERVER_CONFIG.inference.api_token:
+        headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"

     stream = await submit_text_chat_completions(
         PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
@@ -423,6 +427,11 @@ async def process_gitlab_job_event(job_hook):
     # Retrieve data about the job from the GitLab API
     job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)

+    # For easy retrieval later, we'll add project_name and project_url to the
+    # job object
+    job.project_name = project.name
+    job.project_url = project.web_url
+
     # Retrieve the pipeline that started this job
     pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)

@@ -438,37 +447,38 @@ async def process_gitlab_job_event(job_hook):
            "Pipeline source is merge_request_event but no merge request ID was provided."
        )
        return
-    merge_request_id = int(match.group(1))
+    merge_request_iid = int(match.group(1))

     LOG.debug("Retrieving log artifacts")
     # Retrieve the build logs from the merge request artifacts and preprocess them
     try:
-        preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
+        log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
     except LogsTooLargeError:
         LOG.error("Could not retrieve logs. Too large.")
         raise

     # Submit log to Log Detective and await the results.
-    response = await submit_log_to_llm(preprocessed_log)
+    log_text = preprocessed_log.read().decode(encoding="utf-8")
+    staged_response = await perform_staged_analysis(log_text=log_text)
     preprocessed_log.close()

     # Add the Log Detective response as a comment to the merge request
-    await comment_on_mr(merge_request_id, response)
+    await comment_on_mr(project, merge_request_iid, job, log_url, staged_response)


 class LogsTooLargeError(RuntimeError):
     """The log archive exceeds the configured maximum size"""


-async def retrieve_and_preprocess_koji_logs(job):
+async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
     """Download logs from the merge request artifacts

     This function will retrieve the build logs and do some minimal
     preprocessing to determine which log is relevant for analysis.

-    returns: An open, file-like object containing the log contents to be sent
-    for processing by Log Detective. The calling function is responsible for
-    closing this object."""
+    returns: The URL pointing to the selected log file and an open, file-like
+    object containing the log contents to be sent for processing by Log
+    Detective. The calling function is responsible for closing this object."""

     # Make sure the file isn't too large to process.
     if not await check_artifacts_file_size(job):
@@ -551,11 +561,13 @@ async def retrieve_and_preprocess_koji_logs(job):

     LOG.debug("Failed architecture: %s", failed_arch)

-    log_path = failed_arches[failed_arch]
-    LOG.debug("Returning contents of %s", log_path)
+    log_path = failed_arches[failed_arch].as_posix()
+
+    log_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}" # pylint: disable=line-too-long
+    LOG.debug("Returning contents of %s", log_url)

     # Return the log as a file-like object with .read() function
-    return artifacts_zip.open(log_path.as_posix())
+    return log_url, artifacts_zip.open(log_path)


 async def check_artifacts_file_size(job):
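The `log_url` assembled above follows GitLab's job-artifacts API (`GET /projects/:id/jobs/:job_id/artifacts/*artifact_path`), so the selected log can be linked directly from the merge-request comment. A sketch with made-up values:

```python
# All values here are hypothetical; only the URL shape comes from the diff above.
api_url = "https://gitlab.example.com/api/v4"          # SERVER_CONFIG.gitlab.api_url
project_id, job_id = 1234, 987654                      # job.project_id, job.id
log_path = "kojilogs/noarch-123/x86_64-456/build.log"  # selected failed-arch log

log_url = f"{api_url}/projects/{project_id}/jobs/{job_id}/artifacts/{log_path}"
# https://gitlab.example.com/api/v4/projects/1234/jobs/987654/artifacts/kojilogs/noarch-123/x86_64-456/build.log
```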
@@ -582,15 +594,62 @@ async def check_artifacts_file_size(job):
     return content_length <= SERVER_CONFIG.gitlab.max_artifact_size


-async def submit_log_to_llm(log):
-    """Stream the log to the LLM for processing"""
-    # TODO: query the LLM with the log contents # pylint: disable=fixme
-    # This function will be implemented later; right now it does nothing.
-    LOG.debug("Log contents:\n%s", log.read())
-    return ""
+async def comment_on_mr(
+    project: gitlab.v4.objects.Project,
+    merge_request_iid: int,
+    job: gitlab.v4.objects.ProjectJob,
+    log_url: str,
+    response: StagedResponse,
+):
+    """Add the Log Detective response as a comment to the merge request"""
+    LOG.debug(
+        "Primary Explanation for %s MR %d: %s",
+        project.name,
+        merge_request_iid,
+        response.explanation.text,
+    )

+    # Get the formatted comment.
+    comment = await generate_mr_comment(job, log_url, response)

-async def comment_on_mr(merge_request_id: int, response: str): # pylint: disable=unused-argument
-    """Add the Log Detective response as a comment to the merge request"""
-    # TODO: Implement this # pylint: disable=fixme
-    pass # pylint: disable=unnecessary-pass
+    # Look up the merge request
+    merge_request = await asyncio.to_thread(
+        project.mergerequests.get, merge_request_iid
+    )
+
+    # Submit a new comment to the Merge Request using the Gitlab API
+    await asyncio.to_thread(merge_request.discussions.create, {"body": comment})
+
+
+async def generate_mr_comment(
+    job: gitlab.v4.objects.ProjectJob, log_url: str, response: StagedResponse
+) -> str:
+    """Use a template to generate a comment string to submit to Gitlab"""
+
+    # Locate and load the comment template
+    script_path = Path(__file__).resolve().parent
+    template_path = Path(script_path, "templates")
+    jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
+    tpl = jinja_env.get_template("gitlab_comment.md.j2")
+
+    artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
+
+    if response.response_certainty >= 90:
+        emoji_face = ":slight_smile:"
+    elif response.response_certainty >= 70:
+        emoji_face = ":neutral_face:"
+    else:
+        emoji_face = ":frowning2:"
+
+    # Generate the comment from the template
+    content = tpl.render(
+        package=job.project_name,
+        explanation=response.explanation.text,
+        certainty=f"{response.response_certainty:.2f}",
+        emoji_face=emoji_face,
+        snippets=response.snippets,
+        log_url=log_url,
+        artifacts_url=artifacts_url,
+    )
+
+    return content
@@ -0,0 +1,66 @@
+The package {{ package }} failed to build, here is a possible explanation why.
+
+Please know that the explanation was provided by AI and may be incorrect.
+In this case, we are {{ certainty }}% certain of the response {{ emoji_face }}.
+
+{{ explanation }}
+
+<details>
+<ul>
+{% for snippet in snippets %}
+<li>
+<code>
+Line {{ snippet.line_number }}: {{ snippet.text }}
+</code>
+{{ snippet.explanation }}
+</li>
+{% endfor %}
+</ul>
+</details>
+
+<details>
+<summary>Logs</summary>
+<p>
+Log Detective analyzed the following logs files to provide an explanation:
+</p>
+
+<ul>
+<li><a href="{{ log_url }}">{{ log_url }}</a></li>
+</ul>
+
+<p>
+Additional logs are available from:
+<ul>
+<li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
+</ul>
+</p>
+
+<p>
+Please know that these log files are automatically removed after some
+time, so you might need a backup.
+</p>
+</details>
+
+<details>
+<summary>Help</summary>
+<p>Don't hesitate to reach out.</p>
+
+<ul>
+<li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
+<li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
+<li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
+<li><a href="https://log-detective.com/documentation">Documentation</a></li>
+</ul>
+</details>
+
+
+---
+This comment was created by [Log Detective][log-detective].
+
+Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
+or :thumbsdown: to help us improve.<br>
+
+
+
+[log-detective]: https://log-detective.com/
+[contact]: https://github.com/fedora-copr
@@ -12,7 +12,8 @@ def load_server_config(path: str | None) -> Config:
         with open(path, "r") as config_file:
             return Config(yaml.safe_load(config_file))
     except FileNotFoundError:
-        pass
+        # This is not an error, we will fall back to default
+        print("Unable to find server config file, using default then.")
     return Config()


@@ -1,11 +1,14 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.4.0"
+version = "0.5.0"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
 readme = "README.md"
-include = ["logdetective/drain3.ini"]
+include = [
+    "logdetective/drain3.ini",
+    "logdetective/server/templates/gitlab_comment.md.j2",
+]
 packages = [
     { include = "logdetective" }
 ]