logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ def load_server_config(path: str | None) -> Config:
     if path is not None:
         try:
             with open(path, "r") as config_file:
-                return Config(yaml.safe_load(config_file))
+                return Config.model_validate(yaml.safe_load(config_file))
         except FileNotFoundError:
             # This is not an error, we will fall back to default
             print("Unable to find server config file, using default then.")
@@ -1,6 +1,6 @@
 import asyncio
 
-from typing import List, Callable
+from typing import List
 from collections import Counter
 
 import gitlab
@@ -49,25 +49,6 @@ async def collect_emojis_for_mr(
     await collect_emojis_in_comments(comments, gitlab_conn)
 
 
-async def _handle_gitlab_operation(func: Callable, *args):
-    """
-    It handles errors for the specified GitLab operation.
-    After executing it in a separate thread.
-    """
-    try:
-        return await asyncio.to_thread(func, *args)
-    except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
-        log_msg = f"Error during GitLab operation {func}{args}: {e}"
-        if "Not Found" in str(e):
-            LOG.error(log_msg)
-        else:
-            LOG.exception(log_msg)
-    except Exception as e: # pylint: disable=broad-exception-caught
-        LOG.exception(
-            "Unexpected error during GitLab operation %s(%s): %s", func, args, e
-        )
-
-
 async def collect_emojis_in_comments( # pylint: disable=too-many-locals
     comments: List[Comments], gitlab_conn: gitlab.Gitlab
 ):
@@ -80,37 +61,54 @@ async def collect_emojis_in_comments( # pylint: disable=too-many-locals
         mr_job_db = await GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
         if not mr_job_db:
             continue
-        if mr_job_db.id not in projects:
-            project = await _handle_gitlab_operation(
-                gitlab_conn.projects.get, mr_job_db.project_id
-            )
-            if not project:
-                continue
-            projects[mr_job_db.id] = project
-        else:
-            project = projects[mr_job_db.id]
-        merge_request_iid = mr_job_db.mr_iid
-        if merge_request_iid not in merge_requests:
-            merge_request = await _handle_gitlab_operation(
-                project.mergerequests.get, merge_request_iid
+        try:
+            if mr_job_db.id not in projects:
+                project = await asyncio.to_thread(
+                    gitlab_conn.projects.get, mr_job_db.project_id
+                )
+
+                projects[mr_job_db.id] = project
+            else:
+                project = projects[mr_job_db.id]
+            merge_request_iid = mr_job_db.mr_iid
+            if merge_request_iid not in merge_requests:
+                merge_request = await asyncio.to_thread(
+                    project.mergerequests.get, merge_request_iid
+                )
+
+                merge_requests[merge_request_iid] = merge_request
+            else:
+                merge_request = merge_requests[merge_request_iid]
+
+            discussion = await asyncio.to_thread(
+                merge_request.discussions.get, comment.comment_id
             )
-            if not merge_request:
-                continue
-            merge_requests[merge_request_iid] = merge_request
-        else:
-            merge_request = merge_requests[merge_request_iid]

-        discussion = await _handle_gitlab_operation(
-            merge_request.discussions.get, comment.comment_id
-        )
-        if not discussion:
-            continue
+            # Get the ID of the first note
+            if "notes" not in discussion.attributes or len(discussion.attributes["notes"]) == 0:
+                LOG.warning(
+                    "No notes were found in comment %s in merge request %d",
+                    comment.comment_id,
+                    merge_request_iid,
+                )
+                continue

-        # Get the ID of the first note
-        note_id = discussion.attributes["notes"][0]["id"]
-        note = await _handle_gitlab_operation(merge_request.notes.get, note_id)
-        if not note:
-            continue
+            note_id = discussion.attributes["notes"][0]["id"]
+            note = await asyncio.to_thread(merge_request.notes.get, note_id)
+
+        # Log warning with full stack trace, in case we can't find the right
+        # discussion, merge request or project.
+        # All of these objects can be lost, and we shouldn't treat as an error.
+        # Other exceptions are raised.
+        except gitlab.GitlabError as e:
+            if e.response_code == 404:
+                LOG.warning(
+                    "Couldn't retrieve emoji counts for comment %s due to GitlabError",
+                    comment.comment_id, exc_info=True)
+                continue
+            LOG.error("Error encountered while processing emoji counts for GitLab comment %s",
+                      comment.comment_id, exc_info=True)
+            raise

         emoji_counts = Counter(emoji.name for emoji in note.awardemojis.list())

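This hunk drops the _handle_gitlab_operation() wrapper in favour of one try/except around the whole per-comment block: blocking python-gitlab calls still run in a worker thread via asyncio.to_thread, a 404 is logged as a warning and the comment is skipped, and any other GitlabError is re-raised. A minimal sketch of that pattern; the fetch_project() helper and project ID are hypothetical, not part of logdetective:

    import asyncio
    import logging

    import gitlab

    LOG = logging.getLogger(__name__)
    PROJECT_ID = 12345  # hypothetical


    async def fetch_project(conn: gitlab.Gitlab):
        try:
            # python-gitlab is synchronous; to_thread keeps the event loop free.
            return await asyncio.to_thread(conn.projects.get, PROJECT_ID)
        except gitlab.GitlabError as e:
            if e.response_code == 404:
                # A vanished project is expected here; log it and move on.
                LOG.warning("Project %s not found", PROJECT_ID, exc_info=True)
                return None
            raise  # anything else is a genuine error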
@@ -4,6 +4,7 @@ import zipfile
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 
+from aiolimiter import AsyncLimiter
 from fastapi import HTTPException
 
 import gitlab
@@ -13,6 +14,7 @@ import jinja2
 import aiohttp
 import backoff
 
+from logdetective.extractors import Extractor
 from logdetective.server.config import SERVER_CONFIG, LOG
 from logdetective.server.exceptions import (
     LogsTooLargeError,
@@ -41,15 +43,20 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
 
 async def process_gitlab_job_event(
     gitlab_cfg: GitLabInstanceConfig,
+    gitlab_connection: gitlab.Gitlab,
+    http_session: aiohttp.ClientSession,
     forge: Forge,
     job_hook: JobHook,
-): # pylint: disable=too-many-locals
+    async_request_limiter: AsyncLimiter,
+    extractors: list[Extractor],
+): # pylint: disable=too-many-locals disable=too-many-arguments disable=too-many-positional-arguments
     """Handle a received job_event webhook from GitLab"""
     LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)
 
     # Look up the project this job belongs to
-    gitlab_conn = gitlab_cfg.get_connection()
-    project = await asyncio.to_thread(gitlab_conn.projects.get, job_hook.project_id)
+    project = await asyncio.to_thread(
+        gitlab_connection.projects.get, job_hook.project_id
+    )
     LOG.info("Processing failed job for %s", project.name)
 
     # Retrieve data about the job from the GitLab API
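process_gitlab_job_event() now receives its collaborators (GitLab connection, aiohttp session, rate limiter, extractor list) instead of building them from gitlab_cfg on every call. A hypothetical call site under the new signature; the token attribute, rate limit, and import path are assumptions, not code from the package:

    import aiohttp
    import gitlab
    from aiolimiter import AsyncLimiter

    from logdetective.server.gitlab import process_gitlab_job_event  # module path assumed


    async def handle_webhook(gitlab_cfg, forge, job_hook, extractors):
        async_request_limiter = AsyncLimiter(10)  # 10 requests per 60 s
        gitlab_connection = gitlab.Gitlab(
            gitlab_cfg.url, private_token=gitlab_cfg.api_token  # attribute name assumed
        )
        async with aiohttp.ClientSession() as http_session:
            await process_gitlab_job_event(
                gitlab_cfg,
                gitlab_connection,
                http_session,
                forge,
                job_hook,
                async_request_limiter=async_request_limiter,
                extractors=extractors,
            )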
@@ -94,7 +101,7 @@ async def process_gitlab_job_event(
     # Retrieve the build logs from the merge request artifacts and preprocess them
     try:
         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(
-            gitlab_cfg, job
+            gitlab_cfg, job, http_session
         )
     except (LogsTooLargeError, LogDetectiveConnectionError) as ex:
         LOG.error("Could not retrieve logs due to %s", ex)
@@ -105,10 +112,14 @@ async def process_gitlab_job_event(
     metrics_id = await add_new_metrics(
         api_name=EndpointType.ANALYZE_GITLAB_JOB,
         url=log_url,
-        http_session=gitlab_cfg.get_http_session(),
+        http_session=http_session,
         compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-    staged_response = await perform_staged_analysis(log_text=log_text)
+    staged_response = await perform_staged_analysis(
+        log_text=log_text,
+        async_request_limiter=async_request_limiter,
+        extractors=extractors,
+    )
     await update_metrics(metrics_id, staged_response)
     preprocessed_log.close()

@@ -162,6 +173,7 @@ def is_eligible_package(project_name: str):
 async def retrieve_and_preprocess_koji_logs(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ): # pylint: disable=too-many-branches,too-many-locals
     """Download logs from the merge request artifacts

@@ -173,7 +185,7 @@ async def retrieve_and_preprocess_koji_logs(
     Detective. The calling function is responsible for closing this object."""
 
     # Make sure the file isn't too large to process.
-    if not await check_artifacts_file_size(gitlab_cfg, job):
+    if not await check_artifacts_file_size(gitlab_cfg, job, http_session):
         raise LogsTooLargeError(
             f"Oversized logs for job {job.id} in project {job.project_id}"
         )
@@ -274,6 +286,7 @@ async def retrieve_and_preprocess_koji_logs(
 async def check_artifacts_file_size(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ):
     """Method to determine if the artifacts are too large to process"""
     # First, make sure that the artifacts are of a reasonable size. The
@@ -285,7 +298,7 @@ async def check_artifacts_file_size(
     )
     LOG.debug("checking artifact URL %s%s", gitlab_cfg.url, artifacts_path)
     try:
-        head_response = await gitlab_cfg.get_http_session().head(
+        head_response = await http_session.head(
             artifacts_path,
             allow_redirects=True,
             raise_for_status=True,
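check_artifacts_file_size() now issues its HEAD request through the injected aiohttp session rather than one owned by gitlab_cfg. A rough sketch of such a HEAD-based size check; the helper name and size cap are made up for illustration:

    import aiohttp

    MAX_ARTIFACT_SIZE = 300 * 1024 * 1024  # hypothetical 300 MiB cap


    async def artifacts_small_enough(http_session: aiohttp.ClientSession, url: str) -> bool:
        head_response = await http_session.head(
            url,
            allow_redirects=True,
            raise_for_status=True,
        )
        # Compare the advertised size against the cap without downloading anything.
        content_length = int(head_response.headers.get("Content-Length", 0))
        return content_length <= MAX_ARTIFACT_SIZE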
@@ -9,9 +9,11 @@ from fastapi import HTTPException
 from pydantic import ValidationError
 
 import aiohttp
+from aiolimiter import AsyncLimiter
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
 
+from logdetective.extractors import Extractor
 from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
@@ -41,7 +43,6 @@ from logdetective.server.utils import (
     construct_final_prompt,
 )
 
-
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)


@@ -57,6 +58,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 async def call_llm(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
     structured_output: dict | None = None,
 ) -> Explanation:
@@ -87,7 +89,7 @@ async def call_llm(
         }
         kwargs["response_format"] = response_format
 
-    async with inference_cfg.get_limiter():
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
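Rate limiting moves from inference_cfg.get_limiter() to a single AsyncLimiter passed down through the call chain, so all LLM requests share one budget. A small sketch of how aiolimiter throttles concurrent coroutines; the rate and the placeholder _call() body are assumptions:

    import asyncio

    from aiolimiter import AsyncLimiter

    limiter = AsyncLimiter(2, time_period=1)  # at most 2 acquisitions per second


    async def _call(i: int) -> int:
        async with limiter:  # blocks here once the rate budget is spent
            await asyncio.sleep(0.1)  # stand-in for the real completion request
            return i


    async def main():
        print(await asyncio.gather(*(_call(i) for i in range(10))))


    asyncio.run(main())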
@@ -126,6 +128,7 @@ async def call_llm(
 async def call_llm_stream(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
 ) -> AsyncStream[ChatCompletionChunk]:
     """Submit prompt to LLM and recieve stream of tokens as a result.
@@ -136,7 +139,7 @@ async def call_llm_stream(
 
     LOG.info("Submitting to /v1/chat/completions endpoint")
 
-    async with inference_cfg.get_limiter():
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
@@ -150,7 +153,9 @@ async def call_llm_stream(


 async def analyze_snippets(
-    log_summary: List[Tuple[int, str]], structured_output: dict | None = None
+    log_summary: List[Tuple[int, str]],
+    async_request_limiter: AsyncLimiter,
+    structured_output: dict | None = None,
 ) -> List[SnippetAnalysis | RatedSnippetAnalysis]:
     """Submit log file snippets to the LLM and gather results"""
     # Process snippets asynchronously
@@ -162,6 +167,7 @@ async def analyze_snippets(
                 SERVER_CONFIG.inference.system_role,
                 SERVER_CONFIG.inference.user_role,
             ),
+            async_request_limiter=async_request_limiter,
             inference_cfg=SERVER_CONFIG.snippet_inference,
             structured_output=structured_output,
         )
@@ -184,9 +190,13 @@ async def analyze_snippets(
     return analyzed_snippets


-async def perfrom_analysis(log_text: str) -> Response:
+async def perform_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> Response:
     """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -199,6 +209,7 @@ async def perfrom_analysis(log_text: str) -> Response:
     )
     response = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
     certainty = 0
@@ -216,9 +227,13 @@ async def perfrom_analysis(log_text: str) -> Response:
     return Response(explanation=response, response_certainty=certainty)


-async def perform_analyis_stream(log_text: str) -> AsyncStream:
+async def perform_analysis_stream(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> AsyncStream:
     """Submit log file snippets in aggregate and return a stream of tokens"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -232,6 +247,7 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
 
     stream = call_llm_stream(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )

@@ -241,13 +257,18 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
     return stream


-async def perform_staged_analysis(log_text: str) -> StagedResponse:
+async def perform_staged_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     start = time.time()
     if SERVER_CONFIG.general.top_k_snippets:
         rated_snippets = await analyze_snippets(
             log_summary=log_summary,
+            async_request_limiter=async_request_limiter,
             structured_output=RatedSnippetAnalysis.model_json_schema(),
         )

@@ -266,7 +287,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
             len(rated_snippets),
         )
     else:
-        processed_snippets = await analyze_snippets(log_summary=log_summary)
+        processed_snippets = await analyze_snippets(
+            log_summary=log_summary, async_request_limiter=async_request_limiter
+        )
 
     # Extract original text and line number from `log_summary`
     processed_snippets = [
@@ -276,7 +299,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     delta = time.time() - start
     LOG.info("Snippet analysis performed in %f s", delta)
     log_summary = format_analyzed_snippets(processed_snippets)
-    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)
+    final_prompt = construct_final_prompt(
+        log_summary, PROMPT_CONFIG.prompt_template_staged
+    )
 
     messages = prompt_to_messages(
         final_prompt,
@@ -286,6 +311,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     )
     final_analysis = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
