logdetective 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
+ # This file is intended for customization of prompts
+ # It is used only in server mode.
+ # On the command line you have to load it using --prompts
+ # The defaults are stored in constants.py
+
+ prompt_template: |
+   Given the following log snippets, and nothing else, explain what failure, if any, occurred during the build of this package.
+
+   Please start with a concise, one-sentence summary describing the problem and a recommended solution to fix it, and then follow with the analysis.
+
+   Analysis of the snippets must be in the format [X] : [Y], where [X] is a log snippet and [Y] is the explanation.
+   Snippets themselves must not be altered in any way whatsoever.
+
+   Snippets are delimited with '================'.
+
+   The explanation of the issue, and the recommended solution, should take a handful of sentences.
+
+   Snippets:
+
+   {}
+
+   Analysis:
@@ -0,0 +1,13 @@
+ # This file is intended for customization of prompts
+ # It is used only in server mode.
+ # On the command line you have to load it using --prompts
+ # The defaults are stored in constants.py
+
+ prompt_template: |
+   Given the following log snippets, and nothing else, explain what failure, if any, occurred during the build of this package.
+
+   Provide a concise, one-paragraph summary describing the most probable culprit and recommend a solution to fix it.
+
+   Snippets:
+
+   {}
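Both templates above carry a single `{}` placeholder that the server fills with the mined log snippets via Python string formatting (see the `PROMPT_CONFIG.prompt_template.format(...)` calls later in this diff). A minimal sketch of loading such a file outside the server, assuming PyYAML is available; the file name and snippet text are only examples:

    import yaml  # assumption: PyYAML is installed

    # Load one of the bundled prompt files and render it with an example snippet.
    with open("logdetective/prompts-summary-only.yml", encoding="utf-8") as f:
        prompts = yaml.safe_load(f)

    snippets = "================\nerror: undefined reference to `main'\n================"
    print(prompts["prompt_template"].format(snippets))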
@@ -1,6 +1,6 @@
  import asyncio
 
- from typing import List
+ from typing import List, Callable
  from collections import Counter
 
  import gitlab
@@ -11,6 +11,7 @@ from logdetective.server.database.models import (
      Reactions,
      GitlabMergeRequestJobs,
  )
+ from logdetective.server.config import LOG
 
 
  async def collect_emojis(gitlab_conn: gitlab.Gitlab, period: TimePeriod):
@@ -36,6 +37,23 @@ async def collect_emojis_for_mr(
      await collect_emojis_in_comments(comments, gitlab_conn)
 
 
+ async def _handle_gitlab_operation(func: Callable, *args):
+     """
+     Execute the specified GitLab operation in a separate thread
+     and handle any errors it raises.
+     """
+     try:
+         return await asyncio.to_thread(func, *args)
+     except gitlab.GitlabError as e:
+         log_msg = f"Error during GitLab operation {func}{args}: {e}"
+         if "Not Found" in str(e):
+             LOG.error(log_msg)
+         else:
+             LOG.exception(log_msg)
+     except Exception as e:  # pylint: disable=broad-exception-caught
+         LOG.exception("Unexpected error during GitLab operation %s(%s): %s", func, args, e)
+
+
  async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
      comments: List[Comments], gitlab_conn: gitlab.Gitlab
  ):
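The `_handle_gitlab_operation` helper above runs a blocking python-gitlab call in a worker thread and logs any `GitlabError` instead of raising, returning `None` on failure. A short usage sketch mirroring the call sites below (the wrapper function is hypothetical):

    async def fetch_project(gitlab_conn: gitlab.Gitlab, project_id: int):
        # Fetch a project without letting a "404 Not Found" abort the caller.
        project = await _handle_gitlab_operation(gitlab_conn.projects.get, project_id)
        if not project:
            # The failure has already been logged by the helper.
            return None
        return project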
@@ -47,24 +65,34 @@ async def collect_emojis_in_comments( # pylint: disable=too-many-locals
      for comment in comments:
          mr_job_db = GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
          if mr_job_db.id not in projects:
-             projects[mr_job_db.id] = project = await asyncio.to_thread(
+             projects[mr_job_db.id] = project = await _handle_gitlab_operation(
                  gitlab_conn.projects.get, mr_job_db.project_id
              )
+             if not project:
+                 continue
          else:
              project = projects[mr_job_db.id]
          mr_iid = mr_job_db.mr_iid
          if mr_iid not in mrs:
-             mrs[mr_iid] = mr = await asyncio.to_thread(
+             mrs[mr_iid] = mr = await _handle_gitlab_operation(
                  project.mergerequests.get, mr_iid
              )
+             if not mr:
+                 continue
          else:
              mr = mrs[mr_iid]
 
-         discussion = mr.discussions.get(comment.comment_id)
+         discussion = await _handle_gitlab_operation(
+             mr.discussions.get, comment.comment_id
+         )
+         if not discussion:
+             continue
 
          # Get the ID of the first note
          note_id = discussion.attributes["notes"][0]["id"]
-         note = mr.notes.get(note_id)
+         note = await _handle_gitlab_operation(mr.notes.get, note_id)
+         if not note:
+             continue
 
          emoji_counts = Counter(emoji.name for emoji in note.awardemojis.list())
 
@@ -11,7 +11,6 @@ import gitlab.v4
  import gitlab.v4.objects
  import jinja2
  import aiohttp
- import sqlalchemy
 
  from logdetective.server.config import SERVER_CONFIG, LOG
  from logdetective.server.llm import perform_staged_analysis
@@ -22,10 +21,11 @@ from logdetective.server.models import (
      StagedResponse,
  )
  from logdetective.server.database.models import (
+     AnalyzeRequestMetrics,
      Comments,
      EndpointType,
      Forge,
-     AnalyzeRequestMetrics,
+     GitlabMergeRequestJobs,
  )
  from logdetective.server.compressors import RemoteLogCompressor
 
@@ -34,7 +34,6 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
 
 
  async def process_gitlab_job_event(
-     http: aiohttp.ClientSession,
      gitlab_cfg: GitLabInstanceConfig,
      forge: Forge,
      job_hook: JobHook,
@@ -63,19 +62,34 @@ async def process_gitlab_job_event(
          LOG.info("Not a merge request pipeline. Ignoring.")
          return
 
-     # Extract the merge-request ID from the job
+     # Extract the merge-request IID from the job
      match = MR_REGEX.search(pipeline.ref)
      if not match:
          LOG.error(
-             "Pipeline source is merge_request_event but no merge request ID was provided."
+             "Pipeline source is merge_request_event but no merge request IID was provided."
          )
          return
      merge_request_iid = int(match.group(1))
 
+     # Check if this is a resubmission of an existing, completed job.
+     # If it is, we'll exit out here and not waste time retrieving the logs,
+     # running a new analysis or trying to submit a new comment.
+     mr_job_db = GitlabMergeRequestJobs.get_by_details(
+         forge=forge,
+         project_id=project.id,
+         mr_iid=merge_request_iid,
+         job_id=job_hook.build_id,
+     )
+     if mr_job_db:
+         LOG.info("Resubmission of an existing build. Skipping.")
+         return
+
      LOG.debug("Retrieving log artifacts")
      # Retrieve the build logs from the merge request artifacts and preprocess them
      try:
-         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(gitlab_cfg, http, job)
+         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(
+             gitlab_cfg, job
+         )
      except LogsTooLargeError:
          LOG.error("Could not retrieve logs. Too large.")
          raise
@@ -85,10 +99,10 @@ async def process_gitlab_job_event(
      metrics_id = await add_new_metrics(
          api_name=EndpointType.ANALYZE_GITLAB_JOB,
          url=log_url,
-         http_session=http,
+         http_session=gitlab_cfg.get_http_session(),
          compressed_log_content=RemoteLogCompressor.zip_text(log_text),
      )
-     staged_response = await perform_staged_analysis(http, log_text=log_text)
+     staged_response = await perform_staged_analysis(log_text=log_text)
      update_metrics(metrics_id, staged_response)
      preprocessed_log.close()
 
@@ -142,8 +156,7 @@ class LogsTooLargeError(RuntimeError):
 
  async def retrieve_and_preprocess_koji_logs(
      gitlab_cfg: GitLabInstanceConfig,
-     http: aiohttp.ClientSession,
-     job: gitlab.v4.objects.ProjectJob
+     job: gitlab.v4.objects.ProjectJob,
  ):  # pylint: disable=too-many-branches,too-many-locals
      """Download logs from the merge request artifacts
 
@@ -155,7 +168,7 @@ async def retrieve_and_preprocess_koji_logs(
      Detective. The calling function is responsible for closing this object."""
 
      # Make sure the file isn't too large to process.
-     if not await check_artifacts_file_size(gitlab_cfg, http, job):
+     if not await check_artifacts_file_size(gitlab_cfg, job):
          raise LogsTooLargeError(
              f"Oversized logs for job {job.id} in project {job.project_id}"
          )
@@ -201,7 +214,9 @@ async def retrieve_and_preprocess_koji_logs(
          match = FAILURE_LOG_REGEX.search(contents)
          if match:
              failure_log_name = match.group(1)
-             failed_arches[architecture] = PurePath(path.parent, failure_log_name)
+             failed_arches[architecture] = PurePath(
+                 path.parent, failure_log_name
+             )
          else:
              LOG.info(
                  "task_failed.log does not indicate which log contains the failure."
@@ -243,8 +258,8 @@ async def retrieve_and_preprocess_koji_logs(
 
      log_path = failed_arches[failed_arch].as_posix()
 
-     log_url = f"{gitlab_cfg.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}"  # pylint: disable=line-too-long
-     LOG.debug("Returning contents of %s", log_url)
+     log_url = f"{gitlab_cfg.api_path}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}"  # pylint: disable=line-too-long
+     LOG.debug("Returning contents of %s%s", gitlab_cfg.url, log_url)
 
      # Return the log as a file-like object with .read() function
      return log_url, artifacts_zip.open(log_path)
@@ -252,7 +267,6 @@ async def retrieve_and_preprocess_koji_logs(
 
  async def check_artifacts_file_size(
      gitlab_cfg: GitLabInstanceConfig,
-     http: aiohttp.ClientSession,
      job: gitlab.v4.objects.ProjectJob,
  ):
      """Method to determine if the artifacts are too large to process"""
@@ -260,14 +274,14 @@ async def check_artifacts_file_size(
      # zipped artifact collection will be stored in memory below. The
      # python-gitlab library doesn't expose a way to check this value directly,
      # so we need to interact directly with the headers.
-     artifacts_url = f"{gitlab_cfg.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts"  # pylint: disable=line-too-long
-     LOG.debug("checking artifact URL %s", artifacts_url)
+     artifacts_path = (
+         f"{gitlab_cfg.api_path}/projects/{job.project_id}/jobs/{job.id}/artifacts"
+     )
+     LOG.debug("checking artifact URL %s%s", gitlab_cfg.url, artifacts_path)
      try:
-         head_response = await http.head(
-             artifacts_url,
+         head_response = await gitlab_cfg.get_http_session().head(
+             artifacts_path,
              allow_redirects=True,
-             headers={"Authorization": f"Bearer {gitlab_cfg.api_token}"},
-             timeout=5,
              raise_for_status=True,
          )
      except aiohttp.ClientResponseError as ex:
@@ -278,7 +292,7 @@ async def check_artifacts_file_size(
      content_length = int(head_response.headers.get("content-length"))
      LOG.debug(
          "URL: %s, content-length: %d, max length: %d",
-         artifacts_url,
+         artifacts_path,
          content_length,
          gitlab_cfg.max_artifact_size,
      )
@@ -337,23 +351,15 @@ async def comment_on_mr( # pylint: disable=too-many-arguments disable=too-many-
      await asyncio.to_thread(note.save)
 
      # Save the new comment to the database
-     try:
-         metrics = AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
-         Comments.create(
-             forge,
-             project.id,
-             merge_request_iid,
-             job.id,
-             discussion.id,
-             metrics,
-         )
-     except sqlalchemy.exc.IntegrityError:
-         # We most likely attempted to save a new comment for the same
-         # build job. This is somewhat common during development when we're
-         # submitting requests manually. It shouldn't really happen in
-         # production.
-         if not SERVER_CONFIG.general.devmode:
-             raise
+     metrics = AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
+     Comments.create(
+         forge,
+         project.id,
+         merge_request_iid,
+         job.id,
+         discussion.id,
+         metrics,
+     )
 
 
  async def suppress_latest_comment(
@@ -17,9 +17,10 @@ from logdetective.utils import (
  )
  from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG
  from logdetective.server.models import (
-     StagedResponse,
-     Explanation,
      AnalyzedSnippet,
+     InferenceConfig,
+     Explanation,
+     StagedResponse,
  )
 
 
@@ -54,24 +55,33 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
 
 
  async def submit_to_llm_endpoint(
-     http: aiohttp.ClientSession,
-     url: str,
+     url_path: str,
      data: Dict[str, Any],
      headers: Dict[str, str],
      stream: bool,
+     inference_cfg: InferenceConfig = SERVER_CONFIG.inference,
  ) -> Any:
-     """Send request to selected API endpoint. Verifying successful request unless
+     """Send request to an API endpoint. Verifying successful request unless
      using the streamed response.
 
-     url:
+     url_path: The endpoint path to query (e.g. "/v1/chat/completions"). It should
+         not include the scheme and netloc of the URL, which is stored in the
+         InferenceConfig.
      data:
      headers:
      stream:
+     inference_cfg: An InferenceConfig object containing the URL, max_tokens
+         and other relevant configuration for talking to an inference server.
      """
-     async with SERVER_CONFIG.inference.get_limiter():
-         LOG.debug("async request %s headers=%s data=%s", url, headers, data)
-         response = await http.post(
-             url,
+     async with inference_cfg.get_limiter():
+         LOG.debug("async request %s headers=%s data=%s", url_path, headers, data)
+         session = inference_cfg.get_http_session()
+
+         if inference_cfg.api_token:
+             headers["Authorization"] = f"Bearer {inference_cfg.api_token}"
+
+         response = await session.post(
+             url_path,
              headers=headers,
              # we need to use the `json=` parameter here and let aiohttp
              # handle the json-encoding
@@ -88,7 +98,9 @@ async def submit_to_llm_endpoint(
      try:
          return json.loads(await response.text())
      except UnicodeDecodeError as ex:
-         LOG.error("Error encountered while parsing llama server response: %s", ex)
+         LOG.error(
+             "Error encountered while parsing llama server response: %s", ex
+         )
          raise HTTPException(
              status_code=400,
              detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
@@ -125,16 +137,14 @@ def we_give_up(details: backoff._typing.Details):
      raise_on_giveup=False,
      on_giveup=we_give_up,
  )
- async def submit_text(  # pylint: disable=R0913,R0917
-     http: aiohttp.ClientSession,
+ async def submit_text(
      text: str,
-     max_tokens: int = -1,
-     log_probs: int = 1,
+     inference_cfg: InferenceConfig,
      stream: bool = False,
-     model: str = "default-model",
- ) -> Explanation:
-     """Submit prompt to LLM using a selected endpoint.
-     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+ ) -> Union[Explanation, StreamReader]:
+     """Submit prompt to LLM.
+     inference_cfg: The configuration section from the config.json representing
+         the relevant inference server for this request.
      log_probs: number of token choices to produce log probs for
      """
      LOG.info("Analyzing the text")
@@ -144,64 +154,6 @@ async def submit_text( # pylint: disable=R0913,R0917
      if SERVER_CONFIG.inference.api_token:
          headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
 
-     if SERVER_CONFIG.inference.api_endpoint == "/chat/completions":
-         return await submit_text_chat_completions(
-             http, text, headers, max_tokens, log_probs > 0, stream, model
-         )
-     return await submit_text_completions(
-         http, text, headers, max_tokens, log_probs, stream, model
-     )
-
-
- async def submit_text_completions(  # pylint: disable=R0913,R0917
-     http: aiohttp.ClientSession,
-     text: str,
-     headers: dict,
-     max_tokens: int = -1,
-     log_probs: int = 1,
-     stream: bool = False,
-     model: str = "default-model",
- ) -> Explanation:
-     """Submit prompt to OpenAI API completions endpoint.
-     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-     log_probs: number of token choices to produce log probs for
-     """
-     LOG.info("Submitting to /v1/completions endpoint")
-     data = {
-         "prompt": text,
-         "max_tokens": max_tokens,
-         "logprobs": log_probs,
-         "stream": stream,
-         "model": model,
-         "temperature": SERVER_CONFIG.inference.temperature,
-     }
-
-     response = await submit_to_llm_endpoint(
-         http,
-         f"{SERVER_CONFIG.inference.url}/v1/completions",
-         data,
-         headers,
-         stream,
-     )
-
-     return Explanation(
-         text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
-     )
-
-
- async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
-     http: aiohttp.ClientSession,
-     text: str,
-     headers: dict,
-     max_tokens: int = -1,
-     log_probs: int = 1,
-     stream: bool = False,
-     model: str = "default-model",
- ) -> Union[Explanation, StreamReader]:
-     """Submit prompt to OpenAI API /chat/completions endpoint.
-     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-     log_probs: number of token choices to produce log probs for
-     """
      LOG.info("Submitting to /v1/chat/completions endpoint")
 
      data = {
@@ -211,19 +163,19 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
                  "content": text,
              }
          ],
-         "max_tokens": max_tokens,
-         "logprobs": log_probs,
+         "max_tokens": inference_cfg.max_tokens,
+         "logprobs": inference_cfg.log_probs,
          "stream": stream,
-         "model": model,
-         "temperature": SERVER_CONFIG.inference.temperature,
+         "model": inference_cfg.model,
+         "temperature": inference_cfg.temperature,
      }
 
      response = await submit_to_llm_endpoint(
-         http,
-         f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
+         "/v1/chat/completions",
          data,
          headers,
-         stream,
+         inference_cfg=inference_cfg,
+         stream=stream,
      )
 
      if stream:
@@ -234,19 +186,15 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
      )
 
 
- async def perform_staged_analysis(
-     http: aiohttp.ClientSession, log_text: str
- ) -> StagedResponse:
+ async def perform_staged_analysis(log_text: str) -> StagedResponse:
      """Submit the log file snippets to the LLM and retrieve their results"""
      log_summary = mine_logs(log_text)
 
      # Process snippets asynchronously
      awaitables = [
          submit_text(
-             http,
              PROMPT_CONFIG.snippet_prompt_template.format(s),
-             model=SERVER_CONFIG.inference.model,
-             max_tokens=SERVER_CONFIG.inference.max_tokens,
+             inference_cfg=SERVER_CONFIG.snippet_inference,
          )
          for s in log_summary
      ]
@@ -261,10 +209,8 @@ async def perform_staged_analysis(
      )
 
      final_analysis = await submit_text(
-         http,
          final_prompt,
-         model=SERVER_CONFIG.inference.model,
-         max_tokens=SERVER_CONFIG.inference.max_tokens,
+         inference_cfg=SERVER_CONFIG.inference,
      )
 
      certainty = 0
@@ -1,6 +1,7 @@
+ import asyncio
  import datetime
  from logging import BASIC_FORMAT
- from typing import List, Dict, Optional, Literal
+ from typing import List, Dict, Optional
  from pydantic import (
      BaseModel,
      Field,
@@ -10,6 +11,8 @@ from pydantic import (
      HttpUrl,
  )
 
+ import aiohttp
+
  from aiolimiter import AsyncLimiter
  from gitlab import Gitlab
 
@@ -131,16 +134,14 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
      """Model for inference configuration of logdetective server."""
 
      max_tokens: int = -1
-     log_probs: int = 1
-     api_endpoint: Optional[Literal["/chat/completions", "/completions"]] = (
-         "/chat/completions"
-     )
+     log_probs: bool = True
      url: str = ""
      api_token: str = ""
      model: str = ""
      temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
      max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
-     request_period: float = 60.0 / LLM_DEFAULT_REQUESTS_PER_MINUTE
+     http_timeout: float = 5.0
+     _http_session: aiohttp.ClientSession = None
      _limiter: AsyncLimiter = AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE)
 
      def __init__(self, data: Optional[dict] = None):
@@ -149,9 +150,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
              return
 
          self.max_tokens = data.get("max_tokens", -1)
-         self.log_probs = data.get("log_probs", 1)
-         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
+         self.log_probs = data.get("log_probs", True)
          self.url = data.get("url", "")
+         self.http_timeout = data.get("http_timeout", 5.0)
          self.api_token = data.get("api_token", "")
          self.model = data.get("model", "default-model")
          self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
@@ -162,6 +163,40 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
          )
          self._limiter = AsyncLimiter(self._requests_per_minute)
 
+     def __del__(self):
+         # Close connection when this object is destroyed
+         if self._http_session:
+             try:
+                 loop = asyncio.get_running_loop()
+                 loop.create_task(self._http_session.close())
+             except RuntimeError:
+                 # No loop running, so create one to close the session
+                 loop = asyncio.new_event_loop()
+                 loop.run_until_complete(self._http_session.close())
+                 loop.close()
+             except Exception:  # pylint: disable=broad-exception-caught
+                 # We should only get here if we're shutting down, so we don't
+                 # really care if the close() completes cleanly.
+                 pass
+
+     def get_http_session(self):
+         """Return the internal HTTP session so it can be used to contact the
+         LLM server. May be used as a context manager."""
+
+         # Create the session on the first attempt. We need to do this "lazily"
+         # because it needs to happen once the event loop is running, even
+         # though the initialization itself is synchronous.
+         if not self._http_session:
+             self._http_session = aiohttp.ClientSession(
+                 base_url=self.url,
+                 timeout=aiohttp.ClientTimeout(
+                     total=self.http_timeout,
+                     connect=3.07,
+                 ),
+             )
+
+         return self._http_session
+
      def get_limiter(self):
          """Return the limiter object so it can be used as a context manager"""
          return self._limiter
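Because the lazily created session is bound to `base_url=self.url`, callers only pass a path, which is how `submit_to_llm_endpoint` above posts to "/v1/chat/completions". A minimal sketch of the same pattern; the "/v1/models" endpoint is an assumption about an OpenAI-compatible server, not part of this release:

    async def list_models(inference_cfg: InferenceConfig):
        # The session is created on first use and already carries the configured
        # base_url and timeout, so only the path is supplied here.
        async with inference_cfg.get_http_session().get("/v1/models") as resp:
            return await resp.json()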
@@ -184,14 +219,25 @@ class ExtractorConfig(BaseModel):
          self.verbose = data.get("verbose", False)
 
 
- class GitLabInstanceConfig(BaseModel):
+ class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
      """Model for GitLab configuration of logdetective server."""
 
      name: str = None
      url: str = None
-     api_url: str = None
+     api_path: str = None
      api_token: str = None
+
+     # This is a list to support key rotation.
+     # When the key is being changed, we will add the new key as a new entry in
+     # the configuration and then remove the old key once all of the client
+     # webhook configurations have been updated.
+     # If this option is left empty or unspecified, all requests will be
+     # considered authorized.
+     webhook_secrets: Optional[List[str]] = None
+
+     timeout: float = 5.0
      _conn: Gitlab = None
+     _http_session: aiohttp.ClientSession = None
 
      # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
      max_artifact_size: int = 300
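Because `webhook_secrets` is a list, a key rotation can keep both the old and the new secret valid until every GitLab webhook has been switched over. A sketch of an instance section, built directly as the `data` dict that `__init__` below consumes; all values are illustrative:

    gitlab_cfg = GitLabInstanceConfig(
        "gitlab.com",
        {
            "url": "https://gitlab.com",
            "api_path": "/api/v4",
            "api_token": "glpat-example",                     # placeholder token
            "webhook_secrets": ["old-secret", "new-secret"],  # both accepted during rotation
            "max_artifact_size": 300,                         # MiB
            "timeout": 5.0,
        },
    )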
@@ -203,16 +249,57 @@ class GitLabInstanceConfig(BaseModel):
 
          self.name = name
          self.url = data.get("url", "https://gitlab.com")
-         self.api_url = f"{self.url}/api/v4"
+         self.api_path = data.get("api_path", "/api/v4")
          self.api_token = data.get("api_token", None)
+         self.webhook_secrets = data.get("webhook_secrets", None)
          self.max_artifact_size = int(data.get("max_artifact_size")) * 1024 * 1024
 
-         self._conn = Gitlab(url=self.url, private_token=self.api_token)
+         self.timeout = data.get("timeout", 5.0)
+         self._conn = Gitlab(
+             url=self.url,
+             private_token=self.api_token,
+             timeout=self.timeout,
+         )
 
      def get_connection(self):
          """Get the Gitlab connection object"""
          return self._conn
 
+     def get_http_session(self):
+         """Return the internal HTTP session so it can be used to contact the
+         Gitlab server. May be used as a context manager."""
+
+         # Create the session on the first attempt. We need to do this "lazily"
+         # because it needs to happen once the event loop is running, even
+         # though the initialization itself is synchronous.
+         if not self._http_session:
+             self._http_session = aiohttp.ClientSession(
+                 base_url=self.url,
+                 headers={"Authorization": f"Bearer {self.api_token}"},
+                 timeout=aiohttp.ClientTimeout(
+                     total=self.timeout,
+                     connect=3.07,
+                 ),
+             )
+
+         return self._http_session
+
+     def __del__(self):
+         # Close connection when this object is destroyed
+         if self._http_session:
+             try:
+                 loop = asyncio.get_running_loop()
+                 loop.create_task(self._http_session.close())
+             except RuntimeError:
+                 # No loop running, so create one to close the session
+                 loop = asyncio.new_event_loop()
+                 loop.run_until_complete(self._http_session.close())
+                 loop.close()
+             except Exception:  # pylint: disable=broad-exception-caught
+                 # We should only get here if we're shutting down, so we don't
+                 # really care if the close() completes cleanly.
+                 pass
+
 
  class GitLabConfig(BaseModel):
      """Model for GitLab configuration of logdetective server."""
@@ -257,6 +344,7 @@ class GeneralConfig(BaseModel):
      excluded_packages: List[str] = None
      devmode: bool = False
      sentry_dsn: HttpUrl | None = None
+     collect_emojis_interval: int = 60 * 60  # seconds
 
      def __init__(self, data: Optional[dict] = None):
          super().__init__()
@@ -267,6 +355,9 @@ class GeneralConfig(BaseModel):
          self.excluded_packages = data.get("excluded_packages", [])
          self.devmode = data.get("devmode", False)
          self.sentry_dsn = data.get("sentry_dsn")
+         self.collect_emojis_interval = data.get(
+             "collect_emojis_interval", 60 * 60
+         )  # seconds
 
 
  class Config(BaseModel):
@@ -274,6 +365,7 @@ class Config(BaseModel):
 
      log: LogConfig = LogConfig()
      inference: InferenceConfig = InferenceConfig()
+     snippet_inference: InferenceConfig = InferenceConfig()
      extractor: ExtractorConfig = ExtractorConfig()
      gitlab: GitLabConfig = GitLabConfig()
      general: GeneralConfig = GeneralConfig()
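With the new `snippet_inference` section, per-snippet analysis can be routed to a different (for example smaller or cheaper) model than the final summary; the `__init__` hunk below falls back to the main `inference` section when it is omitted. A hedged sketch of the relevant part of a server config dict as this class reads it; keys present here are taken from this diff, the values and hosts are illustrative:

    config = Config(
        {
            "inference": {
                "url": "http://llm.example.com:8000",
                "model": "large-model",
                "max_tokens": 1024,
            },
            # Optional; when missing, snippet_inference defaults to "inference".
            "snippet_inference": {
                "url": "http://llm.example.com:8001",
                "model": "small-model",
            },
            "general": {
                "collect_emojis_interval": 3600,  # seconds between emoji collection runs
            },
        }
    )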
@@ -290,6 +382,11 @@ class Config(BaseModel):
          self.gitlab = GitLabConfig(data.get("gitlab"))
          self.general = GeneralConfig(data.get("general"))
 
+         if snippet_inference := data.get("snippet_inference", None):
+             self.snippet_inference = InferenceConfig(snippet_inference)
+         else:
+             self.snippet_inference = self.inference
+
 
  class TimePeriod(BaseModel):
      """Specification for a period of time.
@@ -28,7 +28,6 @@ from logdetective.server.llm import (
      mine_logs,
      perform_staged_analysis,
      submit_text,
-     submit_text_chat_completions,
  )
  from logdetective.server.gitlab import process_gitlab_job_event
  from logdetective.server.metric import track_request
@@ -138,10 +137,8 @@ async def analyze_log(
      log_summary = format_snippets(log_summary)
 
      response = await submit_text(
-         http_session,
          PROMPT_CONFIG.prompt_template.format(log_summary),
-         model=SERVER_CONFIG.inference.model,
-         max_tokens=SERVER_CONFIG.inference.max_tokens,
+         inference_cfg=SERVER_CONFIG.inference,
      )
      certainty = 0
 
@@ -172,10 +169,7 @@ async def analyze_log_staged(
      remote_log = RemoteLog(build_log.url, http_session)
      log_text = await remote_log.process_url()
 
-     return await perform_staged_analysis(
-         http_session,
-         log_text=log_text,
-     )
+     return await perform_staged_analysis(log_text)
 
 
  @app.get("/queue/print")
@@ -210,19 +204,12 @@ async def analyze_log_stream(
      log_text = await remote_log.process_url()
      log_summary = mine_logs(log_text)
      log_summary = format_snippets(log_summary)
-     headers = {"Content-Type": "application/json"}
-
-     if SERVER_CONFIG.inference.api_token:
-         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
 
      try:
-         stream = submit_text_chat_completions(
-             http_session,
+         stream = submit_text(
              PROMPT_CONFIG.prompt_template.format(log_summary),
+             inference_cfg=SERVER_CONFIG.inference,
              stream=True,
-             headers=headers,
-             model=SERVER_CONFIG.inference.model,
-             max_tokens=SERVER_CONFIG.inference.max_tokens,
          )
      except aiohttp.ClientResponseError as ex:
          raise HTTPException(
@@ -237,12 +224,29 @@ async def analyze_log_stream(
      return StreamingResponse(stream)
 
 
+ def is_valid_webhook_secret(forge, x_gitlab_token):
+     """Check whether the provided x_gitlab_token matches the webhook secret
+     specified in the configuration"""
+
+     gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
+
+     if not gitlab_cfg.webhook_secrets:
+         # No secrets specified, so don't bother validating.
+         # This is mostly to be used for development.
+         return True
+
+     if x_gitlab_token in gitlab_cfg.webhook_secrets:
+         return True
+
+     return False
+
+
  @app.post("/webhook/gitlab/job_events")
  async def receive_gitlab_job_event_webhook(
-     x_gitlab_instance: Annotated[str | None, Header()],
      job_hook: JobHook,
      background_tasks: BackgroundTasks,
-     http: aiohttp.ClientSession = Depends(get_http_session),
+     x_gitlab_instance: Annotated[str | None, Header()],
+     x_gitlab_token: Annotated[str | None, Header()] = None,
  ):
      """Webhook endpoint for receiving job_events notifications from GitLab
      https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
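Given an instance configured with `webhook_secrets=["old-secret", "new-secret"]` as sketched earlier, the check above is a plain membership test on the request's `X-Gitlab-Token` header; the handlers below reject anything else with a 401. A hypothetical illustration (`forge` being a recognized Forge value):

    # Illustration only: both rotation secrets validate, anything else does not.
    assert is_valid_webhook_secret(forge, "new-secret")
    assert is_valid_webhook_secret(forge, "old-secret")
    assert not is_valid_webhook_secret(forge, "wrong-secret")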
@@ -254,11 +258,15 @@ async def receive_gitlab_job_event_webhook(
          LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
          return BasicResponse(status_code=400)
 
+     if not is_valid_webhook_secret(forge, x_gitlab_token):
+         # This request could not be validated, so return a 401
+         # (Unauthorized) error.
+         return BasicResponse(status_code=401)
+
      # Handle the message in the background so we can return 204 immediately
      gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
      background_tasks.add_task(
          process_gitlab_job_event,
-         http,
          gitlab_cfg,
          forge,
          job_hook,
@@ -280,6 +288,7 @@ emoji_lookup = {}
  @app.post("/webhook/gitlab/emoji_events")
  async def receive_gitlab_emoji_event_webhook(
      x_gitlab_instance: Annotated[str | None, Header()],
+     x_gitlab_token: Annotated[str | None, Header()],
      emoji_hook: EmojiHook,
      background_tasks: BackgroundTasks,
  ):
@@ -293,6 +302,11 @@ async def receive_gitlab_emoji_event_webhook(
          LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
          return BasicResponse(status_code=400)
 
+     if not is_valid_webhook_secret(forge, x_gitlab_token):
+         # This request could not be validated, so return a 401
+         # (Unauthorized) error.
+         return BasicResponse(status_code=401)
+
      if not emoji_hook.merge_request:
          # This is not a merge request event. It is probably an emoji applied
          # to some other "awardable" entity. Just ignore it and return.
@@ -504,20 +518,13 @@ async def collect_emoji_task():
 
 
  async def schedule_collect_emojis_task():
-     """Schedule the collect_emojis_task to run every day at midnight"""
+     """Schedule the collect_emojis_task to run on a configured interval"""
      while True:
-         now = datetime.datetime.now(datetime.timezone.utc)
-         midnight = datetime.datetime.combine(
-             now.date() + datetime.timedelta(days=1),
-             datetime.time(0, 0),
-             datetime.timezone.utc,
-         )
-         seconds_until_run = (midnight - now).total_seconds()
-
+         seconds_until_run = SERVER_CONFIG.general.collect_emojis_interval
          LOG.info("Collect emojis in %d seconds", seconds_until_run)
          await asyncio.sleep(seconds_until_run)
 
          try:
              await collect_emoji_task()
          except Exception as e:  # pylint: disable=broad-exception-caught
-             LOG.error("Error in collect_emoji_task: %s", e)
+             LOG.exception("Error in collect_emoji_task: %s", e)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 1.0.1
+ Version: 1.1.0
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -218,6 +218,10 @@ or
 
      tox run -e lint # to run pylint
 
+ To run the full test suite you will need the postgresql client utilities.
+
+     dnf install postgresql
+
  Visual Studio Code testing with podman/docker-compose
  -----------------------------------------------------
 
@@ -4,6 +4,8 @@ logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
  logdetective/extractors.py,sha256=7ahzWbTtU9MveG1Q7wU9LO8OJgs85X-cHmWltUhCe9M,3491
  logdetective/logdetective.py,sha256=cC2oL4yPNo94AB2nS4v1jpZi-Qo1g0_FEchL_yQL1UU,5832
  logdetective/models.py,sha256=nrGBmMRu8i6UhFflQKAp81Y3Sd_Aaoor0i_yqSJoLT0,1115
+ logdetective/prompts-summary-first.yml,sha256=3Zfp4NNOfaFYq5xBlBjeQa5PdjYfS4v17OtJqQ-DRpU,821
+ logdetective/prompts-summary-only.yml,sha256=8U9AMJV8ePW-0CoXOXlQoO92DAJDeutIT8ntSkkm6W0,470
  logdetective/prompts.yml,sha256=urPKG068TYxi58EicFVUH6FavZq_q36oM1LvfI4ddjg,1729
  logdetective/remote_log.py,sha256=Zbv8g29jko8uQnzFUznnr8Nd9RSJCRs1PmPV7viqX9M,2267
  logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,18 +16,18 @@ logdetective/server/database/base.py,sha256=1mcjEbhwLl4RalvT3oy6XVctjJoWIW3H9aI_
  logdetective/server/database/models/__init__.py,sha256=xy2hkygyw6_87zPKkG20i7g7_LXTGR__PUeojhbvv94,496
  logdetective/server/database/models/merge_request_jobs.py,sha256=hw88wV1-3x7i53sX7ZotKClc6OsH1njPpbRSZofnqr4,18670
  logdetective/server/database/models/metrics.py,sha256=yl9fS4IPVFWDeFvPAxO6zOVu6oLF319ApvVLAgnD5yU,13928
- logdetective/server/emoji.py,sha256=g9GtMChwznD8g1xonsh-I_3xqRn6LBeg3sjPJWcI0Yg,3333
- logdetective/server/gitlab.py,sha256=1Qz62I8xIjwdk7vPhGTTPFkeWVrany8-GV5hfK6weNI,16233
- logdetective/server/llm.py,sha256=JtSCZj8SLnoyTCUdhA0TwcsMZfmHFFru2bJ9txI3GuU,8727
+ logdetective/server/emoji.py,sha256=Jh8RM36K8XDLefS8STm-nvhSLuoaFwlKeZJ5HLmRmdY,4298
+ logdetective/server/gitlab.py,sha256=HxepI9I7j6VaIn2gu0FLz34e_1qodHQNR8xdrRpEMLI,16277
+ logdetective/server/llm.py,sha256=8OnyCErDxKZA7FJSUzNjgPHu0eAjagCz7Yx7LzW98EE,7207
  logdetective/server/metric.py,sha256=B3ew_qSmtEMj6xl-FoOtS4F_bkplp-shhtfHF1cG_Io,4010
- logdetective/server/models.py,sha256=mUBGzc0w6l-v1Q9lwDEcISn6SlFrrwbF3ypSmjNXbbs,11355
+ logdetective/server/models.py,sha256=V8haEsnIYap1lRj2NlOCBtM7bJxWDdZehw4Whf_hnIE,15336
  logdetective/server/plot.py,sha256=eZs4r9gua-nW3yymSMIz1leL9mb4QKlh6FJZSeOfZ5M,14872
- logdetective/server/server.py,sha256=9shFgRkWcJVM2L7HHoQBMCfKuJamh2L4tC96duFPEOA,18127
+ logdetective/server/server.py,sha256=-JJnHj8fPzx8aCJD3q2wRwidxoHPCmwOP8FTWwc1C14,18386
  logdetective/server/templates/gitlab_full_comment.md.j2,sha256=DQZ2WVFedpuXI6znbHIW4wpF9BmFS8FaUkowh8AnGhE,1627
  logdetective/server/templates/gitlab_short_comment.md.j2,sha256=fzScpayv2vpRLczP_0O0YxtA8rsKvR6gSv4ntNdWb98,1443
  logdetective/utils.py,sha256=hdExAC8FtDIxvdgIq-Ro6LVM-JZ-k_UofaMzaDAHvzM,6088
- logdetective-1.0.1.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- logdetective-1.0.1.dist-info/METADATA,sha256=dkso00EVQfwxoBekYT6KW48WvFdFqlBjTPw9U5S0wCg,17136
- logdetective-1.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- logdetective-1.0.1.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
- logdetective-1.0.1.dist-info/RECORD,,
+ logdetective-1.1.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ logdetective-1.1.0.dist-info/METADATA,sha256=5oZU-qB24EoQ05ezESAuiDo0MH3o-y3_ZdLYbFiRldo,17231
+ logdetective-1.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ logdetective-1.1.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+ logdetective-1.1.0.dist-info/RECORD,,