logdetective 1.1.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {logdetective-1.1.0 → logdetective-1.3.0}/PKG-INFO +17 -3
  2. {logdetective-1.1.0 → logdetective-1.3.0}/README.md +14 -1
  3. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/remote_log.py +3 -3
  4. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/config.py +13 -1
  5. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/emoji.py +3 -1
  6. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/gitlab.py +7 -5
  7. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/llm.py +30 -87
  8. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/models.py +4 -2
  9. {logdetective-1.1.0 → logdetective-1.3.0}/pyproject.toml +5 -4
  10. {logdetective-1.1.0 → logdetective-1.3.0}/LICENSE +0 -0
  11. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/__init__.py +0 -0
  12. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/constants.py +0 -0
  13. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/drain3.ini +0 -0
  14. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/extractors.py +0 -0
  15. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/logdetective.py +0 -0
  16. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/models.py +0 -0
  17. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts-summary-first.yml +0 -0
  18. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts-summary-only.yml +0 -0
  19. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts.yml +0 -0
  20. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/__init__.py +0 -0
  21. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/compressors.py +0 -0
  22. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/__init__.py +0 -0
  23. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/base.py +0 -0
  24. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/__init__.py +0 -0
  25. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/merge_request_jobs.py +0 -0
  26. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/metrics.py +0 -0
  27. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/metric.py +0 -0
  28. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/plot.py +0 -0
  29. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/server.py +0 -0
  30. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
  31. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
  32. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/utils.py +0 -0
  33. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective.1.asciidoc +0 -0

--- logdetective-1.1.0/PKG-INFO
+++ logdetective-1.3.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 1.1.0
+Version: 1.3.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -30,12 +30,13 @@ Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
 Requires-Dist: matplotlib (>=3.8.4,<4.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: numpy (>=1.26.0)
+Requires-Dist: openai (>=1.82.1,<2.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
 Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: python-gitlab (>=4.4.0)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
-Requires-Dist: sentry-sdk[fastapi] (>=2.17.0,<3.0.0)
+Requires-Dist: sentry-sdk[fastapi] (>=2.17.0,<3.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -204,6 +205,9 @@ Make changes to the code as needed and run pre-commit.
 Tests
 -----
 
+Tests for code used by server must placed in the `./tests/server/` path, while tests for general
+code must be in the `./tests/base/` path.
+
 The [tox](https://github.com/tox-dev/tox) is used to manage tests. Please install `tox` package into your distribution and run:
 
 tox
@@ -218,7 +222,17 @@ or
 
 tox run -e lint # to run pylint
 
-To run full test suite you will need postgresql client utilities.
+Tox environments for base and server tests are separate, each installs different dependencies.
+
+Running base tests:
+
+tox run -e pytest_base
+
+Running server tests:
+
+tox run -e pytest_server
+
+To run server test suite you will need postgresql client utilities.
 
 dnf install postgresql
 

--- logdetective-1.1.0/README.md
+++ logdetective-1.3.0/README.md
@@ -161,6 +161,9 @@ Make changes to the code as needed and run pre-commit.
 Tests
 -----
 
+Tests for code used by server must placed in the `./tests/server/` path, while tests for general
+code must be in the `./tests/base/` path.
+
 The [tox](https://github.com/tox-dev/tox) is used to manage tests. Please install `tox` package into your distribution and run:
 
 tox
@@ -175,7 +178,17 @@ or
 
 tox run -e lint # to run pylint
 
-To run full test suite you will need postgresql client utilities.
+Tox environments for base and server tests are separate, each installs different dependencies.
+
+Running base tests:
+
+tox run -e pytest_base
+
+Running server tests:
+
+tox run -e pytest_server
+
+To run server test suite you will need postgresql client utilities.
 
 dnf install postgresql
 

--- logdetective-1.1.0/logdetective/remote_log.py
+++ logdetective-1.3.0/logdetective/remote_log.py
@@ -2,7 +2,7 @@ import logging
 from urllib.parse import urlparse
 
 import aiohttp
-from fastapi import HTTPException
+from aiohttp.web import HTTPBadRequest
 
 LOG = logging.getLogger("logdetective")
 
@@ -64,6 +64,6 @@ class RemoteLog:
         try:
             return await self.get_url_content()
         except RuntimeError as ex:
-            raise HTTPException(
-                status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+            raise HTTPBadRequest(
+                reason=f"We couldn't obtain the logs: {ex}"
             ) from ex
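
Note: aiohttp's `HTTPBadRequest` is itself a raisable exception that already carries the 400 status, and it takes a `reason` keyword rather than FastAPI's `detail`. A minimal sketch of the resulting behaviour (only assumes aiohttp is installed):

```python
# Minimal sketch: aiohttp's web exceptions expose their status code and reason,
# so callers can catch HTTPBadRequest and inspect both.
from aiohttp.web import HTTPBadRequest

try:
    raise HTTPBadRequest(reason="We couldn't obtain the logs: connection timed out")
except HTTPBadRequest as exc:
    print(exc.status)  # 400
    print(exc.reason)  # "We couldn't obtain the logs: connection timed out"
```
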

--- logdetective-1.1.0/logdetective/server/config.py
+++ logdetective-1.3.0/logdetective/server/config.py
@@ -1,8 +1,10 @@
 import os
 import logging
 import yaml
+from openai import AsyncOpenAI
+
 from logdetective.utils import load_prompts
-from logdetective.server.models import Config
+from logdetective.server.models import Config, InferenceConfig
 
 
 def load_server_config(path: str | None) -> Config:
@@ -49,6 +51,14 @@ def get_log(config: Config):
     return log
 
 
+def get_openai_api_client(ineference_config: InferenceConfig):
+    """Set up AsyncOpenAI client with default configuration.
+    """
+    return AsyncOpenAI(
+        api_key=ineference_config.api_token,
+        base_url=ineference_config.url)
+
+
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
 SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
 
@@ -56,3 +66,5 @@ SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
 
 LOG = get_log(SERVER_CONFIG)
+
+CLIENT = get_openai_api_client(SERVER_CONFIG.inference)
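
Note: the new module-level `CLIENT` just points the OpenAI SDK at whatever base URL the inference config names, so any OpenAI-compatible endpoint should work. A minimal sketch of what `get_openai_api_client()` produces, with a placeholder URL as an assumption rather than anything logdetective prescribes:

```python
# Hedged sketch of the constructed client; the URL and token are placeholders.
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="None",                       # placeholder token, see the models.py change below
    base_url="http://localhost:8000/v1",  # any OpenAI-compatible inference server
)
```
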

--- logdetective-1.1.0/logdetective/server/emoji.py
+++ logdetective-1.3.0/logdetective/server/emoji.py
@@ -44,7 +44,7 @@ async def _handle_gitlab_operation(func: Callable, *args):
     """
     try:
         return await asyncio.to_thread(func, *args)
-    except gitlab.GitlabError as e:
+    except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
         log_msg = f"Error during GitLab operation {func}{args}: {e}"
         if "Not Found" in str(e):
             LOG.error(log_msg)
@@ -64,6 +64,8 @@ async def collect_emojis_in_comments( # pylint: disable=too-many-locals
     mrs = {}
     for comment in comments:
         mr_job_db = GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
+        if not mr_job_db:
+            continue
         if mr_job_db.id not in projects:
             projects[mr_job_db.id] = project = await _handle_gitlab_operation(
                 gitlab_conn.projects.get, mr_job_db.project_id
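
Note on the widened `except` clause in the first hunk above: `GitlabGetError` is already a subclass of `GitlabError` in python-gitlab, so listing both is belt-and-braces; either way, "Not Found" responses are now logged rather than propagated. A small hedged sketch of the pattern (assumes python-gitlab is installed):

```python
# Hedged sketch: GitlabGetError inherits from GitlabError, so both spellings
# land in the same branch, and 404s are treated as a logged, non-fatal error.
import gitlab

try:
    raise gitlab.GitlabGetError("404 Not Found", response_code=404)
except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
    if "Not Found" in str(e):
        print("logged and swallowed:", e)
```
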

--- logdetective-1.1.0/logdetective/server/gitlab.py
+++ logdetective-1.3.0/logdetective/server/gitlab.py
@@ -193,9 +193,12 @@ async def retrieve_and_preprocess_koji_logs(
         # may be presented only at the top level.
         # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
         # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
+        # We prefix "toplevel" with '~' so that later when we sort the
+        # keys to see if there are any unrecognized arches, it will always
+        # sort last.
         path = PurePath(zipinfo.filename)
         if len(path.parts) <= 3:
-            failed_arches["toplevel"] = path
+            failed_arches["~toplevel"] = path
             continue
 
         # Extract the architecture from the immediate parent path
@@ -246,12 +249,11 @@
     elif "noarch" in failed_arches:
         # May have failed during BuildSRPMFromSCM phase
         failed_arch = "noarch"
-    elif "toplevel" in failed_arches:
-        # Probably a Koji-specific error, not a build error
-        failed_arch = "toplevel"
     else:
         # We have one or more architectures that we don't know about? Just
-        # pick the first alphabetically.
+        # pick the first alphabetically. If the issue was a Koji error
+        # rather than a build failure, this will fall back to ~toplevel as
+        # the lowest-sorting possibility.
         failed_arch = sorted(list(failed_arches.keys()))[0]
 
     LOG.debug("Failed architecture: %s", failed_arch)

--- logdetective-1.1.0/logdetective/server/llm.py
+++ logdetective-1.3.0/logdetective/server/llm.py
@@ -1,21 +1,21 @@
 import os
 import asyncio
-import json
 import random
-from typing import List, Tuple, Dict, Any, Union
+from typing import List, Tuple, Union
 
 import backoff
-from aiohttp import StreamReader
 from fastapi import HTTPException
 
 import aiohttp
+from openai import AsyncStream
+from openai.types.chat import ChatCompletionChunk
 
 from logdetective.constants import SNIPPET_DELIMITER
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import (
     compute_certainty,
 )
-from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG
+from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG, CLIENT
 from logdetective.server.models import (
     AnalyzedSnippet,
     InferenceConfig,
@@ -54,59 +54,6 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
     return log_summary
 
 
-async def submit_to_llm_endpoint(
-    url_path: str,
-    data: Dict[str, Any],
-    headers: Dict[str, str],
-    stream: bool,
-    inference_cfg: InferenceConfig = SERVER_CONFIG.inference,
-) -> Any:
-    """Send request to an API endpoint. Verifying successful request unless
-    the using the stream response.
-
-    url_path: The endpoint path to query. (e.g. "/v1/chat/completions"). It should
-        not include the scheme and netloc of the URL, which is stored in the
-        InferenceConfig.
-    data:
-    headers:
-    stream:
-    inference_cfg: An InferenceConfig object containing the URL, max_tokens
-        and other relevant configuration for talking to an inference server.
-    """
-    async with inference_cfg.get_limiter():
-        LOG.debug("async request %s headers=%s data=%s", url_path, headers, data)
-        session = inference_cfg.get_http_session()
-
-        if inference_cfg.api_token:
-            headers["Authorization"] = f"Bearer {inference_cfg.api_token}"
-
-        response = await session.post(
-            url_path,
-            headers=headers,
-            # we need to use the `json=` parameter here and let aiohttp
-            # handle the json-encoding
-            json=data,
-            timeout=int(LLM_CPP_SERVER_TIMEOUT),
-            # Docs says chunked takes int, but:
-            # DeprecationWarning: Chunk size is deprecated #1615
-            # So let's make sure we either put True or None here
-            chunked=True if stream else None,
-            raise_for_status=True,
-        )
-        if stream:
-            return response
-        try:
-            return json.loads(await response.text())
-        except UnicodeDecodeError as ex:
-            LOG.error(
-                "Error encountered while parsing llama server response: %s", ex
-            )
-            raise HTTPException(
-                status_code=400,
-                detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
-            ) from ex
-
-
 def should_we_giveup(exc: aiohttp.ClientResponseError) -> bool:
     """
     From backoff's docs:
@@ -141,7 +88,7 @@ async def submit_text(
     text: str,
     inference_cfg: InferenceConfig,
     stream: bool = False,
-) -> Union[Explanation, StreamReader]:
+) -> Union[Explanation, AsyncStream[ChatCompletionChunk]]:
    """Submit prompt to LLM.
    inference_cfg: The configuration section from the config.json representing
        the relevant inference server for this request.
@@ -149,40 +96,36 @@
     """
     LOG.info("Analyzing the text")
 
-    headers = {"Content-Type": "application/json"}
-
-    if SERVER_CONFIG.inference.api_token:
-        headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
-
     LOG.info("Submitting to /v1/chat/completions endpoint")
 
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": text,
-            }
-        ],
-        "max_tokens": inference_cfg.max_tokens,
-        "logprobs": inference_cfg.log_probs,
-        "stream": stream,
-        "model": inference_cfg.model,
-        "temperature": inference_cfg.temperature,
-    }
-
-    response = await submit_to_llm_endpoint(
-        "/v1/chat/completions",
-        data,
-        headers,
-        inference_cfg=inference_cfg,
-        stream=stream,
-    )
+    async with inference_cfg.get_limiter():
+        response = await CLIENT.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": text,
+                }
+            ],
+            max_tokens=inference_cfg.max_tokens,
+            logprobs=inference_cfg.log_probs,
+            stream=stream,
+            model=inference_cfg.model,
+            temperature=inference_cfg.temperature,
+        )
 
-    if stream:
+    if isinstance(response, AsyncStream):
         return response
+    if not response.choices[0].message.content:
+        LOG.error("No response content recieved from %s", inference_cfg.url)
+        raise RuntimeError()
+    if response.choices[0].logprobs and response.choices[0].logprobs.content:
+        logprobs = [e.to_dict() for e in response.choices[0].logprobs.content]
+    else:
+        logprobs = None
+
     return Explanation(
-        text=response["choices"][0]["message"]["content"],
-        logprobs=response["choices"][0]["logprobs"]["content"],
+        text=response.choices[0].message.content,
+        logprobs=logprobs,
     )
 
 

--- logdetective-1.1.0/logdetective/server/models.py
+++ logdetective-1.3.0/logdetective/server/models.py
@@ -136,7 +136,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
     max_tokens: int = -1
     log_probs: bool = True
     url: str = ""
-    api_token: str = ""
+    # OpenAI client library requires a string to be specified for API token
+    # even if it is not checked on the server side
+    api_token: str = "None"
     model: str = ""
     temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
     max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
@@ -153,7 +155,7 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
         self.log_probs = data.get("log_probs", True)
         self.url = data.get("url", "")
         self.http_timeout = data.get("http_timeout", 5.0)
-        self.api_token = data.get("api_token", "")
+        self.api_token = data.get("api_token", "None")
         self.model = data.get("model", "default-model")
         self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
         self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
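
Note: the `"None"` placeholder exists because the OpenAI SDK refuses to construct a client without some `api_key` string (it raises `OpenAIError` when `api_key` is `None` and `OPENAI_API_KEY` is not set), even though a local inference server may never validate the token. A small sketch demonstrating the constraint, with a placeholder URL:

```python
# Hedged sketch: the SDK requires *some* api_key string unless OPENAI_API_KEY
# is set in the environment; the value itself need not be a real token.
import openai

try:
    openai.AsyncOpenAI(api_key=None, base_url="http://localhost:8000/v1")
except openai.OpenAIError as err:
    print(err)  # complains that the api_key client option must be set

client = openai.AsyncOpenAI(api_key="None", base_url="http://localhost:8000/v1")  # accepted
```
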

--- logdetective-1.1.0/pyproject.toml
+++ logdetective-1.3.0/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "1.1.0"
+version = "1.3.0"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -51,11 +51,12 @@ psycopg2 = {version = "^2.9.9", optional = true }
 alembic = {version = "^1.13.3", optional = true }
 matplotlib = {version = "^3.8.4", optional = true }
 backoff = {version = "2.2.1", optional = true }
-sentry-sdk = {version = "^2.17.0", extras = ["fastapi"]}
+sentry-sdk = {version = "^2.17.0", optional = true, extras = ["fastapi"]}
+openai = {version = "^1.82.1", optional = true}
 
 [tool.poetry.extras]
-server = ["fastapi", "sqlalchemy", "psycopg2", "alembic", "matplotlib", "backoff", "aiolimiter"]
-server-testing = ["fastapi", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib", "backoff", "pytest-asyncio"]
+server = ["fastapi", "sqlalchemy", "psycopg2", "alembic", "matplotlib", "backoff", "aiolimiter", "sentry-sdk", "openai"]
+server-testing = ["fastapi", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib", "backoff", "pytest-asyncio", "sentry-sdk", "openai"]
 
 [build-system]
 requires = ["poetry-core"]
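
Note: because sentry-sdk and openai are now optional and folded into the extras, a plain install of logdetective no longer pulls them in. A hedged way to confirm that an environment installed with the `server` extra resolved its optional dependencies (module names inferred from the extras list above):

```python
# Hedged sanity check for the optional server dependencies.
import importlib.util

for module in ("fastapi", "sqlalchemy", "openai", "sentry_sdk", "backoff"):
    found = importlib.util.find_spec(module) is not None
    print(f"{module}: {'ok' if found else 'MISSING'}")
```
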
The remaining files listed above are unchanged between 1.1.0 and 1.3.0.