logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/prompts.yml
@@ -0,0 +1,90 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+  Snippets themselves must not be altered in any way whatsoever.
+
+  Snippets are delimited with '================'.
+
+  Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
+  Snippets:
+
+  {}
+
+
+snippet_prompt_template: |
+  Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution
+  and provide estimate of snippet relevance.
+  Your analysis must be as concise as possible, while keeping relevant information intact.
+
+  Snippet:
+
+  {}
+
+
+prompt_template_staged: |
+  Given following log snippets, their explanation, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Snippets are in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+
+  Snippets are delimited with '================'.
+
+  Drawing on information from all snippets, provide a concise explanation of the issue and recommend a solution.
+
+  Explanation of the issue, and recommended solution, should take a handful of sentences.
+
+  Snippets:
+
+  {}
+
+
+# System prompts
+# System prompts are meant to serve as general guide for model behavior,
+# describing role and purpose it is meant to serve.
+# Sample system prompts in this file are intentionally the same,
+# however, in some circumstances it may be beneficial have different
+# system prompts for each sub case. For example when a specialized model is deployed
+# to analyze snippets.
+
+# Default prompt is used by the CLI tool and also for final analysis
+# with /analyze and /analyze/stream API endpoints
+default_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
+
+# Snippet system prompt is used for analysis of individual snippets
+snippet_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
+
+
+# Staged system prompt is used by /analyze/staged API endpoint
+staged_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
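The file's own comments say the server picks these prompts up on its own, while the CLI needs them loaded explicitly via --prompts. A minimal sketch of loading such a file programmatically, using the load_prompts helper that the server config module (below) imports from logdetective.utils; the path is a hypothetical example:

import logdetective
from logdetective.utils import load_prompts

# Load a customized prompts file; per the comments above, the defaults from
# constants.py apply when no file is supplied.
prompt_config = load_prompts("/etc/logdetective/prompts.yml")  # hypothetical path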
logdetective/remote_log.py
@@ -0,0 +1,67 @@
+import logging
+from urllib.parse import urlparse
+
+import aiohttp
+from aiohttp.web import HTTPBadRequest
+
+LOG = logging.getLogger("logdetective")
+
+
+class RemoteLog:
+    """
+    Handles retrieval of remote log files.
+    """
+
+    def __init__(self, url: str, http_session: aiohttp.ClientSession):
+        """
+        Initialize with a remote log URL and HTTP session.
+
+        Args:
+            url: A remote URL pointing to a log file
+            http_session: The HTTP session used to retrieve the remote file
+        """
+        self._url = url
+        self._http_session = http_session
+
+    @property
+    def url(self) -> str:
+        """The remote log url."""
+        return self._url
+
+    @property
+    async def content(self) -> str:
+        """Content of the url."""
+        return await self.get_url_content()
+
+    def validate_url(self) -> bool:
+        """Validate incoming URL to be at least somewhat sensible for log files
+        Only http and https protocols permitted. No result, params or query fields allowed.
+        Either netloc or path must have non-zero length.
+        """
+        result = urlparse(self.url)
+        if result.scheme not in ["http", "https"]:
+            return False
+        if any([result.params, result.query, result.fragment]):
+            return False
+        if not (result.path or result.netloc):
+            return False
+        return True
+
+    async def get_url_content(self) -> str:
+        """validate log url and return log text."""
+        if self.validate_url():
+            LOG.debug("process url %s", self.url)
+            try:
+                response = await self._http_session.get(self.url, raise_for_status=True)
+            except (aiohttp.ClientResponseError, aiohttp.ClientConnectorError) as ex:
+                raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+            return await response.text()
+        LOG.error("Invalid URL received ")
+        raise RuntimeError(f"Invalid log URL: {self.url}")
+
+    async def process_url(self) -> str:
+        """Validate log URL and return log text."""
+        try:
+            return await self.get_url_content()
+        except RuntimeError as ex:
+            raise HTTPBadRequest(reason=f"We couldn't obtain the logs: {ex}") from ex
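A short usage sketch for the new RemoteLog class, assuming the caller owns the aiohttp session; the URL is a placeholder:

import asyncio

import aiohttp

from logdetective.remote_log import RemoteLog


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        remote_log = RemoteLog("https://example.com/build.log", session)  # placeholder URL
        text = await remote_log.content  # validates the URL, then downloads it
        print(text[:200])


asyncio.run(main())

Note the split in error handling: get_url_content() raises RuntimeError for both invalid URLs and download failures, while process_url() converts that into an HTTPBadRequest for the web layer.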
logdetective/server/compressors.py
@@ -0,0 +1,186 @@
+import io
+import zipfile
+
+from typing import Union, Dict
+from logdetective.remote_log import RemoteLog
+from logdetective.server.models import (
+    StagedResponse,
+    Response,
+    AnalyzedSnippet,
+    Explanation,
+)
+
+
+class TextCompressor:
+    """
+    Encapsulates one or more texts in one or more files with the specified names
+    and provides methods to retrieve them later.
+    """
+
+    def zip(self, items: Dict[str, str]) -> bytes:
+        """
+        Compress multiple texts into different files within a zip archive.
+
+        Args:
+            items: Dictionary where keys are file names and values are text content
+                to be compressed
+
+        Returns:
+            bytes: The compressed zip archive as bytes
+        """
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
+            for key, value in items.items():
+                zip_file.writestr(key, value)
+
+        zip_buffer.seek(0)
+        return zip_buffer.getvalue()
+
+    def unzip(self, zip_data: bytes) -> Dict[str, str]:
+        """
+        Uncompress data created by TextCompressor.zip().
+
+        Args:
+            zip_data: A zipped stream of bytes
+
+        Returns:
+            {file_name: str}: The decompressed content as a dict of file names and UTF-8 strings
+        """
+        zip_buffer = io.BytesIO(zip_data)
+
+        content = {}
+        with zipfile.ZipFile(zip_buffer, "r") as zip_file:
+            file_list = zip_file.namelist()
+            for file_name in file_list:
+                content[file_name] = zip_file.read(file_name).decode("utf-8")
+
+        return content
+
+
+class RemoteLogCompressor:
+    """
+    Handles compression of remote log files.
+    """
+
+    LOG_FILE_NAME = "log.txt"
+    COMPRESSOR = TextCompressor()
+
+    def __init__(self, remote_log: RemoteLog):
+        """
+        Initialize with a RemoteLog object.
+        """
+        self._remote_log = remote_log
+
+    @classmethod
+    def zip_text(cls, text: str) -> bytes:
+        """
+        Compress the given text.
+
+        Returns:
+            bytes: Compressed text
+        """
+        return cls.COMPRESSOR.zip({cls.LOG_FILE_NAME: text})
+
+    async def zip_content(self) -> bytes:
+        """
+        Compress the content of the remote log.
+
+        Returns:
+            bytes: Compressed log content
+        """
+        content_text = await self._remote_log.content
+        return self.zip_text(content_text)
+
+    @classmethod
+    def unzip(cls, zip_data: bytes) -> str:
+        """
+        Uncompress the zipped content of the remote log.
+
+        Args:
+            zip_data: Compressed data as bytes
+
+        Returns:
+            str: The decompressed log content
+        """
+        return cls.COMPRESSOR.unzip(zip_data)[cls.LOG_FILE_NAME]
+
+
+class LLMResponseCompressor:
+    """
+    Handles compression and decompression of LLM responses.
+    """
+
+    EXPLANATION_FILE_NAME = "explanation.txt"
+    SNIPPET_FILE_NAME = "snippet_{number}.txt"
+    COMPRESSOR = TextCompressor()
+
+    def __init__(self, response: Union[StagedResponse, Response]):
+        """
+        Initialize with an LLM response.
+
+        Args:
+            response: Either a StagedResponse or Response object
+        """
+        self._response = response
+
+    def zip_response(self) -> bytes:
+        """
+        Compress the content of the LLM response.
+
+        Returns:
+            bytes: Compressed response as bytes
+        """
+        items = {
+            self.EXPLANATION_FILE_NAME: self._response.explanation.model_dump_json()
+        }
+
+        if isinstance(self._response, StagedResponse):
+            for i, snippet in enumerate(self._response.snippets):
+                items[self.SNIPPET_FILE_NAME.format(number=i)] = (
+                    snippet.model_dump_json()
+                )
+
+        return self.COMPRESSOR.zip(items)
+
+    @classmethod
+    def unzip(
+        cls, zip_data: bytes
+    ) -> Union[StagedResponse, Response]:
+        """
+        Uncompress the zipped content of the LLM response.
+
+        Args:
+            zip_data: Compressed data as bytes
+
+        Returns:
+            Union[StagedResponse, Response]: The decompressed (partial) response object,
+                missing response_certainty.
+        """
+        items = cls.COMPRESSOR.unzip(zip_data)
+        if cls.EXPLANATION_FILE_NAME not in items:
+            raise KeyError(
+                f"Required file {cls.EXPLANATION_FILE_NAME} not found in zip archive"
+            )
+        explanation = Explanation.model_validate_json(items[cls.EXPLANATION_FILE_NAME])
+
+        snippets = []
+        snippet_files = {
+            k: v
+            for k, v in items.items()
+            if cls.SNIPPET_FILE_NAME.replace("{number}.txt", "") in k
+        }
+        for i in range(len(snippet_files)):
+            snippets.append(
+                AnalyzedSnippet.model_validate_json(
+                    items[cls.SNIPPET_FILE_NAME.format(number=i)]
+                )
+            )
+
+        if snippets:
+            response = StagedResponse(
+                explanation=explanation, snippets=snippets, response_certainty=0
+            )
+        else:
+            response = Response(explanation=explanation, response_certainty=0)
+
+        return response
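Because TextCompressor.zip() and unzip() are symmetric, a round trip returns the original mapping; a minimal sketch with made-up file names and content:

from logdetective.server.compressors import TextCompressor

compressor = TextCompressor()
# Two hypothetical entries: file name -> text content.
archive = compressor.zip({"build.log": "error: ld returned 1", "notes.txt": "retry"})
assert compressor.unzip(archive) == {"build.log": "error: ld returned 1", "notes.txt": "retry"}

LLMResponseCompressor.unzip() builds on the same property: it reconstructs a StagedResponse when any snippet_N.txt entries are present and a plain Response otherwise, with response_certainty zeroed out since the archive does not store it.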
logdetective/server/config.py
@@ -0,0 +1,78 @@
+import os
+import logging
+import yaml
+from openai import AsyncOpenAI
+
+from logdetective.utils import load_prompts, load_skip_snippet_patterns
+from logdetective.server.models import Config, InferenceConfig
+import logdetective
+
+
+def load_server_config(path: str | None) -> Config:
+    """Load configuration file for logdetective server.
+    If no path was provided, or if the file doesn't exist, return defaults.
+    """
+    if path is not None:
+        try:
+            with open(path, "r") as config_file:
+                return Config(yaml.safe_load(config_file))
+        except FileNotFoundError:
+            # This is not an error, we will fall back to default
+            print("Unable to find server config file, using default then.")
+    return Config()
+
+
+def get_log(config: Config):
+    """
+    Initialize a logger for this server
+    """
+    log = logging.getLogger(config.log.name)
+    if getattr(log, "initialized", False):
+        return log
+
+    log.setLevel("DEBUG")
+
+    # Drop the default handler, we will create it ourselves
+    log.handlers = []
+
+    # STDOUT
+    stream_handler = logging.StreamHandler()
+    stream_handler.setFormatter(logging.Formatter(config.log.format))
+    stream_handler.setLevel(config.log.level_stream)
+    log.addHandler(stream_handler)
+
+    # Log to file
+    if config.log.path:
+        file_handler = logging.FileHandler(config.log.path)
+        file_handler.setFormatter(logging.Formatter(config.log.format))
+        file_handler.setLevel(config.log.level_file)
+        log.addHandler(file_handler)
+
+    log.initialized = True
+    return log
+
+
+def get_openai_api_client(inference_config: InferenceConfig):
+    """Set up AsyncOpenAI client with default configuration."""
+    return AsyncOpenAI(
+        api_key=inference_config.api_token, base_url=inference_config.url,
+        timeout=inference_config.llm_api_timeout
+    )
+
+
+SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
+SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
+# The default location for skip patterns is in the same directory
+# as logdetective __init__.py file.
+SERVER_SKIP_PATTERNS_PATH = os.environ.get(
+    "LOGDETECIVE_SKIP_PATTERNS",
+    f"{os.path.dirname(logdetective.__file__)}/skip_snippets.yml",
+)
+
+SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
+PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
+SKIP_SNIPPETS_CONFIG = load_skip_snippet_patterns(SERVER_SKIP_PATTERNS_PATH)
+
+LOG = get_log(SERVER_CONFIG)
+
+CLIENT = get_openai_api_client(SERVER_CONFIG.inference)
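Since SERVER_CONFIG, PROMPT_CONFIG, LOG, and CLIENT are all built at module import time, the environment variables have to be set before this module is first imported; a sketch, with a placeholder path:

import os

# Must happen before the first import of logdetective.server.config.
os.environ["LOGDETECTIVE_SERVER_CONF"] = "/etc/logdetective/server.yml"  # placeholder path

from logdetective.server.config import LOG, SERVER_CONFIG  # noqa: E402

LOG.info("inference endpoint: %s", SERVER_CONFIG.inference.url)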
logdetective/server/database/base.py
@@ -1,15 +1,14 @@
 from os import getenv
-from contextlib import contextmanager
-from sqlalchemy import create_engine
-from sqlalchemy.orm import sessionmaker, declarative_base
-
+from contextlib import asynccontextmanager
+from sqlalchemy.orm import DeclarativeBase
+from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
 from logdetective import logger


 def get_pg_url() -> str:
     """create postgresql connection string"""
     return (
-        f"postgresql+psycopg2://{getenv('POSTGRESQL_USER')}"
+        f"postgresql+asyncpg://{getenv('POSTGRESQL_USER')}"
         f":{getenv('POSTGRESQL_PASSWORD')}@{getenv('POSTGRESQL_HOST', 'postgres')}"
         f":{getenv('POSTGRESQL_PORT', '5432')}/{getenv('POSTGRESQL_DATABASE')}"
     )
@@ -23,13 +22,16 @@ sqlalchemy_echo = getenv("SQLALCHEMY_ECHO", "False").lower() in (
     "y",
     "1",
 )
-engine = create_engine(get_pg_url(), echo=sqlalchemy_echo)
-SessionFactory = sessionmaker(autoflush=True, bind=engine)
-Base = declarative_base()
+engine = create_async_engine(get_pg_url(), echo=sqlalchemy_echo)
+SessionFactory = async_sessionmaker(autoflush=True, bind=engine)  # pylint: disable=invalid-name
+

+class Base(DeclarativeBase):
+    """Declarative base class for all ORM models."""

-@contextmanager
-def transaction(commit: bool = False):
+
+@asynccontextmanager
+async def transaction(commit: bool = False):
     """
     Context manager for 'framing' a db transaction.

@@ -39,25 +41,31 @@ def transaction(commit: bool = False):
     """

     session = SessionFactory()
-    try:
-        yield session
-        if commit:
-            session.commit()
-    except Exception as ex:
-        logger.warning("Exception while working with database: %s", str(ex))
-        session.rollback()
-        raise
-    finally:
-        session.close()
-
-
-def init():
+    async with session:
+        try:
+            yield session
+            if commit:
+                await session.commit()
+        except Exception as ex:
+            logger.warning("Exception while working with database: %s", str(ex))
+            await session.rollback()
+            raise
+        finally:
+            await session.close()
+
+
+async def init():
     """Init db"""
-    Base.metadata.create_all(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
     logger.debug("Database initialized")


-def destroy():
+async def destroy():
     """Destroy db"""
-    Base.metadata.drop_all(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.drop_all)
     logger.warning("Database cleaned")
+
+
+DB_MAX_RETRIES = 3  # How many times retry a db operation
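A usage sketch for the reworked async transaction() context manager: reads can keep the default commit=False, while writes pass commit=True so the session commits on success and rolls back if the block raises. The record argument stands in for any mapped ORM instance:

from logdetective.server.database.base import transaction


async def save(record) -> None:
    # Commit on clean exit; the context manager rolls back on exception.
    async with transaction(commit=True) as session:
        session.add(record)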
logdetective/server/database/models/__init__.py
@@ -0,0 +1,33 @@
+from logdetective.server.database.models.merge_request_jobs import (
+    Forge,
+    GitlabMergeRequestJobs,
+    Comments,
+    Reactions,
+)
+from logdetective.server.database.models.koji import (
+    KojiTaskAnalysis,
+)
+from logdetective.server.database.models.metrics import (
+    AnalyzeRequestMetrics,
+    EndpointType,
+)
+from logdetective.server.database.models.exceptions import (
+    KojiTaskNotFoundError,
+    KojiTaskNotAnalyzedError,
+    KojiTaskAnalysisTimeoutError,
+)
+
+# pylint: disable=undefined-all-variable
+
+__all__ = [
+    GitlabMergeRequestJobs.__name__,
+    Comments.__name__,
+    Reactions.__name__,
+    AnalyzeRequestMetrics.__name__,
+    EndpointType.__name__,
+    Forge.__name__,
+    KojiTaskAnalysis.__name__,
+    KojiTaskNotFoundError.__name__,
+    KojiTaskNotAnalyzedError.__name__,
+    KojiTaskAnalysisTimeoutError.__name__,
+]
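These re-exports let downstream code import every model and exception from the package itself rather than the individual modules:

from logdetective.server.database.models import (
    AnalyzeRequestMetrics,
    Forge,
    KojiTaskNotFoundError,
)

Building __all__ from each class's __name__ keeps the export list in sync if a class is ever renamed.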
logdetective/server/database/models/exceptions.py
@@ -0,0 +1,17 @@
+"""Database model exceptions for logdetective."""
+
+
+class KojiTaskNotFoundError(Exception):
+    """Exception raised when a koji task is not found"""
+
+
+class KojiTaskNotAnalyzedError(Exception):
+    """Exception raised when a koji task analysis is still in progress"""
+
+
+class KojiTaskAnalysisTimeoutError(Exception):
+    """Exception raised when a koji task analysis has timed out"""
+
+
+class AnalyzeRequestMetricsNotFroundError(Exception):
+    """Exception raised when AnalyzeRequestMetrics is not found"""