logdetective 0.3.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logdetective/__init__.py CHANGED
@@ -0,0 +1,3 @@
+ import logging
+
+ logger = logging.getLogger("logdetective")
@@ -0,0 +1,63 @@
+ from os import getenv
+ from contextlib import contextmanager
+ from sqlalchemy import create_engine
+ from sqlalchemy.orm import sessionmaker, declarative_base
+
+ from logdetective import logger
+
+
+ def get_pg_url() -> str:
+     """create postgresql connection string"""
+     return (
+         f"postgresql+psycopg2://{getenv('POSTGRESQL_USER')}"
+         f":{getenv('POSTGRESQL_PASSWORD')}@{getenv('POSTGRESQL_HOST', 'postgres')}"
+         f":{getenv('POSTGRESQL_PORT', '5432')}/{getenv('POSTGRESQL_DATABASE')}"
+     )
+
+
+ # To log SQL statements, set SQLALCHEMY_ECHO env. var. to True|T|Yes|Y|1
+ sqlalchemy_echo = getenv("SQLALCHEMY_ECHO", "False").lower() in (
+     "true",
+     "t",
+     "yes",
+     "y",
+     "1",
+ )
+ engine = create_engine(get_pg_url(), echo=sqlalchemy_echo)
+ SessionFactory = sessionmaker(autoflush=True, bind=engine)
+ Base = declarative_base()
+
+
+ @contextmanager
+ def transaction(commit: bool = False):
+     """
+     Context manager for 'framing' a db transaction.
+
+     Args:
+         commit: Whether to call `Session.commit()` upon exiting the context. Should be set to True
+             if any changes are made within the context. Defaults to False.
+     """
+
+     session = SessionFactory()
+     try:
+         yield session
+         if commit:
+             session.commit()
+     except Exception as ex:
+         logger.warning("Exception while working with database: %s", str(ex))
+         session.rollback()
+         raise
+     finally:
+         session.close()
+
+
+ def init():
+     """Init db"""
+     Base.metadata.create_all(engine)
+     logger.debug("Database initialized")
+
+
+ def destroy():
+     """Destroy db"""
+     Base.metadata.drop_all(engine)
+     logger.warning("Database cleaned")
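
For orientation, the new `transaction()` helper above yields a session and commits only when `commit=True`; the metrics model in the next hunk relies on exactly this pattern. Below is a minimal usage sketch, assuming the `POSTGRESQL_*` environment variables point at a reachable database; the `StoredItem` model is hypothetical and not part of the package.

```python
from sqlalchemy import Column, Integer, String

from logdetective.server.database.base import Base, init, transaction


class StoredItem(Base):
    """Hypothetical example model, not shipped in the package."""

    __tablename__ = "stored_item"
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)


init()  # create tables for every model registered on Base

# Writes need commit=True; the context manager rolls back and re-raises on errors.
with transaction(commit=True) as session:
    session.add(StoredItem(name="example"))

# Read-only work can omit commit.
with transaction() as session:
    count = session.query(StoredItem).count()
```
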
@@ -0,0 +1,88 @@
+ import enum
+ import datetime
+
+ from typing import Optional
+ from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
+
+ from logdetective.server.database.base import Base, transaction
+
+
+ class EndpointType(enum.Enum):
+     """Different analyze endpoints"""
+
+     ANALYZE = "analyze_log"
+     ANALYZE_STAGED = "analyze_log_staged"
+     ANALYZE_STREAM = "analyze_log_stream"
+
+
+ class AnalyzeRequestMetrics(Base):
+     """Store data related to received requests and given responses"""
+
+     __tablename__ = "analyze_request_metrics"
+
+     id = Column(Integer, primary_key=True)
+     endpoint = Column(
+         Enum(EndpointType),
+         nullable=False,
+         index=True,
+         comment="The service endpoint that was called",
+     )
+     request_received_at = Column(
+         DateTime,
+         nullable=False,
+         index=True,
+         default=datetime.datetime.now(datetime.timezone.utc),
+         comment="Timestamp when the request was received",
+     )
+     log_url = Column(
+         String,
+         nullable=False,
+         index=False,
+         comment="Log url for which analysis was requested",
+     )
+     response_sent_at = Column(
+         DateTime, nullable=True, comment="Timestamp when the response was sent back"
+     )
+     response_length = Column(
+         Integer, nullable=True, comment="Length of the response in chars"
+     )
+     response_certainty = Column(
+         Float, nullable=True, comment="Certainty for generated response"
+     )
+
+     @classmethod
+     def create(
+         cls,
+         endpoint: EndpointType,
+         log_url: str,
+         request_received_at: Optional[datetime.datetime] = None,
+     ) -> int:
+         """Create AnalyzeRequestMetrics new line
+         with data related to a received request"""
+         with transaction(commit=True) as session:
+             metrics = AnalyzeRequestMetrics()
+             metrics.endpoint = endpoint
+             metrics.request_received_at = request_received_at or datetime.datetime.now(
+                 datetime.timezone.utc
+             )
+             metrics.log_url = log_url
+             session.add(metrics)
+             session.flush()
+             return metrics.id
+
+     @classmethod
+     def update(
+         cls,
+         id_: int,
+         response_sent_at: datetime,
+         response_length: int,
+         response_certainty: float,
+     ) -> None:
+         """Update an AnalyzeRequestMetrics line
+         with data related to the given response"""
+         with transaction(commit=True) as session:
+             metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+             metrics.response_sent_at = response_sent_at
+             metrics.response_length = response_length
+             metrics.response_certainty = response_certainty
+             session.add(metrics)
@@ -0,0 +1,82 @@
+ import datetime
+ import inspect
+ from typing import Union
+ from functools import wraps
+
+ from starlette.responses import StreamingResponse
+ from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+ from logdetective.server import models
+
+
+ def add_new_metrics(
+     api_name: str, build_log: models.BuildLog, received_at: datetime.datetime = None
+ ) -> int:
+     """Add a new database entry for a received request.
+
+     This will store the time when this function is called,
+     the endpoint from where the request was received,
+     and the log for which analysis is requested.
+     """
+     return AnalyzeRequestMetrics.create(
+         endpoint=EndpointType(api_name),
+         log_url=build_log.url,
+         request_received_at=received_at
+         if received_at
+         else datetime.datetime.now(datetime.timezone.utc),
+     )
+
+
+ def update_metrics(
+     metrics_id: int,
+     response: Union[models.Response, models.StagedResponse, StreamingResponse],
+     sent_at: datetime.datetime = None,
+ ) -> None:
+     """Update a database metric entry for a received request,
+     filling data for the given response.
+
+     This will add to the database entry the time when the response was sent,
+     the length of the created response and the certainty for it.
+     """
+     response_sent_at = (
+         sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
+     )
+     response_length = None
+     if hasattr(response, "explanation") and "choices" in response.explanation:
+         response_length = sum(
+             len(choice["text"])
+             for choice in response.explanation["choices"]
+             if "text" in choice
+         )
+     response_certainty = (
+         response.response_certainty if hasattr(response, "response_certainty") else None
+     )
+     AnalyzeRequestMetrics.update(
+         metrics_id, response_sent_at, response_length, response_certainty
+     )
+
+
+ def track_request():
+     """
+     Decorator to track requests metrics
+     """
+
+     def decorator(f):
+         @wraps(f)
+         async def async_decorated_function(*args, **kwargs):
+             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             response = await f(*args, **kwargs)
+             update_metrics(metrics_id, response)
+             return response
+
+         @wraps(f)
+         def sync_decorated_function(*args, **kwargs):
+             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             response = f(*args, **kwargs)
+             update_metrics(metrics_id, response)
+             return response
+
+         if inspect.iscoroutinefunction(f):
+             return async_decorated_function
+         return sync_decorated_function
+
+     return decorator
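
The `track_request()` decorator above is what the server endpoints later in this diff are wrapped with. It picks the async or sync wrapper based on `inspect.iscoroutinefunction`, creates a metrics row before calling the endpoint, and fills in response data afterwards; it reads `kwargs["build_log"]`, so the tracked argument must be passed by keyword. A minimal sketch, assuming a configured metrics database; the endpoint body here is a placeholder:

```python
from logdetective.server import models
from logdetective.server.metric import track_request


@track_request()
async def analyze_log(build_log: models.BuildLog):
    # Placeholder body; the real endpoint submits the log to the LLM.
    return models.Response(
        explanation=models.Explanation(text="..."), response_certainty=0.0
    )


# The wrapper reads kwargs["build_log"], so call it with a keyword argument, e.g.:
# response = await analyze_log(build_log=models.BuildLog(url="https://example.org/build.log"))
```
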
@@ -1,5 +1,5 @@
  from logging import BASIC_FORMAT
- from typing import List, Dict, Optional
+ from typing import List, Dict, Optional, Literal
  from pydantic import BaseModel, Field
 
 
@@ -21,7 +21,7 @@ class JobHook(BaseModel):
 
      # The identifier of the job. We only care about 'build_rpm' and
      # 'build_centos_stream_rpm' jobs.
-     build_name: str = Field(pattern=r"^build(_.*)?_rpm$")
+     build_name: str = Field(pattern=r"^build.*rpm$")
 
      # A string representing the job status. We only care about 'failed' jobs.
      build_status: str = Field(pattern=r"^failed$")
@@ -37,15 +37,36 @@ class JobHook(BaseModel):
      project_id: int
 
 
+ class Explanation(BaseModel):
+     """Model of snippet or general log explanation from Log Detective"""
+
+     text: str
+     logprobs: Optional[List[Dict]] = None
+
+     def __str__(self):
+         return self.text
+
+
+ class AnalyzedSnippet(BaseModel):
+     """Model for snippets already processed by Log Detective.
+
+     explanation: LLM output in form of plain text and logprobs dictionary
+     text: original snippet text
+     line_number: location of snippet in original log
+     """
+     explanation: Explanation
+     text: str
+     line_number: int
+
+
  class Response(BaseModel):
      """Model of data returned by Log Detective API
 
-     explanation: CreateCompletionResponse
-     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     explanation: Explanation
      response_certainty: float
      """
 
-     explanation: Dict
+     explanation: Explanation
      response_certainty: float
 
 
@@ -53,17 +74,12 @@ class StagedResponse(Response):
      """Model of data returned by Log Detective API when called when staged response
      is requested. Contains list of reponses to prompts for individual snippets.
 
-     explanation: CreateCompletionResponse
-     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     explanation: Explanation
      response_certainty: float
-     snippets:
-     list of dictionaries {
-     'snippet' : '<original_text>,
-     'comment': CreateCompletionResponse,
-     'line_number': '<location_in_log>' }
+     snippets: list of AnalyzedSnippet objects
      """
 
-     snippets: List[Dict[str, str | Dict | int]]
+     snippets: List[AnalyzedSnippet]
 
 
  class InferenceConfig(BaseModel):
@@ -71,6 +87,11 @@ class InferenceConfig(BaseModel):
 
      max_tokens: int = -1
      log_probs: int = 1
+     api_endpoint: Optional[Literal["/chat/completions", "/completions"]] = (
+         "/chat/completions"
+     )
+     url: str = ""
+     api_token: str = ""
 
      def __init__(self, data: Optional[dict] = None):
          super().__init__()
@@ -79,6 +100,9 @@ class InferenceConfig(BaseModel):
 
          self.max_tokens = data.get("max_tokens", -1)
          self.log_probs = data.get("log_probs", 1)
+         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
+         self.url = data.get("url", "")
+         self.api_token = data.get("api_token", "")
 
 
  class ExtractorConfig(BaseModel):
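
The new `api_endpoint`, `url`, and `api_token` fields above are populated from the dictionary handed to `InferenceConfig`, which in a deployment comes from the server config file loaded by `load_server_config`. A minimal sketch of constructing the model directly, with placeholder values:

```python
from logdetective.server.models import InferenceConfig

# Placeholder values; keys mirror the data.get() calls in InferenceConfig.__init__.
inference = InferenceConfig(
    {
        "url": "http://localhost:8000",  # base URL of the OpenAI-compatible server
        "api_endpoint": "/completions",  # or the default "/chat/completions"
        "api_token": "",                 # sent as a Bearer token when non-empty
        "max_tokens": -1,
        "log_probs": 1,
    }
)

assert inference.api_endpoint == "/completions"
assert inference.url == "http://localhost:8000"
```
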
@@ -3,16 +3,19 @@ import json
  import os
  import re
  import zipfile
- from pathlib import PurePath
+ from pathlib import Path, PurePath
  from tempfile import TemporaryFile
- from typing import List, Annotated, Tuple
+ from typing import List, Annotated, Tuple, Dict, Any
 
 
- from llama_cpp import CreateCompletionResponse
  from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
+
  from fastapi.responses import StreamingResponse
  from fastapi.responses import Response as BasicResponse
  import gitlab
+ import gitlab.v4
+ import gitlab.v4.objects
+ import jinja2
  import requests
 
  from logdetective.constants import (
@@ -27,18 +30,21 @@ from logdetective.utils import (
      format_snippets,
      format_analyzed_snippets,
  )
- from logdetective.server.models import BuildLog, JobHook, Response, StagedResponse
  from logdetective.server.utils import load_server_config, get_log
+ from logdetective.server.metric import track_request
+ from logdetective.server.models import (
+     BuildLog,
+     JobHook,
+     Response,
+     StagedResponse,
+     Explanation,
+     AnalyzedSnippet,
+ )
 
-
- LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
- LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
- LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
  LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
  API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
  SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
- LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)
 
  SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 
@@ -123,35 +129,21 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
      return log_summary
 
 
- async def submit_text(
-     text: str,
-     max_tokens: int = -1,
-     log_probs: int = 1,
-     stream: bool = False,
-     model: str = "default-model",
- ):
-     """Submit prompt to LLM.
-     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-     log_probs: number of token choices to produce log probs for
-     """
-     LOG.info("Analyzing the text")
-     data = {
-         "prompt": text,
-         "max_tokens": max_tokens,
-         "logprobs": log_probs,
-         "stream": stream,
-         "model": model,
-     }
-
-     headers = {"Content-Type": "application/json"}
-
-     if LLM_API_TOKEN:
-         headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+ async def submit_to_llm_endpoint(
+     url: str, data: Dict[str, Any], headers: Dict[str, str], stream: bool
+ ) -> Any:
+     """Send request to selected API endpoint. Verifying successful request unless
+     the using the stream response.
 
+     url:
+     data:
+     headers:
+     stream:
+     """
      try:
          # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
          response = requests.post(
-             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+             url,
              headers=headers,
              data=json.dumps(data),
              timeout=int(LLM_CPP_SERVER_TIMEOUT),
@@ -177,13 +169,118 @@ async def submit_text(
              status_code=400,
              detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
          ) from ex
-     else:
-         return response
 
-     return CreateCompletionResponse(response)
+     return response
+
+
+ async def submit_text(  # pylint: disable=R0913,R0917
+     text: str,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+     api_endpoint: str = "/chat/completions",
+ ) -> Explanation:
+     """Submit prompt to LLM using a selected endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Analyzing the text")
+
+     headers = {"Content-Type": "application/json"}
+
+     if SERVER_CONFIG.inference.api_token:
+         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
+
+     if api_endpoint == "/chat/completions":
+         return await submit_text_chat_completions(
+             text, headers, max_tokens, log_probs > 0, stream, model
+         )
+     return await submit_text_completions(
+         text, headers, max_tokens, log_probs, stream, model
+     )
+
+
+ async def submit_text_completions(  # pylint: disable=R0913,R0917
+     text: str,
+     headers: dict,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+ ) -> Explanation:
+     """Submit prompt to OpenAI API completions endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Submitting to /v1/completions endpoint")
+     data = {
+         "prompt": text,
+         "max_tokens": max_tokens,
+         "logprobs": log_probs,
+         "stream": stream,
+         "model": model,
+     }
+
+     response = await submit_to_llm_endpoint(
+         f"{SERVER_CONFIG.inference.url}/v1/completions",
+         data,
+         headers,
+         stream,
+     )
+
+     return Explanation(
+         text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
+     )
+
+
+ async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
+     text: str,
+     headers: dict,
+     max_tokens: int = -1,
+     log_probs: int = 1,
+     stream: bool = False,
+     model: str = "default-model",
+ ) -> Explanation:
+     """Submit prompt to OpenAI API /chat/completions endpoint.
+     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Submitting to /v1/chat/completions endpoint")
+
+     data = {
+         "messages": [
+             {
+                 "role": "user",
+                 "content": text,
+             }
+         ],
+         "max_tokens": max_tokens,
+         "logprobs": log_probs,
+         "stream": stream,
+         "model": model,
+     }
+
+     response = await submit_to_llm_endpoint(
+         f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
+         data,
+         headers,
+         stream,
+     )
+
+     if stream:
+         return Explanation(
+             text=response["choices"][0]["delta"]["content"],
+             logprobs=response["choices"][0]["logprobs"]["content"],
+         )
+     return Explanation(
+         text=response["choices"][0]["message"]["content"],
+         logprobs=response["choices"][0]["logprobs"]["content"],
+     )
 
 
  @app.post("/analyze", response_model=Response)
+ @track_request()
  async def analyze_log(build_log: BuildLog):
      """Provide endpoint for log file submission and analysis.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
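
Of the JSON returned by the OpenAI-style endpoints, the new code above only reads `choices[0]["message"]["content"]` (or `["delta"]["content"]` when streaming) and `choices[0]["logprobs"]["content"]`, wrapping them in the new `Explanation` model. A minimal sketch of that mapping with a hand-written sample payload (real `/v1/chat/completions` responses carry more keys):

```python
from logdetective.server.models import Explanation

# Hand-written sample shaped like the fields read above; not a real server response.
sample = {
    "choices": [
        {
            "message": {"content": "The build failed because of a missing dependency."},
            "logprobs": {"content": [{"token": "The", "logprob": -0.01}]},
        }
    ]
}

explanation = Explanation(
    text=sample["choices"][0]["message"]["content"],
    logprobs=sample["choices"][0]["logprobs"]["content"],
)

print(str(explanation))  # Explanation.__str__ returns just the text
```
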
@@ -194,26 +291,27 @@ async def analyze_log(build_log: BuildLog):
      log_text = process_url(build_log.url)
      log_summary = mine_logs(log_text)
      log_summary = format_snippets(log_summary)
-     response = await submit_text(PROMPT_TEMPLATE.format(log_summary))
+     response = await submit_text(
+         PROMPT_TEMPLATE.format(log_summary),
+         api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+     )
      certainty = 0
 
-     if "logprobs" in response["choices"][0]:
+     if response.logprobs is not None:
          try:
-             certainty = compute_certainty(
-                 response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-             )
+             certainty = compute_certainty(response.logprobs)
          except ValueError as ex:
              LOG.error("Error encountered while computing certainty: %s", ex)
              raise HTTPException(
                  status_code=400,
-                 detail=f"Couldn't compute certainty with data:\n"
-                 f"{response['choices'][0]['logprobs']['content'][0]['top_logprobs']}",
+                 detail=f"Couldn't compute certainty with data:\n{response.logprobs}",
              ) from ex
 
      return Response(explanation=response, response_certainty=certainty)
 
 
  @app.post("/analyze/staged", response_model=StagedResponse)
+ @track_request()
  async def analyze_log_staged(build_log: BuildLog):
      """Provide endpoint for log file submission and analysis.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -222,36 +320,48 @@ async def analyze_log_staged(build_log: BuildLog):
      while lacking result, params or query fields.
      """
      log_text = process_url(build_log.url)
+
+     return await perform_staged_analysis(log_text=log_text)
+
+
+ async def perform_staged_analysis(log_text: str) -> StagedResponse:
+     """Submit the log file snippets to the LLM and retrieve their results"""
      log_summary = mine_logs(log_text)
 
      # Process snippets asynchronously
      analyzed_snippets = await asyncio.gather(
-         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s[1])) for s in log_summary]
+         *[
+             submit_text(
+                 SNIPPET_PROMPT_TEMPLATE.format(s),
+                 api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+             )
+             for s in log_summary
+         ]
      )
 
      analyzed_snippets = [
-         {"snippet": e[0][1], "line_number": e[0][0], "comment": e[1]}
+         AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
          for e in zip(log_summary, analyzed_snippets)
      ]
      final_prompt = PROMPT_TEMPLATE_STAGED.format(
          format_analyzed_snippets(analyzed_snippets)
      )
 
-     final_analysis = await submit_text(final_prompt)
-     print(final_analysis)
+     final_analysis = await submit_text(
+         final_prompt, api_endpoint=SERVER_CONFIG.inference.api_endpoint
+     )
+
      certainty = 0
 
-     if "logprobs" in final_analysis["choices"][0]:
+     if final_analysis.logprobs:
          try:
-             certainty = compute_certainty(
-                 final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-             )
+             certainty = compute_certainty(final_analysis.logprobs)
          except ValueError as ex:
              LOG.error("Error encountered while computing certainty: %s", ex)
              raise HTTPException(
                  status_code=400,
                  detail=f"Couldn't compute certainty with data:\n"
-                 f"{final_analysis['choices'][0]['logprobs']['content'][0]['top_logprobs']}",
+                 f"{final_analysis.logprobs}",
              ) from ex
 
      return StagedResponse(
@@ -262,6 +372,7 @@ async def analyze_log_staged(build_log: BuildLog):
 
 
  @app.post("/analyze/stream", response_class=StreamingResponse)
+ @track_request()
  async def analyze_log_stream(build_log: BuildLog):
      """Stream response endpoint for Logdetective.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -272,7 +383,14 @@ async def analyze_log_stream(build_log: BuildLog):
      log_text = process_url(build_log.url)
      log_summary = mine_logs(log_text)
      log_summary = format_snippets(log_summary)
-     stream = await submit_text(PROMPT_TEMPLATE.format(log_summary), stream=True)
+     headers = {"Content-Type": "application/json"}
+
+     if SERVER_CONFIG.inference.api_token:
+         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
+
+     stream = await submit_text_chat_completions(
+         PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
+     )
 
      return StreamingResponse(stream)
 
@@ -309,6 +427,11 @@ async def process_gitlab_job_event(job_hook):
      # Retrieve data about the job from the GitLab API
      job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)
 
+     # For easy retrieval later, we'll add project_name and project_url to the
+     # job object
+     job.project_name = project.name
+     job.project_url = project.web_url
+
      # Retrieve the pipeline that started this job
      pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)
 
@@ -324,37 +447,38 @@ async def process_gitlab_job_event(job_hook):
              "Pipeline source is merge_request_event but no merge request ID was provided."
          )
          return
-     merge_request_id = int(match.group(1))
+     merge_request_iid = int(match.group(1))
 
      LOG.debug("Retrieving log artifacts")
      # Retrieve the build logs from the merge request artifacts and preprocess them
      try:
-         preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
+         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
      except LogsTooLargeError:
          LOG.error("Could not retrieve logs. Too large.")
          raise
 
      # Submit log to Log Detective and await the results.
-     response = await submit_log_to_llm(preprocessed_log)
+     log_text = preprocessed_log.read().decode(encoding="utf-8")
+     staged_response = await perform_staged_analysis(log_text=log_text)
      preprocessed_log.close()
 
      # Add the Log Detective response as a comment to the merge request
-     await comment_on_mr(merge_request_id, response)
+     await comment_on_mr(project, merge_request_iid, job, log_url, staged_response)
 
 
  class LogsTooLargeError(RuntimeError):
      """The log archive exceeds the configured maximum size"""
 
 
- async def retrieve_and_preprocess_koji_logs(job):
+ async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
      """Download logs from the merge request artifacts
 
      This function will retrieve the build logs and do some minimal
      preprocessing to determine which log is relevant for analysis.
 
-     returns: An open, file-like object containing the log contents to be sent
-     for processing by Log Detective. The calling function is responsible for
-     closing this object."""
+     returns: The URL pointing to the selected log file and an open, file-like
+     object containing the log contents to be sent for processing by Log
+     Detective. The calling function is responsible for closing this object."""
 
      # Make sure the file isn't too large to process.
      if not await check_artifacts_file_size(job):
@@ -437,11 +561,13 @@ async def retrieve_and_preprocess_koji_logs(job):
 
      LOG.debug("Failed architecture: %s", failed_arch)
 
-     log_path = failed_arches[failed_arch]
-     LOG.debug("Returning contents of %s", log_path)
+     log_path = failed_arches[failed_arch].as_posix()
+
+     log_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}"  # pylint: disable=line-too-long
+     LOG.debug("Returning contents of %s", log_url)
 
      # Return the log as a file-like object with .read() function
-     return artifacts_zip.open(log_path.as_posix())
+     return log_url, artifacts_zip.open(log_path)
 
 
  async def check_artifacts_file_size(job):
@@ -468,15 +594,62 @@ async def check_artifacts_file_size(job):
      return content_length <= SERVER_CONFIG.gitlab.max_artifact_size
 
 
- async def submit_log_to_llm(log):
-     """Stream the log to the LLM for processing"""
-     # TODO: query the LLM with the log contents  # pylint: disable=fixme
-     # This function will be implemented later; right now it does nothing.
-     LOG.debug("Log contents:\n%s", log.read())
-     return ""
+ async def comment_on_mr(
+     project: gitlab.v4.objects.Project,
+     merge_request_iid: int,
+     job: gitlab.v4.objects.ProjectJob,
+     log_url: str,
+     response: StagedResponse,
+ ):
+     """Add the Log Detective response as a comment to the merge request"""
+     LOG.debug(
+         "Primary Explanation for %s MR %d: %s",
+         project.name,
+         merge_request_iid,
+         response.explanation.text,
+     )
 
+     # Get the formatted comment.
+     comment = await generate_mr_comment(job, log_url, response)
 
- async def comment_on_mr(merge_request_id: int, response: str):  # pylint: disable=unused-argument
-     """Add the Log Detective response as a comment to the merge request"""
-     # TODO: Implement this  # pylint: disable=fixme
-     pass  # pylint: disable=unnecessary-pass
+     # Look up the merge request
+     merge_request = await asyncio.to_thread(
+         project.mergerequests.get, merge_request_iid
+     )
+
+     # Submit a new comment to the Merge Request using the Gitlab API
+     await asyncio.to_thread(merge_request.discussions.create, {"body": comment})
+
+
+ async def generate_mr_comment(
+     job: gitlab.v4.objects.ProjectJob, log_url: str, response: StagedResponse
+ ) -> str:
+     """Use a template to generate a comment string to submit to Gitlab"""
+
+     # Locate and load the comment template
+     script_path = Path(__file__).resolve().parent
+     template_path = Path(script_path, "templates")
+     jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
+     tpl = jinja_env.get_template("gitlab_comment.md.j2")
+
+     artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
+
+     if response.response_certainty >= 90:
+         emoji_face = ":slight_smile:"
+     elif response.response_certainty >= 70:
+         emoji_face = ":neutral_face:"
+     else:
+         emoji_face = ":frowning2:"
+
+     # Generate the comment from the template
+     content = tpl.render(
+         package=job.project_name,
+         explanation=response.explanation.text,
+         certainty=f"{response.response_certainty:.2f}",
+         emoji_face=emoji_face,
+         snippets=response.snippets,
+         log_url=log_url,
+         artifacts_url=artifacts_url,
+     )
+
+     return content
@@ -0,0 +1,66 @@
+ The package {{ package }} failed to build, here is a possible explanation why.
+
+ Please know that the explanation was provided by AI and may be incorrect.
+ In this case, we are {{ certainty }}% certain of the response {{ emoji_face }}.
+
+ {{ explanation }}
+
+ <details>
+ <ul>
+ {% for snippet in snippets %}
+ <li>
+ <code>
+ Line {{ snippet.line_number }}: {{ snippet.text }}
+ </code>
+ {{ snippet.explanation }}
+ </li>
+ {% endfor %}
+ </ul>
+ </details>
+
+ <details>
+ <summary>Logs</summary>
+ <p>
+ Log Detective analyzed the following logs files to provide an explanation:
+ </p>
+
+ <ul>
+ <li><a href="{{ log_url }}">{{ log_url }}</a></li>
+ </ul>
+
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
+ </ul>
+ </p>
+
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
+ </details>
+
+ <details>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+
+ <ul>
+ <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
+ <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
+ <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
+ <li><a href="https://log-detective.com/documentation">Documentation</a></li>
+ </ul>
+ </details>
+
+
+ ---
+ This comment was created by [Log Detective][log-detective].
+
+ Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.<br>
+
+
+
+ [log-detective]: https://log-detective.com/
+ [contact]: https://github.com/fedora-copr
@@ -12,7 +12,8 @@ def load_server_config(path: str | None) -> Config:
          with open(path, "r") as config_file:
              return Config(yaml.safe_load(config_file))
      except FileNotFoundError:
-         pass
+         # This is not an error, we will fall back to default
+         print("Unable to find server config file, using default then.")
      return Config()
 
 
logdetective/utils.py CHANGED
@@ -7,7 +7,7 @@ import requests
 
  from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
  from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_DELIMITER
-
+ from logdetective.server.models import AnalyzedSnippet
 
  LOG = logging.getLogger("logdetective")
 
@@ -175,11 +175,11 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
      return summary
 
 
- def format_analyzed_snippets(snippets: list[Dict]) -> str:
+ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
      """Format snippets for submission into staged prompt."""
      summary = f"\n{SNIPPET_DELIMITER}\n".join(
          [
-             f"[{e['snippet']}] at line [{e["line_number"]}]: [{e['comment']['choices'][0]['text']}]"
+             f"[{e.text}] at line [{e.line_number}]: [{e.explanation.text}]"
              for e in snippets
          ]
      )
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 0.3.3
+ Version: 0.5.0
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -19,15 +19,18 @@ Classifier: Topic :: Internet :: Log Analysis
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
  Provides-Extra: server
+ Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
  Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
  Requires-Dist: huggingface-hub (>0.23.2)
  Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
  Requires-Dist: numpy (>=1.26.0)
+ Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
  Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
  Requires-Dist: python-gitlab (>=4.4.0)
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
  Requires-Dist: requests (>0.2.31)
+ Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
  Description-Content-Type: text/markdown
@@ -188,6 +191,71 @@ or
 
  tox run -e lint # to run pylint
 
+ Visual Studio Code testing with podman/docker-compose
+ -----------------------------------------------------
+
+ - In `Containerfile`, add `debugpy` as a dependency
+
+ ```diff
+ -RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
+ +RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context debugpy\
+ ```
+
+ - Rebuild server image with new dependencies
+
+ ```
+ make rebuild-server
+ ```
+
+ - Forward debugging port in `docker-compose.yaml` for `server` service.
+
+ ```diff
+  ports:
+    - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
+ +  - "${VSCODE_DEBUG_PORT:-5678}:${VSCODE_DEBUG_PORT:-5678}"
+ ```
+
+ - Add `debugpy` code in a logdetective file where you want to stop at first.
+
+ ```diff
+ +import debugpy
+ +debugpy.listen(("0.0.0.0", 5678))
+ +debugpy.wait_for_client()
+ ```
+
+ - Prepare `.vscode/lunch.json` configuration for Visual Studio Code (at least the following configuration is needed)
+
+ ```json
+ {
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "Python Debugger: Remote Attach",
+             "type": "debugpy",
+             "request": "attach",
+             "connect": {
+                 "host": "localhost",
+                 "port": 5678
+             },
+             "pathMappings": [
+                 {
+                     "localRoot": "${workspaceFolder}",
+                     "remoteRoot": "/src"
+                 }
+             ]
+         }
+     ]
+ }
+ ```
+
+ - Run the server
+
+ ```
+ podman-compose up server
+ ```
+
+ - Run Visual Stdio Code debug configuration named *Python Debug: Remote Attach*
+
  Server
  ------
 
@@ -220,6 +288,19 @@ Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-c
  $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
  ```
 
+ Generate a new database revision with alembic
+ ---------------------------------------------
+
+ Modify the database models (`logdetective/server/database/model.py).
+
+ Generate a new database revision with the command:
+
+ **Warning**: this command will start up a new server
+ and shut it down when the operation completes.
+
+ ```
+ CHANGE="A change comment" make alembic-generate-revision
+ ```
 
  Our production instance
  -----------------------
@@ -0,0 +1,20 @@
+ logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
+ logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
+ logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+ logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
+ logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
+ logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
+ logdetective/server/database/models.py,sha256=8jW4k03Kny_3ld35214hcjYoJqlBvQIr4LH9mfQukXw,2750
+ logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
+ logdetective/server/models.py,sha256=f42yMMMMfTdTN4KWpPUfaEoaiE9rhqltA0dQNKGOB2w,5660
+ logdetective/server/server.py,sha256=lDdXO3s1larmHvuQDasvutEvcOpa3Rv5Cd_btyiqHdU,23118
+ logdetective/server/templates/gitlab_comment.md.j2,sha256=kheTkhQ-LfuFkr8av-Mw2a-9VYEUbDTLwaa-CKI6OkI,1622
+ logdetective/server/utils.py,sha256=OFvhttjv3yp8kfim5_s4mNG8ly21qyILxE0o3DcVVKg,1340
+ logdetective/utils.py,sha256=eudens1_T6iTtYhyzoYCpwuWgFHUMDSt6eWnrAB-mAI,6188
+ logdetective-0.5.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ logdetective-0.5.0.dist-info/METADATA,sha256=420Qn9rAheVSNDYmHjUYSB5AojmY58lUCNt3RNzwFC4,12714
+ logdetective-0.5.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ logdetective-0.5.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+ logdetective-0.5.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.1
+ Generator: poetry-core 2.1.2
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,15 +0,0 @@
- logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
- logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
- logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
- logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
- logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- logdetective/server/models.py,sha256=9QURaw0u9yZKywXwHzv6_rS6XhRBA2UHV5u4b9xkWqc,5196
- logdetective/server/server.py,sha256=o2s4ezQE-a1XY7RFK0vLDFQO_wj9ZgG58SEV0hErLd8,18237
- logdetective/server/utils.py,sha256=osW5-VXxJAxRt7Wd3t1wF7PyW89FE9g4gSZLZCShlLc,1216
- logdetective/utils.py,sha256=59jq7F45Wk8pldzDt4gkh47Hny0T3fy1ggJFjSXDSGo,6148
- logdetective-0.3.3.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- logdetective-0.3.3.dist-info/METADATA,sha256=cO2ZL03HeNe5lASpa-4Wea-SESxQgSVaoQh5ry_EYCY,10691
- logdetective-0.3.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
- logdetective-0.3.3.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
- logdetective-0.3.3.dist-info/RECORD,,