PyPI - logdetective - Versions diffs - 0.5.10__py3-none-any.whl → 0.5.11__py3-none-any.whl - Mend

logdetective 0.5.10__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

logdetective/constants.py +8 -0
logdetective/logdetective.py +8 -1
logdetective/prompts.yml +6 -0
logdetective/server/database/models.py +216 -12
logdetective/server/metric.py +4 -6
logdetective/server/models.py +11 -3
logdetective/server/plot.py +114 -39
logdetective/server/server.py +104 -11
logdetective/server/templates/{gitlab_comment.md.j2 → gitlab_full_comment.md.j2} +1 -3
logdetective/server/templates/gitlab_short_comment.md.j2 +53 -0
logdetective/server/utils.py +3 -1
logdetective/utils.py +7 -3
{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/METADATA +28 -4
logdetective-0.5.11.dist-info/RECORD +24 -0
logdetective-0.5.10.dist-info/RECORD +0 -23
{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/LICENSE +0 -0
{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/WHEEL +0 -0
{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/entry_points.txt +0 -0

logdetective/constants.py CHANGED Viewed

@@ -16,6 +16,8 @@ Snippets are delimited with '================'.
 Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+Explanation of the issue, and recommended solution, should take handful of sentences.
 Snippets:
 {}
@@ -38,6 +40,8 @@ Answer:
 SNIPPET_PROMPT_TEMPLATE = """
 Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
+Your analysis must be as concise as possible, while keeping relevant information intact.
 Snippet:
 {}
@@ -55,6 +59,8 @@ Snippets are delimited with '================'.
 Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+Explanation of the issue, and recommended solution, should take handful of sentences.
 Snippets:
 {}
@@ -64,3 +70,5 @@ Analysis:
 """
 SNIPPET_DELIMITER = "================"
+DEFAULT_TEMPERATURE = 0.8

logdetective/logdetective.py CHANGED Viewed

@@ -3,7 +3,7 @@ import logging
 import sys
 import os
-from logdetective.constants import DEFAULT_ADVISOR
+from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
 from logdetective.utils import (
     process_log,
     initialize_model,
@@ -73,6 +73,12 @@ def setup_args():
         default=f"{os.path.dirname(__file__)}/prompts.yml",
         help="Path to prompt configuration file."
     )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=DEFAULT_TEMPERATURE,
+        help="Temperature for inference."
+    )
     return parser.parse_args()
@@ -147,6 +153,7 @@ def main():  # pylint: disable=too-many-statements,too-many-locals
         model,
         stream,
         prompt_template=prompts_configuration.prompt_template,
+        temperature=args.temperature,
     )
     probs = []
     print("Explanation:")

logdetective/prompts.yml CHANGED Viewed

@@ -13,6 +13,8 @@ prompt_template: |
   Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+  Explanation of the issue, and recommended solution, should take handful of sentences.
   Snippets:
   {}
@@ -33,6 +35,8 @@ summarization_prompt_template: |
 snippet_prompt_template: |
   Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
+  Your analysis must be as concise as possible, while keeping relevant information intact.
   Snippet:
   {}
@@ -48,6 +52,8 @@ prompt_template_staged: |
   Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+  Explanation of the issue, and recommended solution, should take handful of sentences.
   Snippets:
   {}

logdetective/server/database/models.py CHANGED Viewed

@@ -97,17 +97,35 @@ class AnalyzeRequestMetrics(Base):
             metrics.response_certainty = response_certainty
             session.add(metrics)
+    @classmethod
+    def get_postgres_time_format(cls, time_format):
+        """Map a Python time format to the corresponding PostgreSQL format."""
+        if time_format == "%Y-%m-%d":
+            pgsql_time_format = "YYYY-MM-DD"
+        else:
+            pgsql_time_format = "YYYY-MM-DD HH24"
+        return pgsql_time_format
+    @classmethod
+    def get_dictionary_with_datetime_keys(
+        cls, time_format: str, values_dict: dict[str, any]
+    ) -> dict[datetime.datetime, any]:
+        """Convert from a dictionary with str keys to a dictionary with datetime keys"""
+        new_dict = {
+            datetime.datetime.strptime(r[0], time_format): r[1] for r in values_dict
+        }
+        return new_dict
     @classmethod
     def _get_requests_by_time_for_postgres(
         cls, start_time, end_time, time_format, endpoint
     ):
-        """func.to_char is PostgreSQL specific.
+        """Get total requests number in time period.
+        func.to_char is PostgreSQL specific.
         Lets unit tests replace this function with the SQLite version.
         """
-        if time_format == "%Y-%m-%d":
-            pgsql_time_format = "YYYY-MM-DD"
-        else:
-            pgsql_time_format = "YYYY-MM-DD HH24"
+        pgsql_time_format = cls.get_postgres_time_format(time_format)
         requests_by_time_format = (
             select(
@@ -123,10 +141,12 @@ class AnalyzeRequestMetrics(Base):
         return requests_by_time_format
     @classmethod
-    def _get_requests_by_time_for_sqllite(
+    def _get_requests_by_time_for_sqlite(
         cls, start_time, end_time, time_format, endpoint
     ):
-        """func.strftime is SQLite specific.
+        """Get total requests number in time period.
+        func.strftime is SQLite specific.
         Use this function in unit test using flexmock:
         flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
@@ -178,9 +198,193 @@ class AnalyzeRequestMetrics(Base):
             counts = session.execute(count_requests_by_time_format)
             results = counts.fetchall()
-            # Convert results to a dictionary with proper datetime keys
-            counts_dict = {
-                datetime.datetime.strptime(r[0], time_format): r[1] for r in results
-            }
+            return cls.get_dictionary_with_datetime_keys(time_format, results)
+    @classmethod
+    def _get_average_responses_times_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses time.
+        func.to_char is PostgreSQL specific.
+        Lets unit tests replace this function with the SQLite version.
+        """
+        with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+            average_responses_times = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (
+                        func.avg(
+                            func.extract(  # pylint: disable=not-callable
+                                "epoch", cls.response_sent_at - cls.request_received_at
+                            )
+                        )
+                    ).label("average_response_seconds"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+            results = session.execute(average_responses_times).fetchall()
+            return results
+    @classmethod
+    def _get_average_responses_times_for_sqlite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses time.
+        func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+        flexmock(AnalyzeRequestMetrics).should_receive("_get_average_responses_times_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_average_responses_times_for_sqlite)
+        """
+        with transaction(commit=False) as session:
+            average_responses_times = (
+                select(
+                    func.strftime(time_format, cls.request_received_at).label(
+                        "time_range"
+                    ),
+                    (
+                        func.avg(
+                            func.julianday(cls.response_sent_at)
+                            - func.julianday(cls.request_received_at)  # noqa: W503 flake8 vs ruff
+                        )
+                        * 86400  # noqa: W503 flake8 vs ruff
+                    ).label("average_response_seconds"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+            results = session.execute(average_responses_times).fetchall()
+            return results
+    @classmethod
+    def get_responses_average_time_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses times
+        grouped by time units within a specified period.
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
-            return counts_dict
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses times
+        """
+        with transaction(commit=False) as _:
+            average_responses_times = cls._get_average_responses_times_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+            return cls.get_dictionary_with_datetime_keys(
+                time_format, average_responses_times
+            )
+    @classmethod
+    def _get_average_responses_lengths_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses length.
+        func.to_char is PostgreSQL specific.
+        Lets unit tests replace this function with the SQLite version.
+        """
+        with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+            average_responses_lengths = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (func.avg(cls.response_length)).label("average_responses_length"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+            results = session.execute(average_responses_lengths).fetchall()
+            return results
+    @classmethod
+    def _get_average_responses_lengths_for_sqlite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses length.
+        func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+        flexmock(AnalyzeRequestMetrics)
+        .should_receive("_get_average_responses_lengths_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_average_responses_lengths_for_sqlite)
+        """
+        with transaction(commit=False) as session:
+            average_responses_lengths = (
+                select(
+                    func.strftime(time_format, cls.request_received_at).label(
+                        "time_range"
+                    ),
+                    (func.avg(cls.response_length)).label("average_responses_length"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+            results = session.execute(average_responses_lengths).fetchall()
+            return results
+    @classmethod
+    def get_responses_average_length_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses length
+        grouped by time units within a specified period.
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses lengths
+        """
+        with transaction(commit=False) as _:
+            average_responses_lengths = cls._get_average_responses_lengths_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+            return cls.get_dictionary_with_datetime_keys(
+                time_format, average_responses_lengths
+            )

logdetective/server/metric.py CHANGED Viewed

@@ -41,12 +41,10 @@ def update_metrics(
         sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
     )
     response_length = None
-    if hasattr(response, "explanation") and "choices" in response.explanation:
-        response_length = sum(
-            len(choice["text"])
-            for choice in response.explanation["choices"]
-            if "text" in choice
-        )
+    if hasattr(response, "explanation") and isinstance(
+        response.explanation, models.Explanation
+    ):
+        response_length = len(response.explanation.text)
     response_certainty = (
         response.response_certainty if hasattr(response, "response_certainty") else None
     )

logdetective/server/models.py CHANGED Viewed

@@ -2,7 +2,9 @@ import datetime
 from logging import BASIC_FORMAT
 from typing import List, Dict, Optional, Literal
-from pydantic import BaseModel, Field, model_validator, field_validator
+from pydantic import BaseModel, Field, model_validator, field_validator, NonNegativeFloat
+from logdetective.constants import DEFAULT_TEMPERATURE
 class BuildLog(BaseModel):
@@ -95,6 +97,8 @@ class InferenceConfig(BaseModel):
     )
     url: str = ""
     api_token: str = ""
+    model: str = ""
+    temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -106,6 +110,8 @@ class InferenceConfig(BaseModel):
         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
         self.url = data.get("url", "")
         self.api_token = data.get("api_token", "")
+        self.model = data.get("model", "default-model")
+        self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
 class ExtractorConfig(BaseModel):
@@ -150,7 +156,8 @@ class LogConfig(BaseModel):
     """Logging configuration"""
     name: str = "logdetective"
-    level: str | int = "INFO"
+    level_stream: str | int = "INFO"
+    level_file: str | int = "INFO"
     path: str | None = None
     format: str = BASIC_FORMAT
@@ -160,7 +167,8 @@ class LogConfig(BaseModel):
             return
         self.name = data.get("name", "logdetective")
-        self.level = data.get("level", "INFO").upper()
+        self.level_stream = data.get("level_stream", "INFO").upper()
+        self.level_file = data.get("level_file", "INFO").upper()
         self.path = data.get("path")
         self.format = data.get("format", BASIC_FORMAT)

logdetective/server/plot.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import datetime
-from typing import Optional
+from typing import Optional, Union
 import numpy
 import matplotlib
@@ -62,24 +62,24 @@ class Definition:
 def create_time_series_arrays(
-    counts_dict: dict[datetime.datetime, int],
+    values_dict: dict[datetime.datetime, int],
+    plot_def: Definition,
     start_time: datetime.datetime,
     end_time: datetime.datetime,
-    time_delta: datetime.timedelta,
-    time_format: str,
+    value_type: Optional[Union[int, float]] = int,
 ) -> tuple[numpy.ndarray, numpy.ndarray]:
-    """Create time series arrays from a dictionary of counts.
+    """Create time series arrays from a dictionary of values.
     This function generates two aligned numpy arrays:
     1. An array of timestamps from start_time to end_time
-    2. A corresponding array of counts for each timestamp
+    2. A corresponding array of values for each timestamp
     The timestamps are truncated to the precision specified by time_format.
-    If a timestamp in counts_dict matches a generated timestamp, its count is used;
-    otherwise, the count defaults to zero.
+    If a timestamp in values_dict matches a generated timestamp, its value is used;
+    otherwise, the value defaults to zero.
     Args:
-        counts_dict: Dictionary mapping timestamps to their respective counts
+        values_dict: Dictionary mapping timestamps to their respective values
         start_time: The starting timestamp of the time series
         end_time: The ending timestamp of the time series
         time_delta: The time interval between consecutive timestamps
@@ -88,67 +88,70 @@ def create_time_series_arrays(
     Returns:
         A tuple containing:
             - numpy.ndarray: Array of timestamps
-            - numpy.ndarray: Array of corresponding counts
+            - numpy.ndarray: Array of corresponding values
     """
-    num_intervals = int((end_time - start_time) / time_delta) + 1
+    num_intervals = int((end_time - start_time) / plot_def.time_delta) + 1
     timestamps = numpy.array(
         [
             datetime.datetime.strptime(
-                (start_time + i * time_delta).strftime(format=time_format), time_format
+                (start_time + i * plot_def.time_delta).strftime(
+                    format=plot_def.time_format
+                ),
+                plot_def.time_format,
             )
             for i in range(num_intervals)
         ]
     )
-    counts = numpy.zeros(num_intervals, dtype=int)
+    values = numpy.zeros(num_intervals, dtype=value_type)
     timestamp_to_index = {timestamp: i for i, timestamp in enumerate(timestamps)}
-    for timestamp, count in counts_dict.items():
+    for timestamp, count in values_dict.items():
         if timestamp in timestamp_to_index:
-            counts[timestamp_to_index[timestamp]] = count
+            values[timestamp_to_index[timestamp]] = count
-    return timestamps, counts
+    return timestamps, values
-def _add_bar_chart_for_requests_count(
-    ax1: matplotlib.figure.Axes,
+def _add_bar_chart(
+    ax: matplotlib.figure.Axes,
     plot_def: Definition,
     timestamps: numpy.array,
-    counts: numpy.array,
+    values: numpy.array,
+    label: str,
 ) -> None:
-    """Add a bar chart for requests count (axes 1)"""
+    """Add a blue bar chart"""
     bar_width = (
         0.8 * plot_def.time_delta.total_seconds() / 86400
     )  # Convert to days for matplotlib
-    ax1.bar(
+    ax.bar(
         timestamps,
-        counts,
+        values,
         width=bar_width,
         alpha=0.7,
         color="skyblue",
-        label="Requests",
+        label=label,
     )
-    ax1.set_xlabel("Time")
-    ax1.set_ylabel("Requests", color="blue")
-    ax1.tick_params(axis="y", labelcolor="blue")
+    ax.set_xlabel("Time")
+    ax.set_ylabel(label, color="blue")
+    ax.tick_params(axis="y", labelcolor="blue")
-    ax1.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
-    ax1.xaxis.set_major_locator(plot_def.locator)
+    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
+    ax.xaxis.set_major_locator(plot_def.locator)
     matplotlib.pyplot.xticks(rotation=45)
-    ax1.grid(True, alpha=0.3)
+    ax.grid(True, alpha=0.3)
-def _add_cumulative_line_for_requests_count(
-    ax2: matplotlib.figure.Axes, timestamps: numpy.array, counts: numpy.array
+def _add_line_chart(
+    ax: matplotlib.figure.Axes, timestamps: numpy.array, values: numpy.array, label: str
 ) -> None:
-    """Add cumulative line on secondary y-axis"""
-    cumulative = numpy.cumsum(counts)
-    ax2.plot(timestamps, cumulative, "r-", linewidth=2, label="Cumulative")
-    ax2.set_ylabel("Cumulative Requests", color="red")
-    ax2.tick_params(axis="y", labelcolor="red")
+    """Add a red line chart"""
+    ax.plot(timestamps, values, "r-", linewidth=2, label=label)
+    ax.set_ylabel(label, color="red")
+    ax.tick_params(axis="y", labelcolor="red")
 def requests_per_time(
@@ -183,14 +186,14 @@ def requests_per_time(
         start_time, end_time, plot_def.time_format, endpoint
     )
     timestamps, counts = create_time_series_arrays(
-        requests_counts, start_time, end_time, plot_def.time_delta, plot_def.time_format
+        requests_counts, plot_def, start_time, end_time
     )
     fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
-    _add_bar_chart_for_requests_count(ax1, plot_def, timestamps, counts)
+    _add_bar_chart(ax1, plot_def, timestamps, counts, "Requests")
     ax2 = ax1.twinx()
-    _add_cumulative_line_for_requests_count(ax2, timestamps, counts)
+    _add_line_chart(ax2, timestamps, numpy.cumsum(counts), "Cumulative Requests")
     matplotlib.pyplot.title(
         f"Requests received for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
@@ -204,3 +207,75 @@ def requests_per_time(
     matplotlib.pyplot.tight_layout()
     return fig
+def average_time_per_responses(  # pylint: disable=too-many-locals
+    period_of_time: models.TimePeriod,
+    endpoint: EndpointType = EndpointType.ANALYZE,
+    end_time: Optional[datetime.datetime] = None,
+) -> matplotlib.figure.Figure:
+    """
+    Generate a visualization of average response time and length over a specified time period.
+    This function creates a dual-axis plot showing:
+    1. A bar chart of average response time per time interval
+    2. A line chart of average response length per time interval
+    The time intervals are determined by the provided TimePeriod object, which defines
+    the granularity and formatting of the time axis.
+    Args:
+        period_of_time: A TimePeriod object that defines the time period and interval
+                        for the analysis (e.g., hourly, daily, weekly)
+        endpoint: One of the API endpoints
+        end_time: The end time for the analysis period. If None, defaults to the current
+                  UTC time
+    Returns:
+        A matplotlib Figure object containing the generated visualization
+    """
+    end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+    start_time = period_of_time.get_period_start_time(end_time)
+    plot_def = Definition(period_of_time)
+    responses_average_time = AnalyzeRequestMetrics.get_responses_average_time_in_period(
+        start_time, end_time, plot_def.time_format, endpoint
+    )
+    timestamps, average_time = create_time_series_arrays(
+        responses_average_time,
+        plot_def,
+        start_time,
+        end_time,
+        float,
+    )
+    fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
+    _add_bar_chart(ax1, plot_def, timestamps, average_time, "average response time (seconds)")
+    responses_average_length = (
+        AnalyzeRequestMetrics.get_responses_average_length_in_period(
+            start_time, end_time, plot_def.time_format, endpoint
+        )
+    )
+    timestamps, average_length = create_time_series_arrays(
+        responses_average_length,
+        plot_def,
+        start_time,
+        end_time,
+        float,
+    )
+    ax2 = ax1.twinx()
+    _add_line_chart(ax2, timestamps, average_length, "average response length (chars)")
+    matplotlib.pyplot.title(
+        f"average response time for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
+        f"to {end_time.strftime(plot_def.time_format)})"
+    )
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2, labels1 + labels2, loc="center")
+    matplotlib.pyplot.tight_layout()
+    return fig

logdetective/server/server.py CHANGED Viewed

@@ -186,7 +186,6 @@ async def submit_text(  # pylint: disable=R0913,R0917
     log_probs: int = 1,
     stream: bool = False,
     model: str = "default-model",
-    api_endpoint: str = "/chat/completions",
 ) -> Explanation:
     """Submit prompt to LLM using a selected endpoint.
     max_tokens: number of tokens to be produced, 0 indicates run until encountering EOS
@@ -199,7 +198,7 @@ async def submit_text(  # pylint: disable=R0913,R0917
     if SERVER_CONFIG.inference.api_token:
         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
-    if api_endpoint == "/chat/completions":
+    if SERVER_CONFIG.inference.api_endpoint == "/chat/completions":
         return await submit_text_chat_completions(
             text, headers, max_tokens, log_probs > 0, stream, model
         )
@@ -227,6 +226,7 @@ async def submit_text_completions(  # pylint: disable=R0913,R0917
         "logprobs": log_probs,
         "stream": stream,
         "model": model,
+        "temperature": SERVER_CONFIG.inference.temperature,
     }
     response = await submit_to_llm_endpoint(
@@ -266,6 +266,7 @@ async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
         "logprobs": log_probs,
         "stream": stream,
         "model": model,
+        "temperature": SERVER_CONFIG.inference.temperature,
     }
     response = await submit_to_llm_endpoint(
@@ -300,7 +301,8 @@ async def analyze_log(build_log: BuildLog):
     log_summary = format_snippets(log_summary)
     response = await submit_text(
         PROMPT_CONFIG.prompt_template.format(log_summary),
-        api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+        model=SERVER_CONFIG.inference.model,
+        max_tokens=SERVER_CONFIG.inference.max_tokens,
     )
     certainty = 0
@@ -340,7 +342,8 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
         *[
             submit_text(
                 PROMPT_CONFIG.snippet_prompt_template.format(s),
-                api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+                model=SERVER_CONFIG.inference.model,
+                max_tokens=SERVER_CONFIG.inference.max_tokens,
             )
             for s in log_summary
         ]
@@ -355,7 +358,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     )
     final_analysis = await submit_text(
-        final_prompt, api_endpoint=SERVER_CONFIG.inference.api_endpoint
+        final_prompt,
+        model=SERVER_CONFIG.inference.model,
+        max_tokens=SERVER_CONFIG.inference.max_tokens,
     )
     certainty = 0
@@ -396,7 +401,9 @@ async def analyze_log_stream(build_log: BuildLog):
         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
     stream = await submit_text_chat_completions(
-        PROMPT_CONFIG.prompt_template.format(log_summary), stream=True, headers=headers
+        PROMPT_CONFIG.prompt_template.format(log_summary), stream=True, headers=headers,
+        model=SERVER_CONFIG.inference.model,
+        max_tokens=SERVER_CONFIG.inference.max_tokens,
     )
     return StreamingResponse(stream)
@@ -616,8 +623,8 @@ async def comment_on_mr(
         response.explanation.text,
     )
-    # Get the formatted comment.
-    comment = await generate_mr_comment(job, log_url, response)
+    # Get the formatted short comment.
+    short_comment = await generate_mr_comment(job, log_url, response, full=False)
     # Look up the merge request
     merge_request = await asyncio.to_thread(
@@ -625,11 +632,33 @@ async def comment_on_mr(
     )
     # Submit a new comment to the Merge Request using the Gitlab API
-    await asyncio.to_thread(merge_request.discussions.create, {"body": comment})
+    discussion = await asyncio.to_thread(
+        merge_request.discussions.create, {"body": short_comment}
+    )
+    # Get the ID of the first note
+    note_id = discussion.attributes["notes"][0]["id"]
+    note = discussion.notes.get(note_id)
+    # Update the comment with the full details
+    # We do this in a second step so we don't bombard the user's email
+    # notifications with a massive message. Gitlab doesn't send email for
+    # comment edits.
+    full_comment = await generate_mr_comment(job, log_url, response, full=True)
+    note.body = full_comment
+    # Pause for five seconds before sending the snippet data, otherwise
+    # Gitlab may bundle the edited message together with the creation
+    # message in email.
+    await asyncio.sleep(5)
+    await asyncio.to_thread(note.save)
 async def generate_mr_comment(
-    job: gitlab.v4.objects.ProjectJob, log_url: str, response: StagedResponse
+    job: gitlab.v4.objects.ProjectJob,
+    log_url: str,
+    response: StagedResponse,
+    full: bool = True,
 ) -> str:
     """Use a template to generate a comment string to submit to Gitlab"""
@@ -637,7 +666,11 @@ async def generate_mr_comment(
     script_path = Path(__file__).resolve().parent
     template_path = Path(script_path, "templates")
     jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
-    tpl = jinja_env.get_template("gitlab_comment.md.j2")
+    if full:
+        tpl = jinja_env.get_template("gitlab_full_comment.md.j2")
+    else:
+        tpl = jinja_env.get_template("gitlab_short_comment.md.j2")
     artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
@@ -676,6 +709,35 @@ def _svg_figure_response(fig: matplotlib.figure.Figure):
     )
+def _multiple_svg_figures_response(figures: list[matplotlib.figure.Figure]):
+    """Create a response with multiple svg figures."""
+    svg_contents = []
+    for i, fig in enumerate(figures):
+        buf = BytesIO()
+        fig.savefig(buf, format="svg", bbox_inches="tight")
+        matplotlib.pyplot.close(fig)
+        buf.seek(0)
+        svg_contents.append(buf.read().decode("utf-8"))
+    html_content = "<html><body>\n"
+    for i, svg in enumerate(svg_contents):
+        html_content += f"<div id='figure-{i}'>\n{svg}\n</div>\n"
+    html_content += "</body></html>"
+    return BasicResponse(content=html_content, media_type="text/html")
+@app.get("/metrics/analyze", response_class=StreamingResponse)
+async def show_analyze_metrics(period_since_now: TimePeriod = Depends(TimePeriod)):
+    """Show statistics for requests and responses in the given period of time
+    for the /analyze API endpoint."""
+    fig_requests = plot.requests_per_time(period_since_now, EndpointType.ANALYZE)
+    fig_responses = plot.average_time_per_responses(
+        period_since_now, EndpointType.ANALYZE
+    )
+    return _multiple_svg_figures_response([fig_requests, fig_responses])
 @app.get("/metrics/analyze/requests", response_class=StreamingResponse)
 async def show_analyze_requests(period_since_now: TimePeriod = Depends(TimePeriod)):
     """Show statistics for the requests received in the given period of time
@@ -684,6 +746,27 @@ async def show_analyze_requests(period_since_now: TimePeriod = Depends(TimePerio
     return _svg_figure_response(fig)
+@app.get("/metrics/analyze/responses", response_class=StreamingResponse)
+async def show_analyze_responses(period_since_now: TimePeriod = Depends(TimePeriod)):
+    """Show statistics for responses given in the specified period of time
+    for the /analyze API endpoint."""
+    fig = plot.average_time_per_responses(period_since_now, EndpointType.ANALYZE)
+    return _svg_figure_response(fig)
+@app.get("/metrics/analyze/staged", response_class=StreamingResponse)
+async def show_analyze_staged_metrics(
+    period_since_now: TimePeriod = Depends(TimePeriod),
+):
+    """Show statistics for requests and responses in the given period of time
+    for the /analyze/staged API endpoint."""
+    fig_requests = plot.requests_per_time(period_since_now, EndpointType.ANALYZE_STAGED)
+    fig_responses = plot.average_time_per_responses(
+        period_since_now, EndpointType.ANALYZE_STAGED
+    )
+    return _multiple_svg_figures_response([fig_requests, fig_responses])
 @app.get("/metrics/analyze/staged/requests", response_class=StreamingResponse)
 async def show_analyze_staged_requests(
     period_since_now: TimePeriod = Depends(TimePeriod),
@@ -692,3 +775,13 @@ async def show_analyze_staged_requests(
     for the /analyze/staged API endpoint."""
     fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE_STAGED)
     return _svg_figure_response(fig)
+@app.get("/metrics/analyze/staged/responses", response_class=StreamingResponse)
+async def show_analyze_staged_responses(
+    period_since_now: TimePeriod = Depends(TimePeriod),
+):
+    """Show statistics for responses given in the specified period of time
+    for the /analyze/staged API endpoint."""
+    fig = plot.average_time_per_responses(period_since_now, EndpointType.ANALYZE_STAGED)
+    return _svg_figure_response(fig)

logdetective/server/templates/{gitlab_comment.md.j2 → gitlab_full_comment.md.j2} RENAMED Viewed

@@ -9,9 +9,7 @@ In this case, we are {{ certainty }}% certain of the response {{ emoji_face }}.
 <ul>
 {% for snippet in snippets %}
 <li>
-<code>
-Line {{ snippet.line_number }}: {{ snippet.text }}
-</code>
+<b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
 {{ snippet.explanation }}
 </li>
 {% endfor %}

logdetective/server/templates/gitlab_short_comment.md.j2 ADDED Viewed

@@ -0,0 +1,53 @@
+The package {{ package }} failed to build, here is a possible explanation why.
+Please know that the explanation was provided by AI and may be incorrect.
+In this case, we are {{ certainty }}% certain of the response {{ emoji_face }}.
+{{ explanation }}
+<details>
+  <summary>Logs</summary>
+  <p>
+    Log Detective analyzed the following log files to provide an explanation:
+  </p>
+  <ul>
+    <li><a href="{{ log_url }}">{{ log_url }}</a></li>
+  </ul>
+  <p>
+    Additional logs are available from:
+  </p>
+  <ul>
+    <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
+  </ul>
+  <p>
+    Please know that these log files are automatically removed after some
+    time, so you might need a backup.
+  </p>
+</details>
+<details>
+  <summary>Help</summary>
+  <p>Don't hesitate to reach out.</p>
+  <ul>
+    <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
+    <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
+    <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
+    <li><a href="https://log-detective.com/documentation">Documentation</a></li>
+  </ul>
+</details>
+---
+This comment was created by [Log Detective][log-detective].
+Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
+or :thumbsdown: to help us improve.<br>
+[log-detective]: https://log-detective.com/
+[contact]: https://github.com/fedora-copr

logdetective/server/utils.py CHANGED Viewed

@@ -37,7 +37,7 @@ def get_log(config: Config):
     if getattr(log, "initialized", False):
         return log
-    log.setLevel(config.log.level)
+    log.setLevel("DEBUG")
     # Drop the default handler, we will create it ourselves
     log.handlers = []
@@ -45,12 +45,14 @@ def get_log(config: Config):
     # STDOUT
     stream_handler = logging.StreamHandler()
     stream_handler.setFormatter(logging.Formatter(config.log.format))
+    stream_handler.setLevel(config.log.level_stream)
     log.addHandler(stream_handler)
     # Log to file
     if config.log.path:
         file_handler = logging.FileHandler(config.log.path)
         file_handler.setFormatter(logging.Formatter(config.log.format))
+        file_handler.setLevel(config.log.level_file)
         log.addHandler(file_handler)
     log.initialized = True

logdetective/utils.py CHANGED Viewed

@@ -111,19 +111,23 @@ def compute_certainty(probs: List[Dict]) -> float:
 def process_log(
-    log: str, model: Llama, stream: bool, prompt_template: str
+    log: str, model: Llama, stream: bool, prompt_template: str,
+    temperature: float
 ) -> CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]:
     """Processes a given log using the provided language model and returns its summary.
     Args:
         log (str): The input log to be processed.
         model (Llama): The language model used for processing the log.
+        stream (bool): Return output as Iterator.
+        prompt_template (str): Which prompt template to use.
+        temperature (float): Temperature parameter for model runtime.
     Returns:
         str: The summary of the given log generated by the language model.
     """
     response = model(
-        prompt=prompt_template.format(log), stream=stream, max_tokens=0, logprobs=1
+        prompt=prompt_template.format(log), stream=stream, max_tokens=0, logprobs=1,
+        temperature=temperature
     )
     return response

{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.5.10
+Version: 0.5.11
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -47,6 +47,8 @@ Log Detective
 A Python tool to analyze logs using a Language Model (LLM) and Drain template miner.
+Note: if you are looking for the code of the logdetective.com website, it is in [github.com/fedora-copr/logdetective-website](https://github.com/fedora-copr/logdetective-website).
 Installation
 ------------
@@ -95,6 +97,17 @@ Example you want to use a different model:
     logdetective https://example.com/logs.txt --model https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true
     logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
+Example of a different suffix (useful for models that were quantized):
+    logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --model 'fedora-copr/granite-3.2-8b-instruct-GGUF' -F Q4_K.gguf
+Example of altered prompts:
+     cp ~/.local/lib/python3.13/site-packages/logdetective/prompts.yml ~/my-prompts.yml
+     vi ~/my-prompts.yml # edit the prompts there to better fit your needs
+     logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --prompts ~/my-prompts.yml
 Note that streaming is broken with some models (notably Meta-Llama-3) and can be worked around with the `--no-stream` option:
     logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --no-stream
@@ -337,11 +350,23 @@ certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 Querying statistics
 -------------------
-You can retrieve statistics about server requests over a specified time period
-using either the `curl` command or the `http` command (provided by the `httpie` package).
+You can retrieve statistics about server requests and responses over a specified time period
+using a browser, the `curl` command, or the `http` command (provided by the `httpie` package).
 When no time period is specified, the query defaults to the last 2 days:
+You can view requests and responses statistics
+ - for the `/analyze` endpoint at http://localhost:8080/metrics/analyze
+ - for the `/analyze/staged` endpoint at http://localhost:8080/metrics/analyze/staged.
+You can retrieve single svg images at the following endpoints:
+ - `/metrics/analyze/requests`
+ - `/metrics/analyze/responses`
+ - `/metrics/analyze/staged/requests`
+ - `/metrics/analyze/staged/responses`
+Examples:
 ```
 http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
 curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
@@ -349,7 +374,6 @@ curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
 You can specify the time period in hours, days, or weeks.
 The time period:
  - cannot be less than one hour
  - cannot be negative
  - ends at the current time (when the query is made)

logdetective-0.5.11.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,24 @@
+logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
+logdetective/constants.py,sha256=A5PzeqlQqDbBS_kzP2hl-lhJ0lCEqdbvW3CaQUYVxjw,1849
+logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+logdetective/extractors.py,sha256=7ahzWbTtU9MveG1Q7wU9LO8OJgs85X-cHmWltUhCe9M,3491
+logdetective/logdetective.py,sha256=Q1SfQ9sWR5sIvHJag61-F-8edwf7p1SV7QZRg9VaWcc,5604
+logdetective/models.py,sha256=nrGBmMRu8i6UhFflQKAp81Y3Sd_Aaoor0i_yqSJoLT0,1115
+logdetective/prompts.yml,sha256=dMW2-bdTIqv7LF_owqRD4xinMK5ZWcNhDynnX1zoKns,1722
+logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
+logdetective/server/database/models.py,sha256=m_3qNBWJwLSwjJn0AmwSxXMJk75Gu1bXFtGAP_4zps4,14088
+logdetective/server/metric.py,sha256=-uM_-yqxNA-EZTCnNRdQ8g1MicmE5eC6jRFI_mBBYUg,2606
+logdetective/server/models.py,sha256=URqZcfx5yUsifZ1pOwZ_uU3Tyjcdvuq6qEnAvTexl4A,8475
+logdetective/server/plot.py,sha256=B2rOngqx7g-Z3NfttboTip3frkypdF1H7FhK8vh45mE,9655
+logdetective/server/server.py,sha256=4NylBojHm9E3gjByVWs870T204ls39EbZmUfU0Kyq4U,28395
+logdetective/server/templates/gitlab_full_comment.md.j2,sha256=DQZ2WVFedpuXI6znbHIW4wpF9BmFS8FaUkowh8AnGhE,1627
+logdetective/server/templates/gitlab_short_comment.md.j2,sha256=fzScpayv2vpRLczP_0O0YxtA8rsKvR6gSv4ntNdWb98,1443
+logdetective/server/utils.py,sha256=QO0H1q55YLCLKxkViqex4Uu31LnakpYUKJfZHysonSc,1838
+logdetective/utils.py,sha256=nklnTipAet9P9aEiuHcnK62WT0DmNHbvO1TvNlrxlik,6463
+logdetective-0.5.11.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.5.11.dist-info/METADATA,sha256=LOOzu99kJaP02U2OaFQciPdWKhlgr4Vm4tVKijTY7NM,15882
+logdetective-0.5.11.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+logdetective-0.5.11.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.5.11.dist-info/RECORD,,

logdetective-0.5.10.dist-info/RECORD DELETED Viewed

@@ -1,23 +0,0 @@
-logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
-logdetective/constants.py,sha256=eiS6eYhEgl_Rlyi_B9j00DDp9A-UDhuFz3ACWtKf_SU,1558
-logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
-logdetective/extractors.py,sha256=7ahzWbTtU9MveG1Q7wU9LO8OJgs85X-cHmWltUhCe9M,3491
-logdetective/logdetective.py,sha256=SDuzeS9sMp7rs6cTZAEd0ajtyWv9XnDkEPTF82nwaYo,5390
-logdetective/models.py,sha256=nrGBmMRu8i6UhFflQKAp81Y3Sd_Aaoor0i_yqSJoLT0,1115
-logdetective/prompts.yml,sha256=3orDNqqZNadWCaNncgfk8D3Pqqef4IzfScoa_jUJzCY,1452
-logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
-logdetective/server/database/models.py,sha256=arIahOCT-hTmh904DXrWSkH7rlo13Ppu-OO80huX5Dc,6118
-logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
-logdetective/server/models.py,sha256=cf1ngu_-19rP_i49s5cEwIzh6SfL_ZpVy4EykCpfWck,8076
-logdetective/server/plot.py,sha256=3o-CNHjel04ekpwSB4ckV7dbiF663cfPkimQ0aP9U_8,7073
-logdetective/server/server.py,sha256=FDKx-6wsVoEwdEgcoepAT3GL0gZKjMSpB1VU-jaKt2w,24618
-logdetective/server/templates/gitlab_comment.md.j2,sha256=kheTkhQ-LfuFkr8av-Mw2a-9VYEUbDTLwaa-CKI6OkI,1622
-logdetective/server/utils.py,sha256=6y4gZCwQG4HcjWJwYdzwP46Jsm3xoNXZWH4kYmSWVZA,1741
-logdetective/utils.py,sha256=_cBBkBwZHX5qxy0K5WK2MnHA4x_oor7R-QED2VZLbCA,6226
-logdetective-0.5.10.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-logdetective-0.5.10.dist-info/METADATA,sha256=NbD3YEoEU-YAhH-VjOo95qWxyk1T1bq5wCih4N5oyqs,14738
-logdetective-0.5.10.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-logdetective-0.5.10.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
-logdetective-0.5.10.dist-info/RECORD,,

{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/LICENSE RENAMED Viewed

File without changes

{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{logdetective-0.5.10.dist-info → logdetective-0.5.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

logdetective 0.5.10__py3-none-any.whl → 0.5.11__py3-none-any.whl

logdetective 0.5.10py3-none-any.whl → 0.5.11py3-none-any.whl