logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/constants.py +33 -12
- logdetective/extractors.py +137 -68
- logdetective/logdetective.py +102 -33
- logdetective/models.py +99 -0
- logdetective/prompts-summary-first.yml +20 -0
- logdetective/prompts-summary-only.yml +13 -0
- logdetective/prompts.yml +90 -0
- logdetective/remote_log.py +67 -0
- logdetective/server/compressors.py +186 -0
- logdetective/server/config.py +78 -0
- logdetective/server/database/base.py +34 -26
- logdetective/server/database/models/__init__.py +33 -0
- logdetective/server/database/models/exceptions.py +17 -0
- logdetective/server/database/models/koji.py +143 -0
- logdetective/server/database/models/merge_request_jobs.py +623 -0
- logdetective/server/database/models/metrics.py +427 -0
- logdetective/server/emoji.py +148 -0
- logdetective/server/exceptions.py +37 -0
- logdetective/server/gitlab.py +451 -0
- logdetective/server/koji.py +159 -0
- logdetective/server/llm.py +309 -0
- logdetective/server/metric.py +75 -30
- logdetective/server/models.py +426 -23
- logdetective/server/plot.py +432 -0
- logdetective/server/server.py +580 -468
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
- logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
- logdetective/server/utils.py +98 -32
- logdetective/skip_snippets.yml +12 -0
- logdetective/utils.py +187 -73
- logdetective-2.11.0.dist-info/METADATA +568 -0
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective/server/database/models.py +0 -88
- logdetective-0.4.0.dist-info/METADATA +0 -333
- logdetective-0.4.0.dist-info/RECORD +0 -19
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
--- /dev/null
+++ b/logdetective/server/database/models/metrics.py
@@ -0,0 +1,427 @@
+from __future__ import annotations
+import enum
+import datetime
+from typing import Optional, List, Self, Tuple, TYPE_CHECKING
+
+import backoff
+
+from sqlalchemy import (
+    Integer,
+    Float,
+    DateTime,
+    Enum,
+    func,
+    select,
+    distinct,
+    ForeignKey,
+    LargeBinary,
+)
+from sqlalchemy.orm import Mapped, mapped_column, relationship, aliased
+from sqlalchemy.exc import OperationalError
+
+from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES
+from logdetective.server.database.models.merge_request_jobs import (
+    GitlabMergeRequestJobs,
+    Forge,
+)
+
+
+if TYPE_CHECKING:
+    from .koji import KojiTaskAnalysis
+
+
+class EndpointType(enum.Enum):
+    """Different analyze endpoints"""
+
+    ANALYZE = "analyze_log"
+    ANALYZE_STAGED = "analyze_log_staged"
+    ANALYZE_STREAM = "analyze_log_stream"
+    ANALYZE_GITLAB_JOB = "analyze_gitlab_job"
+    ANALYZE_KOJI_TASK = "analyze_koji_task"
+
+
+class AnalyzeRequestMetrics(Base):
+    """Store data related to received requests and given responses"""
+
+    __tablename__ = "analyze_request_metrics"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    endpoint: Mapped[EndpointType] = mapped_column(
+        Enum(EndpointType),
+        nullable=False,
+        index=True,
+        comment="The service endpoint that was called",
+    )
+    request_received_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True),
+        nullable=False,
+        index=True,
+        default=datetime.datetime.now(datetime.timezone.utc),
+        comment="Timestamp when the request was received",
+    )
+    compressed_log: Mapped[bytes] = mapped_column(
+        LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
+        nullable=False,
+        index=False,
+        comment="Log processed, saved in a zip format",
+    )
+    compressed_response: Mapped[Optional[bytes]] = mapped_column(
+        LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
+        nullable=True,
+        index=False,
+        comment="Given response (with explanation and snippets) saved in a zip format",
+    )
+    response_sent_at: Mapped[Optional[datetime.datetime]] = mapped_column(
+        DateTime(timezone=True),
+        nullable=True,
+        comment="Timestamp when the response was sent back",
+    )
+    response_length: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True, comment="Length of the response in chars"
+    )
+    response_certainty: Mapped[Optional[float]] = mapped_column(
+        Float, nullable=True, comment="Certainty for generated response"
+    )
+
+    merge_request_job_id: Mapped[Optional[int]] = mapped_column(
+        Integer,
+        ForeignKey("gitlab_merge_request_jobs.id"),
+        nullable=True,
+        index=False,
+        comment="Is this an analyze request coming from a merge request?",
+    )
+
+    mr_job: Mapped[Optional["GitlabMergeRequestJobs"]] = relationship(
+        "GitlabMergeRequestJobs",
+        back_populates="request_metrics"
+    )
+
+    koji_tasks: Mapped[List["KojiTaskAnalysis"]] = relationship(
+        "KojiTaskAnalysis",
+        back_populates="response"
+    )
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    async def create(
+        cls,
+        endpoint: EndpointType,
+        compressed_log: bytes,
+        request_received_at: Optional[datetime.datetime] = None,
+    ) -> int:
+        """Create AnalyzeRequestMetrics new line
+        with data related to a received request"""
+        async with transaction(commit=True) as session:
+            metrics = AnalyzeRequestMetrics()
+            metrics.endpoint = endpoint
+            metrics.compressed_log = compressed_log
+            metrics.request_received_at = request_received_at or datetime.datetime.now(
+                datetime.timezone.utc
+            )
+            session.add(metrics)
+            await session.flush()
+            return metrics.id
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    async def update(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
+        cls,
+        id_: int,
+        response_sent_at: DateTime,
+        response_length: int,
+        response_certainty: float,
+        compressed_response: bytes,
+    ) -> None:
+        """Update a row
+        with data related to the given response"""
+        query = select(AnalyzeRequestMetrics).filter(AnalyzeRequestMetrics.id == id_)
+        async with transaction(commit=True) as session:
+            query_result = await session.execute(query)
+            metrics = query_result.scalars().first()
+            if metrics is None:
+                raise ValueError("Returned `AnalyzeRequestMetrics` table is empty.")
+            metrics.response_sent_at = response_sent_at
+            metrics.response_length = response_length
+            metrics.response_certainty = response_certainty
+            metrics.compressed_response = compressed_response
+            session.add(metrics)
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    async def get_metric_by_id(
+        cls,
+        id_: int,
+    ) -> Self:
+        """Update a row
+        with data related to the given response"""
+        query = select(AnalyzeRequestMetrics).filter(AnalyzeRequestMetrics.id == id_)
+        async with transaction(commit=True) as session:
+            query_result = await session.execute(query)
+            metric = query_result.scalars().first()
+            if metric is None:
+                raise ValueError("Returned `AnalyzeRequestMetrics` table is empty.")
+            return metric
+
+    async def add_mr_job(
+        self,
+        forge: Forge,
+        project_id: int,
+        mr_iid: int,
+        job_id: int,
+    ) -> None:
+        """This request was triggered by a merge request job.
+        Link it.
+
+        Args:
+            forge: forge name
+            project_id: forge project id
+            mr_iid: merge request forge iid
+            job_id: forge job id
+        """
+        mr_job = await GitlabMergeRequestJobs.get_or_create(
+            forge, project_id, mr_iid, job_id
+        )
+        self.merge_request_job_id = mr_job.id
+        async with transaction(commit=True) as session:
+            await session.merge(self)
+
+    @classmethod
+    async def get_requests_metrics_for_mr_job(
+        cls,
+        forge: Forge,
+        project_id: int,
+        mr_iid: int,
+        job_id: int,
+    ) -> List[Self]:
+        """Search for all requests triggered by the specified merge request job.
+
+        Args:
+            forge: forge name
+            project_id: forge project id
+            mr_iid: merge request forge iid
+            job_id: forge job id
+        """
+        mr_job_alias = aliased(GitlabMergeRequestJobs)
+        query = (
+            select(cls)
+            .join(mr_job_alias, cls.merge_request_job_id == mr_job_alias.id)
+            .filter(
+                mr_job_alias.forge == forge,
+                mr_job_alias.mr_iid == mr_iid,
+                mr_job_alias.project_id == project_id,
+                mr_job_alias.job_id == job_id,
+            )
+        )
+        async with transaction(commit=False) as session:
+            query_result = await session.execute(query)
+            metrics = query_result.scalars().all()
+            return metrics
+
+    @classmethod
+    def get_postgres_time_format(cls, time_format):
+        """Map python time format in the PostgreSQL format."""
+        if time_format == "%Y-%m-%d":
+            pgsql_time_format = "YYYY-MM-DD"
+        else:
+            pgsql_time_format = "YYYY-MM-DD HH24"
+        return pgsql_time_format
+
+    @classmethod
+    def get_dictionary_with_datetime_keys(
+        cls, time_format: str, query_results: List[Tuple[str, int]]
+    ) -> dict[datetime.datetime, int]:
+        """Convert from a list of tuples with str as first values
+        to a dictionary with datetime keys"""
+        new_dict = {
+            datetime.datetime.strptime(e[0], time_format): e[1] for e in query_results
+        }
+        return new_dict
+
+    @classmethod
+    def _get_requests_by_time_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get total requests number in time period.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+        requests_by_time_format = (
+            select(
+                cls.id,
+                func.to_char(cls.request_received_at, pgsql_time_format).label(
+                    "time_format"
+                ),
+            )
+            .filter(cls.request_received_at.between(start_time, end_time))
+            .filter(cls.endpoint == endpoint)
+            .cte("requests_by_time_format")
+        )
+        return requests_by_time_format
+
+    @classmethod
+    async def get_requests_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with request counts grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects to request counts
+        """
+        async with transaction(commit=False) as session:
+            requests_by_time_format = cls._get_requests_by_time_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+
+            count_requests_by_time_format = select(
+                requests_by_time_format.c.time_format,
+                func.count(distinct(requests_by_time_format.c.id)),  # pylint: disable=not-callable
+            ).group_by("time_format")
+
+            query_results = await session.execute(count_requests_by_time_format)
+            results = query_results.all()
+
+        return cls.get_dictionary_with_datetime_keys(time_format, results)
+
+    @classmethod
+    async def _get_average_responses_times_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses time.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        async with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+            average_responses_times = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (
+                        func.avg(
+                            func.extract(  # pylint: disable=not-callable
+                                "epoch", cls.response_sent_at - cls.request_received_at
+                            )
+                        )
+                    ).label("average_response_seconds"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            query_results = await session.execute(average_responses_times)
+            results = query_results.all()
+            return results
+
+    @classmethod
+    async def get_responses_average_time_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses times
+        grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses times
+        """
+        async with transaction(commit=False) as _:
+            average_responses_times = (
+                await cls._get_average_responses_times_for_postgres(
+                    start_time, end_time, time_format, endpoint
+                )
+            )
+
+        return cls.get_dictionary_with_datetime_keys(
+            time_format, average_responses_times
+        )
+
+    @classmethod
+    async def _get_average_responses_lengths_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses length.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        async with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+            average_responses_lengths = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (func.avg(cls.response_length)).label("average_responses_length"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            query_results = await session.execute(average_responses_lengths)
+            results = query_results.all()
+            return results
+
+    @classmethod
+    async def get_responses_average_length_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses length
+        grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses lengths
+        """
+        async with transaction(commit=False) as _:
+            average_responses_lengths = (
+                await cls._get_average_responses_lengths_for_postgres(
+                    start_time, end_time, time_format, endpoint
+                )
+            )
+
+        return cls.get_dictionary_with_datetime_keys(
+            time_format, average_responses_lengths
+        )
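Taken together, `create()` and `update()` define the request lifecycle for the new metrics table: a handler records the compressed log when a request arrives and fills in the response columns once analysis finishes. The sketch below is a hypothetical illustration of that flow, not code from the package; only the `AnalyzeRequestMetrics` and `EndpointType` calls follow the signatures added in this diff, while `run_analysis()` and the `zlib` compression stand in for the server's own helpers (the real compression utilities live in `logdetective/server/compressors.py`, whose API is not shown here).

```python
# Hypothetical usage sketch: record a request, then complete the row after analysis.
import datetime
import zlib

from logdetective.server.database.models.metrics import (
    AnalyzeRequestMetrics,
    EndpointType,
)


async def run_analysis(log_text: str) -> tuple[str, float]:
    # Placeholder for the actual LLM analysis step; returns (explanation, certainty).
    return f"Analyzed {len(log_text)} characters of log.", 0.5


async def analyze_with_metrics(log_text: str) -> str:
    # Record the incoming request as soon as the log is available.
    metrics_id = await AnalyzeRequestMetrics.create(
        endpoint=EndpointType.ANALYZE,
        compressed_log=zlib.compress(log_text.encode("utf-8")),
    )

    response_text, certainty = await run_analysis(log_text)

    # Fill in the response half of the same row once the answer is ready.
    await AnalyzeRequestMetrics.update(
        metrics_id,
        response_sent_at=datetime.datetime.now(datetime.timezone.utc),
        response_length=len(response_text),
        response_certainty=certainty,
        compressed_response=zlib.compress(response_text.encode("utf-8")),
    )
    return response_text
```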
--- /dev/null
+++ b/logdetective/server/emoji.py
@@ -0,0 +1,148 @@
+import asyncio
+
+from typing import List
+from collections import Counter
+
+import gitlab
+
+from logdetective.server.models import TimePeriod
+from logdetective.server.database.models import (
+    Comments,
+    Reactions,
+    GitlabMergeRequestJobs,
+    Forge,
+)
+from logdetective.server.config import LOG
+
+
+async def collect_emojis(gitlab_conn: gitlab.Gitlab, period: TimePeriod):
+    """
+    Collect emoji feedback from logdetective comments saved in database.
+    Check only comments created in the last given period of time.
+    """
+    comments = await Comments.get_since(period.get_period_start_time()) or []
+    comments_for_gitlab_connection = [
+        comment for comment in comments if comment.forge == gitlab_conn.url
+    ]
+    await collect_emojis_in_comments(comments_for_gitlab_connection, gitlab_conn)
+
+
+async def collect_emojis_for_mr(
+    project_id: int, mr_iid: int, gitlab_conn: gitlab.Gitlab
+):
+    """
+    Collect emoji feedback from logdetective comments in the specified MR.
+    """
+    comments = []
+    try:
+        url = Forge(gitlab_conn.url)
+    except ValueError as ex:
+        LOG.exception("Attempt to use unrecognized Forge `%s`", gitlab_conn.url)
+        raise ex
+    mr_jobs = await GitlabMergeRequestJobs.get_by_mr_iid(url, project_id, mr_iid) or []
+
+    comments = [await Comments.get_by_mr_job(mr_job) for mr_job in mr_jobs]
+    # Filter all cases when no comments were found. This shouldn't happen if the database
+    # is in good order. But checking for it can't hurt.
+    comments = [comment for comment in comments if isinstance(comment, Comments)]
+
+    await collect_emojis_in_comments(comments, gitlab_conn)
+
+
+async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
+    comments: List[Comments], gitlab_conn: gitlab.Gitlab
+):
+    """
+    Collect emoji feedback from specified logdetective comments.
+    """
+    projects = {}
+    merge_requests = {}
+    for comment in comments:
+        mr_job_db = await GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
+        if not mr_job_db:
+            continue
+        try:
+            if mr_job_db.id not in projects:
+                project = await asyncio.to_thread(
+                    gitlab_conn.projects.get, mr_job_db.project_id
+                )
+
+                projects[mr_job_db.id] = project
+            else:
+                project = projects[mr_job_db.id]
+            merge_request_iid = mr_job_db.mr_iid
+            if merge_request_iid not in merge_requests:
+                merge_request = await asyncio.to_thread(
+                    project.mergerequests.get, merge_request_iid
+                )
+
+                merge_requests[merge_request_iid] = merge_request
+            else:
+                merge_request = merge_requests[merge_request_iid]
+
+            discussion = await asyncio.to_thread(
+                merge_request.discussions.get, comment.comment_id
+            )
+
+            # Get the ID of the first note
+            if "notes" not in discussion.attributes or len(discussion.attributes["notes"]) == 0:
+                LOG.warning(
+                    "No notes were found in comment %s in merge request %d",
+                    comment.comment_id,
+                    merge_request_iid,
+                )
+                continue
+
+            note_id = discussion.attributes["notes"][0]["id"]
+            note = await asyncio.to_thread(merge_request.notes.get, note_id)
+
+        # Log warning with full stack trace, in case we can't find the right
+        # discussion, merge request or project.
+        # All of these objects can be lost, and we shouldn't treat as an error.
+        # Other exceptions are raised.
+        except gitlab.GitlabError as e:
+            if e.response_code == 404:
+                LOG.warning(
+                    "Couldn't retrieve emoji counts for comment %s due to GitlabError",
+                    comment.comment_id, exc_info=True)
+                continue
+            LOG.error("Error encountered while processing emoji counts for GitLab comment %s",
+                      comment.comment_id, exc_info=True)
+            raise
+
+        emoji_counts = Counter(emoji.name for emoji in note.awardemojis.list())
+
+        # keep track of not updated reactions
+        # because we need to remove them
+        old_emojis = [
+            reaction.reaction_type
+            for reaction in await Reactions.get_all_reactions(
+                comment.forge,
+                mr_job_db.project_id,
+                mr_job_db.mr_iid,
+                mr_job_db.job_id,
+                comment.comment_id,
+            )
+        ]
+        for key, value in emoji_counts.items():
+            await Reactions.create_or_update(
+                comment.forge,
+                mr_job_db.project_id,
+                mr_job_db.mr_iid,
+                mr_job_db.job_id,
+                comment.comment_id,
+                key,
+                value,
+            )
+            if key in old_emojis:
+                old_emojis.remove(key)
+
+        # not updated reactions has been removed, drop them
+        await Reactions.delete(
+            comment.forge,
+            mr_job_db.project_id,
+            mr_job_db.mr_iid,
+            mr_job_db.job_id,
+            comment.comment_id,
+            old_emojis,
+        )
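The per-comment loop above is essentially a reconcile step: current award-emoji counts fetched from GitLab are upserted with `Reactions.create_or_update`, and any reaction type stored in the database that no longer appears on the note is passed to `Reactions.delete`. The standalone sketch below illustrates that diffing logic with plain dictionaries; the function and variable names are illustrative only and are not part of the package.

```python
from collections import Counter


def reconcile_reactions(
    current_emoji_names: list[str],
    stored_counts: dict[str, int],
) -> tuple[dict[str, int], list[str]]:
    """Return (counts to upsert, reaction types to delete), mirroring the
    create_or_update / delete split in collect_emojis_in_comments."""
    new_counts = Counter(current_emoji_names)
    stale = [name for name in stored_counts if name not in new_counts]
    return dict(new_counts), stale


# Example: thumbsup stays (count refreshed), rocket is new, thumbsdown is stale.
upserts, deletions = reconcile_reactions(
    ["thumbsup", "thumbsup", "rocket"],
    {"thumbsup": 1, "thumbsdown": 2},
)
assert upserts == {"thumbsup": 2, "rocket": 1}
assert deletions == ["thumbsdown"]
```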
--- /dev/null
+++ b/logdetective/server/exceptions.py
@@ -0,0 +1,37 @@
+"""Exception classes for Log Detective server."""
+
+
+class LogDetectiveException(Exception):
+    """Base exception for Log Detective server."""
+
+
+class LogsMissingError(LogDetectiveException):
+    """The logs are missing, possibly due to garbage-collection"""
+
+
+class LogDetectiveKojiException(LogDetectiveException):
+    """Base exception for Koji-related errors."""
+
+
+class KojiInvalidTaskID(LogDetectiveKojiException):
+    """The task ID is invalid."""
+
+
+class UnknownTaskType(LogDetectiveKojiException):
+    """The task type is not supported."""
+
+
+class NoFailedTask(LogDetectiveKojiException):
+    """The task is not in the FAILED state."""
+
+
+class LogDetectiveConnectionError(LogDetectiveKojiException):
+    """A connection error occurred."""
+
+
+class LogsTooLargeError(LogDetectiveKojiException):
+    """The log archive exceeds the configured maximum size"""
+
+
+class LogDetectiveMetricsError(LogDetectiveException):
+    """Exception was encountered while recording metrics"""