logdetective 0.6.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,30 @@
+ import io
  import enum
  import datetime
+ from typing import Optional, List
+
+ import backoff

- from typing import Optional
  from sqlalchemy import (
      Column,
      Integer,
      Float,
      DateTime,
-     String,
      Enum,
      func,
      select,
      distinct,
+     ForeignKey,
+     LargeBinary,
  )
+ from sqlalchemy.orm import relationship, aliased
+ from sqlalchemy.exc import OperationalError

- from logdetective.server.database.base import Base, transaction
+ from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES
+ from logdetective.server.database.models.merge_request_jobs import (
+     GitlabMergeRequestJobs,
+     Forge,
+ )


  class EndpointType(enum.Enum):
@@ -23,6 +33,7 @@ class EndpointType(enum.Enum):
      ANALYZE = "analyze_log"
      ANALYZE_STAGED = "analyze_log_staged"
      ANALYZE_STREAM = "analyze_log_stream"
+     ANALYZE_GITLAB_JOB = "analyze_gitlab_job"


  class AnalyzeRequestMetrics(Base):
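The new enum member is what lets the metrics layer tag requests coming from the GitLab-job endpoint; add_new_metrics looks members up by value, so a quick sanity check (illustrative only):

assert EndpointType("analyze_gitlab_job") is EndpointType.ANALYZE_GITLAB_JOB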
@@ -44,11 +55,17 @@ class AnalyzeRequestMetrics(Base):
          default=datetime.datetime.now(datetime.timezone.utc),
          comment="Timestamp when the request was received",
      )
-     log_url = Column(
-         String,
+     compressed_log = Column(
+         LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
          nullable=False,
          index=False,
-         comment="Log url for which analysis was requested",
+         comment="Log processed, saved in a zip format",
+     )
+     compressed_response = Column(
+         LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
+         nullable=True,
+         index=False,
+         comment="Given response (with explanation and snippets) saved in a zip format",
      )
      response_sent_at = Column(
          DateTime, nullable=True, comment="Timestamp when the response was sent back"
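The schema change above replaces the plain log_url string with two LargeBinary columns holding zipped payloads. A minimal sketch of producing bytes that fit such a column, assuming a plain zip container (the package's own TextCompressor is not shown in this diff):

import io
import zipfile

def zip_log_text(text: str, name: str = "log.txt") -> bytes:
    # Pack the log text into an in-memory zip archive and return the raw bytes.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        archive.writestr(name, text)
    return buffer.getvalue()

compressed = zip_log_text("example build log contents\n")
assert len(compressed) <= 314572800  # must stay under the 300MB column limit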
@@ -60,11 +77,22 @@ class AnalyzeRequestMetrics(Base):
          Float, nullable=True, comment="Certainty for generated response"
      )

+     merge_request_job_id = Column(
+         Integer,
+         ForeignKey("gitlab_merge_request_jobs.id"),
+         nullable=True,
+         index=False,
+         comment="Is this an analyze request coming from a merge request?",
+     )
+
+     mr_job = relationship("GitlabMergeRequestJobs", back_populates="request_metrics")
+
      @classmethod
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
      def create(
          cls,
          endpoint: EndpointType,
-         log_url: str,
+         compressed_log: io.BytesIO,
          request_received_at: Optional[datetime.datetime] = None,
      ) -> int:
          """Create AnalyzeRequestMetrics new line
@@ -72,31 +100,98 @@ class AnalyzeRequestMetrics(Base):
          with transaction(commit=True) as session:
              metrics = AnalyzeRequestMetrics()
              metrics.endpoint = endpoint
+             metrics.compressed_log = compressed_log
              metrics.request_received_at = request_received_at or datetime.datetime.now(
                  datetime.timezone.utc
              )
-             metrics.log_url = log_url
              session.add(metrics)
              session.flush()
              return metrics.id

      @classmethod
-     def update(
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+     def update(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
          cls,
          id_: int,
          response_sent_at: datetime,
          response_length: int,
          response_certainty: float,
+         compressed_response: bytes,
      ) -> None:
-         """Update an AnalyzeRequestMetrics line
+         """Update a row
          with data related to the given response"""
          with transaction(commit=True) as session:
              metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
              metrics.response_sent_at = response_sent_at
              metrics.response_length = response_length
              metrics.response_certainty = response_certainty
+             metrics.compressed_response = compressed_response
              session.add(metrics)

+     @classmethod
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+     def get_metric_by_id(
+         cls,
+         id_: int,
+     ) -> "AnalyzeRequestMetrics":
+         """Get the AnalyzeRequestMetrics row
+         with the given id."""
+         with transaction(commit=True) as session:
+             metric = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+             return metric
+
+     def add_mr_job(
+         self,
+         forge: Forge,
+         project_id: int,
+         mr_iid: int,
+         job_id: int,
+     ) -> None:
+         """This request was triggered by a merge request job.
+         Link it.
+
+         Args:
+           forge: forge name
+           project_id: forge project id
+           mr_iid: merge request forge iid
+           job_id: forge job id
+         """
+         mr_job = GitlabMergeRequestJobs.get_or_create(forge, project_id, mr_iid, job_id)
+         self.merge_request_job_id = mr_job.id
+         with transaction(commit=True) as session:
+             session.merge(self)
+
+     @classmethod
+     def get_requests_metrics_for_mr_job(
+         cls,
+         forge: Forge,
+         project_id: int,
+         mr_iid: int,
+         job_id: int,
+     ) -> List["AnalyzeRequestMetrics"]:
+         """Search for all requests triggered by the specified merge request job.
+
+         Args:
+           forge: forge name
+           project_id: forge project id
+           mr_iid: merge request forge iid
+           job_id: forge job id
+         """
+         with transaction(commit=False) as session:
+             mr_job_alias = aliased(GitlabMergeRequestJobs)
+             metrics = (
+                 session.query(cls)
+                 .join(mr_job_alias, cls.merge_request_job_id == mr_job_alias.id)
+                 .filter(
+                     mr_job_alias.forge == forge,
+                     mr_job_alias.mr_iid == mr_iid,
+                     mr_job_alias.project_id == project_id,
+                     mr_job_alias.job_id == job_id,
+                 )
+                 .all()
+             )
+             return metrics
+
      @classmethod
      def get_postgres_time_format(cls, time_format):
          """Map python time format in the PostgreSQL format."""
@@ -140,31 +235,6 @@ class AnalyzeRequestMetrics(Base):
          )
          return requests_by_time_format

-     @classmethod
-     def _get_requests_by_time_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get total requests number in time period.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
-         """
-         requests_by_time_format = (
-             select(
-                 cls.id,
-                 func.strftime(time_format, cls.request_received_at).label(
-                     "time_format"
-                 ),
-             )
-             .filter(cls.request_received_at.between(start_time, end_time))
-             .filter(cls.endpoint == endpoint)
-             .cte("requests_by_time_format")
-         )
-         return requests_by_time_format
-
      @classmethod
      def get_requests_in_period(
          cls,
@@ -234,41 +304,6 @@ class AnalyzeRequestMetrics(Base):
          results = session.execute(average_responses_times).fetchall()
          return results

-     @classmethod
-     def _get_average_responses_times_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get average responses time.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics).should_receive("_get_average_responses_times_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_average_responses_times_for_sqlite)
-         """
-         with transaction(commit=False) as session:
-             average_responses_times = (
-                 select(
-                     func.strftime(time_format, cls.request_received_at).label(
-                         "time_range"
-                     ),
-                     (
-                         func.avg(
-                             func.julianday(cls.response_sent_at)
-                             - func.julianday(cls.request_received_at)  # noqa: W503 flake8 vs ruff
-                         )
-                         * 86400  # noqa: W503 flake8 vs ruff
-                     ).label("average_response_seconds"),
-                 )
-                 .filter(cls.request_received_at.between(start_time, end_time))
-                 .filter(cls.endpoint == endpoint)
-                 .group_by("time_range")
-                 .order_by("time_range")
-             )
-
-             results = session.execute(average_responses_times).fetchall()
-             return results
-
      @classmethod
      def get_responses_average_time_in_period(
          cls,
@@ -328,36 +363,6 @@ class AnalyzeRequestMetrics(Base):
          results = session.execute(average_responses_lengths).fetchall()
          return results

-     @classmethod
-     def _get_average_responses_lengths_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get average responses length.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics)
-         .should_receive("_get_average_responses_lengths_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_average_responses_lengths_for_sqlite)
-         """
-         with transaction(commit=False) as session:
-             average_responses_lengths = (
-                 select(
-                     func.strftime(time_format, cls.request_received_at).label(
-                         "time_range"
-                     ),
-                     (func.avg(cls.response_length)).label("average_responses_length"),
-                 )
-                 .filter(cls.request_received_at.between(start_time, end_time))
-                 .filter(cls.endpoint == endpoint)
-                 .group_by("time_range")
-                 .order_by("time_range")
-             )
-
-             results = session.execute(average_responses_lengths).fetchall()
-             return results
-
      @classmethod
      def get_responses_average_length_in_period(
          cls,
@@ -1,25 +1,40 @@
- import datetime
+ import io
  import inspect
+ import logging
+ import datetime
+
  from typing import Union
  from functools import wraps

+ import aiohttp
+
  from starlette.responses import StreamingResponse
  from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+ from logdetective.server.remote_log import RemoteLog
  from logdetective.server import models
+ from logdetective.server.compressors import LLMResponseCompressor
+
+ LOG = logging.getLogger("logdetective")


- def add_new_metrics(
-     api_name: str, build_log: models.BuildLog, received_at: datetime.datetime = None
+ async def add_new_metrics(
+     api_name: str,
+     url: str,
+     http_session: aiohttp.ClientSession,
+     received_at: datetime.datetime = None,
+     compressed_log_content: io.BytesIO = None,
  ) -> int:
      """Add a new database entry for a received request.

      This will store the time when this function is called,
      the endpoint from where the request was received,
-     and the log for which analysis is requested.
+     and the log (in a zip format) for which analysis is requested.
      """
+     remote_log = RemoteLog(url, http_session)
+     compressed_log_content = compressed_log_content or await remote_log.zip_content()
      return AnalyzeRequestMetrics.create(
          endpoint=EndpointType(api_name),
-         log_url=build_log.url,
+         compressed_log=compressed_log_content,
          request_received_at=received_at
          if received_at
          else datetime.datetime.now(datetime.timezone.utc),
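add_new_metrics is now a coroutine: it takes the log URL plus an aiohttp session and downloads and zips the log itself unless pre-compressed content is passed in. A sketch of calling it directly, with a placeholder URL:

import asyncio
import aiohttp

async def record_request() -> int:
    async with aiohttp.ClientSession() as session:
        # "analyze_log" must match an EndpointType value; the URL is a placeholder.
        return await add_new_metrics("analyze_log", "https://example.org/build.log", session)

metrics_id = asyncio.run(record_request())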
@@ -37,6 +52,15 @@ def update_metrics(
      This will add to the database entry the time when the response was sent,
      the length of the created response and the certainty for it.
      """
+     try:
+         compressed_response = LLMResponseCompressor(response).zip_response()
+     except AttributeError as e:
+         compressed_response = None
+         LOG.warning(
+             "Given response can not be serialized "
+             "and saved in db (probably a StreamingResponse): %s.", e
+         )
+
      response_sent_at = (
          sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
      )
@@ -49,11 +73,15 @@ def update_metrics(
          response.response_certainty if hasattr(response, "response_certainty") else None
      )
      AnalyzeRequestMetrics.update(
-         metrics_id, response_sent_at, response_length, response_certainty
+         metrics_id,
+         response_sent_at,
+         response_length,
+         response_certainty,
+         compressed_response,
      )


- def track_request():
+ def track_request(name=None):
      """
      Decorator to track requests metrics
      """
@@ -61,20 +89,16 @@ def track_request():
      def decorator(f):
          @wraps(f)
          async def async_decorated_function(*args, **kwargs):
-             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             log_url = kwargs["build_log"].url
+             metrics_id = await add_new_metrics(
+                 name if name else f.__name__, log_url, kwargs["http_session"]
+             )
              response = await f(*args, **kwargs)
              update_metrics(metrics_id, response)
              return response

-         @wraps(f)
-         def sync_decorated_function(*args, **kwargs):
-             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
-             response = f(*args, **kwargs)
-             update_metrics(metrics_id, response)
-             return response
-
          if inspect.iscoroutinefunction(f):
              return async_decorated_function
-         return sync_decorated_function
+         raise NotImplementedError("An async coroutine is needed")

      return decorator
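track_request now accepts an optional endpoint name, supports only coroutines, and reads build_log and http_session from the decorated function's keyword arguments. A hedged sketch of decorating a handler (the handler body and its call site are illustrative only):

@track_request(name="analyze_gitlab_job")
async def analyze_gitlab_job(build_log: models.BuildLog, http_session: aiohttp.ClientSession):
    ...  # produce and return the analysis response

# Must be invoked with keyword arguments, since the wrapper looks them up in kwargs:
# response = await analyze_gitlab_job(build_log=log, http_session=session)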
@@ -1,9 +1,14 @@
  import datetime
  from logging import BASIC_FORMAT
  from typing import List, Dict, Optional, Literal
-
- from pydantic import BaseModel, Field, model_validator, field_validator, NonNegativeFloat
-
+ from pydantic import (
+     BaseModel,
+     Field,
+     model_validator,
+     field_validator,
+     NonNegativeFloat,
+     HttpUrl,
+ )
  from logdetective.constants import DEFAULT_TEMPERATURE

@@ -177,6 +182,8 @@ class GeneralConfig(BaseModel):
      """General config options for Log Detective"""

      packages: List[str] = None
+     devmode: bool = False
+     sentry_dsn: HttpUrl | None = None

      def __init__(self, data: Optional[dict] = None):
          super().__init__()
@@ -184,6 +191,8 @@ class GeneralConfig(BaseModel):
              return

          self.packages = data.get("packages", [])
+         self.devmode = data.get("devmode", False)
+         self.sentry_dsn = data.get("sentry_dsn")


  class Config(BaseModel):
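GeneralConfig now recognizes two new keys. A sketch of constructing it from a plain dict, mirroring what a parsed config file would provide; the Sentry DSN is a placeholder:

cfg = GeneralConfig(
    {
        "packages": ["python-requests"],
        "devmode": True,
        "sentry_dsn": "https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    }
)
assert cfg.devmode is True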
@@ -0,0 +1,109 @@
+ import io
+ import logging
+ from typing import Union
+ from urllib.parse import urlparse
+
+ import aiohttp
+
+ from logdetective.server.compressors import TextCompressor
+
+
+ LOG = logging.getLogger("logdetective")
+
+
+ class RemoteLog:
+     """
+     Handles retrieval and compression of remote log files.
+     """
+
+     LOG_FILE_NAME = "log.txt"
+     COMPRESSOR = TextCompressor()
+
+     def __init__(self, url: str, http_session: aiohttp.ClientSession):
+         """
+         Initialize with a remote log URL and HTTP session.
+
+         Args:
+             url: A remote URL pointing to a log file
+             http_session: The HTTP session used to retrieve the remote file
+         """
+         self._url = url
+         self._http_session = http_session
+
+     @property
+     def url(self) -> str:
+         """The remote log url."""
+         return self._url
+
+     @property
+     async def content(self) -> str:
+         """Content of the url."""
+         return await self.get_url_content()
+
+     @classmethod
+     def zip_text(cls, text: str) -> bytes:
+         """
+         Compress the given text.
+
+         Returns:
+             bytes: Compressed text
+         """
+         return cls.COMPRESSOR.zip({cls.LOG_FILE_NAME: text})
+
+     async def zip_content(self) -> bytes:
+         """
+         Compress the content of the remote log.
+
+         Returns:
+             bytes: Compressed log content
+         """
+         content_text = await self.content
+         return self.zip_text(content_text)
+
+     @classmethod
+     def unzip(cls, zip_data: Union[bytes, io.BytesIO]) -> str:
+         """
+         Uncompress the zipped content of the remote log.
+
+         Args:
+             zip_data: Compressed data as bytes or BytesIO
+
+         Returns:
+             str: The decompressed log content
+         """
+         return cls.COMPRESSOR.unzip(zip_data)[cls.LOG_FILE_NAME]
+
+     def validate_url(self) -> bool:
+         """Validate incoming URL to be at least somewhat sensible for log files
+         Only http and https protocols permitted. No result, params or query fields allowed.
+         Either netloc or path must have non-zero length.
+         """
+         result = urlparse(self.url)
+         if result.scheme not in ["http", "https"]:
+             return False
+         if any([result.params, result.query, result.fragment]):
+             return False
+         if not (result.path or result.netloc):
+             return False
+         return True
+
+     async def get_url_content(self) -> str:
+         """validate log url and return log text."""
+         if self.validate_url():
+             LOG.debug("process url %s", self.url)
+             try:
+                 response = await self._http_session.get(self.url, raise_for_status=True)
+             except aiohttp.ClientResponseError as ex:
+                 raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+             return await response.text()
+         LOG.error("Invalid URL received")
+         raise RuntimeError(f"Invalid log URL: {self.url}")
+
+     async def process_url(self) -> str:
+         """Validate log URL and return log text."""
+         try:
+             return await self.get_url_content()
+         except RuntimeError as ex:
+             raise aiohttp.HTTPException(
+                 status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+             ) from ex
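End to end, the new RemoteLog helper fetches a log over HTTP, compresses it for storage, and can round-trip it back to text. A usage sketch with a placeholder URL:

import asyncio
import aiohttp

async def fetch_and_store() -> bytes:
    async with aiohttp.ClientSession() as session:
        remote_log = RemoteLog("https://example.org/build.log", session)
        compressed = await remote_log.zip_content()  # bytes for the compressed_log column
        print(len(RemoteLog.unzip(compressed)))      # round-trips back to the raw log text
        return compressed

asyncio.run(fetch_and_store())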