PyPI - logdetective - Versions diffs - 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl - Mend

logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

logdetective/server/config.py +1 -1
logdetective/server/emoji.py +46 -48
logdetective/server/gitlab.py +21 -8
logdetective/server/llm.py +38 -12
logdetective/server/models.py +66 -259
logdetective/server/server.py +199 -32
{logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/METADATA +2 -2
{logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/RECORD +11 -11
{logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/WHEEL +0 -0
{logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/entry_points.txt +0 -0
{logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/licenses/LICENSE +0 -0

logdetective/server/models.py CHANGED Viewed

@@ -1,8 +1,6 @@
-import asyncio
-from collections import defaultdict
 import datetime
 from logging import BASIC_FORMAT
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any
 from pydantic import (
     BaseModel,
     Field,
@@ -10,14 +8,8 @@ from pydantic import (
     field_validator,
     NonNegativeFloat,
     HttpUrl,
-    PrivateAttr,
 )
-import aiohttp
-from aiolimiter import AsyncLimiter
-from gitlab import Gitlab
-import koji
 from logdetective.constants import (
     DEFAULT_TEMPERATURE,
@@ -26,8 +18,6 @@ from logdetective.constants import (
     SYSTEM_ROLE_DEFAULT,
     USER_ROLE_DEFAULT,
 )
-from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
 from logdetective.utils import check_csgrep
@@ -177,40 +167,14 @@ class InferenceConfig(BaseModel):  # pylint: disable=too-many-instance-attribute
     # OpenAI client library requires a string to be specified for API token
     # even if it is not checked on the server side
     api_token: str = "None"
-    model: str = ""
+    model: str = "default-model"
     temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
     max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
     http_timeout: float = 5.0
     user_role: str = USER_ROLE_DEFAULT
     system_role: str = SYSTEM_ROLE_DEFAULT
     llm_api_timeout: float = 15.0
-    _limiter: AsyncLimiter = PrivateAttr(
-        default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        self.max_tokens = data.get("max_tokens", -1)
-        self.log_probs = data.get("log_probs", True)
-        self.url = data.get("url", "")
-        self.http_timeout = data.get("http_timeout", 5.0)
-        self.api_token = data.get("api_token", "None")
-        self.model = data.get("model", "default-model")
-        self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
-        self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
-        self.user_role = data.get("user_role", USER_ROLE_DEFAULT)
-        self.system_role = data.get("system_role", SYSTEM_ROLE_DEFAULT)
-        self._requests_per_minute = data.get(
-            "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
-        )
-        self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
-        self._limiter = AsyncLimiter(self._requests_per_minute)
-    def get_limiter(self):
-        """Return the limiter object so it can be used as a context manager"""
-        return self._limiter
+    requests_per_minute: int = LLM_DEFAULT_REQUESTS_PER_MINUTE
 class ExtractorConfig(BaseModel):
@@ -221,64 +185,25 @@ class ExtractorConfig(BaseModel):
     max_snippet_len: int = 2000
     csgrep: bool = False
-    _extractors: List[Extractor] = PrivateAttr(default_factory=list)
-    def _setup_extractors(self):
-        """Initialize extractors with common settings."""
-        self._extractors = [
-            DrainExtractor(
-                verbose=self.verbose,
-                max_snippet_len=self.max_snippet_len,
-                max_clusters=self.max_clusters,
-            )
-        ]
-        if self.csgrep:
-            self._extractors.append(
-                CSGrepExtractor(
-                    verbose=self.verbose,
-                    max_snippet_len=self.max_snippet_len,
-                )
-            )
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__(data=data)
-        if data is None:
-            self._setup_extractors()
-            return
-        self.max_clusters = data.get("max_clusters", 8)
-        self.verbose = data.get("verbose", False)
-        self.max_snippet_len = data.get("max_snippet_len", 2000)
-        self.csgrep = data.get("csgrep", False)
-        self._setup_extractors()
-    def get_extractors(self) -> List[Extractor]:
-        """Return list of initialized extractors, each will be applied in turn
-        on original log text to retrieve snippets."""
-        return self._extractors
-    @field_validator("csgrep", mode="after")
+    @field_validator("csgrep", mode="before")
     @classmethod
-    def validate_csgrep(cls, value: bool) -> bool:
-        """Verify that csgrep is available if requested."""
-        if not check_csgrep():
+    def verify_csgrep(cls, v: bool):
+        """Verify presence of csgrep binary if csgrep extractor is requested."""
+        if v and not check_csgrep():
             raise ValueError(
                 "Requested csgrep extractor but `csgrep` binary is not in the PATH"
             )
-        return value
+        return v
 class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
     """Model for GitLab configuration of logdetective server."""
-    name: str = None
-    url: str = None
+    name: str
+    url: str = "https://gitlab.com"
     # Path to API of the gitlab instance, assuming `url` as prefix.
-    api_path: str = None
-    api_token: str = None
+    api_path: str = "/api/v4"
+    api_token: Optional[str] = None
     # This is a list to support key rotation.
     # When the key is being changed, we will add the new key as a new entry in
@@ -289,69 +214,17 @@ class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attr
     webhook_secrets: Optional[List[str]] = None
     timeout: float = 5.0
-    _conn: Gitlab | None = PrivateAttr(default=None)
-    _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)
     # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
     max_artifact_size: int = 300 * 1024 * 1024
-    def __init__(self, name: str, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        self.name = name
-        self.url = data.get("url", "https://gitlab.com")
-        self.api_path = data.get("api_path", "/api/v4")
-        self.api_token = data.get("api_token", None)
-        self.webhook_secrets = data.get("webhook_secrets", None)
-        self.max_artifact_size = int(data.get("max_artifact_size", 300)) * 1024 * 1024
-        self.timeout = data.get("timeout", 5.0)
-        self._conn = Gitlab(
-            url=self.url,
-            private_token=self.api_token,
-            timeout=self.timeout,
-        )
-    def get_connection(self):
-        """Get the Gitlab connection object"""
-        return self._conn
-    def get_http_session(self):
-        """Return the internal HTTP session so it can be used to contect the
-        Gitlab server. May be used as a context manager."""
-        # Create the session on the first attempt. We need to do this "lazily"
-        # because it needs to happen once the event loop is running, even
-        # though the initialization itself is synchronous.
-        if not self._http_session:
-            self._http_session = aiohttp.ClientSession(
-                base_url=self.url,
-                headers={"Authorization": f"Bearer {self.api_token}"},
-                timeout=aiohttp.ClientTimeout(
-                    total=self.timeout,
-                    connect=3.07,
-                ),
-            )
-        return self._http_session
-    def __del__(self):
-        # Close connection when this object is destroyed
-        if self._http_session:
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(self._http_session.close())
-            except RuntimeError:
-                # No loop running, so create one to close the session
-                loop = asyncio.new_event_loop()
-                loop.run_until_complete(self._http_session.close())
-                loop.close()
-            except Exception:  # pylint: disable=broad-exception-caught
-                # We should only get here if we're shutting down, so we don't
-                # really care if the close() completes cleanly.
-                pass
+    @field_validator("max_artifact_size", mode="before")
+    @classmethod
+    def megabytes_to_bytes(cls, v: Any):
+        """Convert max_artifact_size from megabytes to bytes."""
+        if isinstance(v, int):
+            return v * 1024 * 1024
+        return 300 * 1024 * 1024
 class GitLabConfig(BaseModel):
@@ -359,63 +232,28 @@ class GitLabConfig(BaseModel):
     instances: Dict[str, GitLabInstanceConfig] = {}
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
+    @model_validator(mode="before")
+    @classmethod
+    def set_gitlab_instance_configs(cls, data: Any):
+        """Initialize configuration for each GitLab instance"""
+        if not isinstance(data, dict):
+            return data
+        instances = {}
         for instance_name, instance_data in data.items():
-            instance = GitLabInstanceConfig(instance_name, instance_data)
-            self.instances[instance.url] = instance
+            instance = GitLabInstanceConfig(name=instance_name, **instance_data)
+            instances[instance.url] = instance
+        return {"instances": instances}
 class KojiInstanceConfig(BaseModel):
     """Model for Koji configuration of logdetective server."""
     name: str = ""
-    xmlrpc_url: str = ""
+    xmlrpc_url: str = "https://koji.fedoraproject.org/kojihub"
     tokens: List[str] = []
-    _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
-    _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))
-    def __init__(self, name: str, data: Optional[dict] = None):
-        super().__init__()
-        self.name = name
-        if data is None:
-            # Set some reasonable defaults
-            self.xmlrpc_url = "https://koji.fedoraproject.org/kojihub"
-            self.tokens = []
-            self.max_artifact_size = 1024 * 1024
-            return
-        self.xmlrpc_url = data.get(
-            "xmlrpc_url", "https://koji.fedoraproject.org/kojihub"
-        )
-        self.tokens = data.get("tokens", [])
-    def get_connection(self):
-        """Get the Koji connection object"""
-        if not self._conn:
-            self._conn = koji.ClientSession(self.xmlrpc_url)
-        return self._conn
-    def register_callback(self, task_id: int, callback: str):
-        """Register a callback for a task"""
-        self._callbacks[task_id].add(callback)
-    def clear_callbacks(self, task_id: int):
-        """Unregister a callback for a task"""
-        try:
-            del self._callbacks[task_id]
-        except KeyError:
-            pass
-    def get_callbacks(self, task_id: int) -> set[str]:
-        """Get the callbacks for a task"""
-        return self._callbacks[task_id]
 class KojiConfig(BaseModel):
     """Model for Koji configuration of logdetective server."""
@@ -424,23 +262,26 @@ class KojiConfig(BaseModel):
     analysis_timeout: int = 15
     max_artifact_size: int = 300 * 1024 * 1024
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        # Handle analysis_timeout with default 15
-        self.analysis_timeout = data.get("analysis_timeout", 15)
-        # Handle max_artifact_size with default 300
-        self.max_artifact_size = data.get("max_artifact_size", 300) * 1024 * 1024
+    @field_validator("max_artifact_size", mode="before")
+    @classmethod
+    def megabytes_to_bytes(cls, v: Any):
+        """Convert max_artifact_size from megabytes to bytes."""
+        if isinstance(v, int):
+            return v * 1024 * 1024
+        return 300 * 1024 * 1024
-        # Handle instances dictionary
-        instances_data = data.get("instances", {})
-        for instance_name, instance_data in instances_data.items():
-            self.instances[instance_name] = KojiInstanceConfig(
-                instance_name, instance_data
-            )
+    @model_validator(mode="before")
+    @classmethod
+    def set_koji_instance_configs(cls, data: Any):
+        """Initialize configuration for each Koji instance."""
+        if isinstance(data, dict):
+            instances = {}
+            for instance_name, instance_data in data.get("instances", {}).items():
+                instances[instance_name] = KojiInstanceConfig(
+                    name=instance_name, **instance_data
+                )
+            data["instances"] = instances
+        return data
 class LogConfig(BaseModel):
@@ -452,17 +293,6 @@ class LogConfig(BaseModel):
     path: str | None = None
     format: str = BASIC_FORMAT
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        self.name = data.get("name", "logdetective")
-        self.level_stream = data.get("level_stream", "INFO").upper()
-        self.level_file = data.get("level_file", "INFO").upper()
-        self.path = data.get("path")
-        self.format = data.get("format", BASIC_FORMAT)
 class GeneralConfig(BaseModel):
     """General config options for Log Detective"""
@@ -474,50 +304,27 @@ class GeneralConfig(BaseModel):
     collect_emojis_interval: int = 60 * 60  # seconds
     top_k_snippets: int = 0
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        self.packages = data.get("packages", [])
-        self.excluded_packages = data.get("excluded_packages", [])
-        self.devmode = data.get("devmode", False)
-        self.sentry_dsn = data.get("sentry_dsn")
-        self.collect_emojis_interval = data.get(
-            "collect_emojis_interval", 60 * 60
-        )  # seconds
-        self.top_k_snippets = data.get("top_k_snippets", 0)
 class Config(BaseModel):
     """Model for configuration of logdetective server."""
-    log: LogConfig = LogConfig()
-    inference: InferenceConfig = InferenceConfig()
-    snippet_inference: InferenceConfig = InferenceConfig()
+    log: LogConfig = Field(default_factory=LogConfig)
+    inference: InferenceConfig = Field(default_factory=InferenceConfig)
+    snippet_inference: InferenceConfig = Field(default_factory=InferenceConfig)
     # TODO(jpodivin): Extend to work with multiple extractor configs
-    extractor: ExtractorConfig = ExtractorConfig()
-    gitlab: GitLabConfig = GitLabConfig()
-    koji: KojiConfig = KojiConfig()
-    general: GeneralConfig = GeneralConfig()
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-        self.log = LogConfig(data.get("log"))
-        self.inference = InferenceConfig(data.get("inference"))
-        self.extractor = ExtractorConfig(data.get("extractor"))
-        self.gitlab = GitLabConfig(data.get("gitlab"))
-        self.koji = KojiConfig(data.get("koji"))
-        self.general = GeneralConfig(data.get("general"))
-        if snippet_inference := data.get("snippet_inference", None):
-            self.snippet_inference = InferenceConfig(snippet_inference)
-        else:
-            self.snippet_inference = self.inference
+    extractor: ExtractorConfig = Field(default_factory=ExtractorConfig)
+    gitlab: GitLabConfig = Field(default_factory=GitLabConfig)
+    koji: KojiConfig = Field(default_factory=KojiConfig)
+    general: GeneralConfig = Field(default_factory=GeneralConfig)
+    @model_validator(mode="before")
+    @classmethod
+    def default_snippet_inference(cls, data: Any):
+        """Use base inference configuration, if specific snippet configuration isn't provided."""
+        if isinstance(data, dict):
+            if "snippet_inference" not in data and "inference" in data:
+                data["snippet_inference"] = data["inference"]
+        return data
 class TimePeriod(BaseModel):

logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl