logdetective 2.1.0__tar.gz → 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {logdetective-2.1.0 → logdetective-2.2.1}/PKG-INFO +28 -1
  2. {logdetective-2.1.0 → logdetective-2.2.1}/README.md +27 -0
  3. logdetective-2.2.1/logdetective/extractors.py +165 -0
  4. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/logdetective.py +25 -12
  5. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/models.py +26 -0
  6. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/llm.py +19 -10
  7. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/models.py +46 -1
  8. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/server.py +9 -9
  9. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/utils.py +9 -27
  10. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/utils.py +48 -4
  11. {logdetective-2.1.0 → logdetective-2.2.1}/pyproject.toml +1 -1
  12. logdetective-2.1.0/logdetective/extractors.py +0 -57
  13. {logdetective-2.1.0 → logdetective-2.2.1}/LICENSE +0 -0
  14. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/__init__.py +0 -0
  15. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/constants.py +0 -0
  16. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/drain3.ini +0 -0
  17. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts-summary-first.yml +0 -0
  18. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts-summary-only.yml +0 -0
  19. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts.yml +0 -0
  20. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/remote_log.py +0 -0
  21. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/__init__.py +0 -0
  22. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/compressors.py +0 -0
  23. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/config.py +0 -0
  24. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/__init__.py +0 -0
  25. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/base.py +0 -0
  26. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/__init__.py +0 -0
  27. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/exceptions.py +0 -0
  28. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/koji.py +0 -0
  29. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/merge_request_jobs.py +0 -0
  30. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/metrics.py +0 -0
  31. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/emoji.py +0 -0
  32. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/exceptions.py +0 -0
  33. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/gitlab.py +0 -0
  34. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/koji.py +0 -0
  35. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/metric.py +0 -0
  36. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/plot.py +0 -0
  37. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
  38. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
  39. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/skip_snippets.yml +0 -0
  40. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective.1.asciidoc +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 2.1.0
+ Version: 2.2.1
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -494,6 +494,33 @@ Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
  can be used as a starting point and is used as a default if no other definition is provided.
 
 
+ Extracting snippets with csgrep
+ -------------------------------
+
+ When working with logs containing messages from GCC, it can be beneficial to employ an
+ additional extractor based on the `csgrep` tool, to ensure that the messages are kept intact.
+ Since `csgrep` is not available as a Python package, it must be installed separately,
+ with a package manager or from [source](https://github.com/csutils/csdiff).
+
+ The binary is available as part of the `csdiff` package on Fedora.
+
+ ```
+ dnf install csdiff
+ ```
+
+ In the Log Detective CLI, the csgrep extractor can be activated with the `--csgrep` option.
+ In server mode, the `csgrep` field of the `extractor` config needs to be set to `true`.
+
+ ```
+ csgrep: true
+ ```
+
+ Both options are disabled by default, and an error will be produced if the option is used
+ but `csgrep` is not present in the $PATH.
+
+ The container images are built with `csdiff` installed.
+
+
  License
  -------
 
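Editor's note: for orientation, here is a hypothetical end-to-end session on Fedora combining the install step with the new flag. The log path is illustrative; the positional argument may also be a URL.

```
dnf install csdiff
logdetective --csgrep ./build.log
```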
@@ -442,6 +442,33 @@ Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
  can be used as a starting point and is used as a default if no other definition is provided.
 
 
+ Extracting snippets with csgrep
+ -------------------------------
+
+ When working with logs containing messages from GCC, it can be beneficial to employ an
+ additional extractor based on the `csgrep` tool, to ensure that the messages are kept intact.
+ Since `csgrep` is not available as a Python package, it must be installed separately,
+ with a package manager or from [source](https://github.com/csutils/csdiff).
+
+ The binary is available as part of the `csdiff` package on Fedora.
+
+ ```
+ dnf install csdiff
+ ```
+
+ In the Log Detective CLI, the csgrep extractor can be activated with the `--csgrep` option.
+ In server mode, the `csgrep` field of the `extractor` config needs to be set to `true`.
+
+ ```
+ csgrep: true
+ ```
+
+ Both options are disabled by default, and an error will be produced if the option is used
+ but `csgrep` is not present in the $PATH.
+
+ The container images are built with `csdiff` installed.
+
+
  License
  -------
 
@@ -0,0 +1,165 @@
+ import os
+ import logging
+ import subprocess as sp
+ from typing import Tuple
+
+ import drain3
+ from drain3.template_miner_config import TemplateMinerConfig
+ from pydantic import ValidationError
+
+ from logdetective.utils import get_chunks, filter_snippet_patterns
+ from logdetective.models import SkipSnippets, CSGrepOutput
+
+ LOG = logging.getLogger("logdetective")
+
+
+ class Extractor:
+     """Base extractor class."""
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+     ):
+         self.verbose = verbose
+         self.skip_snippets = skip_snippets
+         self.max_snippet_len = max_snippet_len
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         raise NotImplementedError
+
+     def filter_snippet_patterns(
+         self, chunks: list[tuple[int, str]]
+     ) -> list[tuple[int, str]]:
+         """Keep only chunks that don't match any of the excluded patterns"""
+         chunks = [
+             (_, chunk)
+             for _, chunk in chunks
+             if not filter_snippet_patterns(chunk, self.skip_snippets)
+         ]
+         return chunks
+
+
+ class DrainExtractor(Extractor):
+     """A class that extracts information from logs using a template miner algorithm."""
+
+     _clusters: list
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+         max_clusters: int = 8,
+     ):
+         super().__init__(verbose, skip_snippets, max_snippet_len)
+         config = TemplateMinerConfig()
+         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
+         config.profiling_enabled = verbose
+         config.drain_max_clusters = max_clusters
+         self.miner = drain3.TemplateMiner(config=config)
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         # Create chunks
+         chunks = list(get_chunks(log, self.max_snippet_len))
+
+         chunks = self.filter_snippet_patterns(chunks)
+
+         # First pass to create clusters
+         self._create_clusters(chunks=chunks)
+
+         # Second pass, only matching lines with clusters,
+         # to recover original text
+         snippets = self._extract_messages(chunks=chunks)
+         return snippets
+
+     def _create_clusters(self, chunks: list[tuple[int, str]]):
+         """First pass to create clusters"""
+         for _, chunk in chunks:
+             processed_chunk = self.miner.add_log_message(chunk)
+             LOG.debug(processed_chunk)
+         self._clusters = list(self.miner.drain.clusters)
+
+     def _extract_messages(self, chunks: list[tuple[int, str]]) -> list[tuple[int, str]]:
+         """Second pass with drain using patterns from the first,
+         to extract matching lines and their numbers."""
+         out = []
+
+         for chunk_start, chunk in chunks:
+             cluster = self.miner.match(chunk, "always")
+             if cluster in self._clusters:
+                 out.append((chunk_start, chunk))
+                 self._clusters.remove(cluster)
+         return out
+
+
+ class CSGrepExtractor(DrainExtractor):
+     """Extract messages using csgrep
+     This extractor is only effective at retrieving messages from the GCC
+     compiler and associated utilities; it is not capable of safely
+     extracting other messages from the logs. Therefore, it must only
+     be used together with the Drain based extractor."""
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+         max_clusters: int = 8,
+     ):
+         super().__init__(verbose, skip_snippets, max_snippet_len, max_clusters)
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         """Extract error messages from log using csgrep"""
+         chunks = []
+         try:
+             # We are not running the binary in check mode, since csgrep
+             # can produce many errors due to log file syntax
+             result = sp.run(
+                 [
+                     "csgrep",
+                     "--event=error",
+                     "--remove-duplicates",
+                     "--mode=json",
+                     "--quiet",
+                 ],
+                 input=log,
+                 shell=False,
+                 check=False,
+                 capture_output=True,
+                 text=True,
+                 timeout=1.0,
+             )
+         except sp.TimeoutExpired as ex:
+             LOG.exception("Exception encountered while parsing log with csgrep %s", ex)
+             raise ex
+         if result.returncode != 0:
+             # This can happen even if `csgrep` managed to extract useful info.
+             # Most commonly, when it encountered unexpected syntax in the log.
+             LOG.warning("csgrep call resulted in an error")
+             LOG.debug("csgrep error: `%s`", result.stderr)
+         if not result.stdout:
+             return []
+
+         # Parse JSON output from csgrep
+         try:
+             report = CSGrepOutput.model_validate_json(result.stdout)
+         except ValidationError as ex:
+             LOG.exception("Exception encountered while parsing csgrep output %s", ex)
+             raise ex
+         for defect in report.defects:
+             # A single original error message can be split across multiple events;
+             # before returning, we turn them back into a single string.
+             # We must also extract the original line number.
+             # The line number is NOT the location of the message in the log, but the
+             # location of the issue in source; we can't mix the two, so we set it to `0`.
+
+             chunks.append((0, "\n".join([event.message for event in defect.events])))
+
+         chunks = self.filter_snippet_patterns(chunks)
+         LOG.info("Total %d messages extracted with csgrep", len(chunks))
+         self._create_clusters(chunks=chunks)
+         snippets = self._extract_messages(chunks=chunks)
+
+         return snippets
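Editor's note: to make the relationship between the two extractors concrete, here is a minimal sketch of how they compose, using only names introduced in this diff; the log path is illustrative.

```
# Illustrative composition of the extractors added in this release.
from logdetective.extractors import DrainExtractor, CSGrepExtractor
from logdetective.utils import check_csgrep, mine_logs

with open("build.log", encoding="utf-8") as f:  # hypothetical log file
    log = f.read()

extractors = [DrainExtractor(max_clusters=8)]
if check_csgrep():  # add the csgrep pass only when the binary is present
    extractors.append(CSGrepExtractor())

# Each extractor runs over the full log; csgrep-derived snippets carry line number 0.
snippets = mine_logs(log, extractors)
```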
@@ -15,8 +15,10 @@ from logdetective.utils import (
      compute_certainty,
      load_prompts,
      load_skip_snippet_patterns,
+     check_csgrep,
+     mine_logs,
  )
- from logdetective.extractors import DrainExtractor
+ from logdetective.extractors import DrainExtractor, CSGrepExtractor
 
  LOG = logging.getLogger("logdetective")
 
@@ -89,10 +91,13 @@ def setup_args():
          default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
          help="Path to patterns for skipping snippets.",
      )
+     parser.add_argument(
+         "--csgrep", action="store_true", help="Use csgrep to process the log."
+     )
      return parser.parse_args()
 
 
- async def run():  # pylint: disable=too-many-statements,too-many-locals
+ async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
      """Main execution function."""
      args = setup_args()
 
@@ -134,13 +139,25 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals
          sys.exit(5)
 
      # Log file summarizer initialization
-     extractor = DrainExtractor(
-         args.verbose > 1,
-         context=True,
-         max_clusters=args.n_clusters,
-         skip_snippets=skip_snippets,
+     extractors = []
+     extractors.append(
+         DrainExtractor(
+             args.verbose > 1,
+             max_clusters=args.n_clusters,
+             skip_snippets=skip_snippets,
+         )
      )
 
+     if args.csgrep:
+         if not check_csgrep():
+             LOG.error(
+                 "You have requested use of `csgrep` when it isn't available on your system."
+             )
+             sys.exit(6)
+         extractors.append(
+             CSGrepExtractor(args.verbose > 1, skip_snippets=skip_snippets)
+         )
+
      LOG.info("Getting summary")
 
      async with aiohttp.ClientSession() as http:
@@ -150,12 +167,8 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals
              # file does not exist
              LOG.error(e)
              sys.exit(4)
-         log_summary = extractor(log)
-
-         ratio = len(log_summary) / len(log.split("\n"))
-
-         LOG.info("Compression ratio: %s", ratio)
 
+         log_summary = mine_logs(log=log, extractors=extractors)
          LOG.info("Analyzing the text")
 
          log_summary = format_snippets(log_summary)
@@ -71,3 +71,29 @@ class SkipSnippets(BaseModel):
              ) from ex
 
          return data
+
+
+ class CSGrepEvent(BaseModel):
+     """`csgrep` splits error and warning messages into individual events."""
+
+     file_name: str
+     line: int
+     event: str
+     message: str
+     verbosity_level: int
+
+
+ class CSGrepDefect(BaseModel):
+     """Defects detected by `csgrep`"""
+
+     checker: str
+     language: str
+     tool: str
+     key_event_idx: int
+     events: list[CSGrepEvent]
+
+
+ class CSGrepOutput(BaseModel):
+     """Parsed output of `csgrep`"""
+
+     defects: list[CSGrepDefect]
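Editor's note: the three models above appear to mirror the JSON that `csgrep --mode=json` emits. A minimal sketch of parsing a document shaped to satisfy them; the field values are invented, and real csgrep output may carry extra fields, which pydantic ignores by default.

```
from logdetective.models import CSGrepOutput

# Invented sample shaped to match the models above.
sample = '''
{"defects": [{"checker": "COMPILER_WARNING", "language": "c/c++",
  "tool": "gcc", "key_event_idx": 0,
  "events": [{"file_name": "main.c", "line": 42, "event": "error",
    "message": "expected ';' before '}' token", "verbosity_level": 0}]}]}
'''

report = CSGrepOutput.model_validate_json(sample)
print(report.defects[0].events[0].message)
```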
@@ -1,6 +1,7 @@
  import os
  import asyncio
  import random
+ import time
  from typing import List, Tuple, Dict
 
  import backoff
@@ -15,6 +16,7 @@ from logdetective.utils import (
      compute_certainty,
      prompt_to_messages,
      format_snippets,
+     mine_logs,
  )
  from logdetective.server.config import (
      LOG,
@@ -33,10 +35,10 @@ from logdetective.server.models import (
  )
  from logdetective.server.utils import (
      format_analyzed_snippets,
-     mine_logs,
      should_we_giveup,
      we_give_up,
      filter_snippets,
+     construct_final_prompt,
  )
 
 
@@ -184,10 +186,13 @@ async def analyze_snippets(
 
  async def perfrom_analysis(log_text: str) -> Response:
      """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-     log_summary = mine_logs(log_text)
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
      log_summary = format_snippets(log_summary)
+
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
      messages = prompt_to_messages(
-         PROMPT_CONFIG.prompt_template.format(log_summary),
+         final_prompt,
          PROMPT_CONFIG.default_system_prompt,
          SERVER_CONFIG.inference.system_role,
          SERVER_CONFIG.inference.user_role,
@@ -213,10 +218,13 @@ async def perfrom_analysis(log_text: str) -> Response:
 
  async def perform_analyis_stream(log_text: str) -> AsyncStream:
      """Submit log file snippets in aggregate and return a stream of tokens"""
-     log_summary = mine_logs(log_text)
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
      log_summary = format_snippets(log_summary)
+
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
      messages = prompt_to_messages(
-         PROMPT_CONFIG.prompt_template.format(log_summary),
+         final_prompt,
          PROMPT_CONFIG.default_system_prompt,
          SERVER_CONFIG.inference.system_role,
          SERVER_CONFIG.inference.user_role,
@@ -235,8 +243,8 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
 
  async def perform_staged_analysis(log_text: str) -> StagedResponse:
      """Submit the log file snippets to the LLM and retrieve their results"""
-     log_summary = mine_logs(log_text)
-
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+     start = time.time()
      if SERVER_CONFIG.general.top_k_snippets:
          rated_snippets = await analyze_snippets(
              log_summary=log_summary,
@@ -265,10 +273,11 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
          AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
          for e in zip(log_summary, processed_snippets)
      ]
+     delta = time.time() - start
+     LOG.info("Snippet analysis performed in %f s", delta)
+     log_summary = format_analyzed_snippets(processed_snippets)
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)
 
-     final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
-         format_analyzed_snippets(processed_snippets)
-     )
      messages = prompt_to_messages(
          final_prompt,
          PROMPT_CONFIG.staged_system_prompt,
@@ -26,6 +26,9 @@ from logdetective.constants import (
      USER_ROLE_DEFAULT,
  )
 
+ from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
+ from logdetective.utils import check_csgrep
+
 
 
  class BuildLog(BaseModel):
@@ -247,15 +250,56 @@ class ExtractorConfig(BaseModel):
      max_clusters: int = 8
      verbose: bool = False
      max_snippet_len: int = 2000
+     csgrep: bool = False
+
+     _extractors: List[Extractor] = []
+
+     def _setup_extractors(self):
+         """Initialize extractors with common settings."""
+         self._extractors = [
+             DrainExtractor(
+                 verbose=self.verbose,
+                 max_snippet_len=self.max_snippet_len,
+                 max_clusters=self.max_clusters,
+             )
+         ]
+
+         if self.csgrep:
+             self._extractors.append(
+                 CSGrepExtractor(
+                     verbose=self.verbose,
+                     max_snippet_len=self.max_snippet_len,
+                 )
+             )
 
      def __init__(self, data: Optional[dict] = None):
-         super().__init__()
+         super().__init__(data=data)
+
          if data is None:
+             self._setup_extractors()
              return
 
          self.max_clusters = data.get("max_clusters", 8)
          self.verbose = data.get("verbose", False)
          self.max_snippet_len = data.get("max_snippet_len", 2000)
+         self.csgrep = data.get("csgrep", False)
+
+         self._setup_extractors()
+
+     def get_extractors(self) -> List[Extractor]:
+         """Return list of initialized extractors, each will be applied in turn
+         on original log text to retrieve snippets."""
+         return self._extractors
+
+     @field_validator("csgrep", mode="after")
+     @classmethod
+     def validate_csgrep(cls, value: bool) -> bool:
+         """Verify that csgrep is available if requested."""
+         if not check_csgrep():
+             raise ValueError(
+                 "Requested csgrep extractor but `csgrep` binary is not in the PATH"
+             )
+         return value
 
 
  class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
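Editor's note: a sketch of how this config is exercised server-side, under the assumption that the `csgrep` binary is installed (otherwise `validate_csgrep` raises a ValueError):

```
from logdetective.server.models import ExtractorConfig

# Assumes `csgrep` is on the PATH; validate_csgrep raises otherwise.
config = ExtractorConfig({"max_clusters": 8, "verbose": False, "csgrep": True})

for extractor in config.get_extractors():
    # Expected: DrainExtractor, then CSGrepExtractor
    print(type(extractor).__name__)
```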
@@ -481,6 +525,7 @@ class Config(BaseModel):
      log: LogConfig = LogConfig()
      inference: InferenceConfig = InferenceConfig()
      snippet_inference: InferenceConfig = InferenceConfig()
+     # TODO(jpodivin): Extend to work with multiple extractor configs
      extractor: ExtractorConfig = ExtractorConfig()
      gitlab: GitLabConfig = GitLabConfig()
      koji: KojiConfig = KojiConfig()
@@ -106,35 +106,35 @@ async def get_http_session(request: Request) -> aiohttp.ClientSession:
      return request.app.http
 
 
- def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
+ def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
      """
-     FastAPI Depend function that expects a header named Authentication
+     FastAPI Depend function that expects a header named Authorization
 
      If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
      otherwise ignore it
      """
      if not API_TOKEN:
-         LOG.info("LOGDETECTIVE_TOKEN env var not set, authentication disabled")
+         LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
          # no token required, means local dev environment
          return
-     if authentication:
+     if authorization:
          try:
-             token = authentication.split(" ", 1)[1]
+             token = authorization.split(" ", 1)[1]
          except (ValueError, IndexError) as ex:
              LOG.warning(
-                 "Authentication header has invalid structure '%s', it should be 'Bearer TOKEN'",
-                 authentication,
+                 "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+                 authorization,
              )
              # eat the exception and raise 401 below
              raise HTTPException(
                  status_code=401,
-                 detail=f"Invalid authentication, HEADER '{authentication}' not valid.",
+                 detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
              ) from ex
          if token == API_TOKEN:
              return
          LOG.info("Provided token '%s' does not match expected value.", token)
          raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
-     LOG.error("No authentication header provided but LOGDETECTIVE_TOKEN env var is set")
+     LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
      raise HTTPException(status_code=401, detail="No token provided.")
 
 
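Editor's note: with the header renamed, a client authenticates with a standard Authorization header. A hedged sketch follows; the endpoint path and payload shape are assumptions based on the BuildLog model, not confirmed by this diff.

```
import requests

resp = requests.post(
    "http://localhost:8080/analyze",  # hypothetical deployment and route
    json={"url": "https://example.org/build.log"},  # assumed BuildLog payload
    headers={"Authorization": "Bearer MY_TOKEN"},  # header the server now reads
    timeout=60,
)
print(resp.status_code)
```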
@@ -1,15 +1,10 @@
- from typing import List, Tuple
+ from typing import List
 
  import aiohttp
  from fastapi import HTTPException
 
  from logdetective.constants import SNIPPET_DELIMITER
- from logdetective.extractors import DrainExtractor
- from logdetective.server.config import (
-     LOG,
-     SERVER_CONFIG,
-     SKIP_SNIPPETS_CONFIG,
- )
+ from logdetective.server.config import LOG
  from logdetective.server.exceptions import LogDetectiveConnectionError
  from logdetective.server.models import AnalyzedSnippet, RatedSnippetAnalysis
 
@@ -22,26 +17,6 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
      return summary
 
 
- def mine_logs(log: str) -> List[Tuple[int, str]]:
-     """Extract snippets from log text"""
-     extractor = DrainExtractor(
-         verbose=True,
-         context=True,
-         max_clusters=SERVER_CONFIG.extractor.max_clusters,
-         skip_snippets=SKIP_SNIPPETS_CONFIG,
-         max_snippet_len=SERVER_CONFIG.extractor.max_snippet_len
-     )
-
-     LOG.info("Getting summary")
-     log_summary = extractor(log)
-
-     ratio = len(log_summary) / len(log.split("\n"))
-     LOG.debug("Log summary: \n %s", log_summary)
-     LOG.info("Compression ratio: %s", ratio)
-
-     return log_summary
-
-
  def connection_error_giveup(details: dict) -> None:
      """Too many connection errors, give up.
      """
@@ -120,3 +95,10 @@ def filter_snippets(
      processed_snippets = sorted(processed_snippets, key=select_line_number)
 
      return processed_snippets
+
+
+ def construct_final_prompt(formatted_snippets: str, prompt_template: str) -> str:
+     """Create final prompt from processed snippets and csgrep output, if it is available."""
+
+     final_prompt = prompt_template.format(formatted_snippets)
+     return final_prompt
@@ -1,5 +1,6 @@
  import logging
  import os
+ import subprocess as sp
  from typing import Iterator, List, Dict, Tuple, Generator
  from urllib.parse import urlparse
 
@@ -8,10 +9,10 @@ import numpy as np
  import yaml
 
  from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
+ from logdetective.constants import SNIPPET_DELIMITER
  from logdetective.models import PromptConfig, SkipSnippets
  from logdetective.remote_log import RemoteLog
 
-
  LOG = logging.getLogger("logdetective")
 
 
@@ -39,7 +40,9 @@ def chunk_continues(text: str, index: int) -> bool:
      return False
 
 
- def get_chunks(text: str, max_len: int = 2000) -> Generator[Tuple[int, str], None, None]:
+ def get_chunks(
+     text: str, max_len: int = 2000
+ ) -> Generator[Tuple[int, str], None, None]:
      """Split log into chunks according to heuristic
      based on whitespace and backslash presence.
      """
@@ -173,14 +176,14 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
  Snippet No. {i} at line #{s[0]}:
 
  {s[1]}
- ================
+ {SNIPPET_DELIMITER}
  """
          else:
              summary += f"""
  Snippet No. {i}:
 
  {s}
- ================
+ {SNIPPET_DELIMITER}
  """
      return summary
 
@@ -247,3 +250,44 @@ def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
          raise e
 
      return SkipSnippets({})
+
+
+ def check_csgrep() -> bool:
+     """Verifies presence of csgrep in path"""
+     try:
+         result = sp.run(
+             ["csgrep", "--version"],
+             text=True,
+             check=True,
+             shell=False,
+             capture_output=True,
+             timeout=1.0,
+         )
+     except (FileNotFoundError, sp.TimeoutExpired, sp.CalledProcessError) as ex:
+         LOG.error("Required binary `csgrep` was not found in path: %s", ex)
+         return False
+     if result.returncode == 0:
+         return True
+     LOG.error("Issue was encountered while calling `csgrep`: `%s`", result.stderr)
+
+     return False
+
+
+ def mine_logs(log: str, extractors: list) -> List[Tuple[int, str]]:
+     """Extract snippets from log text using extractors provided.
+     Each extractor is applied in turn on original log.
+     Depending on characteristics of extractors used, there may be
+     an overlap in snippets extracted."""
+
+     log_summary = []
+
+     LOG.info("Getting summary")
+
+     for extractor in extractors:
+         log_summary.extend(extractor(log))
+
+     ratio = len("\n".join([text for _, text in log_summary])) / len(log)
+     LOG.debug("Log summary: \n %s", log_summary)
+     LOG.info("Snippets: %s Compression ratio: %s", len(log_summary), ratio)
+
+     return log_summary
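Editor's note: the compression ratio reported by mine_logs is now character-based rather than line-based. A self-contained illustration with invented numbers:

```
# Invented numbers, mirroring the ratio computed in mine_logs above.
log = "x" * 10_000                                # 10,000-character log
log_summary = [(0, "x" * 250), (120, "x" * 250)]  # two extracted snippets

joined = "\n".join(text for _, text in log_summary)
ratio = len(joined) / len(log)                    # 501 / 10000 = 0.0501
print(f"Snippets: {len(log_summary)} Compression ratio: {ratio}")
```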
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "logdetective"
- version = "2.1.0"
+ version = "2.2.1"
  description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
  authors = ["Jiri Podivin <jpodivin@gmail.com>"]
  license = "Apache-2.0"
@@ -1,57 +0,0 @@
- import os
- import logging
- from typing import Tuple
-
- import drain3
- from drain3.template_miner_config import TemplateMinerConfig
-
- from logdetective.utils import get_chunks, filter_snippet_patterns
- from logdetective.models import SkipSnippets
-
- LOG = logging.getLogger("logdetective")
-
-
- class DrainExtractor:
-     """A class that extracts information from logs using a template miner algorithm."""
-
-     def __init__(
-         self,
-         verbose: bool = False,
-         context: bool = False,
-         max_clusters=8,
-         skip_snippets: SkipSnippets = SkipSnippets({}),
-         max_snippet_len: int = 2000
-     ):  # pylint: disable=R0913,R0917
-         config = TemplateMinerConfig()
-         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
-         config.profiling_enabled = verbose
-         config.drain_max_clusters = max_clusters
-         self.miner = drain3.TemplateMiner(config=config)
-         self.verbose = verbose
-         self.context = context
-         self.skip_snippets = skip_snippets
-         self.max_snippet_len = max_snippet_len
-
-     def __call__(self, log: str) -> list[Tuple[int, str]]:
-         out = []
-         # Create chunks
-         chunks = list(get_chunks(log, self.max_snippet_len))
-         # Keep only chunks that don't match any of the excluded patterns
-         chunks = [
-             (_, chunk)
-             for _, chunk in chunks
-             if not filter_snippet_patterns(chunk, self.skip_snippets)
-         ]
-         # First pass create clusters
-         for _, chunk in chunks:
-             processed_chunk = self.miner.add_log_message(chunk)
-             LOG.debug(processed_chunk)
-         clusters = list(self.miner.drain.clusters)
-         # Second pass, only matching lines with clusters,
-         # to recover original text
-         for chunk_start, chunk in chunks:
-             cluster = self.miner.match(chunk, "always")
-             if cluster in clusters:
-                 out.append((chunk_start, chunk))
-                 clusters.remove(cluster)
-         return out