logdetective 0.2.13__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 0.2.13
+ Version: 0.3.1
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -18,10 +18,15 @@ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Internet :: Log Analysis
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
+ Provides-Extra: server
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
+ Requires-Dist: fastapi (>=0.115.8,<0.116.0) ; extra == "server"
  Requires-Dist: huggingface-hub (>0.23.2)
  Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
  Requires-Dist: numpy (>=1.26.0)
+ Requires-Dist: pydantic (>=2.10.6,<3.0.0) ; extra == "server"
+ Requires-Dist: python-gitlab (>=5.6.0,<6.0.0)
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
  Requires-Dist: requests (>0.2.31)
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
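The metadata now defines a `server` extra (fastapi, pydantic, pyyaml) and adds an unconditional `python-gitlab` dependency, which points at the GitLab integration introduced elsewhere in this release. As a purely illustrative sketch of what the `python-gitlab` dependency makes possible, not code taken from the logdetective server, fetching the trace of a failed CI job looks roughly like this:

```python
# Hypothetical illustration of the new python-gitlab dependency.
# The project/job IDs and the token source are placeholders, not logdetective code.
import os
import gitlab

gl = gitlab.Gitlab("https://gitlab.com", private_token=os.environ["GITLAB_TOKEN"])
project = gl.projects.get(12345)  # e.g. project_id from a job webhook
job = project.jobs.get(67890)     # e.g. build_id of the failed job
trace = job.trace()               # raw job log, ready to be analyzed
```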
@@ -216,6 +221,30 @@ $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/
  ```
  
  
+ Our production instance
+ -----------------------
+
+ Our FastAPI server and model inference server run through `podman-compose` on an
+ Amazon AWS instance. The VM is provisioned by an
+ [ansible playbook](https://pagure.io/fedora-infra/ansible/blob/main/f/roles/logdetective/tasks/main.yml).
+
+ You can control the server through:
+
+ ```
+ cd /root/logdetective
+ podman-compose -f docker-compose-prod.yaml ...
+ ```
+
+ The `/root` directory contains valuable data. If moving to a new instance,
+ please back up the whole directory and transfer it to the new instance.
+
+ For some reason, we need to manually run this command after every reboot:
+
+ ```
+ nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
+ ```
+
+
  License
  -------
  
@@ -187,6 +187,30 @@ $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/
  ```
  
  
+ Our production instance
+ -----------------------
+
+ Our FastAPI server and model inference server run through `podman-compose` on an
+ Amazon AWS instance. The VM is provisioned by an
+ [ansible playbook](https://pagure.io/fedora-infra/ansible/blob/main/f/roles/logdetective/tasks/main.yml).
+
+ You can control the server through:
+
+ ```
+ cd /root/logdetective
+ podman-compose -f docker-compose-prod.yaml ...
+ ```
+
+ The `/root` directory contains valuable data. If moving to a new instance,
+ please back up the whole directory and transfer it to the new instance.
+
+ For some reason, we need to manually run this command after every reboot:
+
+ ```
+ nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
+ ```
+
+
  License
  -------
  
@@ -1,4 +1,3 @@
-
  # pylint: disable=line-too-long
  DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.2-GGUF"
  
@@ -32,7 +31,7 @@ Answer:
  """
  
  SNIPPET_PROMPT_TEMPLATE = """
- Analyse following RPM build log snippet. Decribe contents accurately, without speculation or suggestions for resolution.
+ Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
  
  Snippet:
  
@@ -59,4 +58,4 @@ Analysis:
  
  """
  
- SNIPPET_DELIMITER = '================'
+ SNIPPET_DELIMITER = "================"
@@ -1,5 +1,6 @@
  import os
  import logging
+ from typing import Tuple
  
  import drain3
  from drain3.template_miner_config import TemplateMinerConfig
@@ -15,13 +16,17 @@ class LLMExtractor:
  """
  A class that extracts relevant information from logs using a language model.
  """
+
  def __init__(self, model: Llama, n_lines: int = 2):
  self.model = model
  self.n_lines = n_lines
  self.grammar = LlamaGrammar.from_string(
- "root ::= (\"Yes\" | \"No\")", verbose=False)
+ 'root ::= ("Yes" | "No")', verbose=False
+ )
  
- def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> list[str]:
+ def __call__(
+ self, log: str, n_lines: int = 2, neighbors: bool = False
+ ) -> list[str]:
  chunks = self.rate_chunks(log)
  out = self.create_extract(chunks, neighbors)
  return out
@@ -35,7 +40,7 @@ class LLMExtractor:
  log_lines = log.split("\n")
  
  for i in range(0, len(log_lines), self.n_lines):
- block = '\n'.join(log_lines[i:i + self.n_lines])
+ block = "\n".join(log_lines[i: i + self.n_lines])
  prompt = SUMMARIZE_PROMPT_TEMPLATE.format(log)
  out = self.model(prompt, max_tokens=7, grammar=self.grammar)
  out = f"{out['choices'][0]['text']}\n"
@@ -44,8 +49,7 @@ class LLMExtractor:
  return results
  
  def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> list[str]:
- """Extract interesting chunks from the model processing.
- """
+ """Extract interesting chunks from the model processing."""
  interesting = []
  summary = []
  # pylint: disable=consider-using-enumerate
@@ -64,8 +68,8 @@ class LLMExtractor:
  
  
  class DrainExtractor:
- """A class that extracts information from logs using a template miner algorithm.
- """
+ """A class that extracts information from logs using a template miner algorithm."""
+
  def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
  config = TemplateMinerConfig()
  config.load(f"{os.path.dirname(__file__)}/drain3.ini")
@@ -75,15 +79,21 @@ class DrainExtractor:
  self.verbose = verbose
  self.context = context
  
- def __call__(self, log: str) -> list[str]:
+ def __call__(self, log: str) -> list[Tuple[int, str]]:
  out = []
- for chunk in get_chunks(log):
- processed_line = self.miner.add_log_message(chunk)
- LOG.debug(processed_line)
- sorted_clusters = sorted(self.miner.drain.clusters, key=lambda it: it.size, reverse=True)
- for chunk in get_chunks(log):
+ # First pass create clusters
+ for _, chunk in get_chunks(log):
+ processed_chunk = self.miner.add_log_message(chunk)
+ LOG.debug(processed_chunk)
+ # Sort found clusters by size, descending order
+ sorted_clusters = sorted(
+ self.miner.drain.clusters, key=lambda it: it.size, reverse=True
+ )
+ # Second pass, only matching lines with clusters,
+ # to recover original text
+ for chunk_start, chunk in get_chunks(log):
  cluster = self.miner.match(chunk, "always")
  if cluster in sorted_clusters:
- out.append(chunk)
+ out.append((chunk_start, chunk))
  sorted_clusters.remove(cluster)
  return out
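With this change `DrainExtractor.__call__` returns `(position, snippet)` tuples instead of bare strings, so callers can report where in the log each extracted snippet came from. A small usage sketch under that assumption; the log file name is illustrative:

```python
# Sketch of consuming DrainExtractor's new return type; the log file name is
# hypothetical, and the positions come from get_chunks() as shown in the diff.
from logdetective.extractors import DrainExtractor

extractor = DrainExtractor(verbose=False, context=True, max_clusters=8)

with open("build.log", encoding="utf-8") as f:
    log = f.read()

for position, snippet in extractor(log):
    print(f"--- snippet at {position} ---")
    print(snippet)
```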
@@ -4,40 +4,71 @@ import sys
  
  from logdetective.constants import DEFAULT_ADVISOR
  from logdetective.utils import (
- process_log, initialize_model, retrieve_log_content, format_snippets, compute_certainty)
+ process_log,
+ initialize_model,
+ retrieve_log_content,
+ format_snippets,
+ compute_certainty,
+ )
  from logdetective.extractors import LLMExtractor, DrainExtractor
  
  LOG = logging.getLogger("logdetective")
  
  
  def setup_args():
- """ Setup argument parser and return arguments. """
+ """Setup argument parser and return arguments."""
  parser = argparse.ArgumentParser("logdetective")
- parser.add_argument("file", type=str,
- default="", help="The URL or path to the log file to be analyzed.")
- parser.add_argument("-M", "--model",
- help="The path or Hugging Face name of the language model for analysis.",
- type=str, default=DEFAULT_ADVISOR)
- parser.add_argument("-F", "--filename_suffix",
- help="Suffix of the model file name to be retrieved from Hugging Face.\
+ parser.add_argument(
+ "file",
+ type=str,
+ default="",
+ help="The URL or path to the log file to be analyzed.",
+ )
+ parser.add_argument(
+ "-M",
+ "--model",
+ help="The path or Hugging Face name of the language model for analysis.",
+ type=str,
+ default=DEFAULT_ADVISOR,
+ )
+ parser.add_argument(
+ "-F",
+ "--filename_suffix",
+ help="Suffix of the model file name to be retrieved from Hugging Face.\
  Makes sense only if the model is specified with Hugging Face name.",
- default="Q4_K_S.gguf")
- parser.add_argument("-n", "--no-stream", action='store_true')
- parser.add_argument("-S", "--summarizer", type=str, default="drain",
- help="Choose between LLM and Drain template miner as the log summarizer.\
- LLM must be specified as path to a model, URL or local file.")
- parser.add_argument("-N", "--n_lines", type=int,
- default=8, help="The number of lines per chunk for LLM analysis.\
- This only makes sense when you are summarizing with LLM.")
- parser.add_argument("-C", "--n_clusters", type=int, default=8,
- help="Number of clusters for Drain to organize log chunks into.\
- This only makes sense when you are summarizing with Drain")
- parser.add_argument("-v", "--verbose", action='count', default=0)
- parser.add_argument("-q", "--quiet", action='store_true')
+ default="Q4_K_S.gguf",
+ )
+ parser.add_argument("-n", "--no-stream", action="store_true")
+ parser.add_argument(
+ "-S",
+ "--summarizer",
+ type=str,
+ default="drain",
+ help="Choose between LLM and Drain template miner as the log summarizer.\
+ LLM must be specified as path to a model, URL or local file.",
+ )
+ parser.add_argument(
+ "-N",
+ "--n_lines",
+ type=int,
+ default=8,
+ help="The number of lines per chunk for LLM analysis.\
+ This only makes sense when you are summarizing with LLM.",
+ )
+ parser.add_argument(
+ "-C",
+ "--n_clusters",
+ type=int,
+ default=8,
+ help="Number of clusters for Drain to organize log chunks into.\
+ This only makes sense when you are summarizing with Drain",
+ )
+ parser.add_argument("-v", "--verbose", action="count", default=0)
+ parser.add_argument("-q", "--quiet", action="store_true")
  return parser.parse_args()
  
  
- def main():
+ def main(): # pylint: disable=too-many-statements
  """Main execution function."""
  args = setup_args()
  
@@ -57,8 +88,9 @@ def main():
  
  # Primary model initialization
  try:
- model = initialize_model(args.model, filename_suffix=args.filename_suffix,
- verbose=args.verbose > 2)
+ model = initialize_model(
+ args.model, filename_suffix=args.filename_suffix, verbose=args.verbose > 2
+ )
  except ValueError as e:
  LOG.error(e)
  LOG.error("You likely do not have enough memory to load the AI model")
@@ -66,7 +98,9 @@ def main():
  
  # Log file summarizer selection and initialization
  if args.summarizer == "drain":
- extractor = DrainExtractor(args.verbose > 1, context=True, max_clusters=args.n_clusters)
+ extractor = DrainExtractor(
+ args.verbose > 1, context=True, max_clusters=args.n_clusters
+ )
  else:
  summarizer_model = initialize_model(args.summarizer, verbose=args.verbose > 2)
  extractor = LLMExtractor(summarizer_model, args.verbose > 1)
@@ -81,7 +115,7 @@ def main():
  sys.exit(4)
  log_summary = extractor(log)
  
- ratio = len(log_summary) / len(log.split('\n'))
+ ratio = len(log_summary) / len(log.split("\n"))
  
  LOG.info("Compression ratio: %s", ratio)
  
@@ -103,15 +137,19 @@ def main():
  
  if args.no_stream:
  print(response["choices"][0]["text"])
- probs = [{'logprob': e} for e in response['choices'][0]['logprobs']['token_logprobs']]
+ probs = [
+ {"logprob": e} for e in response["choices"][0]["logprobs"]["token_logprobs"]
+ ]
  
  else:
  # Stream the output
  for chunk in response:
  if isinstance(chunk["choices"][0]["logprobs"], dict):
- probs.append({'logprob': chunk["choices"][0]["logprobs"]['token_logprobs'][0]})
- delta = chunk['choices'][0]['text']
- print(delta, end='', flush=True)
+ probs.append(
+ {"logprob": chunk["choices"][0]["logprobs"]["token_logprobs"][0]}
+ )
+ delta = chunk["choices"][0]["text"]
+ print(delta, end="", flush=True)
  certainty = compute_certainty(probs)
  
  print(f"\nResponse certainty: {certainty:.2f}%\n")
@@ -0,0 +1,173 @@
+ from logging import BASIC_FORMAT
+ from typing import List, Dict, Optional
+ from pydantic import BaseModel, Field
+
+
+ class BuildLog(BaseModel):
+ """Model of data submitted to API."""
+
+ url: str
+
+
+ class JobHook(BaseModel):
+ """Model of Job Hook events sent from GitLab.
+ Full details of the specification are available at
+ https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
+ This model implements only the fields that we care about. The webhook
+ sends many more fields that we will ignore."""
+
+ # The unique job ID on this GitLab instance.
+ build_id: int
+
+ # The identifier of the job. We only care about 'build_rpm' and
+ # 'build_centos_stream_rpm' jobs.
+ build_name: str = Field(pattern=r"^build(_.*)?_rpm$")
+
+ # A string representing the job status. We only care about 'failed' jobs.
+ build_status: str = Field(pattern=r"^failed$")
+
+ # The kind of webhook message. We are only interested in 'build' messages
+ # which represents job tasks in a pipeline.
+ object_kind: str = Field(pattern=r"^build$")
+
+ # The unique ID of the enclosing pipeline on this GitLab instance.
+ pipeline_id: int
+
+ # The unique ID of the project triggering this event
+ project_id: int
+
+
+ class Response(BaseModel):
+ """Model of data returned by Log Detective API
+
+ explanation: CreateCompletionResponse
+ https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+ response_certainty: float
+ """
+
+ explanation: Dict
+ response_certainty: float
+
+
+ class StagedResponse(Response):
+ """Model of data returned by Log Detective API when called when staged response
+ is requested. Contains list of responses to prompts for individual snippets.
+
+ explanation: CreateCompletionResponse
+ https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+ response_certainty: float
+ snippets:
+ list of dictionaries {
+ 'snippet' : '<original_text>,
+ 'comment': CreateCompletionResponse,
+ 'line_number': '<location_in_log>' }
+ """
+
+ snippets: List[Dict[str, str | Dict | int]]
+
+
+ class InferenceConfig(BaseModel):
+ """Model for inference configuration of logdetective server."""
+
+ max_tokens: int = -1
+ log_probs: int = 1
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+ if data is None:
+ return
+
+ self.max_tokens = data.get("max_tokens", -1)
+ self.log_probs = data.get("log_probs", 1)
+
+
+ class ExtractorConfig(BaseModel):
+ """Model for extractor configuration of logdetective server."""
+
+ context: bool = True
+ max_clusters: int = 8
+ verbose: bool = False
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+ if data is None:
+ return
+
+ self.context = data.get("context", True)
+ self.max_clusters = data.get("max_clusters", 8)
+ self.verbose = data.get("verbose", False)
+
+
+ class GitLabConfig(BaseModel):
+ """Model for GitLab configuration of logdetective server."""
+
+ url: str = None
+ api_url: str = None
+ api_token: str = None
+
+ # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
+ max_artifact_size: int = 300
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+ if data is None:
+ return
+
+ self.url = data.get("url", "https://gitlab.com")
+ self.api_url = f"{self.url}/api/v4"
+ self.api_token = data.get("api_token", None)
+ self.max_artifact_size = int(data.get("max_artifact_size")) * 1024 * 1024
+
+
+ class LogConfig(BaseModel):
+ """Logging configuration"""
+
+ name: str = "logdetective"
+ level: str | int = "INFO"
+ path: str | None = None
+ format: str = BASIC_FORMAT
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+ if data is None:
+ return
+
+ self.name = data.get("name", "logdetective")
+ self.level = data.get("level", "INFO").upper()
+ self.path = data.get("path")
+ self.format = data.get("format", BASIC_FORMAT)
+
+
+ class GeneralConfig(BaseModel):
+ """General config options for Log Detective"""
+
+ packages: List[str] = None
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+ if data is None:
+ return
+
+ self.packages = data.get("packages", [])
+
+
+ class Config(BaseModel):
+ """Model for configuration of logdetective server."""
+
+ log: LogConfig = LogConfig()
+ inference: InferenceConfig = InferenceConfig()
+ extractor: ExtractorConfig = ExtractorConfig()
+ gitlab: GitLabConfig = GitLabConfig()
+ general: GeneralConfig = GeneralConfig()
+
+ def __init__(self, data: Optional[dict] = None):
+ super().__init__()
+
+ if data is None:
+ return
+
+ self.log = LogConfig(data.get("log"))
+ self.inference = InferenceConfig(data.get("inference"))
+ self.extractor = ExtractorConfig(data.get("extractor"))
+ self.gitlab = GitLabConfig(data.get("gitlab"))
+ self.general = GeneralConfig(data.get("general"))
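Each config model in this new module accepts a plain dict in `__init__`, so a complete `Config` can be built from a parsed configuration file (`pyyaml` is part of the new `server` extra). A small sketch under that assumption; the import path, file layout, and values are hypothetical and only mirror the keys the models read via `data.get(...)`:

```python
# Sketch of building the server Config from a dict, e.g. parsed from YAML.
# The module path and the YAML contents are hypothetical.
import yaml
from logdetective.models import Config  # assumed import path for the new module

raw = yaml.safe_load("""
log:
  level: DEBUG
  path: /var/log/logdetective.log
extractor:
  max_clusters: 16
gitlab:
  url: https://gitlab.com
  api_token: REDACTED
  max_artifact_size: 300
general:
  packages:
    - example-package
""")

config = Config(raw)
print(config.extractor.max_clusters)  # 16
print(config.gitlab.api_url)          # https://gitlab.com/api/v4
```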