logdetective 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

logdetective/constants.py CHANGED
@@ -1,3 +1,8 @@
+"""This file contains various constants to be used as a fallback
+in case other values are not specified. Prompt templates should be modified
+in prompts.yml instead.
+"""
+
 # pylint: disable=line-too-long
 DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.2-GGUF"
 
@@ -19,7 +24,7 @@ Analysis:
 
 """
 
-SUMMARIZE_PROMPT_TEMPLATE = """
+SUMMARIZATION_PROMPT_TEMPLATE = """
 Does following log contain error or issue?
 
 Log:
logdetective/extractors.py CHANGED
@@ -6,7 +6,7 @@ import drain3
 from drain3.template_miner_config import TemplateMinerConfig
 from llama_cpp import Llama, LlamaGrammar
 
-from logdetective.constants import SUMMARIZE_PROMPT_TEMPLATE
+from logdetective.constants import SUMMARIZATION_PROMPT_TEMPLATE
 from logdetective.utils import get_chunks
 
 LOG = logging.getLogger("logdetective")
@@ -17,12 +17,18 @@ class LLMExtractor:
     A class that extracts relevant information from logs using a language model.
     """
 
-    def __init__(self, model: Llama, n_lines: int = 2):
+    def __init__(
+        self,
+        model: Llama,
+        n_lines: int = 2,
+        prompt: str = SUMMARIZATION_PROMPT_TEMPLATE,
+    ):
         self.model = model
         self.n_lines = n_lines
         self.grammar = LlamaGrammar.from_string(
             'root ::= ("Yes" | "No")', verbose=False
         )
+        self.prompt = prompt
 
     def __call__(
         self, log: str, n_lines: int = 2, neighbors: bool = False
@@ -41,7 +47,7 @@ class LLMExtractor:
 
         for i in range(0, len(log_lines), self.n_lines):
             block = "\n".join(log_lines[i: i + self.n_lines])
-            prompt = SUMMARIZE_PROMPT_TEMPLATE.format(log)
+            prompt = self.prompt.format(log)
             out = self.model(prompt, max_tokens=7, grammar=self.grammar)
             out = f"{out['choices'][0]['text']}\n"
             results.append((block, out))
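The new `prompt` parameter lets a caller swap in a custom summarization template without editing `constants.py`. A minimal sketch, assuming a local GGUF model at a hypothetical path:

```python
from llama_cpp import Llama
from logdetective.extractors import LLMExtractor

# Hypothetical model path; any GGUF model accepted by llama_cpp works here.
model = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", verbose=False)

# A custom template must keep exactly one {} replacement field; the grammar
# defined in __init__ still constrains the answer to "Yes" or "No".
custom_prompt = "Does the following log excerpt report a build failure?\n\nLog:\n\n{}\n\nAnswer:\n"

extractor = LLMExtractor(model, n_lines=2, prompt=custom_prompt)
```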
logdetective/logdetective.py CHANGED
@@ -9,6 +9,7 @@ from logdetective.utils import (
     retrieve_log_content,
     format_snippets,
     compute_certainty,
+    load_prompts,
 )
 from logdetective.extractors import LLMExtractor, DrainExtractor
 
@@ -65,10 +66,13 @@ def setup_args():
     )
     parser.add_argument("-v", "--verbose", action="count", default=0)
     parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument(
+        "--prompts", type=str, default="", help="Path to prompt configuration file."
+    )
     return parser.parse_args()
 
 
-def main():  # pylint: disable=too-many-statements
+def main():  # pylint: disable=too-many-statements,too-many-locals
     """Main execution function."""
     args = setup_args()
 
@@ -83,6 +87,9 @@ def main():  # pylint: disable=too-many-statements
     if args.quiet:
         log_level = 0
 
+    # Get prompts configuration
+    prompts_configuration = load_prompts(args.prompts)
+
     logging.basicConfig(stream=sys.stdout)
     LOG.setLevel(log_level)
 
@@ -103,7 +110,11 @@ def main():  # pylint: disable=too-many-statements
         )
     else:
         summarizer_model = initialize_model(args.summarizer, verbose=args.verbose > 2)
-        extractor = LLMExtractor(summarizer_model, args.verbose > 1)
+        extractor = LLMExtractor(
+            summarizer_model,
+            args.verbose > 1,
+            prompts_configuration.summarization_prompt_template,
+        )
 
     LOG.info("Getting summary")
 
@@ -127,7 +138,12 @@ def main():  # pylint: disable=too-many-statements
     stream = True
     if args.no_stream:
         stream = False
-    response = process_log(log_summary, model, stream)
+    response = process_log(
+        log_summary,
+        model,
+        stream,
+        prompt_template=prompts_configuration.prompt_template,
+    )
     probs = []
     print("Explanation:")
     # We need to extract top token probability from the response
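Because an empty `--prompts` value is falsy, `load_prompts("")` skips the file read entirely and returns the built-in defaults, which is why the option can default to `""`. A small sketch of that behavior:

```python
from logdetective.utils import load_prompts

prompts_configuration = load_prompts("")  # empty path => built-in defaults

# Each default template carries a single {} replacement field.
print(prompts_configuration.prompt_template.count("{}"))  # 1
```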
logdetective/models.py ADDED
@@ -0,0 +1,33 @@
+from typing import Optional
+from pydantic import BaseModel
+
+from logdetective.constants import (
+    PROMPT_TEMPLATE,
+    PROMPT_TEMPLATE_STAGED,
+    SUMMARIZATION_PROMPT_TEMPLATE,
+    SNIPPET_PROMPT_TEMPLATE,
+)
+
+
+class PromptConfig(BaseModel):
+    """Configuration for basic log detective prompts."""
+
+    prompt_template: str = PROMPT_TEMPLATE
+    summarization_prompt_template: str = SUMMARIZATION_PROMPT_TEMPLATE
+    snippet_prompt_template: str = SNIPPET_PROMPT_TEMPLATE
+    prompt_template_staged: str = PROMPT_TEMPLATE_STAGED
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+        self.prompt_template = data.get("prompt_template", PROMPT_TEMPLATE)
+        self.summarization_prompt_template = data.get(
+            "summarization_prompt_template", SUMMARIZATION_PROMPT_TEMPLATE
+        )
+        self.snippet_prompt_template = data.get(
+            "snippet_prompt_template", SNIPPET_PROMPT_TEMPLATE
+        )
+        self.prompt_template_staged = data.get(
+            "prompt_template_staged", PROMPT_TEMPLATE_STAGED
+        )
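`PromptConfig` takes a plain dict and falls back to the constants for every missing key, so partial overrides are safe. A minimal sketch with a hypothetical template:

```python
from logdetective.models import PromptConfig

# Override a single template; the other three keep their defaults.
config = PromptConfig(
    {"snippet_prompt_template": "Describe this snippet:\n\n{}\n\nAnalysis:\n"}
)

assert config.snippet_prompt_template.startswith("Describe")
assert config.prompt_template  # still the default PROMPT_TEMPLATE
```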
logdetective/prompts.yml ADDED
@@ -0,0 +1,50 @@
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+  Snippets themselves must not be altered in any way whatsoever.
+
+  Snippets are delimited with '================'.
+
+  Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+
+  Snippets:
+
+  {}
+
+  Analysis:
+
+
+summarization_prompt_template: |
+  Does following log contain error or issue?
+
+  Log:
+
+  {}
+
+  Answer:
+
+
+snippet_prompt_template: |
+  Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
+
+  Snippet:
+
+  {}
+
+  Analysis:
+
+prompt_template_staged: |
+  Given following log snippets, their explanation, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Snippets are in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+
+  Snippets are delimited with '================'.
+
+  Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+
+  Snippets:
+
+  {}
+
+  Analysis:
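Every template above carries exactly one `{}` replacement field, later filled with a single positional argument to `str.format`. One way to sanity-check a hand-edited copy before deploying it (a sketch, using only the keys shown above):

```python
import yaml
from logdetective.models import PromptConfig

with open("prompts.yml", "r") as f:  # path to your edited copy
    config = PromptConfig(yaml.safe_load(f))

for name in (
    "prompt_template",
    "summarization_prompt_template",
    "snippet_prompt_template",
    "prompt_template_staged",
):
    template = getattr(config, name)
    # format() with one positional argument needs exactly one {} field.
    assert template.count("{}") == 1, f"{name} must keep one {{}} field"
```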
logdetective/server/server.py CHANGED
@@ -21,17 +21,13 @@ import gitlab.v4.objects
 import jinja2
 import requests
 
-from logdetective.constants import (
-    PROMPT_TEMPLATE,
-    SNIPPET_PROMPT_TEMPLATE,
-    PROMPT_TEMPLATE_STAGED,
-)
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import (
     validate_url,
     compute_certainty,
     format_snippets,
     format_analyzed_snippets,
+    load_prompts,
 )
 from logdetective.server.utils import load_server_config, get_log
 from logdetective.server.metric import track_request
@@ -51,8 +47,10 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
+SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
 
 SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
+PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
 
 MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/.*$")
 FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
@@ -298,7 +296,7 @@ async def analyze_log(build_log: BuildLog):
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
     response = await submit_text(
-        PROMPT_TEMPLATE.format(log_summary),
+        PROMPT_CONFIG.prompt_template.format(log_summary),
         api_endpoint=SERVER_CONFIG.inference.api_endpoint,
     )
     certainty = 0
@@ -338,7 +336,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     analyzed_snippets = await asyncio.gather(
         *[
             submit_text(
-                SNIPPET_PROMPT_TEMPLATE.format(s),
+                PROMPT_CONFIG.snippet_prompt_template.format(s),
                 api_endpoint=SERVER_CONFIG.inference.api_endpoint,
             )
             for s in log_summary
@@ -349,7 +347,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
         AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
         for e in zip(log_summary, analyzed_snippets)
     ]
-    final_prompt = PROMPT_TEMPLATE_STAGED.format(
+    final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
        format_analyzed_snippets(analyzed_snippets)
    )
 
@@ -395,7 +393,7 @@ async def analyze_log_stream(build_log: BuildLog):
         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
 
     stream = await submit_text_chat_completions(
-        PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
+        PROMPT_CONFIG.prompt_template.format(log_summary), stream=True, headers=headers
     )
 
     return StreamingResponse(stream)
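Note that `PROMPT_CONFIG` is built at module import time, so `LOGDETECTIVE_PROMPTS` has to be set before the server process starts; exporting it afterwards has no effect. Illustrative only (the path is hypothetical):

```python
import os

# Set before logdetective.server.server is imported; the module reads
# LOGDETECTIVE_PROMPTS once, when PROMPT_CONFIG is constructed.
os.environ["LOGDETECTIVE_PROMPTS"] = "/etc/logdetective/prompts.yml"
```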
logdetective/utils.py CHANGED
@@ -4,10 +4,13 @@ from typing import Iterator, List, Dict, Tuple, Generator
 from urllib.parse import urlparse
 import numpy as np
 import requests
+import yaml
 
 from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
-from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_DELIMITER
+from logdetective.constants import SNIPPET_DELIMITER
 from logdetective.server.models import AnalyzedSnippet
+from logdetective.models import PromptConfig
+
 
 LOG = logging.getLogger("logdetective")
 
@@ -110,7 +113,7 @@ def compute_certainty(probs: List[Dict]) -> float:
 
 
 def process_log(
-    log: str, model: Llama, stream: bool
+    log: str, model: Llama, stream: bool, prompt_template: str
 ) -> CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]:
     """Processes a given log using the provided language model and returns its summary.
 
@@ -122,7 +125,7 @@ def process_log(
         str: The summary of the given log generated by the language model.
     """
     response = model(
-        prompt=PROMPT_TEMPLATE.format(log), stream=stream, max_tokens=0, logprobs=1
+        prompt=prompt_template.format(log), stream=stream, max_tokens=0, logprobs=1
     )
 
     return response
@@ -199,3 +202,15 @@ def validate_url(url: str) -> bool:
     if not (result.path or result.netloc):
         return False
     return True
+
+
+def load_prompts(path: str | None) -> PromptConfig:
+    """Load prompts from given yaml file if there is one.
+    Alternatively use defaults."""
+    if path:
+        try:
+            with open(path, "r") as file:
+                return PromptConfig(yaml.safe_load(file))
+        except FileNotFoundError:
+            print("Prompt configuration file not found, reverting to defaults.")
+    return PromptConfig()
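`process_log` no longer reaches for a module-level template; the caller now supplies one. A minimal sketch of the new call, assuming a local GGUF model at a hypothetical path:

```python
from llama_cpp import Llama
from logdetective.utils import load_prompts, process_log

model = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", verbose=False)
prompts = load_prompts("prompts.yml")  # missing file => built-in defaults

log_summary = "error: linker command failed with exit code 1"
response = process_log(
    log_summary,
    model,
    stream=False,
    prompt_template=prompts.prompt_template,
)
print(response["choices"][0]["text"])
```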
logdetective-0.5.8.dist-info/METADATA → logdetective-0.5.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.5.8
+Version: 0.5.9
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -29,9 +29,9 @@ Requires-Dist: matplotlib (>=3.8.4,<4.0.0) ; extra == "server" or extra == "serv
 Requires-Dist: numpy (>=1.26.0)
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
 Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
-Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server" or extra == "server-testing"
+Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: python-gitlab (>=4.4.0)
-Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server" or extra == "server-testing"
+Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
 Requires-Dist: requests (>0.2.31)
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
@@ -363,6 +363,21 @@ http GET "localhost:8080/metrics/analyze/requests?days=5" > /tmp/plot_days.svg
 http GET "localhost:8080/metrics/analyze/requests?weeks=5" > /tmp/plot_weeks.svg
 ```
 
+System Prompts
+--------------
+
+Prompt templates used by Log Detective are stored in the `prompts.yml` file.
+You can modify the file in place or provide your own.
+In the CLI you can override the prompt template location with the `--prompts` option,
+while in the container service deployment the `LOGDETECTIVE_PROMPTS` environment
+variable is used instead.
+
+Prompts must be compatible with Python [format string syntax](https://docs.python.org/3/library/string.html#format-string-syntax),
+with replacement fields marked by curly braces (`{}`) left in place for insertion of snippets.
+
+New prompts must contain the same number of replacement fields as the originals,
+although their positions may differ.
+
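As a concrete illustration of the rule above, a partial override file can be generated and passed to the CLI (a sketch; the template text is hypothetical):

```python
import yaml

# Keys omitted from the file fall back to the built-in defaults.
custom = {
    "summarization_prompt_template": (
        "Does this log fragment report a failure?\n\nLog:\n\n{}\n\nAnswer:\n"
    ),
}

with open("my_prompts.yml", "w") as f:
    yaml.safe_dump(custom, f)

# Then: logdetective --prompts my_prompts.yml <log URL>
```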
 License
 -------
 
logdetective-0.5.8.dist-info/RECORD → logdetective-0.5.9.dist-info/RECORD CHANGED
@@ -1,8 +1,10 @@
 logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
-logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
+logdetective/constants.py,sha256=eiS6eYhEgl_Rlyi_B9j00DDp9A-UDhuFz3ACWtKf_SU,1558
 logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
-logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
-logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
+logdetective/extractors.py,sha256=7ahzWbTtU9MveG1Q7wU9LO8OJgs85X-cHmWltUhCe9M,3491
+logdetective/logdetective.py,sha256=1EFrml_gHdyKEZX4iXBxhGgmU7R7_S26-Fr0WUDaA7E,5316
+logdetective/models.py,sha256=nrGBmMRu8i6UhFflQKAp81Y3Sd_Aaoor0i_yqSJoLT0,1115
+logdetective/prompts.yml,sha256=OBOWDErlbigbLrStcCY5HKPReNb0g-SNlCnD4QawF7k,1268
 logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
@@ -10,12 +12,12 @@ logdetective/server/database/models.py,sha256=arIahOCT-hTmh904DXrWSkH7rlo13Ppu-O
 logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
 logdetective/server/models.py,sha256=cf1ngu_-19rP_i49s5cEwIzh6SfL_ZpVy4EykCpfWck,8076
 logdetective/server/plot.py,sha256=3o-CNHjel04ekpwSB4ckV7dbiF663cfPkimQ0aP9U_8,7073
-logdetective/server/server.py,sha256=ALVD9cwG4d8OQXfOPbRtt4y0nlh2C-8jP8pQeaufC3g,24533
+logdetective/server/server.py,sha256=VGfBgbjUcyBd8hop-ea-O_Mo-FoGLDyP-elAWzRu51g,24605
 logdetective/server/templates/gitlab_comment.md.j2,sha256=kheTkhQ-LfuFkr8av-Mw2a-9VYEUbDTLwaa-CKI6OkI,1622
 logdetective/server/utils.py,sha256=OFvhttjv3yp8kfim5_s4mNG8ly21qyILxE0o3DcVVKg,1340
-logdetective/utils.py,sha256=eudens1_T6iTtYhyzoYCpwuWgFHUMDSt6eWnrAB-mAI,6188
-logdetective-0.5.8.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-logdetective-0.5.8.dist-info/METADATA,sha256=dUCiCPfW8ILyshanWpb_zHdm9q3LIBYKZUWyfQWqsCA,14115
-logdetective-0.5.8.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-logdetective-0.5.8.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
-logdetective-0.5.8.dist-info/RECORD,,
+logdetective/utils.py,sha256=yTEjfTTaCS8lreKRkwKzLo6Po8cOYzInjSEx4CwpyqA,6665
+logdetective-0.5.9.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.5.9.dist-info/METADATA,sha256=YZbrICuAKXVD4LEEH6orwX-fuX3i3hpSsKuNa1nosoI,14737
+logdetective-0.5.9.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+logdetective-0.5.9.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.5.9.dist-info/RECORD,,