PyPI - logdetective - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

logdetective 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

logdetective/constants.py +13 -0
logdetective/logdetective.py +9 -2
logdetective/server.py +128 -23
logdetective/utils.py +30 -8
{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/METADATA +3 -2
logdetective-0.2.7.dist-info/RECORD +12 -0
logdetective-0.2.6.dist-info/RECORD +0 -12
{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/LICENSE +0 -0
{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/WHEEL +0 -0
{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/entry_points.txt +0 -0

logdetective/constants.py CHANGED Viewed

@@ -30,3 +30,16 @@ Log:
 Answer:
 """
+SNIPPET_PROMPT_TEMPLATE = """
+Analyse following RPM build log snippet.
+Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+Snippets themselves must not be altered in any way whatsoever.
+Snippet:
+{}
+Analysis:
+"""

logdetective/logdetective.py CHANGED Viewed

@@ -3,7 +3,8 @@ import logging
 import sys
 from logdetective.constants import DEFAULT_ADVISOR
-from logdetective.utils import process_log, initialize_model, retrieve_log_content, format_snippets
+from logdetective.utils import (
+    process_log, initialize_model, retrieve_log_content, format_snippets, compute_certainty)
 from logdetective.extractors import LLMExtractor, DrainExtractor
 LOG = logging.getLogger("logdetective")
@@ -92,15 +93,21 @@ def main():
     if args.no_stream:
         stream = False
     response = process_log(log_summary, model, stream)
+    probs = []
     print("Explanation:")
     if args.no_stream:
         print(response["choices"][0]["text"])
+        probs = response["choices"][0]["logprobs"]["top_logprobs"]
     else:
         # Stream the output
         for chunk in response:
+            if isinstance(chunk["choices"][0]["logprobs"], dict):
+                probs.extend(chunk["choices"][0]["logprobs"]["top_logprobs"])
             delta = chunk['choices'][0]['text']
             print(delta, end='', flush=True)
-        print()
+    certainty = compute_certainty(probs)
+    print(f"\nResponse certainty: {certainty:.2f}%\n")
 if __name__ == "__main__":

logdetective/server.py CHANGED Viewed

@@ -1,21 +1,47 @@
+import json
 import logging
 import os
-import json
+from typing import List
+from llama_cpp import CreateCompletionResponse
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import requests
-from logdetective.constants import PROMPT_TEMPLATE
+from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_PROMPT_TEMPLATE
 from logdetective.extractors import DrainExtractor
-from logdetective.utils import validate_url
+from logdetective.utils import validate_url, compute_certainty
 class BuildLog(BaseModel):
     """Model of data submitted to API.
     """
     url: str
+class Response(BaseModel):
+    """Model of data returned by Log Detective API
+    explanation: CreateCompletionResponse
+        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    response_certainty: float
+    """
+    explanation: CreateCompletionResponse
+    response_certainty: float
+class StagedResponse(Response):
+    """Model of data returned by Log Detective API when called when staged response
+    is requested. Contains list of reponses to prompts for individual snippets.
+    explanation: CreateCompletionResponse
+        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    response_certainty: float
+    snippets: list of CreateCompletionResponse
+    """
+    snippets: List[CreateCompletionResponse]
 LOG = logging.getLogger("logdetective")
 app = FastAPI()
@@ -26,21 +52,13 @@ LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
-@app.post("/analyze", )
-async def analyze_log(build_log: BuildLog):
-    """Provide endpoint for log file submission and analysis.
-    Request must be in form {"url":"<YOUR_URL_HERE>"}.
-    URL must be valid for the request to be passed to the LLM server.
-    Meaning that it must contain appropriate scheme, path and netloc,
-    while lacking  result, params or query fields.
-    """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
-    LOG.info("Getting summary")
-    # Perform basic validation of the URL
-    if validate_url(url=build_log.url):
+def process_url(url: str) -> str:
+    """Validate log URL and return log text.
+    """
+    if validate_url(url=url):
         try:
-            log_request = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
+            log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
         except requests.RequestException as ex:
             raise HTTPException(
                 status_code=400,
@@ -53,19 +71,36 @@ async def analyze_log(build_log: BuildLog):
     else:
         LOG.error("Invalid URL received ")
         raise HTTPException(status_code=400,
-                            detail=f"Invalid log URL: {build_log.url}")
+                            detail=f"Invalid log URL: {url}")
+    return log_request.text
+def mine_logs(log: str) -> List[str]:
+    """Extract snippets from log text
+    """
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
-    log = log_request.text
+    LOG.info("Getting summary")
     log_summary = extractor(log)
     ratio = len(log_summary) / len(log.split('\n'))
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
+    return log_summary
+def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1):
+    """Submit prompt to LLM.
+    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+    log_probs: number of token choices to produce log probs for
+    """
     LOG.info("Analyzing the text")
     data = {
-            "prompt": PROMPT_TEMPLATE.format(log_summary),
-            "max_tokens": "0"}
+            "prompt": text,
+            "max_tokens": str(max_tokens),
+            "logprobs": str(log_probs)}
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
@@ -79,9 +114,79 @@ async def analyze_log(build_log: BuildLog):
             status_code=400,
             detail=f"Llama-cpp query failed: {ex}") from ex
-    if not log_request.ok:
+    if not response.ok:
         raise HTTPException(
             status_code=400,
             detail="Something went wrong while getting a response from the llama server: "
-                   f"[{log_request.status_code}] {log_request.text}")
-    return response.text
+                f"[{response.status_code}] {response.text}")
+    try:
+        response = json.loads(response.text)
+    except UnicodeDecodeError as ex:
+        LOG.error("Error encountered while parsing llama server response: %s", ex)
+        raise HTTPException(
+            status_code=400,
+            detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}") from ex
+    return CreateCompletionResponse(response)
+@app.post("/analyze", response_model=Response)
+async def analyze_log(build_log: BuildLog):
+    """Provide endpoint for log file submission and analysis.
+    Request must be in form {"url":"<YOUR_URL_HERE>"}.
+    URL must be valid for the request to be passed to the LLM server.
+    Meaning that it must contain appropriate scheme, path and netloc,
+    while lacking  result, params or query fields.
+    """
+    log_text = process_url(build_log.url)
+    log_summary = mine_logs(log_text)
+    response = submit_text(PROMPT_TEMPLATE.format(log_summary))
+    if "logprobs" in response["choices"][0]:
+        try:
+            certainty = compute_certainty(
+                response["choices"][0]["logprobs"]["top_logprobs"])
+        except ValueError as ex:
+            LOG.error("Error encountered while computing certainty: %s", ex)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Couldn't compute certainty with data:\n"
+                f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+    return Response(explanation=response, response_certainty=certainty)
+@app.post("/analyze/staged", response_model=StagedResponse)
+async def analyze_log_staged(build_log: BuildLog):
+    """Provide endpoint for log file submission and analysis.
+    Request must be in form {"url":"<YOUR_URL_HERE>"}.
+    URL must be valid for the request to be passed to the LLM server.
+    Meaning that it must contain appropriate scheme, path and netloc,
+    while lacking  result, params or query fields.
+    """
+    log_text = process_url(build_log.url)
+    log_summary = mine_logs(log_text)
+    analyzed_snippets = []
+    for snippet in log_summary:
+        response = submit_text(SNIPPET_PROMPT_TEMPLATE.format(snippet))
+        analyzed_snippets.append(response)
+    final_analysis = submit_text(
+        PROMPT_TEMPLATE.format([e["choices"][0]["text"] for e in analyzed_snippets]))
+    certainty = 0
+    if "logprobs" in final_analysis["choices"][0]:
+        try:
+            certainty = compute_certainty(
+                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+        except ValueError as ex:
+            LOG.error("Error encountered while computing certainty: %s", ex)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Couldn't compute certainty with data:\n"
+                f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+    return StagedResponse(
+        explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)

logdetective/utils.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import logging
 import os
+from typing import Iterator, List, Dict
 from urllib.parse import urlparse
+import numpy as np
 import requests
-from llama_cpp import Llama
+from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
 from logdetective.constants import PROMPT_TEMPLATE
@@ -60,20 +61,38 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
         model = Llama(
             model_path=model_pth,
             n_ctx=0,  # Maximum context for the model
-            verbose=verbose)
+            verbose=verbose,
+            logits_all=True)
     else:
         model = Llama.from_pretrained(
             model_pth,
             f"*{filename_suffix}",
             n_ctx=0,  # Maximum context for the model
-            verbose=verbose)
+            verbose=verbose,
+            logits_all=True)
     return model
-def process_log(log: str, model: Llama, stream: bool) -> str:
+def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
+    """Compute certainty of repsponse based on average logit probability.
+    Log probability is log(p), isn't really readable for most people, especially in compound.
+    In this case it's just a matter of applying inverse operation exp.
+    Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
+    Simply put, this is the most straightforward way to get the numbers out.
     """
-    Processes a given log using the provided language model and returns its summary.
+    top_logprobs = [
+        np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
+    certainty = np.median(top_logprobs, axis=0)
+    if np.isnan(certainty):
+        raise ValueError("NaN certainty of answer")
+    return certainty
+def process_log(log: str, model: Llama, stream: bool) -> (
+        CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]):
+    """Processes a given log using the provided language model and returns its summary.
     Args:
         log (str): The input log to be processed.
@@ -82,10 +101,13 @@ def process_log(log: str, model: Llama, stream: bool) -> str:
     Returns:
         str: The summary of the given log generated by the language model.
     """
-    return model(
+    response = model(
         prompt=PROMPT_TEMPLATE.format(log),
         stream=stream,
-        max_tokens=0)
+        max_tokens=0,
+        logprobs=1)
+    return response
 def retrieve_log_content(log_path: str) -> str:

{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: logdetective
-Version: 0.2.6
+Version: 0.2.7
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -21,6 +21,7 @@ Provides-Extra: server
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>=0.2.56,<0.3.0,!=0.2.86)
+Requires-Dist: numpy (>=1.26.0,<2.0.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -38,7 +39,7 @@ A Python tool to analyze logs using a Language Model (LLM) and Drain template mi
 Installation
 ------------
-** Fedora 40+ **
+**Fedora 40+**
     dnf install logdetective

logdetective-0.2.7.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/constants.py,sha256=1Ls2VJXb7NwSgi_HmTOA1c52K16SZIeDYBXlvBJ07zU,991
+logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
+logdetective/logdetective.py,sha256=f7ASCJg_Yt6VBFieXBYgQYdenfXjC60ZdLHhzQHideI,4372
+logdetective/server.py,sha256=m0NPtk9tAUzyu9O8jIAfgEzynZ-WCHqVvCJkHOm08Ks,7073
+logdetective/utils.py,sha256=nTbaDVEfbHVQPTZe58T04HHZ6JWUJ1PonRRnzGX8hY0,4794
+logdetective-0.2.7.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.2.7.dist-info/METADATA,sha256=3iqnKnVJy6aTaAqP77btyqSGqCpjT8_PQqpWaNwLKHg,9100
+logdetective-0.2.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+logdetective-0.2.7.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.2.7.dist-info/RECORD,,

logdetective-0.2.6.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/constants.py,sha256=2DlzXvqWgKca5fPXGPCxREYRfg0eHW0b8TATKoaqb54,711
-logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
-logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
-logdetective/logdetective.py,sha256=ee7et1mKyI33HaqIr7dR-o7AX1rijwPANw5s6fkDb-Q,4039
-logdetective/server.py,sha256=jb7TuC5xzzlTR6cUrSaZa8vHPZZwH0ei30b5N0iNVX8,3176
-logdetective/utils.py,sha256=UT3st9rbFXS8m-d0-3W39ENdVLhIraSH_K6vlKlDZ5w,3759
-logdetective-0.2.6.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-logdetective-0.2.6.dist-info/METADATA,sha256=h9NS3BpBPHo3cu93xcPNOrgiq2De8DJTPr2Hm5nZhns,9063
-logdetective-0.2.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-logdetective-0.2.6.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
-logdetective-0.2.6.dist-info/RECORD,,

{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/LICENSE RENAMED Viewed

File without changes

{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{logdetective-0.2.6.dist-info → logdetective-0.2.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

logdetective 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

logdetective 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl