logdetective 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: logdetective
-Version: 0.2.9
+Version: 0.2.11
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -22,7 +22,7 @@ Provides-Extra: server
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
-Requires-Dist: numpy (>=1.26.0,<2.0.0)
+Requires-Dist: numpy (>=1.26.0)
 Requires-Dist: requests (>0.2.31)
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -16,7 +16,7 @@ class LLMExtractor:
     A class that extracts relevant information from logs using a language model.
     """
     def __init__(self, model: Llama, n_lines: int = 2):
-        self.model = model
+        self.model = model
         self.n_lines = n_lines
         self.grammar = LlamaGrammar.from_string(
             "root ::= (\"Yes\" | \"No\")", verbose=False)
@@ -9,6 +9,7 @@ from logdetective.extractors import LLMExtractor, DrainExtractor
 
 LOG = logging.getLogger("logdetective")
 
+
 def setup_args():
     """ Setup argument parser and return arguments. """
     parser = argparse.ArgumentParser("logdetective")
@@ -16,6 +16,7 @@ from logdetective.constants import (
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import validate_url, compute_certainty
 
+
 class BuildLog(BaseModel):
     """Model of data submitted to API.
     """
@@ -29,7 +30,7 @@ class Response(BaseModel):
     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
     response_certainty: float
     """
-    explanation: CreateCompletionResponse
+    explanation: Dict
     response_certainty: float
 
 
@@ -43,10 +44,10 @@ class StagedResponse(Response):
     snippets:
     list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
     """
-    snippets: List[Dict[str, str | CreateCompletionResponse]]
+    snippets: List[Dict[str, str | Dict]]
 
-LOG = logging.getLogger("logdetective")
 
+LOG = logging.getLogger("logdetective")
 
 LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
 LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
@@ -55,6 +56,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
 
+
 def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
     """
     FastAPI Depend function that expects a header named Authentication
@@ -82,6 +84,7 @@ def requires_token_when_set(authentication: Annotated[str | None, Header()] = No
         API_TOKEN, token)
     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
 
+
 app = FastAPI(dependencies=[Depends(requires_token_when_set)])
 
 
@@ -99,7 +102,7 @@ def process_url(url: str) -> str:
         if not log_request.ok:
             raise HTTPException(status_code=400,
                                 detail="Something went wrong while getting the logs: "
-                                f"[{log_request.status_code}] {log_request.text}")
+                                f"[{log_request.status_code}] {log_request.text}")
     else:
         LOG.error("Invalid URL received ")
         raise HTTPException(status_code=400,
@@ -111,7 +114,7 @@ def process_url(url: str) -> str:
 def mine_logs(log: str) -> List[str]:
     """Extract snippets from log text
     """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
@@ -120,10 +123,10 @@ def mine_logs(log: str) -> List[str]:
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
 
-
     return log_summary
 
-async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream: bool = False,
+
+async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
                       model: str = "default-model"):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
@@ -131,17 +134,17 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
     """
     LOG.info("Analyzing the text")
     data = {
-        "prompt": text,
-        "max_tokens": str(max_tokens),
-        "logprobs": str(log_probs),
-        "stream": stream,
-        "model": model}
+        "prompt": text,
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
+        "stream": stream,
+        "model": model}
 
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
         response = requests.post(
             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-            headers={"Content-Type":"application/json"},
+            headers={"Content-Type": "application/json"},
             data=json.dumps(data),
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
             stream=stream)
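
The hunk above switches the completion payload to native JSON types (integers and booleans instead of strings) and changes the default max_tokens from 0 to -1. A minimal standalone sketch of an equivalent request against a llama.cpp-style /v1/completions endpoint is shown below; the URL, prompt, timeout, and the reading of -1 as "no fixed token limit" are assumptions for illustration, not values or guarantees taken from the package:

```python
import json

import requests

# Hypothetical endpoint; the real service builds it from LLAMA_CPP_HOST/PORT env vars.
LLAMA_CPP_URL = "http://localhost:8000/v1/completions"

payload = {
    "prompt": "Explain this build failure: ...",
    "max_tokens": -1,      # new default in the diff above; assumed to mean "generate until EOS"
    "logprobs": 1,         # ask the server to return token log-probabilities
    "stream": False,
    "model": "default-model",
}

# Values are sent as native JSON types (ints, bools), not strings,
# matching the change in the hunk above.
response = requests.post(
    LLAMA_CPP_URL,
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
    timeout=600,
)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```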
@@ -154,7 +157,7 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
         raise HTTPException(
             status_code=400,
             detail="Something went wrong while getting a response from the llama server: "
-            f"[{response.status_code}] {response.text}")
+            f"[{response.status_code}] {response.text}")
     try:
         response = json.loads(response.text)
     except UnicodeDecodeError as ex:
@@ -184,13 +187,13 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
@@ -211,27 +214,27 @@ async def analyze_log_staged(build_log: BuildLog):
         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
 
     analyzed_snippets = [
-        {"snippet":e[0], "comment":e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         f"\n{SNIPPET_DELIMITER}\n".join([
             f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-            for e in analyzed_snippets]))
+            for e in analyzed_snippets]))
 
     final_analysis = await submit_text(final_prompt)
-
+    print(final_analysis)
     certainty = 0
 
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return StagedResponse(
         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
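
Both analyze endpoints above now read the top log-probabilities from response["choices"][0]["logprobs"]["content"][0]["top_logprobs"] rather than response["choices"][0]["logprobs"]["top_logprobs"], i.e. they assume a nested layout where each entry is a dict carrying a logprob field. A small illustrative sketch of pulling that list out defensively follows; the helper name and the sample payload are hypothetical, not part of the package or of any documented llama.cpp schema:

```python
from typing import Dict, List


def extract_top_logprobs(completion: Dict) -> List[Dict]:
    """Return the top-logprob dicts for the first generated token,
    using the nested layout the server code now expects."""
    choice = completion["choices"][0]
    logprobs = choice.get("logprobs") or {}
    content = logprobs.get("content") or []
    if not content:
        return []
    return content[0].get("top_logprobs", [])


# Illustrative response fragment with the assumed shape.
sample = {
    "choices": [{
        "text": "Yes",
        "logprobs": {
            "content": [
                {"token": "Yes", "logprob": -0.05,
                 "top_logprobs": [{"token": "Yes", "logprob": -0.05}]}
            ]
        },
    }]
}

print(extract_top_logprobs(sample))  # [{'token': 'Yes', 'logprob': -0.05}]
```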
@@ -15,10 +15,17 @@ LOG = logging.getLogger("logdetective")
 def chunk_continues(text: str, index: int) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
+
+    Following rules are checked, in order:
+    * is the next character is whitespace
+    * is the previous character backslash '\\'
+    * is the previous character colon ':'
+
     """
     conditionals = [
         lambda i, string: string[i + 1].isspace(),
-        lambda i, string: string[i - 1] == "\\"
+        lambda i, string: string[i - 1] == "\\",
+        lambda i, string: string[i - 1] == ":"
     ]
 
     for c in conditionals:
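
The hunk above adds a third continuation heuristic (previous character is a colon) and documents the order in which the rules are checked. Below is a simplified standalone copy of those heuristics, shown only to illustrate how the three rules fire on sample log lines; it assumes index is not the last character of the text and is not the packaged implementation:

```python
def chunk_continues(text: str, index: int) -> bool:
    """Simplified copy for illustration: the chunk continues if the next
    character is whitespace, or the previous character is '\\' or ':'."""
    conditionals = [
        lambda i, string: string[i + 1].isspace(),
        lambda i, string: string[i - 1] == "\\",
        lambda i, string: string[i - 1] == ":",
    ]
    return any(c(index, text) for c in conditionals)


line1 = "export FLAGS=-O2 \\\nCC=gcc"
# The newline is preceded by a backslash, so the chunk continues.
print(chunk_continues(line1, line1.index("\n")))  # True

line2 = "error:\nno such file"
# The newline follows a colon, so the new heuristic also reports continuation.
print(chunk_continues(line2, line2.index("\n")))  # True

line3 = "make: *** [all] Error 1\ngcc exited"
# Next char is 'g', previous is '1': no rule matches, the chunk ends here.
print(chunk_continues(line3, line3.index("\n")))  # False
```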
@@ -74,16 +81,19 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
     return model
 
 
-def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
+def compute_certainty(probs: List[Dict]) -> float:
     """Compute certainty of repsponse based on average logit probability.
     Log probability is log(p), isn't really readable for most people, especially in compound.
     In this case it's just a matter of applying inverse operation exp.
     Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
     Simply put, this is the most straightforward way to get the numbers out.
+
+    This function is used in the server codebase.
     """
 
     top_logprobs = [
-        np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
+        np.exp(e["logprob"]) * 100 for e in probs]
+
     certainty = np.median(top_logprobs, axis=0)
     if np.isnan(certainty):
         raise ValueError("NaN certainty of answer")
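
With the new signature, compute_certainty expects a flat list of dicts that each carry a logprob key (the shape produced by the top_logprobs path shown earlier), converts every log-probability back to a percentage with exp(logprob) * 100, and reports the median. A short numeric sketch of that calculation; the token names and log-probability values are made up for illustration:

```python
import numpy as np

# Hypothetical top_logprobs entries, one dict per candidate token.
probs = [
    {"token": "Yes", "logprob": -0.105},    # exp(-0.105) * 100 ~ 90.0
    {"token": "No", "logprob": -2.303},     # exp(-2.303) * 100 ~ 10.0
    {"token": "Maybe", "logprob": -0.511},  # exp(-0.511) * 100 ~ 60.0
]

# Same arithmetic as the new list comprehension in the hunk above.
top_logprobs = [np.exp(e["logprob"]) * 100 for e in probs]
certainty = np.median(top_logprobs, axis=0)
print(round(float(certainty), 1))  # 60.0, the median of ~90.0, ~10.0, ~60.0
```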
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.2.9"
+version = "0.2.11"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -31,7 +31,9 @@ requests = ">0.2.31"
 llama-cpp-python = ">0.2.56,!=0.2.86"
 drain3 = "^0.9.11"
 huggingface-hub = ">0.23.2"
-numpy = "^1.26.0"
+# rawhide has numpy 2, F40 and F41 are still on 1.26
+# we need to support both versions
+numpy = ">=1.26.0"
 
 [build-system]
 requires = ["poetry-core"]