logdetective 0.2.10__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-0.2.10 → logdetective-0.2.12}/PKG-INFO +1 -1
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/logdetective.py +8 -2
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/server.py +14 -13
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/utils.py +11 -3
- {logdetective-0.2.10 → logdetective-0.2.12}/pyproject.toml +1 -1
- {logdetective-0.2.10 → logdetective-0.2.12}/LICENSE +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/README.md +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/__init__.py +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/constants.py +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/drain3.ini +0 -0
- {logdetective-0.2.10 → logdetective-0.2.12}/logdetective/extractors.py +0 -0

logdetective/logdetective.py

```diff
@@ -96,14 +96,20 @@ def main():
     response = process_log(log_summary, model, stream)
     probs = []
     print("Explanation:")
+    # We need to extract top token probability from the response
+    # CreateCompletionResponse structure of llama-cpp-python.
+    # `compute_certainty` function expects list of dictionaries with form
+    # { 'logprob': <float> } as expected from the OpenAI API.
+
     if args.no_stream:
         print(response["choices"][0]["text"])
-        probs = response[
+        probs = [{'logprob': e} for e in response['choices'][0]['logprobs']['token_logprobs']]
+
     else:
         # Stream the output
         for chunk in response:
             if isinstance(chunk["choices"][0]["logprobs"], dict):
-                probs.
+                probs.append({'logprob': chunk["choices"][0]["logprobs"]['token_logprobs'][0]})
             delta = chunk['choices'][0]['text']
             print(delta, end='', flush=True)
     certainty = compute_certainty(probs)
```
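The logdetective.py change reshapes llama-cpp-python log-probabilities into the list-of-dicts form that `compute_certainty` consumes. Below is a minimal sketch of that conversion, using hypothetical sample data in the CreateCompletionResponse shape referenced by the new comments; it is illustrative, not code from the package.

```python
# Illustrative sketch: convert llama-cpp-python completion logprobs into the
# [{'logprob': <float>}, ...] shape expected by compute_certainty.
# The sample data below is hypothetical.
from typing import Dict, List


def logprobs_from_completion(response: Dict) -> List[Dict[str, float]]:
    """Non-streaming case: all token logprobs arrive in one response."""
    token_logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    return [{"logprob": lp} for lp in token_logprobs]


def logprobs_from_chunk(chunk: Dict) -> List[Dict[str, float]]:
    """Streaming case: each chunk carries the logprob of a single token."""
    logprobs = chunk["choices"][0]["logprobs"]
    if isinstance(logprobs, dict):
        return [{"logprob": logprobs["token_logprobs"][0]}]
    return []


# Hypothetical sample in the CreateCompletionResponse shape.
sample = {"choices": [{"text": "ok", "logprobs": {"token_logprobs": [-0.12, -0.80]}}]}
print(logprobs_from_completion(sample))  # [{'logprob': -0.12}, {'logprob': -0.8}]
```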
logdetective/server.py

```diff
@@ -30,7 +30,7 @@ class Response(BaseModel):
     https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
     response_certainty: float
     """
-    explanation:
+    explanation: Dict
     response_certainty: float
 
 
```
```diff
@@ -44,7 +44,8 @@ class StagedResponse(Response):
     snippets:
     list of dictionaries { 'snippet' : '<original_text>, 'comment': CreateCompletionResponse }
     """
-    snippets: List[Dict[str, str |
+    snippets: List[Dict[str, str | Dict]]
+
 
 LOG = logging.getLogger("logdetective")
 
```
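For context, here is a minimal sketch of what the updated server models look like after these two hunks, assembled only from the fields visible in the diff and assuming pydantic's `BaseModel` (the real classes in logdetective/server.py carry fuller docstrings):

```python
# Sketch assembled from the diff; not the package's full source.
from typing import Dict, List

from pydantic import BaseModel


class Response(BaseModel):
    # `explanation` now holds a whole CreateCompletionResponse-style dict
    # rather than a plain string.
    explanation: Dict
    response_certainty: float


class StagedResponse(Response):
    # Each snippet entry pairs the original text with the per-snippet
    # completion dict, hence the widened value type `str | Dict`.
    snippets: List[Dict[str, str | Dict]]
```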
```diff
@@ -113,7 +114,7 @@ def process_url(url: str) -> str:
 def mine_logs(log: str) -> List[str]:
     """Extract snippets from log text
     """
-    extractor = DrainExtractor(verbose=True, context=True, max_clusters=
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=16)
 
     LOG.info("Getting summary")
     log_summary = extractor(log)
```
```diff
@@ -125,7 +126,7 @@ def mine_logs(log: str) -> List[str]:
     return log_summary
 
 
-async def submit_text(text: str, max_tokens: int =
+async def submit_text(text: str, max_tokens: int = -1, log_probs: int = 1, stream: bool = False,
                       model: str = "default-model"):
     """Submit prompt to LLM.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
```
```diff
@@ -134,8 +135,8 @@ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream
     LOG.info("Analyzing the text")
     data = {
         "prompt": text,
-        "max_tokens":
-        "logprobs":
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
         "stream": stream,
         "model": model}
 
```
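The payload now passes `max_tokens` and `log_probs` through instead of hard-coding them. A sketch of how such a payload might be posted to an OpenAI-compatible completion endpoint follows; the URL and the use of aiohttp are assumptions, only the dictionary keys come from the diff.

```python
# Sketch only: endpoint URL and HTTP client are assumptions.
import asyncio

import aiohttp

LLM_URL = "http://localhost:8000/v1/completions"  # hypothetical inference server


async def submit_text_sketch(text: str, max_tokens: int = -1, log_probs: int = 1,
                             stream: bool = False, model: str = "default-model"):
    # Same payload keys as in the diff; max_tokens=-1 is passed through
    # to mean "generate until EOS / context limit".
    data = {
        "prompt": text,
        "max_tokens": max_tokens,
        "logprobs": log_probs,
        "stream": stream,
        "model": model,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(LLM_URL, json=data) as resp:
            return await resp.json()


# asyncio.run(submit_text_sketch("Explain this build failure: ..."))
```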
```diff
@@ -186,13 +187,13 @@ async def analyze_log(build_log: BuildLog):
     if "logprobs" in response["choices"][0]:
         try:
             certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["top_logprobs"])
+                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
```
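The lookup gains a `["content"][0]` step because log-probabilities are now nested one list deeper. A hypothetical response fragment illustrating the new key path (only the path comes from the diff, the values are made up):

```python
# Hypothetical fragment showing why the lookup gained ["content"][0].
response = {
    "choices": [{
        "logprobs": {
            "content": [{
                "top_logprobs": [{"logprob": -0.05}, {"logprob": -3.2}],
            }],
        },
    }],
}

top_logprobs = response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
print(top_logprobs)  # the list handed to compute_certainty
```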
```diff
@@ -213,27 +214,27 @@ async def analyze_log_staged(build_log: BuildLog):
         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
 
     analyzed_snippets = [
-        {"snippet":e[0], "comment":e[1]} for e in zip(log_summary, analyzed_snippets)]
+        {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets)]
 
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         f"\n{SNIPPET_DELIMITER}\n".join([
             f"[{e["snippet"]}] : [{e["comment"]["choices"][0]["text"]}]"
-
+            for e in analyzed_snippets]))
 
     final_analysis = await submit_text(final_prompt)
-
+    print(final_analysis)
     certainty = 0
 
     if "logprobs" in final_analysis["choices"][0]:
         try:
             certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"])
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis["choices"][0]["logprobs"]["top_logprobs"]}") from ex
+                f"{final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]}") from ex
 
     return StagedResponse(
         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
```
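The staged endpoint pairs each mined snippet with its per-snippet completion and joins the pairs into one final prompt. A runnable sketch of that assembly follows; the template and delimiter strings below are placeholders, the real values live in logdetective/constants.py and are not shown in this diff.

```python
# Sketch of the staged-prompt assembly; templates and delimiter are placeholders.
SNIPPET_DELIMITER = "================"  # placeholder
PROMPT_TEMPLATE_STAGED = "Summarise these snippet analyses:\n{}"  # placeholder

log_summary = ["error: linker failed", "warning: deprecated API"]
analyzed_snippets_raw = [  # stand-ins for per-snippet CreateCompletionResponse dicts
    {"choices": [{"text": "The link step failed."}]},
    {"choices": [{"text": "A deprecated call was used."}]},
]

# Pair each mined snippet with the LLM comment produced for it.
analyzed_snippets = [
    {"snippet": e[0], "comment": e[1]} for e in zip(log_summary, analyzed_snippets_raw)]

# Join "[snippet] : [comment]" pairs with the delimiter, then wrap in the template.
final_prompt = PROMPT_TEMPLATE_STAGED.format(
    f"\n{SNIPPET_DELIMITER}\n".join([
        f"[{e['snippet']}] : [{e['comment']['choices'][0]['text']}]"
        for e in analyzed_snippets]))

print(final_prompt)
```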
logdetective/utils.py

```diff
@@ -15,10 +15,17 @@ LOG = logging.getLogger("logdetective")
 def chunk_continues(text: str, index: int) -> bool:
     """Set of heuristics for determining whether or not
     does the current chunk of log text continue on next line.
+
+    Following rules are checked, in order:
+    * is the next character is whitespace
+    * is the previous character backslash '\\'
+    * is the previous character colon ':'
+
     """
     conditionals = [
         lambda i, string: string[i + 1].isspace(),
-        lambda i, string: string[i - 1] == "\\"
+        lambda i, string: string[i - 1] == "\\",
+        lambda i, string: string[i - 1] == ":"
     ]
 
     for c in conditionals:
```
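Only the conditionals list and the loop header of `chunk_continues` are visible here, so the following reconstruction of how the heuristics might be applied is an assumption (the early return and the IndexError handling in particular):

```python
# Hedged reconstruction: only the conditionals and the loop header come from
# the diff; the return logic and IndexError handling are assumptions.
def chunk_continues(text: str, index: int) -> bool:
    conditionals = [
        lambda i, string: string[i + 1].isspace(),  # next character is whitespace
        lambda i, string: string[i - 1] == "\\",    # previous character is a backslash
        lambda i, string: string[i - 1] == ":",     # previous character is a colon
    ]
    for c in conditionals:
        try:
            if c(index, text):
                return True
        except IndexError:
            return False
    return False


print(chunk_continues("export FOO=bar \\\nbaz", 16))  # True: previous character is '\'
```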
```diff
@@ -74,7 +81,7 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
     return model
 
 
-def compute_certainty(probs: List[Dict
+def compute_certainty(probs: List[Dict]) -> float:
     """Compute certainty of repsponse based on average logit probability.
     Log probability is log(p), isn't really readable for most people, especially in compound.
     In this case it's just a matter of applying inverse operation exp.
```
```diff
@@ -85,7 +92,8 @@ def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
     """
 
     top_logprobs = [
-        np.exp(
+        np.exp(e["logprob"]) * 100 for e in probs]
+
     certainty = np.median(top_logprobs, axis=0)
     if np.isnan(certainty):
         raise ValueError("NaN certainty of answer")
```
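Putting the utils.py hunks together, here is a self-contained version of `compute_certainty` with a worked value; the trailing `return` is assumed, everything else follows the diff. `exp(logprob)` recovers the token probability, `* 100` turns it into a percentage, and the median over tokens is reported.

```python
# Assembled from the diff; the final return statement is an assumption.
from typing import Dict, List

import numpy as np


def compute_certainty(probs: List[Dict]) -> float:
    top_logprobs = [
        np.exp(e["logprob"]) * 100 for e in probs]

    certainty = np.median(top_logprobs, axis=0)
    if np.isnan(certainty):
        raise ValueError("NaN certainty of answer")
    return certainty


# exp(-0.1) ~ 0.905 and exp(-2.3) ~ 0.100 -> median of [90.5, 10.0] ~ 50.3
print(compute_certainty([{"logprob": -0.1}, {"logprob": -2.3}]))
```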