logdetective 2.0.1__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. logdetective/extractors.py +134 -23
  2. logdetective/logdetective.py +39 -23
  3. logdetective/models.py +26 -0
  4. logdetective/prompts-summary-first.yml +0 -2
  5. logdetective/prompts.yml +0 -3
  6. logdetective/server/compressors.py +7 -10
  7. logdetective/server/config.py +3 -2
  8. logdetective/server/database/base.py +31 -26
  9. logdetective/server/database/models/__init__.py +2 -2
  10. logdetective/server/database/models/exceptions.py +4 -0
  11. logdetective/server/database/models/koji.py +47 -30
  12. logdetective/server/database/models/merge_request_jobs.py +205 -186
  13. logdetective/server/database/models/metrics.py +87 -61
  14. logdetective/server/emoji.py +57 -55
  15. logdetective/server/exceptions.py +4 -0
  16. logdetective/server/gitlab.py +18 -11
  17. logdetective/server/llm.py +19 -10
  18. logdetective/server/metric.py +18 -13
  19. logdetective/server/models.py +65 -48
  20. logdetective/server/plot.py +13 -11
  21. logdetective/server/server.py +52 -30
  22. logdetective/server/templates/base_response.html.j2 +59 -0
  23. logdetective/server/templates/gitlab_full_comment.md.j2 +58 -53
  24. logdetective/server/templates/gitlab_short_comment.md.j2 +52 -47
  25. logdetective/server/utils.py +15 -27
  26. logdetective/utils.py +115 -49
  27. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/METADATA +95 -21
  28. logdetective-2.11.0.dist-info/RECORD +40 -0
  29. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  30. logdetective-2.0.1.dist-info/RECORD +0 -39
  31. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  32. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/templates/base_response.html.j2 ADDED
@@ -0,0 +1,59 @@
+ <b>Log Detective report
+ {% if project %}for {{ project }}{% endif %}
+ </b>
+ <p>
+ Analysis:
+ <br>
+ {{ explanation }}
+ <br>
+ <details>
+ <summary>Analysis details</summary>
+ <ul>
+ {% for snippet in snippets %}
+ <li>
+ <b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
+ {{ snippet.explanation.text }}
+ </li>
+ {% endfor %}
+ </ul>
+ </details>
+ </p>
+ <br>
+ <details>
+ <summary>Help</summary>
+ <p>
+ Log Detective analyses logs using template mining algorithm and LLMs.
+ Providing overview of the primary issue, while highlighting potentially important log lines.
+ </p>
+ <p>
+ Log Detective may offer potential solution to problem.
+ However, it is designed to do so only when sufficiently confident in the response.
+ <br>
+ Even so, as with any LLM based AI system, recommendations of Log Detective must be taken with caution.
+ Before acting on any recommendations of Log Detective, consider following:
+ </p>
+ <ul>
+ <li>
+ <b>Do you understand how does the proposed solution work? And why should it solve your problem?</b>
+ </li>
+ <li>Did Log Detective work with sufficient information to make this suggestion?</li>
+ <li>Are the suggested actions safe in the context of your work?</li>
+ <li>Do results of snippet analysis align with results of final analysis?</li>
+ <li>Are there any potentially adverse outcomes from implementing proposed solution?</li>
+ </ul>
+ </details>
+ <br>
+ {% render_contacts %}
+ <details>
+ <summary>Contacts</summary>
+ <p>Don't hesitate to reach out with your questions or suggestions.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Source</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ </ul>
+ </details>
+ {% endif %}
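
The context this new base template expects can be read off the variables it references: `project`, `explanation`, and a list of `snippets` carrying `line_number`, `text`, and a nested `explanation.text`. The sketch below is not package code; it renders a trimmed-down inline version of the snippet loop with Jinja2 and hypothetical data, just to illustrate that shape (the real file also uses a custom `{% render_contacts %}` tag that plain Jinja2 would not recognize).

```python
from types import SimpleNamespace
from jinja2 import Environment

# Trimmed-down stand-in for the snippet loop in base_response.html.j2.
TEMPLATE = """\
<b>Log Detective report{% if project %} for {{ project }}{% endif %}</b>
<p>Analysis:<br>{{ explanation }}</p>
<ul>
{% for snippet in snippets %}
<li><b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
{{ snippet.explanation.text }}</li>
{% endfor %}
</ul>
"""

# Hypothetical objects shaped like the attributes the template accesses.
snippets = [
    SimpleNamespace(
        line_number=128,
        text="error: linker command failed with exit code 1",
        explanation=SimpleNamespace(text="The link step failed; a library is likely missing."),
    )
]

html = Environment().from_string(TEMPLATE).render(
    project="example/package",
    explanation="The build failed during linking.",
    snippets=snippets,
)
print(html)
```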
logdetective/server/templates/gitlab_full_comment.md.j2 CHANGED
@@ -1,68 +1,73 @@
- The package {{ package }} failed to build, here is a possible explanation why.
-
+ The package {{ package }} build has experienced an issue.
  Please know that the explanation was provided by AI and may be incorrect.
  {% if certainty > 0 %}
- In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+ In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
  {% endif %}

- {{ explanation }}
-
+ <details open>
+ <summary>Description</summary>
+ {{ explanation }}
+ </details>
  <details>
+ {#
+ Formatted so that we don't trigger GitLab markdown
+ #}
+ <summary>Snippets</summary>
  <ul>
- {% for snippet in snippets %}
- <li>
- <b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text }}</code>
- {{ snippet.explanation.text }}
- </li>
- {% endfor %}
+ {% for snippet in snippets -%}
+ <li><div><b>Line {{ snippet.line_number }}:</b> <code>{{ snippet.text | e }}</code><br>{{ snippet.explanation.text | e }}</div></li>
+ {%- endfor %}
  </ul>
  </details>
-
  <details>
- <summary>Logs</summary>
- <p>
- Log Detective analyzed the following logs files to provide an explanation:
- </p>
-
- <ul>
- <li><a href="{{ log_url }}">{{ log_url }}</a></li>
- </ul>
-
- <p>
- Additional logs are available from:
+ <summary>Logs</summary>
+ <p>Log Detective analyzed the following logs files to provide an explanation:</p>
  <ul>
- <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
- </ul>
- </p>
-
- <p>
- Please know that these log files are automatically removed after some
- time, so you might need a backup.
- </p>
+ <li>
+ <a href="{{ log_url }}">{{ log_url }}</a>
+ </li>
+ </ul>
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li>
+ <a href="{{ artifacts_url }}">artifacts.zip</a>
+ </li>
+ </ul>
+ </p>
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
  </details>
-
  <details>
- <summary>Help</summary>
- <p>Don't hesitate to reach out.</p>
-
- <ul>
- <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
- <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
- <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
- <li><a href="https://log-detective.com/documentation">Documentation</a></li>
- </ul>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ <li>
+ <a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+ </li>
+ <li>
+ <a href="https://log-detective.com/documentation">Documentation</a>
+ </li>
+ </ul>
  </details>

+ <hr>

- ---
- This comment was created by [Log Detective][log-detective].
-
- Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
- or :thumbsdown: to help us improve.<br>
-
- <i>If this Log Detective report contains harmful content, please use the
- [Gitlab reporting feature for harmful content](https://docs.gitlab.com/user/report_abuse/)
- and contact the [Log Detective developers](https://github.com/fedora-copr/logdetective/issues).</i>
-
- [log-detective]: https://log-detective.com/
- [contact]: https://github.com/fedora-copr
+ This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+ Always review AI generated content prior to use.
+ Was the provided feedback accurate and helpful?
+ <br>
+ Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.
+ <br>
+ <i>If this Log Detective report contains harmful content,
+ please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+ and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
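
Two things stand out in the new snippet loop: the whitespace-control markers (`-%}` / `{%-`) keep each `<li>` on a single line so GitLab's markdown renderer does not mistake indented template output for a code block (as the in-template comment suggests), and `snippet.text` / `snippet.explanation.text` are now passed through Jinja's built-in `e` filter. A small standalone sketch, not from the package, of what that filter does to a raw log line:

```python
from jinja2 import Environment

env = Environment()
# Same escaping the template now applies via `{{ snippet.text | e }}`.
rendered = env.from_string("<code>{{ text | e }}</code>").render(
    text="#include <missing/header.h> not found"
)
print(rendered)
# <code>#include &lt;missing/header.h&gt; not found</code>
```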
logdetective/server/templates/gitlab_short_comment.md.j2 CHANGED
@@ -1,57 +1,62 @@
- The package {{ package }} failed to build, here is a possible explanation why.
-
+ The package {{ package }} build has experienced an issue.
  Please know that the explanation was provided by AI and may be incorrect.
  {% if certainty > 0 %}
- In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
+ In this case, we are {{ "%.2f" | format(certainty) }}% certain of the response {{ emoji_face }}.
  {% endif %}

- {{ explanation }}
-
+ <details open>
+ <summary>Description</summary>
+ {{ explanation }}
+ </details>
  <details>
- <summary>Logs</summary>
- <p>
- Log Detective analyzed the following logs files to provide an explanation:
- </p>
-
- <ul>
- <li><a href="{{ log_url }}">{{ log_url }}</a></li>
- </ul>
-
- <p>
- Additional logs are available from:
+ <summary>Logs</summary>
+ <p>Log Detective analyzed the following logs files to provide an explanation:</p>
  <ul>
- <li><a href="{{ artifacts_url }}">artifacts.zip</a></li>
- </ul>
- </p>
-
- <p>
- Please know that these log files are automatically removed after some
- time, so you might need a backup.
- </p>
+ <li>
+ <a href="{{ log_url }}">{{ log_url }}</a>
+ </li>
+ </ul>
+ <p>
+ Additional logs are available from:
+ <ul>
+ <li>
+ <a href="{{ artifacts_url }}">artifacts.zip</a>
+ </li>
+ </ul>
+ </p>
+ <p>
+ Please know that these log files are automatically removed after some
+ time, so you might need a backup.
+ </p>
  </details>
-
  <details>
- <summary>Help</summary>
- <p>Don't hesitate to reach out.</p>
-
- <ul>
- <li><a href="https://github.com/fedora-copr/logdetective">Upstream</a></li>
- <li><a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a></li>
- <li><a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a></li>
- <li><a href="https://log-detective.com/documentation">Documentation</a></li>
- </ul>
+ <summary>Help</summary>
+ <p>Don't hesitate to reach out.</p>
+ <ul>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective">Upstream</a>
+ </li>
+ <li>
+ <a href="https://github.com/fedora-copr/logdetective/issues">Issue tracker</a>
+ </li>
+ <li>
+ <a href="https://redhat.enterprise.slack.com/archives/C06DWNVKKDE">Slack</a>
+ </li>
+ <li>
+ <a href="https://log-detective.com/documentation">Documentation</a>
+ </li>
+ </ul>
  </details>

-
- ---
- This comment was created by [Log Detective][log-detective].
-
- Was the provided feedback accurate and helpful? <br>Please vote with :thumbsup:
- or :thumbsdown: to help us improve.<br>
-
- <i>If this Log Detective report contains harmful content, please use the
- [Gitlab reporting feature for harmful content](https://docs.gitlab.com/user/report_abuse/)
- and contact the [Log Detective developers](https://github.com/fedora-copr/logdetective/issues).</i>
-
- [log-detective]: https://log-detective.com/
- [contact]: https://github.com/fedora-copr
+ <hr>
+
+ This explanation was provided by AI (<a href="https://logdetective.com">Log Detective</a>).
+ Always review AI generated content prior to use.
+ Was the provided feedback accurate and helpful?
+ <br>
+ Please vote with :thumbsup:
+ or :thumbsdown: to help us improve.
+ <br>
+ <i>If this Log Detective report contains harmful content,
+ please use the <a href="https://docs.gitlab.com/user/report_abuse/">Gitlab reporting feature for harmful content</a>
+ and contact the <a href="https://github.com/fedora-copr/logdetective/issues">Log Detective developers</a>.</i>
logdetective/server/utils.py CHANGED
@@ -1,15 +1,11 @@
- from typing import List, Tuple
+ from typing import List
+ from importlib.metadata import version

  import aiohttp
  from fastapi import HTTPException

  from logdetective.constants import SNIPPET_DELIMITER
- from logdetective.extractors import DrainExtractor
- from logdetective.server.config import (
- LOG,
- SERVER_CONFIG,
- SKIP_SNIPPETS_CONFIG,
- )
+ from logdetective.server.config import LOG
  from logdetective.server.exceptions import LogDetectiveConnectionError
  from logdetective.server.models import AnalyzedSnippet, RatedSnippetAnalysis

@@ -22,26 +18,6 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
  return summary


- def mine_logs(log: str) -> List[Tuple[int, str]]:
- """Extract snippets from log text"""
- extractor = DrainExtractor(
- verbose=True,
- context=True,
- max_clusters=SERVER_CONFIG.extractor.max_clusters,
- skip_snippets=SKIP_SNIPPETS_CONFIG,
- max_snippet_len=SERVER_CONFIG.extractor.max_snippet_len
- )
-
- LOG.info("Getting summary")
- log_summary = extractor(log)
-
- ratio = len(log_summary) / len(log.split("\n"))
- LOG.debug("Log summary: \n %s", log_summary)
- LOG.info("Compression ratio: %s", ratio)
-
- return log_summary
-
-
  def connection_error_giveup(details: dict) -> None:
  """Too many connection errors, give up.
  """
@@ -120,3 +96,15 @@ def filter_snippets(
  processed_snippets = sorted(processed_snippets, key=select_line_number)

  return processed_snippets
+
+
+ def construct_final_prompt(formatted_snippets: str, prompt_template: str) -> str:
+ """Create final prompt from processed snippets and csgrep output, if it is available."""
+
+ final_prompt = prompt_template.format(formatted_snippets)
+ return final_prompt
+
+
+ def get_version() -> str:
+ """Obtain the version number using importlib"""
+ return version('logdetective')
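
Both helpers added at the bottom of `server/utils.py` are thin wrappers, so their behaviour is easy to illustrate; the prompt template and snippet text below are made up for the example, not the package's shipped prompts.

```python
from importlib.metadata import PackageNotFoundError, version

# get_version() defers to importlib.metadata; outside an environment where the
# wheel is installed this raises PackageNotFoundError, so a caller might guard it.
try:
    print(version("logdetective"))  # e.g. "2.11.0"
except PackageNotFoundError:
    print("logdetective is not installed")

# construct_final_prompt() is a plain str.format() call.
prompt_template = "Explain the failure indicated by these snippets:\n{}"  # hypothetical template
formatted_snippets = "Snippet No. 0 at line #42:\n\nerror: package foo not found"
print(prompt_template.format(formatted_snippets))
```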
logdetective/utils.py CHANGED
@@ -1,5 +1,6 @@
  import logging
  import os
+ import subprocess as sp
  from typing import Iterator, List, Dict, Tuple, Generator
  from urllib.parse import urlparse

@@ -7,60 +8,73 @@ import aiohttp
  import numpy as np
  import yaml

- from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
+ from llama_cpp import (
+ Llama,
+ CreateChatCompletionResponse,
+ CreateChatCompletionStreamResponse,
+ )
+ from logdetective.constants import SNIPPET_DELIMITER
  from logdetective.models import PromptConfig, SkipSnippets
  from logdetective.remote_log import RemoteLog

-
  LOG = logging.getLogger("logdetective")


- def chunk_continues(text: str, index: int) -> bool:
+ def new_message(text: str) -> bool:
  """Set of heuristics for determining whether or not
  does the current chunk of log text continue on next line.

  Following rules are checked, in order:
- * is the next character is whitespace
- * is the previous character backslash '\\'
- * is the previous character colon ':'
-
+ * is the first character is whitespace
+ * is the first character backslash '|'
  """
  conditionals = [
- lambda i, string: string[i + 1].isspace(),
- lambda i, string: string[i - 1] == "\\",
- lambda i, string: string[i - 1] == ":",
+ lambda string: string[0].isspace(),
+ lambda string: string[0] == "|",
  ]

  for c in conditionals:
- y = c(index, text)
+ y = c(text)
  if y:
- return True
+ return False

- return False
+ return True


- def get_chunks(text: str, max_len: int = 2000) -> Generator[Tuple[int, str], None, None]:
+ def get_chunks(
+ text: str, max_chunk_len: int = 2000
+ ) -> Generator[Tuple[int, str], None, None]:
  """Split log into chunks according to heuristic
  based on whitespace and backslash presence.
  """
- text_len = len(text)
- i = 0
+ lines = text.splitlines()
+
+ # Chunk we will be yielding
  chunk = ""
- # Keep track of the original and next line number
- # every `\n` hit increases the next_line_number by one.
- original_line_number = 0
- next_line_number = 0
- while i < text_len:
- chunk += text[i]
- if text[i] == "\n":
- next_line_number += 1
- if i + 1 < text_len and chunk_continues(text, i) and i + 1 < max_len:
- i += 1
- continue
- yield (original_line_number, chunk)
- original_line_number = next_line_number + 1
- chunk = ""
- i += 1
+ # Number of line where the message started
+ original_line = 0
+ for i, line in enumerate(lines):
+ if len(line) == 0:
+ continue
+ if new_message(line):
+ # Yield chunk if we have it
+ if len(chunk) > 0:
+ yield (original_line, chunk)
+ original_line = i
+ chunk = line
+ else:
+ chunk += "\n" + line
+ if len(chunk) > max_chunk_len:
+ # If the chunk is too long, keep splitting into smaller chunks
+ # until we reach manageable size
+ while len(chunk) > max_chunk_len:
+ remainder = chunk[max_chunk_len:]
+ chunk = chunk[:max_chunk_len]
+ yield (original_line, chunk)
+ chunk = remainder
+
+ # if we still have some text left over
+ yield (original_line, chunk)


  def initialize_model(
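
The rewritten chunking logic is line-oriented: `new_message` decides whether a line starts a new log message (anything not beginning with whitespace or `|`), and `get_chunks` folds continuation lines into the previous chunk, splitting oversized chunks at `max_chunk_len`. A toy log, invented for illustration, shows the effect, assuming logdetective 2.11.0 is installed so `get_chunks` can be imported:

```python
# Assumes logdetective >= 2.11.0 is installed, where get_chunks lives in
# logdetective.utils as shown in the diff above.
from logdetective.utils import get_chunks

SAMPLE_LOG = "\n".join([
    "Building target platforms: x86_64",                          # new message
    "error: Bad exit status from /var/tmp/rpm-tmp.123 (%build)",  # new message
    "    RPM build errors:",                                      # leading whitespace -> continuation
    "| child exited with status 1",                               # leading '|' -> continuation
])

for start_line, chunk in get_chunks(SAMPLE_LOG):
    print(start_line, repr(chunk))
# Expected per the heuristic: two chunks, the second starting at line 1 and
# carrying both continuation lines with it.
```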
@@ -113,8 +127,8 @@ def compute_certainty(probs: List[Dict]) -> float:


  def process_log(
- log: str, model: Llama, stream: bool, prompt_template: str, temperature: float
- ) -> CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]:
+ log: str, model: Llama, stream: bool, prompt_templates: PromptConfig, temperature: float
+ ) -> CreateChatCompletionResponse | Iterator[CreateChatCompletionStreamResponse]:
  """Processes a given log using the provided language model and returns its summary.

  Args:
@@ -126,11 +140,23 @@
  Returns:
  str: The summary of the given log generated by the language model.
  """
- response = model(
- prompt=prompt_template.format(log),
+ messages = [
+ {
+ "role": "system",
+ "content": prompt_templates.default_system_prompt
+ },
+ {
+ "role": "user",
+ "content": prompt_templates.prompt_template.format(log)
+ },
+ ]
+
+ response = model.create_chat_completion(
+ messages=messages,
  stream=stream,
  max_tokens=0,
- logprobs=1,
+ logprobs=True,
+ top_logprobs=1,
  temperature=temperature,
  )

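
`process_log` now goes through llama-cpp-python's chat-completion API instead of a raw completion call, with the system and user prompts taken from a `PromptConfig`. The sketch below mirrors the arguments visible in the diff using plain strings; the model path and prompt text are placeholders, not the package's defaults.

```python
from llama_cpp import Llama

# Placeholder model path; any local GGUF model would do for this sketch.
llm = Llama(model_path="/path/to/model.gguf")

messages = [
    {"role": "system", "content": "You are an assistant that explains build failures."},
    {"role": "user", "content": "Explain this log excerpt:\n{}".format("error: missing dependency foo")},
]

response = llm.create_chat_completion(
    messages=messages,
    stream=False,
    max_tokens=0,    # same settings as in the diff above
    logprobs=True,   # token log-probabilities feed the certainty estimate
    top_logprobs=1,
    temperature=0.8,
)
print(response["choices"][0]["message"]["content"])
```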
@@ -166,22 +192,21 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
  Line number must be first element in the tuple. Mixed format of snippets
  is permitted, but may have impact on inference.
  """
- summary = ""
+ summary = "\n"
  for i, s in enumerate(snippets):
  if isinstance(s, tuple):
- summary += f"""
- Snippet No. {i} at line #{s[0]}:
-
- {s[1]}
- ================
- """
+ line_number, snippet_content = s
+ header = f"Snippet No. {i} at line #{line_number}:"
  else:
- summary += f"""
- Snippet No. {i}:
-
- {s}
- ================
- """
+ header = f"Snippet No. {i}:"
+ snippet_content = s
+ summary += (
+ f"{header}\n"
+ "\n"
+ f"{snippet_content}\n"
+ f"{SNIPPET_DELIMITER}\n"
+ f"\n"
+ )
  return summary


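
The snippet summary is now built from plain f-strings joined by `SNIPPET_DELIMITER`, which avoids the indentation the old triple-quoted blocks carried into the prompt. A standalone rendition of the loop, with a stand-in delimiter since the real value lives in `logdetective.constants`:

```python
SNIPPET_DELIMITER = "================"  # stand-in; the package imports its own constant

def format_snippets_demo(snippets):
    """Mirror of the new format_snippets() loop, for illustration only."""
    summary = "\n"
    for i, s in enumerate(snippets):
        if isinstance(s, tuple):
            line_number, snippet_content = s
            header = f"Snippet No. {i} at line #{line_number}:"
        else:
            header = f"Snippet No. {i}:"
            snippet_content = s
        summary += f"{header}\n\n{snippet_content}\n{SNIPPET_DELIMITER}\n\n"
    return summary

print(format_snippets_demo([(12, "error: missing dependency"), "make: *** [all] Error 2"]))
```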
@@ -247,3 +272,44 @@ def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
  raise e

  return SkipSnippets({})
+
+
+ def check_csgrep() -> bool:
+ """Verifies presence of csgrep in path"""
+ try:
+ result = sp.run(
+ ["csgrep", "--version"],
+ text=True,
+ check=True,
+ shell=False,
+ capture_output=True,
+ timeout=1.0,
+ )
+ except (FileNotFoundError, sp.TimeoutExpired, sp.CalledProcessError) as ex:
+ LOG.error("Required binary `csgrep` was not found in path: %s", ex)
+ return False
+ if result.returncode == 0:
+ return True
+ LOG.error("Issue was encountered while calling `csgrep`: `%s`", result.stderr)
+
+ return False
+
+
+ def mine_logs(log: str, extractors: list) -> List[Tuple[int, str]]:
+ """Extract snippets from log text using extractors provided.
+ Each extractor is applied in turn on original log.
+ Depending on characteristics of extractors used, there may be
+ an overlap in snippets extracted."""
+
+ log_summary = []
+
+ LOG.info("Getting summary")
+
+ for extractor in extractors:
+ log_summary.extend(extractor(log))
+
+ ratio = len("\n".join([text for _, text in log_summary])) / len(log)
+ LOG.debug("Log summary: \n %s", log_summary)
+ LOG.info("Snippets: %s Compression ratio: %s", len(log_summary), ratio)
+
+ return log_summary
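
`mine_logs` has moved here from `logdetective/server/utils.py` and now accepts a list of extractor callables, concatenating whatever `(line_number, text)` pairs each one returns. Any callable with that shape can be passed; the toy regex extractor below is invented for illustration and is not part of the package.

```python
import re
from typing import List, Tuple

def error_line_extractor(log: str) -> List[Tuple[int, str]]:
    """Minimal extractor-like callable: pick out lines mentioning 'error'."""
    return [
        (i, line)
        for i, line in enumerate(log.splitlines())
        if re.search(r"\berror\b", line, re.IGNORECASE)
    ]

sample_log = "configure: ok\nerror: header foo.h not found\nmake: *** Error 1"

# With logdetective >= 2.11.0 installed, the real function could be called as:
#   from logdetective.utils import mine_logs
#   snippets = mine_logs(sample_log, [error_line_extractor])
print(error_line_extractor(sample_log))
# [(1, 'error: header foo.h not found'), (2, 'make: *** Error 1')]
```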