logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/prompts.yml
@@ -0,0 +1,90 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+  Snippets themselves must not be altered in any way whatsoever.
+
+  Snippets are delimited with '================'.
+
+  Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
+
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
+  Snippets:
+
+  {}
+
+
+snippet_prompt_template: |
+  Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution
+  and provide estimate of snippet relevance.
+  Your analysis must be as concise as possible, while keeping relevant information intact.
+
+  Snippet:
+
+  {}
+
+
+prompt_template_staged: |
+  Given following log snippets, their explanation, and nothing else, explain what failure, if any, occurred during build of this package.
+
+  Snippets are in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+
+  Snippets are delimited with '================'.
+
+  Drawing on information from all snippets, provide a concise explanation of the issue and recommend a solution.
+
+  Explanation of the issue, and recommended solution, should take a handful of sentences.
+
+  Snippets:
+
+  {}
+
+
+# System prompts
+# System prompts are meant to serve as general guide for model behavior,
+# describing role and purpose it is meant to serve.
+# Sample system prompts in this file are intentionally the same,
+# however, in some circumstances it may be beneficial have different
+# system prompts for each sub case. For example when a specialized model is deployed
+# to analyze snippets.
+
+# Default prompt is used by the CLI tool and also for final analysis
+# with /analyze and /analyze/stream API endpoints
+default_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
+
+# Snippet system prompt is used for analysis of individual snippets
+snippet_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
+
+
+# Staged system prompt is used by /analyze/staged API endpoint
+staged_system_prompt: |
+  You are a highly capable large language model based expert system specialized in
+  packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+  RPM build failures, identifying root causes and proposing solutions if possible.
+  You are truthful, concise, and helpful.
+
+  You never speculate about package being built or fabricate information.
+  If you do not know the answer, you acknowledge the fact and end your response.
+  Your responses must be as short as possible.
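The file's own comments say the server picks these prompts up on its own, while the CLI needs them loaded explicitly via --prompts. A minimal sketch of loading such a file programmatically, using the load_prompts helper that the server config module (below) imports from logdetective.utils; the path is a hypothetical example:

import logdetective
from logdetective.utils import load_prompts

# Load a customized prompts file; per the comments above, the defaults from
# constants.py apply when no file is supplied.
prompt_config = load_prompts("/etc/logdetective/prompts.yml")  # hypothetical path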
logdetective/remote_log.py
@@ -0,0 +1,67 @@
+import logging
+from urllib.parse import urlparse
+
+import aiohttp
+from aiohttp.web import HTTPBadRequest
+
+LOG = logging.getLogger("logdetective")
+
+
+class RemoteLog:
+    """
+    Handles retrieval of remote log files.
+    """
+
+    def __init__(self, url: str, http_session: aiohttp.ClientSession):
+        """
+        Initialize with a remote log URL and HTTP session.
+
+        Args:
+            url: A remote URL pointing to a log file
+            http_session: The HTTP session used to retrieve the remote file
+        """
+        self._url = url
+        self._http_session = http_session
+
+    @property
+    def url(self) -> str:
+        """The remote log url."""
+        return self._url
+
+    @property
+    async def content(self) -> str:
+        """Content of the url."""
+        return await self.get_url_content()
+
+    def validate_url(self) -> bool:
+        """Validate incoming URL to be at least somewhat sensible for log files
+        Only http and https protocols permitted. No result, params or query fields allowed.
+        Either netloc or path must have non-zero length.
+        """
+        result = urlparse(self.url)
+        if result.scheme not in ["http", "https"]:
+            return False
+        if any([result.params, result.query, result.fragment]):
+            return False
+        if not (result.path or result.netloc):
+            return False
+        return True
+
+    async def get_url_content(self) -> str:
+        """validate log url and return log text."""
+        if self.validate_url():
+            LOG.debug("process url %s", self.url)
+            try:
+                response = await self._http_session.get(self.url, raise_for_status=True)
+            except (aiohttp.ClientResponseError, aiohttp.ClientConnectorError) as ex:
+                raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+            return await response.text()
+        LOG.error("Invalid URL received ")
+        raise RuntimeError(f"Invalid log URL: {self.url}")
+
+    async def process_url(self) -> str:
+        """Validate log URL and return log text."""
+        try:
+            return await self.get_url_content()
+        except RuntimeError as ex:
+            raise HTTPBadRequest(reason=f"We couldn't obtain the logs: {ex}") from ex
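A short usage sketch for the new RemoteLog class, assuming the caller owns the aiohttp session; the URL is a placeholder:

import asyncio

import aiohttp

from logdetective.remote_log import RemoteLog


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        remote_log = RemoteLog("https://example.com/build.log", session)  # placeholder URL
        text = await remote_log.content  # validates the URL, then downloads it
        print(text[:200])


asyncio.run(main())

Note the split in error handling: get_url_content() raises RuntimeError for both invalid URLs and download failures, while process_url() converts that into an HTTPBadRequest for the web layer.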
logdetective/server/compressors.py
@@ -0,0 +1,186 @@
+import io
+import zipfile
+
+from typing import Union, Dict
+from logdetective.remote_log import RemoteLog
+from logdetective.server.models import (
+    StagedResponse,
+    Response,
+    AnalyzedSnippet,
+    Explanation,
+)
+
+
+class TextCompressor:
+    """
+    Encapsulates one or more texts in one or more files with the specified names
+    and provides methods to retrieve them later.
+    """
+
+    def zip(self, items: Dict[str, str]) -> bytes:
+        """
+        Compress multiple texts into different files within a zip archive.
+
+        Args:
+            items: Dictionary where keys are file names and values are text content
+                to be compressed
+
+        Returns:
+            bytes: The compressed zip archive as bytes
+        """
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
+            for key, value in items.items():
+                zip_file.writestr(key, value)
+
+        zip_buffer.seek(0)
+        return zip_buffer.getvalue()
+
+    def unzip(self, zip_data: bytes) -> Dict[str, str]:
+        """
+        Uncompress data created by TextCompressor.zip().
+
+        Args:
+            zip_data: A zipped stream of bytes
+
+        Returns:
+            {file_name: str}: The decompressed content as a dict of file names and UTF-8 strings
+        """
+        zip_buffer = io.BytesIO(zip_data)
+
+        content = {}
+        with zipfile.ZipFile(zip_buffer, "r") as zip_file:
+            file_list = zip_file.namelist()
+            for file_name in file_list:
+                content[file_name] = zip_file.read(file_name).decode("utf-8")
+
+        return content
+
+
+class RemoteLogCompressor:
+    """
+    Handles compression of remote log files.
+    """
+
+    LOG_FILE_NAME = "log.txt"
+    COMPRESSOR = TextCompressor()
+
+    def __init__(self, remote_log: RemoteLog):
+        """
+        Initialize with a RemoteLog object.
+        """
+        self._remote_log = remote_log
+
+    @classmethod
+    def zip_text(cls, text: str) -> bytes:
+        """
+        Compress the given text.
+
+        Returns:
+            bytes: Compressed text
+        """
+        return cls.COMPRESSOR.zip({cls.LOG_FILE_NAME: text})
+
+    async def zip_content(self) -> bytes:
+        """
+        Compress the content of the remote log.
+
+        Returns:
+            bytes: Compressed log content
+        """
+        content_text = await self._remote_log.content
+        return self.zip_text(content_text)
+
+    @classmethod
+    def unzip(cls, zip_data: bytes) -> str:
+        """
+        Uncompress the zipped content of the remote log.
+
+        Args:
+            zip_data: Compressed data as bytes
+
+        Returns:
+            str: The decompressed log content
+        """
+        return cls.COMPRESSOR.unzip(zip_data)[cls.LOG_FILE_NAME]
+
+
+class LLMResponseCompressor:
+    """
+    Handles compression and decompression of LLM responses.
+    """
+
+    EXPLANATION_FILE_NAME = "explanation.txt"
+    SNIPPET_FILE_NAME = "snippet_{number}.txt"
+    COMPRESSOR = TextCompressor()
+
+    def __init__(self, response: Union[StagedResponse, Response]):
+        """
+        Initialize with an LLM response.
+
+        Args:
+            response: Either a StagedResponse or Response object
+        """
+        self._response = response
+
+    def zip_response(self) -> bytes:
+        """
+        Compress the content of the LLM response.
+
+        Returns:
+            bytes: Compressed response as bytes
+        """
+        items = {
+            self.EXPLANATION_FILE_NAME: self._response.explanation.model_dump_json()
+        }
+
+        if isinstance(self._response, StagedResponse):
+            for i, snippet in enumerate(self._response.snippets):
+                items[self.SNIPPET_FILE_NAME.format(number=i)] = (
+                    snippet.model_dump_json()
+                )
+
+        return self.COMPRESSOR.zip(items)
+
+    @classmethod
+    def unzip(
+        cls, zip_data: bytes
+    ) -> Union[StagedResponse, Response]:
+        """
+        Uncompress the zipped content of the LLM response.
+
+        Args:
+            zip_data: Compressed data as bytes
+
+        Returns:
+            Union[StagedResponse, Response]: The decompressed (partial) response object,
+                missing response_certainty.
+        """
+        items = cls.COMPRESSOR.unzip(zip_data)
+        if cls.EXPLANATION_FILE_NAME not in items:
+            raise KeyError(
+                f"Required file {cls.EXPLANATION_FILE_NAME} not found in zip archive"
+            )
+        explanation = Explanation.model_validate_json(items[cls.EXPLANATION_FILE_NAME])
+
+        snippets = []
+        snippet_files = {
+            k: v
+            for k, v in items.items()
+            if cls.SNIPPET_FILE_NAME.replace("{number}.txt", "") in k
+        }
+        for i in range(len(snippet_files)):
+            snippets.append(
+                AnalyzedSnippet.model_validate_json(
+                    items[cls.SNIPPET_FILE_NAME.format(number=i)]
+                )
+            )
+
+        if snippets:
+            response = StagedResponse(
+                explanation=explanation, snippets=snippets, response_certainty=0
+            )
+        else:
+            response = Response(explanation=explanation, response_certainty=0)
+
+        return response
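Because TextCompressor.zip() and unzip() are symmetric, a round trip returns the original mapping; a minimal sketch with made-up file names and content:

from logdetective.server.compressors import TextCompressor

compressor = TextCompressor()
# Two hypothetical entries: file name -> text content.
archive = compressor.zip({"build.log": "error: ld returned 1", "notes.txt": "retry"})
assert compressor.unzip(archive) == {"build.log": "error: ld returned 1", "notes.txt": "retry"}

LLMResponseCompressor.unzip() builds on the same property: it reconstructs a StagedResponse when any snippet_N.txt entries are present and a plain Response otherwise, with response_certainty zeroed out since the archive does not store it.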
logdetective/server/config.py
@@ -0,0 +1,78 @@
+import os
+import logging
+import yaml
+from openai import AsyncOpenAI
+
+from logdetective.utils import load_prompts, load_skip_snippet_patterns
+from logdetective.server.models import Config, InferenceConfig
+import logdetective
+
+
+def load_server_config(path: str | None) -> Config:
+    """Load configuration file for logdetective server.
+    If no path was provided, or if the file doesn't exist, return defaults.
+    """
+    if path is not None:
+        try:
+            with open(path, "r") as config_file:
+                return Config(yaml.safe_load(config_file))
+        except FileNotFoundError:
+            # This is not an error, we will fall back to default
+            print("Unable to find server config file, using default then.")
+    return Config()
+
+
+def get_log(config: Config):
+    """
+    Initialize a logger for this server
+    """
+    log = logging.getLogger(config.log.name)
+    if getattr(log, "initialized", False):
+        return log
+
+    log.setLevel("DEBUG")
+
+    # Drop the default handler, we will create it ourselves
+    log.handlers = []
+
+    # STDOUT
+    stream_handler = logging.StreamHandler()
+    stream_handler.setFormatter(logging.Formatter(config.log.format))
+    stream_handler.setLevel(config.log.level_stream)
+    log.addHandler(stream_handler)
+
+    # Log to file
+    if config.log.path:
+        file_handler = logging.FileHandler(config.log.path)
+        file_handler.setFormatter(logging.Formatter(config.log.format))
+        file_handler.setLevel(config.log.level_file)
+        log.addHandler(file_handler)
+
+    log.initialized = True
+    return log
+
+
+def get_openai_api_client(inference_config: InferenceConfig):
+    """Set up AsyncOpenAI client with default configuration."""
+    return AsyncOpenAI(
+        api_key=inference_config.api_token, base_url=inference_config.url,
+        timeout=inference_config.llm_api_timeout
+    )
+
+
+SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
+SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
+# The default location for skip patterns is in the same directory
+# as logdetective __init__.py file.
+SERVER_SKIP_PATTERNS_PATH = os.environ.get(
+    "LOGDETECIVE_SKIP_PATTERNS",
+    f"{os.path.dirname(logdetective.__file__)}/skip_snippets.yml",
+)
+
+SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
+PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
+SKIP_SNIPPETS_CONFIG = load_skip_snippet_patterns(SERVER_SKIP_PATTERNS_PATH)
+
+LOG = get_log(SERVER_CONFIG)
+
+CLIENT = get_openai_api_client(SERVER_CONFIG.inference)
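Since SERVER_CONFIG, PROMPT_CONFIG, LOG, and CLIENT are all built at module import time, the environment variables have to be set before this module is first imported; a sketch, with a placeholder path:

import os

# Must happen before the first import of logdetective.server.config.
os.environ["LOGDETECTIVE_SERVER_CONF"] = "/etc/logdetective/server.yml"  # placeholder path

from logdetective.server.config import LOG, SERVER_CONFIG  # noqa: E402

LOG.info("inference endpoint: %s", SERVER_CONFIG.inference.url)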
logdetective/server/database/base.py
@@ -1,15 +1,14 @@
 from os import getenv
-from contextlib import contextmanager
-from sqlalchemy import create_engine
-from sqlalchemy.orm import sessionmaker, declarative_base
-
+from contextlib import asynccontextmanager
+from sqlalchemy.orm import DeclarativeBase
+from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
 from logdetective import logger


 def get_pg_url() -> str:
     """create postgresql connection string"""
     return (
-        f"postgresql+psycopg2://{getenv('POSTGRESQL_USER')}"
+        f"postgresql+asyncpg://{getenv('POSTGRESQL_USER')}"
         f":{getenv('POSTGRESQL_PASSWORD')}@{getenv('POSTGRESQL_HOST', 'postgres')}"
         f":{getenv('POSTGRESQL_PORT', '5432')}/{getenv('POSTGRESQL_DATABASE')}"
     )
@@ -23,13 +22,16 @@ sqlalchemy_echo = getenv("SQLALCHEMY_ECHO", "False").lower() in (
     "y",
     "1",
 )
-engine = create_engine(get_pg_url(), echo=sqlalchemy_echo)
-SessionFactory = sessionmaker(autoflush=True, bind=engine)
-Base = declarative_base()
+engine = create_async_engine(get_pg_url(), echo=sqlalchemy_echo)
+SessionFactory = async_sessionmaker(autoflush=True, bind=engine)  # pylint: disable=invalid-name
+

+class Base(DeclarativeBase):
+    """Declarative base class for all ORM models."""

-@contextmanager
-def transaction(commit: bool = False):
+
+@asynccontextmanager
+async def transaction(commit: bool = False):
     """
     Context manager for 'framing' a db transaction.

@@ -39,25 +41,31 @@ def transaction(commit: bool = False):
     """

     session = SessionFactory()
-    try:
-        yield session
-        if commit:
-            session.commit()
-    except Exception as ex:
-        logger.warning("Exception while working with database: %s", str(ex))
-        session.rollback()
-        raise
-    finally:
-        session.close()
-
-
-def init():
+    async with session:
+        try:
+            yield session
+            if commit:
+                await session.commit()
+        except Exception as ex:
+            logger.warning("Exception while working with database: %s", str(ex))
+            await session.rollback()
+            raise
+        finally:
+            await session.close()
+
+
+async def init():
     """Init db"""
-    Base.metadata.create_all(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
     logger.debug("Database initialized")


-def destroy():
+async def destroy():
     """Destroy db"""
-    Base.metadata.drop_all(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.drop_all)
     logger.warning("Database cleaned")
+
+
+DB_MAX_RETRIES = 3  # How many times retry a db operation
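A usage sketch for the reworked async transaction() context manager: reads can keep the default commit=False, while writes pass commit=True so the session commits on success and rolls back if the block raises. The record argument stands in for any mapped ORM instance:

from logdetective.server.database.base import transaction


async def save(record) -> None:
    # Commit on clean exit; the context manager rolls back on exception.
    async with transaction(commit=True) as session:
        session.add(record)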
logdetective/server/database/models/__init__.py
@@ -0,0 +1,33 @@
+from logdetective.server.database.models.merge_request_jobs import (
+    Forge,
+    GitlabMergeRequestJobs,
+    Comments,
+    Reactions,
+)
+from logdetective.server.database.models.koji import (
+    KojiTaskAnalysis,
+)
+from logdetective.server.database.models.metrics import (
+    AnalyzeRequestMetrics,
+    EndpointType,
+)
+from logdetective.server.database.models.exceptions import (
+    KojiTaskNotFoundError,
+    KojiTaskNotAnalyzedError,
+    KojiTaskAnalysisTimeoutError,
+)
+
+# pylint: disable=undefined-all-variable
+
+__all__ = [
+    GitlabMergeRequestJobs.__name__,
+    Comments.__name__,
+    Reactions.__name__,
+    AnalyzeRequestMetrics.__name__,
+    EndpointType.__name__,
+    Forge.__name__,
+    KojiTaskAnalysis.__name__,
+    KojiTaskNotFoundError.__name__,
+    KojiTaskNotAnalyzedError.__name__,
+    KojiTaskAnalysisTimeoutError.__name__,
+]
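These re-exports let downstream code import every model and exception from the package itself rather than the individual modules:

from logdetective.server.database.models import (
    AnalyzeRequestMetrics,
    Forge,
    KojiTaskNotFoundError,
)

Building __all__ from each class's __name__ keeps the export list in sync if a class is ever renamed.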
logdetective/server/database/models/exceptions.py
@@ -0,0 +1,17 @@
+"""Database model exceptions for logdetective."""
+
+
+class KojiTaskNotFoundError(Exception):
+    """Exception raised when a koji task is not found"""
+
+
+class KojiTaskNotAnalyzedError(Exception):
+    """Exception raised when a koji task analysis is still in progress"""
+
+
+class KojiTaskAnalysisTimeoutError(Exception):
+    """Exception raised when a koji task analysis has timed out"""
+
+
+class AnalyzeRequestMetricsNotFroundError(Exception):
+    """Exception raised when AnalyzeRequestMetrics is not found"""