logdetective 1.1.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {logdetective-1.1.0 → logdetective-1.3.0}/PKG-INFO +17 -3
  2. {logdetective-1.1.0 → logdetective-1.3.0}/README.md +14 -1
  3. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/remote_log.py +3 -3
  4. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/config.py +13 -1
  5. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/emoji.py +3 -1
  6. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/gitlab.py +7 -5
  7. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/llm.py +30 -87
  8. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/models.py +4 -2
  9. {logdetective-1.1.0 → logdetective-1.3.0}/pyproject.toml +5 -4
  10. {logdetective-1.1.0 → logdetective-1.3.0}/LICENSE +0 -0
  11. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/__init__.py +0 -0
  12. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/constants.py +0 -0
  13. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/drain3.ini +0 -0
  14. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/extractors.py +0 -0
  15. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/logdetective.py +0 -0
  16. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/models.py +0 -0
  17. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts-summary-first.yml +0 -0
  18. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts-summary-only.yml +0 -0
  19. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/prompts.yml +0 -0
  20. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/__init__.py +0 -0
  21. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/compressors.py +0 -0
  22. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/__init__.py +0 -0
  23. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/base.py +0 -0
  24. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/__init__.py +0 -0
  25. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/merge_request_jobs.py +0 -0
  26. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/database/models/metrics.py +0 -0
  27. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/metric.py +0 -0
  28. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/plot.py +0 -0
  29. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/server.py +0 -0
  30. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
  31. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
  32. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective/utils.py +0 -0
  33. {logdetective-1.1.0 → logdetective-1.3.0}/logdetective.1.asciidoc +0 -0

--- logdetective-1.1.0/PKG-INFO
+++ logdetective-1.3.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 1.1.0
+Version: 1.3.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -30,12 +30,13 @@ Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
 Requires-Dist: matplotlib (>=3.8.4,<4.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: numpy (>=1.26.0)
+Requires-Dist: openai (>=1.82.1,<2.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
 Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: python-gitlab (>=4.4.0)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
-Requires-Dist: sentry-sdk[fastapi] (>=2.17.0,<3.0.0)
+Requires-Dist: sentry-sdk[fastapi] (>=2.17.0,<3.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -204,6 +205,9 @@ Make changes to the code as needed and run pre-commit.
 Tests
 -----
 
+Tests for code used by server must placed in the `./tests/server/` path, while tests for general
+code must be in the `./tests/base/` path.
+
 The [tox](https://github.com/tox-dev/tox) is used to manage tests. Please install `tox` package into your distribution and run:
 
 tox
@@ -218,7 +222,17 @@ or
 
 tox run -e lint # to run pylint
 
-To run full test suite you will need postgresql client utilities.
+Tox environments for base and server tests are separate, each installs different dependencies.
+
+Running base tests:
+
+tox run -e pytest_base
+
+Running server tests:
+
+tox run -e pytest_server
+
+To run server test suite you will need postgresql client utilities.
 
 dnf install postgresql
 

--- logdetective-1.1.0/README.md
+++ logdetective-1.3.0/README.md
@@ -161,6 +161,9 @@ Make changes to the code as needed and run pre-commit.
 Tests
 -----
 
+Tests for code used by server must placed in the `./tests/server/` path, while tests for general
+code must be in the `./tests/base/` path.
+
 The [tox](https://github.com/tox-dev/tox) is used to manage tests. Please install `tox` package into your distribution and run:
 
 tox
@@ -175,7 +178,17 @@ or
 
 tox run -e lint # to run pylint
 
-To run full test suite you will need postgresql client utilities.
+Tox environments for base and server tests are separate, each installs different dependencies.
+
+Running base tests:
+
+tox run -e pytest_base
+
+Running server tests:
+
+tox run -e pytest_server
+
+To run server test suite you will need postgresql client utilities.
 
 dnf install postgresql
 

--- logdetective-1.1.0/logdetective/remote_log.py
+++ logdetective-1.3.0/logdetective/remote_log.py
@@ -2,7 +2,7 @@ import logging
 from urllib.parse import urlparse
 
 import aiohttp
-from fastapi import HTTPException
+from aiohttp.web import HTTPBadRequest
 
 LOG = logging.getLogger("logdetective")
 
@@ -64,6 +64,6 @@ class RemoteLog:
         try:
             return await self.get_url_content()
         except RuntimeError as ex:
-            raise HTTPException(
-                status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+            raise HTTPBadRequest(
+                reason=f"We couldn't obtain the logs: {ex}"
             ) from ex
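
Note: aiohttp's `HTTPBadRequest` is itself a raisable exception that already carries the 400 status, and it takes a `reason` keyword rather than FastAPI's `detail`. A minimal sketch of the resulting behaviour (only assumes aiohttp is installed):

```python
# Minimal sketch: aiohttp's web exceptions expose their status code and reason,
# so callers can catch HTTPBadRequest and inspect both.
from aiohttp.web import HTTPBadRequest

try:
    raise HTTPBadRequest(reason="We couldn't obtain the logs: connection timed out")
except HTTPBadRequest as exc:
    print(exc.status)  # 400
    print(exc.reason)  # "We couldn't obtain the logs: connection timed out"
```
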

--- logdetective-1.1.0/logdetective/server/config.py
+++ logdetective-1.3.0/logdetective/server/config.py
@@ -1,8 +1,10 @@
 import os
 import logging
 import yaml
+from openai import AsyncOpenAI
+
 from logdetective.utils import load_prompts
-from logdetective.server.models import Config
+from logdetective.server.models import Config, InferenceConfig
 
 
 def load_server_config(path: str | None) -> Config:
@@ -49,6 +51,14 @@ def get_log(config: Config):
     return log
 
 
+def get_openai_api_client(ineference_config: InferenceConfig):
+    """Set up AsyncOpenAI client with default configuration.
+    """
+    return AsyncOpenAI(
+        api_key=ineference_config.api_token,
+        base_url=ineference_config.url)
+
+
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
 SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
 
@@ -56,3 +66,5 @@ SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
 
 LOG = get_log(SERVER_CONFIG)
+
+CLIENT = get_openai_api_client(SERVER_CONFIG.inference)
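
Note: the new module-level `CLIENT` just points the OpenAI SDK at whatever base URL the inference config names, so any OpenAI-compatible endpoint should work. A minimal sketch of what `get_openai_api_client()` produces, with a placeholder URL as an assumption rather than anything logdetective prescribes:

```python
# Hedged sketch of the constructed client; the URL and token are placeholders.
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="None",                       # placeholder token, see the models.py change below
    base_url="http://localhost:8000/v1",  # any OpenAI-compatible inference server
)
```
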

--- logdetective-1.1.0/logdetective/server/emoji.py
+++ logdetective-1.3.0/logdetective/server/emoji.py
@@ -44,7 +44,7 @@ async def _handle_gitlab_operation(func: Callable, *args):
     """
     try:
         return await asyncio.to_thread(func, *args)
-    except gitlab.GitlabError as e:
+    except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
         log_msg = f"Error during GitLab operation {func}{args}: {e}"
         if "Not Found" in str(e):
             LOG.error(log_msg)
@@ -64,6 +64,8 @@ async def collect_emojis_in_comments( # pylint: disable=too-many-locals
     mrs = {}
     for comment in comments:
         mr_job_db = GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
+        if not mr_job_db:
+            continue
         if mr_job_db.id not in projects:
             projects[mr_job_db.id] = project = await _handle_gitlab_operation(
                 gitlab_conn.projects.get, mr_job_db.project_id
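
Note on the widened `except` clause in the first hunk above: `GitlabGetError` is already a subclass of `GitlabError` in python-gitlab, so listing both is belt-and-braces; either way, "Not Found" responses are now logged rather than propagated. A small hedged sketch of the pattern (assumes python-gitlab is installed):

```python
# Hedged sketch: GitlabGetError inherits from GitlabError, so both spellings
# land in the same branch, and 404s are treated as a logged, non-fatal error.
import gitlab

try:
    raise gitlab.GitlabGetError("404 Not Found", response_code=404)
except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
    if "Not Found" in str(e):
        print("logged and swallowed:", e)
```
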

--- logdetective-1.1.0/logdetective/server/gitlab.py
+++ logdetective-1.3.0/logdetective/server/gitlab.py
@@ -193,9 +193,12 @@ async def retrieve_and_preprocess_koji_logs(
         # may be presented only at the top level.
         # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
         # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
+        # We prefix "toplevel" with '~' so that later when we sort the
+        # keys to see if there are any unrecognized arches, it will always
+        # sort last.
         path = PurePath(zipinfo.filename)
         if len(path.parts) <= 3:
-            failed_arches["toplevel"] = path
+            failed_arches["~toplevel"] = path
             continue
 
         # Extract the architecture from the immediate parent path
@@ -246,12 +249,11 @@
     elif "noarch" in failed_arches:
         # May have failed during BuildSRPMFromSCM phase
         failed_arch = "noarch"
-    elif "toplevel" in failed_arches:
-        # Probably a Koji-specific error, not a build error
-        failed_arch = "toplevel"
     else:
         # We have one or more architectures that we don't know about? Just
-        # pick the first alphabetically.
+        # pick the first alphabetically. If the issue was a Koji error
+        # rather than a build failure, this will fall back to ~toplevel as
+        # the lowest-sorting possibility.
         failed_arch = sorted(list(failed_arches.keys()))[0]
 
     LOG.debug("Failed architecture: %s", failed_arch)

--- logdetective-1.1.0/logdetective/server/llm.py
+++ logdetective-1.3.0/logdetective/server/llm.py
@@ -1,21 +1,21 @@
 import os
 import asyncio
-import json
 import random
-from typing import List, Tuple, Dict, Any, Union
+from typing import List, Tuple, Union
 
 import backoff
-from aiohttp import StreamReader
 from fastapi import HTTPException
 
 import aiohttp
+from openai import AsyncStream
+from openai.types.chat import ChatCompletionChunk
 
 from logdetective.constants import SNIPPET_DELIMITER
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import (
     compute_certainty,
 )
-from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG
+from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG, CLIENT
 from logdetective.server.models import (
     AnalyzedSnippet,
     InferenceConfig,
@@ -54,59 +54,6 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
     return log_summary
 
 
-async def submit_to_llm_endpoint(
-    url_path: str,
-    data: Dict[str, Any],
-    headers: Dict[str, str],
-    stream: bool,
-    inference_cfg: InferenceConfig = SERVER_CONFIG.inference,
-) -> Any:
-    """Send request to an API endpoint. Verifying successful request unless
-    the using the stream response.
-
-    url_path: The endpoint path to query. (e.g. "/v1/chat/completions"). It should
-        not include the scheme and netloc of the URL, which is stored in the
-        InferenceConfig.
-    data:
-    headers:
-    stream:
-    inference_cfg: An InferenceConfig object containing the URL, max_tokens
-        and other relevant configuration for talking to an inference server.
-    """
-    async with inference_cfg.get_limiter():
-        LOG.debug("async request %s headers=%s data=%s", url_path, headers, data)
-        session = inference_cfg.get_http_session()
-
-        if inference_cfg.api_token:
-            headers["Authorization"] = f"Bearer {inference_cfg.api_token}"
-
-        response = await session.post(
-            url_path,
-            headers=headers,
-            # we need to use the `json=` parameter here and let aiohttp
-            # handle the json-encoding
-            json=data,
-            timeout=int(LLM_CPP_SERVER_TIMEOUT),
-            # Docs says chunked takes int, but:
-            # DeprecationWarning: Chunk size is deprecated #1615
-            # So let's make sure we either put True or None here
-            chunked=True if stream else None,
-            raise_for_status=True,
-        )
-        if stream:
-            return response
-        try:
-            return json.loads(await response.text())
-        except UnicodeDecodeError as ex:
-            LOG.error(
-                "Error encountered while parsing llama server response: %s", ex
-            )
-            raise HTTPException(
-                status_code=400,
-                detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
-            ) from ex
-
-
 def should_we_giveup(exc: aiohttp.ClientResponseError) -> bool:
     """
     From backoff's docs:
@@ -141,7 +88,7 @@ async def submit_text(
     text: str,
     inference_cfg: InferenceConfig,
     stream: bool = False,
-) -> Union[Explanation, StreamReader]:
+) -> Union[Explanation, AsyncStream[ChatCompletionChunk]]:
    """Submit prompt to LLM.
    inference_cfg: The configuration section from the config.json representing
        the relevant inference server for this request.
@@ -149,40 +96,36 @@
     """
     LOG.info("Analyzing the text")
 
-    headers = {"Content-Type": "application/json"}
-
-    if SERVER_CONFIG.inference.api_token:
-        headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
-
     LOG.info("Submitting to /v1/chat/completions endpoint")
 
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": text,
-            }
-        ],
-        "max_tokens": inference_cfg.max_tokens,
-        "logprobs": inference_cfg.log_probs,
-        "stream": stream,
-        "model": inference_cfg.model,
-        "temperature": inference_cfg.temperature,
-    }
-
-    response = await submit_to_llm_endpoint(
-        "/v1/chat/completions",
-        data,
-        headers,
-        inference_cfg=inference_cfg,
-        stream=stream,
-    )
+    async with inference_cfg.get_limiter():
+        response = await CLIENT.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": text,
+                }
+            ],
+            max_tokens=inference_cfg.max_tokens,
+            logprobs=inference_cfg.log_probs,
+            stream=stream,
+            model=inference_cfg.model,
+            temperature=inference_cfg.temperature,
+        )
 
-    if stream:
+    if isinstance(response, AsyncStream):
         return response
+    if not response.choices[0].message.content:
+        LOG.error("No response content recieved from %s", inference_cfg.url)
+        raise RuntimeError()
+    if response.choices[0].logprobs and response.choices[0].logprobs.content:
+        logprobs = [e.to_dict() for e in response.choices[0].logprobs.content]
+    else:
+        logprobs = None
+
     return Explanation(
-        text=response["choices"][0]["message"]["content"],
-        logprobs=response["choices"][0]["logprobs"]["content"],
+        text=response.choices[0].message.content,
+        logprobs=logprobs,
     )
 
 

--- logdetective-1.1.0/logdetective/server/models.py
+++ logdetective-1.3.0/logdetective/server/models.py
@@ -136,7 +136,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
     max_tokens: int = -1
     log_probs: bool = True
     url: str = ""
-    api_token: str = ""
+    # OpenAI client library requires a string to be specified for API token
+    # even if it is not checked on the server side
+    api_token: str = "None"
     model: str = ""
     temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
     max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
@@ -153,7 +155,7 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
         self.log_probs = data.get("log_probs", True)
         self.url = data.get("url", "")
         self.http_timeout = data.get("http_timeout", 5.0)
-        self.api_token = data.get("api_token", "")
+        self.api_token = data.get("api_token", "None")
         self.model = data.get("model", "default-model")
         self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
         self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
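
Note: the `"None"` placeholder exists because the OpenAI SDK refuses to construct a client without some `api_key` string (it raises `OpenAIError` when `api_key` is `None` and `OPENAI_API_KEY` is not set), even though a local inference server may never validate the token. A small sketch demonstrating the constraint, with a placeholder URL:

```python
# Hedged sketch: the SDK requires *some* api_key string unless OPENAI_API_KEY
# is set in the environment; the value itself need not be a real token.
import openai

try:
    openai.AsyncOpenAI(api_key=None, base_url="http://localhost:8000/v1")
except openai.OpenAIError as err:
    print(err)  # complains that the api_key client option must be set

client = openai.AsyncOpenAI(api_key="None", base_url="http://localhost:8000/v1")  # accepted
```
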

--- logdetective-1.1.0/pyproject.toml
+++ logdetective-1.3.0/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "1.1.0"
+version = "1.3.0"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"
@@ -51,11 +51,12 @@ psycopg2 = {version = "^2.9.9", optional = true }
 alembic = {version = "^1.13.3", optional = true }
 matplotlib = {version = "^3.8.4", optional = true }
 backoff = {version = "2.2.1", optional = true }
-sentry-sdk = {version = "^2.17.0", extras = ["fastapi"]}
+sentry-sdk = {version = "^2.17.0", optional = true, extras = ["fastapi"]}
+openai = {version = "^1.82.1", optional = true}
 
 [tool.poetry.extras]
-server = ["fastapi", "sqlalchemy", "psycopg2", "alembic", "matplotlib", "backoff", "aiolimiter"]
-server-testing = ["fastapi", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib", "backoff", "pytest-asyncio"]
+server = ["fastapi", "sqlalchemy", "psycopg2", "alembic", "matplotlib", "backoff", "aiolimiter", "sentry-sdk", "openai"]
+server-testing = ["fastapi", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib", "backoff", "pytest-asyncio", "sentry-sdk", "openai"]
 
 [build-system]
 requires = ["poetry-core"]
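
Note: because sentry-sdk and openai are now optional and folded into the extras, a plain install of logdetective no longer pulls them in. A hedged way to confirm that an environment installed with the `server` extra resolved its optional dependencies (module names inferred from the extras list above):

```python
# Hedged sanity check for the optional server dependencies.
import importlib.util

for module in ("fastapi", "sqlalchemy", "openai", "sentry_sdk", "backoff"):
    found = importlib.util.find_spec(module) is not None
    print(f"{module}: {'ok' if found else 'MISSING'}")
```
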
The remaining files listed above are unchanged between 1.1.0 and 1.3.0.