logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ def load_server_config(path: str | None) -> Config:
     if path is not None:
         try:
             with open(path, "r") as config_file:
-                return Config(yaml.safe_load(config_file))
+                return Config.model_validate(yaml.safe_load(config_file))
         except FileNotFoundError:
             # This is not an error, we will fall back to default
             print("Unable to find server config file, using default then.")
@@ -1,6 +1,6 @@
 import asyncio
 
-from typing import List, Callable
+from typing import List
 from collections import Counter
 
 import gitlab
@@ -49,25 +49,6 @@ async def collect_emojis_for_mr(
     await collect_emojis_in_comments(comments, gitlab_conn)
 
 
-async def _handle_gitlab_operation(func: Callable, *args):
-    """
-    It handles errors for the specified GitLab operation.
-    After executing it in a separate thread.
-    """
-    try:
-        return await asyncio.to_thread(func, *args)
-    except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
-        log_msg = f"Error during GitLab operation {func}{args}: {e}"
-        if "Not Found" in str(e):
-            LOG.error(log_msg)
-        else:
-            LOG.exception(log_msg)
-    except Exception as e: # pylint: disable=broad-exception-caught
-        LOG.exception(
-            "Unexpected error during GitLab operation %s(%s): %s", func, args, e
-        )
-
-
 async def collect_emojis_in_comments( # pylint: disable=too-many-locals
     comments: List[Comments], gitlab_conn: gitlab.Gitlab
 ):
@@ -80,37 +61,54 @@ async def collect_emojis_in_comments( # pylint: disable=too-many-locals
         mr_job_db = await GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
         if not mr_job_db:
             continue
-        if mr_job_db.id not in projects:
-            project = await _handle_gitlab_operation(
-                gitlab_conn.projects.get, mr_job_db.project_id
-            )
-            if not project:
-                continue
-            projects[mr_job_db.id] = project
-        else:
-            project = projects[mr_job_db.id]
-        merge_request_iid = mr_job_db.mr_iid
-        if merge_request_iid not in merge_requests:
-            merge_request = await _handle_gitlab_operation(
-                project.mergerequests.get, merge_request_iid
+        try:
+            if mr_job_db.id not in projects:
+                project = await asyncio.to_thread(
+                    gitlab_conn.projects.get, mr_job_db.project_id
+                )
+
+                projects[mr_job_db.id] = project
+            else:
+                project = projects[mr_job_db.id]
+            merge_request_iid = mr_job_db.mr_iid
+            if merge_request_iid not in merge_requests:
+                merge_request = await asyncio.to_thread(
+                    project.mergerequests.get, merge_request_iid
+                )
+
+                merge_requests[merge_request_iid] = merge_request
+            else:
+                merge_request = merge_requests[merge_request_iid]
+
+            discussion = await asyncio.to_thread(
+                merge_request.discussions.get, comment.comment_id
             )
-            if not merge_request:
-                continue
-            merge_requests[merge_request_iid] = merge_request
-        else:
-            merge_request = merge_requests[merge_request_iid]

-        discussion = await _handle_gitlab_operation(
-            merge_request.discussions.get, comment.comment_id
-        )
-        if not discussion:
-            continue
+            # Get the ID of the first note
+            if "notes" not in discussion.attributes or len(discussion.attributes["notes"]) == 0:
+                LOG.warning(
+                    "No notes were found in comment %s in merge request %d",
+                    comment.comment_id,
+                    merge_request_iid,
+                )
+                continue

-        # Get the ID of the first note
-        note_id = discussion.attributes["notes"][0]["id"]
-        note = await _handle_gitlab_operation(merge_request.notes.get, note_id)
-        if not note:
-            continue
+            note_id = discussion.attributes["notes"][0]["id"]
+            note = await asyncio.to_thread(merge_request.notes.get, note_id)
+
+        # Log warning with full stack trace, in case we can't find the right
+        # discussion, merge request or project.
+        # All of these objects can be lost, and we shouldn't treat as an error.
+        # Other exceptions are raised.
+        except gitlab.GitlabError as e:
+            if e.response_code == 404:
+                LOG.warning(
+                    "Couldn't retrieve emoji counts for comment %s due to GitlabError",
+                    comment.comment_id, exc_info=True)
+                continue
+            LOG.error("Error encountered while processing emoji counts for GitLab comment %s",
+                      comment.comment_id, exc_info=True)
+            raise

         emoji_counts = Counter(emoji.name for emoji in note.awardemojis.list())

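This hunk drops the _handle_gitlab_operation() wrapper in favour of one try/except around the whole per-comment block: blocking python-gitlab calls still run in a worker thread via asyncio.to_thread, a 404 is logged as a warning and the comment is skipped, and any other GitlabError is re-raised. A minimal sketch of that pattern; the fetch_project() helper and project ID are hypothetical, not part of logdetective:

    import asyncio
    import logging

    import gitlab

    LOG = logging.getLogger(__name__)
    PROJECT_ID = 12345  # hypothetical


    async def fetch_project(conn: gitlab.Gitlab):
        try:
            # python-gitlab is synchronous; to_thread keeps the event loop free.
            return await asyncio.to_thread(conn.projects.get, PROJECT_ID)
        except gitlab.GitlabError as e:
            if e.response_code == 404:
                # A vanished project is expected here; log it and move on.
                LOG.warning("Project %s not found", PROJECT_ID, exc_info=True)
                return None
            raise  # anything else is a genuine error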
@@ -4,6 +4,7 @@ import zipfile
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 
+from aiolimiter import AsyncLimiter
 from fastapi import HTTPException
 
 import gitlab
@@ -13,6 +14,7 @@ import jinja2
 import aiohttp
 import backoff
 
+from logdetective.extractors import Extractor
 from logdetective.server.config import SERVER_CONFIG, LOG
 from logdetective.server.exceptions import (
     LogsTooLargeError,
@@ -41,15 +43,20 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
 
 async def process_gitlab_job_event(
     gitlab_cfg: GitLabInstanceConfig,
+    gitlab_connection: gitlab.Gitlab,
+    http_session: aiohttp.ClientSession,
     forge: Forge,
     job_hook: JobHook,
-): # pylint: disable=too-many-locals
+    async_request_limiter: AsyncLimiter,
+    extractors: list[Extractor],
+): # pylint: disable=too-many-locals disable=too-many-arguments disable=too-many-positional-arguments
     """Handle a received job_event webhook from GitLab"""
     LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)
 
     # Look up the project this job belongs to
-    gitlab_conn = gitlab_cfg.get_connection()
-    project = await asyncio.to_thread(gitlab_conn.projects.get, job_hook.project_id)
+    project = await asyncio.to_thread(
+        gitlab_connection.projects.get, job_hook.project_id
+    )
     LOG.info("Processing failed job for %s", project.name)
 
     # Retrieve data about the job from the GitLab API
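process_gitlab_job_event() now receives its collaborators (GitLab connection, aiohttp session, rate limiter, extractor list) instead of building them from gitlab_cfg on every call. A hypothetical call site under the new signature; the token attribute, rate limit, and import path are assumptions, not code from the package:

    import aiohttp
    import gitlab
    from aiolimiter import AsyncLimiter

    from logdetective.server.gitlab import process_gitlab_job_event  # module path assumed


    async def handle_webhook(gitlab_cfg, forge, job_hook, extractors):
        async_request_limiter = AsyncLimiter(10)  # 10 requests per 60 s
        gitlab_connection = gitlab.Gitlab(
            gitlab_cfg.url, private_token=gitlab_cfg.api_token  # attribute name assumed
        )
        async with aiohttp.ClientSession() as http_session:
            await process_gitlab_job_event(
                gitlab_cfg,
                gitlab_connection,
                http_session,
                forge,
                job_hook,
                async_request_limiter=async_request_limiter,
                extractors=extractors,
            )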
@@ -94,7 +101,7 @@ async def process_gitlab_job_event(
     # Retrieve the build logs from the merge request artifacts and preprocess them
     try:
         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(
-            gitlab_cfg, job
+            gitlab_cfg, job, http_session
         )
     except (LogsTooLargeError, LogDetectiveConnectionError) as ex:
         LOG.error("Could not retrieve logs due to %s", ex)
@@ -105,10 +112,14 @@ async def process_gitlab_job_event(
     metrics_id = await add_new_metrics(
         api_name=EndpointType.ANALYZE_GITLAB_JOB,
         url=log_url,
-        http_session=gitlab_cfg.get_http_session(),
+        http_session=http_session,
         compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-    staged_response = await perform_staged_analysis(log_text=log_text)
+    staged_response = await perform_staged_analysis(
+        log_text=log_text,
+        async_request_limiter=async_request_limiter,
+        extractors=extractors,
+    )
     await update_metrics(metrics_id, staged_response)
     preprocessed_log.close()

@@ -162,6 +173,7 @@ def is_eligible_package(project_name: str):
 async def retrieve_and_preprocess_koji_logs(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ): # pylint: disable=too-many-branches,too-many-locals
     """Download logs from the merge request artifacts

@@ -173,7 +185,7 @@ async def retrieve_and_preprocess_koji_logs(
     Detective. The calling function is responsible for closing this object."""
 
     # Make sure the file isn't too large to process.
-    if not await check_artifacts_file_size(gitlab_cfg, job):
+    if not await check_artifacts_file_size(gitlab_cfg, job, http_session):
         raise LogsTooLargeError(
             f"Oversized logs for job {job.id} in project {job.project_id}"
         )
@@ -274,6 +286,7 @@ async def retrieve_and_preprocess_koji_logs(
 async def check_artifacts_file_size(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ):
     """Method to determine if the artifacts are too large to process"""
     # First, make sure that the artifacts are of a reasonable size. The
@@ -285,7 +298,7 @@ async def check_artifacts_file_size(
     )
     LOG.debug("checking artifact URL %s%s", gitlab_cfg.url, artifacts_path)
     try:
-        head_response = await gitlab_cfg.get_http_session().head(
+        head_response = await http_session.head(
             artifacts_path,
             allow_redirects=True,
             raise_for_status=True,
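check_artifacts_file_size() now issues its HEAD request through the injected aiohttp session rather than one owned by gitlab_cfg. A rough sketch of such a HEAD-based size check; the helper name and size cap are made up for illustration:

    import aiohttp

    MAX_ARTIFACT_SIZE = 300 * 1024 * 1024  # hypothetical 300 MiB cap


    async def artifacts_small_enough(http_session: aiohttp.ClientSession, url: str) -> bool:
        head_response = await http_session.head(
            url,
            allow_redirects=True,
            raise_for_status=True,
        )
        # Compare the advertised size against the cap without downloading anything.
        content_length = int(head_response.headers.get("Content-Length", 0))
        return content_length <= MAX_ARTIFACT_SIZE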
@@ -9,9 +9,11 @@ from fastapi import HTTPException
 from pydantic import ValidationError
 
 import aiohttp
+from aiolimiter import AsyncLimiter
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
 
+from logdetective.extractors import Extractor
 from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
@@ -41,7 +43,6 @@ from logdetective.server.utils import (
     construct_final_prompt,
 )
 
-
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)


@@ -57,6 +58,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 async def call_llm(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
     structured_output: dict | None = None,
 ) -> Explanation:
@@ -87,7 +89,7 @@ async def call_llm(
         }
         kwargs["response_format"] = response_format
 
-    async with inference_cfg.get_limiter():
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
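Rate limiting moves from inference_cfg.get_limiter() to a single AsyncLimiter passed down through the call chain, so all LLM requests share one budget. A small sketch of how aiolimiter throttles concurrent coroutines; the rate and the placeholder _call() body are assumptions:

    import asyncio

    from aiolimiter import AsyncLimiter

    limiter = AsyncLimiter(2, time_period=1)  # at most 2 acquisitions per second


    async def _call(i: int) -> int:
        async with limiter:  # blocks here once the rate budget is spent
            await asyncio.sleep(0.1)  # stand-in for the real completion request
            return i


    async def main():
        print(await asyncio.gather(*(_call(i) for i in range(10))))


    asyncio.run(main())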
@@ -126,6 +128,7 @@ async def call_llm(
 async def call_llm_stream(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
 ) -> AsyncStream[ChatCompletionChunk]:
     """Submit prompt to LLM and recieve stream of tokens as a result.
@@ -136,7 +139,7 @@ async def call_llm_stream(
 
     LOG.info("Submitting to /v1/chat/completions endpoint")
 
-    async with inference_cfg.get_limiter():
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
@@ -150,7 +153,9 @@ async def call_llm_stream(


 async def analyze_snippets(
-    log_summary: List[Tuple[int, str]], structured_output: dict | None = None
+    log_summary: List[Tuple[int, str]],
+    async_request_limiter: AsyncLimiter,
+    structured_output: dict | None = None,
 ) -> List[SnippetAnalysis | RatedSnippetAnalysis]:
     """Submit log file snippets to the LLM and gather results"""
     # Process snippets asynchronously
@@ -162,6 +167,7 @@ async def analyze_snippets(
                 SERVER_CONFIG.inference.system_role,
                 SERVER_CONFIG.inference.user_role,
             ),
+            async_request_limiter=async_request_limiter,
             inference_cfg=SERVER_CONFIG.snippet_inference,
             structured_output=structured_output,
         )
@@ -184,9 +190,13 @@ async def analyze_snippets(
     return analyzed_snippets


-async def perfrom_analysis(log_text: str) -> Response:
+async def perform_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> Response:
     """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -199,6 +209,7 @@ async def perfrom_analysis(log_text: str) -> Response:
     )
     response = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
     certainty = 0
@@ -216,9 +227,13 @@ async def perfrom_analysis(log_text: str) -> Response:
     return Response(explanation=response, response_certainty=certainty)


-async def perform_analyis_stream(log_text: str) -> AsyncStream:
+async def perform_analysis_stream(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> AsyncStream:
     """Submit log file snippets in aggregate and return a stream of tokens"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -232,6 +247,7 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
 
     stream = call_llm_stream(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )

@@ -241,13 +257,18 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
     return stream


-async def perform_staged_analysis(log_text: str) -> StagedResponse:
+async def perform_staged_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
-    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    log_summary = mine_logs(log_text, extractors)
     start = time.time()
     if SERVER_CONFIG.general.top_k_snippets:
         rated_snippets = await analyze_snippets(
             log_summary=log_summary,
+            async_request_limiter=async_request_limiter,
             structured_output=RatedSnippetAnalysis.model_json_schema(),
         )

@@ -266,7 +287,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
             len(rated_snippets),
         )
     else:
-        processed_snippets = await analyze_snippets(log_summary=log_summary)
+        processed_snippets = await analyze_snippets(
+            log_summary=log_summary, async_request_limiter=async_request_limiter
+        )
 
     # Extract original text and line number from `log_summary`
     processed_snippets = [
@@ -276,7 +299,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     delta = time.time() - start
     LOG.info("Snippet analysis performed in %f s", delta)
     log_summary = format_analyzed_snippets(processed_snippets)
-    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)
+    final_prompt = construct_final_prompt(
+        log_summary, PROMPT_CONFIG.prompt_template_staged
+    )
 
     messages = prompt_to_messages(
         final_prompt,
@@ -286,6 +311,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     )
     final_analysis = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
