logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registry.
Files changed (39)
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/gitlab.py
@@ -0,0 +1,451 @@
+import re
+import asyncio
+import zipfile
+from pathlib import Path, PurePath
+from tempfile import TemporaryFile
+
+from fastapi import HTTPException
+
+import gitlab
+import gitlab.v4
+import gitlab.v4.objects
+import jinja2
+import aiohttp
+import backoff
+
+from logdetective.server.config import SERVER_CONFIG, LOG
+from logdetective.server.exceptions import (
+    LogsTooLargeError,
+    LogDetectiveConnectionError,
+)
+from logdetective.server.llm import perform_staged_analysis
+from logdetective.server.metric import add_new_metrics, update_metrics
+from logdetective.server.models import (
+    GitLabInstanceConfig,
+    JobHook,
+    StagedResponse,
+)
+from logdetective.server.database.models import (
+    AnalyzeRequestMetrics,
+    Comments,
+    EndpointType,
+    Forge,
+    GitlabMergeRequestJobs,
+)
+from logdetective.server.compressors import RemoteLogCompressor
+from logdetective.server.utils import connection_error_giveup
+
+MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/.*$")
+FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
+
+
+async def process_gitlab_job_event(
+    gitlab_cfg: GitLabInstanceConfig,
+    forge: Forge,
+    job_hook: JobHook,
+):  # pylint: disable=too-many-locals
+    """Handle a received job_event webhook from GitLab"""
+    LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)
+
+    # Look up the project this job belongs to
+    gitlab_conn = gitlab_cfg.get_connection()
+    project = await asyncio.to_thread(gitlab_conn.projects.get, job_hook.project_id)
+    LOG.info("Processing failed job for %s", project.name)
+
+    # Retrieve data about the job from the GitLab API
+    job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)
+
+    # For easy retrieval later, we'll add project_name and project_url to the
+    # job object
+    job.project_name = project.name
+    job.project_url = project.web_url
+
+    # Retrieve the pipeline that started this job
+    pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)
+
+    # Verify this is a merge request
+    if pipeline.source != "merge_request_event":
+        LOG.info("Not a merge request pipeline. Ignoring.")
+        return
+
+    # Extract the merge-request IID from the job
+    match = MR_REGEX.search(pipeline.ref)
+    if not match:
+        LOG.error(
+            "Pipeline source is merge_request_event but no merge request IID was provided."
+        )
+        return
+    merge_request_iid = int(match.group(1))
+
+    # Check if this is a resubmission of an existing, completed job.
+    # If it is, we'll exit out here and not waste time retrieving the logs,
+    # running a new analysis or trying to submit a new comment.
+    mr_job_db = await GitlabMergeRequestJobs.get_by_details(
+        forge=forge,
+        project_id=project.id,
+        mr_iid=merge_request_iid,
+        job_id=job_hook.build_id,
+    )
+    if mr_job_db:
+        LOG.info("Resubmission of an existing build. Skipping.")
+        return
+
+    LOG.debug("Retrieving log artifacts")
+    # Retrieve the build logs from the merge request artifacts and preprocess them
+    try:
+        log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(
+            gitlab_cfg, job
+        )
+    except (LogsTooLargeError, LogDetectiveConnectionError) as ex:
+        LOG.error("Could not retrieve logs due to %s", ex)
+        raise
+
+    # Submit log to Log Detective and await the results.
+    log_text = preprocessed_log.read().decode(encoding="utf-8")
+    metrics_id = await add_new_metrics(
+        api_name=EndpointType.ANALYZE_GITLAB_JOB,
+        url=log_url,
+        http_session=gitlab_cfg.get_http_session(),
+        compressed_log_content=RemoteLogCompressor.zip_text(log_text),
+    )
+    staged_response = await perform_staged_analysis(log_text=log_text)
+    await update_metrics(metrics_id, staged_response)
+    preprocessed_log.close()
+
+    # check if this project is on the opt-in list for posting comments.
+    if not is_eligible_package(project.name):
+        LOG.info("Not publishing comment for unrecognized package %s", project.name)
+        return
+
+    # Add the Log Detective response as a comment to the merge request
+    await comment_on_mr(
+        forge,
+        project,
+        merge_request_iid,
+        job,
+        log_url,
+        staged_response,
+        metrics_id,
+    )
+
+    return staged_response
+
+
+def is_eligible_package(project_name: str):
+    """Check whether the provided package name is eligible for posting
+    comments to the merge request"""
+
+    # First check the allow-list. If it's not allowed, we deny.
+    allowed = False
+    for pattern in SERVER_CONFIG.general.packages:
+        LOG.debug("include %s", pattern)
+        if re.search(pattern, project_name):
+            allowed = True
+            break
+    if not allowed:
+        # The project did not match any of the permitted regular expressions
+        return False
+
+    # Next, check the deny-list. If it was allowed before, but denied here, we deny.
+    for pattern in SERVER_CONFIG.general.excluded_packages:
+        LOG.debug("exclude %s", pattern)
+        if re.search(pattern, project_name):
+            return False
+
+    # It was allowed and not denied, so return True to indicate it is eligible
+    return True
+
+
+@backoff.on_exception(
+    backoff.expo, ConnectionResetError, max_time=60, on_giveup=connection_error_giveup
+)
+async def retrieve_and_preprocess_koji_logs(
+    gitlab_cfg: GitLabInstanceConfig,
+    job: gitlab.v4.objects.ProjectJob,
+):  # pylint: disable=too-many-branches,too-many-locals
+    """Download logs from the merge request artifacts
+
+    This function will retrieve the build logs and do some minimal
+    preprocessing to determine which log is relevant for analysis.
+
+    returns: The URL pointing to the selected log file and an open, file-like
+    object containing the log contents to be sent for processing by Log
+    Detective. The calling function is responsible for closing this object."""
+
+    # Make sure the file isn't too large to process.
+    if not await check_artifacts_file_size(gitlab_cfg, job):
+        raise LogsTooLargeError(
+            f"Oversized logs for job {job.id} in project {job.project_id}"
+        )
+
+    # Create a temporary file to store the downloaded log zipfile.
+    # This will be automatically deleted when the last reference into it
+    # (returned by this function) is closed.
+    tempfile = TemporaryFile(mode="w+b")
+    await asyncio.to_thread(job.artifacts, streamed=True, action=tempfile.write)
+    tempfile.seek(0)
+
+    failed_arches = {}
+    artifacts_zip = zipfile.ZipFile(tempfile, mode="r")  # pylint: disable=consider-using-with
+    for zipinfo in artifacts_zip.infolist():
+        if zipinfo.filename.endswith("task_failed.log"):
+            # The koji logs store this file in two places: 1) in the
+            # directory with the failed architecture and 2) in the parent
+            # directory. Most of the time, we want to ignore the one in the
+            # parent directory, since the rest of the information is in the
+            # specific task directory. However, there are some situations
+            # where non-build failures (such as "Target build already exists")
+            # may be presented only at the top level.
+            # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
+            # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
+            # We prefix "toplevel" with '~' so that later when we sort the
+            # keys to see if there are any unrecognized arches, it will always
+            # sort last.
+            path = PurePath(zipinfo.filename)
+            if len(path.parts) <= 3:
+                failed_arches["~toplevel"] = path
+                continue
+
+            # Extract the architecture from the immediate parent path
+            architecture = path.parent.parts[-1].split("-")[0]
+
+            # Open this file and read which log failed.
+            # The string in this log has the format
+            # `see <log> for more information`.
+            # Note: it may sometimes say
+            # `see build.log or root.log for more information`, but in
+            # that situation, we only want to handle build.log (for now),
+            # which means accepting only the first match for the regular
+            # expression.
+            with artifacts_zip.open(zipinfo.filename) as task_failed_log:
+                contents = task_failed_log.read().decode("utf-8")
+                match = FAILURE_LOG_REGEX.search(contents)
+                if match:
+                    failure_log_name = match.group(1)
+                    failed_arches[architecture] = PurePath(
+                        path.parent, failure_log_name
+                    )
+                else:
+                    LOG.info(
+                        "task_failed.log does not indicate which log contains the failure."
+                    )
+                    # The best thing we can do at this point is return the
+                    # task_failed.log, since it will probably contain the most
+                    # relevant information
+                    failed_arches[architecture] = path
+
+    if not failed_arches:
+        # No failed task found in the sub-tasks.
+        raise FileNotFoundError("Could not detect failed architecture.")
+
+    # We only want to handle one arch, so we'll check them in order of
+    # "most to least likely for the maintainer to have access to hardware"
+    # This means: x86_64 > aarch64 > riscv > ppc64le > s390x
+    if "x86_64" in failed_arches:
+        failed_arch = "x86_64"
+    elif "aarch64" in failed_arches:
+        failed_arch = "aarch64"
+    elif "riscv" in failed_arches:
+        failed_arch = "riscv"
+    elif "ppc64le" in failed_arches:
+        failed_arch = "ppc64le"
+    elif "s390x" in failed_arches:
+        failed_arch = "s390x"
+    elif "noarch" in failed_arches:
+        # May have failed during BuildSRPMFromSCM phase
+        failed_arch = "noarch"
+    else:
+        # We have one or more architectures that we don't know about? Just
+        # pick the first alphabetically. If the issue was a Koji error
+        # rather than a build failure, this will fall back to ~toplevel as
+        # the lowest-sorting possibility.
+        failed_arch = sorted(list(failed_arches.keys()))[0]
+
+    LOG.debug("Failed architecture: %s", failed_arch)
+
+    log_path = failed_arches[failed_arch].as_posix()
+    log_url = f"{gitlab_cfg.url}/{gitlab_cfg.api_path}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}"  # pylint: disable=line-too-long
+    LOG.debug("Returning contents of %s%s", gitlab_cfg.url, log_url)
+
+    # Return the log as a file-like object with .read() function
+    return log_url, artifacts_zip.open(log_path)
+
+
+async def check_artifacts_file_size(
+    gitlab_cfg: GitLabInstanceConfig,
+    job: gitlab.v4.objects.ProjectJob,
+):
+    """Method to determine if the artifacts are too large to process"""
+    # First, make sure that the artifacts are of a reasonable size. The
+    # zipped artifact collection will be stored in memory below. The
+    # python-gitlab library doesn't expose a way to check this value directly,
+    # so we need to interact with directly with the headers.
+    artifacts_path = (
+        f"{gitlab_cfg.api_path}/projects/{job.project_id}/jobs/{job.id}/artifacts"
+    )
+    LOG.debug("checking artifact URL %s%s", gitlab_cfg.url, artifacts_path)
+    try:
+        head_response = await gitlab_cfg.get_http_session().head(
+            artifacts_path,
+            allow_redirects=True,
+            raise_for_status=True,
+        )
+    except aiohttp.ClientResponseError as ex:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unable to check artifact URL: [{ex.status}] {ex.message}",
+        ) from ex
+    content_length = int(head_response.headers.get("content-length"))
+    LOG.debug(
+        "URL: %s, content-length: %d, max length: %d",
+        artifacts_path,
+        content_length,
+        gitlab_cfg.max_artifact_size,
+    )
+    return content_length <= gitlab_cfg.max_artifact_size
+
+
+async def comment_on_mr(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
+    forge: Forge,
+    project: gitlab.v4.objects.Project,
+    merge_request_iid: int,
+    job: gitlab.v4.objects.ProjectJob,
+    log_url: str,
+    response: StagedResponse,
+    metrics_id: int,
+):
+    """Add the Log Detective response as a comment to the merge request"""
+    LOG.debug(
+        "Primary Explanation for %s MR %d: %s",
+        project.name,
+        merge_request_iid,
+        response.explanation.text,
+    )
+
+    # First, we'll see if there's an existing comment on this Merge Request
+    # and wrap it in <details></details> to reduce noise.
+    await suppress_latest_comment(forge, project, merge_request_iid)
+
+    # Get the formatted short comment.
+    short_comment = await generate_mr_comment(job, log_url, response, full=False)
+
+    # Look up the merge request
+    merge_request = await asyncio.to_thread(
+        project.mergerequests.get, merge_request_iid
+    )
+
+    # Submit a new comment to the Merge Request using the Gitlab API
+    discussion = await asyncio.to_thread(
+        merge_request.discussions.create, {"body": short_comment}
+    )
+
+    # Get the ID of the first note
+    note_id = discussion.attributes["notes"][0]["id"]
+    note = discussion.notes.get(note_id)
+
+    # Update the comment with the full details
+    # We do this in a second step so we don't bombard the user's email
+    # notifications with a massive message. Gitlab doesn't send email for
+    # comment edits.
+    full_comment = await generate_mr_comment(job, log_url, response, full=True)
+    note.body = full_comment
+
+    # Pause for five seconds before sending the snippet data, otherwise
+    # Gitlab may bundle the edited message together with the creation
+    # message in email.
+    await asyncio.sleep(5)
+    await asyncio.to_thread(note.save)
+
+    # Save the new comment to the database
+    metrics = await AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
+    await Comments.create(
+        forge,
+        project.id,
+        merge_request_iid,
+        job.id,
+        str(discussion.id),
+        metrics,
+    )
+
+
+async def suppress_latest_comment(
+    gitlab_instance: str,
+    project: gitlab.v4.objects.Project,
+    merge_request_iid: int,
+) -> None:
+    """Look up the latest comment on this Merge Request, if any, and wrap it
+    in a <details></details> block with a comment indicating that it has been
+    superseded by a new push."""
+
+    # Ask the database for the last known comment for this MR
+    previous_comment = await Comments.get_latest_comment(
+        gitlab_instance, project.id, merge_request_iid
+    )
+
+    if previous_comment is None:
+        # No existing comment, so nothing to do.
+        return
+
+    # Retrieve its content from the Gitlab API
+
+    # Look up the merge request
+    merge_request = await asyncio.to_thread(
+        project.mergerequests.get, merge_request_iid
+    )
+
+    # Find the discussion matching the latest comment ID
+    discussion = await asyncio.to_thread(
+        merge_request.discussions.get, previous_comment.comment_id
+    )
+
+    # Get the ID of the first note
+    note_id = discussion.attributes["notes"][0]["id"]
+    note = discussion.notes.get(note_id)
+
+    # Wrap the note in <details>, indicating why.
+    note.body = (
+        "This comment has been superseded by a newer "
+        f"Log Detective analysis.\n<details>\n{note.body}\n</details>"
+    )
+    await asyncio.to_thread(note.save)
+
+
+async def generate_mr_comment(
+    job: gitlab.v4.objects.ProjectJob,
+    log_url: str,
+    response: StagedResponse,
+    full: bool = True,
+) -> str:
+    """Use a template to generate a comment string to submit to Gitlab"""
+
+    # Locate and load the comment template
+    script_path = Path(__file__).resolve().parent
+    template_path = Path(script_path, "templates")
+    jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
+
+    if full:
+        tpl = jinja_env.get_template("gitlab_full_comment.md.j2")
+    else:
+        tpl = jinja_env.get_template("gitlab_short_comment.md.j2")
+
+    artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
+
+    if response.response_certainty >= 90:
+        emoji_face = ":slight_smile:"
+    elif response.response_certainty >= 70:
+        emoji_face = ":neutral_face:"
+    else:
+        emoji_face = ":frowning2:"
+
+    # Generate the comment from the template
+    content = tpl.render(
+        package=job.project_name,
+        explanation=response.explanation.text,
+        certainty=response.response_certainty,
+        emoji_face=emoji_face,
+        snippets=response.snippets,
+        log_url=log_url,
+        artifacts_url=artifacts_url,
+    )
+
+    return content
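
The allow/deny filtering in is_eligible_package above is a two-pass check over regular-expression lists from the server configuration. The standalone sketch below mirrors that logic so it can be tried in isolation; the pattern lists are invented examples, not the project's real settings (which come from SERVER_CONFIG.general.packages and SERVER_CONFIG.general.excluded_packages).

# Standalone sketch of the two-pass allow/deny check used by is_eligible_package.
# The pattern lists are illustrative only; real deployments read them from
# SERVER_CONFIG.general.packages and SERVER_CONFIG.general.excluded_packages.
import re

ALLOWED_PATTERNS = [r"^rust-", r"^python-"]     # hypothetical allow-list
EXCLUDED_PATTERNS = [r"^python-internal-"]      # hypothetical deny-list


def is_eligible(project_name: str) -> bool:
    """Return True if the name matches an allow pattern and no deny pattern."""
    # Pass 1: the name must match at least one allow pattern.
    if not any(re.search(p, project_name) for p in ALLOWED_PATTERNS):
        return False
    # Pass 2: a match on any deny pattern overrides the allow-list.
    return not any(re.search(p, project_name) for p in EXCLUDED_PATTERNS)


print(is_eligible("python-requests"))          # True
print(is_eligible("python-internal-tooling"))  # False: denied after being allowed
print(is_eligible("golang-x-net"))             # False: never allowed
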
logdetective/server/koji.py
@@ -0,0 +1,159 @@
+import asyncio
+import re
+from typing import Any, Callable, Optional
+
+import backoff
+import koji
+from logdetective.server.exceptions import (
+    KojiInvalidTaskID,
+    LogDetectiveConnectionError,
+    LogsMissingError,
+    LogsTooLargeError,
+    UnknownTaskType,
+)
+from logdetective.server.utils import connection_error_giveup
+
+FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
+
+
+@backoff.on_exception(
+    backoff.expo,
+    koji.GenericError,
+    max_time=60,
+    on_giveup=connection_error_giveup,
+)
+async def call_koji(func: Callable, *args, **kwargs) -> Any:
+    """
+    Call a Koji function asynchronously.
+
+    Automatically retries on connection errors.
+    """
+    try:
+        result = await asyncio.to_thread(func, *args, **kwargs)
+    except koji.ActionNotAllowed as e:
+        # User doesn't have permission to do this, don't retry.
+        raise LogDetectiveConnectionError(e) from e
+    return result
+
+
+async def get_failed_subtask_info(
+    koji_session: koji.ClientSession, task_id: int
+) -> dict[str, Any]:
+    """
+    If the provided task ID represents a task of type "build", this function
+    will return the buildArch or rebuildSRPM subtask that failed. If there is
+    more than one, it will return the first one found from the following
+    ordered list of processor architectures. If none is found among those
+    architectures, it will return the first failed architecture after a
+    standard sort.
+    * x86_64
+    * aarch64
+    * riscv
+    * ppc64le
+    * s390x
+
+    If the provided task ID represents a task of type "buildArch" or
+    "buildSRPMFromSCM" and has a task state of "FAILED", it will be returned
+    directly.
+
+    Any other task type will rase the UnknownTaskType exception.
+
+    If no task or subtask of the provided task is in the task state "FAILED",
+    this function will raise a NoFailedSubtask exception.
+    """
+
+    # Look up the current task first and check its type.
+    taskinfo = await call_koji(koji_session.getTaskInfo, task_id)
+    if not taskinfo:
+        raise KojiInvalidTaskID(f"Task {task_id} does not exist.")
+
+    # If the parent isn't FAILED, the children probably aren't either.
+    # There's one special case where the user may have canceled the
+    # overall task when one arch failed, so we should check that situation
+    # too.
+    if (
+        taskinfo["state"] != koji.TASK_STATES["FAILED"]
+        and taskinfo["state"] != koji.TASK_STATES["CANCELED"]  # noqa: W503 flake vs lint
+    ):
+        raise UnknownTaskType(f"The primary task state was {taskinfo['state']}.")
+
+    # If the task is buildArch or buildSRPMFromSCM, we can return it directly.
+    if taskinfo["method"] in ["buildArch", "buildSRPMFromSCM"]:
+        return taskinfo
+
+    # Look up the subtasks for the task.
+    response = await asyncio.to_thread(koji_session.getTaskDescendents, task_id)
+    subtasks = response[f"{task_id}"]
+    arch_tasks = {}
+    for subtask in subtasks:
+        if (
+            subtask["method"] not in ["buildArch", "buildSRPMFromSCM"]
+            or subtask["state"] != koji.TASK_STATES["FAILED"]  # noqa: W503 flake vs lint
+        ):
+            # Skip over any completed subtasks or non-build types
+            continue
+
+        arch_tasks[subtask["arch"]] = subtask
+
+    # Return the first architecture in the order of preference.
+    for arch in ["x86_64", "aarch64", "riscv", "ppc64le", "s390x"]:
+        if arch in arch_tasks:
+            return arch_tasks[arch]
+
+    # If none of those architectures were found, return the first one
+    # alphabetically
+    return arch_tasks[sorted(arch_tasks.keys())[0]]
+
+
+async def get_failed_log_from_task(
+    koji_session: koji.ClientSession, task_id: int, max_size: int
+) -> Optional[tuple[str, str]]:
+    """
+    Get the failed log from a task.
+
+    If the log is too large, this function will raise a LogsTooLargeError.
+    If the log is missing or garbage-collected, this function will raise a
+    LogsMissingError.
+    """
+    taskinfo = await get_failed_subtask_info(koji_session, task_id)
+
+    # Read the failure reason from the task. Note that the taskinfo returned
+    # above may not be the same as passed in, so we need to use taskinfo["id"]
+    # to look up the correct failure reason.
+    result = await call_koji(
+        koji_session.getTaskResult, taskinfo["id"], raise_fault=False
+    )
+
+    # Examine the result message for the appropriate log file.
+    match = FAILURE_LOG_REGEX.search(result["faultString"])
+    if match:
+        failure_log_name = match.group(1)
+    else:
+        # The best thing we can do at this point is return the
+        # task_failed.log, since it will probably contain the most
+        # relevant information
+        return result["faultString"]
+
+    # Check that the size of the log file is not enormous
+    task_output = await call_koji(
+        koji_session.listTaskOutput, taskinfo["id"], stat=True
+    )
+    if not task_output:
+        # If the task has been garbage-collected, the task output will be empty
+        raise LogsMissingError(
+            "No logs attached to this task. Possibly garbage-collected."
+        )
+
+    if failure_log_name not in task_output:
+        # This shouldn't be possible, but we'll check anyway.
+        raise LogsMissingError(f"{failure_log_name} could not be located")
+
+    if int(task_output[failure_log_name]["st_size"]) > max_size:
+        raise LogsTooLargeError(
+            f"{task_output[failure_log_name]['st_size']} exceeds max size {max_size}"
+        )
+
+    log_contents = await call_koji(
+        koji_session.downloadTaskOutput, taskinfo["id"], failure_log_name
+    )
+    return failure_log_name, log_contents.decode("utf-8")
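
As a rough usage sketch, get_failed_log_from_task can be driven from an async entry point with an anonymous Koji session. The hub URL, task ID, and size cap below are illustrative assumptions, not values taken from this package's configuration, and the isinstance check reflects that the helper returns the raw fault string when no specific log file is named.

# Minimal usage sketch, assuming an anonymous Koji hub session is enough for
# reading task info and logs. Task ID and max_size are placeholder values.
import asyncio

import koji

from logdetective.server.koji import get_failed_log_from_task


async def main() -> None:
    session = koji.ClientSession("https://koji.fedoraproject.org/kojihub")
    result = await get_failed_log_from_task(
        session, task_id=123456, max_size=5 * 1024 * 1024  # placeholder task, ~5 MiB cap
    )
    if isinstance(result, tuple):
        log_name, log_text = result
        print(f"Failed log: {log_name} ({len(log_text)} characters)")
    else:
        # When no specific log is named, the task's fault string is returned instead.
        print(f"Koji fault: {result}")


if __name__ == "__main__":
    asyncio.run(main())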