logdetective 0.6.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- logdetective/prompts.yml +4 -4
- logdetective/server/compressors.py +144 -0
- logdetective/server/database/base.py +3 -0
- logdetective/server/database/models/__init__.py +21 -0
- logdetective/server/database/models/merge_request_jobs.py +515 -0
- logdetective/server/database/{models.py → models/metrics.py} +105 -100
- logdetective/server/metric.py +40 -16
- logdetective/server/models.py +12 -3
- logdetective/server/remote_log.py +109 -0
- logdetective/server/server.py +287 -136
- logdetective/utils.py +9 -37
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/METADATA +11 -6
- logdetective-0.9.1.dist-info/RECORD +28 -0
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/WHEEL +1 -1
- logdetective-0.6.0.dist-info/RECORD +0 -24
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/LICENSE +0 -0
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/entry_points.txt +0 -0
logdetective/server/server.py
CHANGED
@@ -3,15 +3,17 @@ import json
 import os
 import re
 import zipfile
+from enum import Enum
 from contextlib import asynccontextmanager
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
-from typing import List, Annotated, Tuple, Dict, Any
+from typing import List, Annotated, Tuple, Dict, Any, Union
 from io import BytesIO

-
+import backoff
 import matplotlib
 import matplotlib.pyplot
+from aiohttp import StreamReader
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header, Request

 from fastapi.responses import StreamingResponse
@@ -21,20 +23,23 @@ import gitlab.v4
 import gitlab.v4.objects
 import jinja2
 import aiohttp
+import sqlalchemy
+import sentry_sdk
+
+import logdetective.server.database.base

 from logdetective.extractors import DrainExtractor
 from logdetective.utils import (
     compute_certainty,
     format_snippets,
     load_prompts,
-    get_url_content,
 )
 from logdetective.server.utils import (
     load_server_config,
     get_log,
     format_analyzed_snippets,
 )
-from logdetective.server.metric import track_request
+from logdetective.server.metric import track_request, add_new_metrics, update_metrics
 from logdetective.server.models import (
     BuildLog,
     JobHook,
@@ -44,8 +49,14 @@ from logdetective.server.models import (
     AnalyzedSnippet,
     TimePeriod,
 )
-from logdetective.server import plot
-from logdetective.server.
+from logdetective.server import plot as plot_engine
+from logdetective.server.remote_log import RemoteLog
+from logdetective.server.database.models import (
+    Comments,
+    EndpointType,
+    Forge,
+)
+from logdetective.server.database.models import AnalyzeRequestMetrics

 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
@@ -61,6 +72,9 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")

 LOG = get_log(SERVER_CONFIG)

+if sentry_dsn := SERVER_CONFIG.general.sentry_dsn:
+    sentry_sdk.init(dsn=str(sentry_dsn), traces_sample_rate=1.0)
+

 @asynccontextmanager
 async def lifespan(fapp: FastAPI):
@@ -72,6 +86,10 @@ async def lifespan(fapp: FastAPI):
             total=int(LOG_SOURCE_REQUEST_TIMEOUT), connect=3.07
         )
     )
+
+    # Ensure that the database is initialized.
+    logdetective.server.database.base.init()
+
     yield
     await fapp.http.close()

@@ -119,16 +137,6 @@ app.gitlab_conn = gitlab.Gitlab(
 )


-async def process_url(http: aiohttp.ClientSession, url: str) -> str:
-    """Validate log URL and return log text."""
-    try:
-        return await get_url_content(http, url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
-    except RuntimeError as ex:
-        raise HTTPException(
-            status_code=400, detail=f"We couldn't obtain the logs: {ex}"
-        ) from ex
-
-
 def mine_logs(log: str) -> List[Tuple[int, str]]:
     """Extract snippets from log text"""
     extractor = DrainExtractor(
@@ -161,26 +169,19 @@ async def submit_to_llm_endpoint(
     stream:
     """
     LOG.debug("async request %s headers=%s data=%s", url, headers, data)
-
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-    except aiohttp.ClientResponseError as ex:
-        raise HTTPException(
-            status_code=400,
-            detail="HTTP Error while getting response from inference server "
-            f"[{ex.status}] {ex.message}",
-        ) from ex
+    response = await http.post(
+        url,
+        headers=headers,
+        # we need to use the `json=` parameter here and let aiohttp
+        # handle the json-encoding
+        json=data,
+        timeout=int(LLM_CPP_SERVER_TIMEOUT),
+        # Docs says chunked takes int, but:
+        # DeprecationWarning: Chunk size is deprecated #1615
+        # So let's make sure we either put True or None here
+        chunked=True if stream else None,
+        raise_for_status=True,
+    )
     if stream:
         return response
     try:
@@ -193,6 +194,34 @@ async def submit_to_llm_endpoint(
         ) from ex


+def should_we_giveup(exc: aiohttp.ClientResponseError) -> bool:
+    """
+    From backoff's docs:
+
+    > a function which accepts the exception and returns
+    > a truthy value if the exception should not be retried
+    """
+    LOG.info("Should we give up on retrying error %s", exc)
+    return exc.status < 500
+
+
+def we_give_up(details: backoff._typing.Details):
+    """
+    retries didn't work (or we got a different exc)
+    we give up and raise proper 500 for our API endpoint
+    """
+    LOG.error("Inference error: %s", details["args"])
+    raise HTTPException(500, "Request to the inference API failed")
+
+
+@backoff.on_exception(
+    backoff.expo,
+    aiohttp.ClientResponseError,
+    max_tries=3,
+    giveup=should_we_giveup,
+    raise_on_giveup=False,
+    on_giveup=we_give_up,
+)
 async def submit_text(  # pylint: disable=R0913,R0917
     http: aiohttp.ClientSession,
     text: str,
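
The hunk above moves retry handling for the inference backend into the `backoff` library: `submit_text` is retried with exponential backoff on `aiohttp.ClientResponseError`, `should_we_giveup` stops retries for 4xx responses, and `we_give_up` converts exhausted retries into an HTTP 500. A minimal standalone sketch of the same decorator pattern, outside the package (the `fetch` helper and URL are illustrative only):

    import asyncio
    import aiohttp
    import backoff


    def giveup_on_client_errors(exc: aiohttp.ClientResponseError) -> bool:
        # A truthy result means "do not retry": 4xx is the caller's fault.
        return exc.status < 500


    def report_giveup(details) -> None:
        # Called when retries are exhausted or the giveup predicate fired.
        print(f"giving up after {details['tries']} tries")


    @backoff.on_exception(
        backoff.expo,                 # exponential wait between attempts
        aiohttp.ClientResponseError,  # only retry HTTP response errors
        max_tries=3,
        giveup=giveup_on_client_errors,
        on_giveup=report_giveup,
    )
    async def fetch(url: str) -> str:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, raise_for_status=True) as resp:
                return await resp.text()


    # Example: asyncio.run(fetch("https://example.org/health"))

The server code additionally passes `raise_on_giveup=False` and raises the `HTTPException` from the `on_giveup` handler, so API clients see a clean 500 rather than the underlying aiohttp exception.
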
@@ -265,7 +294,7 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
     log_probs: int = 1,
     stream: bool = False,
     model: str = "default-model",
-) -> Explanation:
+) -> Union[Explanation, StreamReader]:
     """Submit prompt to OpenAI API /chat/completions endpoint.
     max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
     log_probs: number of token choices to produce log probs for
@@ -295,10 +324,7 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
     )

     if stream:
-        return
-            text=response["choices"][0]["delta"]["content"],
-            logprobs=response["choices"][0]["logprobs"]["content"],
-        )
+        return response
     return Explanation(
         text=response["choices"][0]["message"]["content"],
         logprobs=response["choices"][0]["logprobs"]["content"],
@@ -308,7 +334,7 @@ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
 @app.post("/analyze", response_model=Response)
 @track_request()
 async def analyze_log(
-    build_log: BuildLog,
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
 ):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -316,11 +342,12 @@ async def analyze_log(
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
     response = await submit_text(
-
+        http_session,
         PROMPT_CONFIG.prompt_template.format(log_summary),
         model=SERVER_CONFIG.inference.model,
         max_tokens=SERVER_CONFIG.inference.max_tokens,
@@ -340,10 +367,10 @@ async def analyze_log(
     return Response(explanation=response, response_certainty=certainty)


-@app.post("/analyze/staged", response_model=StagedResponse)
 @track_request()
+@app.post("/analyze/staged", response_model=StagedResponse)
 async def analyze_log_staged(
-    build_log: BuildLog,
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
 ):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -351,9 +378,10 @@ async def analyze_log_staged(
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()

-    return await perform_staged_analysis(
+    return await perform_staged_analysis(http_session, log_text=log_text)


 async def perform_staged_analysis(
@@ -413,7 +441,7 @@ async def perform_staged_analysis(
 @app.post("/analyze/stream", response_class=StreamingResponse)
 @track_request()
 async def analyze_log_stream(
-    build_log: BuildLog,
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
 ):
     """Stream response endpoint for Logdetective.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -421,7 +449,8 @@ async def analyze_log_stream(
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
     headers = {"Content-Type": "application/json"}
@@ -429,20 +458,31 @@ async def analyze_log_stream(
     if SERVER_CONFIG.inference.api_token:
         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"

-
-
-
-
-
-
-
-
+    try:
+        stream = await submit_text_chat_completions(
+            http_session,
+            PROMPT_CONFIG.prompt_template.format(log_summary),
+            stream=True,
+            headers=headers,
+            model=SERVER_CONFIG.inference.model,
+            max_tokens=SERVER_CONFIG.inference.max_tokens,
+        )
+    except aiohttp.ClientResponseError as ex:
+        raise HTTPException(
+            status_code=400,
+            detail="HTTP Error while getting response from inference server "
+            f"[{ex.status}] {ex.message}",
+        ) from ex

+    # we need to figure out a better response here, this is how it looks rn:
+    # b'data: {"choices":[{"finish_reason":"stop","index":0,"delta":{}}],
+    # "created":1744818071,"id":"chatcmpl-c9geTxNcQO7M9wR...
     return StreamingResponse(stream)


 @app.post("/webhook/gitlab/job_events")
 async def receive_gitlab_job_event_webhook(
+    x_gitlab_instance: Annotated[str | None, Header()],
     job_hook: JobHook,
     background_tasks: BackgroundTasks,
     http: aiohttp.ClientSession = Depends(get_http_session),
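
With this change the `/analyze/stream` endpoint forwards the inference server's chat-completions stream directly through FastAPI's `StreamingResponse`; as the TODO comment in the hunk notes, clients currently receive the raw `data: {...}` chunks emitted by the backend. A rough client-side sketch for consuming that stream, assuming a locally running instance (the host, port, and log URL are illustrative):

    import asyncio
    import aiohttp


    async def read_stream() -> None:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "http://localhost:8080/analyze/stream",
                json={"url": "https://example.org/build.log"},
            ) as resp:
                # Each chunk is whatever the inference backend emitted, e.g.
                # b'data: {"choices":[{"delta":{"content":"..."}}], ...}'
                async for chunk in resp.content:
                    print(chunk.decode("utf-8", errors="replace"), end="")


    asyncio.run(read_stream())
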
@@ -451,17 +491,27 @@ async def receive_gitlab_job_event_webhook(
     https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
     lists the full specification for the messages sent for job events."""

+    try:
+        forge = Forge(x_gitlab_instance)
+    except ValueError:
+        LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
+        return BasicResponse(status_code=400)
+
     # Handle the message in the background so we can return 200 immediately
-    background_tasks.add_task(process_gitlab_job_event, http, job_hook)
+    background_tasks.add_task(process_gitlab_job_event, http, forge, job_hook)

     # No return value or body is required for a webhook.
     # 204: No Content
     return BasicResponse(status_code=204)


-async def process_gitlab_job_event(
+async def process_gitlab_job_event(
+    http: aiohttp.ClientSession,
+    forge: Forge,
+    job_hook: JobHook,
+):
     """Handle a received job_event webhook from GitLab"""
-    LOG.debug("Received webhook message:\n%s", job_hook)
+    LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)

     # Look up the project this job belongs to
     project = await asyncio.to_thread(app.gitlab_conn.projects.get, job_hook.project_id)
@@ -502,7 +552,14 @@ async def process_gitlab_job_event(http: aiohttp.ClientSession, job_hook):

     # Submit log to Log Detective and await the results.
     log_text = preprocessed_log.read().decode(encoding="utf-8")
+    metrics_id = await add_new_metrics(
+        api_name=EndpointType.ANALYZE_GITLAB_JOB,
+        url=log_url,
+        http_session=http,
+        compressed_log_content=RemoteLog.zip_text(log_text),
+    )
     staged_response = await perform_staged_analysis(http, log_text=log_text)
+    update_metrics(metrics_id, staged_response)
     preprocessed_log.close()

     # check if this project is on the opt-in list for posting comments.
@@ -511,7 +568,17 @@ async def process_gitlab_job_event(http: aiohttp.ClientSession, job_hook):
         return

     # Add the Log Detective response as a comment to the merge request
-    await comment_on_mr(
+    await comment_on_mr(
+        forge,
+        project,
+        merge_request_iid,
+        job,
+        log_url,
+        staged_response,
+        metrics_id,
+    )
+
+    return staged_response


 class LogsTooLargeError(RuntimeError):
@@ -520,7 +587,7 @@ class LogsTooLargeError(RuntimeError):

 async def retrieve_and_preprocess_koji_logs(
     http: aiohttp.ClientSession, job: gitlab.v4.objects.ProjectJob
-):
+):  # pylint: disable=too-many-branches
     """Download logs from the merge request artifacts

     This function will retrieve the build logs and do some minimal
@@ -549,13 +616,16 @@ async def retrieve_and_preprocess_koji_logs(
         if zipinfo.filename.endswith("task_failed.log"):
             # The koji logs store this file in two places: 1) in the
             # directory with the failed architecture and 2) in the parent
-            # directory.
-            # directory, since the rest of the information is in the
-            # specific task directory.
+            # directory. Most of the time, we want to ignore the one in the
+            # parent directory, since the rest of the information is in the
+            # specific task directory. However, there are some situations
+            # where non-build failures (such as "Target build already exists")
+            # may be presented only at the top level.
             # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
             # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
             path = PurePath(zipinfo.filename)
             if len(path.parts) <= 3:
+                failed_arches["toplevel"] = path
                 continue

             # Extract the architecture from the immediate parent path
@@ -584,30 +654,32 @@ async def retrieve_and_preprocess_koji_logs(
             failed_arches[architecture] = PurePath(path.parent, failure_log_name)

     if not failed_arches:
-        # No failed task found
+        # No failed task found in the sub-tasks.
         raise FileNotFoundError("Could not detect failed architecture.")

-    #
-
-
-
+    # We only want to handle one arch, so we'll check them in order of
+    # "most to least likely for the maintainer to have access to hardware"
+    # This means: x86_64 > aarch64 > riscv > ppc64le > s390x
+    if "x86_64" in failed_arches:
+        failed_arch = "x86_64"
+    elif "aarch64" in failed_arches:
+        failed_arch = "aarch64"
+    elif "riscv" in failed_arches:
+        failed_arch = "riscv"
+    elif "ppc64le" in failed_arches:
+        failed_arch = "ppc64le"
+    elif "s390x" in failed_arches:
+        failed_arch = "s390x"
+    elif "noarch" in failed_arches:
+        # May have failed during BuildSRPMFromSCM phase
+        failed_arch = "noarch"
+    elif "toplevel" in failed_arches:
+        # Probably a Koji-specific error, not a build error
+        failed_arch = "toplevel"
     else:
-        # We
-        #
-
-        if "x86_64" in failed_arches:
-            failed_arch = "x86_64"
-        elif "aarch64" in failed_arches:
-            failed_arch = "aarch64"
-        elif "ppc64le" in failed_arches:
-            failed_arch = "ppc64le"
-        elif "s390x" in failed_arches:
-            failed_arch = "s390x"
-        else:
-            # It should be impossible for us to get "noarch" here, since
-            # the only way that should happen is for a single architecture
-            # build.
-            raise FileNotFoundError("No failed architecture detected.")
+        # We have one or more architectures that we don't know about? Just
+        # pick the first alphabetically.
+        failed_arch = sorted(list(failed_arches.keys()))[0]

     LOG.debug("Failed architecture: %s", failed_arch)

@@ -620,7 +692,10 @@ async def retrieve_and_preprocess_koji_logs(
     return log_url, artifacts_zip.open(log_path)


-async def check_artifacts_file_size(
+async def check_artifacts_file_size(
+    http: aiohttp.ClientSession,
+    job: gitlab.v4.objects.ProjectJob,
+):
     """Method to determine if the artifacts are too large to process"""
     # First, make sure that the artifacts are of a reasonable size. The
     # zipped artifact collection will be stored in memory below. The
@@ -651,12 +726,14 @@ async def check_artifacts_file_size(http: aiohttp.ClientSession, job):
     return content_length <= SERVER_CONFIG.gitlab.max_artifact_size


-async def comment_on_mr(
+async def comment_on_mr(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
+    forge: Forge,
     project: gitlab.v4.objects.Project,
     merge_request_iid: int,
     job: gitlab.v4.objects.ProjectJob,
     log_url: str,
     response: StagedResponse,
+    metrics_id: int,
 ):
     """Add the Log Detective response as a comment to the merge request"""
     LOG.debug(
@@ -666,6 +743,10 @@ async def comment_on_mr(
         response.explanation.text,
     )

+    # First, we'll see if there's an existing comment on this Merge Request
+    # and wrap it in <details></details> to reduce noise.
+    await suppress_latest_comment(forge, project, merge_request_iid)
+
     # Get the formatted short comment.
     short_comment = await generate_mr_comment(job, log_url, response, full=False)

@@ -696,6 +777,67 @@ async def comment_on_mr(
             await asyncio.sleep(5)
             await asyncio.to_thread(note.save)

+    # Save the new comment to the database
+    try:
+        metrics = AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
+        Comments.create(
+            forge,
+            project.id,
+            merge_request_iid,
+            job.id,
+            discussion.id,
+            metrics,
+        )
+    except sqlalchemy.exc.IntegrityError:
+        # We most likely attempted to save a new comment for the same
+        # build job. This is somewhat common during development when we're
+        # submitting requests manually. It shouldn't really happen in
+        # production.
+        if not SERVER_CONFIG.general.devmode:
+            raise
+
+
+async def suppress_latest_comment(
+    gitlab_instance: str,
+    project: gitlab.v4.objects.Project,
+    merge_request_iid: int,
+) -> None:
+    """Look up the latest comment on this Merge Request, if any, and wrap it
+    in a <details></details> block with a comment indicating that it has been
+    superseded by a new push."""
+
+    # Ask the database for the last known comment for this MR
+    previous_comment = Comments.get_latest_comment(
+        gitlab_instance, project.id, merge_request_iid
+    )
+
+    if previous_comment is None:
+        # No existing comment, so nothing to do.
+        return
+
+    # Retrieve its content from the Gitlab API
+
+    # Look up the merge request
+    merge_request = await asyncio.to_thread(
+        project.mergerequests.get, merge_request_iid
+    )
+
+    # Find the discussion matching the latest comment ID
+    discussion = await asyncio.to_thread(
+        merge_request.discussions.get, previous_comment.comment_id
+    )
+
+    # Get the ID of the first note
+    note_id = discussion.attributes["notes"][0]["id"]
+    note = discussion.notes.get(note_id)
+
+    # Wrap the note in <details>, indicating why.
+    note.body = (
+        "This comment has been superseded by a newer "
+        f"Log Detective analysis.\n<details>\n{note.body}\n</details>"
+    )
+    await asyncio.to_thread(note.save)
+

 async def generate_mr_comment(
     job: gitlab.v4.objects.ProjectJob,
@@ -770,61 +912,70 @@ def _multiple_svg_figures_response(figures: list[matplotlib.figure.Figure]):
     return BasicResponse(content=html_content, media_type="text/html")


-
-
-    """Show statistics for requests and responses in the given period of time
-    for the /analyze API endpoint."""
-    fig_requests = plot.requests_per_time(period_since_now, EndpointType.ANALYZE)
-    fig_responses = plot.average_time_per_responses(
-        period_since_now, EndpointType.ANALYZE
-    )
-    return _multiple_svg_figures_response([fig_requests, fig_responses])
+class MetricRoute(str, Enum):
+    """Routes for metrics"""

+    ANALYZE = "analyze"
+    ANALYZE_STAGED = "analyze-staged"
+    ANALYZE_GITLAB_JOB = "analyze-gitlab"

-@app.get("/metrics/analyze/requests", response_class=StreamingResponse)
-async def show_analyze_requests(period_since_now: TimePeriod = Depends(TimePeriod)):
-    """Show statistics for the requests received in the given period of time
-    for the /analyze API endpoint."""
-    fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE)
-    return _svg_figure_response(fig)

+class Plot(str, Enum):
+    """Type of served plots"""

-
-
-    ""
-    for the /analyze API endpoint."""
-    fig = plot.average_time_per_responses(period_since_now, EndpointType.ANALYZE)
-    return _svg_figure_response(fig)
+    REQUESTS = "requests"
+    RESPONSES = "responses"
+    BOTH = ""


-
-
-
-
-
-    for the /analyze/staged API endpoint."""
-    fig_requests = plot.requests_per_time(period_since_now, EndpointType.ANALYZE_STAGED)
-    fig_responses = plot.average_time_per_responses(
-        period_since_now, EndpointType.ANALYZE_STAGED
-    )
-    return _multiple_svg_figures_response([fig_requests, fig_responses])
+ROUTE_TO_ENDPOINT_TYPES = {
+    MetricRoute.ANALYZE: EndpointType.ANALYZE,
+    MetricRoute.ANALYZE_STAGED: EndpointType.ANALYZE_STAGED,
+    MetricRoute.ANALYZE_GITLAB_JOB: EndpointType.ANALYZE_GITLAB_JOB,
+}


-@app.get("/metrics/
-
+@app.get("/metrics/{route}/", response_class=StreamingResponse)
+@app.get("/metrics/{route}/{plot}", response_class=StreamingResponse)
+async def get_metrics(
+    route: MetricRoute,
+    plot: Plot = Plot.BOTH,
     period_since_now: TimePeriod = Depends(TimePeriod),
 ):
-    """
-
-
-
-
+    """Get an handler for visualize statistics for the specified endpoint and plot."""
+    endpoint_type = ROUTE_TO_ENDPOINT_TYPES[route]
+
+    async def handler():
+        """Show statistics for the specified endpoint and plot."""
+        if plot == Plot.REQUESTS:
+            fig = plot_engine.requests_per_time(period_since_now, endpoint_type)
+            return _svg_figure_response(fig)
+        if plot == Plot.RESPONSES:
+            fig = plot_engine.average_time_per_responses(
+                period_since_now, endpoint_type
+            )
+            return _svg_figure_response(fig)
+        # BOTH
+        fig_requests = plot_engine.requests_per_time(period_since_now, endpoint_type)
+        fig_responses = plot_engine.average_time_per_responses(
+            period_since_now, endpoint_type
+        )
+        return _multiple_svg_figures_response([fig_requests, fig_responses])
+
+    descriptions = {
+        Plot.REQUESTS: (
+            "Show statistics for the requests received in the given period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+        Plot.RESPONSES: (
+            "Show statistics for responses given in the specified period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+        Plot.BOTH: (
+            "Show statistics for requests and responses in the given period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+    }
+    handler.__doc__ = descriptions[plot]

-
-async def show_analyze_staged_responses(
-    period_since_now: TimePeriod = Depends(TimePeriod),
-):
-    """Show statistics for responses given in the specified period of time
-    for the /analyze/staged API endpoint."""
-    fig = plot.average_time_per_responses(period_since_now, EndpointType.ANALYZE_STAGED)
-    return _svg_figure_response(fig)
+    return await handler()