logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/constants.py +33 -12
- logdetective/extractors.py +137 -68
- logdetective/logdetective.py +102 -33
- logdetective/models.py +99 -0
- logdetective/prompts-summary-first.yml +20 -0
- logdetective/prompts-summary-only.yml +13 -0
- logdetective/prompts.yml +90 -0
- logdetective/remote_log.py +67 -0
- logdetective/server/compressors.py +186 -0
- logdetective/server/config.py +78 -0
- logdetective/server/database/base.py +34 -26
- logdetective/server/database/models/__init__.py +33 -0
- logdetective/server/database/models/exceptions.py +17 -0
- logdetective/server/database/models/koji.py +143 -0
- logdetective/server/database/models/merge_request_jobs.py +623 -0
- logdetective/server/database/models/metrics.py +427 -0
- logdetective/server/emoji.py +148 -0
- logdetective/server/exceptions.py +37 -0
- logdetective/server/gitlab.py +451 -0
- logdetective/server/koji.py +159 -0
- logdetective/server/llm.py +309 -0
- logdetective/server/metric.py +75 -30
- logdetective/server/models.py +426 -23
- logdetective/server/plot.py +432 -0
- logdetective/server/server.py +580 -468
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
- logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
- logdetective/server/utils.py +98 -32
- logdetective/skip_snippets.yml +12 -0
- logdetective/utils.py +187 -73
- logdetective-2.11.0.dist-info/METADATA +568 -0
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective/server/database/models.py +0 -88
- logdetective-0.4.0.dist-info/METADATA +0 -333
- logdetective-0.4.0.dist-info/RECORD +0 -19
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/server.py CHANGED
@@ -1,596 +1,708 @@
-import asyncio
-import json
 import os
-import
-import
-from
-from
-from typing import
-
-
-
+import asyncio
+import datetime
+from enum import Enum
+from contextlib import asynccontextmanager
+from typing import Annotated
+from io import BytesIO
+
+import matplotlib
+import matplotlib.figure
+import matplotlib.pyplot
+from fastapi import (
+    FastAPI,
+    HTTPException,
+    BackgroundTasks,
+    Depends,
+    Header,
+    Path,
+    Request,
+)
 
 from fastapi.responses import StreamingResponse
 from fastapi.responses import Response as BasicResponse
-import
-import
+import aiohttp
+import sentry_sdk
+
+from logdetective.server.exceptions import KojiInvalidTaskID
 
-from logdetective.
-
-
-
+from logdetective.server.database.models.koji import KojiTaskAnalysis
+from logdetective.server.database.models.exceptions import (
+    KojiTaskAnalysisTimeoutError,
+    KojiTaskNotAnalyzedError,
+    KojiTaskNotFoundError,
 )
-
-
-
-
-
-
+
+import logdetective.server.database.base
+
+from logdetective.server.config import SERVER_CONFIG, LOG
+from logdetective.server.koji import (
+    get_failed_log_from_task as get_failed_log_from_koji_task,
+)
+from logdetective.remote_log import RemoteLog
+from logdetective.server.llm import (
+    perform_staged_analysis,
+    perfrom_analysis,
+    perform_analyis_stream,
 )
-from logdetective.server.
-from logdetective.server.metric import track_request
+from logdetective.server.gitlab import process_gitlab_job_event
+from logdetective.server.metric import track_request, add_new_metrics, update_metrics
 from logdetective.server.models import (
     BuildLog,
+    EmojiHook,
     JobHook,
+    KojiInstanceConfig,
+    KojiStagedResponse,
     Response,
     StagedResponse,
-
-
+    TimePeriod,
+)
+from logdetective.server import plot as plot_engine
+from logdetective.server.database.models import (
+    EndpointType,
+    Forge,
+)
+from logdetective.server.emoji import (
+    collect_emojis,
+    collect_emojis_for_mr,
 )
+from logdetective.server.compressors import RemoteLogCompressor
+from logdetective.server.utils import get_version
+
 
-LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
-LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
-LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
-LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
 API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
-SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
-LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)
 
-SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 
-
-
+if sentry_dsn := SERVER_CONFIG.general.sentry_dsn:
+    sentry_sdk.init(dsn=str(sentry_dsn), traces_sample_rate=1.0)
+
+
+@asynccontextmanager
+async def lifespan(fapp: FastAPI):
+    """
+    Establish one HTTP session
+    """
+    fapp.http = aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(
+            total=int(LOG_SOURCE_REQUEST_TIMEOUT), connect=3.07
+        )
+    )
+
+    # Ensure that the database is initialized.
+    await logdetective.server.database.base.init()
+
+    # Start the background task scheduler for collecting emojis
+    asyncio.create_task(schedule_collect_emojis_task())
+
+    yield
 
-
+    await fapp.http.close()
 
 
-def
+async def get_http_session(request: Request) -> aiohttp.ClientSession:
     """
-
+    Return the single aiohttp ClientSession for this app
+    """
+    return request.app.http
+
+
+def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
+    """
+    FastAPI Depend function that expects a header named Authorization
 
     If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
     otherwise ignore it
     """
     if not API_TOKEN:
-        LOG.info("LOGDETECTIVE_TOKEN env var not set,
+        LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
         # no token required, means local dev environment
         return
-
-    if authentication:
+    if authorization:
         try:
-            token =
-        except (ValueError, IndexError):
+            token = authorization.split(" ", 1)[1]
+        except (ValueError, IndexError) as ex:
             LOG.warning(
-                "
-
+                "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+                authorization,
             )
             # eat the exception and raise 401 below
-
+            raise HTTPException(
+                status_code=401,
+                detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
+            ) from ex
         if token == API_TOKEN:
            return
-
-
-    )
-    raise HTTPException(status_code=401, detail=
+        LOG.info("Provided token '%s' does not match expected value.", token)
+        raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
+    LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
+    raise HTTPException(status_code=401, detail="No token provided.")
+
+
+app = FastAPI(
+    title="Log Detective",
+    contact={
+        "name": "Log Detective developers",
+        "url": "https://github.com/fedora-copr/logdetective",
+        "email": "copr-devel@lists.fedorahosted.org"
+    },
+    license_info={
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
+    },
+    version=get_version(),
+    dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)
 
 
-app =
-
-
-)
-
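The `requires_token_when_set` dependency registered on the app above only checks the `Bearer <TOKEN>` structure of the `Authorization` header, and only when `LOGDETECTIVE_TOKEN` is set. A minimal client sketch, assuming a hypothetical deployment URL and token value (only the header format comes from the code above):

import asyncio
import aiohttp

async def main():
    # "Bearer <TOKEN>" is the structure requires_token_when_set() parses;
    # the URL and token value here are placeholders.
    headers = {"Authorization": "Bearer my-secret-token"}
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get("https://logdetective.example.com/version") as resp:
            print(resp.status, await resp.text())

asyncio.run(main())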
+@app.post("/analyze", response_model=Response)
+@track_request()
+async def analyze_log(
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+):
+    """Provide endpoint for log file submission and analysis.
+    Request must be in form {"url":"<YOUR_URL_HERE>"}.
+    URL must be valid for the request to be passed to the LLM server.
+    Meaning that it must contain appropriate scheme, path and netloc,
+    while lacking result, params or query fields.
+    """
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()
 
-
-    """Validate log URL and return log text."""
-    if validate_url(url=url):
-        try:
-            log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
-        except requests.RequestException as ex:
-            raise HTTPException(
-                status_code=400, detail=f"We couldn't obtain the logs: {ex}"
-            ) from ex
+    return await perfrom_analysis(log_text)
 
-    if not log_request.ok:
-        raise HTTPException(
-            status_code=400,
-            detail="Something went wrong while getting the logs: "
-            f"[{log_request.status_code}] {log_request.text}",
-        )
-    else:
-        LOG.error("Invalid URL received ")
-        raise HTTPException(status_code=400, detail=f"Invalid log URL: {url}")
 
-
+@app.post("/analyze/staged", response_model=StagedResponse)
+@track_request()
+async def analyze_log_staged(
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+):
+    """Provide endpoint for log file submission and analysis.
+    Request must be in form {"url":"<YOUR_URL_HERE>"}.
+    URL must be valid for the request to be passed to the LLM server.
+    Meaning that it must contain appropriate scheme, path and netloc,
+    while lacking result, params or query fields.
+    """
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()
 
+    return await perform_staged_analysis(log_text)
 
-def mine_logs(log: str) -> List[Tuple[int, str]]:
-    """Extract snippets from log text"""
-    extractor = DrainExtractor(
-        verbose=True, context=True, max_clusters=SERVER_CONFIG.extractor.max_clusters
-    )
 
-
-
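Both handlers above accept the documented {"url":"<YOUR_URL_HERE>"} body and route the fetched log text to `perfrom_analysis` or `perform_staged_analysis` respectively. A minimal submission sketch; the server address is an assumption for illustration:

import asyncio
import aiohttp

async def analyze(log_url: str) -> dict:
    # The body shape comes from the endpoint docstrings above.
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8080/analyze/staged", json={"url": log_url}
        ) as resp:
            resp.raise_for_status()
            return await resp.json()

print(asyncio.run(analyze("https://example.com/build.log")))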
+@app.get(
+    "/analyze/rpmbuild/koji/{koji_instance}/{task_id}",
+    response_model=KojiStagedResponse,
+)
+async def get_koji_task_analysis(
+    koji_instance: Annotated[str, Path(title="The Koji instance to use")],
+    task_id: Annotated[int, Path(title="The task ID to analyze")],
+    x_koji_token: Annotated[str, Header()] = "",
+):
+    """Provide endpoint for retrieving log file analysis of a Koji task"""
 
-
-
-
+    try:
+        koji_instance_config = SERVER_CONFIG.koji.instances[koji_instance]
+    except KeyError:
+        # This Koji instance is not configured, so we will return a 404.
+        return BasicResponse(status_code=404, content="Unknown Koji instance.")
+
+    # This should always be available in a production environment.
+    # In a testing environment, the tokens list may be empty, in which case
+    # it will just proceed.
+    if koji_instance_config.tokens and x_koji_token not in koji_instance_config.tokens:
+        # (Unauthorized) error.
+        return BasicResponse(x_koji_token, status_code=401)
+
+    # Check if we have a response for this task
+    try:
+        return KojiTaskAnalysis.get_response_by_task_id(task_id)
+
+    except (KojiInvalidTaskID, KojiTaskNotFoundError):
+        # This task ID is malformed, out of range, or not found, so we will
+        # return a 404.
+        return BasicResponse(status_code=404)
+
+    except KojiTaskAnalysisTimeoutError:
+        # Task analysis has timed out, so we assume that the request was lost
+        # and that we need to start another analysis.
+        # There isn't a fully-appropriate error code for this, so we'll use
+        # 503 (Service Unavailable) as our best option.
+        return BasicResponse(
+            status_code=503, content="Task analysis timed out, please retry."
+        )
 
-
+    except KojiTaskNotAnalyzedError:
+        # Its still running, so we need to return a 202
+        # (Accepted) code to let the client know to keep waiting.
+        return BasicResponse(
+            status_code=202, content=f"Analysis still in progress for task {task_id}"
+        )
 
 
-
-
-
-
-
+@app.post(
+    "/analyze/rpmbuild/koji/{koji_instance}/{task_id}",
+    response_model=KojiStagedResponse,
+)
+async def analyze_rpmbuild_koji(
+    koji_instance: Annotated[str, Path(title="The Koji instance to use")],
+    task_id: Annotated[int, Path(title="The task ID to analyze")],
+    x_koji_token: Annotated[str, Header()] = "",
+    x_koji_callback: Annotated[str, Header()] = "",
+    background_tasks: BackgroundTasks = BackgroundTasks(),
+):
+    """Provide endpoint for retrieving log file analysis of a Koji task"""
 
-    url:
-    data:
-    headers:
-    stream:
-    """
     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        raise HTTPException(
-            status_code=400,
-            detail="Something went wrong while getting a response from the llama server: "
-            f"[{response.status_code}] {response.text}",
-        )
-    try:
-        response = json.loads(response.text)
-    except UnicodeDecodeError as ex:
-        LOG.error("Error encountered while parsing llama server response: %s", ex)
-        raise HTTPException(
-            status_code=400,
-            detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
-        ) from ex
+        koji_instance_config = SERVER_CONFIG.koji.instances[koji_instance]
+    except KeyError:
+        # This Koji instance is not configured, so we will return a 404.
+        return BasicResponse(status_code=404, content="Unknown Koji instance.")
+
+    # This should always be available in a production environment.
+    # In a testing environment, the tokens list may be empty, in which case
+    # it will just proceed.
+    if koji_instance_config.tokens and x_koji_token not in koji_instance_config.tokens:
+        # (Unauthorized) error.
+        return BasicResponse(x_koji_token, status_code=401)
+
+    # Check if we already have a response for this task
+    try:
+        response = KojiTaskAnalysis.get_response_by_task_id(task_id)
 
-
+    except KojiInvalidTaskID:
+        # This task ID is malformed or out of range, so we will return a 400.
+        response = BasicResponse(status_code=404, content="Invalid or unknown task ID.")
 
+    except (KojiTaskNotFoundError, KojiTaskAnalysisTimeoutError):
+        # Task not yet analyzed or it timed out, so we need to start the
+        # analysis in the background and return a 202 (Accepted) error.
 
-
-
-
-
-
-    model: str = "default-model",
-    api_endpoint: str = "/chat/completions",
-) -> Explanation:
-    """Submit prompt to LLM using a selected endpoint.
-    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-    log_probs: number of token choices to produce log probs for
-    """
-    LOG.info("Analyzing the text")
+        background_tasks.add_task(
+            analyze_koji_task,
+            task_id,
+            koji_instance_config,
+        )
 
-
+        # If a callback URL is provided, we need to add it to the callbacks
+        # table so that we can notify it when the analysis is complete.
+        if x_koji_callback:
+            koji_instance_config.register_callback(task_id, x_koji_callback)
 
-
-
+        response = BasicResponse(
+            status_code=202, content=f"Beginning analysis of task {task_id}"
+        )
 
-
-    return
-
+    except KojiTaskNotAnalyzedError:
+        # Its still running, so we need to return a 202
+        # (Accepted) error.
+        response = BasicResponse(
+            status_code=202, content=f"Analysis still in progress for task {task_id}"
         )
-    return await submit_text_completions(
-        text, headers, max_tokens, log_probs, stream, model
-    )
 
+    return response
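Together, the GET and POST handlers above form a poll-based protocol: POST schedules (or restarts) an analysis and answers 202, while GET returns the stored result, 202 while the analysis is still running, 404 for malformed or unknown tasks, and 503 when a previous attempt timed out. A client-side polling sketch under those assumptions; the base URL, instance name, and token are placeholders:

import time
import requests

BASE = "https://logdetective.example.com/analyze/rpmbuild/koji"  # placeholder URL
HEADERS = {"x-koji-token": "example-token"}  # mirrors the x_koji_token header

def analyze_and_wait(instance: str, task_id: int, interval: int = 30) -> dict:
    url = f"{BASE}/{instance}/{task_id}"
    requests.post(url, headers=HEADERS, timeout=30)  # 202: analysis scheduled
    while True:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code == 200:
            return resp.json()  # the completed KojiStagedResponse
        if resp.status_code == 503:
            # The previous analysis timed out; POST again to restart it.
            requests.post(url, headers=HEADERS, timeout=30)
        elif resp.status_code != 202:
            resp.raise_for_status()  # 404, 401, and other hard failures
        time.sleep(interval)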
 
-async def submit_text_completions( # pylint: disable=R0913,R0917
-    text: str,
-    headers: dict,
-    max_tokens: int = -1,
-    log_probs: int = 1,
-    stream: bool = False,
-    model: str = "default-model",
-) -> Explanation:
-    """Submit prompt to OpenAI API completions endpoint.
-    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-    log_probs: number of token choices to produce log probs for
-    """
-    LOG.info("Submitting to /v1/completions endpoint")
-    data = {
-        "prompt": text,
-        "max_tokens": max_tokens,
-        "logprobs": log_probs,
-        "stream": stream,
-        "model": model,
-    }
 
-
-
-        data,
-        headers,
-        stream,
-    )
+async def analyze_koji_task(task_id: int, koji_instance_config: KojiInstanceConfig):
+    """Analyze a koji task and return the response"""
 
-
-
+    # Get the log text from the koji task
+    koji_conn = koji_instance_config.get_connection()
+    log_file_name, log_text = await get_failed_log_from_koji_task(
+        koji_conn, task_id, max_size=SERVER_CONFIG.koji.max_artifact_size
     )
 
+    # We need to handle the metric tracking manually here, because we need
+    # to retrieve the metric ID to associate it with the koji task analysis.
 
-
-
-
-
-
-    stream: bool = False,
-    model: str = "default-model",
-) -> Explanation:
-    """Submit prompt to OpenAI API /chat/completions endpoint.
-    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-    log_probs: number of token choices to produce log probs for
-    """
-    LOG.info("Submitting to /v1/chat/completions endpoint")
-
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": text,
-            }
-        ],
-        "max_tokens": max_tokens,
-        "logprobs": log_probs,
-        "stream": stream,
-        "model": model,
-    }
-
-    response = await submit_to_llm_endpoint(
-        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/chat/completions",
-        data,
-        headers,
-        stream,
+    metrics_id = await add_new_metrics(
+        EndpointType.ANALYZE_KOJI_TASK,
+        log_text,
+        received_at=datetime.datetime.now(datetime.timezone.utc),
+        compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-
-
-
-
-
-
-
-        text=response["choices"][0]["message"]["content"],
-        logprobs=response["choices"][0]["logprobs"]["content"],
+    # We need to associate the metric ID with the koji task analysis.
+    # This will create the new row without a response, which we will use as
+    # an indicator that the analysis is in progress.
+    await KojiTaskAnalysis.create_or_restart(
+        koji_instance=koji_instance_config.xmlrpc_url,
+        task_id=task_id,
+        log_file_name=log_file_name,
    )
+    response = await perform_staged_analysis(log_text)
 
+    # Now that we have the response, we can update the metrics and mark the
+    # koji task analysis as completed.
+    await update_metrics(metrics_id, response)
+    await KojiTaskAnalysis.add_response(task_id, metrics_id)
 
-
-
-
-
-    Request must be in form {"url":"<YOUR_URL_HERE>"}.
-    URL must be valid for the request to be passed to the LLM server.
-    Meaning that it must contain appropriate scheme, path and netloc,
-    while lacking result, params or query fields.
-    """
-    log_text = process_url(build_log.url)
-    log_summary = mine_logs(log_text)
-    log_summary = format_snippets(log_summary)
-    response = await submit_text(
-        PROMPT_TEMPLATE.format(log_summary),
-        api_endpoint=SERVER_CONFIG.inference.api_endpoint,
-    )
-    certainty = 0
+    # Notify any callbacks that the analysis is complete.
+    for callback in koji_instance_config.get_callbacks(task_id):
+        LOG.info("Notifying callback %s of task %d completion", callback, task_id)
+        asyncio.create_task(send_koji_callback(callback, task_id))
 
-
-
-        certainty = compute_certainty(response.logprobs)
-    except ValueError as ex:
-        LOG.error("Error encountered while computing certainty: %s", ex)
-        raise HTTPException(
-            status_code=400,
-            detail=f"Couldn't compute certainty with data:\n"
-            f"{response.logprobs}",
-        ) from ex
+    # Now that it's sent, we can clear the callbacks for this task.
+    koji_instance_config.clear_callbacks(task_id)
 
-    return
+    return response
 
 
-
-
-async
-
-
-    URL must be valid for the request to be passed to the LLM server.
-    Meaning that it must contain appropriate scheme, path and netloc,
-    while lacking result, params or query fields.
-    """
-    log_text = process_url(build_log.url)
-    log_summary = mine_logs(log_text)
-
-    # Process snippets asynchronously
-    analyzed_snippets = await asyncio.gather(
-        *[
-            submit_text(
-                SNIPPET_PROMPT_TEMPLATE.format(s),
-                api_endpoint=SERVER_CONFIG.inference.api_endpoint,
-            )
-            for s in log_summary
-        ]
-    )
+async def send_koji_callback(callback: str, task_id: int):
+    """Send a callback to the specified URL with the task ID and log file name."""
+    async with aiohttp.ClientSession() as session:
+        async with session.post(callback, json={"task_id": task_id}):
+            pass
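When the POST request carried an `x-koji-callback` header, `send_koji_callback` above delivers `{"task_id": ...}` to that URL once the analysis lands. A receiver therefore only needs to accept that JSON body; a hedged sketch with a hypothetical route name:

from fastapi import FastAPI
from pydantic import BaseModel

receiver = FastAPI()

class KojiCallback(BaseModel):
    task_id: int  # matches the json={"task_id": task_id} payload sent above

@receiver.post("/koji-analysis-done")  # hypothetical URL registered via x-koji-callback
async def koji_analysis_done(payload: KojiCallback):
    # A real receiver would now GET the finished analysis back from the server.
    print(f"Analysis finished for Koji task {payload.task_id}")
    return {"ok": True}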
 
-    analyzed_snippets = [
-        AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
-        for e in zip(log_summary, analyzed_snippets)
-    ]
-    final_prompt = PROMPT_TEMPLATE_STAGED.format(
-        format_analyzed_snippets(analyzed_snippets)
-    )
 
-
-
-
+@app.get("/queue/print")
+async def queue_print(msg: str):
+    """Debug endpoint to test the LLM request queue"""
+    LOG.info("Will print %s", msg)
 
-
+    result = await async_log(msg)
 
-
-    try:
-        certainty = compute_certainty(final_analysis.logprobs)
-    except ValueError as ex:
-        LOG.error("Error encountered while computing certainty: %s", ex)
-        raise HTTPException(
-            status_code=400,
-            detail=f"Couldn't compute certainty with data:\n"
-            f"{final_analysis.logprobs}",
-        ) from ex
+    LOG.info("Printed %s and returned it", result)
 
-
-
-
-
-
+
+async def async_log(msg):
+    """Debug function to test the LLM request queue"""
+    async with SERVER_CONFIG.inference.get_limiter():
+        LOG.critical(msg)
+        return msg
+
+
+@app.get("/version", response_class=BasicResponse)
+async def get_version_wrapper():
+    """Get the version of logdetective"""
+    return BasicResponse(content=get_version())
 
 
 @app.post("/analyze/stream", response_class=StreamingResponse)
 @track_request()
-async def analyze_log_stream(
+async def analyze_log_stream(
+    build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+):
     """Stream response endpoint for Logdetective.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
     URL must be valid for the request to be passed to the LLM server.
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-
-
-
-
+    remote_log = RemoteLog(build_log.url, http_session)
+    log_text = await remote_log.process_url()
+    try:
+        stream = perform_analyis_stream(log_text)
+    except aiohttp.ClientResponseError as ex:
+        raise HTTPException(
+            status_code=400,
+            detail="HTTP Error while getting response from inference server "
+            f"[{ex.status}] {ex.message}",
+        ) from ex
 
-
-    headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+    return StreamingResponse(stream)
 
-    stream = await submit_text_chat_completions(
-        PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
-    )
 
-
+def is_valid_webhook_secret(forge, x_gitlab_token):
+    """Check whether the provided x_gitlab_token matches the webhook secret
+    specified in the configuration"""
+
+    gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
+
+    if not gitlab_cfg.webhook_secrets:
+        # No secrets specified, so don't bother validating.
+        # This is mostly to be used for development.
+        return True
+
+    if x_gitlab_token in gitlab_cfg.webhook_secrets:
+        return True
+
+    return False
 
 
 @app.post("/webhook/gitlab/job_events")
 async def receive_gitlab_job_event_webhook(
-    job_hook: JobHook,
+    job_hook: JobHook,
+    background_tasks: BackgroundTasks,
+    x_gitlab_instance: Annotated[str | None, Header()],
+    x_gitlab_token: Annotated[str | None, Header()] = None,
 ):
     """Webhook endpoint for receiving job_events notifications from GitLab
     https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
     lists the full specification for the messages sent for job events."""
 
-
-
+    try:
+        forge = Forge(x_gitlab_instance)
+    except ValueError:
+        LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
+        return BasicResponse(status_code=400)
+
+    if not is_valid_webhook_secret(forge, x_gitlab_token):
+        # This request could not be validated, so return a 401
+        # (Unauthorized) error.
+        return BasicResponse(status_code=401)
+
+    # Handle the message in the background so we can return 204 immediately
+    gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
+    background_tasks.add_task(
+        process_gitlab_job_event,
+        gitlab_cfg,
+        forge,
+        job_hook,
+    )
 
     # No return value or body is required for a webhook.
     # 204: No Content
     return BasicResponse(status_code=204)
 
 
-
-
-
+# A lookup table for whether we are currently processing a given merge request
+# The key is the tuple (Forge, ProjectID, MRID) and the value is a boolean
+# indicating whether we need to re-trigger the lookup immediately after
+# completion due to another request coming in during processing.
+# For example: {("https://gitlab.example.com", 23, 2): False}
+emoji_lookup = {}
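The job-events route above authenticates deliveries with `is_valid_webhook_secret` and identifies the sending forge from the `X-Gitlab-Instance` header, both mapped from the `Header()` parameters. A hand-rolled test delivery might look like the following; the URL, instance, secret, and the heavily truncated payload are all placeholders:

import requests

job_event = {"object_kind": "build"}  # truncated; real payloads carry the full job data
resp = requests.post(
    "https://logdetective.example.com/webhook/gitlab/job_events",
    headers={
        "X-Gitlab-Instance": "https://gitlab.com",  # must map to a configured Forge
        "X-Gitlab-Token": "webhook-secret",
    },
    json=job_event,
    timeout=30,
)
print(resp.status_code)  # 204 on success; 400 unknown forge; 401 bad secret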
 
-    # Look up the project this job belongs to
-    project = await asyncio.to_thread(app.gitlab_conn.projects.get, job_hook.project_id)
 
-
-
-
-
-
+@app.post("/webhook/gitlab/emoji_events")
+async def receive_gitlab_emoji_event_webhook(
+    x_gitlab_instance: Annotated[str | None, Header()],
+    x_gitlab_token: Annotated[str | None, Header()],
+    emoji_hook: EmojiHook,
+    background_tasks: BackgroundTasks,
+):
+    """Webhook endpoint for receiving emoji event notifications from Gitlab
+    https://docs.gitlab.com/user/project/integrations/webhook_events/#emoji-events
+    lists the full specification for the messages sent for emoji events"""
 
-
-
+    try:
+        forge = Forge(x_gitlab_instance)
+    except ValueError:
+        LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
+        return BasicResponse(status_code=400)
+
+    if not is_valid_webhook_secret(forge, x_gitlab_token):
+        # This request could not be validated, so return a 401
+        # (Unauthorized) error.
+        return BasicResponse(status_code=401)
+
+    if not emoji_hook.merge_request:
+        # This is not a merge request event. It is probably an emoji applied
+        # to some other "awardable" entity. Just ignore it and return.
+        LOG.debug("Emoji event is not related to a merge request. Ignoring.")
+        return BasicResponse(status_code=204)
+
+    # We will re-process all the emojis on this merge request, to ensure that
+    # we haven't missed any messages, since webhooks do not provide delivery
+    # guarantees.
+
+    # Check whether this request is already in progress.
+    # We are single-threaded, so we can guarantee that the table won't change
+    # between here and when we schedule the lookup.
+    key = (
+        forge,
+        emoji_hook.merge_request.target_project_id,
+        emoji_hook.merge_request.iid,
+    )
+    if key in emoji_lookup:
+        # It's already in progress, so we do not want to start another pass
+        # concurrently. We'll set the value to True to indicate that we should
+        # re-enqueue this lookup after the currently-running one concludes. It
+        # is always safe to set this to True, even if it's already True. If
+        # multiple requests come in during processing, we only need to re-run
+        # it a single time, since it will pick up all the ongoing changes. The
+        # worst-case situation is the one where we receive new requests just
+        # after processing starts, which will cause the cycle to repeat again.
+        # This should be very infrequent, as emoji events are computationally
+        # rare and very quick to process.
+        emoji_lookup[key] = True
+        LOG.info("MR Emojis already being processed for %s. Rescheduling.", key)
+        return BasicResponse(status_code=204)
+
+    # Inform the lookup table that we are processing this emoji
+    emoji_lookup[key] = False
+
+    # Create a background task to process the emojis on this Merge Request.
+    background_tasks.add_task(
+        schedule_emoji_collection_for_mr,
+        forge,
+        emoji_hook.merge_request.target_project_id,
+        emoji_hook.merge_request.iid,
+        background_tasks,
+    )
 
-    #
-
+    # No return value or body is required for a webhook.
+    # 204: No Content
+    return BasicResponse(status_code=204)
 
-    # Verify this is a merge request
-    if pipeline.source != "merge_request_event":
-        LOG.info("Not a merge request pipeline. Ignoring.")
-        return
 
-
-
-
-
-
+async def schedule_emoji_collection_for_mr(
+    forge: Forge, project_id: int, mr_iid: int, background_tasks: BackgroundTasks
+):
+    """Background task to update the database on emoji reactions"""
+
+    key = (forge, project_id, mr_iid)
+
+    # FIXME: Look up the connection from the Forge # pylint: disable=fixme
+    gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value].get_connection()
+
+    LOG.debug("Looking up emojis for %s, %d, %d", forge, project_id, mr_iid)
+    await collect_emojis_for_mr(project_id, mr_iid, gitlab_conn)
+
+    # Check whether we've been asked to re-schedule this lookup because
+    # another request came in while it was processing.
+    if emoji_lookup[key]:
+        # The value is Truthy, which tells us to re-schedule
+        # Reset the boolean value to indicate that we're underway again.
+        emoji_lookup[key] = False
+        background_tasks.add_task(
+            schedule_emoji_collection_for_mr,
+            forge,
+            project_id,
+            mr_iid,
+            background_tasks,
+        )
         return
-    merge_request_id = int(match.group(1))
 
-
-
-
-        preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
-    except LogsTooLargeError:
-        LOG.error("Could not retrieve logs. Too large.")
-        raise
+    # We're all done, so clear this entry out of the lookup
+    del emoji_lookup[key]
+
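The `emoji_lookup` bookkeeping in the two functions above is a coalescing pattern: at most one collection pass runs per merge request, and any events arriving mid-pass collapse into a single follow-up run. The same idiom in isolation, as a self-contained asyncio sketch:

import asyncio

rerun_flags: dict[str, bool] = {}  # like emoji_lookup: key -> "run again afterwards?"

async def coalesced(key: str, work) -> None:
    if key in rerun_flags:
        rerun_flags[key] = True  # fold this trigger into the in-flight pass
        return
    rerun_flags[key] = False
    while True:
        await work()
        if not rerun_flags[key]:
            break  # nothing new arrived during the pass
        rerun_flags[key] = False  # one re-run covers all triggers received meanwhile
    del rerun_flags[key]

async def main():
    async def work():
        await asyncio.sleep(0.1)
        print("pass completed")
    # The second trigger arrives while the first is running and is coalesced.
    await asyncio.gather(coalesced("mr-1", work), coalesced("mr-1", work))

asyncio.run(main())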
 
-
-    response
-
+def _svg_figure_response(fig: matplotlib.figure.Figure):
+    """Create a response with the given svg figure."""
+    buf = BytesIO()
+    fig.savefig(buf, format="svg", bbox_inches="tight")
+    matplotlib.pyplot.close(fig)
 
-
-
+    buf.seek(0)
+    return StreamingResponse(
+        buf,
+        media_type="image/svg+xml",
+        headers={"Content-Disposition": "inline; filename=plot.svg"},
+    )
+
+
+def _multiple_svg_figures_response(figures: list[matplotlib.figure.Figure]):
+    """Create a response with multiple svg figures."""
+    svg_contents = []
+    for i, fig in enumerate(figures):
+        buf = BytesIO()
+        fig.savefig(buf, format="svg", bbox_inches="tight")
+        matplotlib.pyplot.close(fig)
+        buf.seek(0)
+        svg_contents.append(buf.read().decode("utf-8"))
+
+    html_content = "<html><body>\n"
+    for i, svg in enumerate(svg_contents):
+        html_content += f"<div id='figure-{i}'>\n{svg}\n</div>\n"
+    html_content += "</body></html>"
+
+    return BasicResponse(content=html_content, media_type="text/html")
 
 
-class
-    """
+class MetricRoute(str, Enum):
+    """Routes for metrics"""
 
+    ANALYZE = "analyze"
+    ANALYZE_STAGED = "analyze-staged"
+    ANALYZE_GITLAB_JOB = "analyze-gitlab"
 
-async def retrieve_and_preprocess_koji_logs(job):
-    """Download logs from the merge request artifacts
 
-
-
+class Plot(str, Enum):
+    """Type of served plots"""
 
-
-
-
+    REQUESTS = "requests"
+    RESPONSES = "responses"
+    EMOJIS = "emojis"
+    BOTH = ""
 
-
-
-
-
+
+ROUTE_TO_ENDPOINT_TYPES = {
+    MetricRoute.ANALYZE: EndpointType.ANALYZE,
+    MetricRoute.ANALYZE_STAGED: EndpointType.ANALYZE_STAGED,
+    MetricRoute.ANALYZE_GITLAB_JOB: EndpointType.ANALYZE_GITLAB_JOB,
+}
+
+
+@app.get("/metrics/{route}/", response_class=StreamingResponse)
+@app.get("/metrics/{route}/{plot}", response_class=StreamingResponse)
+async def get_metrics(
+    route: MetricRoute,
+    plot: Plot = Plot.BOTH,
+    period_since_now: TimePeriod = Depends(TimePeriod),
+):
+    """Get an handler for visualize statistics for the specified endpoint and plot."""
+    endpoint_type = ROUTE_TO_ENDPOINT_TYPES[route]
+
+    async def handler():
+        """Show statistics for the specified endpoint and plot."""
+        if plot == Plot.REQUESTS:
+            fig = await plot_engine.requests_per_time(period_since_now, endpoint_type)
+            return _svg_figure_response(fig)
+        if plot == Plot.RESPONSES:
+            fig = await plot_engine.average_time_per_responses(
+                period_since_now, endpoint_type
+            )
+            return _svg_figure_response(fig)
+        if plot == Plot.EMOJIS:
+            fig = await plot_engine.emojis_per_time(period_since_now)
+            return _svg_figure_response(fig)
+        # BOTH
+        fig_requests = await plot_engine.requests_per_time(
+            period_since_now, endpoint_type
         )
+        fig_responses = await plot_engine.average_time_per_responses(
+            period_since_now, endpoint_type
+        )
+        fig_emojis = await plot_engine.emojis_per_time(period_since_now)
+        return _multiple_svg_figures_response([fig_requests, fig_responses, fig_emojis])
+
+    descriptions = {
+        Plot.REQUESTS: (
+            "Show statistics for the requests received in the given period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+        Plot.RESPONSES: (
+            "Show statistics for responses given in the specified period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+        Plot.EMOJIS: (
+            "Show statistics for emoji feedback in the specified period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+        Plot.BOTH: (
+            "Show statistics for requests and responses in the given period of time "
+            f"for the /{endpoint_type.value} API endpoint."
+        ),
+    }
+    handler.__doc__ = descriptions[plot]
+
+    return await handler()
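Each metrics route streams back an SVG, or an HTML page bundling several SVGs for the combined view. A retrieval sketch; the deployment URL is a placeholder, and the query parameter assumes `TimePeriod` exposes a `weeks` field, as the `TimePeriod(weeks=54)` call later in this diff suggests:

import requests

resp = requests.get(
    "https://logdetective.example.com/metrics/analyze/requests",  # MetricRoute + Plot
    params={"weeks": 4},  # assumption: TimePeriod accepts a "weeks" query parameter
    timeout=30,
)
with open("requests.svg", "wb") as f:
    f.write(resp.content)  # image/svg+xml payload from _svg_figure_response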
 
-    # Create a temporary file to store the downloaded log zipfile.
-    # This will be automatically deleted when the last reference into it
-    # (returned by this function) is closed.
-    tempfile = TemporaryFile(mode="w+b")
-    await asyncio.to_thread(job.artifacts, streamed=True, action=tempfile.write)
-    tempfile.seek(0)
-
-    failed_arches = {}
-    artifacts_zip = zipfile.ZipFile(tempfile, mode="r")
-    for zipinfo in artifacts_zip.infolist():
-        if zipinfo.filename.endswith("task_failed.log"):
-            # The koji logs store this file in two places: 1) in the
-            # directory with the failed architecture and 2) in the parent
-            # directory. We actually want to ignore the one in the parent
-            # directory, since the rest of the information is in the
-            # specific task directory.
-            # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
-            # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
-            path = PurePath(zipinfo.filename)
-            if len(path.parts) <= 3:
-                continue
-
-            # Extract the architecture from the immediate parent path
-            architecture = path.parent.parts[-1].split("-")[0]
-
-            # Open this file and read which log failed.
-            # The string in this log has the format
-            # `see <log> for more information`.
-            # Note: it may sometimes say
-            # `see build.log or root.log for more information`, but in
-            # that situation, we only want to handle build.log (for now),
-            # which means accepting only the first match for the regular
-            # expression.
-            with artifacts_zip.open(zipinfo.filename) as task_failed_log:
-                contents = task_failed_log.read().decode("utf-8")
-                match = FAILURE_LOG_REGEX.search(contents)
-                if not match:
-                    LOG.error(
-                        "task_failed.log does not indicate which log contains the failure."
-                    )
-                    raise SyntaxError(
-                        "task_failed.log does not indicate which log contains the failure."
-                    )
-                failure_log_name = match.group(1)
-
-                failed_arches[architecture] = PurePath(path.parent, failure_log_name)
-
-    if not failed_arches:
-        # No failed task found?
-        raise FileNotFoundError("Could not detect failed architecture.")
-
-    # First check if we only found one failed architecture
-    if len(failed_arches) == 1:
-        failed_arch = list(failed_arches.keys())[0]
-
-    else:
-        # We only want to handle one arch, so we'll check them in order of
-        # "most to least likely for the maintainer to have access to hardware"
-        # This means: x86_64 > aarch64 > ppc64le > s390x
-        if "x86_64" in failed_arches:
-            failed_arch = "x86_64"
-        elif "aarch64" in failed_arches:
-            failed_arch = "aarch64"
-        elif "ppc64le" in failed_arches:
-            failed_arch = "ppc64le"
-        elif "s390x" in failed_arches:
-            failed_arch = "s390x"
-        else:
-            # It should be impossible for us to get "noarch" here, since
-            # the only way that should happen is for a single architecture
-            # build.
-            raise FileNotFoundError("No failed architecture detected.")
-
-    LOG.debug("Failed architecture: %s", failed_arch)
-
-    log_path = failed_arches[failed_arch]
-    LOG.debug("Returning contents of %s", log_path)
-
-    # Return the log as a file-like object with .read() function
-    return artifacts_zip.open(log_path.as_posix())
-
-
-async def check_artifacts_file_size(job):
-    """Method to determine if the artifacts are too large to process"""
-    # First, make sure that the artifacts are of a reasonable size. The
-    # zipped artifact collection will be stored in memory below. The
-    # python-gitlab library doesn't expose a way to check this value directly,
-    # so we need to interact with directly with the headers.
-    artifacts_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts"  # pylint: disable=line-too-long
-    header_resp = await asyncio.to_thread(
-        requests.head,
-        artifacts_url,
-        allow_redirects=True,
-        headers={"Authorization": f"Bearer {SERVER_CONFIG.gitlab.api_token}"},
-        timeout=(3.07, 5),
-    )
-    content_length = int(header_resp.headers.get("content-length"))
-    LOG.debug(
-        "URL: %s, content-length: %d, max length: %d",
-        artifacts_url,
-        content_length,
-        SERVER_CONFIG.gitlab.max_artifact_size,
-    )
-    return content_length <= SERVER_CONFIG.gitlab.max_artifact_size
 
+async def collect_emoji_task():
+    """Collect emoji feedback.
+    Query only comments created in the last year.
+    """
 
-
-
-
-
-
-
+    for instance in SERVER_CONFIG.gitlab.instances.values():
+        LOG.info(
+            "Collect emoji feedback for %s started at %s",
+            instance.url,
+            datetime.datetime.now(datetime.timezone.utc),
+        )
+        await collect_emojis(instance.get_connection(), TimePeriod(weeks=54))
+        LOG.info(
+            "Collect emoji feedback finished at %s",
+            datetime.datetime.now(datetime.timezone.utc),
+        )
 
 
-async def
-    """
-
-
+async def schedule_collect_emojis_task():
+    """Schedule the collect_emojis_task to run on a configured interval"""
+    while True:
+        seconds_until_run = SERVER_CONFIG.general.collect_emojis_interval
+        LOG.info("Collect emojis in %d seconds", seconds_until_run)
+        await asyncio.sleep(seconds_until_run)
+
+        try:
+            await collect_emoji_task()
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            LOG.exception("Error in collect_emoji_task: %s", e)