logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/server.py
@@ -1,596 +1,708 @@
- import asyncio
- import json
  import os
- import re
- import zipfile
- from pathlib import PurePath
- from tempfile import TemporaryFile
- from typing import List, Annotated, Tuple, Dict, Any
-
-
- from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
+ import asyncio
+ import datetime
+ from enum import Enum
+ from contextlib import asynccontextmanager
+ from typing import Annotated
+ from io import BytesIO
+
+ import matplotlib
+ import matplotlib.figure
+ import matplotlib.pyplot
+ from fastapi import (
+ FastAPI,
+ HTTPException,
+ BackgroundTasks,
+ Depends,
+ Header,
+ Path,
+ Request,
+ )

  from fastapi.responses import StreamingResponse
  from fastapi.responses import Response as BasicResponse
- import gitlab
- import requests
+ import aiohttp
+ import sentry_sdk
+
+ from logdetective.server.exceptions import KojiInvalidTaskID

- from logdetective.constants import (
- PROMPT_TEMPLATE,
- SNIPPET_PROMPT_TEMPLATE,
- PROMPT_TEMPLATE_STAGED,
+ from logdetective.server.database.models.koji import KojiTaskAnalysis
+ from logdetective.server.database.models.exceptions import (
+ KojiTaskAnalysisTimeoutError,
+ KojiTaskNotAnalyzedError,
+ KojiTaskNotFoundError,
  )
- from logdetective.extractors import DrainExtractor
- from logdetective.utils import (
- validate_url,
- compute_certainty,
- format_snippets,
- format_analyzed_snippets,
+
+ import logdetective.server.database.base
+
+ from logdetective.server.config import SERVER_CONFIG, LOG
+ from logdetective.server.koji import (
+ get_failed_log_from_task as get_failed_log_from_koji_task,
+ )
+ from logdetective.remote_log import RemoteLog
+ from logdetective.server.llm import (
+ perform_staged_analysis,
+ perfrom_analysis,
+ perform_analyis_stream,
  )
- from logdetective.server.utils import load_server_config, get_log
- from logdetective.server.metric import track_request
+ from logdetective.server.gitlab import process_gitlab_job_event
+ from logdetective.server.metric import track_request, add_new_metrics, update_metrics
  from logdetective.server.models import (
  BuildLog,
+ EmojiHook,
  JobHook,
+ KojiInstanceConfig,
+ KojiStagedResponse,
  Response,
  StagedResponse,
- Explanation,
- AnalyzedSnippet,
+ TimePeriod,
+ )
+ from logdetective.server import plot as plot_engine
+ from logdetective.server.database.models import (
+ EndpointType,
+ Forge,
+ )
+ from logdetective.server.emoji import (
+ collect_emojis,
+ collect_emojis_for_mr,
  )
+ from logdetective.server.compressors import RemoteLogCompressor
+ from logdetective.server.utils import get_version
+

- LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
- LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
- LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
- LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
  API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
- SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
- LLM_API_TOKEN = os.environ.get("LLM_API_TOKEN", None)

- SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)

- MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/merge")
- FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
+ if sentry_dsn := SERVER_CONFIG.general.sentry_dsn:
+ sentry_sdk.init(dsn=str(sentry_dsn), traces_sample_rate=1.0)
+
+
+ @asynccontextmanager
+ async def lifespan(fapp: FastAPI):
+ """
+ Establish one HTTP session
+ """
+ fapp.http = aiohttp.ClientSession(
+ timeout=aiohttp.ClientTimeout(
+ total=int(LOG_SOURCE_REQUEST_TIMEOUT), connect=3.07
+ )
+ )
+
+ # Ensure that the database is initialized.
+ await logdetective.server.database.base.init()
+
+ # Start the background task scheduler for collecting emojis
+ asyncio.create_task(schedule_collect_emojis_task())
+
+ yield

- LOG = get_log(SERVER_CONFIG)
+ await fapp.http.close()


- def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
+ async def get_http_session(request: Request) -> aiohttp.ClientSession:
  """
- FastAPI Depend function that expects a header named Authentication
+ Return the single aiohttp ClientSession for this app
+ """
+ return request.app.http
+
+
+ def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
+ """
+ FastAPI Depend function that expects a header named Authorization

  If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
  otherwise ignore it
  """
  if not API_TOKEN:
- LOG.info("LOGDETECTIVE_TOKEN env var not set, authentication disabled")
+ LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
  # no token required, means local dev environment
  return
- token = None
- if authentication:
+ if authorization:
  try:
- token = authentication.split(" ", 1)[1]
- except (ValueError, IndexError):
+ token = authorization.split(" ", 1)[1]
+ except (ValueError, IndexError) as ex:
  LOG.warning(
- "Authentication header has invalid structure (%s), it should be 'Bearer TOKEN'",
- authentication,
+ "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+ authorization,
  )
  # eat the exception and raise 401 below
- token = None
+ raise HTTPException(
+ status_code=401,
+ detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
+ ) from ex
  if token == API_TOKEN:
  return
- LOG.info(
- "LOGDETECTIVE_TOKEN env var is set (%s), clien token = %s", API_TOKEN, token
- )
- raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
+ LOG.info("Provided token '%s' does not match expected value.", token)
+ raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
+ LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
+ raise HTTPException(status_code=401, detail="No token provided.")
+
+
+ app = FastAPI(
+ title="Log Detective",
+ contact={
+ "name": "Log Detective developers",
+ "url": "https://github.com/fedora-copr/logdetective",
+ "email": "copr-devel@lists.fedorahosted.org"
+ },
+ license_info={
+ "name": "Apache-2.0",
+ "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
+ },
+ version=get_version(),
+ dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)


- app = FastAPI(dependencies=[Depends(requires_token_when_set)])
- app.gitlab_conn = gitlab.Gitlab(
- url=SERVER_CONFIG.gitlab.url, private_token=SERVER_CONFIG.gitlab.api_token
- )
-
+ @app.post("/analyze", response_model=Response)
+ @track_request()
+ async def analyze_log(
+ build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+ ):
+ """Provide endpoint for log file submission and analysis.
+ Request must be in form {"url":"<YOUR_URL_HERE>"}.
+ URL must be valid for the request to be passed to the LLM server.
+ Meaning that it must contain appropriate scheme, path and netloc,
+ while lacking result, params or query fields.
+ """
+ remote_log = RemoteLog(build_log.url, http_session)
+ log_text = await remote_log.process_url()

- def process_url(url: str) -> str:
- """Validate log URL and return log text."""
- if validate_url(url=url):
- try:
- log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
- except requests.RequestException as ex:
- raise HTTPException(
- status_code=400, detail=f"We couldn't obtain the logs: {ex}"
- ) from ex
+ return await perfrom_analysis(log_text)

- if not log_request.ok:
- raise HTTPException(
- status_code=400,
- detail="Something went wrong while getting the logs: "
- f"[{log_request.status_code}] {log_request.text}",
- )
- else:
- LOG.error("Invalid URL received ")
- raise HTTPException(status_code=400, detail=f"Invalid log URL: {url}")

- return log_request.text
+ @app.post("/analyze/staged", response_model=StagedResponse)
+ @track_request()
+ async def analyze_log_staged(
+ build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+ ):
+ """Provide endpoint for log file submission and analysis.
+ Request must be in form {"url":"<YOUR_URL_HERE>"}.
+ URL must be valid for the request to be passed to the LLM server.
+ Meaning that it must contain appropriate scheme, path and netloc,
+ while lacking result, params or query fields.
+ """
+ remote_log = RemoteLog(build_log.url, http_session)
+ log_text = await remote_log.process_url()

+ return await perform_staged_analysis(log_text)

- def mine_logs(log: str) -> List[Tuple[int, str]]:
- """Extract snippets from log text"""
- extractor = DrainExtractor(
- verbose=True, context=True, max_clusters=SERVER_CONFIG.extractor.max_clusters
- )

- LOG.info("Getting summary")
- log_summary = extractor(log)
+ @app.get(
+ "/analyze/rpmbuild/koji/{koji_instance}/{task_id}",
+ response_model=KojiStagedResponse,
+ )
+ async def get_koji_task_analysis(
+ koji_instance: Annotated[str, Path(title="The Koji instance to use")],
+ task_id: Annotated[int, Path(title="The task ID to analyze")],
+ x_koji_token: Annotated[str, Header()] = "",
+ ):
+ """Provide endpoint for retrieving log file analysis of a Koji task"""

- ratio = len(log_summary) / len(log.split("\n"))
- LOG.debug("Log summary: \n %s", log_summary)
- LOG.info("Compression ratio: %s", ratio)
+ try:
+ koji_instance_config = SERVER_CONFIG.koji.instances[koji_instance]
+ except KeyError:
+ # This Koji instance is not configured, so we will return a 404.
+ return BasicResponse(status_code=404, content="Unknown Koji instance.")
+
+ # This should always be available in a production environment.
+ # In a testing environment, the tokens list may be empty, in which case
+ # it will just proceed.
+ if koji_instance_config.tokens and x_koji_token not in koji_instance_config.tokens:
+ # (Unauthorized) error.
+ return BasicResponse(x_koji_token, status_code=401)
+
+ # Check if we have a response for this task
+ try:
+ return KojiTaskAnalysis.get_response_by_task_id(task_id)
+
+ except (KojiInvalidTaskID, KojiTaskNotFoundError):
+ # This task ID is malformed, out of range, or not found, so we will
+ # return a 404.
+ return BasicResponse(status_code=404)
+
+ except KojiTaskAnalysisTimeoutError:
+ # Task analysis has timed out, so we assume that the request was lost
+ # and that we need to start another analysis.
+ # There isn't a fully-appropriate error code for this, so we'll use
+ # 503 (Service Unavailable) as our best option.
+ return BasicResponse(
+ status_code=503, content="Task analysis timed out, please retry."
+ )

- return log_summary
+ except KojiTaskNotAnalyzedError:
+ # Its still running, so we need to return a 202
+ # (Accepted) code to let the client know to keep waiting.
+ return BasicResponse(
+ status_code=202, content=f"Analysis still in progress for task {task_id}"
+ )


- async def submit_to_llm_endpoint(
- url: str, data: Dict[str, Any], headers: Dict[str, str], stream: bool
- ) -> Any:
- """Send request to selected API endpoint. Verifying successful request unless
- the using the stream response.
+ @app.post(
+ "/analyze/rpmbuild/koji/{koji_instance}/{task_id}",
+ response_model=KojiStagedResponse,
+ )
+ async def analyze_rpmbuild_koji(
+ koji_instance: Annotated[str, Path(title="The Koji instance to use")],
+ task_id: Annotated[int, Path(title="The task ID to analyze")],
+ x_koji_token: Annotated[str, Header()] = "",
+ x_koji_callback: Annotated[str, Header()] = "",
+ background_tasks: BackgroundTasks = BackgroundTasks(),
+ ):
+ """Provide endpoint for retrieving log file analysis of a Koji task"""

- url:
- data:
- headers:
- stream:
- """
  try:
- # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
- response = requests.post(
- url,
- headers=headers,
- data=json.dumps(data),
- timeout=int(LLM_CPP_SERVER_TIMEOUT),
- stream=stream,
- )
- except requests.RequestException as ex:
- LOG.error("Llama-cpp query failed: %s", ex)
- raise HTTPException(
- status_code=400, detail=f"Llama-cpp query failed: {ex}"
- ) from ex
- if not stream:
- if not response.ok:
- raise HTTPException(
- status_code=400,
- detail="Something went wrong while getting a response from the llama server: "
- f"[{response.status_code}] {response.text}",
- )
- try:
- response = json.loads(response.text)
- except UnicodeDecodeError as ex:
- LOG.error("Error encountered while parsing llama server response: %s", ex)
- raise HTTPException(
- status_code=400,
- detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
- ) from ex
+ koji_instance_config = SERVER_CONFIG.koji.instances[koji_instance]
+ except KeyError:
+ # This Koji instance is not configured, so we will return a 404.
+ return BasicResponse(status_code=404, content="Unknown Koji instance.")
+
+ # This should always be available in a production environment.
+ # In a testing environment, the tokens list may be empty, in which case
+ # it will just proceed.
+ if koji_instance_config.tokens and x_koji_token not in koji_instance_config.tokens:
+ # (Unauthorized) error.
+ return BasicResponse(x_koji_token, status_code=401)
+
+ # Check if we already have a response for this task
+ try:
+ response = KojiTaskAnalysis.get_response_by_task_id(task_id)

- return response
+ except KojiInvalidTaskID:
+ # This task ID is malformed or out of range, so we will return a 400.
+ response = BasicResponse(status_code=404, content="Invalid or unknown task ID.")

+ except (KojiTaskNotFoundError, KojiTaskAnalysisTimeoutError):
+ # Task not yet analyzed or it timed out, so we need to start the
+ # analysis in the background and return a 202 (Accepted) error.

- async def submit_text( # pylint: disable=R0913,R0917
- text: str,
- max_tokens: int = -1,
- log_probs: int = 1,
- stream: bool = False,
- model: str = "default-model",
- api_endpoint: str = "/chat/completions",
- ) -> Explanation:
- """Submit prompt to LLM using a selected endpoint.
- max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
- log_probs: number of token choices to produce log probs for
- """
- LOG.info("Analyzing the text")
+ background_tasks.add_task(
+ analyze_koji_task,
+ task_id,
+ koji_instance_config,
+ )

- headers = {"Content-Type": "application/json"}
+ # If a callback URL is provided, we need to add it to the callbacks
+ # table so that we can notify it when the analysis is complete.
+ if x_koji_callback:
+ koji_instance_config.register_callback(task_id, x_koji_callback)

- if LLM_API_TOKEN:
- headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+ response = BasicResponse(
+ status_code=202, content=f"Beginning analysis of task {task_id}"
+ )

- if api_endpoint == "/chat/completions":
- return await submit_text_chat_completions(
- text, headers, max_tokens, log_probs > 0, stream, model
+ except KojiTaskNotAnalyzedError:
+ # Its still running, so we need to return a 202
+ # (Accepted) error.
+ response = BasicResponse(
+ status_code=202, content=f"Analysis still in progress for task {task_id}"
  )
- return await submit_text_completions(
- text, headers, max_tokens, log_probs, stream, model
- )

+ return response

- async def submit_text_completions( # pylint: disable=R0913,R0917
- text: str,
- headers: dict,
- max_tokens: int = -1,
- log_probs: int = 1,
- stream: bool = False,
- model: str = "default-model",
- ) -> Explanation:
- """Submit prompt to OpenAI API completions endpoint.
- max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
- log_probs: number of token choices to produce log probs for
- """
- LOG.info("Submitting to /v1/completions endpoint")
- data = {
- "prompt": text,
- "max_tokens": max_tokens,
- "logprobs": log_probs,
- "stream": stream,
- "model": model,
- }

- response = await submit_to_llm_endpoint(
- f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
- data,
- headers,
- stream,
- )
+ async def analyze_koji_task(task_id: int, koji_instance_config: KojiInstanceConfig):
+ """Analyze a koji task and return the response"""

- return Explanation(
- text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
+ # Get the log text from the koji task
+ koji_conn = koji_instance_config.get_connection()
+ log_file_name, log_text = await get_failed_log_from_koji_task(
+ koji_conn, task_id, max_size=SERVER_CONFIG.koji.max_artifact_size
  )

+ # We need to handle the metric tracking manually here, because we need
+ # to retrieve the metric ID to associate it with the koji task analysis.

- async def submit_text_chat_completions( # pylint: disable=R0913,R0917
- text: str,
- headers: dict,
- max_tokens: int = -1,
- log_probs: int = 1,
- stream: bool = False,
- model: str = "default-model",
- ) -> Explanation:
- """Submit prompt to OpenAI API /chat/completions endpoint.
- max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
- log_probs: number of token choices to produce log probs for
- """
- LOG.info("Submitting to /v1/chat/completions endpoint")
-
- data = {
- "messages": [
- {
- "role": "user",
- "content": text,
- }
- ],
- "max_tokens": max_tokens,
- "logprobs": log_probs,
- "stream": stream,
- "model": model,
- }
-
- response = await submit_to_llm_endpoint(
- f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/chat/completions",
- data,
- headers,
- stream,
+ metrics_id = await add_new_metrics(
+ EndpointType.ANALYZE_KOJI_TASK,
+ log_text,
+ received_at=datetime.datetime.now(datetime.timezone.utc),
+ compressed_log_content=RemoteLogCompressor.zip_text(log_text),
  )
-
- if stream:
- return Explanation(
- text=response["choices"][0]["delta"]["content"],
- logprobs=response["choices"][0]["logprobs"]["content"],
- )
- return Explanation(
- text=response["choices"][0]["message"]["content"],
- logprobs=response["choices"][0]["logprobs"]["content"],
+ # We need to associate the metric ID with the koji task analysis.
+ # This will create the new row without a response, which we will use as
+ # an indicator that the analysis is in progress.
+ await KojiTaskAnalysis.create_or_restart(
+ koji_instance=koji_instance_config.xmlrpc_url,
+ task_id=task_id,
+ log_file_name=log_file_name,
  )
+ response = await perform_staged_analysis(log_text)

+ # Now that we have the response, we can update the metrics and mark the
+ # koji task analysis as completed.
+ await update_metrics(metrics_id, response)
+ await KojiTaskAnalysis.add_response(task_id, metrics_id)

- @app.post("/analyze", response_model=Response)
- @track_request()
- async def analyze_log(build_log: BuildLog):
- """Provide endpoint for log file submission and analysis.
- Request must be in form {"url":"<YOUR_URL_HERE>"}.
- URL must be valid for the request to be passed to the LLM server.
- Meaning that it must contain appropriate scheme, path and netloc,
- while lacking result, params or query fields.
- """
- log_text = process_url(build_log.url)
- log_summary = mine_logs(log_text)
- log_summary = format_snippets(log_summary)
- response = await submit_text(
- PROMPT_TEMPLATE.format(log_summary),
- api_endpoint=SERVER_CONFIG.inference.api_endpoint,
- )
- certainty = 0
+ # Notify any callbacks that the analysis is complete.
+ for callback in koji_instance_config.get_callbacks(task_id):
+ LOG.info("Notifying callback %s of task %d completion", callback, task_id)
+ asyncio.create_task(send_koji_callback(callback, task_id))

- if response.logprobs is not None:
- try:
- certainty = compute_certainty(response.logprobs)
- except ValueError as ex:
- LOG.error("Error encountered while computing certainty: %s", ex)
- raise HTTPException(
- status_code=400,
- detail=f"Couldn't compute certainty with data:\n"
- f"{response.logprobs}",
- ) from ex
+ # Now that it's sent, we can clear the callbacks for this task.
+ koji_instance_config.clear_callbacks(task_id)

- return Response(explanation=response, response_certainty=certainty)
+ return response


- @app.post("/analyze/staged", response_model=StagedResponse)
- @track_request()
- async def analyze_log_staged(build_log: BuildLog):
- """Provide endpoint for log file submission and analysis.
- Request must be in form {"url":"<YOUR_URL_HERE>"}.
- URL must be valid for the request to be passed to the LLM server.
- Meaning that it must contain appropriate scheme, path and netloc,
- while lacking result, params or query fields.
- """
- log_text = process_url(build_log.url)
- log_summary = mine_logs(log_text)
-
- # Process snippets asynchronously
- analyzed_snippets = await asyncio.gather(
- *[
- submit_text(
- SNIPPET_PROMPT_TEMPLATE.format(s),
- api_endpoint=SERVER_CONFIG.inference.api_endpoint,
- )
- for s in log_summary
- ]
- )
+ async def send_koji_callback(callback: str, task_id: int):
+ """Send a callback to the specified URL with the task ID and log file name."""
+ async with aiohttp.ClientSession() as session:
+ async with session.post(callback, json={"task_id": task_id}):
+ pass

- analyzed_snippets = [
- AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
- for e in zip(log_summary, analyzed_snippets)
- ]
- final_prompt = PROMPT_TEMPLATE_STAGED.format(
- format_analyzed_snippets(analyzed_snippets)
- )

- final_analysis = await submit_text(
- final_prompt, api_endpoint=SERVER_CONFIG.inference.api_endpoint
- )
+ @app.get("/queue/print")
+ async def queue_print(msg: str):
+ """Debug endpoint to test the LLM request queue"""
+ LOG.info("Will print %s", msg)

- certainty = 0
+ result = await async_log(msg)

- if final_analysis.logprobs:
- try:
- certainty = compute_certainty(final_analysis.logprobs)
- except ValueError as ex:
- LOG.error("Error encountered while computing certainty: %s", ex)
- raise HTTPException(
- status_code=400,
- detail=f"Couldn't compute certainty with data:\n"
- f"{final_analysis.logprobs}",
- ) from ex
+ LOG.info("Printed %s and returned it", result)

- return StagedResponse(
- explanation=final_analysis,
- snippets=analyzed_snippets,
- response_certainty=certainty,
- )
+
+ async def async_log(msg):
+ """Debug function to test the LLM request queue"""
+ async with SERVER_CONFIG.inference.get_limiter():
+ LOG.critical(msg)
+ return msg
+
+
+ @app.get("/version", response_class=BasicResponse)
+ async def get_version_wrapper():
+ """Get the version of logdetective"""
+ return BasicResponse(content=get_version())


  @app.post("/analyze/stream", response_class=StreamingResponse)
  @track_request()
- async def analyze_log_stream(build_log: BuildLog):
+ async def analyze_log_stream(
+ build_log: BuildLog, http_session: aiohttp.ClientSession = Depends(get_http_session)
+ ):
  """Stream response endpoint for Logdetective.
  Request must be in form {"url":"<YOUR_URL_HERE>"}.
  URL must be valid for the request to be passed to the LLM server.
  Meaning that it must contain appropriate scheme, path and netloc,
  while lacking result, params or query fields.
  """
- log_text = process_url(build_log.url)
- log_summary = mine_logs(log_text)
- log_summary = format_snippets(log_summary)
- headers = {"Content-Type": "application/json"}
+ remote_log = RemoteLog(build_log.url, http_session)
+ log_text = await remote_log.process_url()
+ try:
+ stream = perform_analyis_stream(log_text)
+ except aiohttp.ClientResponseError as ex:
+ raise HTTPException(
+ status_code=400,
+ detail="HTTP Error while getting response from inference server "
+ f"[{ex.status}] {ex.message}",
+ ) from ex

- if LLM_API_TOKEN:
- headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+ return StreamingResponse(stream)

- stream = await submit_text_chat_completions(
- PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
- )

- return StreamingResponse(stream)
+ def is_valid_webhook_secret(forge, x_gitlab_token):
+ """Check whether the provided x_gitlab_token matches the webhook secret
+ specified in the configuration"""
+
+ gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
+
+ if not gitlab_cfg.webhook_secrets:
+ # No secrets specified, so don't bother validating.
+ # This is mostly to be used for development.
+ return True
+
+ if x_gitlab_token in gitlab_cfg.webhook_secrets:
+ return True
+
+ return False


  @app.post("/webhook/gitlab/job_events")
  async def receive_gitlab_job_event_webhook(
- job_hook: JobHook, background_tasks: BackgroundTasks
+ job_hook: JobHook,
+ background_tasks: BackgroundTasks,
+ x_gitlab_instance: Annotated[str | None, Header()],
+ x_gitlab_token: Annotated[str | None, Header()] = None,
  ):
  """Webhook endpoint for receiving job_events notifications from GitLab
  https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
  lists the full specification for the messages sent for job events."""

- # Handle the message in the background so we can return 200 immediately
- background_tasks.add_task(process_gitlab_job_event, job_hook)
+ try:
+ forge = Forge(x_gitlab_instance)
+ except ValueError:
+ LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
+ return BasicResponse(status_code=400)
+
+ if not is_valid_webhook_secret(forge, x_gitlab_token):
+ # This request could not be validated, so return a 401
+ # (Unauthorized) error.
+ return BasicResponse(status_code=401)
+
+ # Handle the message in the background so we can return 204 immediately
+ gitlab_cfg = SERVER_CONFIG.gitlab.instances[forge.value]
+ background_tasks.add_task(
+ process_gitlab_job_event,
+ gitlab_cfg,
+ forge,
+ job_hook,
+ )

  # No return value or body is required for a webhook.
  # 204: No Content
  return BasicResponse(status_code=204)


- async def process_gitlab_job_event(job_hook):
- """Handle a received job_event webhook from GitLab"""
- LOG.debug("Received webhook message:\n%s", job_hook)
+ # A lookup table for whether we are currently processing a given merge request
+ # The key is the tuple (Forge, ProjectID, MRID) and the value is a boolean
+ # indicating whether we need to re-trigger the lookup immediately after
+ # completion due to another request coming in during processing.
+ # For example: {("https://gitlab.example.com", 23, 2): False}
+ emoji_lookup = {}

- # Look up the project this job belongs to
- project = await asyncio.to_thread(app.gitlab_conn.projects.get, job_hook.project_id)

- # check if this project is on the opt-in list
- if project.name not in SERVER_CONFIG.general.packages:
- LOG.info("Ignoring unrecognized package %s", project.name)
- return
- LOG.info("Processing failed job for %s", project.name)
+ @app.post("/webhook/gitlab/emoji_events")
+ async def receive_gitlab_emoji_event_webhook(
+ x_gitlab_instance: Annotated[str | None, Header()],
+ x_gitlab_token: Annotated[str | None, Header()],
+ emoji_hook: EmojiHook,
+ background_tasks: BackgroundTasks,
+ ):
+ """Webhook endpoint for receiving emoji event notifications from Gitlab
+ https://docs.gitlab.com/user/project/integrations/webhook_events/#emoji-events
+ lists the full specification for the messages sent for emoji events"""

- # Retrieve data about the job from the GitLab API
- job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)
+ try:
+ forge = Forge(x_gitlab_instance)
+ except ValueError:
+ LOG.critical("%s is not a recognized forge. Ignoring.", x_gitlab_instance)
+ return BasicResponse(status_code=400)
+
+ if not is_valid_webhook_secret(forge, x_gitlab_token):
+ # This request could not be validated, so return a 401
+ # (Unauthorized) error.
+ return BasicResponse(status_code=401)
+
+ if not emoji_hook.merge_request:
+ # This is not a merge request event. It is probably an emoji applied
+ # to some other "awardable" entity. Just ignore it and return.
+ LOG.debug("Emoji event is not related to a merge request. Ignoring.")
+ return BasicResponse(status_code=204)
+
+ # We will re-process all the emojis on this merge request, to ensure that
+ # we haven't missed any messages, since webhooks do not provide delivery
+ # guarantees.
+
+ # Check whether this request is already in progress.
+ # We are single-threaded, so we can guarantee that the table won't change
+ # between here and when we schedule the lookup.
+ key = (
+ forge,
+ emoji_hook.merge_request.target_project_id,
+ emoji_hook.merge_request.iid,
+ )
+ if key in emoji_lookup:
+ # It's already in progress, so we do not want to start another pass
+ # concurrently. We'll set the value to True to indicate that we should
+ # re-enqueue this lookup after the currently-running one concludes. It
+ # is always safe to set this to True, even if it's already True. If
+ # multiple requests come in during processing, we only need to re-run
+ # it a single time, since it will pick up all the ongoing changes. The
+ # worst-case situation is the one where we receive new requests just
+ # after processing starts, which will cause the cycle to repeat again.
+ # This should be very infrequent, as emoji events are computationally
+ # rare and very quick to process.
+ emoji_lookup[key] = True
+ LOG.info("MR Emojis already being processed for %s. Rescheduling.", key)
+ return BasicResponse(status_code=204)
+
+ # Inform the lookup table that we are processing this emoji
+ emoji_lookup[key] = False
+
+ # Create a background task to process the emojis on this Merge Request.
+ background_tasks.add_task(
+ schedule_emoji_collection_for_mr,
+ forge,
+ emoji_hook.merge_request.target_project_id,
+ emoji_hook.merge_request.iid,
+ background_tasks,
+ )

- # Retrieve the pipeline that started this job
- pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)
+ # No return value or body is required for a webhook.
+ # 204: No Content
+ return BasicResponse(status_code=204)

- # Verify this is a merge request
- if pipeline.source != "merge_request_event":
- LOG.info("Not a merge request pipeline. Ignoring.")
- return

- # Extract the merge-request ID from the job
- match = MR_REGEX.search(pipeline.ref)
- if not match:
- LOG.error(
- "Pipeline source is merge_request_event but no merge request ID was provided."
+ async def schedule_emoji_collection_for_mr(
+ forge: Forge, project_id: int, mr_iid: int, background_tasks: BackgroundTasks
+ ):
+ """Background task to update the database on emoji reactions"""
+
+ key = (forge, project_id, mr_iid)
+
+ # FIXME: Look up the connection from the Forge # pylint: disable=fixme
+ gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value].get_connection()
+
+ LOG.debug("Looking up emojis for %s, %d, %d", forge, project_id, mr_iid)
+ await collect_emojis_for_mr(project_id, mr_iid, gitlab_conn)
+
+ # Check whether we've been asked to re-schedule this lookup because
+ # another request came in while it was processing.
+ if emoji_lookup[key]:
+ # The value is Truthy, which tells us to re-schedule
+ # Reset the boolean value to indicate that we're underway again.
+ emoji_lookup[key] = False
+ background_tasks.add_task(
+ schedule_emoji_collection_for_mr,
+ forge,
+ project_id,
+ mr_iid,
+ background_tasks,
  )
  return
- merge_request_id = int(match.group(1))

- LOG.debug("Retrieving log artifacts")
- # Retrieve the build logs from the merge request artifacts and preprocess them
- try:
- preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
- except LogsTooLargeError:
- LOG.error("Could not retrieve logs. Too large.")
- raise
+ # We're all done, so clear this entry out of the lookup
+ del emoji_lookup[key]
+

- # Submit log to Log Detective and await the results.
- response = await submit_log_to_llm(preprocessed_log)
- preprocessed_log.close()
+ def _svg_figure_response(fig: matplotlib.figure.Figure):
+ """Create a response with the given svg figure."""
+ buf = BytesIO()
+ fig.savefig(buf, format="svg", bbox_inches="tight")
+ matplotlib.pyplot.close(fig)

- # Add the Log Detective response as a comment to the merge request
- await comment_on_mr(merge_request_id, response)
+ buf.seek(0)
+ return StreamingResponse(
+ buf,
+ media_type="image/svg+xml",
+ headers={"Content-Disposition": "inline; filename=plot.svg"},
+ )
+
+
+ def _multiple_svg_figures_response(figures: list[matplotlib.figure.Figure]):
+ """Create a response with multiple svg figures."""
+ svg_contents = []
+ for i, fig in enumerate(figures):
+ buf = BytesIO()
+ fig.savefig(buf, format="svg", bbox_inches="tight")
+ matplotlib.pyplot.close(fig)
+ buf.seek(0)
+ svg_contents.append(buf.read().decode("utf-8"))
+
+ html_content = "<html><body>\n"
+ for i, svg in enumerate(svg_contents):
+ html_content += f"<div id='figure-{i}'>\n{svg}\n</div>\n"
+ html_content += "</body></html>"
+
+ return BasicResponse(content=html_content, media_type="text/html")


- class LogsTooLargeError(RuntimeError):
- """The log archive exceeds the configured maximum size"""
+ class MetricRoute(str, Enum):
+ """Routes for metrics"""

+ ANALYZE = "analyze"
+ ANALYZE_STAGED = "analyze-staged"
+ ANALYZE_GITLAB_JOB = "analyze-gitlab"

- async def retrieve_and_preprocess_koji_logs(job):
- """Download logs from the merge request artifacts

- This function will retrieve the build logs and do some minimal
- preprocessing to determine which log is relevant for analysis.
+ class Plot(str, Enum):
+ """Type of served plots"""

- returns: An open, file-like object containing the log contents to be sent
- for processing by Log Detective. The calling function is responsible for
- closing this object."""
+ REQUESTS = "requests"
+ RESPONSES = "responses"
+ EMOJIS = "emojis"
+ BOTH = ""

- # Make sure the file isn't too large to process.
- if not await check_artifacts_file_size(job):
- raise LogsTooLargeError(
- f"Oversized logs for job {job.id} in project {job.project_id}"
+
+ ROUTE_TO_ENDPOINT_TYPES = {
+ MetricRoute.ANALYZE: EndpointType.ANALYZE,
+ MetricRoute.ANALYZE_STAGED: EndpointType.ANALYZE_STAGED,
+ MetricRoute.ANALYZE_GITLAB_JOB: EndpointType.ANALYZE_GITLAB_JOB,
+ }
+
+
+ @app.get("/metrics/{route}/", response_class=StreamingResponse)
+ @app.get("/metrics/{route}/{plot}", response_class=StreamingResponse)
+ async def get_metrics(
+ route: MetricRoute,
+ plot: Plot = Plot.BOTH,
+ period_since_now: TimePeriod = Depends(TimePeriod),
+ ):
+ """Get an handler for visualize statistics for the specified endpoint and plot."""
+ endpoint_type = ROUTE_TO_ENDPOINT_TYPES[route]
+
+ async def handler():
+ """Show statistics for the specified endpoint and plot."""
+ if plot == Plot.REQUESTS:
+ fig = await plot_engine.requests_per_time(period_since_now, endpoint_type)
+ return _svg_figure_response(fig)
+ if plot == Plot.RESPONSES:
+ fig = await plot_engine.average_time_per_responses(
+ period_since_now, endpoint_type
+ )
+ return _svg_figure_response(fig)
+ if plot == Plot.EMOJIS:
+ fig = await plot_engine.emojis_per_time(period_since_now)
+ return _svg_figure_response(fig)
+ # BOTH
+ fig_requests = await plot_engine.requests_per_time(
+ period_since_now, endpoint_type
  )
+ fig_responses = await plot_engine.average_time_per_responses(
+ period_since_now, endpoint_type
+ )
+ fig_emojis = await plot_engine.emojis_per_time(period_since_now)
+ return _multiple_svg_figures_response([fig_requests, fig_responses, fig_emojis])
+
+ descriptions = {
+ Plot.REQUESTS: (
+ "Show statistics for the requests received in the given period of time "
+ f"for the /{endpoint_type.value} API endpoint."
+ ),
+ Plot.RESPONSES: (
+ "Show statistics for responses given in the specified period of time "
+ f"for the /{endpoint_type.value} API endpoint."
+ ),
+ Plot.EMOJIS: (
+ "Show statistics for emoji feedback in the specified period of time "
+ f"for the /{endpoint_type.value} API endpoint."
+ ),
+ Plot.BOTH: (
+ "Show statistics for requests and responses in the given period of time "
+ f"for the /{endpoint_type.value} API endpoint."
+ ),
+ }
+ handler.__doc__ = descriptions[plot]
+
+ return await handler()

- # Create a temporary file to store the downloaded log zipfile.
- # This will be automatically deleted when the last reference into it
- # (returned by this function) is closed.
- tempfile = TemporaryFile(mode="w+b")
- await asyncio.to_thread(job.artifacts, streamed=True, action=tempfile.write)
- tempfile.seek(0)
-
- failed_arches = {}
- artifacts_zip = zipfile.ZipFile(tempfile, mode="r")
- for zipinfo in artifacts_zip.infolist():
- if zipinfo.filename.endswith("task_failed.log"):
- # The koji logs store this file in two places: 1) in the
- # directory with the failed architecture and 2) in the parent
- # directory. We actually want to ignore the one in the parent
- # directory, since the rest of the information is in the
- # specific task directory.
- # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
- # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
- path = PurePath(zipinfo.filename)
- if len(path.parts) <= 3:
- continue
-
- # Extract the architecture from the immediate parent path
- architecture = path.parent.parts[-1].split("-")[0]
-
- # Open this file and read which log failed.
- # The string in this log has the format
- # `see <log> for more information`.
- # Note: it may sometimes say
- # `see build.log or root.log for more information`, but in
- # that situation, we only want to handle build.log (for now),
- # which means accepting only the first match for the regular
- # expression.
- with artifacts_zip.open(zipinfo.filename) as task_failed_log:
- contents = task_failed_log.read().decode("utf-8")
- match = FAILURE_LOG_REGEX.search(contents)
- if not match:
- LOG.error(
- "task_failed.log does not indicate which log contains the failure."
- )
- raise SyntaxError(
- "task_failed.log does not indicate which log contains the failure."
- )
- failure_log_name = match.group(1)
-
- failed_arches[architecture] = PurePath(path.parent, failure_log_name)
-
- if not failed_arches:
- # No failed task found?
- raise FileNotFoundError("Could not detect failed architecture.")
-
- # First check if we only found one failed architecture
- if len(failed_arches) == 1:
- failed_arch = list(failed_arches.keys())[0]
-
- else:
- # We only want to handle one arch, so we'll check them in order of
- # "most to least likely for the maintainer to have access to hardware"
- # This means: x86_64 > aarch64 > ppc64le > s390x
- if "x86_64" in failed_arches:
- failed_arch = "x86_64"
- elif "aarch64" in failed_arches:
- failed_arch = "aarch64"
- elif "ppc64le" in failed_arches:
- failed_arch = "ppc64le"
- elif "s390x" in failed_arches:
- failed_arch = "s390x"
- else:
- # It should be impossible for us to get "noarch" here, since
- # the only way that should happen is for a single architecture
- # build.
- raise FileNotFoundError("No failed architecture detected.")
-
- LOG.debug("Failed architecture: %s", failed_arch)
-
- log_path = failed_arches[failed_arch]
- LOG.debug("Returning contents of %s", log_path)
-
- # Return the log as a file-like object with .read() function
- return artifacts_zip.open(log_path.as_posix())
-
-
- async def check_artifacts_file_size(job):
- """Method to determine if the artifacts are too large to process"""
- # First, make sure that the artifacts are of a reasonable size. The
- # zipped artifact collection will be stored in memory below. The
- # python-gitlab library doesn't expose a way to check this value directly,
- # so we need to interact with directly with the headers.
- artifacts_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts" # pylint: disable=line-too-long
- header_resp = await asyncio.to_thread(
- requests.head,
- artifacts_url,
- allow_redirects=True,
- headers={"Authorization": f"Bearer {SERVER_CONFIG.gitlab.api_token}"},
- timeout=(3.07, 5),
- )
- content_length = int(header_resp.headers.get("content-length"))
- LOG.debug(
- "URL: %s, content-length: %d, max length: %d",
- artifacts_url,
- content_length,
- SERVER_CONFIG.gitlab.max_artifact_size,
- )
- return content_length <= SERVER_CONFIG.gitlab.max_artifact_size

+ async def collect_emoji_task():
+ """Collect emoji feedback.
+ Query only comments created in the last year.
+ """

- async def submit_log_to_llm(log):
- """Stream the log to the LLM for processing"""
- # TODO: query the LLM with the log contents # pylint: disable=fixme
- # This function will be implemented later; right now it does nothing.
- LOG.debug("Log contents:\n%s", log.read())
- return ""
+ for instance in SERVER_CONFIG.gitlab.instances.values():
+ LOG.info(
+ "Collect emoji feedback for %s started at %s",
+ instance.url,
+ datetime.datetime.now(datetime.timezone.utc),
+ )
+ await collect_emojis(instance.get_connection(), TimePeriod(weeks=54))
+ LOG.info(
+ "Collect emoji feedback finished at %s",
+ datetime.datetime.now(datetime.timezone.utc),
+ )


- async def comment_on_mr(merge_request_id: int, response: str): # pylint: disable=unused-argument
- """Add the Log Detective response as a comment to the merge request"""
- # TODO: Implement this # pylint: disable=fixme
- pass # pylint: disable=unnecessary-pass
+ async def schedule_collect_emojis_task():
+ """Schedule the collect_emojis_task to run on a configured interval"""
+ while True:
+ seconds_until_run = SERVER_CONFIG.general.collect_emojis_interval
+ LOG.info("Collect emojis in %d seconds", seconds_until_run)
+ await asyncio.sleep(seconds_until_run)
+
+ try:
+ await collect_emoji_task()
+ except Exception as e: # pylint: disable=broad-exception-caught
+ LOG.exception("Error in collect_emoji_task: %s", e)
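
The diff above only shows the server side of the new API. As a rough orientation, the following is a minimal, hypothetical client sketch; it is not part of the package. The endpoint paths, the {"url": "<YOUR_URL_HERE>"} request body, the Bearer Authorization check in requires_token_when_set, the x-koji-token header, and the 202 "analysis in progress" behaviour are taken from the code in this diff, while the base URL, Koji instance name, task ID, tokens, polling interval, and log URL are placeholders.

# Hypothetical client sketch; values marked "placeholder" are assumptions.
import asyncio

import aiohttp

SERVER = "http://localhost:8080"   # placeholder: wherever logdetective-server is deployed
TOKEN = "<LOGDETECTIVE_TOKEN>"     # only needed when the server sets LOGDETECTIVE_TOKEN
KOJI_INSTANCE = "fedora"           # placeholder: must match a configured Koji instance
TASK_ID = 123456789                # placeholder Koji task ID


async def main():
    headers = {"Authorization": f"Bearer {TOKEN}"}
    async with aiohttp.ClientSession(headers=headers) as session:
        # POST /analyze takes {"url": "<YOUR_URL_HERE>"} and returns a Response model.
        async with session.post(
            f"{SERVER}/analyze", json={"url": "https://example.com/build.log"}
        ) as resp:
            print(resp.status, await resp.json())

        # The Koji endpoints are asynchronous: POST starts the analysis and
        # returns 202, then GET is polled until it stops returning 202.
        koji_headers = {"x-koji-token": "<koji token>"}
        url = f"{SERVER}/analyze/rpmbuild/koji/{KOJI_INSTANCE}/{TASK_ID}"
        async with session.post(url, headers=koji_headers) as resp:
            print(resp.status, await resp.text())
        while True:
            async with session.get(url, headers=koji_headers) as resp:
                if resp.status != 202:
                    print(resp.status, await resp.text())
                    break
            await asyncio.sleep(30)


asyncio.run(main())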