logdetective 0.5.11__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/logdetective.py +17 -8
- logdetective/server/server.py +124 -81
- logdetective/utils.py +21 -3
- {logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/METADATA +2 -2
- {logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/RECORD +8 -8
- {logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/LICENSE +0 -0
- {logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/WHEEL +0 -0
- {logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/entry_points.txt +0 -0
logdetective/logdetective.py
CHANGED
@@ -1,8 +1,11 @@
 import argparse
+import asyncio
 import logging
 import sys
 import os
 
+import aiohttp
+
 from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
 from logdetective.utils import (
     process_log,
@@ -82,7 +85,7 @@ def setup_args():
     return parser.parse_args()
 
 
-def main(): # pylint: disable=too-many-statements,too-many-locals
+async def run(): # pylint: disable=too-many-statements,too-many-locals
     """Main execution function."""
     args = setup_args()
 
@@ -128,13 +131,14 @@ def main(): # pylint: disable=too-many-statements,too-many-locals
 
     LOG.info("Getting summary")
 
-
-
-
-
-
-
-
+    async with aiohttp.ClientSession() as http:
+        try:
+            log = await retrieve_log_content(http, args.file)
+        except ValueError as e:
+            # file does not exist
+            LOG.error(e)
+            sys.exit(4)
+        log_summary = extractor(log)
 
     ratio = len(log_summary) / len(log.split("\n"))
 
@@ -182,5 +186,10 @@ def main(): # pylint: disable=too-many-statements,too-many-locals
     print(f"\nResponse certainty: {certainty:.2f}%\n")
 
 
+def main():
+    """ Evaluate logdetective program and wait for it to finish """
+    asyncio.run(run())
+
+
 if __name__ == "__main__":
     main()

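The change above replaces the synchronous `main()` with an async `run()` coroutine driven by `asyncio.run()`, with a single `aiohttp.ClientSession` opened for the whole invocation. A minimal sketch of that pattern, for illustration only (the coroutine body and URL below are placeholders, not logdetective code):

```python
import asyncio

import aiohttp


async def run() -> None:
    # One shared HTTP session for everything fetched during this invocation.
    async with aiohttp.ClientSession() as http:
        async with http.get("https://example.org/build.log") as resp:  # placeholder URL
            resp.raise_for_status()
            text = await resp.text()
    print(f"downloaded {len(text)} characters")


def main() -> None:
    """Synchronous console-script entry point that drives the async coroutine."""
    asyncio.run(run())


if __name__ == "__main__":
    main()
```
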
logdetective/server/server.py
CHANGED
@@ -3,6 +3,7 @@ import json
 import os
 import re
 import zipfile
+from contextlib import asynccontextmanager
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 from typing import List, Annotated, Tuple, Dict, Any
@@ -11,7 +12,7 @@ from io import BytesIO
 
 import matplotlib
 import matplotlib.pyplot
-from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
+from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header, Request
 
 from fastapi.responses import StreamingResponse
 from fastapi.responses import Response as BasicResponse
@@ -19,14 +20,14 @@ import gitlab
 import gitlab.v4
 import gitlab.v4.objects
 import jinja2
-import requests
+import aiohttp
 
 from logdetective.extractors import DrainExtractor
 from logdetective.utils import (
-    validate_url,
     compute_certainty,
     format_snippets,
     load_prompts,
+    get_url_content,
 )
 from logdetective.server.utils import (
     load_server_config,
@@ -61,6 +62,27 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
 LOG = get_log(SERVER_CONFIG)
 
 
+@asynccontextmanager
+async def lifespan(fapp: FastAPI):
+    """
+    Establish one HTTP session
+    """
+    fapp.http = aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(
+            total=int(LOG_SOURCE_REQUEST_TIMEOUT), connect=3.07
+        )
+    )
+    yield
+    await fapp.http.close()
+
+
+async def get_http_session(request: Request) -> aiohttp.ClientSession:
+    """
+    Return the single aiohttp ClientSession for this app
+    """
+    return request.app.http
+
+
 def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
     """
     FastAPI Depend function that expects a header named Authentication
@@ -91,33 +113,20 @@ def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
 
 
-app = FastAPI(dependencies=[Depends(requires_token_when_set)])
+app = FastAPI(dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)
 app.gitlab_conn = gitlab.Gitlab(
     url=SERVER_CONFIG.gitlab.url, private_token=SERVER_CONFIG.gitlab.api_token
 )
 
 
-def process_url(url: str) -> str:
+async def process_url(http: aiohttp.ClientSession, url: str) -> str:
     """Validate log URL and return log text."""
-
-
-
-
-
-
-            ) from ex
-
-        if not log_request.ok:
-            raise HTTPException(
-                status_code=400,
-                detail="Something went wrong while getting the logs: "
-                f"[{log_request.status_code}] {log_request.text}",
-            )
-    else:
-        LOG.error("Invalid URL received ")
-        raise HTTPException(status_code=400, detail=f"Invalid log URL: {url}")
-
-    return log_request.text
+    try:
+        return await get_url_content(http, url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
+    except RuntimeError as ex:
+        raise HTTPException(
+            status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+        ) from ex
 
 
 def mine_logs(log: str) -> List[Tuple[int, str]]:
@@ -137,7 +146,11 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
 
 
 async def submit_to_llm_endpoint(
-
+    http: aiohttp.ClientSession,
+    url: str,
+    data: Dict[str, Any],
+    headers: Dict[str, str],
+    stream: bool,
 ) -> Any:
     """Send request to selected API endpoint. Verifying successful request unless
     the using the stream response.
@@ -147,40 +160,41 @@ async def submit_to_llm_endpoint(
     headers:
     stream:
     """
+    LOG.debug("async request %s headers=%s data=%s", url, headers, data)
     try:
-
-        response = requests.post(
+        response = await http.post(
             url,
             headers=headers,
-
+            # we need to use the `json=` parameter here and let aiohttp
+            # handle the json-encoding
+            json=data,
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
-
+            # Docs says chunked takes int, but:
+            # DeprecationWarning: Chunk size is deprecated #1615
+            # So let's make sure we either put True or None here
+            chunked=True if stream else None,
+            raise_for_status=True,
         )
-    except
-        LOG.error("Llama-cpp query failed: %s", ex)
+    except aiohttp.ClientResponseError as ex:
         raise HTTPException(
-            status_code=400,
+            status_code=400,
+            detail="HTTP Error while getting response from inference server "
+            f"[{ex.status}] {ex.message}",
+        ) from ex
+    if stream:
+        return response
+    try:
+        return json.loads(await response.text())
+    except UnicodeDecodeError as ex:
+        LOG.error("Error encountered while parsing llama server response: %s", ex)
+        raise HTTPException(
+            status_code=400,
+            detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
        ) from ex
-    if not stream:
-        if not response.ok:
-            raise HTTPException(
-                status_code=400,
-                detail="Something went wrong while getting a response from the llama server: "
-                f"[{response.status_code}] {response.text}",
-            )
-        try:
-            response = json.loads(response.text)
-        except UnicodeDecodeError as ex:
-            LOG.error("Error encountered while parsing llama server response: %s", ex)
-            raise HTTPException(
-                status_code=400,
-                detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
-            ) from ex
-
-    return response
 
 
 async def submit_text(  # pylint: disable=R0913,R0917
+    http: aiohttp.ClientSession,
     text: str,
     max_tokens: int = -1,
     log_probs: int = 1,
@@ -200,14 +214,15 @@ async def submit_text(  # pylint: disable=R0913,R0917
 
     if SERVER_CONFIG.inference.api_endpoint == "/chat/completions":
         return await submit_text_chat_completions(
-            text, headers, max_tokens, log_probs > 0, stream, model
+            http, text, headers, max_tokens, log_probs > 0, stream, model
         )
     return await submit_text_completions(
-        text, headers, max_tokens, log_probs, stream, model
+        http, text, headers, max_tokens, log_probs, stream, model
    )
 
 
 async def submit_text_completions(  # pylint: disable=R0913,R0917
+    http: aiohttp.ClientSession,
     text: str,
     headers: dict,
     max_tokens: int = -1,
@@ -230,6 +245,7 @@ async def submit_text_completions(  # pylint: disable=R0913,R0917
     }
 
     response = await submit_to_llm_endpoint(
+        http,
         f"{SERVER_CONFIG.inference.url}/v1/completions",
         data,
         headers,
@@ -242,6 +258,7 @@ async def submit_text_completions(  # pylint: disable=R0913,R0917
 
 
 async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
+    http: aiohttp.ClientSession,
     text: str,
     headers: dict,
     max_tokens: int = -1,
@@ -270,6 +287,7 @@ async def submit_text_chat_completions(  # pylint: disable=R0913,R0917
     }
 
     response = await submit_to_llm_endpoint(
+        http,
         f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
         data,
         headers,
@@ -289,17 +307,20 @@
 
 @app.post("/analyze", response_model=Response)
 @track_request()
-async def analyze_log(build_log: BuildLog):
+async def analyze_log(
+    build_log: BuildLog, http: aiohttp.ClientSession = Depends(get_http_session)
+):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
     URL must be valid for the request to be passed to the LLM server.
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-    log_text = process_url(build_log.url)
+    log_text = await process_url(http, build_log.url)
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
     response = await submit_text(
+        http,
         PROMPT_CONFIG.prompt_template.format(log_summary),
         model=SERVER_CONFIG.inference.model,
         max_tokens=SERVER_CONFIG.inference.max_tokens,
@@ -321,19 +342,23 @@ async def analyze_log(build_log: BuildLog):
 
 @app.post("/analyze/staged", response_model=StagedResponse)
 @track_request()
-async def analyze_log_staged(
+async def analyze_log_staged(
+    build_log: BuildLog, http: aiohttp.ClientSession = Depends(get_http_session)
+):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
     URL must be valid for the request to be passed to the LLM server.
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-    log_text = process_url(build_log.url)
+    log_text = await process_url(http, build_log.url)
 
-    return await perform_staged_analysis(log_text=log_text)
+    return await perform_staged_analysis(http, log_text=log_text)
 
 
-async def perform_staged_analysis(log_text: str) -> StagedResponse:
+async def perform_staged_analysis(
+    http: aiohttp.ClientSession, log_text: str
+) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
     log_summary = mine_logs(log_text)
 
@@ -341,6 +366,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     analyzed_snippets = await asyncio.gather(
         *[
             submit_text(
+                http,
                 PROMPT_CONFIG.snippet_prompt_template.format(s),
                 model=SERVER_CONFIG.inference.model,
                 max_tokens=SERVER_CONFIG.inference.max_tokens,
@@ -358,6 +384,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     )
 
     final_analysis = await submit_text(
+        http,
         final_prompt,
         model=SERVER_CONFIG.inference.model,
         max_tokens=SERVER_CONFIG.inference.max_tokens,
@@ -385,14 +412,16 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
 
 @app.post("/analyze/stream", response_class=StreamingResponse)
 @track_request()
-async def analyze_log_stream(build_log: BuildLog):
+async def analyze_log_stream(
+    build_log: BuildLog, http: aiohttp.ClientSession = Depends(get_http_session)
+):
     """Stream response endpoint for Logdetective.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
     URL must be valid for the request to be passed to the LLM server.
     Meaning that it must contain appropriate scheme, path and netloc,
     while lacking result, params or query fields.
     """
-    log_text = process_url(build_log.url)
+    log_text = await process_url(http, build_log.url)
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
     headers = {"Content-Type": "application/json"}
@@ -401,7 +430,10 @@ async def analyze_log_stream(build_log: BuildLog):
         headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
 
     stream = await submit_text_chat_completions(
-
+        http,
+        PROMPT_CONFIG.prompt_template.format(log_summary),
+        stream=True,
+        headers=headers,
         model=SERVER_CONFIG.inference.model,
         max_tokens=SERVER_CONFIG.inference.max_tokens,
     )
@@ -411,31 +443,28 @@
 
 @app.post("/webhook/gitlab/job_events")
 async def receive_gitlab_job_event_webhook(
-    job_hook: JobHook,
+    job_hook: JobHook,
+    background_tasks: BackgroundTasks,
+    http: aiohttp.ClientSession = Depends(get_http_session),
 ):
     """Webhook endpoint for receiving job_events notifications from GitLab
     https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
     lists the full specification for the messages sent for job events."""
 
     # Handle the message in the background so we can return 200 immediately
-    background_tasks.add_task(process_gitlab_job_event, job_hook)
+    background_tasks.add_task(process_gitlab_job_event, http, job_hook)
 
     # No return value or body is required for a webhook.
     # 204: No Content
     return BasicResponse(status_code=204)
 
 
-async def process_gitlab_job_event(job_hook):
+async def process_gitlab_job_event(http: aiohttp.ClientSession, job_hook):
     """Handle a received job_event webhook from GitLab"""
     LOG.debug("Received webhook message:\n%s", job_hook)
 
     # Look up the project this job belongs to
     project = await asyncio.to_thread(app.gitlab_conn.projects.get, job_hook.project_id)
-
-    # check if this project is on the opt-in list
-    if project.name not in SERVER_CONFIG.general.packages:
-        LOG.info("Ignoring unrecognized package %s", project.name)
-        return
     LOG.info("Processing failed job for %s", project.name)
 
     # Retrieve data about the job from the GitLab API
@@ -466,16 +495,21 @@ async def process_gitlab_job_event(job_hook):
     LOG.debug("Retrieving log artifacts")
     # Retrieve the build logs from the merge request artifacts and preprocess them
     try:
-        log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(job)
+        log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(http, job)
     except LogsTooLargeError:
         LOG.error("Could not retrieve logs. Too large.")
         raise
 
     # Submit log to Log Detective and await the results.
     log_text = preprocessed_log.read().decode(encoding="utf-8")
-    staged_response = await perform_staged_analysis(log_text=log_text)
+    staged_response = await perform_staged_analysis(http, log_text=log_text)
     preprocessed_log.close()
 
+    # check if this project is on the opt-in list for posting comments.
+    if project.name not in SERVER_CONFIG.general.packages:
+        LOG.info("Not publishing comment for unrecognized package %s", project.name)
+        return
+
     # Add the Log Detective response as a comment to the merge request
     await comment_on_mr(project, merge_request_iid, job, log_url, staged_response)
 
@@ -484,7 +518,9 @@ class LogsTooLargeError(RuntimeError):
     """The log archive exceeds the configured maximum size"""
 
 
-async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
+async def retrieve_and_preprocess_koji_logs(
+    http: aiohttp.ClientSession, job: gitlab.v4.objects.ProjectJob
+):
     """Download logs from the merge request artifacts
 
     This function will retrieve the build logs and do some minimal
@@ -495,7 +531,7 @@ async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
     Detective. The calling function is responsible for closing this object."""
 
     # Make sure the file isn't too large to process.
-    if not await check_artifacts_file_size(job):
+    if not await check_artifacts_file_size(http, job):
         raise LogsTooLargeError(
             f"Oversized logs for job {job.id} in project {job.project_id}"
         )
@@ -584,21 +620,28 @@
     return log_url, artifacts_zip.open(log_path)
 
 
-async def check_artifacts_file_size(job):
+async def check_artifacts_file_size(http: aiohttp.ClientSession, job):
     """Method to determine if the artifacts are too large to process"""
     # First, make sure that the artifacts are of a reasonable size. The
     # zipped artifact collection will be stored in memory below. The
     # python-gitlab library doesn't expose a way to check this value directly,
     # so we need to interact with directly with the headers.
     artifacts_url = f"{SERVER_CONFIG.gitlab.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts"  # pylint: disable=line-too-long
-
-
-
-
-
-
-
-
+    LOG.debug("checking artifact URL %s", artifacts_url)
+    try:
+        head_response = await http.head(
+            artifacts_url,
+            allow_redirects=True,
+            headers={"Authorization": f"Bearer {SERVER_CONFIG.gitlab.api_token}"},
+            timeout=5,
+            raise_for_status=True,
+        )
+    except aiohttp.ClientResponseError as ex:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unable to check artifact URL: [{ex.status}] {ex.message}",
+        ) from ex
+    content_length = int(head_response.headers.get("content-length"))
     LOG.debug(
         "URL: %s, content-length: %d, max length: %d",
         artifacts_url,

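The server changes above move all outbound HTTP to a single `aiohttp.ClientSession` created in a FastAPI lifespan handler and injected into endpoints through a dependency. A self-contained sketch of that pattern, for illustration only (it stores the session on `app.state` rather than directly on the app object, and uses a made-up `/fetch` endpoint instead of the logdetective routes):

```python
from contextlib import asynccontextmanager

import aiohttp
from fastapi import Depends, FastAPI, HTTPException, Request


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Create one shared session at startup and close it at shutdown.
    app.state.http = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60))
    yield
    await app.state.http.close()


app = FastAPI(lifespan=lifespan)


async def get_http_session(request: Request) -> aiohttp.ClientSession:
    # Dependency that exposes the shared session to request handlers.
    return request.app.state.http


@app.get("/fetch")
async def fetch(url: str, http: aiohttp.ClientSession = Depends(get_http_session)):
    try:
        async with http.get(url, raise_for_status=True) as resp:
            return {"length": len(await resp.text())}
    except aiohttp.ClientResponseError as ex:
        raise HTTPException(status_code=400, detail=f"{ex.status} {ex.message}") from ex
```

Reusing one session per process avoids re-establishing TCP/TLS connections on every request, which is the main motivation for wiring it through the lifespan rather than opening a session per call.
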
logdetective/utils.py
CHANGED
@@ -2,8 +2,9 @@ import logging
 import os
 from typing import Iterator, List, Dict, Tuple, Generator
 from urllib.parse import urlparse
+
+import aiohttp
 import numpy as np
-import requests
 import yaml
 
 from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
@@ -133,7 +134,24 @@ def process_log(
     return response
 
 
-def retrieve_log_content(log_path: str) -> str:
+async def get_url_content(http: aiohttp.ClientSession, url: str, timeout: int) -> str:
+    """validate log url and return log text."""
+    if validate_url(url=url):
+        LOG.debug("process url %s", url)
+        try:
+            response = await http.get(
+                url,
+                timeout=timeout,
+                raise_for_status=True
+            )
+        except aiohttp.ClientResponseError as ex:
+            raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+        return await response.text()
+    LOG.error("Invalid URL received ")
+    raise RuntimeError(f"Invalid log URL: {url}")
+
+
+async def retrieve_log_content(http: aiohttp.ClientSession, log_path: str) -> str:
     """Get content of the file on the log_path path.
     Path is assumed to be valid URL if it has a scheme.
     Otherwise it attempts to pull it from local filesystem."""
@@ -148,7 +166,7 @@ def retrieve_log_content(log_path: str) -> str:
             log = f.read()
 
     else:
-        log =
+        log = await get_url_content(http, log_path, timeout=60)
 
     return log
 

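Per the diff above, `get_url_content` wraps both invalid URLs and failed downloads in `RuntimeError`, so a caller only needs one except clause. A short usage sketch under that assumption (the log URL is a placeholder):

```python
import asyncio

import aiohttp

from logdetective.utils import get_url_content


async def fetch_log(url: str) -> str:
    async with aiohttp.ClientSession() as http:
        try:
            return await get_url_content(http, url, timeout=60)
        except RuntimeError as ex:
            # Raised both for an invalid URL and for a failed HTTP download.
            print(f"could not fetch log: {ex}")
            return ""


if __name__ == "__main__":
    log = asyncio.run(fetch_log("https://example.org/build.log"))  # placeholder URL
    print(f"{len(log)} characters")
```
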
{logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.5.11
+Version: 0.6.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -20,6 +20,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Debuggers
 Provides-Extra: server
 Provides-Extra: server-testing
+Requires-Dist: aiohttp (>=3.7.4)
 Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: fastapi (>=0.111.1) ; extra == "server" or extra == "server-testing"
@@ -32,7 +33,6 @@ Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: python-gitlab (>=4.4.0)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
-Requires-Dist: requests (>0.2.31)
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues

{logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
 logdetective/constants.py,sha256=A5PzeqlQqDbBS_kzP2hl-lhJ0lCEqdbvW3CaQUYVxjw,1849
 logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
 logdetective/extractors.py,sha256=7ahzWbTtU9MveG1Q7wU9LO8OJgs85X-cHmWltUhCe9M,3491
-logdetective/logdetective.py,sha256=
+logdetective/logdetective.py,sha256=cC2oL4yPNo94AB2nS4v1jpZi-Qo1g0_FEchL_yQL1UU,5832
 logdetective/models.py,sha256=nrGBmMRu8i6UhFflQKAp81Y3Sd_Aaoor0i_yqSJoLT0,1115
 logdetective/prompts.yml,sha256=dMW2-bdTIqv7LF_owqRD4xinMK5ZWcNhDynnX1zoKns,1722
 logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,13 +12,13 @@ logdetective/server/database/models.py,sha256=m_3qNBWJwLSwjJn0AmwSxXMJk75Gu1bXFt
 logdetective/server/metric.py,sha256=-uM_-yqxNA-EZTCnNRdQ8g1MicmE5eC6jRFI_mBBYUg,2606
 logdetective/server/models.py,sha256=URqZcfx5yUsifZ1pOwZ_uU3Tyjcdvuq6qEnAvTexl4A,8475
 logdetective/server/plot.py,sha256=B2rOngqx7g-Z3NfttboTip3frkypdF1H7FhK8vh45mE,9655
-logdetective/server/server.py,sha256=
+logdetective/server/server.py,sha256=AXduOwD6zPNFc-COw-JcTp4bPan9DfXJml52XiBHmds,29613
 logdetective/server/templates/gitlab_full_comment.md.j2,sha256=DQZ2WVFedpuXI6znbHIW4wpF9BmFS8FaUkowh8AnGhE,1627
 logdetective/server/templates/gitlab_short_comment.md.j2,sha256=fzScpayv2vpRLczP_0O0YxtA8rsKvR6gSv4ntNdWb98,1443
 logdetective/server/utils.py,sha256=QO0H1q55YLCLKxkViqex4Uu31LnakpYUKJfZHysonSc,1838
-logdetective/utils.py,sha256=
-logdetective-0.
-logdetective-0.
-logdetective-0.
-logdetective-0.
-logdetective-0.
+logdetective/utils.py,sha256=IBAH6hleuhtbA3cAGhiIDb3zwKaovunBfVsG0TKzoKA,7127
+logdetective-0.6.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.6.0.dist-info/METADATA,sha256=zBYLi3jNjqR-6EYJY4wy5CS-1-ZgEzzRGofcqzXbHco,15880
+logdetective-0.6.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+logdetective-0.6.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.6.0.dist-info/RECORD,,

{logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/LICENSE
File without changes

{logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/WHEEL
File without changes

{logdetective-0.5.11.dist-info → logdetective-0.6.0.dist-info}/entry_points.txt
File without changes