logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/server/config.py +1 -1
- logdetective/server/emoji.py +46 -48
- logdetective/server/gitlab.py +21 -8
- logdetective/server/llm.py +38 -12
- logdetective/server/models.py +66 -259
- logdetective/server/server.py +199 -32
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/METADATA +2 -2
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/RECORD +11 -11
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/WHEEL +0 -0
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/entry_points.txt +0 -0
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/licenses/LICENSE +0 -0
logdetective/server/config.py
CHANGED
@@ -15,7 +15,7 @@ def load_server_config(path: str | None) -> Config:
     if path is not None:
         try:
             with open(path, "r") as config_file:
-                return Config(yaml.safe_load(config_file))
+                return Config.model_validate(yaml.safe_load(config_file))
         except FileNotFoundError:
             # This is not an error, we will fall back to default
             print("Unable to find server config file, using default then.")
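
The only change here swaps the direct `Config(...)` call for Pydantic v2's `Config.model_validate(...)` on the dict returned by `yaml.safe_load`. A minimal sketch of that pattern, using a hypothetical `Config` model and field names rather than logdetective's real ones:

```python
# Sketch only: a stand-in Config model, not logdetective's actual class.
import yaml
from pydantic import BaseModel


class Config(BaseModel):  # hypothetical fields for illustration
    host: str = "0.0.0.0"
    port: int = 8080


def load_config(path: str) -> Config:
    try:
        with open(path, "r") as config_file:
            # model_validate() accepts the parsed dict directly and
            # runs full field validation on it (Pydantic v2 API).
            return Config.model_validate(yaml.safe_load(config_file))
    except FileNotFoundError:
        # Fall back to defaults when no config file is present
        return Config()
```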
logdetective/server/emoji.py
CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 
-from typing import List
+from typing import List
 from collections import Counter
 
 import gitlab
@@ -49,25 +49,6 @@ async def collect_emojis_for_mr(
     await collect_emojis_in_comments(comments, gitlab_conn)
 
 
-async def _handle_gitlab_operation(func: Callable, *args):
-    """
-    It handles errors for the specified GitLab operation.
-    After executing it in a separate thread.
-    """
-    try:
-        return await asyncio.to_thread(func, *args)
-    except (gitlab.GitlabError, gitlab.GitlabGetError) as e:
-        log_msg = f"Error during GitLab operation {func}{args}: {e}"
-        if "Not Found" in str(e):
-            LOG.error(log_msg)
-        else:
-            LOG.exception(log_msg)
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        LOG.exception(
-            "Unexpected error during GitLab operation %s(%s): %s", func, args, e
-        )
-
-
 async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
     comments: List[Comments], gitlab_conn: gitlab.Gitlab
 ):
@@ -80,37 +61,54 @@ async def collect_emojis_in_comments(  # pylint: disable=too-many-locals
         mr_job_db = await GitlabMergeRequestJobs.get_by_id(comment.merge_request_job_id)
         if not mr_job_db:
             continue
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            if mr_job_db.id not in projects:
+                project = await asyncio.to_thread(
+                    gitlab_conn.projects.get, mr_job_db.project_id
+                )
+
+                projects[mr_job_db.id] = project
+            else:
+                project = projects[mr_job_db.id]
+            merge_request_iid = mr_job_db.mr_iid
+            if merge_request_iid not in merge_requests:
+                merge_request = await asyncio.to_thread(
+                    project.mergerequests.get, merge_request_iid
+                )
+
+                merge_requests[merge_request_iid] = merge_request
+            else:
+                merge_request = merge_requests[merge_request_iid]
+
+            discussion = await asyncio.to_thread(
+                merge_request.discussions.get, comment.comment_id
             )
-            if not merge_request:
-                continue
-            merge_requests[merge_request_iid] = merge_request
-        else:
-            merge_request = merge_requests[merge_request_iid]
 
-
-
-
-
+            # Get the ID of the first note
+            if "notes" not in discussion.attributes or len(discussion.attributes["notes"]) == 0:
+                LOG.warning(
+                    "No notes were found in comment %s in merge request %d",
+                    comment.comment_id,
+                    merge_request_iid,
+                )
+                continue
 
-
-
-
-
-
+            note_id = discussion.attributes["notes"][0]["id"]
+            note = await asyncio.to_thread(merge_request.notes.get, note_id)
+
+        # Log warning with full stack trace, in case we can't find the right
+        # discussion, merge request or project.
+        # All of these objects can be lost, and we shouldn't treat as an error.
+        # Other exceptions are raised.
+        except gitlab.GitlabError as e:
+            if e.response_code == 404:
+                LOG.warning(
+                    "Couldn't retrieve emoji counts for comment %s due to GitlabError",
+                    comment.comment_id, exc_info=True)
+                continue
+            LOG.error("Error encountered while processing emoji counts for GitLab comment %s",
+                      comment.comment_id, exc_info=True)
+            raise
 
         emoji_counts = Counter(emoji.name for emoji in note.awardemojis.list())
 
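
The removed `_handle_gitlab_operation` wrapper is replaced by inline `asyncio.to_thread` calls and a single `gitlab.GitlabError` handler that downgrades 404s to warnings. A small sketch of that pattern, with placeholder connection details (the URL, token, and project ID below are made up):

```python
# Sketch: run a blocking python-gitlab call in a worker thread and treat
# "not found" as recoverable, as the reworked emoji collection loop does.
import asyncio
import gitlab


async def fetch_project(gitlab_conn: gitlab.Gitlab, project_id: int):
    try:
        # python-gitlab is synchronous; to_thread keeps the event loop free
        return await asyncio.to_thread(gitlab_conn.projects.get, project_id)
    except gitlab.GitlabError as e:
        if e.response_code == 404:
            # The project may simply have been deleted; warn and move on
            print(f"Project {project_id} not found")
            return None
        raise  # anything other than "not found" is a real error


async def main():
    conn = gitlab.Gitlab("https://gitlab.example.com", private_token="TOKEN")
    project = await fetch_project(conn, 12345)
    if project is not None:
        print(project.name)


if __name__ == "__main__":
    asyncio.run(main())
```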
logdetective/server/gitlab.py
CHANGED
@@ -4,6 +4,7 @@ import zipfile
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 
+from aiolimiter import AsyncLimiter
 from fastapi import HTTPException
 
 import gitlab
@@ -13,6 +14,7 @@ import jinja2
 import aiohttp
 import backoff
 
+from logdetective.extractors import Extractor
 from logdetective.server.config import SERVER_CONFIG, LOG
 from logdetective.server.exceptions import (
     LogsTooLargeError,
@@ -41,15 +43,20 @@ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
 
 async def process_gitlab_job_event(
     gitlab_cfg: GitLabInstanceConfig,
+    gitlab_connection: gitlab.Gitlab,
+    http_session: aiohttp.ClientSession,
     forge: Forge,
     job_hook: JobHook,
-
+    async_request_limiter: AsyncLimiter,
+    extractors: list[Extractor],
+):  # pylint: disable=too-many-locals disable=too-many-arguments disable=too-many-positional-arguments
     """Handle a received job_event webhook from GitLab"""
     LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)
 
     # Look up the project this job belongs to
-
-
+    project = await asyncio.to_thread(
+        gitlab_connection.projects.get, job_hook.project_id
+    )
     LOG.info("Processing failed job for %s", project.name)
 
     # Retrieve data about the job from the GitLab API
@@ -94,7 +101,7 @@ async def process_gitlab_job_event(
     # Retrieve the build logs from the merge request artifacts and preprocess them
     try:
         log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(
-            gitlab_cfg, job
+            gitlab_cfg, job, http_session
         )
     except (LogsTooLargeError, LogDetectiveConnectionError) as ex:
         LOG.error("Could not retrieve logs due to %s", ex)
@@ -105,10 +112,14 @@ async def process_gitlab_job_event(
     metrics_id = await add_new_metrics(
         api_name=EndpointType.ANALYZE_GITLAB_JOB,
         url=log_url,
-        http_session=
+        http_session=http_session,
         compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-    staged_response = await perform_staged_analysis(
+    staged_response = await perform_staged_analysis(
+        log_text=log_text,
+        async_request_limiter=async_request_limiter,
+        extractors=extractors,
+    )
     await update_metrics(metrics_id, staged_response)
     preprocessed_log.close()
 
@@ -162,6 +173,7 @@ def is_eligible_package(project_name: str):
 async def retrieve_and_preprocess_koji_logs(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ):  # pylint: disable=too-many-branches,too-many-locals
     """Download logs from the merge request artifacts
 
@@ -173,7 +185,7 @@ async def retrieve_and_preprocess_koji_logs(
     Detective. The calling function is responsible for closing this object."""
 
     # Make sure the file isn't too large to process.
-    if not await check_artifacts_file_size(gitlab_cfg, job):
+    if not await check_artifacts_file_size(gitlab_cfg, job, http_session):
         raise LogsTooLargeError(
             f"Oversized logs for job {job.id} in project {job.project_id}"
         )
@@ -274,6 +286,7 @@ async def retrieve_and_preprocess_koji_logs(
 async def check_artifacts_file_size(
     gitlab_cfg: GitLabInstanceConfig,
     job: gitlab.v4.objects.ProjectJob,
+    http_session: aiohttp.ClientSession,
 ):
     """Method to determine if the artifacts are too large to process"""
     # First, make sure that the artifacts are of a reasonable size. The
@@ -285,7 +298,7 @@ async def check_artifacts_file_size(
     )
     LOG.debug("checking artifact URL %s%s", gitlab_cfg.url, artifacts_path)
     try:
-        head_response = await
+        head_response = await http_session.head(
            artifacts_path,
            allow_redirects=True,
            raise_for_status=True,
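
The webhook handler now receives its GitLab connection, aiohttp session, rate limiter, and extractors from the caller instead of building them ad hoc, and the artifact size check goes through the shared session with a HEAD request. A rough sketch of that size check, assuming a made-up size limit and artifact URL (not logdetective's actual values):

```python
# Sketch: use a shared aiohttp.ClientSession and a HEAD request to learn an
# artifact's size without downloading it, as check_artifacts_file_size() does.
import asyncio
import aiohttp

MAX_ARTIFACT_SIZE = 300 * 1024 * 1024  # assumed limit, for illustration only


async def artifacts_small_enough(session: aiohttp.ClientSession, url: str) -> bool:
    # allow_redirects follows GitLab's artifact redirect; raise_for_status
    # turns HTTP errors into exceptions the caller can handle.
    async with session.head(url, allow_redirects=True, raise_for_status=True) as resp:
        size = int(resp.headers.get("Content-Length", 0))
        return 0 < size <= MAX_ARTIFACT_SIZE


async def main():
    async with aiohttp.ClientSession() as session:
        ok = await artifacts_small_enough(
            session, "https://gitlab.example.com/api/v4/projects/1/jobs/2/artifacts"
        )
        print("artifacts fit" if ok else "artifacts too large")


if __name__ == "__main__":
    asyncio.run(main())
```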
logdetective/server/llm.py
CHANGED
@@ -9,9 +9,11 @@ from fastapi import HTTPException
 from pydantic import ValidationError
 
 import aiohttp
+from aiolimiter import AsyncLimiter
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
 
+from logdetective.extractors import Extractor
 from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
@@ -41,7 +43,6 @@ from logdetective.server.utils import (
     construct_final_prompt,
 )
 
-
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 
 
@@ -57,6 +58,7 @@ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 async def call_llm(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
     structured_output: dict | None = None,
 ) -> Explanation:
@@ -87,7 +89,7 @@ async def call_llm(
         }
         kwargs["response_format"] = response_format
 
-    async with
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
@@ -126,6 +128,7 @@ async def call_llm(
 async def call_llm_stream(
     messages: List[Dict[str, str]],
     inference_cfg: InferenceConfig,
+    async_request_limiter: AsyncLimiter,
     stream: bool = False,
 ) -> AsyncStream[ChatCompletionChunk]:
     """Submit prompt to LLM and recieve stream of tokens as a result.
@@ -136,7 +139,7 @@ async def call_llm_stream(
 
     LOG.info("Submitting to /v1/chat/completions endpoint")
 
-    async with
+    async with async_request_limiter:
         response = await CLIENT.chat.completions.create(
             messages=messages,
             max_tokens=inference_cfg.max_tokens,
@@ -150,7 +153,9 @@
 
 
 async def analyze_snippets(
-    log_summary: List[Tuple[int, str]],
+    log_summary: List[Tuple[int, str]],
+    async_request_limiter: AsyncLimiter,
+    structured_output: dict | None = None,
 ) -> List[SnippetAnalysis | RatedSnippetAnalysis]:
     """Submit log file snippets to the LLM and gather results"""
     # Process snippets asynchronously
@@ -162,6 +167,7 @@ async def analyze_snippets(
             SERVER_CONFIG.inference.system_role,
             SERVER_CONFIG.inference.user_role,
         ),
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.snippet_inference,
         structured_output=structured_output,
     )
@@ -184,9 +190,13 @@ async def analyze_snippets(
     return analyzed_snippets
 
 
-async def perfrom_analysis(log_text: str) -> Response:
+async def perform_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> Response:
     """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-    log_summary = mine_logs(log_text,
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -199,6 +209,7 @@ async def perfrom_analysis(log_text: str) -> Response:
     )
     response = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
     certainty = 0
@@ -216,9 +227,13 @@ async def perfrom_analysis(log_text: str) -> Response:
     return Response(explanation=response, response_certainty=certainty)
 
 
-async def perform_analyis_stream(log_text: str) -> AsyncStream:
+async def perform_analysis_stream(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> AsyncStream:
     """Submit log file snippets in aggregate and return a stream of tokens"""
-    log_summary = mine_logs(log_text,
+    log_summary = mine_logs(log_text, extractors)
     log_summary = format_snippets(log_summary)
 
     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
@@ -232,6 +247,7 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
 
     stream = call_llm_stream(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
 
@@ -241,13 +257,18 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
     return stream
 
 
-async def perform_staged_analysis(log_text: str) -> StagedResponse:
+async def perform_staged_analysis(
+    log_text: str,
+    async_request_limiter: AsyncLimiter,
+    extractors: List[Extractor],
+) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
-    log_summary = mine_logs(log_text,
+    log_summary = mine_logs(log_text, extractors)
     start = time.time()
     if SERVER_CONFIG.general.top_k_snippets:
         rated_snippets = await analyze_snippets(
             log_summary=log_summary,
+            async_request_limiter=async_request_limiter,
             structured_output=RatedSnippetAnalysis.model_json_schema(),
         )
 
@@ -266,7 +287,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
             len(rated_snippets),
         )
     else:
-        processed_snippets = await analyze_snippets(
+        processed_snippets = await analyze_snippets(
+            log_summary=log_summary, async_request_limiter=async_request_limiter
+        )
 
     # Extract original text and line number from `log_summary`
     processed_snippets = [
@@ -276,7 +299,9 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     delta = time.time() - start
     LOG.info("Snippet analysis performed in %f s", delta)
     log_summary = format_analyzed_snippets(processed_snippets)
-    final_prompt = construct_final_prompt(
+    final_prompt = construct_final_prompt(
+        log_summary, PROMPT_CONFIG.prompt_template_staged
+    )
 
     messages = prompt_to_messages(
         final_prompt,
@@ -286,6 +311,7 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
     )
     final_analysis = await call_llm(
         messages,
+        async_request_limiter=async_request_limiter,
         inference_cfg=SERVER_CONFIG.inference,
     )
 
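
Every LLM call now takes an `aiolimiter.AsyncLimiter` and enters it with `async with` before contacting the backend, so concurrent snippet analyses share one request budget. A self-contained sketch of that throttling pattern; the rate values and the `fake_llm_call` helper are illustrative only, not logdetective's code:

```python
# Sketch: gate concurrent coroutines through a shared AsyncLimiter so they
# cannot exceed a fixed request rate, mirroring the call_llm() change above.
import asyncio
from aiolimiter import AsyncLimiter


async def fake_llm_call(limiter: AsyncLimiter, prompt: str) -> str:
    async with limiter:  # blocks here once the rate budget is exhausted
        await asyncio.sleep(0.1)  # stand-in for the real inference request
        return f"analysis of: {prompt}"


async def main():
    # At most 5 requests per 1-second window, shared by all callers
    limiter = AsyncLimiter(max_rate=5, time_period=1)
    snippets = [f"snippet {i}" for i in range(12)]
    results = await asyncio.gather(*(fake_llm_call(limiter, s) for s in snippets))
    print(len(results), "snippets analyzed")


if __name__ == "__main__":
    asyncio.run(main())
```

Passing the limiter as an explicit argument (rather than a module-level global) is what lets the server wire one shared instance through `perform_staged_analysis`, `analyze_snippets`, and both `call_llm` variants.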