logdetective 0.9.1__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,413 @@
1
+ import re
2
+ import asyncio
3
+ import zipfile
4
+ from pathlib import Path, PurePath
5
+ from tempfile import TemporaryFile
6
+
7
+ from fastapi import HTTPException
8
+
9
+ import gitlab
10
+ import gitlab.v4
11
+ import gitlab.v4.objects
12
+ import jinja2
13
+ import aiohttp
14
+ import sqlalchemy
15
+
16
+ from logdetective.server.config import SERVER_CONFIG, LOG
17
+ from logdetective.server.llm import perform_staged_analysis
18
+ from logdetective.server.metric import add_new_metrics, update_metrics
19
+ from logdetective.server.models import (
20
+ GitLabInstanceConfig,
21
+ JobHook,
22
+ StagedResponse,
23
+ )
24
+ from logdetective.server.database.models import (
25
+ Comments,
26
+ EndpointType,
27
+ Forge,
28
+ AnalyzeRequestMetrics,
29
+ )
30
+ from logdetective.server.compressors import RemoteLogCompressor
31
+
32
+ MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/.*$")
33
+ FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")
34
+
35
+
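The two module-level patterns above do most of the string matching in this file: MR_REGEX pulls the merge-request IID out of a pipeline ref, and FAILURE_LOG_REGEX picks the first *.log name mentioned in a task_failed.log. A minimal sketch of their behaviour, using invented sample strings:

import re

MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/.*$")
FAILURE_LOG_REGEX = re.compile(r"(\w*\.log)")

# Hypothetical pipeline ref and task_failed.log excerpt, for illustration only.
ref = "refs/merge-requests/42/head"
assert int(MR_REGEX.search(ref).group(1)) == 42

excerpt = "see build.log or root.log for more information"
# Only the first match is used, so build.log wins over root.log here.
assert FAILURE_LOG_REGEX.search(excerpt).group(1) == "build.log"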
36
+ async def process_gitlab_job_event(
37
+ http: aiohttp.ClientSession,
38
+ gitlab_cfg: GitLabInstanceConfig,
39
+ forge: Forge,
40
+ job_hook: JobHook,
41
+ ): # pylint: disable=too-many-locals
42
+ """Handle a received job_event webhook from GitLab"""
43
+ LOG.debug("Received webhook message from %s:\n%s", forge.value, job_hook)
44
+
45
+ # Look up the project this job belongs to
46
+ gitlab_conn = gitlab_cfg.get_connection()
47
+ project = await asyncio.to_thread(gitlab_conn.projects.get, job_hook.project_id)
48
+ LOG.info("Processing failed job for %s", project.name)
49
+
50
+ # Retrieve data about the job from the GitLab API
51
+ job = await asyncio.to_thread(project.jobs.get, job_hook.build_id)
52
+
53
+ # For easy retrieval later, we'll add project_name and project_url to the
54
+ # job object
55
+ job.project_name = project.name
56
+ job.project_url = project.web_url
57
+
58
+ # Retrieve the pipeline that started this job
59
+ pipeline = await asyncio.to_thread(project.pipelines.get, job_hook.pipeline_id)
60
+
61
+ # Verify this is a merge request
62
+ if pipeline.source != "merge_request_event":
63
+ LOG.info("Not a merge request pipeline. Ignoring.")
64
+ return
65
+
66
+ # Extract the merge-request ID from the job
67
+ match = MR_REGEX.search(pipeline.ref)
68
+ if not match:
69
+ LOG.error(
70
+ "Pipeline source is merge_request_event but no merge request ID was provided."
71
+ )
72
+ return
73
+ merge_request_iid = int(match.group(1))
74
+
75
+ LOG.debug("Retrieving log artifacts")
76
+ # Retrieve the build logs from the merge request artifacts and preprocess them
77
+ try:
78
+ log_url, preprocessed_log = await retrieve_and_preprocess_koji_logs(gitlab_cfg, http, job)
79
+ except LogsTooLargeError:
80
+ LOG.error("Could not retrieve logs. Too large.")
81
+ raise
82
+
83
+ # Submit log to Log Detective and await the results.
84
+ log_text = preprocessed_log.read().decode(encoding="utf-8")
85
+ metrics_id = await add_new_metrics(
86
+ api_name=EndpointType.ANALYZE_GITLAB_JOB,
87
+ url=log_url,
88
+ http_session=http,
89
+ compressed_log_content=RemoteLogCompressor.zip_text(log_text),
90
+ )
91
+ staged_response = await perform_staged_analysis(http, log_text=log_text)
92
+ update_metrics(metrics_id, staged_response)
93
+ preprocessed_log.close()
94
+
95
+ # check if this project is on the opt-in list for posting comments.
96
+ if project.name not in SERVER_CONFIG.general.packages:
97
+ LOG.info("Not publishing comment for unrecognized package %s", project.name)
98
+ return
99
+
100
+ # Add the Log Detective response as a comment to the merge request
101
+ await comment_on_mr(
102
+ forge,
103
+ project,
104
+ merge_request_iid,
105
+ job,
106
+ log_url,
107
+ staged_response,
108
+ metrics_id,
109
+ )
110
+
111
+ return staged_response
112
+
113
+
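process_gitlab_job_event only performs LLM analysis for merge-request pipelines whose ref carries a parseable merge-request IID. A self-contained sketch of that gating logic with invented stand-in objects (the real handler works on python-gitlab pipeline objects):

import re
from dataclasses import dataclass
from typing import Optional

MR_REGEX = re.compile(r"refs/merge-requests/(\d+)/.*$")

@dataclass
class FakePipeline:
    source: str
    ref: str

def merge_request_iid(pipeline: FakePipeline) -> Optional[int]:
    """Return the MR IID to analyze, or None when the pipeline should be ignored."""
    if pipeline.source != "merge_request_event":
        return None
    match = MR_REGEX.search(pipeline.ref)
    return int(match.group(1)) if match else None

assert merge_request_iid(FakePipeline("push", "refs/heads/main")) is None
assert merge_request_iid(FakePipeline("merge_request_event", "refs/merge-requests/7/head")) == 7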
114
+ class LogsTooLargeError(RuntimeError):
115
+ """The log archive exceeds the configured maximum size"""
116
+
117
+
118
+ async def retrieve_and_preprocess_koji_logs(
119
+ gitlab_cfg: GitLabInstanceConfig,
120
+ http: aiohttp.ClientSession,
121
+ job: gitlab.v4.objects.ProjectJob
122
+ ): # pylint: disable=too-many-branches,too-many-locals
123
+ """Download logs from the merge request artifacts
124
+
125
+ This function will retrieve the build logs and do some minimal
126
+ preprocessing to determine which log is relevant for analysis.
127
+
128
+ returns: The URL pointing to the selected log file and an open, file-like
129
+ object containing the log contents to be sent for processing by Log
130
+ Detective. The calling function is responsible for closing this object."""
131
+
132
+ # Make sure the file isn't too large to process.
133
+ if not await check_artifacts_file_size(gitlab_cfg, http, job):
134
+ raise LogsTooLargeError(
135
+ f"Oversized logs for job {job.id} in project {job.project_id}"
136
+ )
137
+
138
+ # Create a temporary file to store the downloaded log zipfile.
139
+ # This will be automatically deleted when the last reference into it
140
+ # (returned by this function) is closed.
141
+ tempfile = TemporaryFile(mode="w+b")
142
+ await asyncio.to_thread(job.artifacts, streamed=True, action=tempfile.write)
143
+ tempfile.seek(0)
144
+
145
+ failed_arches = {}
146
+ artifacts_zip = zipfile.ZipFile(tempfile, mode="r") # pylint: disable=consider-using-with
147
+ for zipinfo in artifacts_zip.infolist():
148
+ if zipinfo.filename.endswith("task_failed.log"):
149
+ # The koji logs store this file in two places: 1) in the
150
+ # directory with the failed architecture and 2) in the parent
151
+ # directory. Most of the time, we want to ignore the one in the
152
+ # parent directory, since the rest of the information is in the
153
+ # specific task directory. However, there are some situations
154
+ # where non-build failures (such as "Target build already exists")
155
+ # may be presented only at the top level.
156
+ # The paths look like `kojilogs/noarch-XXXXXX/task_failed.log`
157
+ # or `kojilogs/noarch-XXXXXX/x86_64-XXXXXX/task_failed.log`
158
+ path = PurePath(zipinfo.filename)
159
+ if len(path.parts) <= 3:
160
+ failed_arches["toplevel"] = path
161
+ continue
162
+
163
+ # Extract the architecture from the immediate parent path
164
+ architecture = path.parent.parts[-1].split("-")[0]
165
+
166
+ # Open this file and read which log failed.
167
+ # The string in this log has the format
168
+ # `see <log> for more information`.
169
+ # Note: it may sometimes say
170
+ # `see build.log or root.log for more information`, but in
171
+ # that situation, we only want to handle build.log (for now),
172
+ # which means accepting only the first match for the regular
173
+ # expression.
174
+ with artifacts_zip.open(zipinfo.filename) as task_failed_log:
175
+ contents = task_failed_log.read().decode("utf-8")
176
+ match = FAILURE_LOG_REGEX.search(contents)
177
+ if not match:
178
+ LOG.error(
179
+ "task_failed.log does not indicate which log contains the failure."
180
+ )
181
+ raise SyntaxError(
182
+ "task_failed.log does not indicate which log contains the failure."
183
+ )
184
+ failure_log_name = match.group(1)
185
+
186
+ failed_arches[architecture] = PurePath(path.parent, failure_log_name)
187
+
188
+ if not failed_arches:
189
+ # No failed task found in the sub-tasks.
190
+ raise FileNotFoundError("Could not detect failed architecture.")
191
+
192
+ # We only want to handle one arch, so we'll check them in order of
193
+ # "most to least likely for the maintainer to have access to hardware"
194
+ # This means: x86_64 > aarch64 > riscv > ppc64le > s390x
195
+ if "x86_64" in failed_arches:
196
+ failed_arch = "x86_64"
197
+ elif "aarch64" in failed_arches:
198
+ failed_arch = "aarch64"
199
+ elif "riscv" in failed_arches:
200
+ failed_arch = "riscv"
201
+ elif "ppc64le" in failed_arches:
202
+ failed_arch = "ppc64le"
203
+ elif "s390x" in failed_arches:
204
+ failed_arch = "s390x"
205
+ elif "noarch" in failed_arches:
206
+ # May have failed during BuildSRPMFromSCM phase
207
+ failed_arch = "noarch"
208
+ elif "toplevel" in failed_arches:
209
+ # Probably a Koji-specific error, not a build error
210
+ failed_arch = "toplevel"
211
+ else:
212
+ # We have one or more architectures that we don't recognize; just
213
+ # pick the first alphabetically.
214
+ failed_arch = sorted(list(failed_arches.keys()))[0]
215
+
216
+ LOG.debug("Failed architecture: %s", failed_arch)
217
+
218
+ log_path = failed_arches[failed_arch].as_posix()
219
+
220
+ log_url = f"{gitlab_cfg.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts/{log_path}" # pylint: disable=line-too-long
221
+ LOG.debug("Returning contents of %s", log_url)
222
+
223
+ # Return the log as a file-like object with .read() function
224
+ return log_url, artifacts_zip.open(log_path)
225
+
226
+
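The comments inside retrieve_and_preprocess_koji_logs describe how the koji archive layout is interpreted: a task_failed.log directly under kojilogs/noarch-*/ counts as a top-level failure, while deeper copies identify the failed architecture from their parent directory. A short PurePath sketch of those rules, using invented member names:

from pathlib import PurePath

# Invented archive members; real koji archives may differ.
members = [
    "kojilogs/noarch-12345/task_failed.log",
    "kojilogs/noarch-12345/x86_64-67890/task_failed.log",
]
for name in members:
    path = PurePath(name)
    if len(path.parts) <= 3:
        print("toplevel ->", path)  # top-level koji failure
    else:
        arch = path.parent.parts[-1].split("-")[0]
        # "build.log" stands in for whatever log task_failed.log points at.
        print(arch, "->", path.parent / "build.log")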
227
+ async def check_artifacts_file_size(
228
+ gitlab_cfg: GitLabInstanceConfig,
229
+ http: aiohttp.ClientSession,
230
+ job: gitlab.v4.objects.ProjectJob,
231
+ ):
232
+ """Method to determine if the artifacts are too large to process"""
233
+ # First, make sure that the artifacts are of a reasonable size. The
234
+ # zipped artifact collection will be stored in memory below. The
235
+ # python-gitlab library doesn't expose a way to check this value directly,
236
+ # so we need to interact with directly with the headers.
237
+ artifacts_url = f"{gitlab_cfg.api_url}/projects/{job.project_id}/jobs/{job.id}/artifacts" # pylint: disable=line-too-long
238
+ LOG.debug("checking artifact URL %s", artifacts_url)
239
+ try:
240
+ head_response = await http.head(
241
+ artifacts_url,
242
+ allow_redirects=True,
243
+ headers={"Authorization": f"Bearer {gitlab_cfg.api_token}"},
244
+ timeout=5,
245
+ raise_for_status=True,
246
+ )
247
+ except aiohttp.ClientResponseError as ex:
248
+ raise HTTPException(
249
+ status_code=400,
250
+ detail=f"Unable to check artifact URL: [{ex.status}] {ex.message}",
251
+ ) from ex
252
+ content_length = int(head_response.headers.get("content-length", 0))
253
+ LOG.debug(
254
+ "URL: %s, content-length: %d, max length: %d",
255
+ artifacts_url,
256
+ content_length,
257
+ gitlab_cfg.max_artifact_size,
258
+ )
259
+ return content_length <= gitlab_cfg.max_artifact_size
260
+
261
+
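check_artifacts_file_size avoids downloading oversized archives by issuing a HEAD request and reading the Content-Length header, since python-gitlab does not expose the size directly. A stand-alone aiohttp sketch of the same idea; the URL, token, and limit below are placeholders:

import asyncio
import aiohttp

async def artifact_size_ok(url: str, token: str, max_size: int) -> bool:
    """Return True when the artifact archive advertises a size within max_size."""
    async with aiohttp.ClientSession() as session:
        async with session.head(
            url,
            allow_redirects=True,
            headers={"Authorization": f"Bearer {token}"},
            raise_for_status=True,
        ) as resp:
            return int(resp.headers.get("Content-Length", 0)) <= max_size

# asyncio.run(artifact_size_ok("https://gitlab.example/api/v4/...", "TOKEN", 300 * 1024 * 1024))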
262
+ async def comment_on_mr( # pylint: disable=too-many-arguments disable=too-many-positional-arguments
263
+ forge: Forge,
264
+ project: gitlab.v4.objects.Project,
265
+ merge_request_iid: int,
266
+ job: gitlab.v4.objects.ProjectJob,
267
+ log_url: str,
268
+ response: StagedResponse,
269
+ metrics_id: int,
270
+ ):
271
+ """Add the Log Detective response as a comment to the merge request"""
272
+ LOG.debug(
273
+ "Primary Explanation for %s MR %d: %s",
274
+ project.name,
275
+ merge_request_iid,
276
+ response.explanation.text,
277
+ )
278
+
279
+ # First, we'll see if there's an existing comment on this Merge Request
280
+ # and wrap it in <details></details> to reduce noise.
281
+ await suppress_latest_comment(forge, project, merge_request_iid)
282
+
283
+ # Get the formatted short comment.
284
+ short_comment = await generate_mr_comment(job, log_url, response, full=False)
285
+
286
+ # Look up the merge request
287
+ merge_request = await asyncio.to_thread(
288
+ project.mergerequests.get, merge_request_iid
289
+ )
290
+
291
+ # Submit a new comment to the Merge Request using the Gitlab API
292
+ discussion = await asyncio.to_thread(
293
+ merge_request.discussions.create, {"body": short_comment}
294
+ )
295
+
296
+ # Get the ID of the first note
297
+ note_id = discussion.attributes["notes"][0]["id"]
298
+ note = discussion.notes.get(note_id)
299
+
300
+ # Update the comment with the full details
301
+ # We do this in a second step so we don't bombard the user's email
302
+ # notifications with a massive message. Gitlab doesn't send email for
303
+ # comment edits.
304
+ full_comment = await generate_mr_comment(job, log_url, response, full=True)
305
+ note.body = full_comment
306
+
307
+ # Pause for five seconds before saving the full comment, otherwise
308
+ # Gitlab may bundle the edited message together with the creation
309
+ # message in email.
310
+ await asyncio.sleep(5)
311
+ await asyncio.to_thread(note.save)
312
+
313
+ # Save the new comment to the database
314
+ try:
315
+ metrics = AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
316
+ Comments.create(
317
+ forge,
318
+ project.id,
319
+ merge_request_iid,
320
+ job.id,
321
+ discussion.id,
322
+ metrics,
323
+ )
324
+ except sqlalchemy.exc.IntegrityError:
325
+ # We most likely attempted to save a new comment for the same
326
+ # build job. This is somewhat common during development when we're
327
+ # submitting requests manually. It shouldn't really happen in
328
+ # production.
329
+ if not SERVER_CONFIG.general.devmode:
330
+ raise
331
+
332
+
333
+ async def suppress_latest_comment(
334
+ forge: Forge,
335
+ project: gitlab.v4.objects.Project,
336
+ merge_request_iid: int,
337
+ ) -> None:
338
+ """Look up the latest comment on this Merge Request, if any, and wrap it
339
+ in a <details></details> block with a comment indicating that it has been
340
+ superseded by a new push."""
341
+
342
+ # Ask the database for the last known comment for this MR
343
+ previous_comment = Comments.get_latest_comment(
344
+ forge, project.id, merge_request_iid
345
+ )
346
+
347
+ if previous_comment is None:
348
+ # No existing comment, so nothing to do.
349
+ return
350
+
351
+ # Retrieve its content from the Gitlab API
352
+
353
+ # Look up the merge request
354
+ merge_request = await asyncio.to_thread(
355
+ project.mergerequests.get, merge_request_iid
356
+ )
357
+
358
+ # Find the discussion matching the latest comment ID
359
+ discussion = await asyncio.to_thread(
360
+ merge_request.discussions.get, previous_comment.comment_id
361
+ )
362
+
363
+ # Get the ID of the first note
364
+ note_id = discussion.attributes["notes"][0]["id"]
365
+ note = discussion.notes.get(note_id)
366
+
367
+ # Wrap the note in <details>, indicating why.
368
+ note.body = (
369
+ "This comment has been superseded by a newer "
370
+ f"Log Detective analysis.\n<details>\n{note.body}\n</details>"
371
+ )
372
+ await asyncio.to_thread(note.save)
373
+
374
+
375
+ async def generate_mr_comment(
376
+ job: gitlab.v4.objects.ProjectJob,
377
+ log_url: str,
378
+ response: StagedResponse,
379
+ full: bool = True,
380
+ ) -> str:
381
+ """Use a template to generate a comment string to submit to Gitlab"""
382
+
383
+ # Locate and load the comment template
384
+ script_path = Path(__file__).resolve().parent
385
+ template_path = Path(script_path, "templates")
386
+ jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
387
+
388
+ if full:
389
+ tpl = jinja_env.get_template("gitlab_full_comment.md.j2")
390
+ else:
391
+ tpl = jinja_env.get_template("gitlab_short_comment.md.j2")
392
+
393
+ artifacts_url = f"{job.project_url}/-/jobs/{job.id}/artifacts/download"
394
+
395
+ if response.response_certainty >= 90:
396
+ emoji_face = ":slight_smile:"
397
+ elif response.response_certainty >= 70:
398
+ emoji_face = ":neutral_face:"
399
+ else:
400
+ emoji_face = ":frowning2:"
401
+
402
+ # Generate the comment from the template
403
+ content = tpl.render(
404
+ package=job.project_name,
405
+ explanation=response.explanation.text,
406
+ certainty=f"{response.response_certainty:.2f}",
407
+ emoji_face=emoji_face,
408
+ snippets=response.snippets,
409
+ log_url=log_url,
410
+ artifacts_url=artifacts_url,
411
+ )
412
+
413
+ return content
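generate_mr_comment renders one of two Jinja2 templates shipped in the package's templates/ directory with the variables passed to tpl.render() above. A minimal rendering sketch; the inline template string below is invented and is not the project's gitlab_*_comment.md.j2:

import jinja2

template = jinja2.Environment().from_string(
    "Log Detective analysis for {{ package }} {{ emoji_face }}\n"
    "Certainty: {{ certainty }}%\n"
    "[Failed log]({{ log_url }}) | [All artifacts]({{ artifacts_url }})"
)
print(
    template.render(
        package="example-pkg",
        emoji_face=":neutral_face:",
        certainty="73.20",
        log_url="https://gitlab.example/api/v4/projects/1/jobs/2/artifacts/build.log",
        artifacts_url="https://gitlab.example/group/example-pkg/-/jobs/2/artifacts/download",
    )
)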
@@ -0,0 +1,284 @@
1
+ import os
2
+ import asyncio
3
+ import json
4
+ from typing import List, Tuple, Dict, Any, Union
5
+
6
+ import backoff
7
+ from aiohttp import StreamReader
8
+ from fastapi import HTTPException
9
+
10
+ import aiohttp
11
+
12
+ from logdetective.constants import SNIPPET_DELIMITER
13
+ from logdetective.extractors import DrainExtractor
14
+ from logdetective.utils import (
15
+ compute_certainty,
16
+ )
17
+ from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG
18
+ from logdetective.server.models import (
19
+ StagedResponse,
20
+ Explanation,
21
+ AnalyzedSnippet,
22
+ )
23
+
24
+
25
+ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
26
+
27
+
28
+ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
29
+ """Format snippets for submission into staged prompt."""
30
+ summary = f"\n{SNIPPET_DELIMITER}\n".join(
31
+ [
32
+ f"[{e.text}] at line [{e.line_number}]: [{e.explanation.text}]"
33
+ for e in snippets
34
+ ]
35
+ )
36
+ return summary
37
+
38
+
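format_analyzed_snippets only relies on the .text, .line_number, and .explanation.text attributes of each snippet, so its output can be previewed with simple stand-in objects (the values below are invented):

from types import SimpleNamespace

fake_snippets = [
    SimpleNamespace(
        text="error: linker command failed",
        line_number=120,
        explanation=SimpleNamespace(text="ld could not find libfoo"),
    ),
]
print(format_analyzed_snippets(fake_snippets))
# -> [error: linker command failed] at line [120]: [ld could not find libfoo]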
39
+ def mine_logs(log: str) -> List[Tuple[int, str]]:
40
+ """Extract snippets from log text"""
41
+ extractor = DrainExtractor(
42
+ verbose=True, context=True, max_clusters=SERVER_CONFIG.extractor.max_clusters
43
+ )
44
+
45
+ LOG.info("Getting summary")
46
+ log_summary = extractor(log)
47
+
48
+ ratio = len(log_summary) / len(log.split("\n"))
49
+ LOG.debug("Log summary: \n %s", log_summary)
50
+ LOG.info("Compression ratio: %s", ratio)
51
+
52
+ return log_summary
53
+
54
+
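mine_logs returns the extractor's output as (line_number, snippet_text) tuples, which is the shape the staged analysis below builds on. For example (values invented):

log_summary = [
    (57, "configure: error: C compiler cannot create executables"),
    (812, "make: *** [Makefile:42: all] Error 2"),
]
for line_number, snippet in log_summary:
    print(f"{line_number}: {snippet}")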
55
+ async def submit_to_llm_endpoint(
56
+ http: aiohttp.ClientSession,
57
+ url: str,
58
+ data: Dict[str, Any],
59
+ headers: Dict[str, str],
60
+ stream: bool,
61
+ ) -> Any:
62
+ """Send request to selected API endpoint. Verifying successful request unless
63
+ the using the stream response.
64
+
65
+ url:
66
+ data:
67
+ headers:
68
+ stream:
69
+ """
70
+ async with SERVER_CONFIG.inference.get_limiter():
71
+ LOG.debug("async request %s headers=%s data=%s", url, headers, data)
72
+ response = await http.post(
73
+ url,
74
+ headers=headers,
75
+ # we need to use the `json=` parameter here and let aiohttp
76
+ # handle the json-encoding
77
+ json=data,
78
+ timeout=int(LLM_CPP_SERVER_TIMEOUT),
79
+ # The docs say `chunked` takes an int, but:
80
+ # DeprecationWarning: Chunk size is deprecated #1615
81
+ # So make sure we pass either True or None here.
82
+ chunked=True if stream else None,
83
+ raise_for_status=True,
84
+ )
85
+ if stream:
86
+ return response
87
+ try:
88
+ return json.loads(await response.text())
89
+ except (UnicodeDecodeError, json.JSONDecodeError) as ex:
90
+ LOG.error("Error encountered while parsing llama server response: %s", ex)
91
+ raise HTTPException(
92
+ status_code=400,
93
+ detail=f"Couldn't parse the inference server response.\nError: {ex}",
94
+ ) from ex
95
+
96
+
97
+ def should_we_giveup(exc: aiohttp.ClientResponseError) -> bool:
98
+ """
99
+ From backoff's docs:
100
+
101
+ > a function which accepts the exception and returns
102
+ > a truthy value if the exception should not be retried
103
+ """
104
+ LOG.info("Should we give up on retrying error %s", exc)
105
+ return exc.status < 500
106
+
107
+
108
+ def we_give_up(details: backoff._typing.Details):
109
+ """
110
+ Retries didn't work (or we hit an exception we don't retry on), so
111
+ give up and raise a proper 500 from our API endpoint.
112
+ """
113
+ LOG.error("Inference error: %s", details["args"])
114
+ raise HTTPException(500, "Request to the inference API failed")
115
+
116
+
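should_we_giveup and we_give_up plug into the backoff decorator used below: client errors (status < 500) are not retried, and exhausted retries surface as an HTTP 500. A self-contained sketch of that give-up contract, with an invented flaky() coroutine and backoff.expo instead of the custom wait generator:

import backoff
import aiohttp

def giveup_on_client_error(exc: aiohttp.ClientResponseError) -> bool:
    # 4xx responses will not improve on retry; 5xx ones might.
    return exc.status < 500

@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=giveup_on_client_error,
)
async def flaky():
    raise aiohttp.ClientResponseError(None, (), status=503, message="try again")

# asyncio.run(flaky()) would attempt the call three times before re-raising.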
117
+ @backoff.on_exception(
118
+ lambda: backoff.constant([10, 30, 120]),
119
+ aiohttp.ClientResponseError,
120
+ max_tries=3,
121
+ giveup=should_we_giveup,
122
+ raise_on_giveup=False,
123
+ on_giveup=we_give_up,
124
+ )
125
+ async def submit_text( # pylint: disable=R0913,R0917
126
+ http: aiohttp.ClientSession,
127
+ text: str,
128
+ max_tokens: int = -1,
129
+ log_probs: int = 1,
130
+ stream: bool = False,
131
+ model: str = "default-model",
132
+ ) -> Explanation:
133
+ """Submit prompt to LLM using a selected endpoint.
134
+ max_tokens: number of tokens to be produced; 0 indicates running until EOS is encountered
135
+ log_probs: number of token choices to produce log probs for
136
+ """
137
+ LOG.info("Analyzing the text")
138
+
139
+ headers = {"Content-Type": "application/json"}
140
+
141
+ if SERVER_CONFIG.inference.api_token:
142
+ headers["Authorization"] = f"Bearer {SERVER_CONFIG.inference.api_token}"
143
+
144
+ if SERVER_CONFIG.inference.api_endpoint == "/chat/completions":
145
+ return await submit_text_chat_completions(
146
+ http, text, headers, max_tokens, log_probs > 0, stream, model
147
+ )
148
+ return await submit_text_completions(
149
+ http, text, headers, max_tokens, log_probs, stream, model
150
+ )
151
+
152
+
153
+ async def submit_text_completions( # pylint: disable=R0913,R0917
154
+ http: aiohttp.ClientSession,
155
+ text: str,
156
+ headers: dict,
157
+ max_tokens: int = -1,
158
+ log_probs: int = 1,
159
+ stream: bool = False,
160
+ model: str = "default-model",
161
+ ) -> Explanation:
162
+ """Submit prompt to OpenAI API completions endpoint.
163
+ max_tokens: number of tokens to be produced; 0 indicates running until EOS is encountered
164
+ log_probs: number of token choices to produce log probs for
165
+ """
166
+ LOG.info("Submitting to /v1/completions endpoint")
167
+ data = {
168
+ "prompt": text,
169
+ "max_tokens": max_tokens,
170
+ "logprobs": log_probs,
171
+ "stream": stream,
172
+ "model": model,
173
+ "temperature": SERVER_CONFIG.inference.temperature,
174
+ }
175
+
176
+ response = await submit_to_llm_endpoint(
177
+ http,
178
+ f"{SERVER_CONFIG.inference.url}/v1/completions",
179
+ data,
180
+ headers,
181
+ stream,
182
+ )
183
+
184
+ return Explanation(
185
+ text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
186
+ )
187
+
188
+
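The Explanation built above assumes an OpenAI-style /v1/completions payload, i.e. the generated text and logprobs live on the first element of choices. Abridged example with invented values:

completions_response = {
    "choices": [
        {
            "text": "The build failed because the linker could not find libfoo.",
            "logprobs": {"tokens": ["The", " build"], "token_logprobs": [-0.11, -0.27]},
        }
    ]
}
explanation_text = completions_response["choices"][0]["text"]
logprobs = completions_response["choices"][0]["logprobs"]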
189
+ async def submit_text_chat_completions( # pylint: disable=R0913,R0917
190
+ http: aiohttp.ClientSession,
191
+ text: str,
192
+ headers: dict,
193
+ max_tokens: int = -1,
194
+ log_probs: int = 1,
195
+ stream: bool = False,
196
+ model: str = "default-model",
197
+ ) -> Union[Explanation, StreamReader]:
198
+ """Submit prompt to OpenAI API /chat/completions endpoint.
199
+ max_tokens: number of tokens to be produced; 0 indicates running until EOS is encountered
200
+ log_probs: number of token choices to produce log probs for
201
+ """
202
+ LOG.info("Submitting to /v1/chat/completions endpoint")
203
+
204
+ data = {
205
+ "messages": [
206
+ {
207
+ "role": "user",
208
+ "content": text,
209
+ }
210
+ ],
211
+ "max_tokens": max_tokens,
212
+ "logprobs": log_probs,
213
+ "stream": stream,
214
+ "model": model,
215
+ "temperature": SERVER_CONFIG.inference.temperature,
216
+ }
217
+
218
+ response = await submit_to_llm_endpoint(
219
+ http,
220
+ f"{SERVER_CONFIG.inference.url}/v1/chat/completions",
221
+ data,
222
+ headers,
223
+ stream,
224
+ )
225
+
226
+ if stream:
227
+ return response
228
+ return Explanation(
229
+ text=response["choices"][0]["message"]["content"],
230
+ logprobs=response["choices"][0]["logprobs"]["content"],
231
+ )
232
+
233
+
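The /chat/completions variant instead reads the reply from choices[0].message.content and the per-token data from choices[0].logprobs.content. Abridged example with invented values:

chat_response = {
    "choices": [
        {
            "message": {"role": "assistant", "content": "The build failed in the %prep stage."},
            "logprobs": {"content": [{"token": "The", "logprob": -0.05}]},
        }
    ]
}
explanation_text = chat_response["choices"][0]["message"]["content"]
logprobs = chat_response["choices"][0]["logprobs"]["content"]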
234
+ async def perform_staged_analysis(
235
+ http: aiohttp.ClientSession, log_text: str
236
+ ) -> StagedResponse:
237
+ """Submit the log file snippets to the LLM and retrieve their results"""
238
+ log_summary = mine_logs(log_text)
239
+
240
+ # Process snippets asynchronously
241
+ awaitables = [
242
+ submit_text(
243
+ http,
244
+ PROMPT_CONFIG.snippet_prompt_template.format(s),
245
+ model=SERVER_CONFIG.inference.model,
246
+ max_tokens=SERVER_CONFIG.inference.max_tokens,
247
+ )
248
+ for s in log_summary
249
+ ]
250
+ analyzed_snippets = await asyncio.gather(*awaitables)
251
+
252
+ analyzed_snippets = [
253
+ AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
254
+ for e in zip(log_summary, analyzed_snippets)
255
+ ]
256
+ final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
257
+ format_analyzed_snippets(analyzed_snippets)
258
+ )
259
+
260
+ final_analysis = await submit_text(
261
+ http,
262
+ final_prompt,
263
+ model=SERVER_CONFIG.inference.model,
264
+ max_tokens=SERVER_CONFIG.inference.max_tokens,
265
+ )
266
+
267
+ certainty = 0
268
+
269
+ if final_analysis.logprobs:
270
+ try:
271
+ certainty = compute_certainty(final_analysis.logprobs)
272
+ except ValueError as ex:
273
+ LOG.error("Error encountered while computing certainty: %s", ex)
274
+ raise HTTPException(
275
+ status_code=400,
276
+ detail=f"Couldn't compute certainty with data:\n"
277
+ f"{final_analysis.logprobs}",
278
+ ) from ex
279
+
280
+ return StagedResponse(
281
+ explanation=final_analysis,
282
+ snippets=analyzed_snippets,
283
+ response_certainty=certainty,
284
+ )
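Taken together, perform_staged_analysis first explains each mined snippet, then asks the model for a final summary and derives a certainty score from its logprobs. A hypothetical driver; the log path and session handling are illustrative only:

import asyncio
import aiohttp

async def main() -> None:
    async with aiohttp.ClientSession() as http:
        with open("build.log", encoding="utf-8") as log_file:
            result = await perform_staged_analysis(http, log_text=log_file.read())
    print(f"certainty: {result.response_certainty:.2f}")
    print(result.explanation.text)

if __name__ == "__main__":
    asyncio.run(main())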