logdetective 2.0.1__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/extractors.py +134 -23
- logdetective/logdetective.py +39 -23
- logdetective/models.py +26 -0
- logdetective/prompts-summary-first.yml +0 -2
- logdetective/prompts.yml +0 -3
- logdetective/server/compressors.py +7 -10
- logdetective/server/config.py +3 -2
- logdetective/server/database/base.py +31 -26
- logdetective/server/database/models/__init__.py +2 -2
- logdetective/server/database/models/exceptions.py +4 -0
- logdetective/server/database/models/koji.py +47 -30
- logdetective/server/database/models/merge_request_jobs.py +205 -186
- logdetective/server/database/models/metrics.py +87 -61
- logdetective/server/emoji.py +57 -55
- logdetective/server/exceptions.py +4 -0
- logdetective/server/gitlab.py +18 -11
- logdetective/server/llm.py +19 -10
- logdetective/server/metric.py +18 -13
- logdetective/server/models.py +65 -48
- logdetective/server/plot.py +13 -11
- logdetective/server/server.py +52 -30
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +58 -53
- logdetective/server/templates/gitlab_short_comment.md.j2 +52 -47
- logdetective/server/utils.py +15 -27
- logdetective/utils.py +115 -49
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/METADATA +95 -21
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective-2.0.1.dist-info/RECORD +0 -39
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/llm.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 import asyncio
 import random
+import time
 from typing import List, Tuple, Dict

 import backoff
@@ -15,6 +16,7 @@ from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
     format_snippets,
+    mine_logs,
 )
 from logdetective.server.config import (
     LOG,
@@ -33,10 +35,10 @@ from logdetective.server.models import (
 )
 from logdetective.server.utils import (
     format_analyzed_snippets,
-    mine_logs,
     should_we_giveup,
     we_give_up,
     filter_snippets,
+    construct_final_prompt,
 )


@@ -184,10 +186,13 @@ async def analyze_snippets(

 async def perfrom_analysis(log_text: str) -> Response:
     """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-    log_summary = mine_logs(log_text)
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
     log_summary = format_snippets(log_summary)
+
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
     messages = prompt_to_messages(
-
+        final_prompt,
         PROMPT_CONFIG.default_system_prompt,
         SERVER_CONFIG.inference.system_role,
         SERVER_CONFIG.inference.user_role,
@@ -213,10 +218,13 @@ async def perfrom_analysis(log_text: str) -> Response:

 async def perform_analyis_stream(log_text: str) -> AsyncStream:
     """Submit log file snippets in aggregate and return a stream of tokens"""
-    log_summary = mine_logs(log_text)
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
     log_summary = format_snippets(log_summary)
+
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
     messages = prompt_to_messages(
-
+        final_prompt,
         PROMPT_CONFIG.default_system_prompt,
         SERVER_CONFIG.inference.system_role,
         SERVER_CONFIG.inference.user_role,
@@ -235,8 +243,8 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:

 async def perform_staged_analysis(log_text: str) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
-    log_summary = mine_logs(log_text)
-
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    start = time.time()
     if SERVER_CONFIG.general.top_k_snippets:
         rated_snippets = await analyze_snippets(
             log_summary=log_summary,
@@ -265,10 +273,11 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
         AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
         for e in zip(log_summary, processed_snippets)
     ]
+    delta = time.time() - start
+    LOG.info("Snippet analysis performed in %f s", delta)
+    log_summary = format_analyzed_snippets(processed_snippets)
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)

-    final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
-        format_analyzed_snippets(processed_snippets)
-    )
     messages = prompt_to_messages(
         final_prompt,
         PROMPT_CONFIG.staged_system_prompt,
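
The changes above route all three analysis entry points through the same helpers: `mine_logs` now receives the extractor list from `SERVER_CONFIG.extractor.get_extractors()`, and the final prompt is built by `construct_final_prompt` instead of each caller formatting the template inline. A minimal sketch of that prompt-construction pattern, assuming only the `(summary, template)` call shape visible in the diff; the template text, logger name, and `summarize` wrapper are placeholders for this example, not logdetective's implementation.

```python
import logging
import time

LOG = logging.getLogger("prompt-sketch")

# Placeholder standing in for PROMPT_CONFIG.prompt_template.
PROMPT_TEMPLATE = "Explain the following log snippets:\n\n{}"


def construct_final_prompt(log_summary: str, template: str) -> str:
    """Apply the prompt template in one place instead of at every call site."""
    return template.format(log_summary)


def summarize(snippets: list[str]) -> str:
    """Toy stand-in for mine_logs() + format_snippets(), timed as in the diff."""
    start = time.time()
    log_summary = "\n".join(f"[{i}] {line}" for i, line in enumerate(snippets))
    LOG.info("Snippet analysis performed in %f s", time.time() - start)
    return construct_final_prompt(log_summary, PROMPT_TEMPLATE)


if __name__ == "__main__":
    print(summarize(["error: undefined reference to `foo'", "collect2: ld returned 1"]))
```
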
logdetective/server/metric.py
CHANGED
@@ -1,4 +1,3 @@
-import io
 import inspect
 import datetime

@@ -13,14 +12,15 @@ from logdetective.remote_log import RemoteLog
 from logdetective.server.config import LOG
 from logdetective.server.compressors import LLMResponseCompressor, RemoteLogCompressor
 from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+from logdetective.server.exceptions import LogDetectiveMetricsError


 async def add_new_metrics(
-    api_name:
+    api_name: EndpointType,
     url: Optional[str] = None,
     http_session: Optional[aiohttp.ClientSession] = None,
     received_at: Optional[datetime.datetime] = None,
-    compressed_log_content: Optional[
+    compressed_log_content: Optional[bytes] = None,
 ) -> int:
     """Add a new database entry for a received request.

@@ -29,10 +29,14 @@ async def add_new_metrics(
     and the log (in a zip format) for which analysis is requested.
     """
     if not compressed_log_content:
+        if not (url and http_session):
+            raise LogDetectiveMetricsError(
+                f"""Remote log can not be retrieved without URL and http session.
+                URL: {url}, http session:{http_session}""")
         remote_log = RemoteLog(url, http_session)
         compressed_log_content = await RemoteLogCompressor(remote_log).zip_content()

-    return AnalyzeRequestMetrics.create(
+    return await AnalyzeRequestMetrics.create(
         endpoint=EndpointType(api_name),
         compressed_log=compressed_log_content,
         request_received_at=received_at
@@ -41,7 +45,7 @@ async def add_new_metrics(
     )


-def update_metrics(
+async def update_metrics(
     metrics_id: int,
     response: Union[models.Response, models.StagedResponse, StreamingResponse],
     sent_at: Optional[datetime.datetime] = None,
@@ -73,12 +77,12 @@ def update_metrics(
     response_certainty = (
         response.response_certainty if hasattr(response, "response_certainty") else None
     )
-    AnalyzeRequestMetrics.update(
-        metrics_id,
-        response_sent_at,
-        response_length,
-        response_certainty,
-        compressed_response,
+    await AnalyzeRequestMetrics.update(
+        id_=metrics_id,
+        response_sent_at=response_sent_at,
+        response_length=response_length,
+        response_certainty=response_certainty,
+        compressed_response=compressed_response,
     )


@@ -109,10 +113,11 @@ def track_request(name=None):
     async def async_decorated_function(*args, **kwargs):
         log_url = kwargs["build_log"].url
         metrics_id = await add_new_metrics(
-            name if name else f.__name__,
+            api_name=EndpointType(name if name else f.__name__),
+            url=log_url, http_session=kwargs["http_session"]
         )
         response = await f(*args, **kwargs)
-        update_metrics(metrics_id, response)
+        await update_metrics(metrics_id, response)
         return response

     if inspect.iscoroutinefunction(f):
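
Both metrics helpers are now coroutines and `AnalyzeRequestMetrics.update` is called with keyword arguments, so the `track_request` decorator has to await them. A self-contained sketch of that decorator pattern, with an in-memory dict standing in for the database-backed `AnalyzeRequestMetrics` calls; the helper bodies are illustrative only, and only the awaited-decorator shape comes from the diff.

```python
import asyncio
import functools
import inspect
import time

# Hypothetical in-memory store standing in for the AnalyzeRequestMetrics table.
_METRICS: dict[int, dict] = {}


async def add_new_metrics(api_name: str) -> int:
    metrics_id = len(_METRICS) + 1
    _METRICS[metrics_id] = {"endpoint": api_name, "received_at": time.time()}
    return metrics_id


async def update_metrics(metrics_id: int, response) -> None:
    _METRICS[metrics_id].update(
        response_sent_at=time.time(), response_length=len(str(response))
    )


def track_request(name=None):
    """Decorator sketch: record request metrics around an async handler."""
    def decorator(f):
        @functools.wraps(f)
        async def wrapper(*args, **kwargs):
            metrics_id = await add_new_metrics(name if name else f.__name__)
            response = await f(*args, **kwargs)
            await update_metrics(metrics_id, response)
            return response
        # Only wrap coroutine functions, mirroring the iscoroutinefunction check above.
        return wrapper if inspect.iscoroutinefunction(f) else f
    return decorator


@track_request()
async def analyze_log(text: str) -> str:
    return f"analysis of {len(text)} characters"


if __name__ == "__main__":
    print(asyncio.run(analyze_log("build failed")))
    print(_METRICS)
```
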
logdetective/server/models.py
CHANGED
@@ -10,6 +10,7 @@ from pydantic import (
     field_validator,
     NonNegativeFloat,
     HttpUrl,
+    PrivateAttr,
 )

 import aiohttp
@@ -26,6 +27,9 @@ from logdetective.constants import (
     USER_ROLE_DEFAULT,
 )

+from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
+from logdetective.utils import check_csgrep
+

 class BuildLog(BaseModel):
     """Model of data submitted to API."""
@@ -179,8 +183,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
     http_timeout: float = 5.0
     user_role: str = USER_ROLE_DEFAULT
     system_role: str = SYSTEM_ROLE_DEFAULT
-
-    _limiter: AsyncLimiter =
+    llm_api_timeout: float = 15.0
+    _limiter: AsyncLimiter = PrivateAttr(
+        default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))

     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -200,42 +205,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
         self._requests_per_minute = data.get(
             "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
         )
+        self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
         self._limiter = AsyncLimiter(self._requests_per_minute)

-    def __del__(self):
-        # Close connection when this object is destroyed
-        if self._http_session:
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(self._http_session.close())
-            except RuntimeError:
-                # No loop running, so create one to close the session
-                loop = asyncio.new_event_loop()
-                loop.run_until_complete(self._http_session.close())
-                loop.close()
-            except Exception:  # pylint: disable=broad-exception-caught
-                # We should only get here if we're shutting down, so we don't
-                # really care if the close() completes cleanly.
-                pass
-
-    def get_http_session(self):
-        """Return the internal HTTP session so it can be used to contect the
-        LLM server. May be used as a context manager."""
-
-        # Create the session on the first attempt. We need to do this "lazily"
-        # because it needs to happen once the event loop is running, even
-        # though the initialization itself is synchronous.
-        if not self._http_session:
-            self._http_session = aiohttp.ClientSession(
-                base_url=self.url,
-                timeout=aiohttp.ClientTimeout(
-                    total=self.http_timeout,
-                    connect=3.07,
-                ),
-            )
-
-        return self._http_session
-
     def get_limiter(self):
         """Return the limiter object so it can be used as a context manager"""
         return self._limiter
@@ -244,20 +216,59 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
 class ExtractorConfig(BaseModel):
     """Model for extractor configuration of logdetective server."""

-    context: bool = True
     max_clusters: int = 8
     verbose: bool = False
     max_snippet_len: int = 2000
+    csgrep: bool = False
+
+    _extractors: List[Extractor] = PrivateAttr(default_factory=list)
+
+    def _setup_extractors(self):
+        """Initialize extractors with common settings."""
+        self._extractors = [
+            DrainExtractor(
+                verbose=self.verbose,
+                max_snippet_len=self.max_snippet_len,
+                max_clusters=self.max_clusters,
+            )
+        ]
+
+        if self.csgrep:
+            self._extractors.append(
+                CSGrepExtractor(
+                    verbose=self.verbose,
+                    max_snippet_len=self.max_snippet_len,
+                )
+            )

     def __init__(self, data: Optional[dict] = None):
-        super().__init__()
+        super().__init__(data=data)
+
         if data is None:
+            self._setup_extractors()
             return

-        self.context = data.get("context", True)
         self.max_clusters = data.get("max_clusters", 8)
         self.verbose = data.get("verbose", False)
         self.max_snippet_len = data.get("max_snippet_len", 2000)
+        self.csgrep = data.get("csgrep", False)
+
+        self._setup_extractors()
+
+    def get_extractors(self) -> List[Extractor]:
+        """Return list of initialized extractors, each will be applied in turn
+        on original log text to retrieve snippets."""
+        return self._extractors
+
+    @field_validator("csgrep", mode="after")
+    @classmethod
+    def validate_csgrep(cls, value: bool) -> bool:
+        """Verify that csgrep is available if requested."""
+        if not check_csgrep():
+            raise ValueError(
+                "Requested csgrep extractor but `csgrep` binary is not in the PATH"
+            )
+        return value


 class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
@@ -265,6 +276,7 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr

     name: str = None
     url: str = None
+    # Path to API of the gitlab instance, assuming `url` as prefix.
     api_path: str = None
     api_token: str = None

@@ -277,8 +289,8 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr
     webhook_secrets: Optional[List[str]] = None

     timeout: float = 5.0
-    _conn: Gitlab = None
-    _http_session: aiohttp.ClientSession = None
+    _conn: Gitlab | None = PrivateAttr(default=None)
+    _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)

     # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
     max_artifact_size: int = 300 * 1024 * 1024
@@ -364,8 +376,8 @@ class KojiInstanceConfig(BaseModel):
     xmlrpc_url: str = ""
     tokens: List[str] = []

-    _conn: Optional[koji.ClientSession] = None
-    _callbacks: defaultdict[int, set[str]] = defaultdict(set)
+    _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
+    _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))

     def __init__(self, name: str, data: Optional[dict] = None):
         super().__init__()
@@ -455,8 +467,8 @@ class LogConfig(BaseModel):
 class GeneralConfig(BaseModel):
     """General config options for Log Detective"""

-    packages: List[str] =
-    excluded_packages: List[str] =
+    packages: List[str] = []
+    excluded_packages: List[str] = []
     devmode: bool = False
     sentry_dsn: HttpUrl | None = None
     collect_emojis_interval: int = 60 * 60 # seconds
@@ -483,6 +495,7 @@ class Config(BaseModel):
     log: LogConfig = LogConfig()
     inference: InferenceConfig = InferenceConfig()
     snippet_inference: InferenceConfig = InferenceConfig()
+    # TODO(jpodivin): Extend to work with multiple extractor configs
     extractor: ExtractorConfig = ExtractorConfig()
     gitlab: GitLabConfig = GitLabConfig()
     koji: KojiConfig = KojiConfig()
@@ -522,7 +535,8 @@ class TimePeriod(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def check_exclusive_fields(cls, data):
-        """Check that only one key between weeks, days and hours is defined"""
+        """Check that only one key between weeks, days and hours is defined,
+        if no period is specified, fall back to 2 days."""
         if isinstance(data, dict):
             how_many_fields = sum(
                 1
@@ -548,6 +562,7 @@ class TimePeriod(BaseModel):

     def get_time_period(self) -> datetime.timedelta:
         """Get the period of time represented by this input model.
+        Will default to 2 days, if no period is set.

         Returns:
             datetime.timedelta: The time period as a timedelta object.
@@ -559,10 +574,12 @@ class TimePeriod(BaseModel):
             delta = datetime.timedelta(days=self.days)
         elif self.hours:
             delta = datetime.timedelta(hours=self.hours)
+        else:
+            delta = datetime.timedelta(days=2)
         return delta

     def get_period_start_time(
-        self, end_time: datetime.datetime = None
+        self, end_time: Optional[datetime.datetime] = None
     ) -> datetime.datetime:
         """Calculate the start time of this period based on the end time.

@@ -575,5 +592,5 @@ class TimePeriod(BaseModel):
         """
         time = end_time or datetime.datetime.now(datetime.timezone.utc)
         if time.tzinfo is None:
-
+            time = time.replace(tzinfo=datetime.timezone.utc)
         return time - self.get_time_period()
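
Several configuration models switch their internal state (`_limiter`, `_conn`, `_http_session`, `_extractors`, `_callbacks`) from bare class attributes to pydantic `PrivateAttr`, and `ExtractorConfig` gains a `csgrep` flag guarded by a `field_validator`. A reduced pydantic v2 sketch of that pattern; the model below is trimmed down, uses string stand-ins for the real extractor classes, and omits the PATH check that the actual validator performs.

```python
from typing import List

from pydantic import BaseModel, PrivateAttr, field_validator


class ExtractorConfigSketch(BaseModel):
    """Illustrative config model, not logdetective's ExtractorConfig."""

    max_clusters: int = 8
    verbose: bool = False
    max_snippet_len: int = 2000
    csgrep: bool = False

    # PrivateAttr(default_factory=...) gives each instance its own private list,
    # the same pattern the diff applies to _limiter, _conn and _callbacks.
    _extractors: List[str] = PrivateAttr(default_factory=list)

    @field_validator("csgrep", mode="after")
    @classmethod
    def validate_csgrep(cls, value: bool) -> bool:
        # The real validator raises ValueError when the csgrep binary is missing;
        # this sketch only shows where such a check plugs in.
        return value

    def get_extractors(self) -> List[str]:
        """Return the extractor list, building it lazily on first use."""
        if not self._extractors:
            self._extractors = ["drain"] + (["csgrep"] if self.csgrep else [])
        return self._extractors


if __name__ == "__main__":
    print(ExtractorConfigSketch(csgrep=True).get_extractors())
```
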
logdetective/server/plot.py
CHANGED
@@ -69,7 +69,7 @@ def create_time_series_arrays(
     plot_def: Definition,
     start_time: datetime.datetime,
     end_time: datetime.datetime,
-    value_type: Optional[Union[int, float]] = int,
+    value_type: Optional[Union[type[int], type[float]]] = int,
 ) -> tuple[numpy.ndarray, numpy.ndarray]:
     """Create time series arrays from a dictionary of values.

@@ -163,7 +163,7 @@ def _add_line_chart( # pylint: disable=too-many-arguments disable=too-many-posi
     ax.tick_params(axis="y", labelcolor=color)


-def requests_per_time(
+async def requests_per_time(
     period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
@@ -191,7 +191,7 @@ def requests_per_time(
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
     plot_def = Definition(period_of_time)
-    requests_counts = AnalyzeRequestMetrics.get_requests_in_period(
+    requests_counts = await AnalyzeRequestMetrics.get_requests_in_period(
         start_time, end_time, plot_def.time_format, endpoint
     )
     timestamps, counts = create_time_series_arrays(
@@ -218,7 +218,7 @@ def requests_per_time(
     return fig


-def average_time_per_responses( # pylint: disable=too-many-locals
+async def average_time_per_responses( # pylint: disable=too-many-locals
     period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
@@ -246,8 +246,10 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
     plot_def = Definition(period_of_time)
-    responses_average_time =
-
+    responses_average_time = (
+        await AnalyzeRequestMetrics.get_responses_average_time_in_period(
+            start_time, end_time, plot_def.time_format, endpoint
+        )
     )
     timestamps, average_time = create_time_series_arrays(
         responses_average_time,
@@ -263,7 +265,7 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     )

     responses_average_length = (
-        AnalyzeRequestMetrics.get_responses_average_length_in_period(
+        await AnalyzeRequestMetrics.get_responses_average_length_in_period(
             start_time, end_time, plot_def.time_format, endpoint
         )
     )
@@ -292,7 +294,7 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     return fig


-def _collect_emoji_data(
+async def _collect_emoji_data(
     start_time: datetime.datetime, plot_def: Definition
 ) -> Dict[str, Dict[datetime.datetime, int]]:
     """Collect and organize emoji feedback data
@@ -300,7 +302,7 @@ def _collect_emoji_data(
     Counts all emojis given to logdetective comments created since start_time.
     Collect counts in time accordingly to the plot definition.
     """
-    reactions = Reactions.get_since(start_time)
+    reactions = await Reactions.get_since(start_time)
     reactions_values_dict: Dict[str, Dict] = {}
     for comment_created_at, reaction in reactions:
         comment_created_at_formatted = comment_created_at.strptime(
@@ -369,7 +371,7 @@ def _plot_emoji_data( # pylint: disable=too-many-locals
     return emoji_lines, emoji_labels


-def emojis_per_time(
+async def emojis_per_time(
     period_of_time: TimePeriod,
     end_time: Optional[datetime.datetime] = None,
 ) -> figure.Figure:
@@ -395,7 +397,7 @@ def emojis_per_time(
     plot_def = Definition(period_of_time)
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
-    reactions_values_dict = _collect_emoji_data(start_time, plot_def)
+    reactions_values_dict = await _collect_emoji_data(start_time, plot_def)

     fig, ax = pyplot.subplots(figsize=(12, 6))

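
Every plotting helper here becomes `async def` because the underlying metrics queries (`get_requests_in_period`, `get_responses_average_time_in_period`, `Reactions.get_since`) are now awaited, which forces the callers in server.py to await the plot functions as well. A minimal sketch of that sync-to-async propagation, with a dummy coroutine in place of the database query and the matplotlib part left out.

```python
import asyncio
import datetime
from typing import Dict


# Dummy coroutine standing in for AnalyzeRequestMetrics.get_requests_in_period,
# which the real code now awaits instead of calling synchronously.
async def get_requests_in_period(
    start: datetime.datetime, end: datetime.datetime
) -> Dict[str, int]:
    await asyncio.sleep(0)  # pretend to hit the database
    return {"2024-01-01": 3, "2024-01-02": 5}


async def requests_per_time(days: int = 2) -> Dict[str, int]:
    end = datetime.datetime.now(datetime.timezone.utc)
    start = end - datetime.timedelta(days=days)
    # Because the data access is awaited, the plotting helper itself becomes a
    # coroutine, so every caller has to `await` it, as the /metrics handlers do.
    return await get_requests_in_period(start, end)


if __name__ == "__main__":
    print(asyncio.run(requests_per_time()))
```
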
logdetective/server/server.py
CHANGED
@@ -67,6 +67,7 @@ from logdetective.server.emoji import (
     collect_emojis_for_mr,
 )
 from logdetective.server.compressors import RemoteLogCompressor
+from logdetective.server.utils import get_version


 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
@@ -89,7 +90,7 @@ async def lifespan(fapp: FastAPI):
     )

     # Ensure that the database is initialized.
-    logdetective.server.database.base.init()
+    await logdetective.server.database.base.init()

     # Start the background task scheduler for collecting emojis
     asyncio.create_task(schedule_collect_emojis_task())
@@ -106,37 +107,51 @@ async def get_http_session(request: Request) -> aiohttp.ClientSession:
     return request.app.http


-def requires_token_when_set(
+def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
     """
-    FastAPI Depend function that expects a header named
+    FastAPI Depend function that expects a header named Authorization

     If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
     otherwise ignore it
     """
     if not API_TOKEN:
-        LOG.info("LOGDETECTIVE_TOKEN env var not set,
+        LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
         # no token required, means local dev environment
         return
-
-    if authentication:
+    if authorization:
         try:
-            token =
-        except (ValueError, IndexError):
+            token = authorization.split(" ", 1)[1]
+        except (ValueError, IndexError) as ex:
             LOG.warning(
-                "
-
+                "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+                authorization,
             )
             # eat the exception and raise 401 below
-
+            raise HTTPException(
+                status_code=401,
+                detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
+            ) from ex
         if token == API_TOKEN:
             return
-
-
-    )
-    raise HTTPException(status_code=401, detail=
-
-
-app = FastAPI(
+        LOG.info("Provided token '%s' does not match expected value.", token)
+        raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
+    LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
+    raise HTTPException(status_code=401, detail="No token provided.")
+
+
+app = FastAPI(
+    title="Log Detective",
+    contact={
+        "name": "Log Detective developers",
+        "url": "https://github.com/fedora-copr/logdetective",
+        "email": "copr-devel@lists.fedorahosted.org"
+    },
+    license_info={
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
+    },
+    version=get_version(),
+    dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)


 @app.post("/analyze", response_model=Response)
@@ -299,16 +314,15 @@ async def analyze_koji_task(task_id: int, koji_instance_config: KojiInstanceConf
     # to retrieve the metric ID to associate it with the koji task analysis.

     metrics_id = await add_new_metrics(
-
+        EndpointType.ANALYZE_KOJI_TASK,
         log_text,
         received_at=datetime.datetime.now(datetime.timezone.utc),
         compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-
     # We need to associate the metric ID with the koji task analysis.
     # This will create the new row without a response, which we will use as
     # an indicator that the analysis is in progress.
-    KojiTaskAnalysis.create_or_restart(
+    await KojiTaskAnalysis.create_or_restart(
         koji_instance=koji_instance_config.xmlrpc_url,
         task_id=task_id,
         log_file_name=log_file_name,
@@ -317,8 +331,8 @@ async def analyze_koji_task(task_id: int, koji_instance_config: KojiInstanceConf

     # Now that we have the response, we can update the metrics and mark the
     # koji task analysis as completed.
-    update_metrics(metrics_id, response)
-    KojiTaskAnalysis.add_response(task_id, metrics_id)
+    await update_metrics(metrics_id, response)
+    await KojiTaskAnalysis.add_response(task_id, metrics_id)

     # Notify any callbacks that the analysis is complete.
     for callback in koji_instance_config.get_callbacks(task_id):
@@ -355,6 +369,12 @@ async def async_log(msg):
     return msg


+@app.get("/version", response_class=BasicResponse)
+async def get_version_wrapper():
+    """Get the version of logdetective"""
+    return BasicResponse(content=get_version())
+
+
 @app.post("/analyze/stream", response_class=StreamingResponse)
 @track_request()
 async def analyze_log_stream(
@@ -521,7 +541,7 @@ async def schedule_emoji_collection_for_mr(
     key = (forge, project_id, mr_iid)

     # FIXME: Look up the connection from the Forge # pylint: disable=fixme
-    gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value]
+    gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value].get_connection()

     LOG.debug("Looking up emojis for %s, %d, %d", forge, project_id, mr_iid)
     await collect_emojis_for_mr(project_id, mr_iid, gitlab_conn)
@@ -614,22 +634,24 @@ async def get_metrics(
     async def handler():
         """Show statistics for the specified endpoint and plot."""
         if plot == Plot.REQUESTS:
-            fig = plot_engine.requests_per_time(period_since_now, endpoint_type)
+            fig = await plot_engine.requests_per_time(period_since_now, endpoint_type)
             return _svg_figure_response(fig)
         if plot == Plot.RESPONSES:
-            fig = plot_engine.average_time_per_responses(
+            fig = await plot_engine.average_time_per_responses(
                 period_since_now, endpoint_type
             )
             return _svg_figure_response(fig)
         if plot == Plot.EMOJIS:
-            fig = plot_engine.emojis_per_time(period_since_now)
+            fig = await plot_engine.emojis_per_time(period_since_now)
             return _svg_figure_response(fig)
         # BOTH
-        fig_requests = plot_engine.requests_per_time(
-
+        fig_requests = await plot_engine.requests_per_time(
+            period_since_now, endpoint_type
+        )
+        fig_responses = await plot_engine.average_time_per_responses(
             period_since_now, endpoint_type
         )
-        fig_emojis = plot_engine.emojis_per_time(period_since_now)
+        fig_emojis = await plot_engine.emojis_per_time(period_since_now)
         return _multiple_svg_figures_response([fig_requests, fig_responses, fig_emojis])

     descriptions = {