logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- logdetective/server/config.py +1 -1
- logdetective/server/emoji.py +46 -48
- logdetective/server/gitlab.py +21 -8
- logdetective/server/llm.py +38 -12
- logdetective/server/models.py +66 -259
- logdetective/server/server.py +199 -32
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/METADATA +2 -2
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/RECORD +11 -11
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/WHEEL +0 -0
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/entry_points.txt +0 -0
- {logdetective-2.10.0.dist-info → logdetective-2.12.0.dist-info}/licenses/LICENSE +0 -0
logdetective/server/models.py
CHANGED
```diff
@@ -1,8 +1,6 @@
-import asyncio
-from collections import defaultdict
 import datetime
 from logging import BASIC_FORMAT
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any
 from pydantic import (
     BaseModel,
     Field,
@@ -10,14 +8,8 @@ from pydantic import (
     field_validator,
     NonNegativeFloat,
     HttpUrl,
-    PrivateAttr,
 )
 
-import aiohttp
-
-from aiolimiter import AsyncLimiter
-from gitlab import Gitlab
-import koji
 
 from logdetective.constants import (
     DEFAULT_TEMPERATURE,
@@ -26,8 +18,6 @@ from logdetective.constants import (
     SYSTEM_ROLE_DEFAULT,
     USER_ROLE_DEFAULT,
 )
-
-from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
 from logdetective.utils import check_csgrep
 
 
@@ -177,40 +167,14 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
     # OpenAI client library requires a string to be specified for API token
     # even if it is not checked on the server side
     api_token: str = "None"
-    model: str = ""
+    model: str = "default-model"
     temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
     max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
     http_timeout: float = 5.0
     user_role: str = USER_ROLE_DEFAULT
     system_role: str = SYSTEM_ROLE_DEFAULT
     llm_api_timeout: float = 15.0
-
-        default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))
-
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-
-        self.max_tokens = data.get("max_tokens", -1)
-        self.log_probs = data.get("log_probs", True)
-        self.url = data.get("url", "")
-        self.http_timeout = data.get("http_timeout", 5.0)
-        self.api_token = data.get("api_token", "None")
-        self.model = data.get("model", "default-model")
-        self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
-        self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
-        self.user_role = data.get("user_role", USER_ROLE_DEFAULT)
-        self.system_role = data.get("system_role", SYSTEM_ROLE_DEFAULT)
-        self._requests_per_minute = data.get(
-            "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
-        )
-        self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
-        self._limiter = AsyncLimiter(self._requests_per_minute)
-
-    def get_limiter(self):
-        """Return the limiter object so it can be used as a context manager"""
-        return self._limiter
+    requests_per_minute: int = LLM_DEFAULT_REQUESTS_PER_MINUTE
 
 
 class ExtractorConfig(BaseModel):
@@ -221,64 +185,25 @@ class ExtractorConfig(BaseModel):
     max_snippet_len: int = 2000
     csgrep: bool = False
 
-
-
-    def _setup_extractors(self):
-        """Initialize extractors with common settings."""
-        self._extractors = [
-            DrainExtractor(
-                verbose=self.verbose,
-                max_snippet_len=self.max_snippet_len,
-                max_clusters=self.max_clusters,
-            )
-        ]
-
-        if self.csgrep:
-            self._extractors.append(
-                CSGrepExtractor(
-                    verbose=self.verbose,
-                    max_snippet_len=self.max_snippet_len,
-                )
-            )
-
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__(data=data)
-
-        if data is None:
-            self._setup_extractors()
-            return
-
-        self.max_clusters = data.get("max_clusters", 8)
-        self.verbose = data.get("verbose", False)
-        self.max_snippet_len = data.get("max_snippet_len", 2000)
-        self.csgrep = data.get("csgrep", False)
-
-        self._setup_extractors()
-
-    def get_extractors(self) -> List[Extractor]:
-        """Return list of initialized extractors, each will be applied in turn
-        on original log text to retrieve snippets."""
-        return self._extractors
-
-    @field_validator("csgrep", mode="after")
+    @field_validator("csgrep", mode="before")
     @classmethod
-    def
-    """Verify
-        if not check_csgrep():
+    def verify_csgrep(cls, v: bool):
+        """Verify presence of csgrep binary if csgrep extractor is requested."""
+        if v and not check_csgrep():
             raise ValueError(
                 "Requested csgrep extractor but `csgrep` binary is not in the PATH"
            )
-        return
+        return v
 
 
 class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
     """Model for GitLab configuration of logdetective server."""
 
-    name: str
-    url: str =
+    name: str
+    url: str = "https://gitlab.com"
     # Path to API of the gitlab instance, assuming `url` as prefix.
-    api_path: str =
-    api_token: str = None
+    api_path: str = "/api/v4"
+    api_token: Optional[str] = None
 
     # This is a list to support key rotation.
     # When the key is being changed, we will add the new key as a new entry in
@@ -289,69 +214,17 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr
     webhook_secrets: Optional[List[str]] = None
 
     timeout: float = 5.0
-    _conn: Gitlab | None = PrivateAttr(default=None)
-    _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)
 
     # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
     max_artifact_size: int = 300 * 1024 * 1024
 
-
-
-
-
-
-
-
-        self.api_path = data.get("api_path", "/api/v4")
-        self.api_token = data.get("api_token", None)
-        self.webhook_secrets = data.get("webhook_secrets", None)
-        self.max_artifact_size = int(data.get("max_artifact_size", 300)) * 1024 * 1024
-
-        self.timeout = data.get("timeout", 5.0)
-        self._conn = Gitlab(
-            url=self.url,
-            private_token=self.api_token,
-            timeout=self.timeout,
-        )
-
-    def get_connection(self):
-        """Get the Gitlab connection object"""
-        return self._conn
-
-    def get_http_session(self):
-        """Return the internal HTTP session so it can be used to contect the
-        Gitlab server. May be used as a context manager."""
-
-        # Create the session on the first attempt. We need to do this "lazily"
-        # because it needs to happen once the event loop is running, even
-        # though the initialization itself is synchronous.
-        if not self._http_session:
-            self._http_session = aiohttp.ClientSession(
-                base_url=self.url,
-                headers={"Authorization": f"Bearer {self.api_token}"},
-                timeout=aiohttp.ClientTimeout(
-                    total=self.timeout,
-                    connect=3.07,
-                ),
-            )
-
-        return self._http_session
-
-    def __del__(self):
-        # Close connection when this object is destroyed
-        if self._http_session:
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(self._http_session.close())
-            except RuntimeError:
-                # No loop running, so create one to close the session
-                loop = asyncio.new_event_loop()
-                loop.run_until_complete(self._http_session.close())
-                loop.close()
-            except Exception:  # pylint: disable=broad-exception-caught
-                # We should only get here if we're shutting down, so we don't
-                # really care if the close() completes cleanly.
-                pass
+    @field_validator("max_artifact_size", mode="before")
+    @classmethod
+    def megabytes_to_bytes(cls, v: Any):
+        """Convert max_artifact_size from megabytes to bytes."""
+        if isinstance(v, int):
+            return v * 1024 * 1024
+        return 300 * 1024 * 1024
 
 
 class GitLabConfig(BaseModel):
@@ -359,63 +232,28 @@ class GitLabConfig(BaseModel):
 
     instances: Dict[str, GitLabInstanceConfig] = {}
 
-
-
-
-
+    @model_validator(mode="before")
+    @classmethod
+    def set_gitlab_instance_configs(cls, data: Any):
+        """Initialize configuration for each GitLab instance"""
+        if not isinstance(data, dict):
+            return data
 
+        instances = {}
         for instance_name, instance_data in data.items():
-            instance = GitLabInstanceConfig(instance_name, instance_data)
-
+            instance = GitLabInstanceConfig(name=instance_name, **instance_data)
+            instances[instance.url] = instance
+
+        return {"instances": instances}
 
 
 class KojiInstanceConfig(BaseModel):
     """Model for Koji configuration of logdetective server."""
 
     name: str = ""
-    xmlrpc_url: str = ""
+    xmlrpc_url: str = "https://koji.fedoraproject.org/kojihub"
     tokens: List[str] = []
 
-    _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
-    _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))
-
-    def __init__(self, name: str, data: Optional[dict] = None):
-        super().__init__()
-
-        self.name = name
-        if data is None:
-            # Set some reasonable defaults
-            self.xmlrpc_url = "https://koji.fedoraproject.org/kojihub"
-            self.tokens = []
-            self.max_artifact_size = 1024 * 1024
-            return
-
-        self.xmlrpc_url = data.get(
-            "xmlrpc_url", "https://koji.fedoraproject.org/kojihub"
-        )
-        self.tokens = data.get("tokens", [])
-
-    def get_connection(self):
-        """Get the Koji connection object"""
-        if not self._conn:
-            self._conn = koji.ClientSession(self.xmlrpc_url)
-        return self._conn
-
-    def register_callback(self, task_id: int, callback: str):
-        """Register a callback for a task"""
-        self._callbacks[task_id].add(callback)
-
-    def clear_callbacks(self, task_id: int):
-        """Unregister a callback for a task"""
-        try:
-            del self._callbacks[task_id]
-        except KeyError:
-            pass
-
-    def get_callbacks(self, task_id: int) -> set[str]:
-        """Get the callbacks for a task"""
-        return self._callbacks[task_id]
-
 
 class KojiConfig(BaseModel):
     """Model for Koji configuration of logdetective server."""
@@ -424,23 +262,26 @@ class KojiConfig(BaseModel):
     analysis_timeout: int = 15
     max_artifact_size: int = 300 * 1024 * 1024
 
-
-
-
-
-
-
-
-
-        # Handle max_artifact_size with default 300
-        self.max_artifact_size = data.get("max_artifact_size", 300) * 1024 * 1024
+    @field_validator("max_artifact_size", mode="before")
+    @classmethod
+    def megabytes_to_bytes(cls, v: Any):
+        """Convert max_artifact_size from megabytes to bytes."""
+        if isinstance(v, int):
+            return v * 1024 * 1024
+        return 300 * 1024 * 1024
 
-
-
-
-
-
-
+    @model_validator(mode="before")
+    @classmethod
+    def set_koji_instance_configs(cls, data: Any):
+        """Initialize configuration for each Koji instance."""
+        if isinstance(data, dict):
+            instances = {}
+            for instance_name, instance_data in data.get("instances", {}).items():
+                instances[instance_name] = KojiInstanceConfig(
+                    name=instance_name, **instance_data
+                )
+            data["instances"] = instances
+        return data
 
 
 class LogConfig(BaseModel):
@@ -452,17 +293,6 @@
     path: str | None = None
     format: str = BASIC_FORMAT
 
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-
-        self.name = data.get("name", "logdetective")
-        self.level_stream = data.get("level_stream", "INFO").upper()
-        self.level_file = data.get("level_file", "INFO").upper()
-        self.path = data.get("path")
-        self.format = data.get("format", BASIC_FORMAT)
-
 
 class GeneralConfig(BaseModel):
     """General config options for Log Detective"""
@@ -474,50 +304,27 @@
     collect_emojis_interval: int = 60 * 60  # seconds
     top_k_snippets: int = 0
 
-    def __init__(self, data: Optional[dict] = None):
-        super().__init__()
-        if data is None:
-            return
-
-        self.packages = data.get("packages", [])
-        self.excluded_packages = data.get("excluded_packages", [])
-        self.devmode = data.get("devmode", False)
-        self.sentry_dsn = data.get("sentry_dsn")
-        self.collect_emojis_interval = data.get(
-            "collect_emojis_interval", 60 * 60
-        )  # seconds
-        self.top_k_snippets = data.get("top_k_snippets", 0)
-
 
 class Config(BaseModel):
     """Model for configuration of logdetective server."""
 
-    log: LogConfig = LogConfig
-    inference: InferenceConfig = InferenceConfig
-    snippet_inference: InferenceConfig = InferenceConfig
+    log: LogConfig = Field(default_factory=LogConfig)
+    inference: InferenceConfig = Field(default_factory=InferenceConfig)
+    snippet_inference: InferenceConfig = Field(default_factory=InferenceConfig)
     # TODO(jpodivin): Extend to work with multiple extractor configs
-    extractor: ExtractorConfig = ExtractorConfig
-    gitlab: GitLabConfig = GitLabConfig
-    koji: KojiConfig = KojiConfig
-    general: GeneralConfig = GeneralConfig
-
-
-
-
-    if
-
-
-
-
-        self.extractor = ExtractorConfig(data.get("extractor"))
-        self.gitlab = GitLabConfig(data.get("gitlab"))
-        self.koji = KojiConfig(data.get("koji"))
-        self.general = GeneralConfig(data.get("general"))
-
-        if snippet_inference := data.get("snippet_inference", None):
-            self.snippet_inference = InferenceConfig(snippet_inference)
-        else:
-            self.snippet_inference = self.inference
+    extractor: ExtractorConfig = Field(default_factory=ExtractorConfig)
+    gitlab: GitLabConfig = Field(default_factory=GitLabConfig)
+    koji: KojiConfig = Field(default_factory=KojiConfig)
+    general: GeneralConfig = Field(default_factory=GeneralConfig)
+
+    @model_validator(mode="before")
+    @classmethod
+    def default_snippet_inference(cls, data: Any):
+        """Use base inference configuration, if specific snippet configuration isn't provided."""
+        if isinstance(data, dict):
+            if "snippet_inference" not in data and "inference" in data:
+                data["snippet_inference"] = data["inference"]
+        return data
 
 
 class TimePeriod(BaseModel):
```
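The removed `__init__` methods above did their own dict parsing; in 2.12.0 the same behaviour is expressed with `Field(default_factory=...)` defaults plus `field_validator`/`model_validator(mode="before")` hooks. The sketch below is not taken from the package's tests or docs; it only illustrates how a raw config dict would appear to flow through the new validators. The values are hypothetical, and it assumes the fields not visible in the diff keep their previous defaults.

```python
# Hypothetical illustration of the 2.12.0 validator-based config models.
# URLs, model name, and sizes are made up; only the class and field names
# and the validator behaviour come from the diff above.
from logdetective.server.models import Config

raw = {
    "inference": {
        "url": "http://localhost:8000",   # hypothetical inference endpoint
        "model": "default-model",
        "requests_per_minute": 30,        # new plain field; the private AsyncLimiter attr is gone
    },
    # No "snippet_inference" key: the before-validator default_snippet_inference()
    # copies the "inference" section in its place.
    "koji": {
        "max_artifact_size": 300,         # megabytes; megabytes_to_bytes() converts to bytes
        "instances": {
            "fedora": {"xmlrpc_url": "https://koji.fedoraproject.org/kojihub"},
        },
    },
}

config = Config.model_validate(raw)

# snippet_inference falls back to the main inference settings
assert config.snippet_inference.url == config.inference.url
# max_artifact_size is stored in bytes after validation
assert config.koji.max_artifact_size == 300 * 1024 * 1024
# set_koji_instance_configs() injects the instance name into each KojiInstanceConfig
assert config.koji.instances["fedora"].name == "fedora"
```

Compared with 2.10.0, unit conversion and the snippet-inference fallback happen declaratively during validation rather than inside hand-written constructors, and the GitLab client, aiohttp session, Koji session, and rate limiter that previously lived on these models are no longer constructed here.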