logdetective 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,6 @@
1
- import asyncio
2
- from collections import defaultdict
3
1
  import datetime
4
2
  from logging import BASIC_FORMAT
5
- from typing import List, Dict, Optional
3
+ from typing import List, Dict, Optional, Any
6
4
  from pydantic import (
7
5
  BaseModel,
8
6
  Field,
@@ -10,14 +8,8 @@ from pydantic import (
10
8
  field_validator,
11
9
  NonNegativeFloat,
12
10
  HttpUrl,
13
- PrivateAttr,
14
11
  )
15
12
 
16
- import aiohttp
17
-
18
- from aiolimiter import AsyncLimiter
19
- from gitlab import Gitlab
20
- import koji
21
13
 
22
14
  from logdetective.constants import (
23
15
  DEFAULT_TEMPERATURE,
@@ -26,8 +18,6 @@ from logdetective.constants import (
26
18
  SYSTEM_ROLE_DEFAULT,
27
19
  USER_ROLE_DEFAULT,
28
20
  )
29
-
30
- from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
31
21
  from logdetective.utils import check_csgrep
32
22
 
33
23
 
@@ -177,40 +167,14 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
177
167
  # OpenAI client library requires a string to be specified for API token
178
168
  # even if it is not checked on the server side
179
169
  api_token: str = "None"
180
- model: str = ""
170
+ model: str = "default-model"
181
171
  temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
182
172
  max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
183
173
  http_timeout: float = 5.0
184
174
  user_role: str = USER_ROLE_DEFAULT
185
175
  system_role: str = SYSTEM_ROLE_DEFAULT
186
176
  llm_api_timeout: float = 15.0
187
- _limiter: AsyncLimiter = PrivateAttr(
188
- default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))
189
-
190
- def __init__(self, data: Optional[dict] = None):
191
- super().__init__()
192
- if data is None:
193
- return
194
-
195
- self.max_tokens = data.get("max_tokens", -1)
196
- self.log_probs = data.get("log_probs", True)
197
- self.url = data.get("url", "")
198
- self.http_timeout = data.get("http_timeout", 5.0)
199
- self.api_token = data.get("api_token", "None")
200
- self.model = data.get("model", "default-model")
201
- self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
202
- self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
203
- self.user_role = data.get("user_role", USER_ROLE_DEFAULT)
204
- self.system_role = data.get("system_role", SYSTEM_ROLE_DEFAULT)
205
- self._requests_per_minute = data.get(
206
- "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
207
- )
208
- self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
209
- self._limiter = AsyncLimiter(self._requests_per_minute)
210
-
211
- def get_limiter(self):
212
- """Return the limiter object so it can be used as a context manager"""
213
- return self._limiter
177
+ requests_per_minute: int = LLM_DEFAULT_REQUESTS_PER_MINUTE
214
178
 
215
179
 
216
180
  class ExtractorConfig(BaseModel):
@@ -221,64 +185,25 @@ class ExtractorConfig(BaseModel):
221
185
  max_snippet_len: int = 2000
222
186
  csgrep: bool = False
223
187
 
224
- _extractors: List[Extractor] = PrivateAttr(default_factory=list)
225
-
226
- def _setup_extractors(self):
227
- """Initialize extractors with common settings."""
228
- self._extractors = [
229
- DrainExtractor(
230
- verbose=self.verbose,
231
- max_snippet_len=self.max_snippet_len,
232
- max_clusters=self.max_clusters,
233
- )
234
- ]
235
-
236
- if self.csgrep:
237
- self._extractors.append(
238
- CSGrepExtractor(
239
- verbose=self.verbose,
240
- max_snippet_len=self.max_snippet_len,
241
- )
242
- )
243
-
244
- def __init__(self, data: Optional[dict] = None):
245
- super().__init__(data=data)
246
-
247
- if data is None:
248
- self._setup_extractors()
249
- return
250
-
251
- self.max_clusters = data.get("max_clusters", 8)
252
- self.verbose = data.get("verbose", False)
253
- self.max_snippet_len = data.get("max_snippet_len", 2000)
254
- self.csgrep = data.get("csgrep", False)
255
-
256
- self._setup_extractors()
257
-
258
- def get_extractors(self) -> List[Extractor]:
259
- """Return list of initialized extractors, each will be applied in turn
260
- on original log text to retrieve snippets."""
261
- return self._extractors
262
-
263
- @field_validator("csgrep", mode="after")
188
+ @field_validator("csgrep", mode="before")
264
189
  @classmethod
265
- def validate_csgrep(cls, value: bool) -> bool:
266
- """Verify that csgrep is available if requested."""
267
- if not check_csgrep():
190
+ def verify_csgrep(cls, v: bool):
191
+ """Verify presence of csgrep binary if csgrep extractor is requested."""
192
+ if v and not check_csgrep():
268
193
  raise ValueError(
269
194
  "Requested csgrep extractor but `csgrep` binary is not in the PATH"
270
195
  )
271
- return value
196
+ return v
272
197
 
273
198
 
274
199
  class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
275
200
  """Model for GitLab configuration of logdetective server."""
276
201
 
277
- name: str = None
278
- url: str = None
202
+ name: str
203
+ url: str = "https://gitlab.com"
279
204
  # Path to API of the gitlab instance, assuming `url` as prefix.
280
- api_path: str = None
281
- api_token: str = None
205
+ api_path: str = "/api/v4"
206
+ api_token: Optional[str] = None
282
207
 
283
208
  # This is a list to support key rotation.
284
209
  # When the key is being changed, we will add the new key as a new entry in
@@ -289,69 +214,17 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr
289
214
  webhook_secrets: Optional[List[str]] = None
290
215
 
291
216
  timeout: float = 5.0
292
- _conn: Gitlab | None = PrivateAttr(default=None)
293
- _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)
294
217
 
295
218
  # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
296
219
  max_artifact_size: int = 300 * 1024 * 1024
297
220
 
298
- def __init__(self, name: str, data: Optional[dict] = None):
299
- super().__init__()
300
- if data is None:
301
- return
302
-
303
- self.name = name
304
- self.url = data.get("url", "https://gitlab.com")
305
- self.api_path = data.get("api_path", "/api/v4")
306
- self.api_token = data.get("api_token", None)
307
- self.webhook_secrets = data.get("webhook_secrets", None)
308
- self.max_artifact_size = int(data.get("max_artifact_size", 300)) * 1024 * 1024
309
-
310
- self.timeout = data.get("timeout", 5.0)
311
- self._conn = Gitlab(
312
- url=self.url,
313
- private_token=self.api_token,
314
- timeout=self.timeout,
315
- )
316
-
317
- def get_connection(self):
318
- """Get the Gitlab connection object"""
319
- return self._conn
320
-
321
- def get_http_session(self):
322
- """Return the internal HTTP session so it can be used to contect the
323
- Gitlab server. May be used as a context manager."""
324
-
325
- # Create the session on the first attempt. We need to do this "lazily"
326
- # because it needs to happen once the event loop is running, even
327
- # though the initialization itself is synchronous.
328
- if not self._http_session:
329
- self._http_session = aiohttp.ClientSession(
330
- base_url=self.url,
331
- headers={"Authorization": f"Bearer {self.api_token}"},
332
- timeout=aiohttp.ClientTimeout(
333
- total=self.timeout,
334
- connect=3.07,
335
- ),
336
- )
337
-
338
- return self._http_session
339
-
340
- def __del__(self):
341
- # Close connection when this object is destroyed
342
- if self._http_session:
343
- try:
344
- loop = asyncio.get_running_loop()
345
- loop.create_task(self._http_session.close())
346
- except RuntimeError:
347
- # No loop running, so create one to close the session
348
- loop = asyncio.new_event_loop()
349
- loop.run_until_complete(self._http_session.close())
350
- loop.close()
351
- except Exception: # pylint: disable=broad-exception-caught
352
- # We should only get here if we're shutting down, so we don't
353
- # really care if the close() completes cleanly.
354
- pass
221
+ @field_validator("max_artifact_size", mode="before")
222
+ @classmethod
223
+ def megabytes_to_bytes(cls, v: Any):
224
+ """Convert max_artifact_size from megabytes to bytes."""
225
+ if isinstance(v, int):
226
+ return v * 1024 * 1024
227
+ return 300 * 1024 * 1024
355
228
 
356
229
 
357
230
  class GitLabConfig(BaseModel):
@@ -359,63 +232,28 @@ class GitLabConfig(BaseModel):
359
232
 
360
233
  instances: Dict[str, GitLabInstanceConfig] = {}
361
234
 
362
- def __init__(self, data: Optional[dict] = None):
363
- super().__init__()
364
- if data is None:
365
- return
235
+ @model_validator(mode="before")
236
+ @classmethod
237
+ def set_gitlab_instance_configs(cls, data: Any):
238
+ """Initialize configuration for each GitLab instance"""
239
+ if not isinstance(data, dict):
240
+ return data
366
241
 
242
+ instances = {}
367
243
  for instance_name, instance_data in data.items():
368
- instance = GitLabInstanceConfig(instance_name, instance_data)
369
- self.instances[instance.url] = instance
244
+ instance = GitLabInstanceConfig(name=instance_name, **instance_data)
245
+ instances[instance.url] = instance
246
+
247
+ return {"instances": instances}
370
248
 
371
249
 
372
250
  class KojiInstanceConfig(BaseModel):
373
251
  """Model for Koji configuration of logdetective server."""
374
252
 
375
253
  name: str = ""
376
- xmlrpc_url: str = ""
254
+ xmlrpc_url: str = "https://koji.fedoraproject.org/kojihub"
377
255
  tokens: List[str] = []
378
256
 
379
- _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
380
- _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))
381
-
382
- def __init__(self, name: str, data: Optional[dict] = None):
383
- super().__init__()
384
-
385
- self.name = name
386
- if data is None:
387
- # Set some reasonable defaults
388
- self.xmlrpc_url = "https://koji.fedoraproject.org/kojihub"
389
- self.tokens = []
390
- self.max_artifact_size = 1024 * 1024
391
- return
392
-
393
- self.xmlrpc_url = data.get(
394
- "xmlrpc_url", "https://koji.fedoraproject.org/kojihub"
395
- )
396
- self.tokens = data.get("tokens", [])
397
-
398
- def get_connection(self):
399
- """Get the Koji connection object"""
400
- if not self._conn:
401
- self._conn = koji.ClientSession(self.xmlrpc_url)
402
- return self._conn
403
-
404
- def register_callback(self, task_id: int, callback: str):
405
- """Register a callback for a task"""
406
- self._callbacks[task_id].add(callback)
407
-
408
- def clear_callbacks(self, task_id: int):
409
- """Unregister a callback for a task"""
410
- try:
411
- del self._callbacks[task_id]
412
- except KeyError:
413
- pass
414
-
415
- def get_callbacks(self, task_id: int) -> set[str]:
416
- """Get the callbacks for a task"""
417
- return self._callbacks[task_id]
418
-
419
257
 
420
258
  class KojiConfig(BaseModel):
421
259
  """Model for Koji configuration of logdetective server."""
@@ -424,23 +262,26 @@ class KojiConfig(BaseModel):
424
262
  analysis_timeout: int = 15
425
263
  max_artifact_size: int = 300 * 1024 * 1024
426
264
 
427
- def __init__(self, data: Optional[dict] = None):
428
- super().__init__()
429
- if data is None:
430
- return
431
-
432
- # Handle analysis_timeout with default 15
433
- self.analysis_timeout = data.get("analysis_timeout", 15)
434
-
435
- # Handle max_artifact_size with default 300
436
- self.max_artifact_size = data.get("max_artifact_size", 300) * 1024 * 1024
265
+ @field_validator("max_artifact_size", mode="before")
266
+ @classmethod
267
+ def megabytes_to_bytes(cls, v: Any):
268
+ """Convert max_artifact_size from megabytes to bytes."""
269
+ if isinstance(v, int):
270
+ return v * 1024 * 1024
271
+ return 300 * 1024 * 1024
437
272
 
438
- # Handle instances dictionary
439
- instances_data = data.get("instances", {})
440
- for instance_name, instance_data in instances_data.items():
441
- self.instances[instance_name] = KojiInstanceConfig(
442
- instance_name, instance_data
443
- )
273
+ @model_validator(mode="before")
274
+ @classmethod
275
+ def set_koji_instance_configs(cls, data: Any):
276
+ """Initialize configuration for each Koji instance."""
277
+ if isinstance(data, dict):
278
+ instances = {}
279
+ for instance_name, instance_data in data.get("instances", {}).items():
280
+ instances[instance_name] = KojiInstanceConfig(
281
+ name=instance_name, **instance_data
282
+ )
283
+ data["instances"] = instances
284
+ return data
444
285
 
445
286
 
446
287
  class LogConfig(BaseModel):
@@ -452,17 +293,6 @@ class LogConfig(BaseModel):
452
293
  path: str | None = None
453
294
  format: str = BASIC_FORMAT
454
295
 
455
- def __init__(self, data: Optional[dict] = None):
456
- super().__init__()
457
- if data is None:
458
- return
459
-
460
- self.name = data.get("name", "logdetective")
461
- self.level_stream = data.get("level_stream", "INFO").upper()
462
- self.level_file = data.get("level_file", "INFO").upper()
463
- self.path = data.get("path")
464
- self.format = data.get("format", BASIC_FORMAT)
465
-
466
296
 
467
297
  class GeneralConfig(BaseModel):
468
298
  """General config options for Log Detective"""
@@ -474,50 +304,27 @@ class GeneralConfig(BaseModel):
474
304
  collect_emojis_interval: int = 60 * 60 # seconds
475
305
  top_k_snippets: int = 0
476
306
 
477
- def __init__(self, data: Optional[dict] = None):
478
- super().__init__()
479
- if data is None:
480
- return
481
-
482
- self.packages = data.get("packages", [])
483
- self.excluded_packages = data.get("excluded_packages", [])
484
- self.devmode = data.get("devmode", False)
485
- self.sentry_dsn = data.get("sentry_dsn")
486
- self.collect_emojis_interval = data.get(
487
- "collect_emojis_interval", 60 * 60
488
- ) # seconds
489
- self.top_k_snippets = data.get("top_k_snippets", 0)
490
-
491
307
 
492
308
  class Config(BaseModel):
493
309
  """Model for configuration of logdetective server."""
494
310
 
495
- log: LogConfig = LogConfig()
496
- inference: InferenceConfig = InferenceConfig()
497
- snippet_inference: InferenceConfig = InferenceConfig()
311
+ log: LogConfig = Field(default_factory=LogConfig)
312
+ inference: InferenceConfig = Field(default_factory=InferenceConfig)
313
+ snippet_inference: InferenceConfig = Field(default_factory=InferenceConfig)
498
314
  # TODO(jpodivin): Extend to work with multiple extractor configs
499
- extractor: ExtractorConfig = ExtractorConfig()
500
- gitlab: GitLabConfig = GitLabConfig()
501
- koji: KojiConfig = KojiConfig()
502
- general: GeneralConfig = GeneralConfig()
503
-
504
- def __init__(self, data: Optional[dict] = None):
505
- super().__init__()
506
-
507
- if data is None:
508
- return
509
-
510
- self.log = LogConfig(data.get("log"))
511
- self.inference = InferenceConfig(data.get("inference"))
512
- self.extractor = ExtractorConfig(data.get("extractor"))
513
- self.gitlab = GitLabConfig(data.get("gitlab"))
514
- self.koji = KojiConfig(data.get("koji"))
515
- self.general = GeneralConfig(data.get("general"))
516
-
517
- if snippet_inference := data.get("snippet_inference", None):
518
- self.snippet_inference = InferenceConfig(snippet_inference)
519
- else:
520
- self.snippet_inference = self.inference
315
+ extractor: ExtractorConfig = Field(default_factory=ExtractorConfig)
316
+ gitlab: GitLabConfig = Field(default_factory=GitLabConfig)
317
+ koji: KojiConfig = Field(default_factory=KojiConfig)
318
+ general: GeneralConfig = Field(default_factory=GeneralConfig)
319
+
320
+ @model_validator(mode="before")
321
+ @classmethod
322
+ def default_snippet_inference(cls, data: Any):
323
+ """Use base inference configuration, if specific snippet configuration isn't provided."""
324
+ if isinstance(data, dict):
325
+ if "snippet_inference" not in data and "inference" in data:
326
+ data["snippet_inference"] = data["inference"]
327
+ return data
521
328
 
522
329
 
523
330
  class TimePeriod(BaseModel):