logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- logdetective/constants.py +33 -12
- logdetective/extractors.py +137 -68
- logdetective/logdetective.py +102 -33
- logdetective/models.py +99 -0
- logdetective/prompts-summary-first.yml +20 -0
- logdetective/prompts-summary-only.yml +13 -0
- logdetective/prompts.yml +90 -0
- logdetective/remote_log.py +67 -0
- logdetective/server/compressors.py +186 -0
- logdetective/server/config.py +78 -0
- logdetective/server/database/base.py +34 -26
- logdetective/server/database/models/__init__.py +33 -0
- logdetective/server/database/models/exceptions.py +17 -0
- logdetective/server/database/models/koji.py +143 -0
- logdetective/server/database/models/merge_request_jobs.py +623 -0
- logdetective/server/database/models/metrics.py +427 -0
- logdetective/server/emoji.py +148 -0
- logdetective/server/exceptions.py +37 -0
- logdetective/server/gitlab.py +451 -0
- logdetective/server/koji.py +159 -0
- logdetective/server/llm.py +309 -0
- logdetective/server/metric.py +75 -30
- logdetective/server/models.py +426 -23
- logdetective/server/plot.py +432 -0
- logdetective/server/server.py +580 -468
- logdetective/server/templates/base_response.html.j2 +59 -0
- logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
- logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
- logdetective/server/utils.py +98 -32
- logdetective/skip_snippets.yml +12 -0
- logdetective/utils.py +187 -73
- logdetective-2.11.0.dist-info/METADATA +568 -0
- logdetective-2.11.0.dist-info/RECORD +40 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
- logdetective/server/database/models.py +0 -88
- logdetective-0.4.0.dist-info/METADATA +0 -333
- logdetective-0.4.0.dist-info/RECORD +0 -19
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
- {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/server/models.py
CHANGED
```diff
@@ -1,6 +1,34 @@
+import asyncio
+from collections import defaultdict
+import datetime
 from logging import BASIC_FORMAT
-from typing import List, Dict, Optional
-from pydantic import
+from typing import List, Dict, Optional
+from pydantic import (
+    BaseModel,
+    Field,
+    model_validator,
+    field_validator,
+    NonNegativeFloat,
+    HttpUrl,
+    PrivateAttr,
+)
+
+import aiohttp
+
+from aiolimiter import AsyncLimiter
+from gitlab import Gitlab
+import koji
+
+from logdetective.constants import (
+    DEFAULT_TEMPERATURE,
+    LLM_DEFAULT_MAX_QUEUE_SIZE,
+    LLM_DEFAULT_REQUESTS_PER_MINUTE,
+    SYSTEM_ROLE_DEFAULT,
+    USER_ROLE_DEFAULT,
+)
+
+from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
+from logdetective.utils import check_csgrep
 
 
 class BuildLog(BaseModel):
@@ -21,7 +49,7 @@ class JobHook(BaseModel):
 
     # The identifier of the job. We only care about 'build_rpm' and
     # 'build_centos_stream_rpm' jobs.
-    build_name: str = Field(pattern=r"^build
+    build_name: str = Field(pattern=r"^build.*rpm$")
 
     # A string representing the job status. We only care about 'failed' jobs.
     build_status: str = Field(pattern=r"^failed$")
```
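The new `build_name` pattern accepts any job name that begins with `build` and ends with `rpm`, which covers both job names mentioned in the comment. A quick check with hypothetical job names, using only the pattern shown above:

```python
import re

# Pattern taken from the new JobHook.build_name field above.
BUILD_NAME_PATTERN = r"^build.*rpm$"

# Both job names called out in the comment match.
assert re.match(BUILD_NAME_PATTERN, "build_rpm")
assert re.match(BUILD_NAME_PATTERN, "build_centos_stream_rpm")

# Unrelated job names (hypothetical) do not.
assert re.match(BUILD_NAME_PATTERN, "build_docs") is None
```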
```diff
@@ -37,6 +65,51 @@ class JobHook(BaseModel):
     project_id: int
 
 
+class EmojiMergeRequest(BaseModel):
+    """Model of the 'merge_request' subsection of Emoji webhook messages.
+    This model implements only the fields that we care about. The webhook
+    sends many more fields that we will ignore."""
+
+    # The identifier of the target project
+    target_project_id: int
+
+    # The internal identifier (relative to the target project)
+    iid: int
+
+
+class EmojiHook(BaseModel):
+    """Model of Job Hook events sent from GitLab.
+    Full details of the specification are available at
+    https://docs.gitlab.com/user/project/integrations/webhook_events/#job-events
+    This model implements only the fields that we care about. The webhook
+    sends many more fields that we will ignore."""
+
+    # The kind of webhook message. We are only interested in 'emoji' messages
+    # which represents awarding or revoking emoji reactions on notes.
+    object_kind: str = Field(pattern=r"^emoji$")
+
+    # Information about the merge request this emoji applies to, if any.
+    merge_request: EmojiMergeRequest = Field(default=None)
+
+
+class SnippetAnalysis(BaseModel):
+    """Model of snippet analysis from LLM."""
+
+    text: str = Field(description="Analysis of log snippet contents.")
+
+
+class RatedSnippetAnalysis(SnippetAnalysis):
+    """Model for rated snippet analysis. This model is used to generate
+    json schema for inference with structured output."""
+
+    relevance: int = Field(
+        ge=0,
+        le=100,
+        description="Estimate of likelyhood that snippet contains an error, "
+        "with 0 standing for completely unlikely, 100 for absolutely certain.",
+    )
+
+
 class Explanation(BaseModel):
     """Model of snippet or general log explanation from Log Detective"""
 
```
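As its docstring says, `RatedSnippetAnalysis` exists mainly so a JSON schema can be handed to the inference backend for structured output. A minimal sketch of that round trip using standard Pydantic v2 calls; the two models are restated locally for self-containment and the example reply is made up:

```python
from pydantic import BaseModel, Field


class SnippetAnalysis(BaseModel):
    """Plain snippet analysis, mirroring the model above."""
    text: str = Field(description="Analysis of log snippet contents.")


class RatedSnippetAnalysis(SnippetAnalysis):
    """Adds a 0-100 relevance estimate, mirroring the model above."""
    relevance: int = Field(ge=0, le=100)


# Pydantic v2 emits the JSON schema that can be passed to a backend
# supporting structured output.
schema = RatedSnippetAnalysis.model_json_schema()
print(schema["properties"]["relevance"])  # includes 'minimum': 0 and 'maximum': 100

# Validating a raw (hypothetical) LLM reply against the model:
reply = '{"text": "gcc exited with an error", "relevance": 87}'
analysis = RatedSnippetAnalysis.model_validate_json(reply)
print(analysis.relevance)  # 87
```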
```diff
@@ -44,6 +117,7 @@ class Explanation(BaseModel):
     logprobs: Optional[List[Dict]] = None
 
     def __str__(self):
+        """Return text of the Explanation"""
         return self.text
 
 
@@ -54,7 +128,8 @@ class AnalyzedSnippet(BaseModel):
     text: original snippet text
     line_number: location of snippet in original log
    """
-
+
+    explanation: SnippetAnalysis | RatedSnippetAnalysis
    text: str
    line_number: int
 
```
```diff
@@ -82,14 +157,35 @@ class StagedResponse(Response):
     snippets: List[AnalyzedSnippet]
 
 
-class
+class KojiStagedResponse(BaseModel):
+    """Model of data returned by Log Detective API when called when a Koji build
+    analysis is requested. Contains list of reponses to prompts for individual
+    snippets.
+    """
+
+    task_id: int
+    log_file_name: str
+    response: StagedResponse
+
+
+class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
     """Model for inference configuration of logdetective server."""
 
     max_tokens: int = -1
-    log_probs:
-
-
-
+    log_probs: bool = True
+    url: str = ""
+    # OpenAI client library requires a string to be specified for API token
+    # even if it is not checked on the server side
+    api_token: str = "None"
+    model: str = ""
+    temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
+    max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
+    http_timeout: float = 5.0
+    user_role: str = USER_ROLE_DEFAULT
+    system_role: str = SYSTEM_ROLE_DEFAULT
+    llm_api_timeout: float = 15.0
+    _limiter: AsyncLimiter = PrivateAttr(
+        default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))
 
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
```
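The new private `_limiter` attribute wraps `aiolimiter.AsyncLimiter`, which the next hunk exposes through `get_limiter()`. A self-contained sketch of the throttling pattern it enables; the rate of 60 requests per minute and the placeholder inference call are assumptions for illustration only:

```python
import asyncio

from aiolimiter import AsyncLimiter

# Illustrative rate only; the server reads its value from the
# "requests_per_minute" setting or LLM_DEFAULT_REQUESTS_PER_MINUTE.
limiter = AsyncLimiter(60)  # 60 acquisitions per 60-second window


async def analyze_snippet(snippet: str) -> str:
    # AsyncLimiter is an async context manager: entering it waits until
    # capacity is available in the current window.
    async with limiter:
        await asyncio.sleep(0.1)  # stand-in for the real LLM call
        return f"analysis of: {snippet}"


async def main():
    results = await asyncio.gather(*(analyze_snippet(f"snippet {i}") for i in range(5)))
    print(results)


asyncio.run(main())
```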
```diff
@@ -97,53 +193,262 @@ class InferenceConfig(BaseModel):
             return
 
         self.max_tokens = data.get("max_tokens", -1)
-        self.log_probs = data.get("log_probs",
-        self.
+        self.log_probs = data.get("log_probs", True)
+        self.url = data.get("url", "")
+        self.http_timeout = data.get("http_timeout", 5.0)
+        self.api_token = data.get("api_token", "None")
+        self.model = data.get("model", "default-model")
+        self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
+        self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
+        self.user_role = data.get("user_role", USER_ROLE_DEFAULT)
+        self.system_role = data.get("system_role", SYSTEM_ROLE_DEFAULT)
+        self._requests_per_minute = data.get(
+            "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
+        )
+        self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
+        self._limiter = AsyncLimiter(self._requests_per_minute)
+
+    def get_limiter(self):
+        """Return the limiter object so it can be used as a context manager"""
+        return self._limiter
 
 
 class ExtractorConfig(BaseModel):
     """Model for extractor configuration of logdetective server."""
 
-    context: bool = True
     max_clusters: int = 8
     verbose: bool = False
+    max_snippet_len: int = 2000
+    csgrep: bool = False
+
+    _extractors: List[Extractor] = PrivateAttr(default_factory=list)
+
+    def _setup_extractors(self):
+        """Initialize extractors with common settings."""
+        self._extractors = [
+            DrainExtractor(
+                verbose=self.verbose,
+                max_snippet_len=self.max_snippet_len,
+                max_clusters=self.max_clusters,
+            )
+        ]
+
+        if self.csgrep:
+            self._extractors.append(
+                CSGrepExtractor(
+                    verbose=self.verbose,
+                    max_snippet_len=self.max_snippet_len,
+                )
+            )
 
     def __init__(self, data: Optional[dict] = None):
-        super().__init__()
+        super().__init__(data=data)
+
         if data is None:
+            self._setup_extractors()
             return
 
-        self.context = data.get("context", True)
         self.max_clusters = data.get("max_clusters", 8)
         self.verbose = data.get("verbose", False)
+        self.max_snippet_len = data.get("max_snippet_len", 2000)
+        self.csgrep = data.get("csgrep", False)
 
+        self._setup_extractors()
 
-
+    def get_extractors(self) -> List[Extractor]:
+        """Return list of initialized extractors, each will be applied in turn
+        on original log text to retrieve snippets."""
+        return self._extractors
+
+    @field_validator("csgrep", mode="after")
+    @classmethod
+    def validate_csgrep(cls, value: bool) -> bool:
+        """Verify that csgrep is available if requested."""
+        if not check_csgrep():
+            raise ValueError(
+                "Requested csgrep extractor but `csgrep` binary is not in the PATH"
+            )
+        return value
+
+
+class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
     """Model for GitLab configuration of logdetective server."""
 
+    name: str = None
     url: str = None
-
+    # Path to API of the gitlab instance, assuming `url` as prefix.
+    api_path: str = None
     api_token: str = None
 
+    # This is a list to support key rotation.
+    # When the key is being changed, we will add the new key as a new entry in
+    # the configuration and then remove the old key once all of the client
+    # webhook configurations have been updated.
+    # If this option is left empty or unspecified, all requests will be
+    # considered authorized.
+    webhook_secrets: Optional[List[str]] = None
+
+    timeout: float = 5.0
+    _conn: Gitlab | None = PrivateAttr(default=None)
+    _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)
+
     # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
-    max_artifact_size: int = 300
+    max_artifact_size: int = 300 * 1024 * 1024
 
-    def __init__(self, data: Optional[dict] = None):
+    def __init__(self, name: str, data: Optional[dict] = None):
         super().__init__()
         if data is None:
             return
 
+        self.name = name
         self.url = data.get("url", "https://gitlab.com")
-        self.
+        self.api_path = data.get("api_path", "/api/v4")
         self.api_token = data.get("api_token", None)
-        self.
+        self.webhook_secrets = data.get("webhook_secrets", None)
+        self.max_artifact_size = int(data.get("max_artifact_size", 300)) * 1024 * 1024
+
+        self.timeout = data.get("timeout", 5.0)
+        self._conn = Gitlab(
+            url=self.url,
+            private_token=self.api_token,
+            timeout=self.timeout,
+        )
+
+    def get_connection(self):
+        """Get the Gitlab connection object"""
+        return self._conn
+
+    def get_http_session(self):
+        """Return the internal HTTP session so it can be used to contect the
+        Gitlab server. May be used as a context manager."""
+
+        # Create the session on the first attempt. We need to do this "lazily"
+        # because it needs to happen once the event loop is running, even
+        # though the initialization itself is synchronous.
+        if not self._http_session:
+            self._http_session = aiohttp.ClientSession(
+                base_url=self.url,
+                headers={"Authorization": f"Bearer {self.api_token}"},
+                timeout=aiohttp.ClientTimeout(
+                    total=self.timeout,
+                    connect=3.07,
+                ),
+            )
+
+        return self._http_session
+
+    def __del__(self):
+        # Close connection when this object is destroyed
+        if self._http_session:
+            try:
+                loop = asyncio.get_running_loop()
+                loop.create_task(self._http_session.close())
+            except RuntimeError:
+                # No loop running, so create one to close the session
+                loop = asyncio.new_event_loop()
+                loop.run_until_complete(self._http_session.close())
+                loop.close()
+            except Exception: # pylint: disable=broad-exception-caught
+                # We should only get here if we're shutting down, so we don't
+                # really care if the close() completes cleanly.
+                pass
+
+
+class GitLabConfig(BaseModel):
+    """Model for GitLab configuration of logdetective server."""
+
+    instances: Dict[str, GitLabInstanceConfig] = {}
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+
+        for instance_name, instance_data in data.items():
+            instance = GitLabInstanceConfig(instance_name, instance_data)
+            self.instances[instance.url] = instance
+
+
+class KojiInstanceConfig(BaseModel):
+    """Model for Koji configuration of logdetective server."""
+
+    name: str = ""
+    xmlrpc_url: str = ""
+    tokens: List[str] = []
+
+    _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
+    _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))
+
+    def __init__(self, name: str, data: Optional[dict] = None):
+        super().__init__()
+
+        self.name = name
+        if data is None:
+            # Set some reasonable defaults
+            self.xmlrpc_url = "https://koji.fedoraproject.org/kojihub"
+            self.tokens = []
+            self.max_artifact_size = 1024 * 1024
+            return
+
+        self.xmlrpc_url = data.get(
+            "xmlrpc_url", "https://koji.fedoraproject.org/kojihub"
+        )
+        self.tokens = data.get("tokens", [])
+
+    def get_connection(self):
+        """Get the Koji connection object"""
+        if not self._conn:
+            self._conn = koji.ClientSession(self.xmlrpc_url)
+        return self._conn
+
+    def register_callback(self, task_id: int, callback: str):
+        """Register a callback for a task"""
+        self._callbacks[task_id].add(callback)
+
+    def clear_callbacks(self, task_id: int):
+        """Unregister a callback for a task"""
+        try:
+            del self._callbacks[task_id]
+        except KeyError:
+            pass
+
+    def get_callbacks(self, task_id: int) -> set[str]:
+        """Get the callbacks for a task"""
+        return self._callbacks[task_id]
+
+
+class KojiConfig(BaseModel):
+    """Model for Koji configuration of logdetective server."""
+
+    instances: Dict[str, KojiInstanceConfig] = {}
+    analysis_timeout: int = 15
+    max_artifact_size: int = 300 * 1024 * 1024
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+
+        # Handle analysis_timeout with default 15
+        self.analysis_timeout = data.get("analysis_timeout", 15)
+
+        # Handle max_artifact_size with default 300
+        self.max_artifact_size = data.get("max_artifact_size", 300) * 1024 * 1024
+
+        # Handle instances dictionary
+        instances_data = data.get("instances", {})
+        for instance_name, instance_data in instances_data.items():
+            self.instances[instance_name] = KojiInstanceConfig(
+                instance_name, instance_data
+            )
 
 
 class LogConfig(BaseModel):
     """Logging configuration"""
 
     name: str = "logdetective"
-
+    level_stream: str | int = "INFO"
+    level_file: str | int = "INFO"
     path: str | None = None
     format: str = BASIC_FORMAT
 
```
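The Koji configuration models added above are plain dict-driven wrappers, so a configuration fragment maps onto them directly. A hypothetical fragment and how `KojiConfig` reads it (assuming the 2.11.0 wheel is installed; the instance name, URL, and token are made up):

```python
from logdetective.server.models import KojiConfig

# Keys mirror what KojiConfig.__init__ and KojiInstanceConfig.__init__ read.
koji_section = {
    "analysis_timeout": 30,
    "max_artifact_size": 100,  # MiB; stored internally in bytes
    "instances": {
        "fedora": {
            "xmlrpc_url": "https://koji.fedoraproject.org/kojihub",
            "tokens": ["hypothetical-token"],
        },
    },
}

koji_config = KojiConfig(koji_section)
print(koji_config.max_artifact_size)  # 104857600 (100 * 1024 * 1024)

fedora = koji_config.instances["fedora"]
session = fedora.get_connection()  # koji.ClientSession, created lazily
```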
```diff
@@ -153,7 +458,8 @@ class LogConfig(BaseModel):
             return
 
         self.name = data.get("name", "logdetective")
-        self.
+        self.level_stream = data.get("level_stream", "INFO").upper()
+        self.level_file = data.get("level_file", "INFO").upper()
         self.path = data.get("path")
         self.format = data.get("format", BASIC_FORMAT)
 
```
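The single log level is split into separate stream and file levels. A sketch of how such a configuration could be applied with the standard `logging` module; the handler wiring below is an assumption for illustration, not the package's actual setup code:

```python
import logging

# Hypothetical values following LogConfig's fields.
log_cfg = {
    "name": "logdetective",
    "level_stream": "INFO",
    "level_file": "DEBUG",
    "path": "logdetective.log",
    "format": logging.BASIC_FORMAT,
}

logger = logging.getLogger(log_cfg["name"])
logger.setLevel(logging.DEBUG)  # let the handlers do the filtering

stream_handler = logging.StreamHandler()
stream_handler.setLevel(log_cfg["level_stream"])

file_handler = logging.FileHandler(log_cfg["path"])
file_handler.setLevel(log_cfg["level_file"])

formatter = logging.Formatter(log_cfg["format"])
for handler in (stream_handler, file_handler):
    handler.setFormatter(formatter)
    logger.addHandler(handler)

logger.debug("written to the file only")
logger.info("written to both the stream and the file")
```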
```diff
@@ -161,7 +467,12 @@ class LogConfig(BaseModel):
 class GeneralConfig(BaseModel):
     """General config options for Log Detective"""
 
-    packages: List[str] =
+    packages: List[str] = []
+    excluded_packages: List[str] = []
+    devmode: bool = False
+    sentry_dsn: HttpUrl | None = None
+    collect_emojis_interval: int = 60 * 60 # seconds
+    top_k_snippets: int = 0
 
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -169,6 +480,13 @@ class GeneralConfig(BaseModel):
             return
 
         self.packages = data.get("packages", [])
+        self.excluded_packages = data.get("excluded_packages", [])
+        self.devmode = data.get("devmode", False)
+        self.sentry_dsn = data.get("sentry_dsn")
+        self.collect_emojis_interval = data.get(
+            "collect_emojis_interval", 60 * 60
+        ) # seconds
+        self.top_k_snippets = data.get("top_k_snippets", 0)
 
 
 class Config(BaseModel):
@@ -176,8 +494,11 @@ class Config(BaseModel):
 
     log: LogConfig = LogConfig()
     inference: InferenceConfig = InferenceConfig()
+    snippet_inference: InferenceConfig = InferenceConfig()
+    # TODO(jpodivin): Extend to work with multiple extractor configs
     extractor: ExtractorConfig = ExtractorConfig()
     gitlab: GitLabConfig = GitLabConfig()
+    koji: KojiConfig = KojiConfig()
     general: GeneralConfig = GeneralConfig()
 
     def __init__(self, data: Optional[dict] = None):
```
```diff
@@ -190,4 +511,86 @@ class Config(BaseModel):
         self.inference = InferenceConfig(data.get("inference"))
         self.extractor = ExtractorConfig(data.get("extractor"))
         self.gitlab = GitLabConfig(data.get("gitlab"))
+        self.koji = KojiConfig(data.get("koji"))
         self.general = GeneralConfig(data.get("general"))
+
+        if snippet_inference := data.get("snippet_inference", None):
+            self.snippet_inference = InferenceConfig(snippet_inference)
+        else:
+            self.snippet_inference = self.inference
+
+
+class TimePeriod(BaseModel):
+    """Specification for a period of time.
+
+    If no indication is given
+    it falls back to a 2 days period of time.
+
+    Can't be smaller than a hour"""
+
+    weeks: Optional[int] = None
+    days: Optional[int] = None
+    hours: Optional[int] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_exclusive_fields(cls, data):
+        """Check that only one key between weeks, days and hours is defined,
+        if no period is specified, fall back to 2 days."""
+        if isinstance(data, dict):
+            how_many_fields = sum(
+                1
+                for field in ["weeks", "days", "hours"]
+                if field in data and data[field] is not None
+            )
+
+            if how_many_fields == 0:
+                data["days"] = 2 # by default fallback to a 2 days period
+
+            if how_many_fields > 1:
+                raise ValueError("Only one of months, weeks, days, or hours can be set")
+
+        return data
+
+    @field_validator("weeks", "days", "hours")
+    @classmethod
+    def check_positive(cls, v):
+        """Check that the given value is positive"""
+        if v is not None and v <= 0:
+            raise ValueError("Time period must be positive")
+        return v
+
+    def get_time_period(self) -> datetime.timedelta:
+        """Get the period of time represented by this input model.
+        Will default to 2 days, if no period is set.
+
+        Returns:
+            datetime.timedelta: The time period as a timedelta object.
+        """
+        delta = None
+        if self.weeks:
+            delta = datetime.timedelta(weeks=self.weeks)
+        elif self.days:
+            delta = datetime.timedelta(days=self.days)
+        elif self.hours:
+            delta = datetime.timedelta(hours=self.hours)
+        else:
+            delta = datetime.timedelta(days=2)
+        return delta
+
+    def get_period_start_time(
+        self, end_time: Optional[datetime.datetime] = None
+    ) -> datetime.datetime:
+        """Calculate the start time of this period based on the end time.
+
+        Args:
+            end_time (datetime.datetime, optional): The end time of the period.
+                Defaults to current UTC time if not provided.
+
+        Returns:
+            datetime.datetime: The start time of the period.
+        """
+        time = end_time or datetime.datetime.now(datetime.timezone.utc)
+        if time.tzinfo is None:
+            time = time.replace(tzinfo=datetime.timezone.utc)
+        return time - self.get_time_period()
```