logdetective 2.1.0__tar.gz → 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {logdetective-2.1.0 → logdetective-2.2.1}/PKG-INFO +28 -1
  2. {logdetective-2.1.0 → logdetective-2.2.1}/README.md +27 -0
  3. logdetective-2.2.1/logdetective/extractors.py +165 -0
  4. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/logdetective.py +25 -12
  5. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/models.py +26 -0
  6. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/llm.py +19 -10
  7. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/models.py +46 -1
  8. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/server.py +9 -9
  9. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/utils.py +9 -27
  10. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/utils.py +48 -4
  11. {logdetective-2.1.0 → logdetective-2.2.1}/pyproject.toml +1 -1
  12. logdetective-2.1.0/logdetective/extractors.py +0 -57
  13. {logdetective-2.1.0 → logdetective-2.2.1}/LICENSE +0 -0
  14. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/__init__.py +0 -0
  15. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/constants.py +0 -0
  16. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/drain3.ini +0 -0
  17. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts-summary-first.yml +0 -0
  18. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts-summary-only.yml +0 -0
  19. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/prompts.yml +0 -0
  20. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/remote_log.py +0 -0
  21. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/__init__.py +0 -0
  22. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/compressors.py +0 -0
  23. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/config.py +0 -0
  24. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/__init__.py +0 -0
  25. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/base.py +0 -0
  26. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/__init__.py +0 -0
  27. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/exceptions.py +0 -0
  28. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/koji.py +0 -0
  29. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/merge_request_jobs.py +0 -0
  30. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/database/models/metrics.py +0 -0
  31. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/emoji.py +0 -0
  32. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/exceptions.py +0 -0
  33. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/gitlab.py +0 -0
  34. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/koji.py +0 -0
  35. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/metric.py +0 -0
  36. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/plot.py +0 -0
  37. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
  38. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
  39. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective/skip_snippets.yml +0 -0
  40. {logdetective-2.1.0 → logdetective-2.2.1}/logdetective.1.asciidoc +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 2.1.0
+ Version: 2.2.1
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -494,6 +494,33 @@ Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
  can be used as a starting point and is used as a default if no other definition is provided.
 
 
+ Extracting snippets with csgrep
+ -------------------------------
+
+ When working with logs containing messages from GCC, it can be beneficial to employ an
+ additional extractor based on the `csgrep` tool, to ensure that the messages are kept intact.
+ Since `csgrep` is not available as a Python package, it must be installed separately,
+ with a package manager or from [source](https://github.com/csutils/csdiff).
+
+ The binary is available as part of the `csdiff` package on Fedora.
+
+ ```
+ dnf install csdiff
+ ```
+
+ In the Log Detective CLI, the csgrep extractor can be activated with the `--csgrep` option.
+ In server mode, the `csgrep` field of the `extractor` config needs to be set to `true`.
+
+ ```
+ csgrep: true
+ ```
+
+ Both options are disabled by default, and an error will be produced if the option is used
+ but `csgrep` is not present in the $PATH.
+
+ The container images are built with `csdiff` installed.
+
+
  License
  -------
 
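Editor's note: for orientation, here is a hypothetical end-to-end session on Fedora combining the install step with the new flag. The log path is illustrative; the positional argument may also be a URL.

```
dnf install csdiff
logdetective --csgrep ./build.log
```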
@@ -442,6 +442,33 @@ Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
  can be used as a starting point and is used as a default if no other definition is provided.
 
 
+ Extracting snippets with csgrep
+ -------------------------------
+
+ When working with logs containing messages from GCC, it can be beneficial to employ an
+ additional extractor based on the `csgrep` tool, to ensure that the messages are kept intact.
+ Since `csgrep` is not available as a Python package, it must be installed separately,
+ with a package manager or from [source](https://github.com/csutils/csdiff).
+
+ The binary is available as part of the `csdiff` package on Fedora.
+
+ ```
+ dnf install csdiff
+ ```
+
+ In the Log Detective CLI, the csgrep extractor can be activated with the `--csgrep` option.
+ In server mode, the `csgrep` field of the `extractor` config needs to be set to `true`.
+
+ ```
+ csgrep: true
+ ```
+
+ Both options are disabled by default, and an error will be produced if the option is used
+ but `csgrep` is not present in the $PATH.
+
+ The container images are built with `csdiff` installed.
+
+
  License
  -------
 
@@ -0,0 +1,165 @@
+ import os
+ import logging
+ import subprocess as sp
+ from typing import Tuple
+
+ import drain3
+ from drain3.template_miner_config import TemplateMinerConfig
+ from pydantic import ValidationError
+
+ from logdetective.utils import get_chunks, filter_snippet_patterns
+ from logdetective.models import SkipSnippets, CSGrepOutput
+
+ LOG = logging.getLogger("logdetective")
+
+
+ class Extractor:
+     """Base extractor class."""
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+     ):
+         self.verbose = verbose
+         self.skip_snippets = skip_snippets
+         self.max_snippet_len = max_snippet_len
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         raise NotImplementedError
+
+     def filter_snippet_patterns(
+         self, chunks: list[tuple[int, str]]
+     ) -> list[tuple[int, str]]:
+         """Keep only chunks that don't match any of the excluded patterns"""
+         chunks = [
+             (_, chunk)
+             for _, chunk in chunks
+             if not filter_snippet_patterns(chunk, self.skip_snippets)
+         ]
+         return chunks
+
+
+ class DrainExtractor(Extractor):
+     """A class that extracts information from logs using a template miner algorithm."""
+
+     _clusters: list
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+         max_clusters: int = 8,
+     ):
+         super().__init__(verbose, skip_snippets, max_snippet_len)
+         config = TemplateMinerConfig()
+         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
+         config.profiling_enabled = verbose
+         config.drain_max_clusters = max_clusters
+         self.miner = drain3.TemplateMiner(config=config)
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         # Create chunks
+         chunks = list(get_chunks(log, self.max_snippet_len))
+
+         chunks = self.filter_snippet_patterns(chunks)
+
+         # First pass to create clusters
+         self._create_clusters(chunks=chunks)
+
+         # Second pass, only matching lines with clusters,
+         # to recover original text
+         snippets = self._extract_messages(chunks=chunks)
+         return snippets
+
+     def _create_clusters(self, chunks: list[tuple[int, str]]):
+         """First pass to create clusters"""
+         for _, chunk in chunks:
+             processed_chunk = self.miner.add_log_message(chunk)
+             LOG.debug(processed_chunk)
+         self._clusters = list(self.miner.drain.clusters)
+
+     def _extract_messages(self, chunks: list[tuple[int, str]]) -> list[tuple[int, str]]:
+         """Second pass with drain using patterns from the first,
+         to extract matching lines and their numbers."""
+         out = []
+
+         for chunk_start, chunk in chunks:
+             cluster = self.miner.match(chunk, "always")
+             if cluster in self._clusters:
+                 out.append((chunk_start, chunk))
+                 self._clusters.remove(cluster)
+         return out
+
+
+ class CSGrepExtractor(DrainExtractor):
+     """Extract messages using csgrep
+     This extractor is only effective at retrieving messages from the GCC
+     compiler and associated utilities; it is not capable of safely
+     extracting other messages from the logs. Therefore, it must only
+     be used together with the Drain based extractor."""
+
+     def __init__(
+         self,
+         verbose: bool = False,
+         skip_snippets: SkipSnippets = SkipSnippets({}),
+         max_snippet_len: int = 2000,
+         max_clusters: int = 8,
+     ):
+         super().__init__(verbose, skip_snippets, max_snippet_len, max_clusters)
+
+     def __call__(self, log: str) -> list[Tuple[int, str]]:
+         """Extract error messages from log using csgrep"""
+         chunks = []
+         try:
+             # We are not running the binary in check mode, since csgrep
+             # can produce many errors due to log file syntax
+             result = sp.run(
+                 [
+                     "csgrep",
+                     "--event=error",
+                     "--remove-duplicates",
+                     "--mode=json",
+                     "--quiet",
+                 ],
+                 input=log,
+                 shell=False,
+                 check=False,
+                 capture_output=True,
+                 text=True,
+                 timeout=1.0,
+             )
+         except sp.TimeoutExpired as ex:
+             LOG.exception("Exception encountered while parsing log with csgrep %s", ex)
+             raise ex
+         if result.returncode != 0:
+             # This can happen even if `csgrep` managed to extract useful info.
+             # Most commonly, when it encountered unexpected syntax in the log.
+             LOG.warning("csgrep call resulted in an error")
+             LOG.debug("csgrep error: `%s`", result.stderr)
+         if not result.stdout:
+             return []
+
+         # Parse JSON output from csgrep
+         try:
+             report = CSGrepOutput.model_validate_json(result.stdout)
+         except ValidationError as ex:
+             LOG.exception("Exception encountered while parsing csgrep output %s", ex)
+             raise ex
+         for defect in report.defects:
+             # A single original error message can be split across multiple events;
+             # before returning, we turn them back into a single string.
+             # We must also extract the original line number.
+             # The line number is NOT the location of the message in the log, but the
+             # location of the issue in source; we can't mix the two, so we set it to `0`.
+
+             chunks.append((0, "\n".join([event.message for event in defect.events])))
+
+         chunks = self.filter_snippet_patterns(chunks)
+         LOG.info("Total %d messages extracted with csgrep", len(chunks))
+         self._create_clusters(chunks=chunks)
+         snippets = self._extract_messages(chunks=chunks)
+
+         return snippets
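Editor's note: to make the relationship between the two extractors concrete, here is a minimal sketch of how they compose, using only names introduced in this diff; the log path is illustrative.

```
# Illustrative composition of the extractors added in this release.
from logdetective.extractors import DrainExtractor, CSGrepExtractor
from logdetective.utils import check_csgrep, mine_logs

with open("build.log", encoding="utf-8") as f:  # hypothetical log file
    log = f.read()

extractors = [DrainExtractor(max_clusters=8)]
if check_csgrep():  # add the csgrep pass only when the binary is present
    extractors.append(CSGrepExtractor())

# Each extractor runs over the full log; csgrep-derived snippets carry line number 0.
snippets = mine_logs(log, extractors)
```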
@@ -15,8 +15,10 @@ from logdetective.utils import (
      compute_certainty,
      load_prompts,
      load_skip_snippet_patterns,
+     check_csgrep,
+     mine_logs,
  )
- from logdetective.extractors import DrainExtractor
+ from logdetective.extractors import DrainExtractor, CSGrepExtractor
 
  LOG = logging.getLogger("logdetective")
 
@@ -89,10 +91,13 @@ def setup_args():
          default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
          help="Path to patterns for skipping snippets.",
      )
+     parser.add_argument(
+         "--csgrep", action="store_true", help="Use csgrep to process the log."
+     )
      return parser.parse_args()
 
 
- async def run():  # pylint: disable=too-many-statements,too-many-locals
+ async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
      """Main execution function."""
      args = setup_args()
 
@@ -134,13 +139,25 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals
          sys.exit(5)
 
      # Log file summarizer initialization
-     extractor = DrainExtractor(
-         args.verbose > 1,
-         context=True,
-         max_clusters=args.n_clusters,
-         skip_snippets=skip_snippets,
+     extractors = []
+     extractors.append(
+         DrainExtractor(
+             args.verbose > 1,
+             max_clusters=args.n_clusters,
+             skip_snippets=skip_snippets,
+         )
      )
 
+     if args.csgrep:
+         if not check_csgrep():
+             LOG.error(
+                 "You have requested use of `csgrep` when it isn't available on your system."
+             )
+             sys.exit(6)
+         extractors.append(
+             CSGrepExtractor(args.verbose > 1, skip_snippets=skip_snippets)
+         )
+
      LOG.info("Getting summary")
 
      async with aiohttp.ClientSession() as http:
@@ -150,12 +167,8 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals
              # file does not exist
              LOG.error(e)
              sys.exit(4)
-         log_summary = extractor(log)
-
-         ratio = len(log_summary) / len(log.split("\n"))
-
-         LOG.info("Compression ratio: %s", ratio)
 
+         log_summary = mine_logs(log=log, extractors=extractors)
          LOG.info("Analyzing the text")
 
          log_summary = format_snippets(log_summary)
@@ -71,3 +71,29 @@ class SkipSnippets(BaseModel):
              ) from ex
 
          return data
+
+
+ class CSGrepEvent(BaseModel):
+     """`csgrep` splits error and warning messages into individual events."""
+
+     file_name: str
+     line: int
+     event: str
+     message: str
+     verbosity_level: int
+
+
+ class CSGrepDefect(BaseModel):
+     """Defects detected by `csgrep`"""
+
+     checker: str
+     language: str
+     tool: str
+     key_event_idx: int
+     events: list[CSGrepEvent]
+
+
+ class CSGrepOutput(BaseModel):
+     """Parsed output of `csgrep`"""
+
+     defects: list[CSGrepDefect]
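Editor's note: the three models above appear to mirror the JSON that `csgrep --mode=json` emits. A minimal sketch of parsing a document shaped to satisfy them; the field values are invented, and real csgrep output may carry extra fields, which pydantic ignores by default.

```
from logdetective.models import CSGrepOutput

# Invented sample shaped to match the models above.
sample = '''
{"defects": [{"checker": "COMPILER_WARNING", "language": "c/c++",
  "tool": "gcc", "key_event_idx": 0,
  "events": [{"file_name": "main.c", "line": 42, "event": "error",
    "message": "expected ';' before '}' token", "verbosity_level": 0}]}]}
'''

report = CSGrepOutput.model_validate_json(sample)
print(report.defects[0].events[0].message)
```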
@@ -1,6 +1,7 @@
  import os
  import asyncio
  import random
+ import time
  from typing import List, Tuple, Dict
 
  import backoff
@@ -15,6 +16,7 @@ from logdetective.utils import (
      compute_certainty,
      prompt_to_messages,
      format_snippets,
+     mine_logs,
  )
  from logdetective.server.config import (
      LOG,
@@ -33,10 +35,10 @@ from logdetective.server.models import (
  )
  from logdetective.server.utils import (
      format_analyzed_snippets,
-     mine_logs,
      should_we_giveup,
      we_give_up,
      filter_snippets,
+     construct_final_prompt,
  )
 
 
@@ -184,10 +186,13 @@ async def analyze_snippets(
 
  async def perfrom_analysis(log_text: str) -> Response:
      """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-     log_summary = mine_logs(log_text)
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
      log_summary = format_snippets(log_summary)
+
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
      messages = prompt_to_messages(
-         PROMPT_CONFIG.prompt_template.format(log_summary),
+         final_prompt,
          PROMPT_CONFIG.default_system_prompt,
          SERVER_CONFIG.inference.system_role,
          SERVER_CONFIG.inference.user_role,
@@ -213,10 +218,13 @@ async def perfrom_analysis(log_text: str) -> Response:
 
  async def perform_analyis_stream(log_text: str) -> AsyncStream:
      """Submit log file snippets in aggregate and return a stream of tokens"""
-     log_summary = mine_logs(log_text)
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
      log_summary = format_snippets(log_summary)
+
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
      messages = prompt_to_messages(
-         PROMPT_CONFIG.prompt_template.format(log_summary),
+         final_prompt,
          PROMPT_CONFIG.default_system_prompt,
          SERVER_CONFIG.inference.system_role,
          SERVER_CONFIG.inference.user_role,
@@ -235,8 +243,8 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:
 
  async def perform_staged_analysis(log_text: str) -> StagedResponse:
      """Submit the log file snippets to the LLM and retrieve their results"""
-     log_summary = mine_logs(log_text)
-
+     log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+     start = time.time()
      if SERVER_CONFIG.general.top_k_snippets:
          rated_snippets = await analyze_snippets(
              log_summary=log_summary,
@@ -265,10 +273,11 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
          AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
          for e in zip(log_summary, processed_snippets)
      ]
+     delta = time.time() - start
+     LOG.info("Snippet analysis performed in %f s", delta)
+     log_summary = format_analyzed_snippets(processed_snippets)
+     final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)
 
-     final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
-         format_analyzed_snippets(processed_snippets)
-     )
      messages = prompt_to_messages(
          final_prompt,
          PROMPT_CONFIG.staged_system_prompt,
@@ -26,6 +26,9 @@ from logdetective.constants import (
      USER_ROLE_DEFAULT,
  )
 
+ from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
+ from logdetective.utils import check_csgrep
+
 
 
  class BuildLog(BaseModel):
@@ -247,15 +250,56 @@ class ExtractorConfig(BaseModel):
      max_clusters: int = 8
      verbose: bool = False
      max_snippet_len: int = 2000
+     csgrep: bool = False
+
+     _extractors: List[Extractor] = []
+
+     def _setup_extractors(self):
+         """Initialize extractors with common settings."""
+         self._extractors = [
+             DrainExtractor(
+                 verbose=self.verbose,
+                 max_snippet_len=self.max_snippet_len,
+                 max_clusters=self.max_clusters,
+             )
+         ]
+
+         if self.csgrep:
+             self._extractors.append(
+                 CSGrepExtractor(
+                     verbose=self.verbose,
+                     max_snippet_len=self.max_snippet_len,
+                 )
+             )
 
      def __init__(self, data: Optional[dict] = None):
-         super().__init__()
+         super().__init__(data=data)
+
          if data is None:
+             self._setup_extractors()
              return
 
          self.max_clusters = data.get("max_clusters", 8)
          self.verbose = data.get("verbose", False)
          self.max_snippet_len = data.get("max_snippet_len", 2000)
+         self.csgrep = data.get("csgrep", False)
+
+         self._setup_extractors()
+
+     def get_extractors(self) -> List[Extractor]:
+         """Return list of initialized extractors, each will be applied in turn
+         on original log text to retrieve snippets."""
+         return self._extractors
+
+     @field_validator("csgrep", mode="after")
+     @classmethod
+     def validate_csgrep(cls, value: bool) -> bool:
+         """Verify that csgrep is available if requested."""
+         if not check_csgrep():
+             raise ValueError(
+                 "Requested csgrep extractor but `csgrep` binary is not in the PATH"
+             )
+         return value
 
 
  class GitLabInstanceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
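Editor's note: a sketch of how this config is exercised server-side, under the assumption that the `csgrep` binary is installed (otherwise `validate_csgrep` raises a ValueError):

```
from logdetective.server.models import ExtractorConfig

# Assumes `csgrep` is on the PATH; validate_csgrep raises otherwise.
config = ExtractorConfig({"max_clusters": 8, "verbose": False, "csgrep": True})

for extractor in config.get_extractors():
    # Expected: DrainExtractor, then CSGrepExtractor
    print(type(extractor).__name__)
```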
@@ -481,6 +525,7 @@ class Config(BaseModel):
      log: LogConfig = LogConfig()
      inference: InferenceConfig = InferenceConfig()
      snippet_inference: InferenceConfig = InferenceConfig()
+     # TODO(jpodivin): Extend to work with multiple extractor configs
      extractor: ExtractorConfig = ExtractorConfig()
      gitlab: GitLabConfig = GitLabConfig()
      koji: KojiConfig = KojiConfig()
@@ -106,35 +106,35 @@ async def get_http_session(request: Request) -> aiohttp.ClientSession:
      return request.app.http
 
 
- def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
+ def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
      """
-     FastAPI Depend function that expects a header named Authentication
+     FastAPI Depend function that expects a header named Authorization
 
      If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
      otherwise ignore it
      """
      if not API_TOKEN:
-         LOG.info("LOGDETECTIVE_TOKEN env var not set, authentication disabled")
+         LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
          # no token required, means local dev environment
          return
-     if authentication:
+     if authorization:
          try:
-             token = authentication.split(" ", 1)[1]
+             token = authorization.split(" ", 1)[1]
          except (ValueError, IndexError) as ex:
              LOG.warning(
-                 "Authentication header has invalid structure '%s', it should be 'Bearer TOKEN'",
-                 authentication,
+                 "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+                 authorization,
              )
              # eat the exception and raise 401 below
              raise HTTPException(
                  status_code=401,
-                 detail=f"Invalid authentication, HEADER '{authentication}' not valid.",
+                 detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
              ) from ex
          if token == API_TOKEN:
              return
          LOG.info("Provided token '%s' does not match expected value.", token)
          raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
-     LOG.error("No authentication header provided but LOGDETECTIVE_TOKEN env var is set")
+     LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
      raise HTTPException(status_code=401, detail="No token provided.")
 
 
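Editor's note: with the header renamed, a client authenticates with a standard Authorization header. A hedged sketch follows; the endpoint path and payload shape are assumptions based on the BuildLog model, not confirmed by this diff.

```
import requests

resp = requests.post(
    "http://localhost:8080/analyze",  # hypothetical deployment and route
    json={"url": "https://example.org/build.log"},  # assumed BuildLog payload
    headers={"Authorization": "Bearer MY_TOKEN"},  # header the server now reads
    timeout=60,
)
print(resp.status_code)
```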
@@ -1,15 +1,10 @@
- from typing import List, Tuple
+ from typing import List
 
  import aiohttp
  from fastapi import HTTPException
 
  from logdetective.constants import SNIPPET_DELIMITER
- from logdetective.extractors import DrainExtractor
- from logdetective.server.config import (
-     LOG,
-     SERVER_CONFIG,
-     SKIP_SNIPPETS_CONFIG,
- )
+ from logdetective.server.config import LOG
  from logdetective.server.exceptions import LogDetectiveConnectionError
  from logdetective.server.models import AnalyzedSnippet, RatedSnippetAnalysis
 
@@ -22,26 +17,6 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
      return summary
 
 
- def mine_logs(log: str) -> List[Tuple[int, str]]:
-     """Extract snippets from log text"""
-     extractor = DrainExtractor(
-         verbose=True,
-         context=True,
-         max_clusters=SERVER_CONFIG.extractor.max_clusters,
-         skip_snippets=SKIP_SNIPPETS_CONFIG,
-         max_snippet_len=SERVER_CONFIG.extractor.max_snippet_len
-     )
-
-     LOG.info("Getting summary")
-     log_summary = extractor(log)
-
-     ratio = len(log_summary) / len(log.split("\n"))
-     LOG.debug("Log summary: \n %s", log_summary)
-     LOG.info("Compression ratio: %s", ratio)
-
-     return log_summary
-
-
  def connection_error_giveup(details: dict) -> None:
      """Too many connection errors, give up.
      """
@@ -120,3 +95,10 @@ def filter_snippets(
      processed_snippets = sorted(processed_snippets, key=select_line_number)
 
      return processed_snippets
+
+
+ def construct_final_prompt(formatted_snippets: str, prompt_template: str) -> str:
+     """Create final prompt from processed snippets and csgrep output, if it is available."""
+
+     final_prompt = prompt_template.format(formatted_snippets)
+     return final_prompt
@@ -1,5 +1,6 @@
  import logging
  import os
+ import subprocess as sp
  from typing import Iterator, List, Dict, Tuple, Generator
  from urllib.parse import urlparse
 
@@ -8,10 +9,10 @@ import numpy as np
  import yaml
 
  from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
+ from logdetective.constants import SNIPPET_DELIMITER
  from logdetective.models import PromptConfig, SkipSnippets
  from logdetective.remote_log import RemoteLog
 
-
  LOG = logging.getLogger("logdetective")
 
 
@@ -39,7 +40,9 @@ def chunk_continues(text: str, index: int) -> bool:
      return False
 
 
- def get_chunks(text: str, max_len: int = 2000) -> Generator[Tuple[int, str], None, None]:
+ def get_chunks(
+     text: str, max_len: int = 2000
+ ) -> Generator[Tuple[int, str], None, None]:
      """Split log into chunks according to heuristic
      based on whitespace and backslash presence.
      """
@@ -173,14 +176,14 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
  Snippet No. {i} at line #{s[0]}:
 
  {s[1]}
- ================
+ {SNIPPET_DELIMITER}
  """
          else:
              summary += f"""
  Snippet No. {i}:
 
  {s}
- ================
+ {SNIPPET_DELIMITER}
  """
      return summary
 
@@ -247,3 +250,44 @@ def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
          raise e
 
      return SkipSnippets({})
+
+
+ def check_csgrep() -> bool:
+     """Verifies presence of csgrep in path"""
+     try:
+         result = sp.run(
+             ["csgrep", "--version"],
+             text=True,
+             check=True,
+             shell=False,
+             capture_output=True,
+             timeout=1.0,
+         )
+     except (FileNotFoundError, sp.TimeoutExpired, sp.CalledProcessError) as ex:
+         LOG.error("Required binary `csgrep` was not found in path: %s", ex)
+         return False
+     if result.returncode == 0:
+         return True
+     LOG.error("Issue was encountered while calling `csgrep`: `%s`", result.stderr)
+
+     return False
+
+
+ def mine_logs(log: str, extractors: list) -> List[Tuple[int, str]]:
+     """Extract snippets from log text using extractors provided.
+     Each extractor is applied in turn on original log.
+     Depending on characteristics of extractors used, there may be
+     an overlap in snippets extracted."""
+
+     log_summary = []
+
+     LOG.info("Getting summary")
+
+     for extractor in extractors:
+         log_summary.extend(extractor(log))
+
+     ratio = len("\n".join([text for _, text in log_summary])) / len(log)
+     LOG.debug("Log summary: \n %s", log_summary)
+     LOG.info("Snippets: %s Compression ratio: %s", len(log_summary), ratio)
+
+     return log_summary
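Editor's note: the compression ratio reported by mine_logs is now character-based rather than line-based. A self-contained illustration with invented numbers:

```
# Invented numbers, mirroring the ratio computed in mine_logs above.
log = "x" * 10_000                                # 10,000-character log
log_summary = [(0, "x" * 250), (120, "x" * 250)]  # two extracted snippets

joined = "\n".join(text for _, text in log_summary)
ratio = len(joined) / len(log)                    # 501 / 10000 = 0.0501
print(f"Snippets: {len(log_summary)} Compression ratio: {ratio}")
```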
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "logdetective"
- version = "2.1.0"
+ version = "2.2.1"
  description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
  authors = ["Jiri Podivin <jpodivin@gmail.com>"]
  license = "Apache-2.0"
@@ -1,57 +0,0 @@
- import os
- import logging
- from typing import Tuple
-
- import drain3
- from drain3.template_miner_config import TemplateMinerConfig
-
- from logdetective.utils import get_chunks, filter_snippet_patterns
- from logdetective.models import SkipSnippets
-
- LOG = logging.getLogger("logdetective")
-
-
- class DrainExtractor:
-     """A class that extracts information from logs using a template miner algorithm."""
-
-     def __init__(
-         self,
-         verbose: bool = False,
-         context: bool = False,
-         max_clusters=8,
-         skip_snippets: SkipSnippets = SkipSnippets({}),
-         max_snippet_len: int = 2000
-     ):  # pylint: disable=R0913,R0917
-         config = TemplateMinerConfig()
-         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
-         config.profiling_enabled = verbose
-         config.drain_max_clusters = max_clusters
-         self.miner = drain3.TemplateMiner(config=config)
-         self.verbose = verbose
-         self.context = context
-         self.skip_snippets = skip_snippets
-         self.max_snippet_len = max_snippet_len
-
-     def __call__(self, log: str) -> list[Tuple[int, str]]:
-         out = []
-         # Create chunks
-         chunks = list(get_chunks(log, self.max_snippet_len))
-         # Keep only chunks that don't match any of the excluded patterns
-         chunks = [
-             (_, chunk)
-             for _, chunk in chunks
-             if not filter_snippet_patterns(chunk, self.skip_snippets)
-         ]
-         # First pass create clusters
-         for _, chunk in chunks:
-             processed_chunk = self.miner.add_log_message(chunk)
-             LOG.debug(processed_chunk)
-         clusters = list(self.miner.drain.clusters)
-         # Second pass, only matching lines with clusters,
-         # to recover original text
-         for chunk_start, chunk in chunks:
-             cluster = self.miner.match(chunk, "always")
-             if cluster in clusters:
-                 out.append((chunk_start, chunk))
-                 clusters.remove(cluster)
-         return out