logdetective 1.5.0__tar.gz → 1.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {logdetective-1.5.0 → logdetective-1.6.0}/PKG-INFO +24 -1
  2. {logdetective-1.5.0 → logdetective-1.6.0}/README.md +23 -0
  3. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/extractors.py +23 -10
  4. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/logdetective.py +17 -1
  5. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/models.py +32 -1
  6. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/remote_log.py +1 -1
  7. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/config.py +9 -1
  8. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/llm.py +11 -2
  9. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/plot.py +36 -35
  10. logdetective-1.6.0/logdetective/skip_snippets.yml +12 -0
  11. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/utils.py +25 -1
  12. {logdetective-1.5.0 → logdetective-1.6.0}/pyproject.toml +1 -1
  13. {logdetective-1.5.0 → logdetective-1.6.0}/LICENSE +0 -0
  14. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/__init__.py +0 -0
  15. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/constants.py +0 -0
  16. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/drain3.ini +0 -0
  17. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/prompts-summary-first.yml +0 -0
  18. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/prompts-summary-only.yml +0 -0
  19. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/prompts.yml +0 -0
  20. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/__init__.py +0 -0
  21. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/compressors.py +0 -0
  22. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/database/__init__.py +0 -0
  23. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/database/base.py +0 -0
  24. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/database/models/__init__.py +0 -0
  25. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/database/models/merge_request_jobs.py +0 -0
  26. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/database/models/metrics.py +0 -0
  27. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/emoji.py +0 -0
  28. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/gitlab.py +0 -0
  29. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/metric.py +0 -0
  30. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/models.py +0 -0
  31. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/server.py +0 -0
  32. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/templates/gitlab_full_comment.md.j2 +0 -0
  33. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/templates/gitlab_short_comment.md.j2 +0 -0
  34. {logdetective-1.5.0 → logdetective-1.6.0}/logdetective.1.asciidoc +0 -0

{logdetective-1.5.0 → logdetective-1.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 1.5.0
+Version: 1.6.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -90,6 +90,7 @@ To analyze a log file, run the script with the following command line arguments:
 - `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
 - `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
+- `--skip_snippets` Path to patterns for skipping snippets.
 
 Example usage:
 
@@ -438,6 +439,28 @@ with spaces, or replacement fields marked with curly braces, `{}` left for inser
 Number of replacement fields in new prompts, must be the same as in originals.
 Although their position may be different.
 
+
+Skip Snippets
+-------------
+
+Certain log chunks may not contribute to the analysis of the problem under any circumstances.
+User can specify regular expressions, matching such log chunks, along with simple description,
+using Skip Snippets feature.
+
+Patterns to be skipped must be defined yaml file as a dictionary, where key is a description
+and value is a regular expression. For example:
+
+```
+child_exit_code_zero: "Child return code was: 0"
+```
+
+Special care must be taken not to write a regular expression which may match
+too many chunks, or which may be evaluated as data structure by the yaml parser.
+
+Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
+can be used as a starting point and is used as a default if no other definition is provided.
+
+
 License
 -------
 

{logdetective-1.5.0 → logdetective-1.6.0}/README.md

@@ -46,6 +46,7 @@ To analyze a log file, run the script with the following command line arguments:
 - `--summarizer` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only.(optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
 - `--n_lines` DISABLED: LLM summarization option was removed. Argument is kept for backward compatibility only. (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
 - `--n_clusters` (optional, default 8): Number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain
+- `--skip_snippets` Path to patterns for skipping snippets.
 
 Example usage:
 
@@ -394,6 +395,28 @@ with spaces, or replacement fields marked with curly braces, `{}` left for inser
 Number of replacement fields in new prompts, must be the same as in originals.
 Although their position may be different.
 
+
+Skip Snippets
+-------------
+
+Certain log chunks may not contribute to the analysis of the problem under any circumstances.
+User can specify regular expressions, matching such log chunks, along with simple description,
+using Skip Snippets feature.
+
+Patterns to be skipped must be defined yaml file as a dictionary, where key is a description
+and value is a regular expression. For example:
+
+```
+child_exit_code_zero: "Child return code was: 0"
+```
+
+Special care must be taken not to write a regular expression which may match
+too many chunks, or which may be evaluated as data structure by the yaml parser.
+
+Example of a valid pattern definition file: `logdetective/skip_patterns.yml`,
+can be used as a starting point and is used as a default if no other definition is provided.
+
+
 License
 -------
 
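The documented feature boils down to a small amount of plumbing: the YAML file is parsed into a mapping of descriptions to regular expressions, each expression is compiled, and any log chunk matching one of them is dropped before analysis. The sketch below illustrates that behaviour with plain `yaml` and `re`; the file name and sample chunk are illustrative, not part of the package.

```python
# Minimal sketch of the skip-patterns semantics described above.
# "my_skip_patterns.yml" and the sample chunk are hypothetical.
import re
import yaml

with open("my_skip_patterns.yml", "r") as f:
    # e.g. {"child_exit_code_zero": "Child return code was: 0"}
    raw_patterns = yaml.safe_load(f)

# Each description maps to a compiled regular expression.
compiled = {name: re.compile(expr) for name, expr in raw_patterns.items()}

chunk = "Child return code was: 0"
# A chunk matching any pattern is excluded from further analysis.
skipped = any(pattern.match(chunk) for pattern in compiled.values())
print(skipped)
```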

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/extractors.py

@@ -5,7 +5,8 @@ from typing import Tuple
 import drain3
 from drain3.template_miner_config import TemplateMinerConfig
 
-from logdetective.utils import get_chunks
+from logdetective.utils import get_chunks, filter_snippet_patterns
+from logdetective.models import SkipSnippets
 
 LOG = logging.getLogger("logdetective")
 
@@ -13,7 +14,13 @@ LOG = logging.getLogger("logdetective")
 class DrainExtractor:
     """A class that extracts information from logs using a template miner algorithm."""
 
-    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
+    def __init__(
+        self,
+        verbose: bool = False,
+        context: bool = False,
+        max_clusters=8,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+    ):
         config = TemplateMinerConfig()
         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
         config.profiling_enabled = verbose
@@ -21,22 +28,28 @@ class DrainExtractor:
         self.miner = drain3.TemplateMiner(config=config)
         self.verbose = verbose
         self.context = context
+        self.skip_snippets = skip_snippets
 
     def __call__(self, log: str) -> list[Tuple[int, str]]:
         out = []
+        # Create chunks
+        chunks = list(get_chunks(log))
+        # Keep only chunks that don't match any of the excluded patterns
+        chunks = [
+            (_, chunk)
+            for _, chunk in chunks
+            if not filter_snippet_patterns(chunk, self.skip_snippets)
+        ]
         # First pass create clusters
-        for _, chunk in get_chunks(log):
+        for _, chunk in chunks:
             processed_chunk = self.miner.add_log_message(chunk)
             LOG.debug(processed_chunk)
-        # Sort found clusters by size, descending order
-        sorted_clusters = sorted(
-            self.miner.drain.clusters, key=lambda it: it.size, reverse=True
-        )
+        clusters = list(self.miner.drain.clusters)
         # Second pass, only matching lines with clusters,
         # to recover original text
-        for chunk_start, chunk in get_chunks(log):
+        for chunk_start, chunk in chunks:
             cluster = self.miner.match(chunk, "always")
-            if cluster in sorted_clusters:
+            if cluster in clusters:
                 out.append((chunk_start, chunk))
-                sorted_clusters.remove(cluster)
+                clusters.remove(cluster)
         return out
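As a rough illustration of the new parameter, the sketch below builds an extractor with a single skip pattern and runs it over a short log. The pattern, log text, and argument values are assumptions made for the example, not taken from the project's tests.

```python
# Illustrative use of the updated DrainExtractor (all values are made up).
from logdetective.extractors import DrainExtractor
from logdetective.models import SkipSnippets

skip = SkipSnippets({"child_exit_code_zero": "Child return code was: 0"})
extractor = DrainExtractor(
    verbose=False,
    context=True,
    max_clusters=8,
    skip_snippets=skip,
)

log_text = "Child return code was: 0\nerror: linker `cc` not found\n"
# Chunks matching a skip pattern are filtered out before both Drain passes,
# so they can never come back as snippets.
snippets = extractor(log_text)
print(snippets)
```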

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/logdetective.py

@@ -14,6 +14,7 @@ from logdetective.utils import (
     format_snippets,
     compute_certainty,
     load_prompts,
+    load_skip_snippet_patterns,
 )
 from logdetective.extractors import DrainExtractor
 
@@ -82,6 +83,12 @@ def setup_args():
         default=DEFAULT_TEMPERATURE,
         help="Temperature for inference.",
     )
+    parser.add_argument(
+        "--skip_snippets",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
+        help="Path to patterns for skipping snippets.",
+    )
     return parser.parse_args()
 
 
@@ -120,9 +127,18 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals
         LOG.error("You likely do not have enough memory to load the AI model")
         sys.exit(3)
 
+    try:
+        skip_snippets = load_skip_snippet_patterns(args.skip_snippets)
+    except OSError as e:
+        LOG.error(e)
+        sys.exit(5)
+
     # Log file summarizer initialization
     extractor = DrainExtractor(
-        args.verbose > 1, context=True, max_clusters=args.n_clusters
+        args.verbose > 1,
+        context=True,
+        max_clusters=args.n_clusters,
+        skip_snippets=skip_snippets,
     )
 
     LOG.info("Getting summary")

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/models.py

@@ -1,5 +1,6 @@
+import re
 from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator
 
 from logdetective.constants import (
     PROMPT_TEMPLATE,
@@ -40,3 +41,33 @@ class PromptConfig(BaseModel):
         self.staged_system_prompt = data.get(
             "staged_system_prompt", DEFAULT_SYSTEM_PROMPT
         )
+
+
+class SkipSnippets(BaseModel):
+    """Regular expressions defining snippets we should not analyze"""
+
+    snippet_patterns: dict[str, re.Pattern] = {}
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__(data=data)
+        if data is None:
+            return
+        self.snippet_patterns = {
+            key: re.compile(pattern) for key, pattern in data.items()
+        }
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_patterns(cls, data: dict):
+        """Check if all supplied patterns are valid regular expressions.
+        Techically replicating what is done in __init__ but with nicer error message."""
+        patterns = data["data"]
+        for key, pattern in patterns.items():
+            try:
+                re.compile(pattern=pattern)
+            except (TypeError, re.error) as ex:
+                raise ValueError(
+                    f"Invalid pattern `{pattern}` with name `{key}` supplied for skipping in logs."
+                ) from ex
+
+        return data
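A short sketch of the new model used on its own; the pattern names and expressions are illustrative. Per the validator added above, a value that does not compile as a regular expression is rejected during model construction.

```python
# Constructing SkipSnippets directly (pattern names and regexes are illustrative).
from logdetective.models import SkipSnippets

skip = SkipSnippets(
    {
        "child_exit_code_zero": "Child return code was: 0",
        "iso_timestamp_only": r"^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}\s*$",
    }
)

# Each entry is pre-compiled into an re.Pattern keyed by its description.
for name, pattern in skip.snippet_patterns.items():
    print(name, "->", pattern.pattern)
```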

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/remote_log.py

@@ -53,7 +53,7 @@ class RemoteLog:
             LOG.debug("process url %s", self.url)
             try:
                 response = await self._http_session.get(self.url, raise_for_status=True)
-            except aiohttp.ClientResponseError as ex:
+            except (aiohttp.ClientResponseError, aiohttp.ClientConnectorError) as ex:
                 raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
             return await response.text()
         LOG.error("Invalid URL received ")

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/config.py

@@ -3,8 +3,9 @@ import logging
 import yaml
 from openai import AsyncOpenAI
 
-from logdetective.utils import load_prompts
+from logdetective.utils import load_prompts, load_skip_snippet_patterns
 from logdetective.server.models import Config, InferenceConfig
+import logdetective
 
 
 def load_server_config(path: str | None) -> Config:
@@ -60,9 +61,16 @@ def get_openai_api_client(ineference_config: InferenceConfig):
 
 SERVER_CONFIG_PATH = os.environ.get("LOGDETECTIVE_SERVER_CONF", None)
 SERVER_PROMPT_PATH = os.environ.get("LOGDETECTIVE_PROMPTS", None)
+# The default location for skip patterns is in the same directory
+# as logdetective __init__.py file.
+SERVER_SKIP_PATTERNS_PATH = os.environ.get(
+    "LOGDETECIVE_SKIP_PATTERNS",
+    f"{os.path.dirname(logdetective.__file__)}/skip_snippets.yml",
+)
 
 SERVER_CONFIG = load_server_config(SERVER_CONFIG_PATH)
 PROMPT_CONFIG = load_prompts(SERVER_PROMPT_PATH)
+SKIP_SNIPPETS_CONFIG = load_skip_snippet_patterns(SERVER_SKIP_PATTERNS_PATH)
 
 LOG = get_log(SERVER_CONFIG)
 
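For deployments, the hunk above means the server's skip-pattern file can be swapped via an environment variable that is read once at import time. A minimal sketch of overriding it follows; the path is hypothetical, and the variable name is spelled exactly as it appears in the source above.

```python
# Sketch: pointing the server at a custom skip-patterns file.
# The override must be in place before logdetective.server.config is imported,
# because SERVER_SKIP_PATTERNS_PATH is resolved at module import time.
import os

os.environ["LOGDETECIVE_SKIP_PATTERNS"] = "/etc/logdetective/skip_snippets.yml"
```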

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/llm.py

@@ -16,7 +16,13 @@ from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
 )
-from logdetective.server.config import LOG, SERVER_CONFIG, PROMPT_CONFIG, CLIENT
+from logdetective.server.config import (
+    LOG,
+    SERVER_CONFIG,
+    PROMPT_CONFIG,
+    CLIENT,
+    SKIP_SNIPPETS_CONFIG,
+)
 from logdetective.server.models import (
     AnalyzedSnippet,
     InferenceConfig,
@@ -42,7 +48,10 @@ def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
 def mine_logs(log: str) -> List[Tuple[int, str]]:
     """Extract snippets from log text"""
     extractor = DrainExtractor(
-        verbose=True, context=True, max_clusters=SERVER_CONFIG.extractor.max_clusters
+        verbose=True,
+        context=True,
+        max_clusters=SERVER_CONFIG.extractor.max_clusters,
+        skip_snippets=SKIP_SNIPPETS_CONFIG,
     )
 
     LOG.info("Getting summary")

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/server/plot.py

@@ -2,12 +2,10 @@ import datetime
 from typing import Optional, Union, Dict
 
 import numpy
-import matplotlib
-import matplotlib.figure
-import matplotlib.pyplot
+from numpy.typing import ArrayLike
+from matplotlib import dates, colormaps, axes, pyplot, figure
 
-from matplotlib.pyplot import cm
-from logdetective.server import models
+from logdetective.server.models import TimePeriod
 from logdetective.server.database.models import (
     AnalyzeRequestMetrics,
     EndpointType,
@@ -18,25 +16,25 @@ from logdetective.server.database.models import (
 class Definition:
     """Define plot details, given a time period."""
 
-    def __init__(self, time_period: models.TimePeriod):
+    def __init__(self, time_period: TimePeriod):
         self.time_period = time_period
         self.days_diff = time_period.get_time_period().days
         if self.time_period.hours:
             self._freq = "H"
             self._time_format = "%Y-%m-%d %H"
-            self._locator = matplotlib.dates.HourLocator(interval=2)
+            self._locator = dates.HourLocator(interval=2)
             self._time_unit = "hour"
             self._time_delta = datetime.timedelta(hours=1)
         elif self.time_period.days:
             self._freq = "D"
             self._time_format = "%Y-%m-%d"
-            self._locator = matplotlib.dates.DayLocator(interval=1)
+            self._locator = dates.DayLocator(interval=1)
             self._time_unit = "day"
             self._time_delta = datetime.timedelta(days=1)
         elif self.time_period.weeks:
             self._freq = "W"
             self._time_format = "%Y-%m-%d"
-            self._locator = matplotlib.dates.WeekdayLocator(interval=1)
+            self._locator = dates.WeekdayLocator(interval=1)
             self._time_unit = "week"
             self._time_delta = datetime.timedelta(weeks=1)
 
@@ -120,10 +118,10 @@ def create_time_series_arrays(
 
 
 def _add_bar_chart(
-    ax: matplotlib.figure.Axes,
+    ax: axes.Axes,
     plot_def: Definition,
-    timestamps: numpy.array,
-    values: numpy.array,
+    timestamps: ArrayLike,
+    values: ArrayLike,
     label: str,
 ) -> None:
     """Add a blue bar chart"""
@@ -142,18 +140,18 @@ def _add_bar_chart(
     ax.set_ylabel(label, color="blue")
     ax.tick_params(axis="y", labelcolor="blue")
 
-    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
+    ax.xaxis.set_major_formatter(dates.DateFormatter(plot_def.time_format))
     ax.xaxis.set_major_locator(plot_def.locator)
 
-    matplotlib.pyplot.xticks(rotation=45)
+    pyplot.xticks(rotation=45)
 
     ax.grid(True, alpha=0.3)
 
 
 def _add_line_chart(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
-    ax: matplotlib.figure.Axes,
-    timestamps: numpy.array,
-    values: numpy.array,
+    ax: axes.Axes,
+    timestamps: ArrayLike,
+    values: ArrayLike,
     label: str,
     color: str = "red",
     set_label: bool = True,
@@ -166,10 +164,10 @@ def _add_line_chart( # pylint: disable=too-many-arguments disable=too-many-posi
 
 
 def requests_per_time(
-    period_of_time: models.TimePeriod,
+    period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
-) -> matplotlib.figure.Figure:
+) -> figure.Figure:
     """
     Generate a visualization of request counts over a specified time period.
 
@@ -200,13 +198,13 @@ def requests_per_time(
         requests_counts, plot_def, start_time, end_time
     )
 
-    fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
+    fig, ax1 = pyplot.subplots(figsize=(12, 6))
     _add_bar_chart(ax1, plot_def, timestamps, counts, "Requests")
 
     ax2 = ax1.twinx()
     _add_line_chart(ax2, timestamps, numpy.cumsum(counts), "Cumulative Requests")
 
-    matplotlib.pyplot.title(
+    pyplot.title(
         f"Requests received for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
         f"to {end_time.strftime(plot_def.time_format)})"
     )
@@ -215,16 +213,16 @@
     lines2, labels2 = ax2.get_legend_handles_labels()
     ax1.legend(lines1 + lines2, labels1 + labels2, loc="center")
 
-    matplotlib.pyplot.tight_layout()
+    pyplot.tight_layout()
 
     return fig
 
 
 def average_time_per_responses(  # pylint: disable=too-many-locals
-    period_of_time: models.TimePeriod,
+    period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
-) -> matplotlib.figure.Figure:
+) -> figure.Figure:
     """
     Generate a visualization of average response time and length over a specified time period.
 
@@ -259,7 +257,7 @@ def average_time_per_responses( # pylint: disable=too-many-locals
         float,
     )
 
-    fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
+    fig, ax1 = pyplot.subplots(figsize=(12, 6))
     _add_bar_chart(
         ax1, plot_def, timestamps, average_time, "average response time (seconds)"
     )
@@ -280,7 +278,7 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     ax2 = ax1.twinx()
     _add_line_chart(ax2, timestamps, average_length, "average response length (chars)")
 
-    matplotlib.pyplot.title(
+    pyplot.title(
         f"average response time for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
         f"to {end_time.strftime(plot_def.time_format)})"
     )
@@ -289,7 +287,7 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     lines2, labels2 = ax2.get_legend_handles_labels()
     ax1.legend(lines1 + lines2, labels1 + labels2, loc="center")
 
-    matplotlib.pyplot.tight_layout()
+    pyplot.tight_layout()
 
     return fig
 
@@ -322,7 +320,7 @@ def _collect_emoji_data(
 
 
 def _plot_emoji_data(  # pylint: disable=too-many-locals
-    ax: matplotlib.figure.Axes,
+    ax: axes.Axes,
     reactions_values_dict: Dict[str, Dict[datetime.datetime, int]],
     plot_def: Definition,
     start_time: datetime.datetime,
@@ -340,7 +338,10 @@ def _plot_emoji_data( # pylint: disable=too-many-locals
         )
         all_counts.extend(counts)
 
-    colors = [cm.viridis(i) for i in numpy.linspace(0, 1, len(reactions_values_dict))]  # pylint: disable=no-member
+    colors = [
+        colormaps["viridis"](i)
+        for i in numpy.linspace(0, 1, len(reactions_values_dict))
+    ]
 
     first_emoji = True
     for i, (emoji, dict_counts) in enumerate(reactions_values_dict.items()):
@@ -369,9 +370,9 @@
 
 
 def emojis_per_time(
-    period_of_time: models.TimePeriod,
+    period_of_time: TimePeriod,
     end_time: Optional[datetime.datetime] = None,
-) -> matplotlib.figure.Figure:
+) -> figure.Figure:
     """
     Generate a visualization of overall emoji feedback
     over a specified time period.
@@ -396,13 +397,13 @@
     start_time = period_of_time.get_period_start_time(end_time)
     reactions_values_dict = _collect_emoji_data(start_time, plot_def)
 
-    fig, ax = matplotlib.pyplot.subplots(figsize=(12, 6))
+    fig, ax = pyplot.subplots(figsize=(12, 6))
 
     emoji_lines, emoji_labels = _plot_emoji_data(
         ax, reactions_values_dict, plot_def, start_time, end_time
     )
 
-    matplotlib.pyplot.title(
+    pyplot.title(
         f"Emoji feedback ({start_time.strftime(plot_def.time_format)} "
         f"to {end_time.strftime(plot_def.time_format)})"
     )
@@ -419,11 +420,11 @@
     ax.set_ylabel("Count")
 
     # Format x-axis
-    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
+    ax.xaxis.set_major_formatter(dates.DateFormatter(plot_def.time_format))
     ax.xaxis.set_major_locator(plot_def.locator)
     ax.tick_params(axis="x", labelrotation=45)
     ax.grid(True, alpha=0.3)
 
-    matplotlib.pyplot.tight_layout()
+    pyplot.tight_layout()
 
     return fig

logdetective-1.6.0/logdetective/skip_snippets.yml (new file)

@@ -0,0 +1,12 @@
+# This file holds patterns you want to skip during log parsing.
+# By default, no patterns are supplied.
+# Patterns are to be specified as values of dictionary,
+# with each key being a descriptive name of the pattern.
+# Patterns themselves are evaluated as a regular expression.
+# Make sure to avoid regular expressions that may be interpreted
+# as yaml syntax.
+# Example:
+
+# contains_capital_a: "^.*A.*"
+# starts_with_numeric: "^[0-9].*"
+child_exit_code_zero: "Child return code was: 0"

{logdetective-1.5.0 → logdetective-1.6.0}/logdetective/utils.py

@@ -8,7 +8,7 @@ import numpy as np
 import yaml
 
 from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
-from logdetective.models import PromptConfig
+from logdetective.models import PromptConfig, SkipSnippets
 from logdetective.remote_log import RemoteLog
 
 
@@ -223,3 +223,27 @@ def prompt_to_messages(
     ]
 
     return messages
+
+
+def filter_snippet_patterns(snippet: str, skip_snippets: SkipSnippets) -> bool:
+    """Try to match snippet agains provided patterns to determine if we should
+    filter it out or not."""
+    for key, pattern in skip_snippets.snippet_patterns.items():
+        if pattern.match(snippet):
+            LOG.debug("Snippet `%s` has matched agains skip pattern %s", snippet, key)
+            return True
+
+    return False
+
+
+def load_skip_snippet_patterns(path: str | None) -> SkipSnippets:
+    """Load dictionary of snippet patterns we want to skip."""
+    if path:
+        try:
+            with open(path, "r") as file:
+                return SkipSnippets(yaml.safe_load(file))
+        except OSError as e:
+            LOG.error("Couldn't open file with snippet skip patterns `%s`", path)
+            raise e
+
+    return SkipSnippets({})
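The two helpers above carry most of the feature: one loads the YAML into a `SkipSnippets` model, the other answers whether a given snippet should be dropped. A brief sketch of them used together follows; the path and snippet text are hypothetical.

```python
# Sketch combining the helpers defined above (path and snippet are made up).
from logdetective.utils import load_skip_snippet_patterns, filter_snippet_patterns

skip_snippets = load_skip_snippet_patterns("my_skip_patterns.yml")

snippet = "Child return code was: 0"
if filter_snippet_patterns(snippet, skip_snippets):
    print("snippet would be skipped")
else:
    print("snippet would be analyzed")
```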

{logdetective-1.5.0 → logdetective-1.6.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "1.5.0"
+version = "1.6.0"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"