logdetective 0.4.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. logdetective/constants.py +33 -12
  2. logdetective/extractors.py +137 -68
  3. logdetective/logdetective.py +102 -33
  4. logdetective/models.py +99 -0
  5. logdetective/prompts-summary-first.yml +20 -0
  6. logdetective/prompts-summary-only.yml +13 -0
  7. logdetective/prompts.yml +90 -0
  8. logdetective/remote_log.py +67 -0
  9. logdetective/server/compressors.py +186 -0
  10. logdetective/server/config.py +78 -0
  11. logdetective/server/database/base.py +34 -26
  12. logdetective/server/database/models/__init__.py +33 -0
  13. logdetective/server/database/models/exceptions.py +17 -0
  14. logdetective/server/database/models/koji.py +143 -0
  15. logdetective/server/database/models/merge_request_jobs.py +623 -0
  16. logdetective/server/database/models/metrics.py +427 -0
  17. logdetective/server/emoji.py +148 -0
  18. logdetective/server/exceptions.py +37 -0
  19. logdetective/server/gitlab.py +451 -0
  20. logdetective/server/koji.py +159 -0
  21. logdetective/server/llm.py +309 -0
  22. logdetective/server/metric.py +75 -30
  23. logdetective/server/models.py +426 -23
  24. logdetective/server/plot.py +432 -0
  25. logdetective/server/server.py +580 -468
  26. logdetective/server/templates/base_response.html.j2 +59 -0
  27. logdetective/server/templates/gitlab_full_comment.md.j2 +73 -0
  28. logdetective/server/templates/gitlab_short_comment.md.j2 +62 -0
  29. logdetective/server/utils.py +98 -32
  30. logdetective/skip_snippets.yml +12 -0
  31. logdetective/utils.py +187 -73
  32. logdetective-2.11.0.dist-info/METADATA +568 -0
  33. logdetective-2.11.0.dist-info/RECORD +40 -0
  34. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  35. logdetective/server/database/models.py +0 -88
  36. logdetective-0.4.0.dist-info/METADATA +0 -333
  37. logdetective-0.4.0.dist-info/RECORD +0 -19
  38. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  39. {logdetective-0.4.0.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
logdetective/constants.py CHANGED
@@ -1,5 +1,10 @@
+"""This file contains various constants to be used as a fallback
+in case other values are not specified. Prompt templates should be modified
+in prompts.yaml instead.
+"""
+
 # pylint: disable=line-too-long
-DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.2-GGUF"
+DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.3-GGUF"
 
 PROMPT_TEMPLATE = """
 Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
@@ -11,6 +16,8 @@ Snippets are delimited with '================'.
 
 Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -19,20 +26,11 @@ Analysis:
 
 """
 
-SUMMARIZE_PROMPT_TEMPLATE = """
-Does following log contain error or issue?
-
-Log:
-
-{}
-
-Answer:
-
-"""
-
 SNIPPET_PROMPT_TEMPLATE = """
 Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
+Your analysis must be as concise as possible, while keeping relevant information intact.
+
 Snippet:
 
 {}
@@ -50,6 +48,8 @@ Snippets are delimited with '================'.
 
 Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -58,4 +58,25 @@ Analysis:
 
 """
 
+DEFAULT_SYSTEM_PROMPT = """
+You are a highly capable large language model based expert system specialized in
+packaging and delivery of software using RPM (RPM Package Manager). Your purpose is to diagnose
+RPM build failures, identifying root causes and proposing solutions if possible.
+You are truthful, concise, and helpful.
+
+You never speculate about package being built or fabricate information.
+If you do not know the answer, you acknowledge the fact and end your response.
+Your responses must be as short as possible.
+"""
+
 SNIPPET_DELIMITER = "================"
+
+DEFAULT_TEMPERATURE = 0.8
+
+# Tuning for LLM-as-a-Service
+LLM_DEFAULT_MAX_QUEUE_SIZE = 50
+LLM_DEFAULT_REQUESTS_PER_MINUTE = 60
+
+# Roles for chat API
+SYSTEM_ROLE_DEFAULT = "developer"
+USER_ROLE_DEFAULT = "user"
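The new role and temperature constants hint at a move from plain text completion to a chat-completion API, which the CLI changes below confirm. A minimal sketch of how they might be wired together, assuming llama-cpp-python's `create_chat_completion`; none of this wiring is taken from the package itself, and the model path is invented:

# Hypothetical wiring, not code from the package: shows how the new chat
# constants could feed llama-cpp-python's chat API.
from llama_cpp import Llama

from logdetective.constants import (
    DEFAULT_SYSTEM_PROMPT,
    DEFAULT_TEMPERATURE,
    SYSTEM_ROLE_DEFAULT,
    USER_ROLE_DEFAULT,
)

# logits_all=True is needed so the response carries token logprobs.
llm = Llama(model_path="model.Q4_K.gguf", logits_all=True)  # illustrative path
response = llm.create_chat_completion(
    messages=[
        {"role": SYSTEM_ROLE_DEFAULT, "content": DEFAULT_SYSTEM_PROMPT},
        {"role": USER_ROLE_DEFAULT, "content": "Snippets:\n..."},
    ],
    temperature=DEFAULT_TEMPERATURE,
    logprobs=True,  # token logprobs drive the certainty score in the CLI
)
print(response["choices"][0]["message"]["content"])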
logdetective/extractors.py CHANGED
@@ -1,99 +1,168 @@
 import os
 import logging
+import subprocess as sp
 from typing import Tuple
 
 import drain3
 from drain3.template_miner_config import TemplateMinerConfig
-from llama_cpp import Llama, LlamaGrammar
+from pydantic import ValidationError
 
-from logdetective.constants import SUMMARIZE_PROMPT_TEMPLATE
-from logdetective.utils import get_chunks
+from logdetective.utils import get_chunks, filter_snippet_patterns
+from logdetective.models import SkipSnippets, CSGrepOutput
 
 LOG = logging.getLogger("logdetective")
 
 
-class LLMExtractor:
-    """
-    A class that extracts relevant information from logs using a language model.
-    """
+class Extractor:
+    """Base extractor class."""
 
-    def __init__(self, model: Llama, n_lines: int = 2):
-        self.model = model
-        self.n_lines = n_lines
-        self.grammar = LlamaGrammar.from_string(
-            'root ::= ("Yes" | "No")', verbose=False
-        )
-
-    def __call__(
-        self, log: str, n_lines: int = 2, neighbors: bool = False
-    ) -> list[str]:
-        chunks = self.rate_chunks(log)
-        out = self.create_extract(chunks, neighbors)
-        return out
-
-    def rate_chunks(self, log: str) -> list[tuple]:
-        """Scan log by the model and store results.
-
-        :param log: log file content
-        """
-        results = []
-        log_lines = log.split("\n")
-
-        for i in range(0, len(log_lines), self.n_lines):
-            block = "\n".join(log_lines[i: i + self.n_lines])
-            prompt = SUMMARIZE_PROMPT_TEMPLATE.format(log)
-            out = self.model(prompt, max_tokens=7, grammar=self.grammar)
-            out = f"{out['choices'][0]['text']}\n"
-            results.append((block, out))
-
-        return results
-
-    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> list[str]:
-        """Extract interesting chunks from the model processing."""
-        interesting = []
-        summary = []
-        # pylint: disable=consider-using-enumerate
-        for i in range(len(chunks)):
-            if chunks[i][1].startswith("Yes"):
-                interesting.append(i)
-                if neighbors:
-                    interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)])
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+    ):
+        self.verbose = verbose
+        self.skip_snippets = skip_snippets
+        self.max_snippet_len = max_snippet_len
 
-        interesting = set(interesting)
+        if self.verbose:
+            LOG.setLevel(logging.DEBUG)
 
-        for i in interesting:
-            summary.append(chunks[i][0])
+    def __call__(self, log: str) -> list[Tuple[int, str]]:
+        raise NotImplementedError
 
-        return summary
+    def filter_snippet_patterns(
+        self, chunks: list[tuple[int, str]]
+    ) -> list[tuple[int, str]]:
+        """Keep only chunks that don't match any of the excluded patterns"""
+        chunks = [
+            (_, chunk)
+            for _, chunk in chunks
+            if not filter_snippet_patterns(chunk, self.skip_snippets)
+        ]
+        return chunks
 
 
-class DrainExtractor:
+class DrainExtractor(Extractor):
     """A class that extracts information from logs using a template miner algorithm."""
 
-    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
+    _clusters: list
+
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+        max_clusters: int = 8,
+    ):
+        super().__init__(verbose, skip_snippets, max_snippet_len)
         config = TemplateMinerConfig()
         config.load(f"{os.path.dirname(__file__)}/drain3.ini")
         config.profiling_enabled = verbose
         config.drain_max_clusters = max_clusters
         self.miner = drain3.TemplateMiner(config=config)
-        self.verbose = verbose
-        self.context = context
 
     def __call__(self, log: str) -> list[Tuple[int, str]]:
-        out = []
-        # First pass create clusters
-        for _, chunk in get_chunks(log):
-            processed_chunk = self.miner.add_log_message(chunk)
-            LOG.debug(processed_chunk)
-        # Sort found clusters by size, descending order
-        sorted_clusters = sorted(
-            self.miner.drain.clusters, key=lambda it: it.size, reverse=True
-        )
+        # Create chunks
+        chunks = list(get_chunks(log, self.max_snippet_len))
+
+        chunks = self.filter_snippet_patterns(chunks)
+
+        # First pass to create clusters
+        self._create_clusters(chunks=chunks)
+
         # Second pass, only matching lines with clusters,
         # to recover original text
-        for chunk_start, chunk in get_chunks(log):
+        snippets = self._extract_messages(chunks=chunks)
+        return snippets
+
+    def _create_clusters(self, chunks: list[tuple[int, str]]):
+        """First pass to create clusters"""
+        for _, chunk in chunks:
+            processed_chunk = self.miner.add_log_message(chunk)
+            LOG.debug(processed_chunk)
+        self._clusters = list(self.miner.drain.clusters)
+
+    def _extract_messages(self, chunks: list[tuple[int, str]]) -> list[tuple[int, str]]:
+        """Second pass with drain using patterns from the first,
+        to extract matching lines and their numbers."""
+        out = []
+
+        for chunk_start, chunk in chunks:
             cluster = self.miner.match(chunk, "always")
-            if cluster in sorted_clusters:
+            if cluster in self._clusters:
                 out.append((chunk_start, chunk))
-                sorted_clusters.remove(cluster)
+                self._clusters.remove(cluster)
         return out
+
+
+class CSGrepExtractor(DrainExtractor):
+    """Extract messages using csgrep
+    This extractor is only effective at retrieving messages from GCC
+    compiler and associated utilities, it is not capable of safely
+    extracting other messages from the logs. Therefore, it must only
+    be used together with the Drain based extractor."""
+
+    def __init__(
+        self,
+        verbose: bool = False,
+        skip_snippets: SkipSnippets = SkipSnippets({}),
+        max_snippet_len: int = 2000,
+        max_clusters: int = 8,
+    ):
+        super().__init__(verbose, skip_snippets, max_snippet_len, max_clusters)
+
+    def __call__(self, log: str) -> list[Tuple[int, str]]:
+        """Extract error messages from log using csgrep"""
+        chunks = []
+        try:
+            # We are not running binary in check mode, since csgrep
+            # can produce many errors due to log file syntax
+            result = sp.run(
+                [
+                    "csgrep",
+                    "--event=error",
+                    "--remove-duplicates",
+                    "--mode=json",
+                    "--quiet",
+                ],
+                input=log,
+                shell=False,
+                check=False,
+                capture_output=True,
+                text=True,
+                timeout=1.0,
+            )
+        except sp.TimeoutExpired as ex:
+            LOG.exception("Exception encountered while parsing log with csgrep %s", ex)
+            raise ex
+        if result.returncode != 0:
+            # This can happen even if `csgrep` managed to extract useful info.
+            # Most commonly, when it encountered unexpected syntax in the log.
+            LOG.warning("csgrep call resulted in an error")
+            LOG.debug("csgrep error: `%s`", result.stderr)
+        if not result.stdout:
+            return []
+
+        # Parse JSON output from csgrep
+        try:
+            report = CSGrepOutput.model_validate_json(result.stdout)
+        except ValidationError as ex:
+            LOG.exception("Exception encountered while parsing csgrep output %s", ex)
+            raise ex
+        for defect in report.defects:
+            # Single original error message can be split across multiple events
+            # before returning, we will turn them back into single string.
+            # We must also extract the original line number.
+            # Line number is NOT location of message in the log, but location of
+            # the issue in source, we can't really mix the two, so we'll set it to `0`.
+
+            chunks.append((0, "\n".join([event.message for event in defect.events])))
+
+        chunks = self.filter_snippet_patterns(chunks)
+        LOG.info("Total %d messages extracted with csgrep", len(chunks))
+        self._create_clusters(chunks=chunks)
+        snippets = self._extract_messages(chunks=chunks)
+
+        return snippets
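As the `CSGrepExtractor` docstring insists, csgrep results are only safe alongside the Drain-based extractor, and the CLI below builds a list of extractors in exactly that spirit. A hedged sketch of running both over one log; constructor arguments follow the signatures above, while the log path is invented:

# Sketch only, assuming the constructors shown in this diff.
from logdetective.extractors import CSGrepExtractor, DrainExtractor
from logdetective.models import SkipSnippets

skip = SkipSnippets({})  # no exclusion patterns; mirrors the constructors' default
extractors = [
    DrainExtractor(skip_snippets=skip),
    CSGrepExtractor(skip_snippets=skip),  # requires the csgrep binary on PATH
]

with open("build.log", encoding="utf-8") as f:  # invented path
    log = f.read()

snippets = []
for extractor in extractors:
    # Each extractor returns (line_number, snippet) tuples; csgrep snippets
    # carry line number 0 by design, as the comment in __call__ explains.
    snippets.extend(extractor(log))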
logdetective/logdetective.py CHANGED
@@ -1,16 +1,24 @@
 import argparse
+import asyncio
 import logging
 import sys
+import os
 
-from logdetective.constants import DEFAULT_ADVISOR
+import aiohttp
+
+from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
 from logdetective.utils import (
     process_log,
     initialize_model,
     retrieve_log_content,
     format_snippets,
     compute_certainty,
+    load_prompts,
+    load_skip_snippet_patterns,
+    check_csgrep,
+    mine_logs,
 )
-from logdetective.extractors import LLMExtractor, DrainExtractor
+from logdetective.extractors import DrainExtractor, CSGrepExtractor
 
 LOG = logging.getLogger("logdetective")
 
@@ -36,7 +44,7 @@ def setup_args():
         "--filename_suffix",
         help="Suffix of the model file name to be retrieved from Hugging Face.\
             Makes sense only if the model is specified with Hugging Face name.",
-        default="Q4_K_S.gguf",
+        default="Q4_K.gguf",
     )
     parser.add_argument("-n", "--no-stream", action="store_true")
     parser.add_argument(
@@ -44,16 +52,16 @@ def setup_args():
         "--summarizer",
         type=str,
         default="drain",
-        help="Choose between LLM and Drain template miner as the log summarizer.\
-            LLM must be specified as path to a model, URL or local file.",
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-N",
         "--n_lines",
         type=int,
-        default=8,
-        help="The number of lines per chunk for LLM analysis.\
-            This only makes sense when you are summarizing with LLM.",
+        default=None,
+        help="DISABLED: LLM summarization option was removed. \
+            Argument is kept for backward compatibility only.",
     )
     parser.add_argument(
         "-C",
@@ -65,10 +73,31 @@ def setup_args():
     )
     parser.add_argument("-v", "--verbose", action="count", default=0)
    parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument(
+        "--prompts",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/prompts.yml",
+        help="Path to prompt configuration file.",
+    )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=DEFAULT_TEMPERATURE,
+        help="Temperature for inference.",
+    )
+    parser.add_argument(
+        "--skip_snippets",
+        type=str,
+        default=f"{os.path.dirname(__file__)}/skip_snippets.yml",
+        help="Path to patterns for skipping snippets.",
+    )
+    parser.add_argument(
+        "--csgrep", action="store_true", help="Use csgrep to process the log."
+    )
     return parser.parse_args()
 
 
-def main():  # pylint: disable=too-many-statements
+async def run():  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
     """Main execution function."""
     args = setup_args()
 
@@ -76,6 +105,10 @@ def main():  # pylint: disable=too-many-statements
         sys.stderr.write("Error: --quiet and --verbose is mutually exclusive.\n")
         sys.exit(2)
 
+    # Emit warning about use of discontinued args
+    if args.n_lines or args.summarizer != "drain":
+        LOG.warning("LLM based summarization was removed. Drain will be used instead.")
+
     # Logging facility setup
     log_level = logging.INFO
     if args.verbose >= 1:
@@ -83,6 +116,9 @@ def main():  # pylint: disable=too-many-statements
     if args.quiet:
         log_level = 0
 
+    # Get prompts configuration
+    prompts_configuration = load_prompts(args.prompts)
+
     logging.basicConfig(stream=sys.stdout)
     LOG.setLevel(log_level)
 
@@ -96,29 +132,43 @@ def main():  # pylint: disable=too-many-statements
         LOG.error("You likely do not have enough memory to load the AI model")
         sys.exit(3)
 
-    # Log file summarizer selection and initialization
-    if args.summarizer == "drain":
-        extractor = DrainExtractor(
-            args.verbose > 1, context=True, max_clusters=args.n_clusters
-        )
-    else:
-        summarizer_model = initialize_model(args.summarizer, verbose=args.verbose > 2)
-        extractor = LLMExtractor(summarizer_model, args.verbose > 1)
-
-    LOG.info("Getting summary")
-
     try:
-        log = retrieve_log_content(args.file)
-    except ValueError as e:
-        # file does not exists
+        skip_snippets = load_skip_snippet_patterns(args.skip_snippets)
+    except OSError as e:
         LOG.error(e)
-        sys.exit(4)
-    log_summary = extractor(log)
+        sys.exit(5)
+
+    # Log file summarizer initialization
+    extractors = []
+    extractors.append(
+        DrainExtractor(
+            args.verbose > 1,
+            max_clusters=args.n_clusters,
+            skip_snippets=skip_snippets,
+        )
+    )
 
-    ratio = len(log_summary) / len(log.split("\n"))
+    if args.csgrep:
+        if not check_csgrep():
+            LOG.error(
+                "You have requested use of `csgrep` when it isn't available on your system."
+            )
+            sys.exit(6)
+        extractors.append(
+            CSGrepExtractor(args.verbose > 1, skip_snippets=skip_snippets)
+        )
+
+    LOG.info("Getting summary")
 
-    LOG.info("Compression ratio: %s", ratio)
+    async with aiohttp.ClientSession() as http:
+        try:
+            log = await retrieve_log_content(http, args.file)
+        except ValueError as e:
+            # file does not exist
+            LOG.error(e)
+            sys.exit(4)
 
+    log_summary = mine_logs(log=log, extractors=extractors)
     LOG.info("Analyzing the text")
 
     log_summary = format_snippets(log_summary)
@@ -127,33 +177,52 @@ def main():  # pylint: disable=too-many-statements
     stream = True
     if args.no_stream:
         stream = False
-    response = process_log(log_summary, model, stream)
+    response = process_log(
+        log_summary,
+        model,
+        stream,
+        prompt_templates=prompts_configuration,
+        temperature=args.temperature,
+    )
     probs = []
     print("Explanation:")
     # We need to extract top token probability from the response
-    # CreateCompletionResponse structure of llama-cpp-python.
+    # CreateChatCompletionResponse structure of llama-cpp-python.
     # `compute_certainty` function expects list of dictionaries with form
     # { 'logprob': <float> } as expected from the OpenAI API.
 
     if args.no_stream:
-        print(response["choices"][0]["text"])
+        print(response["choices"][0]["message"]["content"])
         probs = [
-            {"logprob": e} for e in response["choices"][0]["logprobs"]["token_logprobs"]
+            {"logprob": e["logprob"]} for e in response["choices"][0]["logprobs"]["content"]
        ]
 
     else:
         # Stream the output
         for chunk in response:
+            # What might happen, is that first (or possibly any other) chunk may not contain
+            # fields choices[0].delta.content or choices[0].logprobs -> if so, we just skip it
+            if any([
+                'content' not in chunk["choices"][0]["delta"],
+                'logprobs' not in chunk["choices"][0]
+            ]):
+                continue
+
            if isinstance(chunk["choices"][0]["logprobs"], dict):
                probs.append(
-                    {"logprob": chunk["choices"][0]["logprobs"]["token_logprobs"][0]}
+                    {"logprob": chunk["choices"][0]["logprobs"]["content"][0]["logprob"]}
                )
-            delta = chunk["choices"][0]["text"]
+            delta = chunk["choices"][0]["delta"]["content"]
            print(delta, end="", flush=True)
     certainty = compute_certainty(probs)
 
     print(f"\nResponse certainty: {certainty:.2f}%\n")
 
 
+def main():
+    """Evaluate logdetective program and wait for it to finish"""
+    asyncio.run(run())
+
+
 if __name__ == "__main__":
     main()
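Taken together, the new surface might be exercised as `logdetective build.log --csgrep --temperature 0.2 --prompts ./prompts.yml` (flag names from the parser above; values and log path invented). The only contract the certainty code needs, per the comments in the diff, is a list of `{"logprob": <float>}` dicts; a minimal sketch with invented values:

# Minimal sketch, not package code: the probs shape expected by
# compute_certainty, per the comments above. Logprob values are invented.
from logdetective.utils import compute_certainty

probs = [{"logprob": -0.05}, {"logprob": -0.61}]
certainty = compute_certainty(probs)
print(f"\nResponse certainty: {certainty:.2f}%\n")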
logdetective/models.py ADDED
@@ -0,0 +1,99 @@
+import re
+from typing import Optional
+from pydantic import BaseModel, model_validator
+
+from logdetective.constants import (
+    PROMPT_TEMPLATE,
+    PROMPT_TEMPLATE_STAGED,
+    SNIPPET_PROMPT_TEMPLATE,
+    DEFAULT_SYSTEM_PROMPT,
+)
+
+
+class PromptConfig(BaseModel):
+    """Configuration for basic log detective prompts."""
+
+    prompt_template: str = PROMPT_TEMPLATE
+    snippet_prompt_template: str = SNIPPET_PROMPT_TEMPLATE
+    prompt_template_staged: str = PROMPT_TEMPLATE_STAGED
+
+    default_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+    snippet_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+    staged_system_prompt: str = DEFAULT_SYSTEM_PROMPT
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__()
+        if data is None:
+            return
+        self.prompt_template = data.get("prompt_template", PROMPT_TEMPLATE)
+        self.snippet_prompt_template = data.get(
+            "snippet_prompt_template", SNIPPET_PROMPT_TEMPLATE
+        )
+        self.prompt_template_staged = data.get(
+            "prompt_template_staged", PROMPT_TEMPLATE_STAGED
+        )
+        self.default_system_prompt = data.get(
+            "default_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+        self.snippet_system_prompt = data.get(
+            "snippet_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+        self.staged_system_prompt = data.get(
+            "staged_system_prompt", DEFAULT_SYSTEM_PROMPT
+        )
+
+
+class SkipSnippets(BaseModel):
+    """Regular expressions defining snippets we should not analyze"""
+
+    snippet_patterns: dict[str, re.Pattern] = {}
+
+    def __init__(self, data: Optional[dict] = None):
+        super().__init__(data=data)
+        if data is None:
+            return
+        self.snippet_patterns = {
+            key: re.compile(pattern) for key, pattern in data.items()
+        }
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_patterns(cls, data: dict):
+        """Check if all supplied patterns are valid regular expressions.
+        Technically replicating what is done in __init__ but with nicer error message."""
+        patterns = data["data"]
+        for key, pattern in patterns.items():
+            try:
+                re.compile(pattern=pattern)
+            except (TypeError, re.error) as ex:
+                raise ValueError(
+                    f"Invalid pattern `{pattern}` with name `{key}` supplied for skipping in logs."
+                ) from ex
+
+        return data
+
+
+class CSGrepEvent(BaseModel):
+    """`csgrep` splits error and warning messages into individual events."""
+
+    file_name: str
+    line: int
+    event: str
+    message: str
+    verbosity_level: int
+
+
+class CSGrepDefect(BaseModel):
+    """Defects detected by `csgrep`"""
+
+    checker: str
+    language: str
+    tool: str
+    key_event_idx: int
+    events: list[CSGrepEvent]
+
+
+class CSGrepOutput(BaseModel):
+    """Parsed output of `csgrep`"""
+
+    defects: list[CSGrepDefect]
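The three `CSGrep*` models pin down the JSON shape the extractor expects from `csgrep --mode=json`. A hedged round-trip check, with field names taken from the models above and every value invented:

# Field names come from the models in this diff; values are invented.
from logdetective.models import CSGrepOutput

raw = """
{
  "defects": [
    {
      "checker": "COMPILER_WARNING",
      "language": "c/c++",
      "tool": "gcc",
      "key_event_idx": 0,
      "events": [
        {
          "file_name": "src/main.c",
          "line": 42,
          "event": "error",
          "message": "implicit declaration of function 'foo'",
          "verbosity_level": 0
        }
      ]
    }
  ]
}
"""
report = CSGrepOutput.model_validate_json(raw)
assert report.defects[0].events[0].line == 42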
logdetective/prompts-summary-first.yml ADDED
@@ -0,0 +1,20 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
+
+  Please start with concise, one sentence long, summary describing the problem and recommend solution to fix it. And then follow with analysis.
+
+  Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+  Snippets themselves must not be altered in any way whatsoever.
+
+  Snippets are delimited with '================'.
+
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
+  Snippets:
+
+  {}
logdetective/prompts-summary-only.yml ADDED
@@ -0,0 +1,13 @@
+# This file is intended for customization of prompts
+# It is used only in server mode.
+# On command line you have to load it using --prompts
+# The defaults are stored in constants.py
+
+prompt_template: |
+  Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
+
+  Provide concise, one paragraph long, summary describing the problem of most probable culprit and recommend solution to fix it.
+
+  Snippets:
+
+  {}
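A hedged sketch of consuming one of these files by hand: `PromptConfig` accepts the parsed dict directly (see models.py above), while the use of `yaml.safe_load` here is an assumption about how `load_prompts` behaves, not the package's actual loading path:

# Assumed loading path, not the package's load_prompts implementation.
import yaml

from logdetective.models import PromptConfig

with open("logdetective/prompts-summary-only.yml", encoding="utf-8") as f:
    prompts = PromptConfig(yaml.safe_load(f))

# Keys absent from the YAML fall back to the defaults in constants.py.
print(prompts.prompt_template.format("=== example snippet ==="))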