logdetective 0.2.2__tar.gz → 0.2.3__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: logdetective
-Version: 0.2.2
+Version: 0.2.3
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -5,11 +5,16 @@ DEFAULT_ADVISOR = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
 PROMPT_TEMPLATE = """
 Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
 
-{}
+Analysis of the snippets must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+Snippets themselves must not be altered in any way whatsoever.
+
+Snippets are delimited with '================'.
 
-Analysis of the failure must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
-Finally, drawing on information from all snippets, provide complete explanation of the issue.
+Snippets:
+
+{}
 
 Analysis:
 
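The prompt rework does more than add instructions: the {} placeholder moves from the top of the template to a dedicated "Snippets:" section at the bottom, so the model reads the output-format rules before it sees any log content. A minimal sketch of how the new template gets filled, using a made-up snippet string (only PROMPT_TEMPLATE and its single placeholder are taken from the diff):

    from logdetective.constants import PROMPT_TEMPLATE

    # Hypothetical pre-formatted snippet block, shaped like format_snippets() output.
    snippets = "\nSnippet No. 0:\n\nerror: linker command failed\n================\n"

    # The template has exactly one placeholder, filled the same way process_log does it.
    prompt = PROMPT_TEMPLATE.format(snippets)
    print(prompt)  # instructions first, then the delimited snippets, then "Analysis:"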
@@ -21,7 +21,7 @@ class LLMExtractor:
         self.grammar = LlamaGrammar.from_string(
             "root ::= (\"Yes\" | \"No\")", verbose=False)
 
-    def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> str:
+    def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> list[str]:
         chunks = self.rate_chunks(log)
         out = self.create_extract(chunks, neighbors)
         return out
@@ -43,11 +43,11 @@ class LLMExtractor:
 
         return results
 
-    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> str:
+    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> list[str]:
         """Extract interesting chunks from the model processing.
         """
         interesting = []
-        summary = ""
+        summary = []
         # pylint: disable=consider-using-enumerate
         for i in range(len(chunks)):
             if chunks[i][1].startswith("Yes"):
@@ -58,7 +58,7 @@ class LLMExtractor:
         interesting = set(interesting)
 
         for i in interesting:
-            summary += chunks[i][0] + "\n"
+            summary.append(chunks[i][0])
 
         return summary
 
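Both LLMExtractor hunks are one coordinated change: create_extract now accumulates interesting chunks in a list instead of concatenating them into a newline-joined string, and __call__ passes that list through unchanged. A self-contained sketch of the behavioural difference, with invented chunk data:

    chunks = [("error: missing dependency", "Yes"), ("fetched sources ok", "No")]

    # 0.2.2 behaviour: snippets flattened into one string, boundaries only implied by '\n'
    summary_old = ""
    for text, verdict in chunks:
        if verdict.startswith("Yes"):
            summary_old += text + "\n"

    # 0.2.3 behaviour: snippets kept as discrete list items, so len() counts snippets
    summary_new = [text for text, verdict in chunks if verdict.startswith("Yes")]

    assert summary_old == "error: missing dependency\n"
    assert summary_new == ["error: missing dependency"]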
@@ -75,8 +75,8 @@ class DrainExtractor:
         self.verbose = verbose
         self.context = context
 
-    def __call__(self, log: str) -> str:
-        out = ""
+    def __call__(self, log: str) -> list[str]:
+        out = []
         for chunk in get_chunks(log):
             processed_line = self.miner.add_log_message(chunk)
             LOG.debug(processed_line)
@@ -84,6 +84,6 @@ class DrainExtractor:
         for chunk in get_chunks(log):
             cluster = self.miner.match(chunk, "always")
             if cluster in sorted_clusters:
-                out += f"{chunk}\n"
+                out.append(chunk)
                 sorted_clusters.remove(cluster)
         return out
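DrainExtractor.__call__ gets the matching change, so both extractor classes now share the same list[str] contract. Any caller that still needs the old flat-string form can reconstruct it; a sketch (the constructor arguments are assumed from the fields set in the diff, not confirmed by it):

    from logdetective.extractors import DrainExtractor

    log_text = "fetching sources\nerror: tests failed\nbuild aborted\n"
    extractor = DrainExtractor(verbose=False, context=False)  # assumed signature

    snippets = extractor(log_text)       # 0.2.3: list[str], one entry per matched chunk
    legacy = "\n".join(snippets) + "\n"  # approximates the 0.2.2 return value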
@@ -3,7 +3,7 @@ import logging
 import sys
 
 from logdetective.constants import DEFAULT_ADVISOR
-from logdetective.utils import process_log, initialize_model, retrieve_log_content
+from logdetective.utils import process_log, initialize_model, retrieve_log_content, format_snippets
 from logdetective.extractors import LLMExtractor, DrainExtractor
 
 LOG = logging.getLogger("logdetective")
@@ -64,11 +64,15 @@ def main():
     log = retrieve_log_content(args.file)
     log_summary = extractor(log)
 
-    ratio = len(log_summary.split('\n')) / len(log.split('\n'))
-    LOG.debug("Log summary: \n %s", log_summary)
+    ratio = len(log_summary) / len(log.split('\n'))
+
     LOG.info("Compression ratio: %s", ratio)
 
     LOG.info("Analyzing the text")
+
+    log_summary = format_snippets(log_summary)
+    LOG.info("Log summary: \n %s", log_summary)
+
     print(f"Explanation: \n{process_log(log_summary, model)}")
 
 
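The main() hunk shows why the extractors changed shape: the compression ratio can now be computed from the snippet count directly, and the snippets are only flattened into prompt text, via the new format_snippets, immediately before inference. The new ordering, reduced to its essential steps (names follow the diff):

    log = retrieve_log_content(args.file)  # raw build log as one string
    log_summary = extractor(log)           # list[str] of interesting snippets

    # snippets kept vs. total log lines; no more splitting a summary string
    ratio = len(log_summary) / len(log.split('\n'))

    log_summary = format_snippets(log_summary)  # list[str] -> numbered, delimited str
    print(f"Explanation: \n{process_log(log_summary, model)}")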
@@ -37,7 +37,7 @@ async def analyze_log(build_log: BuildLog):
     log = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT)).text
     log_summary = extractor(log)
 
-    ratio = len(log_summary.split('\n')) / len(log.split('\n'))
+    ratio = len(log_summary) / len(log.split('\n'))
     LOG.debug("Log summary: \n %s", log_summary)
     LOG.info("Compression ratio: %s", ratio)
 
@@ -81,6 +81,7 @@ def process_log(log: str, model: Llama) -> str:
     """
     return model(PROMPT_TEMPLATE.format(log), max_tokens=0)["choices"][0]["text"]
 
+
 def retrieve_log_content(log_path: str) -> str:
     """Get content of the file on the log_path path."""
     parsed_url = urlparse(log_path)
@@ -97,3 +98,18 @@ def retrieve_log_content(log_path: str) -> str:
         log = requests.get(log_path, timeout=60).text
 
     return log
+
+
+def format_snippets(snippets: list[str]) -> str:
+    """Format snippets, giving them separator, id and finally
+    concatenating them.
+    """
+    summary = ""
+    for i, s in enumerate(snippets):
+        summary += f"""
+        Snippet No. {i}:
+
+        {s}
+        ================
+        """
+    return summary
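format_snippets is the counterpart to the extractor change: it is the single place where the snippet list is turned back into the numbered, '================'-delimited block the new prompt template describes. A quick usage sketch:

    from logdetective.utils import format_snippets

    print(format_snippets(["error: linker failed", "error: exit status 1"]))
    # Each snippet is emitted as "Snippet No. <i>:" followed by its text and
    # the '================' delimiter the 0.2.3 prompt template refers to.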
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.2.2"
+version = "0.2.3"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"