logdetective 0.2.3__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logdetective/constants.py CHANGED
@@ -1,6 +1,6 @@
 
  # pylint: disable=line-too-long
- DEFAULT_ADVISOR = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+ DEFAULT_ADVISOR = "fedora-copr/Mistral-7B-Instruct-v0.2-GGUF"
 
  PROMPT_TEMPLATE = """
  Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
logdetective/logdetective.py CHANGED
@@ -8,9 +8,8 @@ from logdetective.extractors import LLMExtractor, DrainExtractor
 
  LOG = logging.getLogger("logdetective")
 
-
- def main():
-     """Main execution function."""
+ def setup_args():
+     """ Setup argument parser and return arguments. """
      parser = argparse.ArgumentParser("logdetective")
      parser.add_argument("file", type=str,
                          default="", help="The URL or path to the log file to be analyzed.")
@@ -21,6 +20,7 @@ def main():
                          help="Suffix of the model file name to be retrieved from Hugging Face.\
                          Makes sense only if the model is specified with Hugging Face name.",
                          default="Q4_K_S.gguf")
+     parser.add_argument("-n", "--no-stream", action='store_true')
      parser.add_argument("-S", "--summarizer", type=str, default="drain",
                          help="Choose between LLM and Drain template miner as the log summarizer.\
                          LLM must be specified as path to a model, URL or local file.")
@@ -32,7 +32,12 @@ def main():
                          This only makes sense when you are summarizing with Drain")
      parser.add_argument("-v", "--verbose", action='count', default=0)
      parser.add_argument("-q", "--quiet", action='store_true')
-     args = parser.parse_args()
+     return parser.parse_args()
+
+
+ def main():
+     """Main execution function."""
+     args = setup_args()
 
      if args.verbose and args.quiet:
          sys.stderr.write("Error: --quiet and --verbose is mutually exclusive.\n")
@@ -49,8 +54,13 @@ def main():
      LOG.setLevel(log_level)
 
      # Primary model initialization
-     model = initialize_model(args.model, filename_suffix=args.filename_suffix,
-                              verbose=args.verbose > 2)
+     try:
+         model = initialize_model(args.model, filename_suffix=args.filename_suffix,
+                                  verbose=args.verbose > 2)
+     except ValueError as e:
+         LOG.error(e)
+         LOG.error("You likely do not have enough memory to load the AI model")
+         sys.exit(3)
 
      # Log file summarizer selection and initialization
      if args.summarizer == "drain":
@@ -61,7 +71,12 @@ def main():
 
      LOG.info("Getting summary")
 
-     log = retrieve_log_content(args.file)
+     try:
+         log = retrieve_log_content(args.file)
+     except ValueError as e:
+         # file does not exists
+         LOG.error(e)
+         sys.exit(4)
      log_summary = extractor(log)
 
      ratio = len(log_summary) / len(log.split('\n'))
@@ -73,7 +88,19 @@ def main():
      log_summary = format_snippets(log_summary)
      LOG.info("Log summary: \n %s", log_summary)
 
-     print(f"Explanation: \n{process_log(log_summary, model)}")
+     stream = True
+     if args.no_stream:
+         stream = False
+     response = process_log(log_summary, model, stream)
+     print("Explanation:")
+     if args.no_stream:
+         print(response["choices"][0]["text"])
+     else:
+         # Stream the output
+         for chunk in response:
+             delta = chunk['choices'][0]['text']
+             print(delta, end='', flush=True)
+         print()
 
 
  if __name__ == "__main__":
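
As a quick illustration of the new streaming toggle (not part of the packaged code), here is a minimal sketch of how a caller sees the difference made by the `stream` argument that `process_log` now takes. It assumes the default `fedora-copr` model shown above can be downloaded and fits in memory; the log snippet is an invented example input.

```python
# Minimal sketch: what process_log() returns with and without streaming.
from logdetective.utils import initialize_model, process_log

# Default advisor model and filename suffix, as in the 0.2.6 defaults above.
model = initialize_model("fedora-copr/Mistral-7B-Instruct-v0.2-GGUF",
                         filename_suffix="Q4_K_S.gguf")

snippet = "error: linker command failed with exit code 1"  # example input

# stream=False: a single completion dict, as printed by `logdetective --no-stream`.
result = process_log(snippet, model, stream=False)
print(result["choices"][0]["text"])

# stream=True: an iterator of chunks, printed incrementally by the default CLI path.
for chunk in process_log(snippet, model, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()
```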
logdetective/server.py CHANGED
@@ -2,14 +2,14 @@ import logging
  import os
  import json
 
- from fastapi import FastAPI
+ from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
 
  import requests
 
  from logdetective.constants import PROMPT_TEMPLATE
  from logdetective.extractors import DrainExtractor
-
+ from logdetective.utils import validate_url
 
  class BuildLog(BaseModel):
      """Model of data submitted to API.
@@ -20,7 +20,8 @@ LOG = logging.getLogger("logdetective")
 
  app = FastAPI()
 
- LLM_CPP_SERVER_ADDRESS = os.environ.get("LLAMA_CPP_SERVER", " http://localhost")
+ LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
+ LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
  LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
  LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
@@ -29,12 +30,32 @@ LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
  async def analyze_log(build_log: BuildLog):
      """Provide endpoint for log file submission and analysis.
      Request must be in form {"url":"<YOUR_URL_HERE>"}.
+     URL must be valid for the request to be passed to the LLM server.
+     Meaning that it must contain appropriate scheme, path and netloc,
+     while lacking result, params or query fields.
      """
      extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
 
      LOG.info("Getting summary")
+     # Perform basic validation of the URL
+     if validate_url(url=build_log.url):
+         try:
+             log_request = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
+         except requests.RequestException as ex:
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"We couldn't obtain the logs: {ex}") from ex
+
+         if not log_request.ok:
+             raise HTTPException(status_code=400,
+                                 detail="Something went wrong while getting the logs: "
+                                 f"[{log_request.status_code}] {log_request.text}")
+     else:
+         LOG.error("Invalid URL received ")
+         raise HTTPException(status_code=400,
+                             detail=f"Invalid log URL: {build_log.url}")
 
-     log = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT)).text
+     log = log_request.text
      log_summary = extractor(log)
 
      ratio = len(log_summary) / len(log.split('\n'))
@@ -46,11 +67,21 @@ async def analyze_log(build_log: BuildLog):
                   "prompt": PROMPT_TEMPLATE.format(log_summary),
                   "max_tokens": "0"}
 
-     # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
-     response = requests.post(
-         f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-         headers={"Content-Type":"application/json"},
-         data=json.dumps(data),
-         timeout=int(LLM_CPP_SERVER_TIMEOUT))
+     try:
+         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
+         response = requests.post(
+             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+             headers={"Content-Type":"application/json"},
+             data=json.dumps(data),
+             timeout=int(LLM_CPP_SERVER_TIMEOUT))
+     except requests.RequestException as ex:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Llama-cpp query failed: {ex}") from ex
 
+     if not log_request.ok:
+         raise HTTPException(
+             status_code=400,
+             detail="Something went wrong while getting a response from the llama server: "
+             f"[{log_request.status_code}] {log_request.text}")
      return response.text
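
To show what the new validation and error handling in the server look like from the client side (not part of the packaged code), here is a minimal sketch. It assumes the server is running locally with `fastapi dev logdetective/server.py --port 8080`, as the README describes; the URLs are example inputs.

```python
# Minimal sketch: how the 0.2.6 /analyze endpoint reacts to valid and invalid URLs.
import requests

API = "http://localhost:8080/analyze"  # assumes the README's local dev setup

# URLs carrying a query string (or params/fragment) are now rejected up front with HTTP 400.
bad = requests.post(API, json={"url": "https://example.com/logs.txt?download=true"})
print(bad.status_code, bad.json().get("detail"))  # 400, "Invalid log URL: ..."

# A plain http(s) URL passes validation; failures fetching it also surface as HTTP 400.
good = requests.post(API, json={"url": "https://example.com/build.log"})
print(good.status_code, good.text[:200])
```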
logdetective/utils.py CHANGED
@@ -53,6 +53,9 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
          filename_suffix (str): suffix of the model file name to be pulled from Hugging Face
          verbose (bool): level of verbosity for llamacpp
      """
+
+     LOG.info("Loading model from %s", model_pth)
+
      if os.path.isfile(model_pth):
          model = Llama(
              model_path=model_pth,
@@ -68,7 +71,7 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
      return model
 
 
- def process_log(log: str, model: Llama) -> str:
+ def process_log(log: str, model: Llama, stream: bool) -> str:
      """
      Processes a given log using the provided language model and returns its summary.
 
@@ -79,11 +82,16 @@ def process_log(log: str, model: Llama) -> str:
      Returns:
          str: The summary of the given log generated by the language model.
      """
-     return model(PROMPT_TEMPLATE.format(log), max_tokens=0)["choices"][0]["text"]
+     return model(
+         prompt=PROMPT_TEMPLATE.format(log),
+         stream=stream,
+         max_tokens=0)
 
 
  def retrieve_log_content(log_path: str) -> str:
-     """Get content of the file on the log_path path."""
+     """Get content of the file on the log_path path.
+     Path is assumed to be valid URL if it has a scheme.
+     Otherwise it attempts to pull it from local filesystem."""
      parsed_url = urlparse(log_path)
      log = ""
 
@@ -113,3 +121,18 @@ def format_snippets(snippets: list[str]) -> str:
      ================
      """
      return summary
+
+
+ def validate_url(url: str) -> bool:
+     """Validate incoming URL to be at least somewhat sensible for log files
+     Only http and https protocols permitted. No result, params or query fields allowed.
+     Either netloc or path must have non-zero length.
+     """
+     result = urlparse(url)
+     if result.scheme not in ['http', 'https']:
+         return False
+     if any([result.params, result.query, result.fragment]):
+         return False
+     if not (result.path or result.netloc):
+         return False
+     return True
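
As a small illustration (not part of the packaged code), this sketch follows the checks in the `validate_url` body above and shows which URLs it accepts and rejects; the URLs are example inputs.

```python
# Minimal sketch: expected verdicts of the new validate_url() helper.
from logdetective.utils import validate_url

assert validate_url("https://example.com/build.log")            # plain https URL
assert validate_url("http://localhost/logs/fail.txt")           # http is also accepted
assert not validate_url("ftp://example.com/build.log")          # scheme must be http(s)
assert not validate_url("https://example.com/log?download=1")   # query string rejected
assert not validate_url("https://example.com/log#tail")         # fragment rejected
assert not validate_url("file.log")                             # no scheme at all
```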
logdetective-0.2.3.dist-info/METADATA → logdetective-0.2.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: logdetective
- Version: 0.2.3
+ Version: 0.2.6
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -19,8 +19,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
  Provides-Extra: server
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
- Requires-Dist: huggingface-hub (>=0.23.2,<0.24.0)
- Requires-Dist: llama-cpp-python (>=0.2.56,<0.3.0)
+ Requires-Dist: huggingface-hub (>0.23.2)
+ Requires-Dist: llama-cpp-python (>=0.2.56,<0.3.0,!=0.2.86)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
@@ -29,11 +29,21 @@ Description-Content-Type: text/markdown
  Log Detective
  =============
 
+ [![PyPI - Version](https://img.shields.io/pypi/v/logdetective?color=blue)][PyPI Releases]
+
+ [PyPI Releases]: https://pypi.org/project/logdetective/#history
+
  A Python tool to analyze logs using a Language Model (LLM) and Drain template miner.
 
  Installation
  ------------
 
+ **Fedora 40+**
+
+     dnf install logdetective
+
+ **From Pypi repository**
+
  The logdetective project is published on the [Pypi repository](https://pypi.org/project/logdetective/). The `pip` tool can be used for installation.
 
  First, ensure that the necessary dependencies for the `llama-cpp-python` project are installed. For Fedora, install `gcc-c++`:
@@ -41,8 +51,6 @@ First, ensure that the necessary dependencies for the `llama-cpp-python` project
      # for Fedora it will be:
      dnf install gcc-c++
 
- **From Pypi repository**
-
  Then, install the `logdetective` project using pip:
 
      # then install logdetective project
@@ -73,6 +81,11 @@ Or if the log file is stored locally:
  Example you want to use a different model:
 
      logdetective https://example.com/logs.txt --model https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true
+     logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
+
+ Note that streaming is broken with some models (notably Meta-Llama-3) and can be worked around with the `--no-stream` option:
+
+     logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --no-stream
 
 
  Real Example
@@ -172,7 +185,7 @@ or
  Server
  ------
 
- FastApi based server is implemented in `logdetective/server.py`. In order to run in a development mode,
+ FastApi based server is implemented in `logdetective/server.py`. In order to run it in a development mode,
  simply start llama-cpp-python server with your chosen model as described in llama-cpp-python [docs](https://llama-cpp-python.readthedocs.io/en/latest/server/#running-the-server).
 
  Afterwards, start the logdetective server with `fastapi dev logdetective/server.py --port 8080`.
@@ -180,6 +193,17 @@ Requests can then be made with post requests, for example:
 
      curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze
 
+ We also have a Containerfile and composefile to run the logdetective server and llama server in containers.
+
+ Before doing `podman-compose up`, make sure to set the `MODELS_PATH` environment variable and point it to a directory with your local model files:
+ ```
+ $ export MODELS_PATH=/path/to/models/
+ $ ll $MODELS_PATH
+ -rw-r--r--. 1 tt tt 3.9G apr 10 17:18 mistral-7b-instruct-v0.2.Q4_K_S.gguf
+ ```
+
+ If the variable is not set, `./models` is mounted inside by default.
+
 
 
  License
logdetective-0.2.6.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ logdetective/constants.py,sha256=2DlzXvqWgKca5fPXGPCxREYRfg0eHW0b8TATKoaqb54,711
+ logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+ logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
+ logdetective/logdetective.py,sha256=ee7et1mKyI33HaqIr7dR-o7AX1rijwPANw5s6fkDb-Q,4039
+ logdetective/server.py,sha256=jb7TuC5xzzlTR6cUrSaZa8vHPZZwH0ei30b5N0iNVX8,3176
+ logdetective/utils.py,sha256=UT3st9rbFXS8m-d0-3W39ENdVLhIraSH_K6vlKlDZ5w,3759
+ logdetective-0.2.6.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ logdetective-0.2.6.dist-info/METADATA,sha256=h9NS3BpBPHo3cu93xcPNOrgiq2De8DJTPr2Hm5nZhns,9063
+ logdetective-0.2.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ logdetective-0.2.6.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+ logdetective-0.2.6.dist-info/RECORD,,
logdetective-0.2.3.dist-info/RECORD DELETED
@@ -1,12 +0,0 @@
- logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- logdetective/constants.py,sha256=ObrYDQiPvZwCpokLbLQoSY_w_-wHl7l94EkXae7Xgq0,708
- logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
- logdetective/extractors.py,sha256=eRizRiKhC3MPTHXS5nlRKcEudEaqct7G28V1bZYGkqI,3103
- logdetective/logdetective.py,sha256=gOAgRqc5_GdqBAcDwpwJLjqIrJzbqujU-9rbk-_oSoE,3267
- logdetective/server.py,sha256=GAU6mggoZSf-ER3AHhmd7BKGDLh5ZcsnmkdHTFd_lTU,1715
- logdetective/utils.py,sha256=XRqVvPbAQ0ZAHGivHhAA1kTY8Tv6JAeSsA7gMMoPz8E,3034
- logdetective-0.2.3.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- logdetective-0.2.3.dist-info/METADATA,sha256=KlWaeyLiehfcsnGe5zWg7BMqGRCV0d_L6BIMYGMXVa8,8061
- logdetective-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- logdetective-0.2.3.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
- logdetective-0.2.3.dist-info/RECORD,,