logdetective 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: logdetective
- Version: 0.2.6
+ Version: 0.2.8
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -14,14 +14,16 @@ Classifier: Natural Language :: English
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Internet :: Log Analysis
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
  Provides-Extra: server
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
  Requires-Dist: huggingface-hub (>0.23.2)
- Requires-Dist: llama-cpp-python (>=0.2.56,<0.3.0,!=0.2.86)
- Requires-Dist: requests (>=2.31.0,<3.0.0)
+ Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
+ Requires-Dist: numpy (>=1.26.0,<2.0.0)
+ Requires-Dist: requests (>0.2.31)
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
  Description-Content-Type: text/markdown
@@ -38,7 +40,7 @@ A Python tool to analyze logs using a Language Model (LLM) and Drain template mi
  Installation
  ------------

- ** Fedora 40+ **
+ **Fedora 40+**

  dnf install logdetective

@@ -193,6 +195,11 @@ Requests can then be made with post requests, for example:

  curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze

+ For more accurate responses, you can use the `/analyze/staged` endpoint. This will submit snippets to the model for individual analysis first.
+ Afterwards the model outputs are used to construct the final prompt. This will take substantially longer compared to plain `/analyze`.
+
+ curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze/staged
+
  We also have a Containerfile and composefile to run the logdetective server and llama server in containers.

  Before doing `podman-compose up`, make sure to set `MODELS_PATH` environment variable and point to a directory with your local model files:
@@ -204,6 +211,11 @@ $ ll $MODELS_PATH

  If the variable is not set, `./models` is mounted inside by default.

+ A model can be downloaded from [our Hugging Face space](https://huggingface.co/fedora-copr) with:
+ ```
+ $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
+ ```
+

  License
  -------
@@ -10,7 +10,7 @@ A Python tool to analyze logs using a Language Model (LLM) and Drain template mi
  Installation
  ------------

- ** Fedora 40+ **
+ **Fedora 40+**

  dnf install logdetective

@@ -165,6 +165,11 @@ Requests can then be made with post requests, for example:

  curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze

+ For more accurate responses, you can use the `/analyze/staged` endpoint. This will submit snippets to the model for individual analysis first.
+ Afterwards the model outputs are used to construct the final prompt. This will take substantially longer compared to plain `/analyze`.
+
+ curl --header "Content-Type: application/json" --request POST --data '{"url":"<YOUR_URL_HERE>"}' http://localhost:8080/analyze/staged
+
  We also have a Containerfile and composefile to run the logdetective server and llama server in containers.

  Before doing `podman-compose up`, make sure to set `MODELS_PATH` environment variable and point to a directory with your local model files:
@@ -176,6 +181,11 @@ $ ll $MODELS_PATH

  If the variable is not set, `./models` is mounted inside by default.

+ A model can be downloaded from [our Hugging Face space](https://huggingface.co/fedora-copr) with:
+ ```
+ $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
+ ```
+

  License
  -------
@@ -30,3 +30,16 @@ Log:
  Answer:

  """
+
+ SNIPPET_PROMPT_TEMPLATE = """
+ Analyse the following RPM build log snippet.
+ Analysis of the snippets must be in the format [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
+ Snippets themselves must not be altered in any way whatsoever.
+
+ Snippet:
+
+ {}
+
+ Analysis:
+
+ """
@@ -3,7 +3,8 @@ import logging
  import sys

  from logdetective.constants import DEFAULT_ADVISOR
- from logdetective.utils import process_log, initialize_model, retrieve_log_content, format_snippets
+ from logdetective.utils import (
+     process_log, initialize_model, retrieve_log_content, format_snippets, compute_certainty)
  from logdetective.extractors import LLMExtractor, DrainExtractor

  LOG = logging.getLogger("logdetective")
@@ -92,15 +93,21 @@ def main():
      if args.no_stream:
          stream = False
      response = process_log(log_summary, model, stream)
+     probs = []
      print("Explanation:")
      if args.no_stream:
          print(response["choices"][0]["text"])
+         probs = response["choices"][0]["logprobs"]["top_logprobs"]
      else:
          # Stream the output
          for chunk in response:
+             if isinstance(chunk["choices"][0]["logprobs"], dict):
+                 probs.extend(chunk["choices"][0]["logprobs"]["top_logprobs"])
              delta = chunk['choices'][0]['text']
              print(delta, end='', flush=True)
-     print()
+     certainty = compute_certainty(probs)
+
+     print(f"\nResponse certainty: {certainty:.2f}%\n")


  if __name__ == "__main__":
@@ -0,0 +1,242 @@
+ import asyncio
+ import json
+ import logging
+ import os
+ from typing import List, Annotated
+
+ from llama_cpp import CreateCompletionResponse
+ from fastapi import FastAPI, HTTPException, Depends, Header
+ from fastapi.responses import StreamingResponse
+ from pydantic import BaseModel
+ import requests
+
+ from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_PROMPT_TEMPLATE
+ from logdetective.extractors import DrainExtractor
+ from logdetective.utils import validate_url, compute_certainty
+
+ class BuildLog(BaseModel):
+     """Model of data submitted to API.
+     """
+     url: str
+
+
+ class Response(BaseModel):
+     """Model of data returned by Log Detective API
+
+     explanation: CreateCompletionResponse
+         https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     response_certainty: float
+     """
+     explanation: CreateCompletionResponse
+     response_certainty: float
+
+
+ class StagedResponse(Response):
+     """Model of data returned by Log Detective API when a staged response
+     is requested. Contains a list of responses to prompts for individual snippets.
+
+     explanation: CreateCompletionResponse
+         https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+     response_certainty: float
+     snippets: list of CreateCompletionResponse
+     """
+     snippets: List[CreateCompletionResponse]
+
+
+ LOG = logging.getLogger("logdetective")
+
+
+ LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
+ LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
+ LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
+ LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
+ LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
+ API_TOKEN = os.environ.get("LOGDETECTIVE_TOKEN", None)
+
+ def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
+     """
+     FastAPI Depend function that expects a header named Authentication
+
+     If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token,
+     otherwise ignore it
+     """
+     if not API_TOKEN:
+         LOG.info("LOGDETECTIVE_TOKEN env var not set, authentication disabled")
+         # no token required, means local dev environment
+         return
+     token = None
+     if authentication:
+         try:
+             token = authentication.split(" ", 1)[1]
+         except (ValueError, IndexError):
+             LOG.warning(
+                 "Authentication header has invalid structure (%s), it should be 'Bearer TOKEN'",
+                 authentication)
+             # eat the exception and raise 401 below
+             token = None
+     if token == API_TOKEN:
+         return
+     LOG.info("LOGDETECTIVE_TOKEN env var is set (%s), client token = %s",
+              API_TOKEN, token)
+     raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
+
+ app = FastAPI(dependencies=[Depends(requires_token_when_set)])
+
+
+ def process_url(url: str) -> str:
+     """Validate log URL and return log text.
+     """
+     if validate_url(url=url):
+         try:
+             log_request = requests.get(url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
+         except requests.RequestException as ex:
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"We couldn't obtain the logs: {ex}") from ex
+
+         if not log_request.ok:
+             raise HTTPException(status_code=400,
+                                 detail="Something went wrong while getting the logs: "
+                                 f"[{log_request.status_code}] {log_request.text}")
+     else:
+         LOG.error("Invalid URL received ")
+         raise HTTPException(status_code=400,
+                             detail=f"Invalid log URL: {url}")
+
+     return log_request.text
+
+
+ def mine_logs(log: str) -> List[str]:
+     """Extract snippets from log text
+     """
+     extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
+
+     LOG.info("Getting summary")
+     log_summary = extractor(log)
+
+     ratio = len(log_summary) / len(log.split('\n'))
+     LOG.debug("Log summary: \n %s", log_summary)
+     LOG.info("Compression ratio: %s", ratio)
+
+
+     return log_summary
+
+ async def submit_text(text: str, max_tokens: int = 0, log_probs: int = 1, stream: bool = False,
+                       model: str = "default-model"):
+     """Submit prompt to LLM.
+     max_tokens: number of tokens to be produced, 0 indicates run until encountering EOS
+     log_probs: number of token choices to produce log probs for
+     """
+     LOG.info("Analyzing the text")
+     data = {
+         "prompt": text,
+         "max_tokens": str(max_tokens),
+         "logprobs": str(log_probs),
+         "stream": stream,
+         "model": model}
+
+     try:
+         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
+         response = requests.post(
+             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+             headers={"Content-Type":"application/json"},
+             data=json.dumps(data),
+             timeout=int(LLM_CPP_SERVER_TIMEOUT),
+             stream=stream)
+     except requests.RequestException as ex:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Llama-cpp query failed: {ex}") from ex
+     if not stream:
+         if not response.ok:
+             raise HTTPException(
+                 status_code=400,
+                 detail="Something went wrong while getting a response from the llama server: "
+                 f"[{response.status_code}] {response.text}")
+         try:
+             response = json.loads(response.text)
+         except UnicodeDecodeError as ex:
+             LOG.error("Error encountered while parsing llama server response: %s", ex)
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}") from ex
+     else:
+         return response
+
+     return CreateCompletionResponse(response)
+
+
+ @app.post("/analyze", response_model=Response)
+ async def analyze_log(build_log: BuildLog):
+     """Provide endpoint for log file submission and analysis.
+     Request must be in form {"url":"<YOUR_URL_HERE>"}.
+     URL must be valid for the request to be passed to the LLM server.
+     Meaning that it must contain appropriate scheme, path and netloc,
+     while lacking result, params or query fields.
+     """
+     log_text = process_url(build_log.url)
+     log_summary = mine_logs(log_text)
+     response = await submit_text(PROMPT_TEMPLATE.format(log_summary))
+     certainty = 0
+
+     if "logprobs" in response["choices"][0]:
+         try:
+             certainty = compute_certainty(
+                 response["choices"][0]["logprobs"]["top_logprobs"])
+         except ValueError as ex:
+             LOG.error("Error encountered while computing certainty: %s", ex)
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"Couldn't compute certainty with data:\n"
+                 f"{response['choices'][0]['logprobs']['top_logprobs']}") from ex
+
+     return Response(explanation=response, response_certainty=certainty)
+
+
+ @app.post("/analyze/staged", response_model=StagedResponse)
+ async def analyze_log_staged(build_log: BuildLog):
+     """Provide endpoint for log file submission and analysis.
+     Request must be in form {"url":"<YOUR_URL_HERE>"}.
+     URL must be valid for the request to be passed to the LLM server.
+     Meaning that it must contain appropriate scheme, path and netloc,
+     while lacking result, params or query fields.
+     """
+     log_text = process_url(build_log.url)
+     log_summary = mine_logs(log_text)
+
+     # Process snippets asynchronously
+     analyzed_snippets = await asyncio.gather(
+         *[submit_text(SNIPPET_PROMPT_TEMPLATE.format(s)) for s in log_summary])
+
+     final_analysis = await submit_text(
+         PROMPT_TEMPLATE.format([e["choices"][0]["text"] for e in analyzed_snippets]))
+
+     certainty = 0
+     if "logprobs" in final_analysis["choices"][0]:
+         try:
+             certainty = compute_certainty(
+                 final_analysis["choices"][0]["logprobs"]["top_logprobs"])
+         except ValueError as ex:
+             LOG.error("Error encountered while computing certainty: %s", ex)
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"Couldn't compute certainty with data:\n"
+                 f"{final_analysis['choices'][0]['logprobs']['top_logprobs']}") from ex
+
+     return StagedResponse(
+         explanation=final_analysis, snippets=analyzed_snippets, response_certainty=certainty)
+
+
+ @app.post("/analyze/stream", response_class=StreamingResponse)
+ async def analyze_log_stream(build_log: BuildLog):
+     """Stream response endpoint for Logdetective.
+     Request must be in form {"url":"<YOUR_URL_HERE>"}.
+     URL must be valid for the request to be passed to the LLM server.
+     Meaning that it must contain appropriate scheme, path and netloc,
+     while lacking result, params or query fields.
+     """
+     log_text = process_url(build_log.url)
+     log_summary = mine_logs(log_text)
+     stream = await submit_text(PROMPT_TEMPLATE.format(log_summary), stream=True)
+
+     return StreamingResponse(stream)
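Put together, the endpoints above accept the same `{"url":"<YOUR_URL_HERE>"}` payload as before, and `/analyze/staged` additionally returns the per-snippet completions and a certainty score. A minimal client sketch, assuming a server from this release on `localhost:8080`; the `Authentication` header is only needed when `LOGDETECTIVE_TOKEN` is set on the server:

```
import os

import requests

headers = {"Content-Type": "application/json"}
token = os.environ.get("LOGDETECTIVE_TOKEN")
if token:
    # requires_token_when_set() above expects the token as "Bearer <token>"
    # in a header named Authentication.
    headers["Authentication"] = f"Bearer {token}"

resp = requests.post(
    "http://localhost:8080/analyze/staged",
    headers=headers,
    json={"url": "<YOUR_URL_HERE>"},
    timeout=600,
)
resp.raise_for_status()
data = resp.json()

# StagedResponse fields: explanation, snippets, response_certainty.
print(f"Response certainty: {data['response_certainty']:.2f}%")
print(data["explanation"]["choices"][0]["text"])
print(f"{len(data['snippets'])} snippets were analysed individually")
```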
@@ -1,10 +1,11 @@
  import logging
  import os
+ from typing import Iterator, List, Dict
  from urllib.parse import urlparse
-
+ import numpy as np
  import requests

- from llama_cpp import Llama
+ from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
  from logdetective.constants import PROMPT_TEMPLATE


@@ -60,20 +61,38 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
          model = Llama(
              model_path=model_pth,
              n_ctx=0, # Maximum context for the model
-             verbose=verbose)
+             verbose=verbose,
+             logits_all=True)
      else:
          model = Llama.from_pretrained(
              model_pth,
              f"*{filename_suffix}",
              n_ctx=0, # Maximum context for the model
-             verbose=verbose)
+             verbose=verbose,
+             logits_all=True)

      return model


- def process_log(log: str, model: Llama, stream: bool) -> str:
+ def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
+     """Compute certainty of response based on average logit probability.
+     Log probability is log(p), which isn't really readable for most people, especially in compound.
+     In this case it's just a matter of applying the inverse operation exp.
+     Of course that leaves you with a value in range <0, 1> so it needs to be multiplied by 100.
+     Simply put, this is the most straightforward way to get the numbers out.
      """
-     Processes a given log using the provided language model and returns its summary.
+
+     top_logprobs = [
+         np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
+     certainty = np.median(top_logprobs, axis=0)
+     if np.isnan(certainty):
+         raise ValueError("NaN certainty of answer")
+     return certainty
+
+
+ def process_log(log: str, model: Llama, stream: bool) -> (
+         CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]):
+     """Processes a given log using the provided language model and returns its summary.

      Args:
@@ -82,10 +101,13 @@ def process_log(log: str, model: Llama, stream: bool) -> str:
      Returns:
          str: The summary of the given log generated by the language model.
      """
-     return model(
+     response = model(
          prompt=PROMPT_TEMPLATE.format(log),
          stream=stream,
-         max_tokens=0)
+         max_tokens=0,
+         logprobs=1)
+
+     return response


  def retrieve_log_content(log_path: str) -> str:
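To make the new certainty figure concrete, here is a small standalone sketch of the same computation `compute_certainty` performs, using hypothetical llama.cpp-style `top_logprobs` values (one dict per generated token, `None` entries skipped):

```
import numpy as np

# Hypothetical top_logprobs as returned with logprobs=1: log(p) per chosen token.
top_logprobs = [{"The": -0.11}, {" build": -0.36}, None, {" failed": -0.92}]

# exp() inverts log(p) back into a probability, *100 turns it into a percentage,
# and the median over all tokens is reported as the response certainty.
probs = [np.exp(x) * 100 for e in top_logprobs if isinstance(e, dict) for x in e.values()]
print(f"Response certainty: {np.median(probs):.2f}%")  # ~69.77% for these values
```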
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "logdetective"
- version = "0.2.6"
+ version = "0.2.8"
  description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
  authors = ["Jiri Podivin <jpodivin@gmail.com>"]
  license = "Apache-2.0"
@@ -27,10 +27,11 @@ issues = "https://github.com/fedora-copr/logdetective/issues"

  [tool.poetry.dependencies]
  python = "^3.11"
- requests = "^2.31.0"
- llama-cpp-python = "^0.2.56,!=0.2.86"
+ requests = ">0.2.31"
+ llama-cpp-python = ">0.2.56,!=0.2.86"
  drain3 = "^0.9.11"
  huggingface-hub = ">0.23.2"
+ numpy = "^1.26.0"

  [build-system]
  requires = ["poetry-core"]
@@ -1,87 +0,0 @@
- import logging
- import os
- import json
-
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
-
- import requests
-
- from logdetective.constants import PROMPT_TEMPLATE
- from logdetective.extractors import DrainExtractor
- from logdetective.utils import validate_url
-
- class BuildLog(BaseModel):
-     """Model of data submitted to API.
-     """
-     url: str
-
- LOG = logging.getLogger("logdetective")
-
- app = FastAPI()
-
- LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
- LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
- LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
- LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
- LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
-
- @app.post("/analyze", )
- async def analyze_log(build_log: BuildLog):
-     """Provide endpoint for log file submission and analysis.
-     Request must be in form {"url":"<YOUR_URL_HERE>"}.
-     URL must be valid for the request to be passed to the LLM server.
-     Meaning that it must contain appropriate scheme, path and netloc,
-     while lacking result, params or query fields.
-     """
-     extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
-
-     LOG.info("Getting summary")
-     # Perform basic validation of the URL
-     if validate_url(url=build_log.url):
-         try:
-             log_request = requests.get(build_log.url, timeout=int(LOG_SOURCE_REQUEST_TIMEOUT))
-         except requests.RequestException as ex:
-             raise HTTPException(
-                 status_code=400,
-                 detail=f"We couldn't obtain the logs: {ex}") from ex
-
-         if not log_request.ok:
-             raise HTTPException(status_code=400,
-                                 detail="Something went wrong while getting the logs: "
-                                 f"[{log_request.status_code}] {log_request.text}")
-     else:
-         LOG.error("Invalid URL received ")
-         raise HTTPException(status_code=400,
-                             detail=f"Invalid log URL: {build_log.url}")
-
-     log = log_request.text
-     log_summary = extractor(log)
-
-     ratio = len(log_summary) / len(log.split('\n'))
-     LOG.debug("Log summary: \n %s", log_summary)
-     LOG.info("Compression ratio: %s", ratio)
-
-     LOG.info("Analyzing the text")
-     data = {
-         "prompt": PROMPT_TEMPLATE.format(log_summary),
-         "max_tokens": "0"}
-
-     try:
-         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
-         response = requests.post(
-             f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
-             headers={"Content-Type":"application/json"},
-             data=json.dumps(data),
-             timeout=int(LLM_CPP_SERVER_TIMEOUT))
-     except requests.RequestException as ex:
-         raise HTTPException(
-             status_code=400,
-             detail=f"Llama-cpp query failed: {ex}") from ex
-
-     if not log_request.ok:
-         raise HTTPException(
-             status_code=400,
-             detail="Something went wrong while getting a response from the llama server: "
-             f"[{log_request.status_code}] {log_request.text}")
-     return response.text
File without changes