logdetective 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/__init__.py +3 -0
- logdetective/server/database/__init__.py +0 -0
- logdetective/server/database/base.py +63 -0
- logdetective/server/database/models.py +88 -0
- logdetective/server/metric.py +82 -0
- logdetective/server/models.py +32 -12
- logdetective/server/server.py +162 -48
- logdetective/utils.py +3 -3
- {logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/METADATA +82 -1
- logdetective-0.4.0.dist-info/RECORD +19 -0
- logdetective-0.3.2.dist-info/RECORD +0 -15
- {logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/LICENSE +0 -0
- {logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/WHEEL +0 -0
- {logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/entry_points.txt +0 -0
logdetective/__init__.py
CHANGED

logdetective/server/database/base.py
ADDED
@@ -0,0 +1,63 @@
+from os import getenv
+from contextlib import contextmanager
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+
+from logdetective import logger
+
+
+def get_pg_url() -> str:
+    """create postgresql connection string"""
+    return (
+        f"postgresql+psycopg2://{getenv('POSTGRESQL_USER')}"
+        f":{getenv('POSTGRESQL_PASSWORD')}@{getenv('POSTGRESQL_HOST', 'postgres')}"
+        f":{getenv('POSTGRESQL_PORT', '5432')}/{getenv('POSTGRESQL_DATABASE')}"
+    )
+
+
+# To log SQL statements, set SQLALCHEMY_ECHO env. var. to True|T|Yes|Y|1
+sqlalchemy_echo = getenv("SQLALCHEMY_ECHO", "False").lower() in (
+    "true",
+    "t",
+    "yes",
+    "y",
+    "1",
+)
+engine = create_engine(get_pg_url(), echo=sqlalchemy_echo)
+SessionFactory = sessionmaker(autoflush=True, bind=engine)
+Base = declarative_base()
+
+
+@contextmanager
+def transaction(commit: bool = False):
+    """
+    Context manager for 'framing' a db transaction.
+
+    Args:
+        commit: Whether to call `Session.commit()` upon exiting the context. Should be set to True
+            if any changes are made within the context. Defaults to False.
+    """
+
+    session = SessionFactory()
+    try:
+        yield session
+        if commit:
+            session.commit()
+    except Exception as ex:
+        logger.warning("Exception while working with database: %s", str(ex))
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+
+def init():
+    """Init db"""
+    Base.metadata.create_all(engine)
+    logger.debug("Database initialized")
+
+
+def destroy():
+    """Destroy db"""
+    Base.metadata.drop_all(engine)
+    logger.warning("Database cleaned")
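Editorial note: the `transaction` context manager above frames all database access added in this release. A minimal usage sketch, not part of the diff, assuming the `POSTGRESQL_*` environment variables point at a reachable PostgreSQL instance (values below are placeholders):

```python
# Illustrative sketch only: wiring up the new logdetective.server.database
# layer. The engine is built from POSTGRESQL_* environment variables at
# import time, so they must be set first (placeholder values here).
import os

os.environ.setdefault("POSTGRESQL_USER", "logdetective")
os.environ.setdefault("POSTGRESQL_PASSWORD", "secret")
os.environ.setdefault("POSTGRESQL_HOST", "localhost")
os.environ.setdefault("POSTGRESQL_DATABASE", "logdetective")
os.environ.setdefault("SQLALCHEMY_ECHO", "yes")  # optional: echo SQL statements

from sqlalchemy import text  # noqa: E402
from logdetective.server.database import base  # noqa: E402

base.init()  # Base.metadata.create_all(engine)

# commit defaults to False; pass commit=True when the block modifies rows,
# otherwise the session is simply closed and nothing is persisted.
with base.transaction() as session:
    session.execute(text("SELECT 1"))
```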

logdetective/server/database/models.py
ADDED
@@ -0,0 +1,88 @@
+import enum
+import datetime
+
+from typing import Optional
+from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
+
+from logdetective.server.database.base import Base, transaction
+
+
+class EndpointType(enum.Enum):
+    """Different analyze endpoints"""
+
+    ANALYZE = "analyze_log"
+    ANALYZE_STAGED = "analyze_log_staged"
+    ANALYZE_STREAM = "analyze_log_stream"
+
+
+class AnalyzeRequestMetrics(Base):
+    """Store data related to received requests and given responses"""
+
+    __tablename__ = "analyze_request_metrics"
+
+    id = Column(Integer, primary_key=True)
+    endpoint = Column(
+        Enum(EndpointType),
+        nullable=False,
+        index=True,
+        comment="The service endpoint that was called",
+    )
+    request_received_at = Column(
+        DateTime,
+        nullable=False,
+        index=True,
+        default=datetime.datetime.now(datetime.timezone.utc),
+        comment="Timestamp when the request was received",
+    )
+    log_url = Column(
+        String,
+        nullable=False,
+        index=False,
+        comment="Log url for which analysis was requested",
+    )
+    response_sent_at = Column(
+        DateTime, nullable=True, comment="Timestamp when the response was sent back"
+    )
+    response_length = Column(
+        Integer, nullable=True, comment="Length of the response in chars"
+    )
+    response_certainty = Column(
+        Float, nullable=True, comment="Certainty for generated response"
+    )
+
+    @classmethod
+    def create(
+        cls,
+        endpoint: EndpointType,
+        log_url: str,
+        request_received_at: Optional[datetime.datetime] = None,
+    ) -> int:
+        """Create AnalyzeRequestMetrics new line
+        with data related to a received request"""
+        with transaction(commit=True) as session:
+            metrics = AnalyzeRequestMetrics()
+            metrics.endpoint = endpoint
+            metrics.request_received_at = request_received_at or datetime.datetime.now(
+                datetime.timezone.utc
+            )
+            metrics.log_url = log_url
+            session.add(metrics)
+            session.flush()
+            return metrics.id
+
+    @classmethod
+    def update(
+        cls,
+        id_: int,
+        response_sent_at: datetime,
+        response_length: int,
+        response_certainty: float,
+    ) -> None:
+        """Update an AnalyzeRequestMetrics line
+        with data related to the given response"""
+        with transaction(commit=True) as session:
+            metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+            metrics.response_sent_at = response_sent_at
+            metrics.response_length = response_length
+            metrics.response_certainty = response_certainty
+            session.add(metrics)
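Editorial note: the `create`/`update` classmethods are the write path for the new metrics table. A short sketch of the intended round trip, not part of the diff (values are illustrative; inside the server this is driven by `metric.py`, shown next):

```python
# Illustrative sketch: record a request, then attach response data to the same row.
import datetime

from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType

metrics_id = AnalyzeRequestMetrics.create(
    endpoint=EndpointType.ANALYZE,
    log_url="https://example.org/build.log",  # placeholder URL
)

# ... produce the analysis, then fill in the response columns ...
AnalyzeRequestMetrics.update(
    metrics_id,
    response_sent_at=datetime.datetime.now(datetime.timezone.utc),
    response_length=1234,
    response_certainty=42.0,
)
```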

logdetective/server/metric.py
ADDED
@@ -0,0 +1,82 @@
+import datetime
+import inspect
+from typing import Union
+from functools import wraps
+
+from starlette.responses import StreamingResponse
+from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+from logdetective.server import models
+
+
+def add_new_metrics(
+    api_name: str, build_log: models.BuildLog, received_at: datetime.datetime = None
+) -> int:
+    """Add a new database entry for a received request.
+
+    This will store the time when this function is called,
+    the endpoint from where the request was received,
+    and the log for which analysis is requested.
+    """
+    return AnalyzeRequestMetrics.create(
+        endpoint=EndpointType(api_name),
+        log_url=build_log.url,
+        request_received_at=received_at
+        if received_at
+        else datetime.datetime.now(datetime.timezone.utc),
+    )
+
+
+def update_metrics(
+    metrics_id: int,
+    response: Union[models.Response, models.StagedResponse, StreamingResponse],
+    sent_at: datetime.datetime = None,
+) -> None:
+    """Update a database metric entry for a received request,
+    filling data for the given response.
+
+    This will add to the database entry the time when the response was sent,
+    the length of the created response and the certainty for it.
+    """
+    response_sent_at = (
+        sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
+    )
+    response_length = None
+    if hasattr(response, "explanation") and "choices" in response.explanation:
+        response_length = sum(
+            len(choice["text"])
+            for choice in response.explanation["choices"]
+            if "text" in choice
+        )
+    response_certainty = (
+        response.response_certainty if hasattr(response, "response_certainty") else None
+    )
+    AnalyzeRequestMetrics.update(
+        metrics_id, response_sent_at, response_length, response_certainty
+    )
+
+
+def track_request():
+    """
+    Decorator to track requests metrics
+    """
+
+    def decorator(f):
+        @wraps(f)
+        async def async_decorated_function(*args, **kwargs):
+            metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+            response = await f(*args, **kwargs)
+            update_metrics(metrics_id, response)
+            return response
+
+        @wraps(f)
+        def sync_decorated_function(*args, **kwargs):
+            metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+            response = f(*args, **kwargs)
+            update_metrics(metrics_id, response)
+            return response
+
+        if inspect.iscoroutinefunction(f):
+            return async_decorated_function
+        return sync_decorated_function
+
+    return decorator
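Editorial note: `track_request()` wraps both sync and async handlers and reads the build log from `kwargs["build_log"]`, so decorated functions must be called with `build_log` as a keyword argument. A hedged sketch with a hypothetical handler, not part of the diff:

```python
# Illustrative sketch: the decorator looks up kwargs["build_log"], so the
# wrapped handler has to receive its BuildLog as a keyword argument.
from logdetective.server.metric import track_request
from logdetective.server.models import BuildLog, Explanation, Response


@track_request()
async def fake_analyze(build_log: BuildLog) -> Response:  # hypothetical handler
    return Response(
        explanation=Explanation(text="everything is fine"),
        response_certainty=100.0,
    )

# await fake_analyze(build_log=BuildLog(url="https://example.org/build.log"))
```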

logdetective/server/models.py
CHANGED
@@ -1,5 +1,5 @@
 from logging import BASIC_FORMAT
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal
 from pydantic import BaseModel, Field
 
 
@@ -37,15 +37,36 @@ class JobHook(BaseModel):
     project_id: int
 
 
+class Explanation(BaseModel):
+    """Model of snippet or general log explanation from Log Detective"""
+
+    text: str
+    logprobs: Optional[List[Dict]] = None
+
+    def __str__(self):
+        return self.text
+
+
+class AnalyzedSnippet(BaseModel):
+    """Model for snippets already processed by Log Detective.
+
+    explanation: LLM output in form of plain text and logprobs dictionary
+    text: original snippet text
+    line_number: location of snippet in original log
+    """
+    explanation: Explanation
+    text: str
+    line_number: int
+
+
 class Response(BaseModel):
     """Model of data returned by Log Detective API
 
-    explanation:
-        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    explanation: Explanation
     response_certainty: float
     """
 
-    explanation:
+    explanation: Explanation
     response_certainty: float
 
 
@@ -53,17 +74,12 @@ class StagedResponse(Response):
     """Model of data returned by Log Detective API when called when staged response
     is requested. Contains list of reponses to prompts for individual snippets.
 
-    explanation:
-        https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_types.CreateCompletionResponse
+    explanation: Explanation
     response_certainty: float
-    snippets:
-        list of dictionaries {
-            'snippet' : '<original_text>,
-            'comment': CreateCompletionResponse,
-            'line_number': '<location_in_log>' }
+    snippets: list of AnalyzedSnippet objects
     """
 
-    snippets: List[
+    snippets: List[AnalyzedSnippet]
 
 
 class InferenceConfig(BaseModel):
@@ -71,6 +87,9 @@ class InferenceConfig(BaseModel):
 
     max_tokens: int = -1
     log_probs: int = 1
+    api_endpoint: Optional[Literal["/chat/completions", "/completions"]] = (
+        "/chat/completions"
+    )
 
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -79,6 +98,7 @@ class InferenceConfig(BaseModel):
 
         self.max_tokens = data.get("max_tokens", -1)
         self.log_probs = data.get("log_probs", 1)
+        self.api_endpoint = data.get("api_endpoint", "/chat/completions")
 
 
 class ExtractorConfig(BaseModel):
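Editorial note: the new pydantic models replace the raw llama-cpp completion dictionaries throughout the API. A small sketch of how they compose, not part of the diff (field values are made up):

```python
# Illustrative sketch: building the new response shapes by hand.
from logdetective.server.models import AnalyzedSnippet, Explanation, StagedResponse

snippet = AnalyzedSnippet(
    line_number=42,
    text="error: undefined reference to `foo'",
    explanation=Explanation(text="The linker cannot find the symbol foo."),
)

staged = StagedResponse(
    explanation=Explanation(text="The build failed at link time."),
    response_certainty=87.5,
    snippets=[snippet],
)

print(str(staged.snippets[0].explanation))  # Explanation.__str__ returns .text
```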

logdetective/server/server.py
CHANGED
@@ -5,11 +5,11 @@ import re
 import zipfile
 from pathlib import PurePath
 from tempfile import TemporaryFile
-from typing import List, Annotated, Tuple
+from typing import List, Annotated, Tuple, Dict, Any
 
 
-from llama_cpp import CreateCompletionResponse
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
+
 from fastapi.responses import StreamingResponse
 from fastapi.responses import Response as BasicResponse
 import gitlab
@@ -27,9 +27,16 @@ from logdetective.utils import (
     format_snippets,
     format_analyzed_snippets,
 )
-from logdetective.server.models import BuildLog, JobHook, Response, StagedResponse
 from logdetective.server.utils import load_server_config, get_log
-
+from logdetective.server.metric import track_request
+from logdetective.server.models import (
+    BuildLog,
+    JobHook,
+    Response,
+    StagedResponse,
+    Explanation,
+    AnalyzedSnippet,
+)
 
 LLM_CPP_HOST = os.environ.get("LLAMA_CPP_HOST", "localhost")
 LLM_CPP_SERVER_ADDRESS = f"http://{LLM_CPP_HOST}"
@@ -123,35 +130,21 @@ def mine_logs(log: str) -> List[Tuple[int, str]]:
     return log_summary
 
 
-async def
-
-
-
-
-    model: str = "default-model",
-):
-    """Submit prompt to LLM.
-    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
-    log_probs: number of token choices to produce log probs for
-    """
-    LOG.info("Analyzing the text")
-    data = {
-        "prompt": text,
-        "max_tokens": max_tokens,
-        "logprobs": log_probs,
-        "stream": stream,
-        "model": model,
-    }
-
-    headers = {"Content-Type": "application/json"}
-
-    if LLM_API_TOKEN:
-        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+async def submit_to_llm_endpoint(
+    url: str, data: Dict[str, Any], headers: Dict[str, str], stream: bool
+) -> Any:
+    """Send request to selected API endpoint. Verifying successful request unless
+    the using the stream response.
 
+    url:
+    data:
+    headers:
+    stream:
+    """
     try:
         # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
         response = requests.post(
-
+            url,
             headers=headers,
             data=json.dumps(data),
             timeout=int(LLM_CPP_SERVER_TIMEOUT),
@@ -177,13 +170,118 @@ async def submit_text(
             status_code=400,
             detail=f"Couldn't parse the response.\nError: {ex}\nData: {response.text}",
         ) from ex
-    else:
-        return response
 
-    return
+    return response
+
+
+async def submit_text( # pylint: disable=R0913,R0917
+    text: str,
+    max_tokens: int = -1,
+    log_probs: int = 1,
+    stream: bool = False,
+    model: str = "default-model",
+    api_endpoint: str = "/chat/completions",
+) -> Explanation:
+    """Submit prompt to LLM using a selected endpoint.
+    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+    log_probs: number of token choices to produce log probs for
+    """
+    LOG.info("Analyzing the text")
+
+    headers = {"Content-Type": "application/json"}
+
+    if LLM_API_TOKEN:
+        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+
+    if api_endpoint == "/chat/completions":
+        return await submit_text_chat_completions(
+            text, headers, max_tokens, log_probs > 0, stream, model
+        )
+    return await submit_text_completions(
+        text, headers, max_tokens, log_probs, stream, model
+    )
+
+
+async def submit_text_completions( # pylint: disable=R0913,R0917
+    text: str,
+    headers: dict,
+    max_tokens: int = -1,
+    log_probs: int = 1,
+    stream: bool = False,
+    model: str = "default-model",
+) -> Explanation:
+    """Submit prompt to OpenAI API completions endpoint.
+    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+    log_probs: number of token choices to produce log probs for
+    """
+    LOG.info("Submitting to /v1/completions endpoint")
+    data = {
+        "prompt": text,
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
+        "stream": stream,
+        "model": model,
+    }
+
+    response = await submit_to_llm_endpoint(
+        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+        data,
+        headers,
+        stream,
+    )
+
+    return Explanation(
+        text=response["choices"][0]["text"], logprobs=response["choices"][0]["logprobs"]
+    )
+
+
+async def submit_text_chat_completions( # pylint: disable=R0913,R0917
+    text: str,
+    headers: dict,
+    max_tokens: int = -1,
+    log_probs: int = 1,
+    stream: bool = False,
+    model: str = "default-model",
+) -> Explanation:
+    """Submit prompt to OpenAI API /chat/completions endpoint.
+    max_tokens: number of tokens to be produces, 0 indicates run until encountering EOS
+    log_probs: number of token choices to produce log probs for
+    """
+    LOG.info("Submitting to /v1/chat/completions endpoint")
+
+    data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": text,
+            }
+        ],
+        "max_tokens": max_tokens,
+        "logprobs": log_probs,
+        "stream": stream,
+        "model": model,
+    }
+
+    response = await submit_to_llm_endpoint(
+        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/chat/completions",
+        data,
+        headers,
+        stream,
+    )
+
+    if stream:
+        return Explanation(
+            text=response["choices"][0]["delta"]["content"],
+            logprobs=response["choices"][0]["logprobs"]["content"],
+        )
+    return Explanation(
+        text=response["choices"][0]["message"]["content"],
+        logprobs=response["choices"][0]["logprobs"]["content"],
+    )
 
 
 @app.post("/analyze", response_model=Response)
+@track_request()
 async def analyze_log(build_log: BuildLog):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -194,26 +292,28 @@ async def analyze_log(build_log: BuildLog):
     log_text = process_url(build_log.url)
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
-    response = await submit_text(
+    response = await submit_text(
+        PROMPT_TEMPLATE.format(log_summary),
+        api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+    )
     certainty = 0
 
-    if
+    if response.logprobs is not None:
         try:
-            certainty = compute_certainty(
-                response["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-            )
+            certainty = compute_certainty(response.logprobs)
         except ValueError as ex:
            LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{response
+                f"{response.logprobs}",
             ) from ex
 
     return Response(explanation=response, response_certainty=certainty)
 
 
 @app.post("/analyze/staged", response_model=StagedResponse)
+@track_request()
 async def analyze_log_staged(build_log: BuildLog):
     """Provide endpoint for log file submission and analysis.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -226,32 +326,38 @@ async def analyze_log_staged(build_log: BuildLog):
 
     # Process snippets asynchronously
     analyzed_snippets = await asyncio.gather(
-        *[
+        *[
+            submit_text(
+                SNIPPET_PROMPT_TEMPLATE.format(s),
+                api_endpoint=SERVER_CONFIG.inference.api_endpoint,
+            )
+            for s in log_summary
+        ]
     )
 
     analyzed_snippets = [
-
+        AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
         for e in zip(log_summary, analyzed_snippets)
     ]
     final_prompt = PROMPT_TEMPLATE_STAGED.format(
         format_analyzed_snippets(analyzed_snippets)
     )
 
-    final_analysis = await submit_text(
-
+    final_analysis = await submit_text(
+        final_prompt, api_endpoint=SERVER_CONFIG.inference.api_endpoint
+    )
+
     certainty = 0
 
-    if
+    if final_analysis.logprobs:
         try:
-            certainty = compute_certainty(
-                final_analysis["choices"][0]["logprobs"]["content"][0]["top_logprobs"]
-            )
+            certainty = compute_certainty(final_analysis.logprobs)
         except ValueError as ex:
             LOG.error("Error encountered while computing certainty: %s", ex)
             raise HTTPException(
                 status_code=400,
                 detail=f"Couldn't compute certainty with data:\n"
-                f"{final_analysis
+                f"{final_analysis.logprobs}",
             ) from ex
 
     return StagedResponse(
@@ -262,6 +368,7 @@ async def analyze_log_staged(build_log: BuildLog):
 
 
 @app.post("/analyze/stream", response_class=StreamingResponse)
+@track_request()
 async def analyze_log_stream(build_log: BuildLog):
     """Stream response endpoint for Logdetective.
     Request must be in form {"url":"<YOUR_URL_HERE>"}.
@@ -272,7 +379,14 @@ async def analyze_log_stream(build_log: BuildLog):
     log_text = process_url(build_log.url)
     log_summary = mine_logs(log_text)
     log_summary = format_snippets(log_summary)
-
+    headers = {"Content-Type": "application/json"}
+
+    if LLM_API_TOKEN:
+        headers["Authorization"] = f"Bearer {LLM_API_TOKEN}"
+
+    stream = await submit_text_chat_completions(
+        PROMPT_TEMPLATE.format(log_summary), stream=True, headers=headers
+    )
 
     return StreamingResponse(stream)
 
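Editorial note: the three `/analyze*` endpoints keep their request contract (`{"url": "<YOUR_URL_HERE>"}` per the docstrings above), now with metrics tracking wrapped around them. A hedged client-side sketch against a locally running server, not part of the diff (host and port are assumptions based on the compose default of 8080):

```python
# Illustrative sketch: calling the decorated /analyze endpoint over HTTP.
# The request body shape comes from the endpoint docstrings above.
import requests

resp = requests.post(
    "http://localhost:8080/analyze",                 # assumed local address
    json={"url": "https://example.org/build.log"},   # placeholder log URL
    timeout=600,
)
resp.raise_for_status()
body = resp.json()
print(body["response_certainty"])
print(body["explanation"]["text"])
```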

logdetective/utils.py
CHANGED
@@ -7,7 +7,7 @@ import requests
 
 from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
 from logdetective.constants import PROMPT_TEMPLATE, SNIPPET_DELIMITER
-
+from logdetective.server.models import AnalyzedSnippet
 
 LOG = logging.getLogger("logdetective")
 
@@ -175,11 +175,11 @@ def format_snippets(snippets: list[str] | list[Tuple[int, str]]) -> str:
     return summary
 
 
-def format_analyzed_snippets(snippets: list[
+def format_analyzed_snippets(snippets: list[AnalyzedSnippet]) -> str:
     """Format snippets for submission into staged prompt."""
     summary = f"\n{SNIPPET_DELIMITER}\n".join(
         [
-            f"[{e
+            f"[{e.text}] at line [{e.line_number}]: [{e.explanation.text}]"
             for e in snippets
         ]
     )
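Editorial note: `format_analyzed_snippets` now takes `AnalyzedSnippet` objects instead of bare dictionaries. A tiny sketch of its use, not part of the diff (values made up):

```python
# Illustrative sketch: formatting one analyzed snippet for the staged prompt.
from logdetective.server.models import AnalyzedSnippet, Explanation
from logdetective.utils import format_analyzed_snippets

snippets = [
    AnalyzedSnippet(
        line_number=17,
        text="collect2: error: ld returned 1 exit status",
        explanation=Explanation(text="Linking failed; see the undefined references above."),
    )
]

# Produces "[<text>] at line [<line_number>]: [<explanation.text>]" entries
# joined with the SNIPPET_DELIMITER constant.
print(format_analyzed_snippets(snippets))
```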

{logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.3.2
+Version: 0.4.0
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -19,15 +19,18 @@ Classifier: Topic :: Internet :: Log Analysis
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Debuggers
 Provides-Extra: server
+Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
 Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
 Requires-Dist: numpy (>=1.26.0)
+Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
 Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
 Requires-Dist: python-gitlab (>=4.4.0)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
 Requires-Dist: requests (>0.2.31)
+Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
 Description-Content-Type: text/markdown
@@ -188,6 +191,71 @@ or
 
 tox run -e lint # to run pylint
 
+Visual Studio Code testing with podman/docker-compose
+-----------------------------------------------------
+
+- In `Containerfile`, add `debugpy` as a dependency
+
+```diff
+-RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
++RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context debugpy\
+```
+
+- Rebuild server image with new dependencies
+
+```
+make rebuild-server
+```
+
+- Forward debugging port in `docker-compose.yaml` for `server` service.
+
+```diff
+ports:
+- "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
++ - "${VSCODE_DEBUG_PORT:-5678}:${VSCODE_DEBUG_PORT:-5678}"
+```
+
+- Add `debugpy` code in a logdetective file where you want to stop at first.
+
+```diff
++import debugpy
++debugpy.listen(("0.0.0.0", 5678))
++debugpy.wait_for_client()
+```
+
+- Prepare `.vscode/lunch.json` configuration for Visual Studio Code (at least the following configuration is needed)
+
+```json
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Remote Attach",
+            "type": "debugpy",
+            "request": "attach",
+            "connect": {
+                "host": "localhost",
+                "port": 5678
+            },
+            "pathMappings": [
+                {
+                    "localRoot": "${workspaceFolder}",
+                    "remoteRoot": "/src"
+                }
+            ]
+        }
+    ]
+}
+```
+
+- Run the server
+
+```
+podman-compose up server
+```
+
+- Run Visual Stdio Code debug configuration named *Python Debug: Remote Attach*
+
 Server
 ------
 
@@ -220,6 +288,19 @@ Model can be downloaded from [our Hugging Space](https://huggingface.co/fedora-c
 $ curl -L -o models/mistral-7b-instruct-v0.2.Q4_K_S.gguf https://huggingface.co/fedora-copr/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/ggml-model-Q4_K_S.gguf
 ```
 
+Generate a new database revision with alembic
+---------------------------------------------
+
+Modify the database models (`logdetective/server/database/model.py).
+
+Generate a new database revision with the command:
+
+**Warning**: this command will start up a new server
+and shut it down when the operation completes.
+
+```
+CHANGE="A change comment" make alembic-generate-revision
+```
 
 Our production instance
 -----------------------

logdetective-0.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@
+logdetective/__init__.py,sha256=VqRngDcuFT7JWms8Qc_MsOvajoXVOKPr-S1kqY3Pqhc,59
+logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
+logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
+logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
+logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
+logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/server/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+logdetective/server/database/base.py,sha256=oMJUvbWeapIUP-8Cf_DR9ptFg8CsYeaBAIjOVEzx8SM,1668
+logdetective/server/database/models.py,sha256=8jW4k03Kny_3ld35214hcjYoJqlBvQIr4LH9mfQukXw,2750
+logdetective/server/metric.py,sha256=VYMifrfIhcqgyu6YYN0c1nt8fC1iJ2_LCB7Bh2AheoE,2679
+logdetective/server/models.py,sha256=cTmNJ-w_WT3meD_x0A92wCZ5f6UiSeYpH1f01PNCOy4,5533
+logdetective/server/server.py,sha256=fKGN6ddlLW25V5X0O-NBGHTBlDszRU6R8FpJs0xEYe8,21179
+logdetective/server/utils.py,sha256=osW5-VXxJAxRt7Wd3t1wF7PyW89FE9g4gSZLZCShlLc,1216
+logdetective/utils.py,sha256=eudens1_T6iTtYhyzoYCpwuWgFHUMDSt6eWnrAB-mAI,6188
+logdetective-0.4.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+logdetective-0.4.0.dist-info/METADATA,sha256=92vGsbePzgQKz1PHOVmdysHvSkgV0Yj0Owtjdzv0G2w,12714
+logdetective-0.4.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+logdetective-0.4.0.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
+logdetective-0.4.0.dist-info/RECORD,,

logdetective-0.3.2.dist-info/RECORD
REMOVED
@@ -1,15 +0,0 @@
-logdetective/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/constants.py,sha256=SPSs1Bq6zPms3RsFTmsADwgrnFTn4fefNHzrB-M3RAE,1383
-logdetective/drain3.ini,sha256=ni91eCT1TwTznZwcqWoOVMQcGEnWhEDNCoTPF7cfGfY,1360
-logdetective/extractors.py,sha256=cjxndfJaQur54GXksIQXL7YTxkOng8I8UnQZMN2t5_w,3388
-logdetective/logdetective.py,sha256=KN0KASW63VAnrjVeXK5AO0ob-vSexutTyeg1fd4uj70,4884
-logdetective/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-logdetective/server/models.py,sha256=9QURaw0u9yZKywXwHzv6_rS6XhRBA2UHV5u4b9xkWqc,5196
-logdetective/server/server.py,sha256=o2s4ezQE-a1XY7RFK0vLDFQO_wj9ZgG58SEV0hErLd8,18237
-logdetective/server/utils.py,sha256=osW5-VXxJAxRt7Wd3t1wF7PyW89FE9g4gSZLZCShlLc,1216
-logdetective/utils.py,sha256=59jq7F45Wk8pldzDt4gkh47Hny0T3fy1ggJFjSXDSGo,6148
-logdetective-0.3.2.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-logdetective-0.3.2.dist-info/METADATA,sha256=vIn_AMoQZAHpsOB_6KXgR8wX1Z0tPEPe34044sj9mKY,10691
-logdetective-0.3.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-logdetective-0.3.2.dist-info/entry_points.txt,sha256=3K_vXja6PmcA8sNdUi63WdImeiNhVZcEGPTaoJmltfA,63
-logdetective-0.3.2.dist-info/RECORD,,

{logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/LICENSE
RENAMED
File without changes

{logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/WHEEL
RENAMED
File without changes

{logdetective-0.3.2.dist-info → logdetective-0.4.0.dist-info}/entry_points.txt
RENAMED
File without changes