logdetective 0.5.10.tar.gz → 0.5.11.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logdetective-0.5.10 → logdetective-0.5.11}/PKG-INFO +28 -4
- {logdetective-0.5.10 → logdetective-0.5.11}/README.md +27 -3
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/constants.py +8 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/logdetective.py +8 -1
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/prompts.yml +6 -0
- logdetective-0.5.11/logdetective/server/database/models.py +390 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/metric.py +4 -6
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/models.py +11 -3
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/plot.py +114 -39
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/server.py +104 -11
- logdetective-0.5.10/logdetective/server/templates/gitlab_comment.md.j2 → logdetective-0.5.11/logdetective/server/templates/gitlab_full_comment.md.j2 +1 -3
- logdetective-0.5.11/logdetective/server/templates/gitlab_short_comment.md.j2 +53 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/utils.py +3 -1
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/utils.py +7 -3
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective.1.asciidoc +2 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/pyproject.toml +1 -1
- logdetective-0.5.10/logdetective/server/database/models.py +0 -186
- {logdetective-0.5.10 → logdetective-0.5.11}/LICENSE +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/__init__.py +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/drain3.ini +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/extractors.py +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/models.py +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/__init__.py +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/database/__init__.py +0 -0
- {logdetective-0.5.10 → logdetective-0.5.11}/logdetective/server/database/base.py +0 -0
--- logdetective-0.5.10/PKG-INFO
+++ logdetective-0.5.11/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.5.10
+Version: 0.5.11
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -47,6 +47,8 @@ Log Detective
 
 A Python tool to analyze logs using a Language Model (LLM) and Drain template miner.
 
+Note: if you are looking for code of website logdetective.com it is in [github.com/fedora-copr/logdetective-website](https://github.com/fedora-copr/logdetective-website).
+
 Installation
 ------------
 
@@ -95,6 +97,17 @@ Example you want to use a different model:
 logdetective https://example.com/logs.txt --model https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true
 logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
 
+Example of different suffix (useful for models that were quantized)
+
+logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --model 'fedora-copr/granite-3.2-8b-instruct-GGUF' -F Q4_K.gguf
+
+Example of altered prompts:
+
+cp ~/.local/lib/python3.13/site-packages/logdetective/prompts.yml ~/my-prompts.yml
+vi ~/my-prompts.yml # edit the prompts there to better fit your needs
+logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --prompts ~/my-prompts.yml
+
+
 Note that streaming with some models (notably Meta-Llama-3 is broken) is broken and can be workarounded by `no-stream` option:
 
 logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --no-stream
@@ -337,11 +350,23 @@ certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 Querying statistics
 -------------------
 
-You can retrieve statistics about server requests over a specified time period
-using either the `curl`
+You can retrieve statistics about server requests and responses over a specified time period
+using either a browser, the `curl` or the `http` command (provided by the `httpie` package).
 
 When no time period is specified, the query defaults to the last 2 days:
 
+You can view requests and responses statistics
+- for the `/analyze` endpoint at http://localhost:8080/metrics/analyze
+- for the `/analyze/staged` endpoint at http://localhost:8080/metrics/analyze/staged.
+
+You can retrieve single svg images at the following endpoints:
+- `/metrics/analyze/requests`
+- `/metrics/analyze/responses`
+- `/metrics/analyze/staged/requests`
+- `/metrics/analyze/stages/responses`
+
+Examples:
+
 ```
 http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
 curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
@@ -349,7 +374,6 @@ curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
 
 You can specify the time period in hours, days, or weeks.
 The time period:
-
 - cannot be less than one hour
 - cannot be negative
 - ends at the current time (when the query is made)
--- logdetective-0.5.10/README.md
+++ logdetective-0.5.11/README.md
@@ -7,6 +7,8 @@ Log Detective
 
 A Python tool to analyze logs using a Language Model (LLM) and Drain template miner.
 
+Note: if you are looking for code of website logdetective.com it is in [github.com/fedora-copr/logdetective-website](https://github.com/fedora-copr/logdetective-website).
+
 Installation
 ------------
 
@@ -55,6 +57,17 @@ Example you want to use a different model:
 logdetective https://example.com/logs.txt --model https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true
 logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
 
+Example of different suffix (useful for models that were quantized)
+
+logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --model 'fedora-copr/granite-3.2-8b-instruct-GGUF' -F Q4_K.gguf
+
+Example of altered prompts:
+
+cp ~/.local/lib/python3.13/site-packages/logdetective/prompts.yml ~/my-prompts.yml
+vi ~/my-prompts.yml # edit the prompts there to better fit your needs
+logdetective https://kojipkgs.fedoraproject.org//work/tasks/3367/131313367/build.log --prompts ~/my-prompts.yml
+
+
 Note that streaming with some models (notably Meta-Llama-3 is broken) is broken and can be workarounded by `no-stream` option:
 
 logdetective https://example.com/logs.txt --model QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --no-stream
@@ -297,11 +310,23 @@ certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 Querying statistics
 -------------------
 
-You can retrieve statistics about server requests over a specified time period
-using either the `curl`
+You can retrieve statistics about server requests and responses over a specified time period
+using either a browser, the `curl` or the `http` command (provided by the `httpie` package).
 
 When no time period is specified, the query defaults to the last 2 days:
 
+You can view requests and responses statistics
+- for the `/analyze` endpoint at http://localhost:8080/metrics/analyze
+- for the `/analyze/staged` endpoint at http://localhost:8080/metrics/analyze/staged.
+
+You can retrieve single svg images at the following endpoints:
+- `/metrics/analyze/requests`
+- `/metrics/analyze/responses`
+- `/metrics/analyze/staged/requests`
+- `/metrics/analyze/stages/responses`
+
+Examples:
+
 ```
 http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
 curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
@@ -309,7 +334,6 @@ curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
 
 You can specify the time period in hours, days, or weeks.
 The time period:
-
 - cannot be less than one hour
 - cannot be negative
 - ends at the current time (when the query is made)
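The plots above can also be fetched programmatically. A minimal Python sketch, assuming a server on localhost:8080 as in the README examples; only the endpoint path comes from the diff, the `requests` usage and output path are illustrative:

```python
# Fetch one of the metrics plots added in 0.5.11 and save the SVG it returns.
import requests

resp = requests.get("http://localhost:8080/metrics/analyze/requests", timeout=30)
resp.raise_for_status()
with open("/tmp/plot.svg", "wb") as f:
    f.write(resp.content)
```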
--- logdetective-0.5.10/logdetective/constants.py
+++ logdetective-0.5.11/logdetective/constants.py
@@ -16,6 +16,8 @@ Snippets are delimited with '================'.
 
 Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -38,6 +40,8 @@ Answer:
 SNIPPET_PROMPT_TEMPLATE = """
 Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
+Your analysis must be as concise as possible, while keeping relevant information intact.
+
 Snippet:
 
 {}
@@ -55,6 +59,8 @@ Snippets are delimited with '================'.
 
 Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+Explanation of the issue, and recommended solution, should take handful of sentences.
+
 Snippets:
 
 {}
@@ -64,3 +70,5 @@ Analysis:
 """
 
 SNIPPET_DELIMITER = "================"
+
+DEFAULT_TEMPERATURE = 0.8
--- logdetective-0.5.10/logdetective/logdetective.py
+++ logdetective-0.5.11/logdetective/logdetective.py
@@ -3,7 +3,7 @@ import logging
 import sys
 import os
 
-from logdetective.constants import DEFAULT_ADVISOR
+from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
 from logdetective.utils import (
     process_log,
     initialize_model,
@@ -73,6 +73,12 @@ def setup_args():
         default=f"{os.path.dirname(__file__)}/prompts.yml",
         help="Path to prompt configuration file."
     )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=DEFAULT_TEMPERATURE,
+        help="Temperature for inference."
+    )
     return parser.parse_args()
 
 
@@ -147,6 +153,7 @@ def main():  # pylint: disable=too-many-statements,too-many-locals
         model,
         stream,
         prompt_template=prompts_configuration.prompt_template,
+        temperature=args.temperature,
    )
    probs = []
    print("Explanation:")
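With this release the CLI gains a `--temperature` option, defaulting to the new `DEFAULT_TEMPERATURE = 0.8`. A usage sketch; the value 0.2 is purely illustrative:

```
logdetective https://example.com/logs.txt --temperature 0.2
```

Lower values should make the explanation more deterministic, higher values more varied.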
--- logdetective-0.5.10/logdetective/prompts.yml
+++ logdetective-0.5.11/logdetective/prompts.yml
@@ -13,6 +13,8 @@ prompt_template: |
 
   Finally, drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
   Snippets:
 
   {}
@@ -33,6 +35,8 @@ summarization_prompt_template: |
 snippet_prompt_template: |
   Analyse following RPM build log snippet. Describe contents accurately, without speculation or suggestions for resolution.
 
+  Your analysis must be as concise as possible, while keeping relevant information intact.
+
   Snippet:
 
   {}
@@ -48,6 +52,8 @@ prompt_template_staged: |
 
   Drawing on information from all snippets, provide complete explanation of the issue and recommend solution.
 
+  Explanation of the issue, and recommended solution, should take handful of sentences.
+
   Snippets:
 
   {}
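For reference, a custom prompts file passed with `--prompts` is just this YAML with reworded templates. A hypothetical fragment; the key names mirror prompts.yml above, and the `{}` placeholder is where logdetective substitutes the snippets:

```yaml
# Hypothetical ~/my-prompts.yml; only the wording differs from the shipped file.
prompt_template: |
  Explain the failure in this build log in a handful of sentences.

  Snippets:

  {}
```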
--- /dev/null
+++ logdetective-0.5.11/logdetective/server/database/models.py
@@ -0,0 +1,390 @@
+import enum
+import datetime
+
+from typing import Optional
+from sqlalchemy import (
+    Column,
+    Integer,
+    Float,
+    DateTime,
+    String,
+    Enum,
+    func,
+    select,
+    distinct,
+)
+
+from logdetective.server.database.base import Base, transaction
+
+
+class EndpointType(enum.Enum):
+    """Different analyze endpoints"""
+
+    ANALYZE = "analyze_log"
+    ANALYZE_STAGED = "analyze_log_staged"
+    ANALYZE_STREAM = "analyze_log_stream"
+
+
+class AnalyzeRequestMetrics(Base):
+    """Store data related to received requests and given responses"""
+
+    __tablename__ = "analyze_request_metrics"
+
+    id = Column(Integer, primary_key=True)
+    endpoint = Column(
+        Enum(EndpointType),
+        nullable=False,
+        index=True,
+        comment="The service endpoint that was called",
+    )
+    request_received_at = Column(
+        DateTime,
+        nullable=False,
+        index=True,
+        default=datetime.datetime.now(datetime.timezone.utc),
+        comment="Timestamp when the request was received",
+    )
+    log_url = Column(
+        String,
+        nullable=False,
+        index=False,
+        comment="Log url for which analysis was requested",
+    )
+    response_sent_at = Column(
+        DateTime, nullable=True, comment="Timestamp when the response was sent back"
+    )
+    response_length = Column(
+        Integer, nullable=True, comment="Length of the response in chars"
+    )
+    response_certainty = Column(
+        Float, nullable=True, comment="Certainty for generated response"
+    )
+
+    @classmethod
+    def create(
+        cls,
+        endpoint: EndpointType,
+        log_url: str,
+        request_received_at: Optional[datetime.datetime] = None,
+    ) -> int:
+        """Create AnalyzeRequestMetrics new line
+        with data related to a received request"""
+        with transaction(commit=True) as session:
+            metrics = AnalyzeRequestMetrics()
+            metrics.endpoint = endpoint
+            metrics.request_received_at = request_received_at or datetime.datetime.now(
+                datetime.timezone.utc
+            )
+            metrics.log_url = log_url
+            session.add(metrics)
+            session.flush()
+            return metrics.id
+
+    @classmethod
+    def update(
+        cls,
+        id_: int,
+        response_sent_at: datetime,
+        response_length: int,
+        response_certainty: float,
+    ) -> None:
+        """Update an AnalyzeRequestMetrics line
+        with data related to the given response"""
+        with transaction(commit=True) as session:
+            metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+            metrics.response_sent_at = response_sent_at
+            metrics.response_length = response_length
+            metrics.response_certainty = response_certainty
+            session.add(metrics)
+
+    @classmethod
+    def get_postgres_time_format(cls, time_format):
+        """Map python time format in the PostgreSQL format."""
+        if time_format == "%Y-%m-%d":
+            pgsql_time_format = "YYYY-MM-DD"
+        else:
+            pgsql_time_format = "YYYY-MM-DD HH24"
+        return pgsql_time_format
+
+    @classmethod
+    def get_dictionary_with_datetime_keys(
+        cls, time_format: str, values_dict: dict[str, any]
+    ) -> dict[datetime.datetime, any]:
+        """Convert from a dictionary with str keys to a dictionary with datetime keys"""
+        new_dict = {
+            datetime.datetime.strptime(r[0], time_format): r[1] for r in values_dict
+        }
+        return new_dict
+
+    @classmethod
+    def _get_requests_by_time_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get total requests number in time period.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+        requests_by_time_format = (
+            select(
+                cls.id,
+                func.to_char(cls.request_received_at, pgsql_time_format).label(
+                    "time_format"
+                ),
+            )
+            .filter(cls.request_received_at.between(start_time, end_time))
+            .filter(cls.endpoint == endpoint)
+            .cte("requests_by_time_format")
+        )
+        return requests_by_time_format
+
+    @classmethod
+    def _get_requests_by_time_for_sqlite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get total requests number in time period.
+
+        func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+
+        flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
+        """
+        requests_by_time_format = (
+            select(
+                cls.id,
+                func.strftime(time_format, cls.request_received_at).label(
+                    "time_format"
+                ),
+            )
+            .filter(cls.request_received_at.between(start_time, end_time))
+            .filter(cls.endpoint == endpoint)
+            .cte("requests_by_time_format")
+        )
+        return requests_by_time_format
+
+    @classmethod
+    def get_requests_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with request counts grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects to request counts
+        """
+        with transaction(commit=False) as session:
+            requests_by_time_format = cls._get_requests_by_time_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+
+            count_requests_by_time_format = select(
+                requests_by_time_format.c.time_format,
+                func.count(distinct(requests_by_time_format.c.id)),  # pylint: disable=not-callable
+            ).group_by("time_format")
+
+            counts = session.execute(count_requests_by_time_format)
+            results = counts.fetchall()
+
+            return cls.get_dictionary_with_datetime_keys(time_format, results)
+
+    @classmethod
+    def _get_average_responses_times_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses time.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+            average_responses_times = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (
+                        func.avg(
+                            func.extract(  # pylint: disable=not-callable
+                                "epoch", cls.response_sent_at - cls.request_received_at
+                            )
+                        )
+                    ).label("average_response_seconds"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            results = session.execute(average_responses_times).fetchall()
+            return results
+
+    @classmethod
+    def _get_average_responses_times_for_sqlite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses time.
+
+        func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+
+        flexmock(AnalyzeRequestMetrics).should_receive("_get_average_responses_times_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_average_responses_times_for_sqlite)
+        """
+        with transaction(commit=False) as session:
+            average_responses_times = (
+                select(
+                    func.strftime(time_format, cls.request_received_at).label(
+                        "time_range"
+                    ),
+                    (
+                        func.avg(
+                            func.julianday(cls.response_sent_at)
+                            - func.julianday(cls.request_received_at)  # noqa: W503 flake8 vs ruff
+                        )
+                        * 86400  # noqa: W503 flake8 vs ruff
+                    ).label("average_response_seconds"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            results = session.execute(average_responses_times).fetchall()
+            return results
+
+    @classmethod
+    def get_responses_average_time_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses times
+        grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses times
+        """
+        with transaction(commit=False) as _:
+            average_responses_times = cls._get_average_responses_times_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+
+            return cls.get_dictionary_with_datetime_keys(
+                time_format, average_responses_times
+            )
+
+    @classmethod
+    def _get_average_responses_lengths_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses length.
+
+        func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        with transaction(commit=False) as session:
+            pgsql_time_format = cls.get_postgres_time_format(time_format)
+
+            average_responses_lengths = (
+                select(
+                    func.to_char(cls.request_received_at, pgsql_time_format).label(
+                        "time_range"
+                    ),
+                    (func.avg(cls.response_length)).label("average_responses_length"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            results = session.execute(average_responses_lengths).fetchall()
+            return results
+
+    @classmethod
+    def _get_average_responses_lengths_for_sqlite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """Get average responses length.
+
+        func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+
+        flexmock(AnalyzeRequestMetrics)
+        .should_receive("_get_average_responses_lengths_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_average_responses_lengths_for_sqlite)
+        """
+        with transaction(commit=False) as session:
+            average_responses_lengths = (
+                select(
+                    func.strftime(time_format, cls.request_received_at).label(
+                        "time_range"
+                    ),
+                    (func.avg(cls.response_length)).label("average_responses_length"),
+                )
+                .filter(cls.request_received_at.between(start_time, end_time))
+                .filter(cls.endpoint == endpoint)
+                .group_by("time_range")
+                .order_by("time_range")
+            )
+
+            results = session.execute(average_responses_lengths).fetchall()
+            return results
+
+    @classmethod
+    def get_responses_average_length_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with average responses length
+        grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects
+            to average responses lengths
+        """
+        with transaction(commit=False) as _:
+            average_responses_lengths = cls._get_average_responses_lengths_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+
+            return cls.get_dictionary_with_datetime_keys(
+                time_format, average_responses_lengths
+            )
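Taken together, the new model brackets each request: `create()` records the incoming call and returns a row id, `update()` later attaches the response data, and the `get_*_in_period()` classmethods feed the metrics plots. A sketch of that lifecycle with illustrative values (not code from the package):

```python
import datetime

from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType

now = datetime.datetime.now(datetime.timezone.utc)

# Record the incoming request; create() returns the new row's id.
metrics_id = AnalyzeRequestMetrics.create(
    endpoint=EndpointType.ANALYZE,
    log_url="https://example.com/build.log",  # illustrative URL
)

# ... run the analysis, then attach the response data to the same row.
AnalyzeRequestMetrics.update(
    id_=metrics_id,
    response_sent_at=datetime.datetime.now(datetime.timezone.utc),
    response_length=1024,      # illustrative
    response_certainty=42.0,   # illustrative
)

# Aggregate per day, e.g. over the last two days (the README default period).
counts = AnalyzeRequestMetrics.get_requests_in_period(
    start_time=now - datetime.timedelta(days=2),
    end_time=now,
    time_format="%Y-%m-%d",
)
```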
--- logdetective-0.5.10/logdetective/server/metric.py
+++ logdetective-0.5.11/logdetective/server/metric.py
@@ -41,12 +41,10 @@ def update_metrics(
         sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
     )
     response_length = None
-    if hasattr(response, "explanation") and
-
-
-
-        if "text" in choice
-    )
+    if hasattr(response, "explanation") and isinstance(
+        response.explanation, models.Explanation
+    ):
+        response_length = len(response.explanation.text)
     response_certainty = (
         response.response_certainty if hasattr(response, "response_certainty") else None
     )
--- logdetective-0.5.10/logdetective/server/models.py
+++ logdetective-0.5.11/logdetective/server/models.py
@@ -2,7 +2,9 @@ import datetime
 from logging import BASIC_FORMAT
 from typing import List, Dict, Optional, Literal
 
-from pydantic import BaseModel, Field, model_validator, field_validator
+from pydantic import BaseModel, Field, model_validator, field_validator, NonNegativeFloat
+
+from logdetective.constants import DEFAULT_TEMPERATURE
 
 
 class BuildLog(BaseModel):
@@ -95,6 +97,8 @@ class InferenceConfig(BaseModel):
     )
     url: str = ""
     api_token: str = ""
+    model: str = ""
+    temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
 
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -106,6 +110,8 @@ class InferenceConfig(BaseModel):
         self.api_endpoint = data.get("api_endpoint", "/chat/completions")
         self.url = data.get("url", "")
         self.api_token = data.get("api_token", "")
+        self.model = data.get("model", "default-model")
+        self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
 
 
 class ExtractorConfig(BaseModel):
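Since `InferenceConfig.__init__` reads these values from a plain dict, the matching server config fragment would plausibly look like this; the enclosing `inference:` key and all values are assumptions, only the key names come from the diff (`model` and `temperature` are new in 0.5.11):

```yaml
inference:                        # enclosing key is an assumption
  url: "http://localhost:8000"    # illustrative
  api_endpoint: "/chat/completions"
  api_token: ""
  model: "default-model"          # new in 0.5.11
  temperature: 0.8                # new in 0.5.11; validated as NonNegativeFloat
```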
@@ -150,7 +156,8 @@ class LogConfig(BaseModel):
     """Logging configuration"""
 
     name: str = "logdetective"
-
+    level_stream: str | int = "INFO"
+    level_file: str | int = "INFO"
     path: str | None = None
     format: str = BASIC_FORMAT
 
@@ -160,7 +167,8 @@ class LogConfig(BaseModel):
             return
 
         self.name = data.get("name", "logdetective")
-        self.
+        self.level_stream = data.get("level_stream", "INFO").upper()
+        self.level_file = data.get("level_file", "INFO").upper()
         self.path = data.get("path")
         self.format = data.get("format", BASIC_FORMAT)
 
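Likewise, `LogConfig` now takes separate stream and file log levels instead of a single one. A hypothetical config fragment (the enclosing `log:` key and the values are assumptions; the key names come from the diff):

```yaml
log:                                   # enclosing key is an assumption
  name: logdetective
  level_stream: DEBUG                  # console verbosity, new in 0.5.11
  level_file: INFO                     # file verbosity, new in 0.5.11
  path: /var/log/logdetective.log      # illustrative
  format: "%(levelname)s:%(name)s:%(message)s"
```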