logdetective 0.5.6__tar.gz → 0.5.8__tar.gz
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {logdetective-0.5.6 → logdetective-0.5.8}/PKG-INFO +37 -6
- {logdetective-0.5.6 → logdetective-0.5.8}/README.md +28 -0
- logdetective-0.5.8/logdetective/server/database/models.py +186 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/models.py +76 -1
- logdetective-0.5.8/logdetective/server/plot.py +206 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/server.py +39 -1
- {logdetective-0.5.6 → logdetective-0.5.8}/pyproject.toml +5 -2
- logdetective-0.5.6/logdetective/server/database/models.py +0 -88
- {logdetective-0.5.6 → logdetective-0.5.8}/LICENSE +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/__init__.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/constants.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/drain3.ini +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/extractors.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/logdetective.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/__init__.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/database/__init__.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/database/base.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/metric.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/templates/gitlab_comment.md.j2 +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/utils.py +0 -0
- {logdetective-0.5.6 → logdetective-0.5.8}/logdetective/utils.py +0 -0
{logdetective-0.5.6 → logdetective-0.5.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: logdetective
-Version: 0.5.6
+Version: 0.5.8
 Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
 License: Apache-2.0
 Author: Jiri Podivin
@@ -19,18 +19,21 @@ Classifier: Topic :: Internet :: Log Analysis
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Debuggers
 Provides-Extra: server
-Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
+Provides-Extra: server-testing
+Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: drain3 (>=0.9.11,<0.10.0)
-Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
+Requires-Dist: fastapi (>=0.111.1) ; extra == "server" or extra == "server-testing"
 Requires-Dist: huggingface-hub (>0.23.2)
 Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
+Requires-Dist: matplotlib (>=3.8.4,<4.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: numpy (>=1.26.0)
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
-Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
+Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
+Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: python-gitlab (>=4.4.0)
-Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
+Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server" or extra == "server-testing"
 Requires-Dist: requests (>0.2.31)
-Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
+Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
 Project-URL: homepage, https://github.com/fedora-copr/logdetective
 Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
 Description-Content-Type: text/markdown
@@ -331,6 +334,34 @@ HTTPS certificate generated through:
 certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 ```
 
+Querying statistics
+-------------------
+
+You can retrieve statistics about server requests over a specified time period
+using either the `curl` command or the `http` command (provided by the `httpie` package).
+
+When no time period is specified, the query defaults to the last 2 days:
+
+```
+http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
+curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
+```
+
+You can specify the time period in hours, days, or weeks.
+The time period:
+
+- cannot be less than one hour
+- cannot be negative
+- ends at the current time (when the query is made)
+- starts at the specified time interval before the current time.
+
+Examples:
+
+```
+http GET "localhost:8080/metrics/analyze/requests?hours=5" > /tmp/plot_hours.svg
+http GET "localhost:8080/metrics/analyze/requests?days=5" > /tmp/plot_days.svg
+http GET "localhost:8080/metrics/analyze/requests?weeks=5" > /tmp/plot_weeks.svg
+```
 
 License
 -------
{logdetective-0.5.6 → logdetective-0.5.8}/README.md

@@ -294,6 +294,34 @@ HTTPS certificate generated through:
 certbot certonly --standalone -d logdetective01.fedorainfracloud.org
 ```
 
+Querying statistics
+-------------------
+
+You can retrieve statistics about server requests over a specified time period
+using either the `curl` command or the `http` command (provided by the `httpie` package).
+
+When no time period is specified, the query defaults to the last 2 days:
+
+```
+http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
+curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
+```
+
+You can specify the time period in hours, days, or weeks.
+The time period:
+
+- cannot be less than one hour
+- cannot be negative
+- ends at the current time (when the query is made)
+- starts at the specified time interval before the current time.
+
+Examples:
+
+```
+http GET "localhost:8080/metrics/analyze/requests?hours=5" > /tmp/plot_hours.svg
+http GET "localhost:8080/metrics/analyze/requests?days=5" > /tmp/plot_days.svg
+http GET "localhost:8080/metrics/analyze/requests?weeks=5" > /tmp/plot_weeks.svg
+```
 
 License
 -------
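The same queries can be issued from Python with the `requests` library, which is already a core dependency according to the package metadata above. A minimal sketch, assuming a Log Detective server listening on localhost:8080 as in the README examples:

```python
# Minimal sketch: fetch the request-statistics plot as an SVG file.
# Host, port, and output path mirror the README examples above.
import requests

response = requests.get(
    "http://localhost:8080/metrics/analyze/requests",
    params={"days": 5},  # equivalent to ?days=5 in the curl/http examples
    timeout=60,
)
response.raise_for_status()

with open("/tmp/plot_days.svg", "wb") as svg_file:
    svg_file.write(response.content)
```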
logdetective-0.5.8/logdetective/server/database/models.py (new file)

@@ -0,0 +1,186 @@
+import enum
+import datetime
+
+from typing import Optional
+from sqlalchemy import (
+    Column,
+    Integer,
+    Float,
+    DateTime,
+    String,
+    Enum,
+    func,
+    select,
+    distinct,
+)
+
+from logdetective.server.database.base import Base, transaction
+
+
+class EndpointType(enum.Enum):
+    """Different analyze endpoints"""
+
+    ANALYZE = "analyze_log"
+    ANALYZE_STAGED = "analyze_log_staged"
+    ANALYZE_STREAM = "analyze_log_stream"
+
+
+class AnalyzeRequestMetrics(Base):
+    """Store data related to received requests and given responses"""
+
+    __tablename__ = "analyze_request_metrics"
+
+    id = Column(Integer, primary_key=True)
+    endpoint = Column(
+        Enum(EndpointType),
+        nullable=False,
+        index=True,
+        comment="The service endpoint that was called",
+    )
+    request_received_at = Column(
+        DateTime,
+        nullable=False,
+        index=True,
+        default=datetime.datetime.now(datetime.timezone.utc),
+        comment="Timestamp when the request was received",
+    )
+    log_url = Column(
+        String,
+        nullable=False,
+        index=False,
+        comment="Log url for which analysis was requested",
+    )
+    response_sent_at = Column(
+        DateTime, nullable=True, comment="Timestamp when the response was sent back"
+    )
+    response_length = Column(
+        Integer, nullable=True, comment="Length of the response in chars"
+    )
+    response_certainty = Column(
+        Float, nullable=True, comment="Certainty for generated response"
+    )
+
+    @classmethod
+    def create(
+        cls,
+        endpoint: EndpointType,
+        log_url: str,
+        request_received_at: Optional[datetime.datetime] = None,
+    ) -> int:
+        """Create AnalyzeRequestMetrics new line
+        with data related to a received request"""
+        with transaction(commit=True) as session:
+            metrics = AnalyzeRequestMetrics()
+            metrics.endpoint = endpoint
+            metrics.request_received_at = request_received_at or datetime.datetime.now(
+                datetime.timezone.utc
+            )
+            metrics.log_url = log_url
+            session.add(metrics)
+            session.flush()
+            return metrics.id
+
+    @classmethod
+    def update(
+        cls,
+        id_: int,
+        response_sent_at: datetime,
+        response_length: int,
+        response_certainty: float,
+    ) -> None:
+        """Update an AnalyzeRequestMetrics line
+        with data related to the given response"""
+        with transaction(commit=True) as session:
+            metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+            metrics.response_sent_at = response_sent_at
+            metrics.response_length = response_length
+            metrics.response_certainty = response_certainty
+            session.add(metrics)
+
+    @classmethod
+    def _get_requests_by_time_for_postgres(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """func.to_char is PostgreSQL specific.
+        Let's unit tests replace this function with the SQLite version.
+        """
+        if time_format == "%Y-%m-%d":
+            pgsql_time_format = "YYYY-MM-DD"
+        else:
+            pgsql_time_format = "YYYY-MM-DD HH24"
+
+        requests_by_time_format = (
+            select(
+                cls.id,
+                func.to_char(cls.request_received_at, pgsql_time_format).label(
+                    "time_format"
+                ),
+            )
+            .filter(cls.request_received_at.between(start_time, end_time))
+            .filter(cls.endpoint == endpoint)
+            .cte("requests_by_time_format")
+        )
+        return requests_by_time_format
+
+    @classmethod
+    def _get_requests_by_time_for_sqllite(
+        cls, start_time, end_time, time_format, endpoint
+    ):
+        """func.strftime is SQLite specific.
+        Use this function in unit test using flexmock:
+
+        flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
+        .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
+        """
+        requests_by_time_format = (
+            select(
+                cls.id,
+                func.strftime(time_format, cls.request_received_at).label(
+                    "time_format"
+                ),
+            )
+            .filter(cls.request_received_at.between(start_time, end_time))
+            .filter(cls.endpoint == endpoint)
+            .cte("requests_by_time_format")
+        )
+        return requests_by_time_format
+
+    @classmethod
+    def get_requests_in_period(
+        cls,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        time_format: str,
+        endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+    ) -> dict[datetime.datetime, int]:
+        """
+        Get a dictionary with request counts grouped by time units within a specified period.
+
+        Args:
+            start_time (datetime): The start of the time period to query
+            end_time (datetime): The end of the time period to query
+            time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+            endpoint (EndpointType): The analyze API endpoint to query
+
+        Returns:
+            dict[datetime, int]: A dictionary mapping datetime objects to request counts
+        """
+        with transaction(commit=False) as session:
+            requests_by_time_format = cls._get_requests_by_time_for_postgres(
+                start_time, end_time, time_format, endpoint
+            )
+
+            count_requests_by_time_format = select(
+                requests_by_time_format.c.time_format,
+                func.count(distinct(requests_by_time_format.c.id)),  # pylint: disable=not-callable
+            ).group_by("time_format")
+
+            counts = session.execute(count_requests_by_time_format)
+            results = counts.fetchall()
+
+            # Convert results to a dictionary with proper datetime keys
+            counts_dict = {
+                datetime.datetime.strptime(r[0], time_format): r[1] for r in results
+            }
+
+            return counts_dict
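The new model exposes a `create`/`update` pair intended to bracket one request/response cycle: `create` records the incoming request and returns the row id, and `update` later fills in the response-side columns. A hedged sketch of a caller (the actual call sites in server.py are not part of this diff; the log URL and response values below are purely illustrative):

```python
# Sketch only: assumes a configured database behind the `transaction`
# context manager used by the model.
import datetime

from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType

# On arrival: persist the request and keep the row id returned by create().
metrics_id = AnalyzeRequestMetrics.create(
    endpoint=EndpointType.ANALYZE,
    log_url="https://example.org/build.log",  # illustrative URL
)

response_text = "explanation of the failure..."  # stand-in for the generated answer
certainty = 0.87                                 # stand-in certainty score

# After responding: fill in the response-side columns of the same row.
AnalyzeRequestMetrics.update(
    id_=metrics_id,
    response_sent_at=datetime.datetime.now(datetime.timezone.utc),
    response_length=len(response_text),
    response_certainty=certainty,
)
```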
{logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/models.py

@@ -1,6 +1,8 @@
+import datetime
 from logging import BASIC_FORMAT
 from typing import List, Dict, Optional, Literal
-from pydantic import BaseModel, Field, model_validator
+
+from pydantic import BaseModel, Field, model_validator, field_validator
 
 
 class BuildLog(BaseModel):

@@ -54,6 +56,7 @@ class AnalyzedSnippet(BaseModel):
     text: original snippet text
     line_number: location of snippet in original log
     """
+
    explanation: Explanation
    text: str
    line_number: int

@@ -195,3 +198,75 @@ class Config(BaseModel):
         self.extractor = ExtractorConfig(data.get("extractor"))
         self.gitlab = GitLabConfig(data.get("gitlab"))
         self.general = GeneralConfig(data.get("general"))
+
+
+class TimePeriod(BaseModel):
+    """Specification for a period of time.
+
+    If no indication is given
+    it falls back to a 2 days period of time.
+
+    Can't be smaller than a hour"""
+
+    weeks: Optional[int] = None
+    days: Optional[int] = None
+    hours: Optional[int] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_exclusive_fields(cls, data):
+        """ Check that only one key between weeks, days and hours is defined"""
+        if isinstance(data, dict):
+            how_many_fields = sum(
+                1
+                for field in ["weeks", "days", "hours"]
+                if field in data and data[field] is not None
+            )
+
+            if how_many_fields == 0:
+                data["days"] = 2  # by default fallback to a 2 days period
+
+            if how_many_fields > 1:
+                raise ValueError("Only one of months, weeks, days, or hours can be set")
+
+        return data
+
+    @field_validator("weeks", "days", "hours")
+    @classmethod
+    def check_positive(cls, v):
+        """Check that the given value is positive"""
+        if v is not None and v <= 0:
+            raise ValueError("Time period must be positive")
+        return v
+
+    def get_time_period(self) -> datetime.timedelta:
+        """Get the period of time represented by this input model.
+
+        Returns:
+            datetime.timedelta: The time period as a timedelta object.
+        """
+        delta = None
+        if self.weeks:
+            delta = datetime.timedelta(weeks=self.weeks)
+        elif self.days:
+            delta = datetime.timedelta(days=self.days)
+        elif self.hours:
+            delta = datetime.timedelta(hours=self.hours)
+        return delta
+
+    def get_period_start_time(
+        self, end_time: datetime.datetime = None
+    ) -> datetime.datetime:
+        """Calculate the start time of this period based on the end time.
+
+        Args:
+            end_time (datetime.datetime, optional): The end time of the period.
+                Defaults to current UTC time if not provided.
+
+        Returns:
+            datetime.datetime: The start time of the period.
+        """
+        time = end_time or datetime.datetime.now(datetime.timezone.utc)
+        if end_time.tzinfo is None:
+            end_time = end_time.replace(tzinfo=datetime.timezone.utc)
+        return time - self.get_time_period()
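The validators above make the three unit fields mutually exclusive, require them to be positive, and default to a two-day window. A short sketch of the resulting behavior (in pydantic v2, validator failures surface as `ValidationError`, which subclasses `ValueError`):

```python
from logdetective.server.models import TimePeriod

period = TimePeriod()            # no unit given: check_exclusive_fields sets days=2
assert period.days == 2

TimePeriod(weeks=1).get_time_period()   # -> datetime.timedelta(weeks=1)

try:
    TimePeriod(days=1, hours=3)  # two units at once are rejected
except ValueError as exc:
    print(exc)  # wraps "Only one of months, weeks, days, or hours can be set"

try:
    TimePeriod(hours=-2)         # non-positive values are rejected
except ValueError as exc:
    print(exc)  # wraps "Time period must be positive"
```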
logdetective-0.5.8/logdetective/server/plot.py (new file)

@@ -0,0 +1,206 @@
+import datetime
+from typing import Optional
+
+import numpy
+import matplotlib
+import matplotlib.figure
+import matplotlib.pyplot
+
+from logdetective.server import models
+from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType
+
+
+class Definition:
+    """Define plot details, given a time period."""
+
+    def __init__(self, time_period: models.TimePeriod):
+        self.time_period = time_period
+        self.days_diff = time_period.get_time_period().days
+        if self.time_period.hours:
+            self._freq = "H"
+            self._time_format = "%Y-%m-%d %H"
+            self._locator = matplotlib.dates.HourLocator(interval=2)
+            self._time_unit = "hour"
+            self._time_delta = datetime.timedelta(hours=1)
+        elif self.time_period.days:
+            self._freq = "D"
+            self._time_format = "%Y-%m-%d"
+            self._locator = matplotlib.dates.DayLocator(interval=1)
+            self._time_unit = "day"
+            self._time_delta = datetime.timedelta(days=1)
+        elif self.time_period.weeks:
+            self._freq = "W"
+            self._time_format = "%Y-%m-%d"
+            self._locator = matplotlib.dates.WeekdayLocator(interval=1)
+            self._time_unit = "week"
+            self._time_delta = datetime.timedelta(weeks=1)
+
+    @property
+    def freq(self):
+        # pylint: disable=missing-function-docstring
+        return self._freq
+
+    @property
+    def time_format(self):
+        # pylint: disable=missing-function-docstring
+        return self._time_format
+
+    @property
+    def locator(self):
+        # pylint: disable=missing-function-docstring
+        return self._locator
+
+    @property
+    def time_unit(self):
+        # pylint: disable=missing-function-docstring
+        return self._time_unit
+
+    @property
+    def time_delta(self):
+        # pylint: disable=missing-function-docstring
+        return self._time_delta
+
+
+def create_time_series_arrays(
+    counts_dict: dict[datetime.datetime, int],
+    start_time: datetime.datetime,
+    end_time: datetime.datetime,
+    time_delta: datetime.timedelta,
+    time_format: str,
+) -> tuple[numpy.ndarray, numpy.ndarray]:
+    """Create time series arrays from a dictionary of counts.
+
+    This function generates two aligned numpy arrays:
+    1. An array of timestamps from start_time to end_time
+    2. A corresponding array of counts for each timestamp
+
+    The timestamps are truncated to the precision specified by time_format.
+    If a timestamp in counts_dict matches a generated timestamp, its count is used;
+    otherwise, the count defaults to zero.
+
+    Args:
+        counts_dict: Dictionary mapping timestamps to their respective counts
+        start_time: The starting timestamp of the time series
+        end_time: The ending timestamp of the time series
+        time_delta: The time interval between consecutive timestamps
+        time_format: String format for datetime truncation (e.g., '%Y-%m-%d %H:%M')
+
+    Returns:
+        A tuple containing:
+        - numpy.ndarray: Array of timestamps
+        - numpy.ndarray: Array of corresponding counts
+    """
+    num_intervals = int((end_time - start_time) / time_delta) + 1
+
+    timestamps = numpy.array(
+        [
+            datetime.datetime.strptime(
+                (start_time + i * time_delta).strftime(format=time_format), time_format
+            )
+            for i in range(num_intervals)
+        ]
+    )
+    counts = numpy.zeros(num_intervals, dtype=int)
+
+    timestamp_to_index = {timestamp: i for i, timestamp in enumerate(timestamps)}
+
+    for timestamp, count in counts_dict.items():
+        if timestamp in timestamp_to_index:
+            counts[timestamp_to_index[timestamp]] = count
+
+    return timestamps, counts
+
+
+def _add_bar_chart_for_requests_count(
+    ax1: matplotlib.figure.Axes,
+    plot_def: Definition,
+    timestamps: numpy.array,
+    counts: numpy.array,
+) -> None:
+    """Add a bar chart for requests count (axes 1)"""
+    bar_width = (
+        0.8 * plot_def.time_delta.total_seconds() / 86400
+    )  # Convert to days for matplotlib
+    ax1.bar(
+        timestamps,
+        counts,
+        width=bar_width,
+        alpha=0.7,
+        color="skyblue",
+        label="Requests",
+    )
+    ax1.set_xlabel("Time")
+    ax1.set_ylabel("Requests", color="blue")
+    ax1.tick_params(axis="y", labelcolor="blue")
+
+    ax1.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
+    ax1.xaxis.set_major_locator(plot_def.locator)
+
+    matplotlib.pyplot.xticks(rotation=45)
+
+    ax1.grid(True, alpha=0.3)
+
+
+def _add_cumulative_line_for_requests_count(
+    ax2: matplotlib.figure.Axes, timestamps: numpy.array, counts: numpy.array
+) -> None:
+    """Add cumulative line on secondary y-axis"""
+    cumulative = numpy.cumsum(counts)
+    ax2.plot(timestamps, cumulative, "r-", linewidth=2, label="Cumulative")
+    ax2.set_ylabel("Cumulative Requests", color="red")
+    ax2.tick_params(axis="y", labelcolor="red")
+
+
+def requests_per_time(
+    period_of_time: models.TimePeriod,
+    endpoint: EndpointType = EndpointType.ANALYZE,
+    end_time: Optional[datetime.datetime] = None,
+) -> matplotlib.figure.Figure:
+    """
+    Generate a visualization of request counts over a specified time period.
+
+    This function creates a dual-axis plot showing:
+    1. A bar chart of request counts per time interval
+    2. A line chart showing the cumulative request count
+
+    The time intervals are determined by the provided TimePeriod object, which defines
+    the granularity and formatting of the time axis.
+
+    Args:
+        period_of_time: A TimePeriod object that defines the time period and interval
+            for the analysis (e.g., hourly, daily, weekly)
+        endpoint: One of the API endpoints
+        end_time: The end time for the analysis period. If None, defaults to the current
+            UTC time
+
+    Returns:
+        A matplotlib Figure object containing the generated visualization
+    """
+    end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+    start_time = period_of_time.get_period_start_time(end_time)
+    plot_def = Definition(period_of_time)
+    requests_counts = AnalyzeRequestMetrics.get_requests_in_period(
+        start_time, end_time, plot_def.time_format, endpoint
+    )
+    timestamps, counts = create_time_series_arrays(
+        requests_counts, start_time, end_time, plot_def.time_delta, plot_def.time_format
+    )
+
+    fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
+    _add_bar_chart_for_requests_count(ax1, plot_def, timestamps, counts)
+
+    ax2 = ax1.twinx()
+    _add_cumulative_line_for_requests_count(ax2, timestamps, counts)
+
+    matplotlib.pyplot.title(
+        f"Requests received for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
+        f"to {end_time.strftime(plot_def.time_format)})"
+    )
+
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2, labels1 + labels2, loc="center")
+
+    matplotlib.pyplot.tight_layout()
+
+    return fig
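The module's entry point is `requests_per_time`, which combines `Definition`, the database query, and `create_time_series_arrays` into one figure. A minimal sketch of rendering a plot outside the HTTP layer, assuming a configured database connection for `AnalyzeRequestMetrics`:

```python
# Sketch only: render the last week of /analyze traffic to an SVG file.
from logdetective.server import plot
from logdetective.server.models import TimePeriod
from logdetective.server.database.models import EndpointType

fig = plot.requests_per_time(TimePeriod(weeks=1), endpoint=EndpointType.ANALYZE)
fig.savefig("/tmp/analyze_requests.svg", format="svg", bbox_inches="tight")
```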
{logdetective-0.5.6 → logdetective-0.5.8}/logdetective/server/server.py

@@ -6,8 +6,11 @@ import zipfile
 from pathlib import Path, PurePath
 from tempfile import TemporaryFile
 from typing import List, Annotated, Tuple, Dict, Any
+from io import BytesIO
 
 
+import matplotlib
+import matplotlib.pyplot
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header
 
 from fastapi.responses import StreamingResponse

@@ -39,7 +42,10 @@ from logdetective.server.models import (
     StagedResponse,
     Explanation,
     AnalyzedSnippet,
+    TimePeriod,
 )
+from logdetective.server import plot
+from logdetective.server.database.models import EndpointType
 
 LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)

@@ -494,7 +500,7 @@ async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
     tempfile.seek(0)
 
     failed_arches = {}
-    artifacts_zip = zipfile.ZipFile(tempfile, mode="r")
+    artifacts_zip = zipfile.ZipFile(tempfile, mode="r")  # pylint: disable=consider-using-with
     for zipinfo in artifacts_zip.infolist():
         if zipinfo.filename.endswith("task_failed.log"):
             # The koji logs store this file in two places: 1) in the

@@ -653,3 +659,35 @@ async def generate_mr_comment(
     )
 
     return content
+
+
+def _svg_figure_response(fig: matplotlib.figure.Figure):
+    """Create a response with the given svg figure."""
+    buf = BytesIO()
+    fig.savefig(buf, format="svg", bbox_inches="tight")
+    matplotlib.pyplot.close(fig)
+
+    buf.seek(0)
+    return StreamingResponse(
+        buf,
+        media_type="image/svg+xml",
+        headers={"Content-Disposition": "inline; filename=plot.svg"},
+    )
+
+
+@app.get("/metrics/analyze/requests", response_class=StreamingResponse)
+async def show_analyze_requests(period_since_now: TimePeriod = Depends(TimePeriod)):
+    """Show statistics for the requests received in the given period of time
+    for the /analyze API endpoint."""
+    fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE)
+    return _svg_figure_response(fig)
+
+
+@app.get("/metrics/analyze/staged/requests", response_class=StreamingResponse)
+async def show_analyze_staged_requests(
+    period_since_now: TimePeriod = Depends(TimePeriod),
+):
+    """Show statistics for the requests received in the given period of time
+    for the /analyze/staged API endpoint."""
+    fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE_STAGED)
+    return _svg_figure_response(fig)
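Because both endpoints return the SVG through a `StreamingResponse`, they can be exercised with FastAPI's `TestClient`. A sketch, assuming the app object is importable as `logdetective.server.server.app` (a module path inferred from the file layout, not stated in this diff) and that `httpx`, which `TestClient` relies on, is installed:

```python
# Sketch only: hit the new metrics endpoint and check the SVG response.
from fastapi.testclient import TestClient

from logdetective.server.server import app  # assumed import path

client = TestClient(app)

response = client.get("/metrics/analyze/requests", params={"hours": 5})
assert response.status_code == 200
assert response.headers["content-type"] == "image/svg+xml"
```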
{logdetective-0.5.6 → logdetective-0.5.8}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logdetective"
-version = "0.5.6"
+version = "0.5.8"
 description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
 authors = ["Jiri Podivin <jpodivin@gmail.com>"]
 license = "Apache-2.0"

@@ -43,11 +43,14 @@ pydantic = {version = "^2.8.2", optional = true }
 fastapi = {version = ">=0.111.1", optional = true }
 pyyaml = {version = "^6.0.1", optional = true }
 sqlalchemy = {version = "^2.0.36", optional = true }
+psycopg2-binary = {version = "^2.9.9", optional = true }
 psycopg2 = {version = "^2.9.9", optional = true }
 alembic = {version = "^1.13.3", optional = true }
+matplotlib = {version = "^3.8.4", optional = true }
 
 [tool.poetry.extras]
-server = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2", "alembic"]
+server = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2", "alembic", "matplotlib"]
+server-testing = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib"]
 
 [build-system]
 requires = ["poetry-core"]
logdetective-0.5.6/logdetective/server/database/models.py (deleted; superseded by the 0.5.8 version above)

@@ -1,88 +0,0 @@
-import enum
-import datetime
-
-from typing import Optional
-from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
-
-from logdetective.server.database.base import Base, transaction
-
-
-class EndpointType(enum.Enum):
-    """Different analyze endpoints"""
-
-    ANALYZE = "analyze_log"
-    ANALYZE_STAGED = "analyze_log_staged"
-    ANALYZE_STREAM = "analyze_log_stream"
-
-
-class AnalyzeRequestMetrics(Base):
-    """Store data related to received requests and given responses"""
-
-    __tablename__ = "analyze_request_metrics"
-
-    id = Column(Integer, primary_key=True)
-    endpoint = Column(
-        Enum(EndpointType),
-        nullable=False,
-        index=True,
-        comment="The service endpoint that was called",
-    )
-    request_received_at = Column(
-        DateTime,
-        nullable=False,
-        index=True,
-        default=datetime.datetime.now(datetime.timezone.utc),
-        comment="Timestamp when the request was received",
-    )
-    log_url = Column(
-        String,
-        nullable=False,
-        index=False,
-        comment="Log url for which analysis was requested",
-    )
-    response_sent_at = Column(
-        DateTime, nullable=True, comment="Timestamp when the response was sent back"
-    )
-    response_length = Column(
-        Integer, nullable=True, comment="Length of the response in chars"
-    )
-    response_certainty = Column(
-        Float, nullable=True, comment="Certainty for generated response"
-    )
-
-    @classmethod
-    def create(
-        cls,
-        endpoint: EndpointType,
-        log_url: str,
-        request_received_at: Optional[datetime.datetime] = None,
-    ) -> int:
-        """Create AnalyzeRequestMetrics new line
-        with data related to a received request"""
-        with transaction(commit=True) as session:
-            metrics = AnalyzeRequestMetrics()
-            metrics.endpoint = endpoint
-            metrics.request_received_at = request_received_at or datetime.datetime.now(
-                datetime.timezone.utc
-            )
-            metrics.log_url = log_url
-            session.add(metrics)
-            session.flush()
-            return metrics.id
-
-    @classmethod
-    def update(
-        cls,
-        id_: int,
-        response_sent_at: datetime,
-        response_length: int,
-        response_certainty: float,
-    ) -> None:
-        """Update an AnalyzeRequestMetrics line
-        with data related to the given response"""
-        with transaction(commit=True) as session:
-            metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
-            metrics.response_sent_at = response_sent_at
-            metrics.response_length = response_length
-            metrics.response_certainty = response_certainty
-            session.add(metrics)
All remaining files listed above are renamed from the logdetective-0.5.6 prefix to the logdetective-0.5.8 prefix without content changes.