logdetective 0.5.5.tar.gz → 0.5.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {logdetective-0.5.5 → logdetective-0.5.7}/PKG-INFO +37 -6
  2. {logdetective-0.5.5 → logdetective-0.5.7}/README.md +28 -0
  3. logdetective-0.5.7/logdetective/server/database/models.py +186 -0
  4. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/models.py +76 -1
  5. logdetective-0.5.7/logdetective/server/plot.py +206 -0
  6. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/server.py +39 -1
  7. {logdetective-0.5.5 → logdetective-0.5.7}/pyproject.toml +5 -2
  8. logdetective-0.5.5/logdetective/server/database/models.py +0 -88
  9. {logdetective-0.5.5 → logdetective-0.5.7}/LICENSE +0 -0
  10. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/__init__.py +0 -0
  11. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/constants.py +0 -0
  12. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/drain3.ini +0 -0
  13. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/extractors.py +0 -0
  14. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/logdetective.py +0 -0
  15. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/__init__.py +0 -0
  16. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/database/__init__.py +0 -0
  17. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/database/base.py +0 -0
  18. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/metric.py +0 -0
  19. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/templates/gitlab_comment.md.j2 +0 -0
  20. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/utils.py +0 -0
  21. {logdetective-0.5.5 → logdetective-0.5.7}/logdetective/utils.py +0 -0
{logdetective-0.5.5 → logdetective-0.5.7}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: logdetective
- Version: 0.5.5
+ Version: 0.5.7
  Summary: Log using LLM AI to search for build/test failures and provide ideas for fixing these.
  License: Apache-2.0
  Author: Jiri Podivin
@@ -19,18 +19,21 @@ Classifier: Topic :: Internet :: Log Analysis
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Debuggers
  Provides-Extra: server
- Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server"
+ Provides-Extra: server-testing
+ Requires-Dist: alembic (>=1.13.3,<2.0.0) ; extra == "server" or extra == "server-testing"
  Requires-Dist: drain3 (>=0.9.11,<0.10.0)
- Requires-Dist: fastapi (>=0.111.1) ; extra == "server"
+ Requires-Dist: fastapi (>=0.111.1) ; extra == "server" or extra == "server-testing"
  Requires-Dist: huggingface-hub (>0.23.2)
  Requires-Dist: llama-cpp-python (>0.2.56,!=0.2.86)
+ Requires-Dist: matplotlib (>=3.8.4,<4.0.0) ; extra == "server" or extra == "server-testing"
  Requires-Dist: numpy (>=1.26.0)
  Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "server"
+ Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "server-testing"
+ Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server" or extra == "server-testing"
- Requires-Dist: pydantic (>=2.8.2,<3.0.0) ; extra == "server"
  Requires-Dist: python-gitlab (>=4.4.0)
- Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server"
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "server" or extra == "server-testing"
  Requires-Dist: requests (>0.2.31)
- Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server"
+ Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "server" or extra == "server-testing"
  Project-URL: homepage, https://github.com/fedora-copr/logdetective
  Project-URL: issues, https://github.com/fedora-copr/logdetective/issues
  Description-Content-Type: text/markdown
@@ -331,6 +334,34 @@ HTTPS certificate generated through:
  certbot certonly --standalone -d logdetective01.fedorainfracloud.org
  ```

+ Querying statistics
+ -------------------
+
+ You can retrieve statistics about server requests over a specified time period
+ using either the `curl` command or the `http` command (provided by the `httpie` package).
+
+ When no time period is specified, the query defaults to the last 2 days:
+
+ ```
+ http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
+ curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
+ ```
+
+ You can specify the time period in hours, days, or weeks.
+ The time period:
+
+ - cannot be less than one hour
+ - cannot be negative
+ - ends at the current time (when the query is made)
+ - starts at the specified time interval before the current time.
+
+ Examples:
+
+ ```
+ http GET "localhost:8080/metrics/analyze/requests?hours=5" > /tmp/plot_hours.svg
+ http GET "localhost:8080/metrics/analyze/requests?days=5" > /tmp/plot_days.svg
+ http GET "localhost:8080/metrics/analyze/requests?weeks=5" > /tmp/plot_weeks.svg
+ ```

  License
  -------
{logdetective-0.5.5 → logdetective-0.5.7}/README.md
@@ -294,6 +294,34 @@ HTTPS certificate generated through:
  certbot certonly --standalone -d logdetective01.fedorainfracloud.org
  ```

+ Querying statistics
+ -------------------
+
+ You can retrieve statistics about server requests over a specified time period
+ using either the `curl` command or the `http` command (provided by the `httpie` package).
+
+ When no time period is specified, the query defaults to the last 2 days:
+
+ ```
+ http GET "localhost:8080/metrics/analyze/requests" > /tmp/plot.svg
+ curl "localhost:8080/metrics/analyze/staged/requests" > /tmp/plot.svg
+ ```
+
+ You can specify the time period in hours, days, or weeks.
+ The time period:
+
+ - cannot be less than one hour
+ - cannot be negative
+ - ends at the current time (when the query is made)
+ - starts at the specified time interval before the current time.
+
+ Examples:
+
+ ```
+ http GET "localhost:8080/metrics/analyze/requests?hours=5" > /tmp/plot_hours.svg
+ http GET "localhost:8080/metrics/analyze/requests?days=5" > /tmp/plot_days.svg
+ http GET "localhost:8080/metrics/analyze/requests?weeks=5" > /tmp/plot_weeks.svg
+ ```

  License
  -------
logdetective-0.5.7/logdetective/server/database/models.py (new file)
@@ -0,0 +1,186 @@
+ import enum
+ import datetime
+
+ from typing import Optional
+ from sqlalchemy import (
+     Column,
+     Integer,
+     Float,
+     DateTime,
+     String,
+     Enum,
+     func,
+     select,
+     distinct,
+ )
+
+ from logdetective.server.database.base import Base, transaction
+
+
+ class EndpointType(enum.Enum):
+     """Different analyze endpoints"""
+
+     ANALYZE = "analyze_log"
+     ANALYZE_STAGED = "analyze_log_staged"
+     ANALYZE_STREAM = "analyze_log_stream"
+
+
+ class AnalyzeRequestMetrics(Base):
+     """Store data related to received requests and given responses"""
+
+     __tablename__ = "analyze_request_metrics"
+
+     id = Column(Integer, primary_key=True)
+     endpoint = Column(
+         Enum(EndpointType),
+         nullable=False,
+         index=True,
+         comment="The service endpoint that was called",
+     )
+     request_received_at = Column(
+         DateTime,
+         nullable=False,
+         index=True,
+         default=datetime.datetime.now(datetime.timezone.utc),
+         comment="Timestamp when the request was received",
+     )
+     log_url = Column(
+         String,
+         nullable=False,
+         index=False,
+         comment="Log url for which analysis was requested",
+     )
+     response_sent_at = Column(
+         DateTime, nullable=True, comment="Timestamp when the response was sent back"
+     )
+     response_length = Column(
+         Integer, nullable=True, comment="Length of the response in chars"
+     )
+     response_certainty = Column(
+         Float, nullable=True, comment="Certainty for generated response"
+     )
+
+     @classmethod
+     def create(
+         cls,
+         endpoint: EndpointType,
+         log_url: str,
+         request_received_at: Optional[datetime.datetime] = None,
+     ) -> int:
+         """Create AnalyzeRequestMetrics new line
+         with data related to a received request"""
+         with transaction(commit=True) as session:
+             metrics = AnalyzeRequestMetrics()
+             metrics.endpoint = endpoint
+             metrics.request_received_at = request_received_at or datetime.datetime.now(
+                 datetime.timezone.utc
+             )
+             metrics.log_url = log_url
+             session.add(metrics)
+             session.flush()
+             return metrics.id
+
+     @classmethod
+     def update(
+         cls,
+         id_: int,
+         response_sent_at: datetime,
+         response_length: int,
+         response_certainty: float,
+     ) -> None:
+         """Update an AnalyzeRequestMetrics line
+         with data related to the given response"""
+         with transaction(commit=True) as session:
+             metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+             metrics.response_sent_at = response_sent_at
+             metrics.response_length = response_length
+             metrics.response_certainty = response_certainty
+             session.add(metrics)
+
+     @classmethod
+     def _get_requests_by_time_for_postgres(
+         cls, start_time, end_time, time_format, endpoint
+     ):
+         """func.to_char is PostgreSQL specific.
+         Let's unit tests replace this function with the SQLite version.
+         """
+         if time_format == "%Y-%m-%d":
+             pgsql_time_format = "YYYY-MM-DD"
+         else:
+             pgsql_time_format = "YYYY-MM-DD HH24"
+
+         requests_by_time_format = (
+             select(
+                 cls.id,
+                 func.to_char(cls.request_received_at, pgsql_time_format).label(
+                     "time_format"
+                 ),
+             )
+             .filter(cls.request_received_at.between(start_time, end_time))
+             .filter(cls.endpoint == endpoint)
+             .cte("requests_by_time_format")
+         )
+         return requests_by_time_format
+
+     @classmethod
+     def _get_requests_by_time_for_sqllite(
+         cls, start_time, end_time, time_format, endpoint
+     ):
+         """func.strftime is SQLite specific.
+         Use this function in unit test using flexmock:
+
+         flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
+         .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
+         """
+         requests_by_time_format = (
+             select(
+                 cls.id,
+                 func.strftime(time_format, cls.request_received_at).label(
+                     "time_format"
+                 ),
+             )
+             .filter(cls.request_received_at.between(start_time, end_time))
+             .filter(cls.endpoint == endpoint)
+             .cte("requests_by_time_format")
+         )
+         return requests_by_time_format
+
+     @classmethod
+     def get_requests_in_period(
+         cls,
+         start_time: datetime.datetime,
+         end_time: datetime.datetime,
+         time_format: str,
+         endpoint: Optional[EndpointType] = EndpointType.ANALYZE,
+     ) -> dict[datetime.datetime, int]:
+         """
+         Get a dictionary with request counts grouped by time units within a specified period.
+
+         Args:
+             start_time (datetime): The start of the time period to query
+             end_time (datetime): The end of the time period to query
+             time_format (str): The strftime format string to format timestamps (e.g., '%Y-%m-%d')
+             endpoint (EndpointType): The analyze API endpoint to query
+
+         Returns:
+             dict[datetime, int]: A dictionary mapping datetime objects to request counts
+         """
+         with transaction(commit=False) as session:
+             requests_by_time_format = cls._get_requests_by_time_for_postgres(
+                 start_time, end_time, time_format, endpoint
+             )
+
+             count_requests_by_time_format = select(
+                 requests_by_time_format.c.time_format,
+                 func.count(distinct(requests_by_time_format.c.id)),  # pylint: disable=not-callable
+             ).group_by("time_format")
+
+             counts = session.execute(count_requests_by_time_format)
+             results = counts.fetchall()
+
+             # Convert results to a dictionary with proper datetime keys
+             counts_dict = {
+                 datetime.datetime.strptime(r[0], time_format): r[1] for r in results
+             }
+
+             return counts_dict
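
For orientation, here is a minimal usage sketch of the new metrics model. It is not part of the package: the log URL and response values are invented, and it assumes the server database is configured so that `transaction()` can open a session.

```
import datetime

from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType

# Record an incoming /analyze request, then attach response data to the same row.
metrics_id = AnalyzeRequestMetrics.create(
    endpoint=EndpointType.ANALYZE,
    log_url="https://example.org/build.log",  # hypothetical log URL
)
AnalyzeRequestMetrics.update(
    id_=metrics_id,
    response_sent_at=datetime.datetime.now(datetime.timezone.utc),
    response_length=1024,
    response_certainty=42.0,
)

# Per-day request counts for the last two days; the format string matches
# what the plotting code introduced below passes in.
end = datetime.datetime.now(datetime.timezone.utc)
counts = AnalyzeRequestMetrics.get_requests_in_period(
    end - datetime.timedelta(days=2), end, "%Y-%m-%d"
)
```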
{logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/models.py
@@ -1,6 +1,8 @@
+ import datetime
  from logging import BASIC_FORMAT
  from typing import List, Dict, Optional, Literal
- from pydantic import BaseModel, Field
+
+ from pydantic import BaseModel, Field, model_validator, field_validator


  class BuildLog(BaseModel):
@@ -54,6 +56,7 @@ class AnalyzedSnippet(BaseModel):
      text: original snippet text
      line_number: location of snippet in original log
      """
+
      explanation: Explanation
      text: str
      line_number: int
@@ -195,3 +198,75 @@ class Config(BaseModel):
          self.extractor = ExtractorConfig(data.get("extractor"))
          self.gitlab = GitLabConfig(data.get("gitlab"))
          self.general = GeneralConfig(data.get("general"))
+
+
+ class TimePeriod(BaseModel):
+     """Specification for a period of time.
+
+     If no indication is given
+     it falls back to a 2 days period of time.
+
+     Can't be smaller than a hour"""
+
+     weeks: Optional[int] = None
+     days: Optional[int] = None
+     hours: Optional[int] = None
+
+     @model_validator(mode="before")
+     @classmethod
+     def check_exclusive_fields(cls, data):
+         """Check that only one key between weeks, days and hours is defined"""
+         if isinstance(data, dict):
+             how_many_fields = sum(
+                 1
+                 for field in ["weeks", "days", "hours"]
+                 if field in data and data[field] is not None
+             )
+
+             if how_many_fields == 0:
+                 data["days"] = 2  # by default fallback to a 2 days period
+
+             if how_many_fields > 1:
+                 raise ValueError("Only one of months, weeks, days, or hours can be set")
+
+         return data
+
+     @field_validator("weeks", "days", "hours")
+     @classmethod
+     def check_positive(cls, v):
+         """Check that the given value is positive"""
+         if v is not None and v <= 0:
+             raise ValueError("Time period must be positive")
+         return v
+
+     def get_time_period(self) -> datetime.timedelta:
+         """Get the period of time represented by this input model.
+
+         Returns:
+             datetime.timedelta: The time period as a timedelta object.
+         """
+         delta = None
+         if self.weeks:
+             delta = datetime.timedelta(weeks=self.weeks)
+         elif self.days:
+             delta = datetime.timedelta(days=self.days)
+         elif self.hours:
+             delta = datetime.timedelta(hours=self.hours)
+         return delta
+
+     def get_period_start_time(
+         self, end_time: datetime.datetime = None
+     ) -> datetime.datetime:
+         """Calculate the start time of this period based on the end time.
+
+         Args:
+             end_time (datetime.datetime, optional): The end time of the period.
+                 Defaults to current UTC time if not provided.
+
+         Returns:
+             datetime.datetime: The start time of the period.
+         """
+         time = end_time or datetime.datetime.now(datetime.timezone.utc)
+         if end_time.tzinfo is None:
+             end_time = end_time.replace(tzinfo=datetime.timezone.utc)
+         return time - self.get_time_period()
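
A short illustrative sketch of how the `TimePeriod` validators behave, derived from the model above (not part of the package):

```
import datetime

from logdetective.server.models import TimePeriod

TimePeriod()                   # no field set: falls back to days=2
TimePeriod(hours=5)            # valid
# TimePeriod(hours=5, days=1)  # rejected: only one of weeks, days or hours may be set
# TimePeriod(hours=-1)         # rejected: the value must be positive

period = TimePeriod(weeks=1)
period.get_time_period()       # datetime.timedelta(weeks=1)

now = datetime.datetime.now(datetime.timezone.utc)
period.get_period_start_time(now)  # one week before `now`
```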
logdetective-0.5.7/logdetective/server/plot.py (new file)
@@ -0,0 +1,206 @@
+ import datetime
+ from typing import Optional
+
+ import numpy
+ import matplotlib
+ import matplotlib.figure
+ import matplotlib.pyplot
+
+ from logdetective.server import models
+ from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType
+
+
+ class Definition:
+     """Define plot details, given a time period."""
+
+     def __init__(self, time_period: models.TimePeriod):
+         self.time_period = time_period
+         self.days_diff = time_period.get_time_period().days
+         if self.time_period.hours:
+             self._freq = "H"
+             self._time_format = "%Y-%m-%d %H"
+             self._locator = matplotlib.dates.HourLocator(interval=2)
+             self._time_unit = "hour"
+             self._time_delta = datetime.timedelta(hours=1)
+         elif self.time_period.days:
+             self._freq = "D"
+             self._time_format = "%Y-%m-%d"
+             self._locator = matplotlib.dates.DayLocator(interval=1)
+             self._time_unit = "day"
+             self._time_delta = datetime.timedelta(days=1)
+         elif self.time_period.weeks:
+             self._freq = "W"
+             self._time_format = "%Y-%m-%d"
+             self._locator = matplotlib.dates.WeekdayLocator(interval=1)
+             self._time_unit = "week"
+             self._time_delta = datetime.timedelta(weeks=1)
+
+     @property
+     def freq(self):
+         # pylint: disable=missing-function-docstring
+         return self._freq
+
+     @property
+     def time_format(self):
+         # pylint: disable=missing-function-docstring
+         return self._time_format
+
+     @property
+     def locator(self):
+         # pylint: disable=missing-function-docstring
+         return self._locator
+
+     @property
+     def time_unit(self):
+         # pylint: disable=missing-function-docstring
+         return self._time_unit
+
+     @property
+     def time_delta(self):
+         # pylint: disable=missing-function-docstring
+         return self._time_delta
+
+
+ def create_time_series_arrays(
+     counts_dict: dict[datetime.datetime, int],
+     start_time: datetime.datetime,
+     end_time: datetime.datetime,
+     time_delta: datetime.timedelta,
+     time_format: str,
+ ) -> tuple[numpy.ndarray, numpy.ndarray]:
+     """Create time series arrays from a dictionary of counts.
+
+     This function generates two aligned numpy arrays:
+     1. An array of timestamps from start_time to end_time
+     2. A corresponding array of counts for each timestamp
+
+     The timestamps are truncated to the precision specified by time_format.
+     If a timestamp in counts_dict matches a generated timestamp, its count is used;
+     otherwise, the count defaults to zero.
+
+     Args:
+         counts_dict: Dictionary mapping timestamps to their respective counts
+         start_time: The starting timestamp of the time series
+         end_time: The ending timestamp of the time series
+         time_delta: The time interval between consecutive timestamps
+         time_format: String format for datetime truncation (e.g., '%Y-%m-%d %H:%M')
+
+     Returns:
+         A tuple containing:
+             - numpy.ndarray: Array of timestamps
+             - numpy.ndarray: Array of corresponding counts
+     """
+     num_intervals = int((end_time - start_time) / time_delta) + 1
+
+     timestamps = numpy.array(
+         [
+             datetime.datetime.strptime(
+                 (start_time + i * time_delta).strftime(format=time_format), time_format
+             )
+             for i in range(num_intervals)
+         ]
+     )
+     counts = numpy.zeros(num_intervals, dtype=int)
+
+     timestamp_to_index = {timestamp: i for i, timestamp in enumerate(timestamps)}
+
+     for timestamp, count in counts_dict.items():
+         if timestamp in timestamp_to_index:
+             counts[timestamp_to_index[timestamp]] = count
+
+     return timestamps, counts
+
+
+ def _add_bar_chart_for_requests_count(
+     ax1: matplotlib.figure.Axes,
+     plot_def: Definition,
+     timestamps: numpy.array,
+     counts: numpy.array,
+ ) -> None:
+     """Add a bar chart for requests count (axes 1)"""
+     bar_width = (
+         0.8 * plot_def.time_delta.total_seconds() / 86400
+     )  # Convert to days for matplotlib
+     ax1.bar(
+         timestamps,
+         counts,
+         width=bar_width,
+         alpha=0.7,
+         color="skyblue",
+         label="Requests",
+     )
+     ax1.set_xlabel("Time")
+     ax1.set_ylabel("Requests", color="blue")
+     ax1.tick_params(axis="y", labelcolor="blue")
+
+     ax1.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
+     ax1.xaxis.set_major_locator(plot_def.locator)
+
+     matplotlib.pyplot.xticks(rotation=45)
+
+     ax1.grid(True, alpha=0.3)
+
+
+ def _add_cumulative_line_for_requests_count(
+     ax2: matplotlib.figure.Axes, timestamps: numpy.array, counts: numpy.array
+ ) -> None:
+     """Add cumulative line on secondary y-axis"""
+     cumulative = numpy.cumsum(counts)
+     ax2.plot(timestamps, cumulative, "r-", linewidth=2, label="Cumulative")
+     ax2.set_ylabel("Cumulative Requests", color="red")
+     ax2.tick_params(axis="y", labelcolor="red")
+
+
+ def requests_per_time(
+     period_of_time: models.TimePeriod,
+     endpoint: EndpointType = EndpointType.ANALYZE,
+     end_time: Optional[datetime.datetime] = None,
+ ) -> matplotlib.figure.Figure:
+     """
+     Generate a visualization of request counts over a specified time period.
+
+     This function creates a dual-axis plot showing:
+     1. A bar chart of request counts per time interval
+     2. A line chart showing the cumulative request count
+
+     The time intervals are determined by the provided TimePeriod object, which defines
+     the granularity and formatting of the time axis.
+
+     Args:
+         period_of_time: A TimePeriod object that defines the time period and interval
+             for the analysis (e.g., hourly, daily, weekly)
+         endpoint: One of the API endpoints
+         end_time: The end time for the analysis period. If None, defaults to the current
+             UTC time
+
+     Returns:
+         A matplotlib Figure object containing the generated visualization
+     """
+     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
+     start_time = period_of_time.get_period_start_time(end_time)
+     plot_def = Definition(period_of_time)
+     requests_counts = AnalyzeRequestMetrics.get_requests_in_period(
+         start_time, end_time, plot_def.time_format, endpoint
+     )
+     timestamps, counts = create_time_series_arrays(
+         requests_counts, start_time, end_time, plot_def.time_delta, plot_def.time_format
+     )
+
+     fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
+     _add_bar_chart_for_requests_count(ax1, plot_def, timestamps, counts)
+
+     ax2 = ax1.twinx()
+     _add_cumulative_line_for_requests_count(ax2, timestamps, counts)
+
+     matplotlib.pyplot.title(
+         f"Requests received for API {endpoint} ({start_time.strftime(plot_def.time_format)} "
+         f"to {end_time.strftime(plot_def.time_format)})"
+     )
+
+     lines1, labels1 = ax1.get_legend_handles_labels()
+     lines2, labels2 = ax2.get_legend_handles_labels()
+     ax1.legend(lines1 + lines2, labels1 + labels2, loc="center")
+
+     matplotlib.pyplot.tight_layout()
+
+     return fig
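
The helper `create_time_series_arrays` is easiest to see on a toy input; a small illustrative example (values invented, not part of the package):

```
import datetime

from logdetective.server.plot import create_time_series_arrays

start = datetime.datetime(2025, 1, 1)
end = datetime.datetime(2025, 1, 3)
counts_dict = {datetime.datetime(2025, 1, 2): 7}

timestamps, counts = create_time_series_arrays(
    counts_dict, start, end, datetime.timedelta(days=1), "%Y-%m-%d"
)
# timestamps -> [2025-01-01, 2025-01-02, 2025-01-03]
# counts     -> [0, 7, 0]  (days without a matching entry default to zero)
```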
{logdetective-0.5.5 → logdetective-0.5.7}/logdetective/server/server.py
@@ -6,8 +6,11 @@ import zipfile
  from pathlib import Path, PurePath
  from tempfile import TemporaryFile
  from typing import List, Annotated, Tuple, Dict, Any
+ from io import BytesIO


+ import matplotlib
+ import matplotlib.pyplot
  from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Header

  from fastapi.responses import StreamingResponse
@@ -39,7 +42,10 @@ from logdetective.server.models import (
      StagedResponse,
      Explanation,
      AnalyzedSnippet,
+     TimePeriod,
  )
+ from logdetective.server import plot
+ from logdetective.server.database.models import EndpointType

  LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 600)
  LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
@@ -494,7 +500,7 @@ async def retrieve_and_preprocess_koji_logs(job: gitlab.v4.objects.ProjectJob):
      tempfile.seek(0)

      failed_arches = {}
-     artifacts_zip = zipfile.ZipFile(tempfile, mode="r")
+     artifacts_zip = zipfile.ZipFile(tempfile, mode="r")  # pylint: disable=consider-using-with
      for zipinfo in artifacts_zip.infolist():
          if zipinfo.filename.endswith("task_failed.log"):
              # The koji logs store this file in two places: 1) in the
@@ -653,3 +659,35 @@ async def generate_mr_comment(
      )

      return content
+
+
+ def _svg_figure_response(fig: matplotlib.figure.Figure):
+     """Create a response with the given svg figure."""
+     buf = BytesIO()
+     fig.savefig(buf, format="svg", bbox_inches="tight")
+     matplotlib.pyplot.close(fig)
+
+     buf.seek(0)
+     return StreamingResponse(
+         buf,
+         media_type="image/svg+xml",
+         headers={"Content-Disposition": "inline; filename=plot.svg"},
+     )
+
+
+ @app.get("/metrics/analyze/requests", response_class=StreamingResponse)
+ async def show_analyze_requests(period_since_now: TimePeriod = Depends(TimePeriod)):
+     """Show statistics for the requests received in the given period of time
+     for the /analyze API endpoint."""
+     fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE)
+     return _svg_figure_response(fig)
+
+
+ @app.get("/metrics/analyze/staged/requests", response_class=StreamingResponse)
+ async def show_analyze_staged_requests(
+     period_since_now: TimePeriod = Depends(TimePeriod),
+ ):
+     """Show statistics for the requests received in the given period of time
+     for the /analyze/staged API endpoint."""
+     fig = plot.requests_per_time(period_since_now, EndpointType.ANALYZE_STAGED)
+     return _svg_figure_response(fig)
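
Besides the `curl`/`http` examples in the README above, the new endpoints can also be exercised from Python; a hypothetical client-side sketch, assuming a logdetective server listening on localhost:8080 as in those examples:

```
import requests

resp = requests.get(
    "http://localhost:8080/metrics/analyze/requests",
    params={"hours": 5},
    timeout=60,
)
resp.raise_for_status()
with open("/tmp/plot_hours.svg", "wb") as f:
    f.write(resp.content)  # the endpoint streams an image/svg+xml plot
```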
{logdetective-0.5.5 → logdetective-0.5.7}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "logdetective"
- version = "0.5.5"
+ version = "0.5.7"
  description = "Log using LLM AI to search for build/test failures and provide ideas for fixing these."
  authors = ["Jiri Podivin <jpodivin@gmail.com>"]
  license = "Apache-2.0"
@@ -43,11 +43,14 @@ pydantic = {version = "^2.8.2", optional = true }
  fastapi = {version = ">=0.111.1", optional = true }
  pyyaml = {version = "^6.0.1", optional = true }
  sqlalchemy = {version = "^2.0.36", optional = true }
+ psycopg2-binary = {version = "^2.9.9", optional = true }
  psycopg2 = {version = "^2.9.9", optional = true }
  alembic = {version = "^1.13.3", optional = true }
+ matplotlib = {version = "^3.8.4", optional = true }

  [tool.poetry.extras]
- server = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2", "alembic"]
+ server = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2", "alembic", "matplotlib"]
+ server-testing = ["pydantic", "fastapi", "pyyaml", "sqlalchemy", "psycopg2-binary", "alembic", "matplotlib"]

  [build-system]
  requires = ["poetry-core"]
logdetective-0.5.5/logdetective/server/database/models.py (removed)
@@ -1,88 +0,0 @@
- import enum
- import datetime
-
- from typing import Optional
- from sqlalchemy import Column, Integer, Float, DateTime, String, Enum
-
- from logdetective.server.database.base import Base, transaction
-
-
- class EndpointType(enum.Enum):
-     """Different analyze endpoints"""
-
-     ANALYZE = "analyze_log"
-     ANALYZE_STAGED = "analyze_log_staged"
-     ANALYZE_STREAM = "analyze_log_stream"
-
-
- class AnalyzeRequestMetrics(Base):
-     """Store data related to received requests and given responses"""
-
-     __tablename__ = "analyze_request_metrics"
-
-     id = Column(Integer, primary_key=True)
-     endpoint = Column(
-         Enum(EndpointType),
-         nullable=False,
-         index=True,
-         comment="The service endpoint that was called",
-     )
-     request_received_at = Column(
-         DateTime,
-         nullable=False,
-         index=True,
-         default=datetime.datetime.now(datetime.timezone.utc),
-         comment="Timestamp when the request was received",
-     )
-     log_url = Column(
-         String,
-         nullable=False,
-         index=False,
-         comment="Log url for which analysis was requested",
-     )
-     response_sent_at = Column(
-         DateTime, nullable=True, comment="Timestamp when the response was sent back"
-     )
-     response_length = Column(
-         Integer, nullable=True, comment="Length of the response in chars"
-     )
-     response_certainty = Column(
-         Float, nullable=True, comment="Certainty for generated response"
-     )
-
-     @classmethod
-     def create(
-         cls,
-         endpoint: EndpointType,
-         log_url: str,
-         request_received_at: Optional[datetime.datetime] = None,
-     ) -> int:
-         """Create AnalyzeRequestMetrics new line
-         with data related to a received request"""
-         with transaction(commit=True) as session:
-             metrics = AnalyzeRequestMetrics()
-             metrics.endpoint = endpoint
-             metrics.request_received_at = request_received_at or datetime.datetime.now(
-                 datetime.timezone.utc
-             )
-             metrics.log_url = log_url
-             session.add(metrics)
-             session.flush()
-             return metrics.id
-
-     @classmethod
-     def update(
-         cls,
-         id_: int,
-         response_sent_at: datetime,
-         response_length: int,
-         response_certainty: float,
-     ) -> None:
-         """Update an AnalyzeRequestMetrics line
-         with data related to the given response"""
-         with transaction(commit=True) as session:
-             metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
-             metrics.response_sent_at = response_sent_at
-             metrics.response_length = response_length
-             metrics.response_certainty = response_certainty
-             session.add(metrics)