logdetective 0.9.1__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  import io
2
2
  import inspect
3
- import logging
4
3
  import datetime
5
4
 
6
5
  from typing import Union
@@ -9,12 +8,11 @@ from functools import wraps
9
8
  import aiohttp
10
9
 
11
10
  from starlette.responses import StreamingResponse
12
- from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
13
- from logdetective.server.remote_log import RemoteLog
14
11
  from logdetective.server import models
15
- from logdetective.server.compressors import LLMResponseCompressor
16
-
17
- LOG = logging.getLogger("logdetective")
12
+ from logdetective.remote_log import RemoteLog
13
+ from logdetective.server.config import LOG
14
+ from logdetective.server.compressors import LLMResponseCompressor, RemoteLogCompressor
15
+ from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
18
16
 
19
17
 
20
18
  async def add_new_metrics(
@@ -31,7 +29,9 @@ async def add_new_metrics(
31
29
  and the log (in a zip format) for which analysis is requested.
32
30
  """
33
31
  remote_log = RemoteLog(url, http_session)
34
- compressed_log_content = compressed_log_content or await remote_log.zip_content()
32
+ compressed_log_content = (
33
+ compressed_log_content or await RemoteLogCompressor(remote_log).zip_content()
34
+ )
35
35
  return AnalyzeRequestMetrics.create(
36
36
  endpoint=EndpointType(api_name),
37
37
  compressed_log=compressed_log_content,
@@ -58,7 +58,8 @@ def update_metrics(
58
58
  compressed_response = None
59
59
  LOG.warning(
60
60
  "Given response can not be serialized "
61
- "and saved in db (probably a StreamingResponse): %s.", e
61
+ "and saved in db (probably a StreamingResponse): %s.",
62
+ e,
62
63
  )
63
64
 
64
65
  response_sent_at = (
@@ -83,7 +84,24 @@ def update_metrics(
83
84
 
84
85
  def track_request(name=None):
85
86
  """
86
- Decorator to track requests metrics
87
+ Decorator to track requests/responses metrics
88
+
89
+ On entering the decorated function, it registers the time for the request
90
+ and saves the passed log content.
91
+ On exiting the decorated function, it registers the time for the response
92
+ and saves the generated response.
93
+
94
+ Use it to decorate server endpoints that generate an LLM response
95
+ as in the following example:
96
+
97
+ >>> @app.post("/analyze", response_model=Response)
98
+ >>> @track_request()
99
+ >>> async def analyze_log(build_log):
100
+ >>> pass
101
+
102
+ Warning: the decorators' order is important!
103
+ The function returned by the *track_request* decorator is the
104
+ server API function we want to be called by FastAPI.
87
105
  """
88
106
 
89
107
  def decorator(f):
@@ -9,7 +9,15 @@ from pydantic import (
9
9
  NonNegativeFloat,
10
10
  HttpUrl,
11
11
  )
12
- from logdetective.constants import DEFAULT_TEMPERATURE
12
+
13
+ from aiolimiter import AsyncLimiter
14
+ from gitlab import Gitlab
15
+
16
+ from logdetective.constants import (
17
+ DEFAULT_TEMPERATURE,
18
+ LLM_DEFAULT_MAX_QUEUE_SIZE,
19
+ LLM_DEFAULT_REQUESTS_PER_MINUTE,
20
+ )
13
21
 
14
22
 
15
23
  class BuildLog(BaseModel):
@@ -46,6 +54,33 @@ class JobHook(BaseModel):
46
54
  project_id: int
47
55
 
48
56
 
57
+ class EmojiMergeRequest(BaseModel):
58
+ """Model of the 'merge_request' subsection of Emoji webhook messages.
59
+ This model implements only the fields that we care about. The webhook
60
+ sends many more fields that we will ignore."""
61
+
62
+ # The identifier of the target project
63
+ target_project_id: int
64
+
65
+ # The internal identifier (relative to the target project)
66
+ iid: int
67
+
68
+
69
+ class EmojiHook(BaseModel):
70
+ """Model of Emoji Hook events sent from GitLab.
71
+ Full details of the specification are available at
72
+ https://docs.gitlab.com/user/project/integrations/webhook_events/#emoji-events
73
+ This model implements only the fields that we care about. The webhook
74
+ sends many more fields that we will ignore."""
75
+
76
+ # The kind of webhook message. We are only interested in 'emoji' messages
77
+ # which represent awarding or revoking emoji reactions on notes.
78
+ object_kind: str = Field(pattern=r"^emoji$")
79
+
80
+ # Information about the merge request this emoji applies to, if any.
81
+ merge_request: EmojiMergeRequest = Field(default=None)
82
+
83
+
49
84
  class Explanation(BaseModel):
50
85
  """Model of snippet or general log explanation from Log Detective"""
51
86
 
@@ -92,7 +127,7 @@ class StagedResponse(Response):
92
127
  snippets: List[AnalyzedSnippet]
93
128
 
94
129
 
95
- class InferenceConfig(BaseModel):
130
+ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
96
131
  """Model for inference configuration of logdetective server."""
97
132
 
98
133
  max_tokens: int = -1
@@ -104,6 +139,9 @@ class InferenceConfig(BaseModel):
104
139
  api_token: str = ""
105
140
  model: str = ""
106
141
  temperature: NonNegativeFloat = DEFAULT_TEMPERATURE
142
+ max_queue_size: int = LLM_DEFAULT_MAX_QUEUE_SIZE
143
+ request_period: float = 60.0 / LLM_DEFAULT_REQUESTS_PER_MINUTE
144
+ _limiter: AsyncLimiter = AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE)
107
145
 
108
146
  def __init__(self, data: Optional[dict] = None):
109
147
  super().__init__()
@@ -117,6 +155,16 @@ class InferenceConfig(BaseModel):
117
155
  self.api_token = data.get("api_token", "")
118
156
  self.model = data.get("model", "default-model")
119
157
  self.temperature = data.get("temperature", DEFAULT_TEMPERATURE)
158
+ self.max_queue_size = data.get("max_queue_size", LLM_DEFAULT_MAX_QUEUE_SIZE)
159
+
160
+ self._requests_per_minute = data.get(
161
+ "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
162
+ )
163
+ self._limiter = AsyncLimiter(self._requests_per_minute)
164
+
165
+ def get_limiter(self):
166
+ """Return the limiter object so it can be used as a context manager"""
167
+ return self._limiter
120
168
 
121
169
 
122
170
  class ExtractorConfig(BaseModel):
@@ -136,26 +184,50 @@ class ExtractorConfig(BaseModel):
136
184
  self.verbose = data.get("verbose", False)
137
185
 
138
186
 
139
- class GitLabConfig(BaseModel):
187
+ class GitLabInstanceConfig(BaseModel):
140
188
  """Model for the configuration of a single GitLab instance used by logdetective server."""
141
189
 
190
+ name: str = None
142
191
  url: str = None
143
192
  api_url: str = None
144
193
  api_token: str = None
194
+ _conn: Gitlab = None
145
195
 
146
196
  # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
147
197
  max_artifact_size: int = 300
148
198
 
149
- def __init__(self, data: Optional[dict] = None):
199
+ def __init__(self, name: str, data: Optional[dict] = None):
150
200
  super().__init__()
151
201
  if data is None:
152
202
  return
153
203
 
204
+ self.name = name
154
205
  self.url = data.get("url", "https://gitlab.com")
155
206
  self.api_url = f"{self.url}/api/v4"
156
207
  self.api_token = data.get("api_token", None)
157
208
  self.max_artifact_size = int(data.get("max_artifact_size")) * 1024 * 1024
158
209
 
210
+ self._conn = Gitlab(url=self.url, private_token=self.api_token)
211
+
212
+ def get_connection(self):
213
+ """Get the Gitlab connection object"""
214
+ return self._conn
215
+
216
+
217
+ class GitLabConfig(BaseModel):
218
+ """Model for the GitLab configuration of logdetective server: one GitLabInstanceConfig per configured instance, keyed by instance URL."""
219
+
220
+ instances: Dict[str, GitLabInstanceConfig] = {}
221
+
222
+ def __init__(self, data: Optional[dict] = None):
223
+ super().__init__()
224
+ if data is None:
225
+ return
226
+
227
+ for instance_name, instance_data in data.items():
228
+ instance = GitLabInstanceConfig(instance_name, instance_data)
229
+ self.instances[instance.url] = instance
230
+
159
231
 
160
232
  class LogConfig(BaseModel):
161
233
  """Logging configuration"""
@@ -232,7 +304,7 @@ class TimePeriod(BaseModel):
232
304
  @model_validator(mode="before")
233
305
  @classmethod
234
306
  def check_exclusive_fields(cls, data):
235
- """ Check that only one key between weeks, days and hours is defined"""
307
+ """Check that only one key between weeks, days and hours is defined"""
236
308
  if isinstance(data, dict):
237
309
  how_many_fields = sum(
238
310
  1
@@ -284,6 +356,6 @@ class TimePeriod(BaseModel):
284
356
  datetime.datetime: The start time of the period.
285
357
  """
286
358
  time = end_time or datetime.datetime.now(datetime.timezone.utc)
287
- if end_time.tzinfo is None:
359
+ if time.tzinfo is None:
288
360
  end_time = end_time.replace(tzinfo=datetime.timezone.utc)
289
361
  return time - self.get_time_period()
@@ -1,13 +1,18 @@
1
1
  import datetime
2
- from typing import Optional, Union
2
+ from typing import Optional, Union, Dict
3
3
 
4
4
  import numpy
5
5
  import matplotlib
6
6
  import matplotlib.figure
7
7
  import matplotlib.pyplot
8
8
 
9
+ from matplotlib.pyplot import cm
9
10
  from logdetective.server import models
10
- from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType
11
+ from logdetective.server.database.models import (
12
+ AnalyzeRequestMetrics,
13
+ EndpointType,
14
+ Reactions,
15
+ )
11
16
 
12
17
 
13
18
  class Definition:
@@ -145,13 +150,19 @@ def _add_bar_chart(
145
150
  ax.grid(True, alpha=0.3)
146
151
 
147
152
 
148
- def _add_line_chart(
149
- ax: matplotlib.figure.Axes, timestamps: numpy.array, values: numpy.array, label: str
150
- ) -> None:
153
+ def _add_line_chart( # pylint: disable=too-many-arguments disable=too-many-positional-arguments
154
+ ax: matplotlib.figure.Axes,
155
+ timestamps: numpy.array,
156
+ values: numpy.array,
157
+ label: str,
158
+ color: str = "red",
159
+ set_label: bool = True,
160
+ ):
151
161
  """Add a line chart in the given color (red by default)"""
152
- ax.plot(timestamps, values, "r-", linewidth=2, label=label)
153
- ax.set_ylabel(label, color="red")
154
- ax.tick_params(axis="y", labelcolor="red")
162
+ ax.plot(timestamps, values, color=color, linestyle="-", linewidth=2, label=label)
163
+ if set_label:
164
+ ax.set_ylabel(label, color=color)
165
+ ax.tick_params(axis="y", labelcolor=color)
155
166
 
156
167
 
157
168
  def requests_per_time(
@@ -249,7 +260,9 @@ def average_time_per_responses( # pylint: disable=too-many-locals
249
260
  )
250
261
 
251
262
  fig, ax1 = matplotlib.pyplot.subplots(figsize=(12, 6))
252
- _add_bar_chart(ax1, plot_def, timestamps, average_time, "average response time (seconds)")
263
+ _add_bar_chart(
264
+ ax1, plot_def, timestamps, average_time, "average response time (seconds)"
265
+ )
253
266
 
254
267
  responses_average_length = (
255
268
  AnalyzeRequestMetrics.get_responses_average_length_in_period(
@@ -279,3 +292,138 @@ def average_time_per_responses( # pylint: disable=too-many-locals
279
292
  matplotlib.pyplot.tight_layout()
280
293
 
281
294
  return fig
295
+
296
+
297
+ def _collect_emoji_data(
298
+ start_time: datetime.datetime, plot_def: Definition
299
+ ) -> Dict[str, Dict[datetime.datetime, int]]:
300
+ """Collect and organize emoji feedback data
301
+
302
+ Counts all emojis given to logdetective comments created since start_time.
303
+ Collect counts in time according to the plot definition.
304
+ """
305
+ reactions = Reactions.get_since(start_time)
306
+ reactions_values_dict: Dict[str, Dict] = {}
307
+ for comment_created_at, reaction in reactions:
308
+ comment_created_at_formatted = comment_created_at.strptime(
309
+ comment_created_at.strftime(plot_def.time_format), plot_def.time_format
310
+ )
311
+ if reaction.reaction_type in reactions_values_dict:
312
+ reaction_values_dict = reactions_values_dict[reaction.reaction_type]
313
+ if comment_created_at_formatted in reaction_values_dict:
314
+ reaction_values_dict[comment_created_at_formatted] += reaction.count
315
+ else:
316
+ reaction_values_dict[comment_created_at_formatted] = reaction.count
317
+ else:
318
+ reaction_values_dict = {comment_created_at_formatted: reaction.count}
319
+ reactions_values_dict.update({reaction.reaction_type: reaction_values_dict})
320
+
321
+ return reactions_values_dict
322
+
323
+
324
+ def _plot_emoji_data( # pylint: disable=too-many-locals
325
+ ax: matplotlib.figure.Axes,
326
+ reactions_values_dict: Dict[str, Dict[datetime.datetime, int]],
327
+ plot_def: Definition,
328
+ start_time: datetime.datetime,
329
+ end_time: datetime.datetime,
330
+ ):
331
+ """Plot each emoji's data on its own axis."""
332
+ emoji_lines = {}
333
+ emoji_labels = {}
334
+
335
+ # Find global min and max y values to set consistent scale
336
+ all_counts = []
337
+ for emoji, dict_counts in reactions_values_dict.items():
338
+ timestamps, counts = create_time_series_arrays(
339
+ dict_counts, plot_def, start_time, end_time
340
+ )
341
+ all_counts.extend(counts)
342
+
343
+ colors = [cm.viridis(i) for i in numpy.linspace(0, 1, len(reactions_values_dict))] # pylint: disable=no-member
344
+
345
+ first_emoji = True
346
+ for i, (emoji, dict_counts) in enumerate(reactions_values_dict.items()):
347
+ timestamps, counts = create_time_series_arrays(
348
+ dict_counts, plot_def, start_time, end_time
349
+ )
350
+
351
+ if first_emoji:
352
+ current_ax = ax
353
+ first_emoji = False
354
+ else:
355
+ current_ax = ax.twinx()
356
+ current_ax.spines["right"].set_position(("outward", 60 * (i - 1)))
357
+
358
+ _add_line_chart(current_ax, timestamps, counts, f"{emoji}", colors[i], False)
359
+ emoji_lines[emoji], emoji_labels[emoji] = current_ax.get_legend_handles_labels()
360
+
361
+ # Set the same y-limits for all axes
362
+ current_ax.set_ylim(0, max(all_counts) * 1.1)
363
+
364
+ # Only show y-ticks on the first axis to avoid clutter
365
+ if 0 < i < len(reactions_values_dict):
366
+ current_ax.set_yticks([])
367
+
368
+ return emoji_lines, emoji_labels
369
+
370
+
371
+ def emojis_per_time(
372
+ period_of_time: models.TimePeriod,
373
+ end_time: Optional[datetime.datetime] = None,
374
+ ) -> matplotlib.figure.Figure:
375
+ """
376
+ Generate a visualization of overall emoji feedback
377
+ over a specified time period.
378
+
379
+ This function creates a multiple-axis plot showing
380
+ a line chart for every found emoji
381
+
382
+ The time intervals are determined by the provided TimePeriod object, which defines
383
+ the granularity and formatting of the time axis.
384
+
385
+ Args:
386
+ period_of_time: A TimePeriod object that defines the time period and interval
387
+ for the analysis (e.g., hourly, daily, weekly)
388
+ end_time: The end time for the analysis period. If None, defaults to the current
389
+ UTC time
390
+
391
+ Returns:
392
+ A matplotlib Figure object containing the generated visualization
393
+ """
394
+ plot_def = Definition(period_of_time)
395
+ end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
396
+ start_time = period_of_time.get_period_start_time(end_time)
397
+ reactions_values_dict = _collect_emoji_data(start_time, plot_def)
398
+
399
+ fig, ax = matplotlib.pyplot.subplots(figsize=(12, 6))
400
+
401
+ emoji_lines, emoji_labels = _plot_emoji_data(
402
+ ax, reactions_values_dict, plot_def, start_time, end_time
403
+ )
404
+
405
+ matplotlib.pyplot.title(
406
+ f"Emoji feedback ({start_time.strftime(plot_def.time_format)} "
407
+ f"to {end_time.strftime(plot_def.time_format)})"
408
+ )
409
+
410
+ all_lines = []
411
+ for lines in emoji_lines.values():
412
+ all_lines.extend(lines)
413
+ all_labels = []
414
+ for labels in emoji_labels.values():
415
+ all_labels.extend(labels)
416
+
417
+ ax.legend(all_lines, all_labels, loc="upper left")
418
+ ax.set_xlabel("Time")
419
+ ax.set_ylabel("Count")
420
+
421
+ # Format x-axis
422
+ ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(plot_def.time_format))
423
+ ax.xaxis.set_major_locator(plot_def.locator)
424
+ ax.tick_params(axis="x", labelrotation=45)
425
+ ax.grid(True, alpha=0.3)
426
+
427
+ matplotlib.pyplot.tight_layout()
428
+
429
+ return fig