logdetective 2.0.1__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. logdetective/extractors.py +134 -23
  2. logdetective/logdetective.py +39 -23
  3. logdetective/models.py +26 -0
  4. logdetective/prompts-summary-first.yml +0 -2
  5. logdetective/prompts.yml +0 -3
  6. logdetective/server/compressors.py +7 -10
  7. logdetective/server/config.py +3 -2
  8. logdetective/server/database/base.py +31 -26
  9. logdetective/server/database/models/__init__.py +2 -2
  10. logdetective/server/database/models/exceptions.py +4 -0
  11. logdetective/server/database/models/koji.py +47 -30
  12. logdetective/server/database/models/merge_request_jobs.py +205 -186
  13. logdetective/server/database/models/metrics.py +87 -61
  14. logdetective/server/emoji.py +57 -55
  15. logdetective/server/exceptions.py +4 -0
  16. logdetective/server/gitlab.py +18 -11
  17. logdetective/server/llm.py +19 -10
  18. logdetective/server/metric.py +18 -13
  19. logdetective/server/models.py +65 -48
  20. logdetective/server/plot.py +13 -11
  21. logdetective/server/server.py +52 -30
  22. logdetective/server/templates/base_response.html.j2 +59 -0
  23. logdetective/server/templates/gitlab_full_comment.md.j2 +58 -53
  24. logdetective/server/templates/gitlab_short_comment.md.j2 +52 -47
  25. logdetective/server/utils.py +15 -27
  26. logdetective/utils.py +115 -49
  27. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/METADATA +95 -21
  28. logdetective-2.11.0.dist-info/RECORD +40 -0
  29. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/WHEEL +1 -1
  30. logdetective-2.0.1.dist-info/RECORD +0 -39
  31. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info}/entry_points.txt +0 -0
  32. {logdetective-2.0.1.dist-info → logdetective-2.11.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,6 +1,7 @@
 import os
 import asyncio
 import random
+import time
 from typing import List, Tuple, Dict

 import backoff
@@ -15,6 +16,7 @@ from logdetective.utils import (
     compute_certainty,
     prompt_to_messages,
     format_snippets,
+    mine_logs,
 )
 from logdetective.server.config import (
     LOG,
@@ -33,10 +35,10 @@ from logdetective.server.models import (
 )
 from logdetective.server.utils import (
     format_analyzed_snippets,
-    mine_logs,
     should_we_giveup,
     we_give_up,
     filter_snippets,
+    construct_final_prompt,
 )


@@ -184,10 +186,13 @@ async def analyze_snippets(

 async def perfrom_analysis(log_text: str) -> Response:
     """Sumbit log file snippets in aggregate to LLM and retrieve results"""
-    log_summary = mine_logs(log_text)
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
     log_summary = format_snippets(log_summary)
+
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
     messages = prompt_to_messages(
-        PROMPT_CONFIG.prompt_template.format(log_summary),
+        final_prompt,
         PROMPT_CONFIG.default_system_prompt,
         SERVER_CONFIG.inference.system_role,
         SERVER_CONFIG.inference.user_role,
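The prompt is no longer built inline with `PROMPT_CONFIG.prompt_template.format(...)`; a `construct_final_prompt` helper, imported above from logdetective.server.utils, takes over. A minimal sketch of what such a helper can look like, based only on the call sites visible in this diff; the shipped implementation may do more (for example trimming the summary to the context window):

# Hypothetical sketch only; the real helper lives in
# logdetective/server/utils.py and may differ.
def construct_final_prompt(log_summary: str, prompt_template: str) -> str:
    """Fill the configured prompt template with the mined log summary."""
    return prompt_template.format(log_summary)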
@@ -213,10 +218,13 @@ async def perfrom_analysis(log_text: str) -> Response:

 async def perform_analyis_stream(log_text: str) -> AsyncStream:
     """Submit log file snippets in aggregate and return a stream of tokens"""
-    log_summary = mine_logs(log_text)
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
     log_summary = format_snippets(log_summary)
+
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template)
+
     messages = prompt_to_messages(
-        PROMPT_CONFIG.prompt_template.format(log_summary),
+        final_prompt,
         PROMPT_CONFIG.default_system_prompt,
         SERVER_CONFIG.inference.system_role,
         SERVER_CONFIG.inference.user_role,
@@ -235,8 +243,8 @@ async def perform_analyis_stream(log_text: str) -> AsyncStream:

 async def perform_staged_analysis(log_text: str) -> StagedResponse:
     """Submit the log file snippets to the LLM and retrieve their results"""
-    log_summary = mine_logs(log_text)
-
+    log_summary = mine_logs(log_text, SERVER_CONFIG.extractor.get_extractors())
+    start = time.time()
     if SERVER_CONFIG.general.top_k_snippets:
         rated_snippets = await analyze_snippets(
             log_summary=log_summary,
@@ -265,10 +273,11 @@ async def perform_staged_analysis(log_text: str) -> StagedResponse:
         AnalyzedSnippet(line_number=e[0][0], text=e[0][1], explanation=e[1])
         for e in zip(log_summary, processed_snippets)
     ]
+    delta = time.time() - start
+    LOG.info("Snippet analysis performed in %f s", delta)
+    log_summary = format_analyzed_snippets(processed_snippets)
+    final_prompt = construct_final_prompt(log_summary, PROMPT_CONFIG.prompt_template_staged)

-    final_prompt = PROMPT_CONFIG.prompt_template_staged.format(
-        format_analyzed_snippets(processed_snippets)
-    )
     messages = prompt_to_messages(
         final_prompt,
         PROMPT_CONFIG.staged_system_prompt,
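`mine_logs` now receives the extractor list from `SERVER_CONFIG.extractor.get_extractors()`, and the `ExtractorConfig.get_extractors` docstring further down states that each extractor is applied in turn to the original log text. A hedged sketch of that loop, assuming each extractor is callable and yields `(line_number, text)` pairs; the real `mine_logs` in logdetective/utils.py may differ:

from typing import List, Tuple

# Sketch only: pools snippets from every configured extractor.
def mine_logs_sketch(log_text: str, extractors) -> List[Tuple[int, str]]:
    snippets: List[Tuple[int, str]] = []
    for extractor in extractors:
        # Assumed interface: extractor(log_text) -> [(line_number, text), ...]
        snippets.extend(extractor(log_text))
    return snippets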
@@ -1,4 +1,3 @@
-import io
 import inspect
 import datetime

@@ -13,14 +12,15 @@ from logdetective.remote_log import RemoteLog
 from logdetective.server.config import LOG
 from logdetective.server.compressors import LLMResponseCompressor, RemoteLogCompressor
 from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+from logdetective.server.exceptions import LogDetectiveMetricsError


 async def add_new_metrics(
-    api_name: str,
+    api_name: EndpointType,
     url: Optional[str] = None,
     http_session: Optional[aiohttp.ClientSession] = None,
     received_at: Optional[datetime.datetime] = None,
-    compressed_log_content: Optional[io.BytesIO] = None,
+    compressed_log_content: Optional[bytes] = None,
 ) -> int:
     """Add a new database entry for a received request.

@@ -29,10 +29,14 @@
     and the log (in a zip format) for which analysis is requested.
     """
     if not compressed_log_content:
+        if not (url and http_session):
+            raise LogDetectiveMetricsError(
+                f"""Remote log can not be retrieved without URL and http session.
+                URL: {url}, http session:{http_session}""")
         remote_log = RemoteLog(url, http_session)
         compressed_log_content = await RemoteLogCompressor(remote_log).zip_content()

-    return AnalyzeRequestMetrics.create(
+    return await AnalyzeRequestMetrics.create(
         endpoint=EndpointType(api_name),
         compressed_log=compressed_log_content,
         request_received_at=received_at
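`add_new_metrics` now accepts either pre-compressed log bytes or a URL plus an aiohttp session, and raises `LogDetectiveMetricsError` when neither is usable. A usage sketch, assuming the helper is exposed from logdetective/server/metric.py as the file list suggests:

import datetime

import aiohttp

from logdetective.server.database.models import EndpointType
from logdetective.server.metric import add_new_metrics

async def record_request(url: str) -> int:
    async with aiohttp.ClientSession() as session:
        # With only a URL and session, the log is fetched and zipped internally;
        # passing neither a URL/session nor compressed bytes raises
        # LogDetectiveMetricsError.
        return await add_new_metrics(
            api_name=EndpointType.ANALYZE,
            url=url,
            http_session=session,
            received_at=datetime.datetime.now(datetime.timezone.utc),
        )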
@@ -41,7 +45,7 @@
 )


-def update_metrics(
+async def update_metrics(
     metrics_id: int,
     response: Union[models.Response, models.StagedResponse, StreamingResponse],
     sent_at: Optional[datetime.datetime] = None,
@@ -73,12 +77,12 @@
     response_certainty = (
         response.response_certainty if hasattr(response, "response_certainty") else None
     )
-    AnalyzeRequestMetrics.update(
-        metrics_id,
-        response_sent_at,
-        response_length,
-        response_certainty,
-        compressed_response,
+    await AnalyzeRequestMetrics.update(
+        id_=metrics_id,
+        response_sent_at=response_sent_at,
+        response_length=response_length,
+        response_certainty=response_certainty,
+        compressed_response=compressed_response,
     )


@@ -109,10 +113,11 @@ def track_request(name=None):
         async def async_decorated_function(*args, **kwargs):
             log_url = kwargs["build_log"].url
             metrics_id = await add_new_metrics(
-                name if name else f.__name__, log_url, kwargs["http_session"]
+                api_name=EndpointType(name if name else f.__name__),
+                url=log_url, http_session=kwargs["http_session"]
             )
             response = await f(*args, **kwargs)
-            update_metrics(metrics_id, response)
+            await update_metrics(metrics_id, response)
             return response

         if inspect.iscoroutinefunction(f):
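`track_request()` therefore expects the wrapped coroutine to receive `build_log` (with a `.url`) and `http_session` as keyword arguments, and the `name` argument (or the function's `__name__`) has to map onto an `EndpointType` value. A hedged sketch of a decorated endpoint; the function body and names below are placeholders, not logdetective code:

from logdetective.server.database.models import EndpointType
from logdetective.server.metric import track_request  # path assumed from the file list
from logdetective.server.models import BuildLog

@track_request(name=EndpointType.ANALYZE.value)
async def analyze(build_log: BuildLog, http_session):
    # The decorator records build_log.url before the call and awaits
    # update_metrics() with whatever this coroutine returns.
    ...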
@@ -10,6 +10,7 @@ from pydantic import (
     field_validator,
     NonNegativeFloat,
     HttpUrl,
+    PrivateAttr,
 )

 import aiohttp
@@ -26,6 +27,9 @@ from logdetective.constants import (
     USER_ROLE_DEFAULT,
 )

+from logdetective.extractors import Extractor, DrainExtractor, CSGrepExtractor
+from logdetective.utils import check_csgrep
+

 class BuildLog(BaseModel):
     """Model of data submitted to API."""
@@ -179,8 +183,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
     http_timeout: float = 5.0
     user_role: str = USER_ROLE_DEFAULT
     system_role: str = SYSTEM_ROLE_DEFAULT
-    _http_session: aiohttp.ClientSession = None
-    _limiter: AsyncLimiter = AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE)
+    llm_api_timeout: float = 15.0
+    _limiter: AsyncLimiter = PrivateAttr(
+        default_factory=lambda: AsyncLimiter(LLM_DEFAULT_REQUESTS_PER_MINUTE))

     def __init__(self, data: Optional[dict] = None):
         super().__init__()
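Switching `_limiter` to `PrivateAttr(default_factory=...)` means every `InferenceConfig` instance gets its own `AsyncLimiter` instead of all instances sharing one class-level default. A standalone illustration of the pattern (not logdetective code, assuming `AsyncLimiter` comes from aiolimiter):

from aiolimiter import AsyncLimiter
from pydantic import BaseModel, PrivateAttr

class Throttled(BaseModel):
    requests_per_minute: int = 60
    # default_factory runs once per instance, so limiter state is not shared
    _limiter: AsyncLimiter = PrivateAttr(
        default_factory=lambda: AsyncLimiter(60)
    )

a, b = Throttled(), Throttled()
assert a._limiter is not b._limiter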
@@ -200,42 +205,9 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
         self._requests_per_minute = data.get(
             "requests_per_minute", LLM_DEFAULT_REQUESTS_PER_MINUTE
         )
+        self.llm_api_timeout = data.get("llm_api_timeout", 15.0)
         self._limiter = AsyncLimiter(self._requests_per_minute)

-    def __del__(self):
-        # Close connection when this object is destroyed
-        if self._http_session:
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(self._http_session.close())
-            except RuntimeError:
-                # No loop running, so create one to close the session
-                loop = asyncio.new_event_loop()
-                loop.run_until_complete(self._http_session.close())
-                loop.close()
-            except Exception:  # pylint: disable=broad-exception-caught
-                # We should only get here if we're shutting down, so we don't
-                # really care if the close() completes cleanly.
-                pass
-
-    def get_http_session(self):
-        """Return the internal HTTP session so it can be used to contect the
-        LLM server. May be used as a context manager."""
-
-        # Create the session on the first attempt. We need to do this "lazily"
-        # because it needs to happen once the event loop is running, even
-        # though the initialization itself is synchronous.
-        if not self._http_session:
-            self._http_session = aiohttp.ClientSession(
-                base_url=self.url,
-                timeout=aiohttp.ClientTimeout(
-                    total=self.http_timeout,
-                    connect=3.07,
-                ),
-            )
-
-        return self._http_session
-
     def get_limiter(self):
         """Return the limiter object so it can be used as a context manager"""
         return self._limiter
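With `__del__` and `get_http_session()` gone from `InferenceConfig`, the lazily created per-config session disappears; the server code below hands out `request.app.http` instead, i.e. one aiohttp session owned by the application. A hedged sketch of that pattern; the attribute name `http` is taken from the diff, the timeout value and everything else is illustrative:

from contextlib import asynccontextmanager

import aiohttp
from fastapi import FastAPI, Request

@asynccontextmanager
async def lifespan(app: FastAPI):
    # One shared session for the lifetime of the application.
    app.http = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15.0))
    try:
        yield
    finally:
        await app.http.close()

app = FastAPI(lifespan=lifespan)

async def get_http_session(request: Request) -> aiohttp.ClientSession:
    return request.app.http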
@@ -244,20 +216,59 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
 class ExtractorConfig(BaseModel):
     """Model for extractor configuration of logdetective server."""

-    context: bool = True
     max_clusters: int = 8
     verbose: bool = False
     max_snippet_len: int = 2000
+    csgrep: bool = False
+
+    _extractors: List[Extractor] = PrivateAttr(default_factory=list)
+
+    def _setup_extractors(self):
+        """Initialize extractors with common settings."""
+        self._extractors = [
+            DrainExtractor(
+                verbose=self.verbose,
+                max_snippet_len=self.max_snippet_len,
+                max_clusters=self.max_clusters,
+            )
+        ]
+
+        if self.csgrep:
+            self._extractors.append(
+                CSGrepExtractor(
+                    verbose=self.verbose,
+                    max_snippet_len=self.max_snippet_len,
+                )
+            )

     def __init__(self, data: Optional[dict] = None):
-        super().__init__()
+        super().__init__(data=data)
+
         if data is None:
+            self._setup_extractors()
             return

-        self.context = data.get("context", True)
         self.max_clusters = data.get("max_clusters", 8)
         self.verbose = data.get("verbose", False)
         self.max_snippet_len = data.get("max_snippet_len", 2000)
+        self.csgrep = data.get("csgrep", False)
+
+        self._setup_extractors()
+
+    def get_extractors(self) -> List[Extractor]:
+        """Return list of initialized extractors, each will be applied in turn
+        on original log text to retrieve snippets."""
+        return self._extractors
+
+    @field_validator("csgrep", mode="after")
+    @classmethod
+    def validate_csgrep(cls, value: bool) -> bool:
+        """Verify that csgrep is available if requested."""
+        if not check_csgrep():
+            raise ValueError(
+                "Requested csgrep extractor but `csgrep` binary is not in the PATH"
+            )
+        return value


 class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attributes
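The extractor section of the server config loses `context` and gains `csgrep`; `get_extractors()` always returns a `DrainExtractor` and appends a `CSGrepExtractor` when `csgrep` is enabled, while the validator rejects the configuration if the `csgrep` binary is missing from PATH. A usage sketch (config keys mirror the `data.get(...)` calls above; the import path follows the file list):

from logdetective.server.models import ExtractorConfig

config = ExtractorConfig({
    "max_clusters": 8,
    "verbose": False,
    "max_snippet_len": 2000,
    "csgrep": True,  # validation fails if `csgrep` is not in PATH
})
for extractor in config.get_extractors():
    # DrainExtractor first, then CSGrepExtractor because csgrep is enabled
    print(type(extractor).__name__)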
@@ -265,6 +276,7 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr

     name: str = None
     url: str = None
+    # Path to API of the gitlab instance, assuming `url` as prefix.
     api_path: str = None
     api_token: str = None

@@ -277,8 +289,8 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr
     webhook_secrets: Optional[List[str]] = None

     timeout: float = 5.0
-    _conn: Gitlab = None
-    _http_session: aiohttp.ClientSession = None
+    _conn: Gitlab | None = PrivateAttr(default=None)
+    _http_session: aiohttp.ClientSession | None = PrivateAttr(default=None)

     # Maximum size of artifacts.zip in MiB. (default: 300 MiB)
     max_artifact_size: int = 300 * 1024 * 1024
@@ -364,8 +376,8 @@ class KojiInstanceConfig(BaseModel):
     xmlrpc_url: str = ""
     tokens: List[str] = []

-    _conn: Optional[koji.ClientSession] = None
-    _callbacks: defaultdict[int, set[str]] = defaultdict(set)
+    _conn: Optional[koji.ClientSession] = PrivateAttr(default=None)
+    _callbacks: defaultdict[int, set[str]] = PrivateAttr(default_factory=lambda: defaultdict(set))

     def __init__(self, name: str, data: Optional[dict] = None):
         super().__init__()
@@ -455,8 +467,8 @@ class LogConfig(BaseModel):
 class GeneralConfig(BaseModel):
     """General config options for Log Detective"""

-    packages: List[str] = None
-    excluded_packages: List[str] = None
+    packages: List[str] = []
+    excluded_packages: List[str] = []
     devmode: bool = False
     sentry_dsn: HttpUrl | None = None
     collect_emojis_interval: int = 60 * 60  # seconds
@@ -483,6 +495,7 @@ class Config(BaseModel):
     log: LogConfig = LogConfig()
     inference: InferenceConfig = InferenceConfig()
     snippet_inference: InferenceConfig = InferenceConfig()
+    # TODO(jpodivin): Extend to work with multiple extractor configs
     extractor: ExtractorConfig = ExtractorConfig()
     gitlab: GitLabConfig = GitLabConfig()
     koji: KojiConfig = KojiConfig()
@@ -522,7 +535,8 @@ class TimePeriod(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def check_exclusive_fields(cls, data):
-        """Check that only one key between weeks, days and hours is defined"""
+        """Check that only one key between weeks, days and hours is defined,
+        if no period is specified, fall back to 2 days."""
         if isinstance(data, dict):
             how_many_fields = sum(
                 1
@@ -548,6 +562,7 @@

     def get_time_period(self) -> datetime.timedelta:
         """Get the period of time represented by this input model.
+        Will default to 2 days, if no period is set.

         Returns:
             datetime.timedelta: The time period as a timedelta object.
@@ -559,10 +574,12 @@
             delta = datetime.timedelta(days=self.days)
         elif self.hours:
             delta = datetime.timedelta(hours=self.hours)
+        else:
+            delta = datetime.timedelta(days=2)
         return delta

     def get_period_start_time(
-        self, end_time: datetime.datetime = None
+        self, end_time: Optional[datetime.datetime] = None
     ) -> datetime.datetime:
         """Calculate the start time of this period based on the end time.

@@ -575,5 +592,5 @@
         """
         time = end_time or datetime.datetime.now(datetime.timezone.utc)
         if time.tzinfo is None:
-            end_time = end_time.replace(tzinfo=datetime.timezone.utc)
+            time = time.replace(tzinfo=datetime.timezone.utc)
         return time - self.get_time_period()
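`TimePeriod` now falls back to a 2-day window when none of `weeks`, `days`, or `hours` is given, and `get_period_start_time` treats naive end times as UTC. A small usage sketch (import path assumed from the file list):

import datetime

from logdetective.server.models import TimePeriod

assert TimePeriod().get_time_period() == datetime.timedelta(days=2)  # fallback

period = TimePeriod(hours=12)
start = period.get_period_start_time(
    datetime.datetime(2024, 1, 2)  # naive, normalized to UTC
)
print(start)  # 2024-01-01 12:00:00+00:00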
@@ -69,7 +69,7 @@ def create_time_series_arrays(
     plot_def: Definition,
     start_time: datetime.datetime,
     end_time: datetime.datetime,
-    value_type: Optional[Union[int, float]] = int,
+    value_type: Optional[Union[type[int], type[float]]] = int,
 ) -> tuple[numpy.ndarray, numpy.ndarray]:
     """Create time series arrays from a dictionary of values.

@@ -163,7 +163,7 @@ def _add_line_chart( # pylint: disable=too-many-arguments disable=too-many-posi
     ax.tick_params(axis="y", labelcolor=color)


-def requests_per_time(
+async def requests_per_time(
     period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
@@ -191,7 +191,7 @@ def requests_per_time(
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
     plot_def = Definition(period_of_time)
-    requests_counts = AnalyzeRequestMetrics.get_requests_in_period(
+    requests_counts = await AnalyzeRequestMetrics.get_requests_in_period(
         start_time, end_time, plot_def.time_format, endpoint
     )
     timestamps, counts = create_time_series_arrays(
@@ -218,7 +218,7 @@ def requests_per_time(
     return fig


-def average_time_per_responses(  # pylint: disable=too-many-locals
+async def average_time_per_responses(  # pylint: disable=too-many-locals
     period_of_time: TimePeriod,
     endpoint: EndpointType = EndpointType.ANALYZE,
     end_time: Optional[datetime.datetime] = None,
@@ -246,8 +246,10 @@ def average_time_per_responses( # pylint: disable=too-many-locals
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
     plot_def = Definition(period_of_time)
-    responses_average_time = AnalyzeRequestMetrics.get_responses_average_time_in_period(
-        start_time, end_time, plot_def.time_format, endpoint
+    responses_average_time = (
+        await AnalyzeRequestMetrics.get_responses_average_time_in_period(
+            start_time, end_time, plot_def.time_format, endpoint
+        )
     )
     timestamps, average_time = create_time_series_arrays(
         responses_average_time,
@@ -263,7 +265,7 @@
     )

     responses_average_length = (
-        AnalyzeRequestMetrics.get_responses_average_length_in_period(
+        await AnalyzeRequestMetrics.get_responses_average_length_in_period(
             start_time, end_time, plot_def.time_format, endpoint
         )
     )
@@ -292,7 +294,7 @@
     return fig


-def _collect_emoji_data(
+async def _collect_emoji_data(
     start_time: datetime.datetime, plot_def: Definition
 ) -> Dict[str, Dict[datetime.datetime, int]]:
     """Collect and organize emoji feedback data
@@ -300,7 +302,7 @@
     Counts all emojis given to logdetective comments created since start_time.
     Collect counts in time accordingly to the plot definition.
     """
-    reactions = Reactions.get_since(start_time)
+    reactions = await Reactions.get_since(start_time)
     reactions_values_dict: Dict[str, Dict] = {}
     for comment_created_at, reaction in reactions:
         comment_created_at_formatted = comment_created_at.strptime(
@@ -369,7 +371,7 @@ def _plot_emoji_data( # pylint: disable=too-many-locals
     return emoji_lines, emoji_labels


-def emojis_per_time(
+async def emojis_per_time(
     period_of_time: TimePeriod,
     end_time: Optional[datetime.datetime] = None,
 ) -> figure.Figure:
@@ -395,7 +397,7 @@
     plot_def = Definition(period_of_time)
     end_time = end_time or datetime.datetime.now(datetime.timezone.utc)
     start_time = period_of_time.get_period_start_time(end_time)
-    reactions_values_dict = _collect_emoji_data(start_time, plot_def)
+    reactions_values_dict = await _collect_emoji_data(start_time, plot_def)

     fig, ax = pyplot.subplots(figsize=(12, 6))

@@ -67,6 +67,7 @@ from logdetective.server.emoji import (
     collect_emojis_for_mr,
 )
 from logdetective.server.compressors import RemoteLogCompressor
+from logdetective.server.utils import get_version


 LOG_SOURCE_REQUEST_TIMEOUT = os.environ.get("LOG_SOURCE_REQUEST_TIMEOUT", 60)
@@ -89,7 +90,7 @@ async def lifespan(fapp: FastAPI):
     )

     # Ensure that the database is initialized.
-    logdetective.server.database.base.init()
+    await logdetective.server.database.base.init()

     # Start the background task scheduler for collecting emojis
     asyncio.create_task(schedule_collect_emojis_task())
@@ -106,37 +107,51 @@ async def get_http_session(request: Request) -> aiohttp.ClientSession:
     return request.app.http


-def requires_token_when_set(authentication: Annotated[str | None, Header()] = None):
+def requires_token_when_set(authorization: Annotated[str | None, Header()] = None):
     """
-    FastAPI Depend function that expects a header named Authentication
+    FastAPI Depend function that expects a header named Authorization

     If LOGDETECTIVE_TOKEN env var is set, validate the client-supplied token
     otherwise ignore it
     """
     if not API_TOKEN:
-        LOG.info("LOGDETECTIVE_TOKEN env var not set, authentication disabled")
+        LOG.info("LOGDETECTIVE_TOKEN env var not set, authorization disabled")
         # no token required, means local dev environment
         return
-    token = None
-    if authentication:
+    if authorization:
         try:
-            token = authentication.split(" ", 1)[1]
-        except (ValueError, IndexError):
+            token = authorization.split(" ", 1)[1]
+        except (ValueError, IndexError) as ex:
             LOG.warning(
-                "Authentication header has invalid structure (%s), it should be 'Bearer TOKEN'",
-                authentication,
+                "Authorization header has invalid structure '%s', it should be 'Bearer TOKEN'",
+                authorization,
             )
             # eat the exception and raise 401 below
-            token = None
+            raise HTTPException(
+                status_code=401,
+                detail=f"Invalid authorization, HEADER '{authorization}' not valid.",
+            ) from ex
         if token == API_TOKEN:
             return
-    LOG.info(
-        "LOGDETECTIVE_TOKEN env var is set (%s), clien token = %s", API_TOKEN, token
-    )
-    raise HTTPException(status_code=401, detail=f"Token {token} not valid.")
-
-
-app = FastAPI(dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)
+        LOG.info("Provided token '%s' does not match expected value.", token)
+        raise HTTPException(status_code=401, detail=f"Token '{token}' not valid.")
+    LOG.error("No authorization header provided but LOGDETECTIVE_TOKEN env var is set")
+    raise HTTPException(status_code=401, detail="No token provided.")
+
+
+app = FastAPI(
+    title="Log Detective",
+    contact={
+        "name": "Log Detective developers",
+        "url": "https://github.com/fedora-copr/logdetective",
+        "email": "copr-devel@lists.fedorahosted.org"
+    },
+    license_info={
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
+    },
+    version=get_version(),
+    dependencies=[Depends(requires_token_when_set)], lifespan=lifespan)


 @app.post("/analyze", response_model=Response)
@@ -299,16 +314,15 @@ async def analyze_koji_task(task_id: int, koji_instance_config: KojiInstanceConf
     # to retrieve the metric ID to associate it with the koji task analysis.

     metrics_id = await add_new_metrics(
-        "analyze_koji_task",
+        EndpointType.ANALYZE_KOJI_TASK,
         log_text,
         received_at=datetime.datetime.now(datetime.timezone.utc),
         compressed_log_content=RemoteLogCompressor.zip_text(log_text),
     )
-
     # We need to associate the metric ID with the koji task analysis.
     # This will create the new row without a response, which we will use as
     # an indicator that the analysis is in progress.
-    KojiTaskAnalysis.create_or_restart(
+    await KojiTaskAnalysis.create_or_restart(
         koji_instance=koji_instance_config.xmlrpc_url,
         task_id=task_id,
         log_file_name=log_file_name,
@@ -317,8 +331,8 @@

     # Now that we have the response, we can update the metrics and mark the
     # koji task analysis as completed.
-    update_metrics(metrics_id, response)
-    KojiTaskAnalysis.add_response(task_id, metrics_id)
+    await update_metrics(metrics_id, response)
+    await KojiTaskAnalysis.add_response(task_id, metrics_id)

     # Notify any callbacks that the analysis is complete.
     for callback in koji_instance_config.get_callbacks(task_id):
@@ -355,6 +369,12 @@ async def async_log(msg):
     return msg


+@app.get("/version", response_class=BasicResponse)
+async def get_version_wrapper():
+    """Get the version of logdetective"""
+    return BasicResponse(content=get_version())
+
+
 @app.post("/analyze/stream", response_class=StreamingResponse)
 @track_request()
 async def analyze_log_stream(
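For clients this means the token now travels in the standard `Authorization: Bearer <token>` header whenever `LOGDETECTIVE_TOKEN` is set on the server, and the new `/version` endpoint is a cheap way to verify both connectivity and credentials. A client-side sketch; the base URL and token are placeholders:

import asyncio

import aiohttp

async def check_server(base_url: str, token: str) -> None:
    headers = {"Authorization": f"Bearer {token}"}
    async with aiohttp.ClientSession(base_url=base_url, headers=headers) as session:
        async with session.get("/version") as resp:
            # A 401 here means requires_token_when_set rejected the token.
            print(resp.status, await resp.text())

asyncio.run(check_server("http://localhost:8080", "example-token"))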
@@ -521,7 +541,7 @@ async def schedule_emoji_collection_for_mr(
     key = (forge, project_id, mr_iid)

     # FIXME: Look up the connection from the Forge  # pylint: disable=fixme
-    gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value]
+    gitlab_conn = SERVER_CONFIG.gitlab.instances[forge.value].get_connection()

     LOG.debug("Looking up emojis for %s, %d, %d", forge, project_id, mr_iid)
     await collect_emojis_for_mr(project_id, mr_iid, gitlab_conn)
@@ -614,22 +634,24 @@ async def get_metrics(
     async def handler():
         """Show statistics for the specified endpoint and plot."""
         if plot == Plot.REQUESTS:
-            fig = plot_engine.requests_per_time(period_since_now, endpoint_type)
+            fig = await plot_engine.requests_per_time(period_since_now, endpoint_type)
             return _svg_figure_response(fig)
         if plot == Plot.RESPONSES:
-            fig = plot_engine.average_time_per_responses(
+            fig = await plot_engine.average_time_per_responses(
                 period_since_now, endpoint_type
             )
             return _svg_figure_response(fig)
         if plot == Plot.EMOJIS:
-            fig = plot_engine.emojis_per_time(period_since_now)
+            fig = await plot_engine.emojis_per_time(period_since_now)
             return _svg_figure_response(fig)
         # BOTH
-        fig_requests = plot_engine.requests_per_time(period_since_now, endpoint_type)
-        fig_responses = plot_engine.average_time_per_responses(
+        fig_requests = await plot_engine.requests_per_time(
+            period_since_now, endpoint_type
+        )
+        fig_responses = await plot_engine.average_time_per_responses(
             period_since_now, endpoint_type
         )
-        fig_emojis = plot_engine.emojis_per_time(period_since_now)
+        fig_emojis = await plot_engine.emojis_per_time(period_since_now)
         return _multiple_svg_figures_response([fig_requests, fig_responses, fig_emojis])

     descriptions = {
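The handler above awaits the plotting helpers because `requests_per_time`, `average_time_per_responses`, and `emojis_per_time` are now coroutines. Outside the server they have to be driven by an event loop; a hedged sketch, assuming the metrics database is configured and that `logdetective.server.plot` is the module server.py refers to as `plot_engine`:

import asyncio

from logdetective.server import plot as plot_engine
from logdetective.server.models import TimePeriod

async def dump_weekly_requests() -> None:
    fig = await plot_engine.requests_per_time(TimePeriod(days=7))
    fig.savefig("requests.svg")  # the helpers return matplotlib figures

asyncio.run(dump_weekly_requests())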