logdetective 0.6.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,30 @@
+ import io
  import enum
  import datetime
+ from typing import Optional, List
+
+ import backoff

- from typing import Optional
  from sqlalchemy import (
      Column,
      Integer,
      Float,
      DateTime,
-     String,
      Enum,
      func,
      select,
      distinct,
+     ForeignKey,
+     LargeBinary,
  )
+ from sqlalchemy.orm import relationship, aliased
+ from sqlalchemy.exc import OperationalError

- from logdetective.server.database.base import Base, transaction
+ from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES
+ from logdetective.server.database.models.merge_request_jobs import (
+     GitlabMergeRequestJobs,
+     Forge,
+ )


  class EndpointType(enum.Enum):
@@ -23,6 +33,7 @@ class EndpointType(enum.Enum):
      ANALYZE = "analyze_log"
      ANALYZE_STAGED = "analyze_log_staged"
      ANALYZE_STREAM = "analyze_log_stream"
+     ANALYZE_GITLAB_JOB = "analyze_gitlab_job"


  class AnalyzeRequestMetrics(Base):
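The new enum member is what lets the metrics layer tag requests coming from the GitLab-job endpoint; add_new_metrics looks members up by value, so a quick sanity check (illustrative only):

assert EndpointType("analyze_gitlab_job") is EndpointType.ANALYZE_GITLAB_JOB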
@@ -44,11 +55,17 @@ class AnalyzeRequestMetrics(Base):
          default=datetime.datetime.now(datetime.timezone.utc),
          comment="Timestamp when the request was received",
      )
-     log_url = Column(
-         String,
+     compressed_log = Column(
+         LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
          nullable=False,
          index=False,
-         comment="Log url for which analysis was requested",
+         comment="Log processed, saved in a zip format",
+     )
+     compressed_response = Column(
+         LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
+         nullable=True,
+         index=False,
+         comment="Given response (with explanation and snippets) saved in a zip format",
      )
      response_sent_at = Column(
          DateTime, nullable=True, comment="Timestamp when the response was sent back"
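The schema change above replaces the plain log_url string with two LargeBinary columns holding zipped payloads. A minimal sketch of producing bytes that fit such a column, assuming a plain zip container (the package's own TextCompressor is not shown in this diff):

import io
import zipfile

def zip_log_text(text: str, name: str = "log.txt") -> bytes:
    # Pack the log text into an in-memory zip archive and return the raw bytes.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        archive.writestr(name, text)
    return buffer.getvalue()

compressed = zip_log_text("example build log contents\n")
assert len(compressed) <= 314572800  # must stay under the 300MB column limit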
@@ -60,11 +77,22 @@ class AnalyzeRequestMetrics(Base):
          Float, nullable=True, comment="Certainty for generated response"
      )

+     merge_request_job_id = Column(
+         Integer,
+         ForeignKey("gitlab_merge_request_jobs.id"),
+         nullable=True,
+         index=False,
+         comment="Is this an analyze request coming from a merge request?",
+     )
+
+     mr_job = relationship("GitlabMergeRequestJobs", back_populates="request_metrics")
+
      @classmethod
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
      def create(
          cls,
          endpoint: EndpointType,
-         log_url: str,
+         compressed_log: io.BytesIO,
          request_received_at: Optional[datetime.datetime] = None,
      ) -> int:
          """Create AnalyzeRequestMetrics new line
@@ -72,31 +100,98 @@ class AnalyzeRequestMetrics(Base):
          with transaction(commit=True) as session:
              metrics = AnalyzeRequestMetrics()
              metrics.endpoint = endpoint
+             metrics.compressed_log = compressed_log
              metrics.request_received_at = request_received_at or datetime.datetime.now(
                  datetime.timezone.utc
              )
-             metrics.log_url = log_url
              session.add(metrics)
              session.flush()
              return metrics.id

      @classmethod
-     def update(
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+     def update(  # pylint: disable=too-many-arguments disable=too-many-positional-arguments
          cls,
          id_: int,
          response_sent_at: datetime,
          response_length: int,
          response_certainty: float,
+         compressed_response: bytes,
      ) -> None:
-         """Update an AnalyzeRequestMetrics line
+         """Update a row
          with data related to the given response"""
          with transaction(commit=True) as session:
              metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
              metrics.response_sent_at = response_sent_at
              metrics.response_length = response_length
              metrics.response_certainty = response_certainty
+             metrics.compressed_response = compressed_response
              session.add(metrics)

+     @classmethod
+     @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+     def get_metric_by_id(
+         cls,
+         id_: int,
+     ) -> "AnalyzeRequestMetrics":
+         """Get the AnalyzeRequestMetrics row
+         with the given id."""
+         with transaction(commit=True) as session:
+             metric = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+             return metric
+
+     def add_mr_job(
+         self,
+         forge: Forge,
+         project_id: int,
+         mr_iid: int,
+         job_id: int,
+     ) -> None:
+         """This request was triggered by a merge request job.
+         Link it.
+
+         Args:
+           forge: forge name
+           project_id: forge project id
+           mr_iid: merge request forge iid
+           job_id: forge job id
+         """
+         mr_job = GitlabMergeRequestJobs.get_or_create(forge, project_id, mr_iid, job_id)
+         self.merge_request_job_id = mr_job.id
+         with transaction(commit=True) as session:
+             session.merge(self)
+
+     @classmethod
+     def get_requests_metrics_for_mr_job(
+         cls,
+         forge: Forge,
+         project_id: int,
+         mr_iid: int,
+         job_id: int,
+     ) -> List["AnalyzeRequestMetrics"]:
+         """Search for all requests triggered by the specified merge request job.
+
+         Args:
+           forge: forge name
+           project_id: forge project id
+           mr_iid: merge request forge iid
+           job_id: forge job id
+         """
+         with transaction(commit=False) as session:
+             mr_job_alias = aliased(GitlabMergeRequestJobs)
+             metrics = (
+                 session.query(cls)
+                 .join(mr_job_alias, cls.merge_request_job_id == mr_job_alias.id)
+                 .filter(
+                     mr_job_alias.forge == forge,
+                     mr_job_alias.mr_iid == mr_iid,
+                     mr_job_alias.project_id == project_id,
+                     mr_job_alias.job_id == job_id,
+                 )
+                 .all()
+             )
+             return metrics
+
      @classmethod
      def get_postgres_time_format(cls, time_format):
          """Map python time format in the PostgreSQL format."""
@@ -140,31 +235,6 @@ class AnalyzeRequestMetrics(Base):
          )
          return requests_by_time_format

-     @classmethod
-     def _get_requests_by_time_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get total requests number in time period.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
-         """
-         requests_by_time_format = (
-             select(
-                 cls.id,
-                 func.strftime(time_format, cls.request_received_at).label(
-                     "time_format"
-                 ),
-             )
-             .filter(cls.request_received_at.between(start_time, end_time))
-             .filter(cls.endpoint == endpoint)
-             .cte("requests_by_time_format")
-         )
-         return requests_by_time_format
-
      @classmethod
      def get_requests_in_period(
          cls,
@@ -234,41 +304,6 @@ class AnalyzeRequestMetrics(Base):
          results = session.execute(average_responses_times).fetchall()
          return results

-     @classmethod
-     def _get_average_responses_times_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get average responses time.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics).should_receive("_get_average_responses_times_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_average_responses_times_for_sqlite)
-         """
-         with transaction(commit=False) as session:
-             average_responses_times = (
-                 select(
-                     func.strftime(time_format, cls.request_received_at).label(
-                         "time_range"
-                     ),
-                     (
-                         func.avg(
-                             func.julianday(cls.response_sent_at)
-                             - func.julianday(cls.request_received_at)  # noqa: W503 flake8 vs ruff
-                         )
-                         * 86400  # noqa: W503 flake8 vs ruff
-                     ).label("average_response_seconds"),
-                 )
-                 .filter(cls.request_received_at.between(start_time, end_time))
-                 .filter(cls.endpoint == endpoint)
-                 .group_by("time_range")
-                 .order_by("time_range")
-             )
-
-             results = session.execute(average_responses_times).fetchall()
-             return results
-
      @classmethod
      def get_responses_average_time_in_period(
          cls,
@@ -328,36 +363,6 @@ class AnalyzeRequestMetrics(Base):
          results = session.execute(average_responses_lengths).fetchall()
          return results

-     @classmethod
-     def _get_average_responses_lengths_for_sqlite(
-         cls, start_time, end_time, time_format, endpoint
-     ):
-         """Get average responses length.
-
-         func.strftime is SQLite specific.
-         Use this function in unit test using flexmock:
-
-         flexmock(AnalyzeRequestMetrics)
-         .should_receive("_get_average_responses_lengths_for_postgres")
-         .replace_with(AnalyzeRequestMetrics._get_average_responses_lengths_for_sqlite)
-         """
-         with transaction(commit=False) as session:
-             average_responses_lengths = (
-                 select(
-                     func.strftime(time_format, cls.request_received_at).label(
-                         "time_range"
-                     ),
-                     (func.avg(cls.response_length)).label("average_responses_length"),
-                 )
-                 .filter(cls.request_received_at.between(start_time, end_time))
-                 .filter(cls.endpoint == endpoint)
-                 .group_by("time_range")
-                 .order_by("time_range")
-             )
-
-             results = session.execute(average_responses_lengths).fetchall()
-             return results
-
      @classmethod
      def get_responses_average_length_in_period(
          cls,
@@ -1,25 +1,40 @@
- import datetime
+ import io
  import inspect
+ import logging
+ import datetime
+
  from typing import Union
  from functools import wraps

+ import aiohttp
+
  from starlette.responses import StreamingResponse
  from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+ from logdetective.server.remote_log import RemoteLog
  from logdetective.server import models
+ from logdetective.server.compressors import LLMResponseCompressor
+
+ LOG = logging.getLogger("logdetective")


- def add_new_metrics(
-     api_name: str, build_log: models.BuildLog, received_at: datetime.datetime = None
+ async def add_new_metrics(
+     api_name: str,
+     url: str,
+     http_session: aiohttp.ClientSession,
+     received_at: datetime.datetime = None,
+     compressed_log_content: io.BytesIO = None,
  ) -> int:
      """Add a new database entry for a received request.

      This will store the time when this function is called,
      the endpoint from where the request was received,
-     and the log for which analysis is requested.
+     and the log (in a zip format) for which analysis is requested.
      """
+     remote_log = RemoteLog(url, http_session)
+     compressed_log_content = compressed_log_content or await remote_log.zip_content()
      return AnalyzeRequestMetrics.create(
          endpoint=EndpointType(api_name),
-         log_url=build_log.url,
+         compressed_log=compressed_log_content,
          request_received_at=received_at
          if received_at
          else datetime.datetime.now(datetime.timezone.utc),
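add_new_metrics is now a coroutine: it takes the log URL plus an aiohttp session and downloads and zips the log itself unless pre-compressed content is passed in. A sketch of calling it directly, with a placeholder URL:

import asyncio
import aiohttp

async def record_request() -> int:
    async with aiohttp.ClientSession() as session:
        # "analyze_log" must match an EndpointType value; the URL is a placeholder.
        return await add_new_metrics("analyze_log", "https://example.org/build.log", session)

metrics_id = asyncio.run(record_request())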
@@ -37,6 +52,15 @@ def update_metrics(
      This will add to the database entry the time when the response was sent,
      the length of the created response and the certainty for it.
      """
+     try:
+         compressed_response = LLMResponseCompressor(response).zip_response()
+     except AttributeError as e:
+         compressed_response = None
+         LOG.warning(
+             "Given response can not be serialized "
+             "and saved in db (probably a StreamingResponse): %s.", e
+         )
+
      response_sent_at = (
          sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
      )
@@ -49,11 +73,15 @@ def update_metrics(
          response.response_certainty if hasattr(response, "response_certainty") else None
      )
      AnalyzeRequestMetrics.update(
-         metrics_id, response_sent_at, response_length, response_certainty
+         metrics_id,
+         response_sent_at,
+         response_length,
+         response_certainty,
+         compressed_response,
      )


- def track_request():
+ def track_request(name=None):
      """
      Decorator to track requests metrics
      """
@@ -61,20 +89,16 @@ def track_request():
      def decorator(f):
          @wraps(f)
          async def async_decorated_function(*args, **kwargs):
-             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
+             log_url = kwargs["build_log"].url
+             metrics_id = await add_new_metrics(
+                 name if name else f.__name__, log_url, kwargs["http_session"]
+             )
              response = await f(*args, **kwargs)
              update_metrics(metrics_id, response)
              return response

-         @wraps(f)
-         def sync_decorated_function(*args, **kwargs):
-             metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
-             response = f(*args, **kwargs)
-             update_metrics(metrics_id, response)
-             return response
-
          if inspect.iscoroutinefunction(f):
              return async_decorated_function
-         return sync_decorated_function
+         raise NotImplementedError("An async coroutine is needed")

      return decorator
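track_request now accepts an optional endpoint name, supports only coroutines, and reads build_log and http_session from the decorated function's keyword arguments. A hedged sketch of decorating a handler (the handler body and its call site are illustrative only):

@track_request(name="analyze_gitlab_job")
async def analyze_gitlab_job(build_log: models.BuildLog, http_session: aiohttp.ClientSession):
    ...  # produce and return the analysis response

# Must be invoked with keyword arguments, since the wrapper looks them up in kwargs:
# response = await analyze_gitlab_job(build_log=log, http_session=session)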
@@ -1,9 +1,14 @@
  import datetime
  from logging import BASIC_FORMAT
  from typing import List, Dict, Optional, Literal
-
- from pydantic import BaseModel, Field, model_validator, field_validator, NonNegativeFloat
-
+ from pydantic import (
+     BaseModel,
+     Field,
+     model_validator,
+     field_validator,
+     NonNegativeFloat,
+     HttpUrl,
+ )
  from logdetective.constants import DEFAULT_TEMPERATURE

@@ -177,6 +182,8 @@ class GeneralConfig(BaseModel):
      """General config options for Log Detective"""

      packages: List[str] = None
+     devmode: bool = False
+     sentry_dsn: HttpUrl | None = None

      def __init__(self, data: Optional[dict] = None):
          super().__init__()
@@ -184,6 +191,8 @@ class GeneralConfig(BaseModel):
              return

          self.packages = data.get("packages", [])
+         self.devmode = data.get("devmode", False)
+         self.sentry_dsn = data.get("sentry_dsn")


  class Config(BaseModel):
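GeneralConfig now recognizes two new keys. A sketch of constructing it from a plain dict, mirroring what a parsed config file would provide; the Sentry DSN is a placeholder:

cfg = GeneralConfig(
    {
        "packages": ["python-requests"],
        "devmode": True,
        "sentry_dsn": "https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    }
)
assert cfg.devmode is True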
@@ -0,0 +1,109 @@
+ import io
+ import logging
+ from typing import Union
+ from urllib.parse import urlparse
+
+ import aiohttp
+
+ from logdetective.server.compressors import TextCompressor
+
+
+ LOG = logging.getLogger("logdetective")
+
+
+ class RemoteLog:
+     """
+     Handles retrieval and compression of remote log files.
+     """
+
+     LOG_FILE_NAME = "log.txt"
+     COMPRESSOR = TextCompressor()
+
+     def __init__(self, url: str, http_session: aiohttp.ClientSession):
+         """
+         Initialize with a remote log URL and HTTP session.
+
+         Args:
+             url: A remote URL pointing to a log file
+             http_session: The HTTP session used to retrieve the remote file
+         """
+         self._url = url
+         self._http_session = http_session
+
+     @property
+     def url(self) -> str:
+         """The remote log url."""
+         return self._url
+
+     @property
+     async def content(self) -> str:
+         """Content of the url."""
+         return await self.get_url_content()
+
+     @classmethod
+     def zip_text(cls, text: str) -> bytes:
+         """
+         Compress the given text.
+
+         Returns:
+             bytes: Compressed text
+         """
+         return cls.COMPRESSOR.zip({cls.LOG_FILE_NAME: text})
+
+     async def zip_content(self) -> bytes:
+         """
+         Compress the content of the remote log.
+
+         Returns:
+             bytes: Compressed log content
+         """
+         content_text = await self.content
+         return self.zip_text(content_text)
+
+     @classmethod
+     def unzip(cls, zip_data: Union[bytes, io.BytesIO]) -> str:
+         """
+         Uncompress the zipped content of the remote log.
+
+         Args:
+             zip_data: Compressed data as bytes or BytesIO
+
+         Returns:
+             str: The decompressed log content
+         """
+         return cls.COMPRESSOR.unzip(zip_data)[cls.LOG_FILE_NAME]
+
+     def validate_url(self) -> bool:
+         """Validate incoming URL to be at least somewhat sensible for log files
+         Only http and https protocols permitted. No result, params or query fields allowed.
+         Either netloc or path must have non-zero length.
+         """
+         result = urlparse(self.url)
+         if result.scheme not in ["http", "https"]:
+             return False
+         if any([result.params, result.query, result.fragment]):
+             return False
+         if not (result.path or result.netloc):
+             return False
+         return True
+
+     async def get_url_content(self) -> str:
+         """validate log url and return log text."""
+         if self.validate_url():
+             LOG.debug("process url %s", self.url)
+             try:
+                 response = await self._http_session.get(self.url, raise_for_status=True)
+             except aiohttp.ClientResponseError as ex:
+                 raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+             return await response.text()
+         LOG.error("Invalid URL received")
+         raise RuntimeError(f"Invalid log URL: {self.url}")
+
+     async def process_url(self) -> str:
+         """Validate log URL and return log text."""
+         try:
+             return await self.get_url_content()
+         except RuntimeError as ex:
+             raise aiohttp.HTTPException(
+                 status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+             ) from ex
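End to end, the new RemoteLog helper fetches a log over HTTP, compresses it for storage, and can round-trip it back to text. A usage sketch with a placeholder URL:

import asyncio
import aiohttp

async def fetch_and_store() -> bytes:
    async with aiohttp.ClientSession() as session:
        remote_log = RemoteLog("https://example.org/build.log", session)
        compressed = await remote_log.zip_content()  # bytes for the compressed_log column
        print(len(RemoteLog.unzip(compressed)))      # round-trips back to the raw log text
        return compressed

asyncio.run(fetch_and_store())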