logdetective 0.6.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logdetective/prompts.yml +4 -4
- logdetective/server/compressors.py +144 -0
- logdetective/server/database/base.py +3 -0
- logdetective/server/database/models/__init__.py +21 -0
- logdetective/server/database/models/merge_request_jobs.py +515 -0
- logdetective/server/database/{models.py → models/metrics.py} +105 -100
- logdetective/server/metric.py +40 -16
- logdetective/server/models.py +12 -3
- logdetective/server/remote_log.py +109 -0
- logdetective/server/server.py +287 -136
- logdetective/utils.py +9 -37
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/METADATA +11 -6
- logdetective-0.9.1.dist-info/RECORD +28 -0
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/WHEEL +1 -1
- logdetective-0.6.0.dist-info/RECORD +0 -24
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/LICENSE +0 -0
- {logdetective-0.6.0.dist-info → logdetective-0.9.1.dist-info}/entry_points.txt +0 -0
logdetective/server/database/{models.py → models/metrics.py}
CHANGED
@@ -1,20 +1,30 @@
+import io
 import enum
 import datetime
+from typing import Optional, List
+
+import backoff
 
-from typing import Optional
 from sqlalchemy import (
     Column,
     Integer,
     Float,
     DateTime,
-    String,
     Enum,
     func,
     select,
     distinct,
+    ForeignKey,
+    LargeBinary,
 )
+from sqlalchemy.orm import relationship, aliased
+from sqlalchemy.exc import OperationalError
 
-from logdetective.server.database.base import Base, transaction
+from logdetective.server.database.base import Base, transaction, DB_MAX_RETRIES
+from logdetective.server.database.models.merge_request_jobs import (
+    GitlabMergeRequestJobs,
+    Forge,
+)
 
 
 class EndpointType(enum.Enum):
@@ -23,6 +33,7 @@ class EndpointType(enum.Enum):
     ANALYZE = "analyze_log"
     ANALYZE_STAGED = "analyze_log_staged"
     ANALYZE_STREAM = "analyze_log_stream"
+    ANALYZE_GITLAB_JOB = "analyze_gitlab_job"
 
 
 class AnalyzeRequestMetrics(Base):
@@ -44,11 +55,17 @@ class AnalyzeRequestMetrics(Base):
         default=datetime.datetime.now(datetime.timezone.utc),
         comment="Timestamp when the request was received",
     )
-
-
+    compressed_log = Column(
+        LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
         nullable=False,
         index=False,
-        comment="Log
+        comment="Log processed, saved in a zip format",
+    )
+    compressed_response = Column(
+        LargeBinary(length=314572800),  # 300MB limit (300 * 1024 * 1024)
+        nullable=True,
+        index=False,
+        comment="Given response (with explanation and snippets) saved in a zip format",
     )
     response_sent_at = Column(
         DateTime, nullable=True, comment="Timestamp when the response was sent back"
@@ -60,11 +77,22 @@ class AnalyzeRequestMetrics(Base):
         Float, nullable=True, comment="Certainty for generated response"
     )
 
+    merge_request_job_id = Column(
+        Integer,
+        ForeignKey("gitlab_merge_request_jobs.id"),
+        nullable=True,
+        index=False,
+        comment="Is this an analyze request coming from a merge request?",
+    )
+
+    mr_job = relationship("GitlabMergeRequestJobs", back_populates="request_metrics")
+
     @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
     def create(
         cls,
         endpoint: EndpointType,
-
+        compressed_log: io.BytesIO,
         request_received_at: Optional[datetime.datetime] = None,
     ) -> int:
         """Create AnalyzeRequestMetrics new line
@@ -72,31 +100,98 @@ class AnalyzeRequestMetrics(Base):
         with transaction(commit=True) as session:
             metrics = AnalyzeRequestMetrics()
             metrics.endpoint = endpoint
+            metrics.compressed_log = compressed_log
             metrics.request_received_at = request_received_at or datetime.datetime.now(
                 datetime.timezone.utc
             )
-            metrics.log_url = log_url
             session.add(metrics)
             session.flush()
             return metrics.id
 
     @classmethod
-
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    def update( # pylint: disable=too-many-arguments disable=too-many-positional-arguments
         cls,
         id_: int,
         response_sent_at: datetime,
         response_length: int,
         response_certainty: float,
+        compressed_response: bytes,
     ) -> None:
-        """Update
+        """Update a row
         with data related to the given response"""
         with transaction(commit=True) as session:
             metrics = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
             metrics.response_sent_at = response_sent_at
             metrics.response_length = response_length
             metrics.response_certainty = response_certainty
+            metrics.compressed_response = compressed_response
             session.add(metrics)
 
+    @classmethod
+    @backoff.on_exception(backoff.expo, OperationalError, max_tries=DB_MAX_RETRIES)
+    def get_metric_by_id(
+        cls,
+        id_: int,
+    ) -> "AnalyzeRequestMetrics":
+        """Update a row
+        with data related to the given response"""
+        with transaction(commit=True) as session:
+            metric = session.query(AnalyzeRequestMetrics).filter_by(id=id_).first()
+            return metric
+
+    def add_mr_job(
+        self,
+        forge: Forge,
+        project_id: int,
+        mr_iid: int,
+        job_id: int,
+    ) -> None:
+        """This request was triggered by a merge request job.
+        Link it.
+
+        Args:
+            forge: forge name
+            project_id: forge project id
+            mr_iid: merge request forge iid
+            job_id: forge job id
+        """
+        mr_job = GitlabMergeRequestJobs.get_or_create(forge, project_id, mr_iid, job_id)
+        self.merge_request_job_id = mr_job.id
+        with transaction(commit=True) as session:
+            session.merge(self)
+
+    @classmethod
+    def get_requests_metrics_for_mr_job(
+        cls,
+        forge: Forge,
+        project_id: int,
+        mr_iid: int,
+        job_id: int,
+    ) -> List["AnalyzeRequestMetrics"]:
+        """Search for all requests triggered by the specified merge request job.
+
+        Args:
+            forge: forge name
+            project_id: forge project id
+            mr_iid: merge request forge iid
+            job_id: forge job id
+        """
+        with transaction(commit=False) as session:
+            mr_job_alias = aliased(GitlabMergeRequestJobs)
+            metrics = (
+                session.query(cls)
+                .join(mr_job_alias, cls.merge_request_job_id == mr_job_alias.id)
+                .filter(
+                    mr_job_alias.forge == forge,
+                    mr_job_alias.mr_iid == mr_iid,
+                    mr_job_alias.project_id == project_id,
+                    mr_job_alias.job_id == job_id,
+                )
+                .all()
+            )
+            return metrics
+
     @classmethod
     def get_postgres_time_format(cls, time_format):
         """Map python time format in the PostgreSQL format."""
@@ -140,31 +235,6 @@ class AnalyzeRequestMetrics(Base):
         )
         return requests_by_time_format
 
-    @classmethod
-    def _get_requests_by_time_for_sqlite(
-        cls, start_time, end_time, time_format, endpoint
-    ):
-        """Get total requests number in time period.
-
-        func.strftime is SQLite specific.
-        Use this function in unit test using flexmock:
-
-        flexmock(AnalyzeRequestMetrics).should_receive("_get_requests_by_time_for_postgres")
-        .replace_with(AnalyzeRequestMetrics._get_requests_by_time_for_sqllite)
-        """
-        requests_by_time_format = (
-            select(
-                cls.id,
-                func.strftime(time_format, cls.request_received_at).label(
-                    "time_format"
-                ),
-            )
-            .filter(cls.request_received_at.between(start_time, end_time))
-            .filter(cls.endpoint == endpoint)
-            .cte("requests_by_time_format")
-        )
-        return requests_by_time_format
-
     @classmethod
     def get_requests_in_period(
         cls,
@@ -234,41 +304,6 @@ class AnalyzeRequestMetrics(Base):
             results = session.execute(average_responses_times).fetchall()
             return results
 
-    @classmethod
-    def _get_average_responses_times_for_sqlite(
-        cls, start_time, end_time, time_format, endpoint
-    ):
-        """Get average responses time.
-
-        func.strftime is SQLite specific.
-        Use this function in unit test using flexmock:
-
-        flexmock(AnalyzeRequestMetrics).should_receive("_get_average_responses_times_for_postgres")
-        .replace_with(AnalyzeRequestMetrics._get_average_responses_times_for_sqlite)
-        """
-        with transaction(commit=False) as session:
-            average_responses_times = (
-                select(
-                    func.strftime(time_format, cls.request_received_at).label(
-                        "time_range"
-                    ),
-                    (
-                        func.avg(
-                            func.julianday(cls.response_sent_at)
-                            - func.julianday(cls.request_received_at)  # noqa: W503 flake8 vs ruff
-                        )
-                        * 86400  # noqa: W503 flake8 vs ruff
-                    ).label("average_response_seconds"),
-                )
-                .filter(cls.request_received_at.between(start_time, end_time))
-                .filter(cls.endpoint == endpoint)
-                .group_by("time_range")
-                .order_by("time_range")
-            )
-
-            results = session.execute(average_responses_times).fetchall()
-            return results
-
     @classmethod
     def get_responses_average_time_in_period(
         cls,
@@ -328,36 +363,6 @@ class AnalyzeRequestMetrics(Base):
             results = session.execute(average_responses_lengths).fetchall()
             return results
 
-    @classmethod
-    def _get_average_responses_lengths_for_sqlite(
-        cls, start_time, end_time, time_format, endpoint
-    ):
-        """Get average responses length.
-
-        func.strftime is SQLite specific.
-        Use this function in unit test using flexmock:
-
-        flexmock(AnalyzeRequestMetrics)
-        .should_receive("_get_average_responses_lengths_for_postgres")
-        .replace_with(AnalyzeRequestMetrics._get_average_responses_lengths_for_sqlite)
-        """
-        with transaction(commit=False) as session:
-            average_responses_lengths = (
-                select(
-                    func.strftime(time_format, cls.request_received_at).label(
-                        "time_range"
-                    ),
-                    (func.avg(cls.response_length)).label("average_responses_length"),
-                )
-                .filter(cls.request_received_at.between(start_time, end_time))
-                .filter(cls.endpoint == endpoint)
-                .group_by("time_range")
-                .order_by("time_range")
-            )
-
-            results = session.execute(average_responses_lengths).fetchall()
-            return results
-
     @classmethod
     def get_responses_average_length_in_period(
         cls,
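The net effect of these changes to AnalyzeRequestMetrics: a request row now stores the compressed log (and later the compressed response) instead of a log URL, database writes retry on OperationalError via backoff, and a row can be linked to the GitLab merge request job that triggered it. A minimal sketch of how the new classmethods might be driven, assuming a configured database; the Forge.gitlab member and the literal IDs are illustrative guesses, not taken from the diff:

import datetime

from logdetective.server.database.models import AnalyzeRequestMetrics, EndpointType
from logdetective.server.database.models.merge_request_jobs import Forge
from logdetective.server.remote_log import RemoteLog

# Compress a raw log the same way the server does before storing it in the row.
compressed_log = RemoteLog.zip_text("error: Bad exit status from /var/tmp/rpm-tmp")

# Record the incoming request; the zipped log now lives in the metrics table itself.
metrics_id = AnalyzeRequestMetrics.create(
    endpoint=EndpointType.ANALYZE_GITLAB_JOB,
    compressed_log=compressed_log,
    request_received_at=datetime.datetime.now(datetime.timezone.utc),
)

# Link the request to the merge request job that triggered it, then query it back.
# Forge.gitlab and the numeric IDs below are assumed for illustration only.
metric = AnalyzeRequestMetrics.get_metric_by_id(metrics_id)
metric.add_mr_job(Forge.gitlab, project_id=123, mr_iid=45, job_id=678)
rows = AnalyzeRequestMetrics.get_requests_metrics_for_mr_job(
    Forge.gitlab, project_id=123, mr_iid=45, job_id=678
)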
logdetective/server/metric.py
CHANGED
@@ -1,25 +1,40 @@
-import
+import io
 import inspect
+import logging
+import datetime
+
 from typing import Union
 from functools import wraps
 
+import aiohttp
+
 from starlette.responses import StreamingResponse
 from logdetective.server.database.models import EndpointType, AnalyzeRequestMetrics
+from logdetective.server.remote_log import RemoteLog
 from logdetective.server import models
+from logdetective.server.compressors import LLMResponseCompressor
+
+LOG = logging.getLogger("logdetective")
 
 
-def add_new_metrics(
-    api_name: str,
+async def add_new_metrics(
+    api_name: str,
+    url: str,
+    http_session: aiohttp.ClientSession,
+    received_at: datetime.datetime = None,
+    compressed_log_content: io.BytesIO = None,
 ) -> int:
     """Add a new database entry for a received request.
 
     This will store the time when this function is called,
     the endpoint from where the request was received,
-    and the log for which analysis is requested.
+    and the log (in a zip format) for which analysis is requested.
     """
+    remote_log = RemoteLog(url, http_session)
+    compressed_log_content = compressed_log_content or await remote_log.zip_content()
     return AnalyzeRequestMetrics.create(
         endpoint=EndpointType(api_name),
-
+        compressed_log=compressed_log_content,
         request_received_at=received_at
         if received_at
         else datetime.datetime.now(datetime.timezone.utc),
@@ -37,6 +52,15 @@ def update_metrics(
     This will add to the database entry the time when the response was sent,
     the length of the created response and the certainty for it.
     """
+    try:
+        compressed_response = LLMResponseCompressor(response).zip_response()
+    except AttributeError as e:
+        compressed_response = None
+        LOG.warning(
+            "Given response can not be serialized "
+            "and saved in db (probably a StreamingResponse): %s.", e
+        )
+
     response_sent_at = (
         sent_at if sent_at else datetime.datetime.now(datetime.timezone.utc)
     )
@@ -49,11 +73,15 @@ def update_metrics(
         response.response_certainty if hasattr(response, "response_certainty") else None
     )
     AnalyzeRequestMetrics.update(
-        metrics_id,
+        metrics_id,
+        response_sent_at,
+        response_length,
+        response_certainty,
+        compressed_response,
     )
 
 
-def track_request():
+def track_request(name=None):
     """
     Decorator to track requests metrics
     """
@@ -61,20 +89,16 @@ def track_request():
     def decorator(f):
         @wraps(f)
         async def async_decorated_function(*args, **kwargs):
-
+            log_url = kwargs["build_log"].url
+            metrics_id = await add_new_metrics(
+                name if name else f.__name__, log_url, kwargs["http_session"]
+            )
             response = await f(*args, **kwargs)
             update_metrics(metrics_id, response)
             return response
 
-        @wraps(f)
-        def sync_decorated_function(*args, **kwargs):
-            metrics_id = add_new_metrics(f.__name__, kwargs["build_log"])
-            response = f(*args, **kwargs)
-            update_metrics(metrics_id, response)
-            return response
-
         if inspect.iscoroutinefunction(f):
             return async_decorated_function
-
+        raise NotImplementedError("An async coroutine is needed")
 
     return decorator
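With the sync path removed, track_request only accepts coroutine endpoints, and it now pulls both the log URL and the HTTP session out of keyword arguments. A rough sketch of what a decorated endpoint has to look like; build_log here stands in for whatever request model exposes a .url attribute (an assumption), and the endpoint body is illustrative only:

import aiohttp

from logdetective.server.metric import track_request


@track_request(name="analyze_log")  # the name must map to an EndpointType value
async def analyze_log(build_log, http_session: aiohttp.ClientSession):
    # The decorator downloads and compresses the log from build_log.url and records
    # the request before awaiting this body, then stores response metrics afterwards.
    ...


# Callers must pass build_log and http_session as keyword arguments, because the
# decorator reads kwargs["build_log"] and kwargs["http_session"] directly.
# Decorating a plain (non-async) function now raises NotImplementedError.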
logdetective/server/models.py
CHANGED
@@ -1,9 +1,14 @@
 import datetime
 from logging import BASIC_FORMAT
 from typing import List, Dict, Optional, Literal
-
-
-
+from pydantic import (
+    BaseModel,
+    Field,
+    model_validator,
+    field_validator,
+    NonNegativeFloat,
+    HttpUrl,
+)
 from logdetective.constants import DEFAULT_TEMPERATURE
 
 
@@ -177,6 +182,8 @@ class GeneralConfig(BaseModel):
     """General config options for Log Detective"""
 
     packages: List[str] = None
+    devmode: bool = False
+    sentry_dsn: HttpUrl | None = None
 
     def __init__(self, data: Optional[dict] = None):
         super().__init__()
@@ -184,6 +191,8 @@ class GeneralConfig(BaseModel):
             return
 
         self.packages = data.get("packages", [])
+        self.devmode = data.get("devmode", False)
+        self.sentry_dsn = data.get("sentry_dsn")
 
 
 class Config(BaseModel):
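GeneralConfig gains two optional knobs, devmode and sentry_dsn, read from the same dict that already carries packages. A small sketch of constructing the config with the new keys; the DSN value is a placeholder, not a real project key:

from logdetective.server.models import GeneralConfig

general = GeneralConfig(
    {
        "packages": ["logdetective"],
        "devmode": True,  # added in this release range, defaults to False
        "sentry_dsn": "https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    }
)
assert general.devmode is True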
logdetective/server/remote_log.py
ADDED
@@ -0,0 +1,109 @@
+import io
+import logging
+from typing import Union
+from urllib.parse import urlparse
+
+import aiohttp
+
+from logdetective.server.compressors import TextCompressor
+
+
+LOG = logging.getLogger("logdetective")
+
+
+class RemoteLog:
+    """
+    Handles retrieval and compression of remote log files.
+    """
+
+    LOG_FILE_NAME = "log.txt"
+    COMPRESSOR = TextCompressor()
+
+    def __init__(self, url: str, http_session: aiohttp.ClientSession):
+        """
+        Initialize with a remote log URL and HTTP session.
+
+        Args:
+            url: A remote URL pointing to a log file
+            http_session: The HTTP session used to retrieve the remote file
+        """
+        self._url = url
+        self._http_session = http_session
+
+    @property
+    def url(self) -> str:
+        """The remote log url."""
+        return self._url
+
+    @property
+    async def content(self) -> str:
+        """Content of the url."""
+        return await self.get_url_content()
+
+    @classmethod
+    def zip_text(cls, text: str) -> bytes:
+        """
+        Compress the given text.
+
+        Returns:
+            bytes: Compressed text
+        """
+        return cls.COMPRESSOR.zip({cls.LOG_FILE_NAME: text})
+
+    async def zip_content(self) -> bytes:
+        """
+        Compress the content of the remote log.
+
+        Returns:
+            bytes: Compressed log content
+        """
+        content_text = await self.content
+        return self.zip_text(content_text)
+
+    @classmethod
+    def unzip(cls, zip_data: Union[bytes, io.BytesIO]) -> str:
+        """
+        Uncompress the zipped content of the remote log.
+
+        Args:
+            zip_data: Compressed data as bytes or BytesIO
+
+        Returns:
+            str: The decompressed log content
+        """
+        return cls.COMPRESSOR.unzip(zip_data)[cls.LOG_FILE_NAME]
+
+    def validate_url(self) -> bool:
+        """Validate incoming URL to be at least somewhat sensible for log files
+        Only http and https protocols permitted. No result, params or query fields allowed.
+        Either netloc or path must have non-zero length.
+        """
+        result = urlparse(self.url)
+        if result.scheme not in ["http", "https"]:
+            return False
+        if any([result.params, result.query, result.fragment]):
+            return False
+        if not (result.path or result.netloc):
+            return False
+        return True
+
+    async def get_url_content(self) -> str:
+        """validate log url and return log text."""
+        if self.validate_url():
+            LOG.debug("process url %s", self.url)
+            try:
+                response = await self._http_session.get(self.url, raise_for_status=True)
+            except aiohttp.ClientResponseError as ex:
+                raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
+            return await response.text()
+        LOG.error("Invalid URL received ")
+        raise RuntimeError(f"Invalid log URL: {self.url}")
+
+    async def process_url(self) -> str:
+        """Validate log URL and return log text."""
+        try:
+            return await self.get_url_content()
+        except RuntimeError as ex:
+            raise aiohttp.HTTPException(
+                status_code=400, detail=f"We couldn't obtain the logs: {ex}"
+            ) from ex
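RemoteLog is the piece add_new_metrics relies on to turn a log URL into the compressed blob stored in the metrics table, but it can also be used on its own. A minimal sketch, assuming an event loop, an open aiohttp session, and a reachable URL (the address below is made up):

import asyncio

import aiohttp

from logdetective.server.remote_log import RemoteLog


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # Made-up URL for illustration; any plain-text log served over HTTP(S) works.
        remote_log = RemoteLog("https://example.org/builds/12345/build.log", session)

        # Download the log and compress it for storage in the database.
        blob = await remote_log.zip_content()

        # The round trip is lossless: unzip() gives back the original text.
        text = RemoteLog.unzip(blob)
        print(f"{len(blob)} compressed bytes for {len(text)} characters of log")


asyncio.run(main())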