corva_worker_python-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corva_worker_python-2.0.0.dist-info/METADATA +30 -0
- corva_worker_python-2.0.0.dist-info/RECORD +63 -0
- corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
- corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
- worker/__init__.py +5 -0
- worker/app/__init__.py +291 -0
- worker/app/modules/__init__.py +265 -0
- worker/app/modules/activity_module.py +141 -0
- worker/app/modules/connection_module.py +21 -0
- worker/app/modules/depth_activity_module.py +21 -0
- worker/app/modules/scheduler.py +44 -0
- worker/app/modules/time_activity_module.py +21 -0
- worker/app/modules/trigger.py +43 -0
- worker/constants.py +51 -0
- worker/data/__init__.py +0 -0
- worker/data/activity/__init__.py +132 -0
- worker/data/activity/activity_grouping.py +242 -0
- worker/data/alert.py +89 -0
- worker/data/api.py +155 -0
- worker/data/enums.py +141 -0
- worker/data/json_encoder.py +18 -0
- worker/data/math.py +104 -0
- worker/data/operations.py +477 -0
- worker/data/serialization.py +110 -0
- worker/data/task_handler.py +82 -0
- worker/data/two_way_dict.py +17 -0
- worker/data/unit_conversions.py +5 -0
- worker/data/wits.py +323 -0
- worker/event/__init__.py +53 -0
- worker/event/event_handler.py +90 -0
- worker/event/scheduled.py +64 -0
- worker/event/stream.py +48 -0
- worker/exceptions.py +26 -0
- worker/mixins/__init__.py +0 -0
- worker/mixins/logging.py +119 -0
- worker/mixins/rollbar.py +87 -0
- worker/partial_rerun_merge/__init__.py +0 -0
- worker/partial_rerun_merge/merge.py +500 -0
- worker/partial_rerun_merge/models.py +91 -0
- worker/partial_rerun_merge/progress.py +241 -0
- worker/state/__init__.py +96 -0
- worker/state/mixins.py +111 -0
- worker/state/state.py +46 -0
- worker/test/__init__.py +3 -0
- worker/test/lambda_function_test_run.py +196 -0
- worker/test/local_testing/__init__.py +0 -0
- worker/test/local_testing/to_local_transfer.py +360 -0
- worker/test/utils.py +51 -0
- worker/wellbore/__init__.py +0 -0
- worker/wellbore/factory.py +496 -0
- worker/wellbore/measured_depth_finder.py +12 -0
- worker/wellbore/model/__init__.py +0 -0
- worker/wellbore/model/ann.py +103 -0
- worker/wellbore/model/annulus.py +113 -0
- worker/wellbore/model/drillstring.py +196 -0
- worker/wellbore/model/drillstring_components.py +439 -0
- worker/wellbore/model/element.py +102 -0
- worker/wellbore/model/enums.py +92 -0
- worker/wellbore/model/hole.py +297 -0
- worker/wellbore/model/hole_section.py +51 -0
- worker/wellbore/model/riser.py +22 -0
- worker/wellbore/sections_mixin.py +64 -0
- worker/wellbore/wellbore.py +289 -0
worker/partial_rerun_merge/merge.py
@@ -0,0 +1,500 @@
import math as pymath
import os
import time
from typing import List, Literal, Optional

import requests
import simplejson as json

from worker.data.api import API
from worker.data.enums import RerunMode
from worker.partial_rerun_merge.models import MergingSchemaModel
from worker.partial_rerun_merge.progress import DatasetProgress, MergingProgress

class PartialRerunMerge:
    """
    Class for performing a partial rerun merge operation.
    """

    MAX_TIMESTAMP = 9999999999
    MAX_API_GET_LIMIT = 5_000
    # A default value for the maximum number of records for a heavy collection.
    MAX_RECORDS_COUNT = 10
    # A default value for the maximum number of records to post in a batch.
    POST_BATCH_SIZE = int(MAX_API_GET_LIMIT / 5)
    REMAINING_SECONDS_THRESHOLD = 45

    @property
    def REMAINING_SECONDS_THRESHOLD_HALF(self) -> int:
        """
        Gets half of the remaining seconds threshold.
        :return: half of the remaining seconds threshold
        """
        return self.REMAINING_SECONDS_THRESHOLD // 2

    def __init__(
        self,
        schema: MergingSchemaModel,
        api: API,
        logger,
    ):
        """
        Constructor for PartialRerunMerge.
        """
        self.schema = schema

        self.api = api
        self.logger = logger

        remaining_seconds_threshold = os.getenv("PARTIAL_RERUN_REMAINING_SECONDS_THRESHOLD")
        if remaining_seconds_threshold:
            self.REMAINING_SECONDS_THRESHOLD = int(remaining_seconds_threshold)

        # the following attributes are set in the preprocess method
        self.partial_well_rerun_id: Optional[int] = None
        self.app_id: Optional[int] = None
        self.original_asset_id: Optional[int] = None
        self.rerun_asset_id: Optional[int] = None
        self.start_timestamp: Optional[int] = None
        self.end_timestamp: Optional[int] = None
        self.rerun_mode: Optional[RerunMode] = None
        self.run_until: Optional[int] = None

        self.start_hole_depth: Optional[float] = None
        self.end_hole_depth: Optional[float] = None

        self.merging_progress: Optional[MergingProgress] = None

    def perform_merge(self, event: dict) -> None:
        """
        Performs a merge operation by updating the cache state,
        merging collections, and updating the status.

        :param event: the event, which should be the data part of
            an event dict, or a Python SDK partial rerun event object
            that has been converted to a dict

        :raises Exception: if an error occurs during the merge operation
        """
        self.preprocess(event)

        try:
            self.merge_cache_state()
            self.merge_collections()
        except Exception as ex:
            self.logger.error(f"An error occurred during the merge operation: {ex}")
            self.merging_progress.fail_status(str(ex))

        self.update_status()

    def preprocess(self, event: dict) -> None:
        """
        Performs any necessary preprocessing before the merge operation.

        :param event: the event, which should be the data part of
            an event dict, or a Python SDK partial rerun event object
            that has been converted to a dict
        """
        self.partial_well_rerun_id = event["partial_well_rerun_id"]
        self.app_id = event["app_id"]
        self.original_asset_id = event["asset_id"]
        self.rerun_asset_id = event["rerun_asset_id"]

        segment = event.get("source_type")
        if segment != "drilling":
            raise ValueError(f"Invalid source type: {segment}")

        self.run_until = event["run_until"]

        self.start_timestamp = event["start"]
        start_wits = self.get_wits_at_or(self.rerun_asset_id, self.start_timestamp, "after")

        if end := event.get("end"):
            self.end_timestamp = end
            end_wits = self.get_wits_at_or(self.rerun_asset_id, self.end_timestamp, "before")
        else:
            end_wits = self.get_wits_at_or(self.rerun_asset_id, self.MAX_TIMESTAMP, "before")
            self.end_timestamp = end_wits.get("timestamp")

        if not self.end_timestamp:
            self.end_timestamp = self.MAX_TIMESTAMP

        self.start_hole_depth = start_wits.get("data", {}).get("hole_depth")
        self.end_hole_depth = end_wits.get("data", {}).get("hole_depth")

        self.rerun_mode = RerunMode(event["rerun_mode"])

        self.merging_progress = MergingProgress(self.partial_well_rerun_id, self.app_id, self.api)

    def merge_cache_state(self) -> None:
        """
        Handles the merging of cache state.
        """
        try:
            if self.merging_progress.is_cache_update_completed:
                return

            for module in self.schema.modules or []:  # modules may be None in the schema
                module.update_cache(merger=self)
        except Exception as ex:
            error_message = f"Failed to update cache state: {ex}"
            raise Exception(error_message) from ex

    def merge_collections(self) -> None:
        """
        Handles the merging of collections. Update this method as per your requirements.
        """
        is_completed = True

        for collection in self.schema.collections:
            collection_name = collection.collection_name
            self.logger.debug(f"Started merging collection: {collection_name}")

            try:
                if not self.has_time_to_continue_merging():
                    self.logger.debug("Not enough time to continue merging. Stopping.")
                    is_completed = False
                    break

                if self.merging_progress.is_collection_completed(collection_name):
                    self.logger.debug(f"Collection {collection_name} is already completed or failed. Skipping.")
                    continue

                if collection.merging_method:
                    col_is_completed = getattr(self, collection.merging_method)(collection_name)
                else:
                    col_is_completed = self.default_merging_method(collection_name)

                if not col_is_completed:
                    is_completed = False

            except Exception as ex:
                error_message = f"Failed to merge collection '{collection_name}': {ex}"
                raise Exception(error_message) from ex

        if is_completed:
            self.merging_progress.complete_status()

    def default_merging_method(
        self,
        collection_name: str,
        downsample_count: Optional[int] = None,
        downsample_ratio: Optional[float] = None,
        skip_progress: Optional[bool] = False,
    ) -> bool:
        """
        Handles the merging of collections in current mode.
        It copies the data from the rerun asset to the original asset, with
        the 'timestamp' being in the range of the start and end timestamps.

        Args:
            collection_name (str): the collection name
            downsample_count (Optional[int]): the downsample count. Defaults to None.
            downsample_ratio (Optional[float]): the downsample ratio. Defaults to None.
            skip_progress (Optional[bool]): whether to skip the progress. Defaults to False.

        Returns:
            bool: True if the collection was fully merged, False otherwise.
        """
        dataset_progress = self.merging_progress.get_dataset_progress(collection_name)
        if not dataset_progress:
            if not skip_progress:
                return True
            dataset_progress = DatasetProgress(0, -1, False, collection_name)

        start_time = self.start_timestamp
        if dataset_progress.is_started():
            start_time = dataset_progress.processed_timestamp + 1

        is_completed = False

        # delete all the data of the original asset within the time range
        is_delete_finished = self._delete_data(collection_name, self.original_asset_id, start_time, self.end_timestamp)
        if not is_delete_finished:
            return is_completed

        while start_time <= self.end_timestamp:
            if not self.has_time_to_continue_merging():
                self.logger.warning("Not enough time to continue merging. Stopping.")
                break

            updated_data = self._get_data(collection_name, self.rerun_asset_id, start_time, self.end_timestamp, "once")
            original_records_count = len(updated_data)
            if not updated_data:
                dataset_progress.mark_completed_at(self.end_timestamp)
                is_completed = True
                break

            last_timestamp = updated_data[-1]["timestamp"]

            if downsample_count is not None or downsample_ratio is not None:
                updated_data = choose_items(updated_data, downsample_count, downsample_ratio)

            self._move_records(collection_name, updated_data)

            dataset_progress.processed_timestamp = last_timestamp

            if original_records_count < self.MAX_API_GET_LIMIT:
                dataset_progress.mark_completed_at(self.end_timestamp)
                is_completed = True
                break

            start_time = last_timestamp + 1

        return is_completed

    def _move_records(self, collection_name: str, records: List[dict]) -> None:
        """
        Moves records from a rerun asset to the original asset within a specified time range.

        Args:
            collection_name (str): The name of the collection to move records from.
            records (List[dict]): The list of records to move.

        Returns:
            None
        """
        # changing the asset_id of the records to the original asset_id
        # and dropping _id key from the records
        for record in records:
            record["asset_id"] = self.original_asset_id

            # since we insert the data from the rerun asset into the original
            # asset we need to drop the '_id' field to avoid data transfer and
            # instead create new records
            record.pop("_id", None)

        # post the records to the original asset
        self._post_data(collection_name, records)
        self.logger.debug(f" --> {collection_name}, copied {len(records)} records")

    def update_status(self) -> requests.Response:
        """
        Handles the updating of status. Update this method as per your requirements.
        """
        return self.merging_progress.update_status()

    def has_time_to_continue_merging(self, apply_half: Optional[bool] = False) -> bool:
        """
        Checks if there is enough time to continue merging.
        :param apply_half: whether to apply half of the remaining seconds threshold
        :return: True if there is enough time, False otherwise
        """
        if not self.run_until:
            return True

        remaining_seconds = self._get_remaining_seconds()
        threshold = self.REMAINING_SECONDS_THRESHOLD_HALF if apply_half else self.REMAINING_SECONDS_THRESHOLD
        return remaining_seconds > threshold

    def _get_remaining_seconds(self) -> float:
        """
        Gets the remaining seconds before the Lambda function times out.
        :return: the remaining seconds
        """
        return self.run_until - time.time()

    def get_wits_at_or(self, asset_id: int, timestamp: Optional[int], direction: Literal["before", "after"]) -> dict:
        """
        Get a record of the wits collection at or before/after the given timestamp.

        :param asset_id: ID of the asset
        :param timestamp: start or end timestamp or None
        :param direction: "before" or "after"
        :return: A dictionary containing the record information
        :raises ValueError: if the provided direction is not "before" or "after"
        """

        collection_name = "wits"
        query = sort = None

        if direction == "before":
            query = "{timestamp#lte#%s}" % timestamp
            sort = "{timestamp:-1}"
        elif direction == "after":
            query = "{timestamp#gte#%s}" % timestamp
            sort = "{timestamp:1}"
        else:
            raise ValueError(f"Invalid direction: {direction}")

        res = self.api.get(
            path="/v1/data/corva/",
            collection=collection_name,
            asset_id=asset_id,
            query=query,
            sort=sort,
            limit=1,
        ).data

        if not res:
            return {}

        return res[0]

    def _get_data(
        self,
        collection_name: str,
        asset_id: int,
        start_timestamp: Optional[int] = None,
        end_timestamp: Optional[int] = None,
        get_mode: Literal["once", "all"] = "all",
    ) -> List[dict]:
        """
        Gets the data from the given collection.

        Args:
            collection_name (str): collection name
            asset_id (int): asset ID
            start_timestamp (Optional[int]): start timestamp
            end_timestamp (Optional[int]): end timestamp
            get_mode (Literal["once", "all"]): get mode

        Returns:
            List[dict]: list of data
        """

        sort = "{timestamp:1}"

        records = []

        # normalize the end bound so the loop exit check below never compares against None
        end_timestamp = end_timestamp or self.MAX_TIMESTAMP

        start_query = "{timestamp#gte#%s}" % (start_timestamp or 0)
        end_query = "{timestamp#lte#%s}" % end_timestamp

        while True:
            query = "%sAND%s" % (start_query, end_query)

            res = self.api.get(
                path="/v1/data/corva/",
                collection=collection_name,
                asset_id=asset_id,
                query=query,
                sort=sort,
                limit=self.MAX_API_GET_LIMIT,
            ).data

            if not res:
                break

            records.extend(res)

            last_timestamp = res[-1]["timestamp"]

            if get_mode == "once" or len(res) < self.MAX_API_GET_LIMIT or last_timestamp >= end_timestamp:
                break

            start_query = "{timestamp#gte#%s}" % (last_timestamp + 1)

        return records

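    # NOTE (illustrative, not in the original file): with example epoch bounds,
    # the query assembled in the loop above looks like
    #   "{timestamp#gte#1700000000}AND{timestamp#lte#1700000600}"
    # When a page comes back full (MAX_API_GET_LIMIT records), the lower bound
    # advances to last_timestamp + 1 and the next page is fetched; in "once"
    # mode the loop stops after the first page regardless.
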
    def _post_data(self, collection_name: str, records: List[dict]):
        """
        Posts the given data to the given collection.

        Args:
            collection_name (str): collection name
            records (List[dict]): list of records
        """
        for i in range(0, len(records), self.POST_BATCH_SIZE):
            data = json.dumps(records[i : i + self.POST_BATCH_SIZE])
            self.api.post(
                path=f"/v1/data/corva/{collection_name}",
                data=data,
            )
            self.sleep()

    def _delete_data(
        self, collection_name: str, asset_id: int, start_timestamp: int, end_timestamp: Optional[int] = None
    ) -> bool:
        """
        Deletes data from a specified collection within a given time range.

        Parameters
        ----------
        collection_name : str
            The name of the collection to delete data from.
        asset_id : int
            The ID of the asset to delete data for.
        start_timestamp : int
            The start timestamp of the time range to delete data for.
        end_timestamp : Optional[int], optional
            The end timestamp of the time range to delete data for. If not provided, the maximum timestamp is used.

        Returns
        -------
        bool
            A boolean indicating whether all data was deleted.
        """
        end_timestamp = end_timestamp or self.MAX_TIMESTAMP

        query = "{asset_id#eq#%s}AND{timestamp#gte#%s}AND{timestamp#lte#%s}" % (
            asset_id,
            start_timestamp,
            end_timestamp,
        )

        while True:
            if not self.has_time_to_continue_merging(apply_half=True):
                self.logger.debug("Not enough time to continue deleting. Stopping.")
                return False

            res = self.api.delete(
                path=f"/v1/data/corva/{collection_name}",
                query=query,
                limit=self.MAX_API_GET_LIMIT,
            )

            deleted_count = res.data.get("deleted_count", 0)
            if deleted_count < self.MAX_API_GET_LIMIT:
                break

            self.sleep()

        return True

|
|
454
|
+
"""
|
|
455
|
+
Sleeps for a few seconds after each API call.
|
|
456
|
+
"""
|
|
457
|
+
time.sleep(seconds)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def choose_items(
    records: List,
    max_records_count: Optional[int] = None,
    max_records_ratio: Optional[float] = None,
) -> List:
    """
    Choose a subset of records from a list of records; the first and last
    records are always included.

    Args:
        records (List): A list of records.
        max_records_count (Optional[int]): The maximum number of records
            to choose. If None, all records are chosen.
        max_records_ratio (Optional[float]): The maximum ratio of records
            to choose. If None, all records are chosen.

    Returns:
        List: A list of chosen records.
    """
    if max_records_count is None and max_records_ratio is None:
        return records

    # only one of max_records_count and max_records_ratio can be provided
    if max_records_count is not None and max_records_ratio is not None:
        raise ValueError("Only one of max_records_count and max_records_ratio can be provided.")

    if max_records_ratio is not None:
        max_records_count = int(len(records) * max_records_ratio)

    if max_records_count <= 0 or len(records) <= max_records_count or len(records) < 3:
        return records

    step = pymath.ceil((len(records) - 2) / max_records_count)

    chosen_records = [
        records[0],
        *(records[tracker] for tracker in range(1, len(records) - 1, step)),
        records[-1],
    ]

    return chosen_records

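For context, a minimal sketch of how this module might be driven. This is not part of the package: the schema contents, the event values, and the no-argument API() construction are illustrative assumptions (the real constructor signature lives in worker/data/api.py).

import logging

from worker.data.api import API
from worker.partial_rerun_merge.merge import PartialRerunMerge
from worker.partial_rerun_merge.models import CollectionMergingModel, MergingSchemaModel

# hypothetical schema: one collection, merged with the default method
schema = MergingSchemaModel(collections=[CollectionMergingModel(collection_name="wits")])

merger = PartialRerunMerge(
    schema=schema,
    api=API(),  # assumption: construction details depend on worker.data.api
    logger=logging.getLogger(__name__),
)

# `event` is the data part of a partial rerun event; all values are illustrative
merger.perform_merge(
    {
        "partial_well_rerun_id": 1,
        "app_id": 2,
        "asset_id": 3,  # original asset
        "rerun_asset_id": 4,
        "source_type": "drilling",
        "run_until": None,  # no Lambda deadline, so time checks always pass
        "start": 1700000000,
        "rerun_mode": "realtime",  # assumption: must be a valid RerunMode value
    }
)

Since no "end" key is given, preprocess resolves the end bound from the latest wits record of the rerun asset.
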
worker/partial_rerun_merge/models.py
@@ -0,0 +1,91 @@
"""
This file contains the models for the configurations for
the collections and cache updates.
"""

from dataclasses import dataclass, field
from typing import List, Optional

from worker.data.enums import CollectionRecordDataScope, CountOfCollectionRecord


@dataclass
class CollectionMergingModel:
    """
    A model representing a collection of records.

    Attributes:
        collection_name (str):
            The name of the collection.

        record_scope (Optional[CollectionRecordDataScope], optional):
            The scope of the records to be included in the collection.
            Defaults to CollectionRecordDataScope.CURRENT.

        count (Optional[CountOfCollectionRecord], optional):
            The count of records to be included in the collection.
            Defaults to None.

        merging_method (Optional[str], optional):
            The name of the method used to merge the records in the collection.
            Defaults to None.
    """

    collection_name: str
    record_scope: Optional[CollectionRecordDataScope] = CollectionRecordDataScope.CURRENT
    count: Optional[CountOfCollectionRecord] = None
    merging_method: Optional[str] = None


@dataclass
class MergingSchemaModel:
    """
    A class representing the schema for merging collections and modules.

    Attributes:
        collections (List[CollectionMergingModel]): A list of
            CollectionMergingModel objects representing the collections
            to be merged.
        modules (Optional[List]): A list of module objects whose caches
            need to be updated.
    """

    collections: List[CollectionMergingModel] = field(default_factory=list)
    modules: Optional[List] = None


class RerunMergeCacheUpdater:
    """
    A generic model representing a cache update for a module.
    A module just needs to override the `update_cache` method.
    """

    @classmethod
    def update_cache(cls, merger: "PartialRerunMerge") -> None:
        """
        Updates the cache with the given rerun mode, original asset ID, and rerun asset ID.

        Args:
            merger: The merger object.
        """
        raise NotImplementedError

    @classmethod
    def default_updater(cls, original_asset_cache: dict, rerun_asset_cache: dict) -> dict:
        """
        The default cache updater.

        Args:
            original_asset_cache: The original asset cache.
            rerun_asset_cache: The rerun asset cache.

        Returns:
            The updated cache.
        """
        # Remove the asset ID and company ID from the rerun asset cache.
        rerun_asset_cache.pop("asset_id", None)
        rerun_asset_cache.pop("company_id", None)

        original_asset_cache.update(rerun_asset_cache)

        return original_asset_cache
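
A module opts into cache merging by subclassing RerunMergeCacheUpdater and implementing update_cache. The sketch below is illustrative only: the class name and the in-memory load_cache/save_cache helpers are hypothetical stand-ins for the package's real state backend (see worker/state in the manifest above).

from worker.partial_rerun_merge.models import RerunMergeCacheUpdater

_CACHE: dict = {}  # hypothetical in-memory stand-in for the real state store


def load_cache(asset_id: int) -> dict:
    # hypothetical helper; a real app would read through worker.state
    return dict(_CACHE.get(asset_id, {}))


def save_cache(asset_id: int, cache: dict) -> None:
    # hypothetical helper; a real app would write through worker.state
    _CACHE[asset_id] = cache


class MyModuleCacheUpdater(RerunMergeCacheUpdater):  # hypothetical module
    @classmethod
    def update_cache(cls, merger) -> None:
        original_cache = load_cache(merger.original_asset_id)
        rerun_cache = load_cache(merger.rerun_asset_id)

        # default_updater strips asset_id/company_id from the rerun cache and
        # overlays the remaining keys onto the original asset's cache
        merged = cls.default_updater(original_cache, rerun_cache)
        save_cache(merger.original_asset_id, merged)

Listing such a class in MergingSchemaModel.modules makes merge_cache_state invoke it before any collections are merged.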