corva-worker-python 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corva_worker_python-2.0.0.dist-info/METADATA +30 -0
- corva_worker_python-2.0.0.dist-info/RECORD +63 -0
- corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
- corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
- worker/__init__.py +5 -0
- worker/app/__init__.py +291 -0
- worker/app/modules/__init__.py +265 -0
- worker/app/modules/activity_module.py +141 -0
- worker/app/modules/connection_module.py +21 -0
- worker/app/modules/depth_activity_module.py +21 -0
- worker/app/modules/scheduler.py +44 -0
- worker/app/modules/time_activity_module.py +21 -0
- worker/app/modules/trigger.py +43 -0
- worker/constants.py +51 -0
- worker/data/__init__.py +0 -0
- worker/data/activity/__init__.py +132 -0
- worker/data/activity/activity_grouping.py +242 -0
- worker/data/alert.py +89 -0
- worker/data/api.py +155 -0
- worker/data/enums.py +141 -0
- worker/data/json_encoder.py +18 -0
- worker/data/math.py +104 -0
- worker/data/operations.py +477 -0
- worker/data/serialization.py +110 -0
- worker/data/task_handler.py +82 -0
- worker/data/two_way_dict.py +17 -0
- worker/data/unit_conversions.py +5 -0
- worker/data/wits.py +323 -0
- worker/event/__init__.py +53 -0
- worker/event/event_handler.py +90 -0
- worker/event/scheduled.py +64 -0
- worker/event/stream.py +48 -0
- worker/exceptions.py +26 -0
- worker/mixins/__init__.py +0 -0
- worker/mixins/logging.py +119 -0
- worker/mixins/rollbar.py +87 -0
- worker/partial_rerun_merge/__init__.py +0 -0
- worker/partial_rerun_merge/merge.py +500 -0
- worker/partial_rerun_merge/models.py +91 -0
- worker/partial_rerun_merge/progress.py +241 -0
- worker/state/__init__.py +96 -0
- worker/state/mixins.py +111 -0
- worker/state/state.py +46 -0
- worker/test/__init__.py +3 -0
- worker/test/lambda_function_test_run.py +196 -0
- worker/test/local_testing/__init__.py +0 -0
- worker/test/local_testing/to_local_transfer.py +360 -0
- worker/test/utils.py +51 -0
- worker/wellbore/__init__.py +0 -0
- worker/wellbore/factory.py +496 -0
- worker/wellbore/measured_depth_finder.py +12 -0
- worker/wellbore/model/__init__.py +0 -0
- worker/wellbore/model/ann.py +103 -0
- worker/wellbore/model/annulus.py +113 -0
- worker/wellbore/model/drillstring.py +196 -0
- worker/wellbore/model/drillstring_components.py +439 -0
- worker/wellbore/model/element.py +102 -0
- worker/wellbore/model/enums.py +92 -0
- worker/wellbore/model/hole.py +297 -0
- worker/wellbore/model/hole_section.py +51 -0
- worker/wellbore/model/riser.py +22 -0
- worker/wellbore/sections_mixin.py +64 -0
- worker/wellbore/wellbore.py +289 -0
worker/partial_rerun_merge/progress.py
ADDED
@@ -0,0 +1,241 @@
+"""
+This module defines the progress of the partial reruns.
+"""
+
+from dataclasses import asdict, dataclass
+from typing import List, Optional
+
+import requests
+import simplejson as json
+
+from worker.data.api import API
+from worker.data.enums import PartialRerunStatus
+
+
+@dataclass
+class DatasetProgress:
+    """
+    Represents the progress of a dataset.
+
+    Attributes:
+        dataset_id (int): The ID of the dataset.
+        processed_timestamp (int): The timestamp when the dataset was processed.
+        completed (bool): Whether the dataset processing is completed or not.
+        dataset_name (str): The name of the dataset.
+    """
+
+    dataset_id: int
+    processed_timestamp: int
+    completed: bool
+    dataset_name: str
+
+    def is_completed(self) -> bool:
+        """
+        Returns a boolean indicating whether the progress has been completed.
+        """
+        return self.completed
+
+    def is_started(self) -> bool:
+        """
+        Returns a boolean indicating whether the progress has been started.
+        """
+        return self.processed_timestamp > 0
+
+    def mark_completed_at(self, processed_timestamp: int) -> None:
+        """
+        Marks the progress as completed and sets the processed timestamp.
+
+        Args:
+            processed_timestamp (int): The timestamp when the progress was processed.
+        """
+        self.completed = True
+        self.processed_timestamp = processed_timestamp
+
+    @classmethod
+    def set_from_dict(cls, json_object: dict) -> "DatasetProgress":
+        """Set the object from a dict.
+
+        Args:
+            json_object (dict): the input json
+
+        Returns:
+            DatasetProgress: the constructed instance
+        """
+        return cls(**json_object)
+
+    def to_dict(self) -> dict:
+        """
+        Returns a dictionary representation of the DatasetProgress object.
+
+        Returns:
+            dict: A dictionary containing the dataset_id, processed_timestamp,
+                completed, and dataset_name attributes.
+        """
+        return asdict(self)
+
+    def get_trimmed_dataset_name(self) -> str:
+        """
+        Returns the trimmed dataset name by splitting the dataset name
+        on the "#" delimiter and returning the last part.
+
+        Example: "corva#circulation.volumetric" -> "circulation.volumetric"
+
+        Returns:
+            str: The trimmed dataset name.
+        """
+        return self.dataset_name.split("#")[-1]
+
+
+class MergingProgress:
+    """
+    Represents the progress of merging a partial rerun for a specific app.
+
+    Attributes:
+        partial_well_rerun_id (int): The ID of the partial rerun.
+        app_id (int): The ID of the app.
+        status (PartialRerunStatus): The status of the partial rerun.
+        dataset_progresses (List[DatasetProgress]): The progress of each dataset
+            in the partial rerun.
+        api (API): The API object used to make requests to the Corva API.
+        is_cache_update_completed (bool): Whether the cache update is completed.
+    """
+
+    def __init__(self, partial_well_rerun_id: int, app_id: int, api: API):
+        """
+        Initializes a new instance of the MergingProgress class.
+
+        Args:
+            partial_well_rerun_id (int): The ID of the partial rerun.
+            app_id (int): The ID of the application.
+            api (API): The API object.
+        """
+        self.partial_well_rerun_id = partial_well_rerun_id
+        self.app_id = app_id
+        self.status = PartialRerunStatus.MERGING
+        self.dataset_progresses: List[DatasetProgress] = []
+
+        self.api = api
+
+        # getting and parsing the progress
+        request = self.api.get(self.get_url_path()).data
+        self.parse_progress(request)
+
+        self.is_cache_update_completed = self.determine_if_cache_update_completed()
+
+        # in case of failure, this will be set to the reason of failure
+        self.fail_reason: Optional[str] = None
+
+    def parse_progress(self, request: dict) -> None:
+        """
+        Parses the progress information retrieved from the API and populates the
+        'dataset_progresses' list.
+
+        Args:
+            request (dict): The progress information retrieved from the API.
+        """
+        progresses = request.get("included") or []
+
+        for progress in progresses:
+            if progress.get("type") != "partial_well_rerun_dataset_progress":
+                continue
+
+            attributes = progress.get("attributes") or {}
+
+            dataset_progress = DatasetProgress.set_from_dict(attributes)
+            self.dataset_progresses.append(dataset_progress)
+
+    def complete_status(self) -> None:
+        """
+        Marks the partial rerun merge as completed.
+        """
+        self.status = PartialRerunStatus.COMPLETED
+
+    def fail_status(self, reason: str) -> None:
+        """
+        Sets the status of the partial rerun to FAILED and records the reason for the failure.
+
+        Args:
+            reason (str): The reason for the failure.
+        """
+        self.status = PartialRerunStatus.FAILED
+        self.fail_reason = reason
+
+    def get_dataset_progress(self, dataset_name: str) -> Optional[DatasetProgress]:
+        """
+        Gets the progress of the dataset with the given name.
+
+        Args:
+            dataset_name (str): The name of the dataset.
+
+        Returns:
+            Optional[DatasetProgress]: The progress of the dataset, or None if not found.
+        """
+        return next((dp for dp in self.dataset_progresses if dp.get_trimmed_dataset_name() == dataset_name), None)
+
+    def is_collection_completed(self, collection_name: str) -> bool:
+        """
+        Checks if a collection with the given name has been completed.
+
+        Args:
+            collection_name (str): The name of the collection to check.
+
+        Returns:
+            bool: True if the collection has been completed, False otherwise.
+        """
+        for dataset_progress in self.dataset_progresses:
+            if dataset_progress.get_trimmed_dataset_name() == collection_name:
+                return dataset_progress.completed
+
+        return False
+
+    def determine_if_cache_update_completed(self) -> bool:
+        """
+        Checks whether the one-time cache update has already run.
+        There is no collection named "cache", so this checks whether ANY dataset has
+        started processing: the cache update happens once, before any dataset is merged,
+        so a started dataset implies the cache update is done and the merge can continue
+        in the next iteration without repeating it.
+
+        Returns:
+            bool: True if any dataset has started processing, False otherwise.
+        """
+        return any(dataset_progress.is_started() for dataset_progress in self.dataset_progresses)
+
+    def get_url_path(self) -> str:
+        """
+        Returns the URL path for the API request.
+
+        Returns:
+            str: URL path for the API request.
+        """
+        return f"/v2/partial_reruns/{self.partial_well_rerun_id}/app_progress/{self.app_id}"
+
+    def to_dict(self) -> dict:
+        """
+        Converts the merging progress to a dict.
+
+        Returns:
+            dict: progress as a dict.
+        """
+        dataset_progresses = [dataset_progress.to_dict() for dataset_progress in self.dataset_progresses]
+
+        output = dict(app_progress=dict(status=self.status.value, dataset_progresses=dataset_progresses))
+
+        if self.fail_reason:
+            output["app_progress"]["fail_reason"] = self.fail_reason
+
+        return output
+
+    def update_status(self) -> requests.Response:
+        """
+        Sends the updated merging progress to the API.
+        """
+        url_path = self.get_url_path()
+        body = json.dumps(self.to_dict())
+        response = self.api.patch(url_path, data=body)
+        return response.response
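For orientation, here is a minimal sketch of how an app might drive `MergingProgress` during a merge pass. The IDs, the collection name, and the bare `API()` construction are illustrative assumptions, not values from the package:

```python
from worker.data.api import API
from worker.partial_rerun_merge.progress import MergingProgress

api = API()  # assumption: the client picks up credentials from the environment
# the constructor performs the initial GET and populates dataset_progresses
progress = MergingProgress(partial_well_rerun_id=123, app_id=45, api=api)

if not progress.is_cache_update_completed:
    ...  # run the one-time cache update before merging any dataset

dataset = progress.get_dataset_progress("circulation.volumetric")  # trimmed name
if dataset and not dataset.is_completed():
    # ... merge this dataset, then record where processing stopped ...
    dataset.mark_completed_at(processed_timestamp=1_700_000_000)

progress.complete_status()
progress.update_status()  # PATCH /v2/partial_reruns/123/app_progress/45
```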
worker/state/__init__.py
ADDED
@@ -0,0 +1,96 @@
+import os
+from typing import List, Union
+
+import redis
+import simplejson as json
+
+from worker.data.json_encoder import JsonEncoder
+from worker.mixins.logging import Logger
+
+
+class RedisHandler:
+    redis_ttl = os.getenv("REDIS_TTL", 90 * 24 * 3600)  # 90 days
+    _redis_connection = None
+
+    @classmethod
+    def _get_redis_connection(cls) -> redis.client.Redis:
+        """
+        Set up redis and get a connection if one does not already exist
+        :return:
+        """
+        # Use previous connection if cached
+        if cls._redis_connection:
+            Logger.debug("Using existing connection")
+            return cls._redis_connection
+
+        # If connection does not exist, create a new one
+        cls._redis_connection = cls._new_redis_connection()
+
+        return cls._redis_connection
+
+    @classmethod
+    def _new_redis_connection(cls) -> redis.client.Redis:
+        """
+        Creates a new Redis connection
+        :return:
+        """
+        cache_url = os.getenv("CACHE_URL", None)
+
+        if not cache_url:
+            raise Exception("redis key (CACHE_URL) not found in Environment Variables.")
+
+        _redis_connection = redis.Redis.from_url(cache_url)
+
+        if not _redis_connection:
+            raise Exception(f"Could not connect to Redis with URL: {cache_url}")
+
+        client_id = _redis_connection.client_id()
+        Logger.log(f"Created a new connection with {client_id=}")
+        return _redis_connection
+
+    @classmethod
+    def setup_existing_connection(cls, existing_connection: Union[redis.client.Redis, None]) -> redis.client.Redis:
+        """
+        Set the internal connection variable to the existing connection provided to this function.
+        The returned output should be saved to a global variable if it is intended to cache the
+        connection across lambda invocations.
+        :param existing_connection:
+        :return:
+        """
+        cls._redis_connection = existing_connection
+        return cls._get_redis_connection()
+
+    @classmethod
+    def load_state(cls, state_key: str) -> dict:
+        """
+        Load state from redis
+        :return:
+        """
+        state = cls._get_redis_connection().get(state_key)
+        if state:
+            return json.loads(state)
+
+        return {}
+
+    @classmethod
+    def save_state(cls, state: dict, state_key: str, **kwargs):
+        """
+        Save the state to redis. Uses redis_ttl from kwargs if passed; falls back to the env var, then to the default of 90 days.
+        :param state:
+        :param state_key:
+        :return:
+        """
+        # Using redis ttl from kwargs if passed in.
+        redis_ttl = kwargs.pop("redis_ttl", cls.redis_ttl)
+        cls._get_redis_connection().set(
+            state_key, value=json.dumps(state, cls=JsonEncoder, ignore_nan=True), ex=redis_ttl
+        )
+
+    @classmethod
+    def delete_states(cls, state_keys: List[str]):
+        """
+        Delete the states corresponding to the given keys
+        :param state_keys: a list of redis keys
+        :return:
+        """
+        cls._get_redis_connection().delete(*state_keys)
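A minimal usage sketch for `RedisHandler`; the Redis URL and state key below are hypothetical, and in real deployments `CACHE_URL` is assumed to be provided by the platform:

```python
import os

from worker.state import RedisHandler

# assumption for a local run; in deployments CACHE_URL comes from the environment
os.environ.setdefault("CACHE_URL", "redis://localhost:6379/0")

state_key = "corva/16886.my-app.my-module"  # hypothetical key in the Corva format
state = RedisHandler.load_state(state_key)  # returns {} when the key is absent
state["last_processed_timestamp"] = 1_700_000_000
RedisHandler.save_state(state, state_key, redis_ttl=3600)  # override the default 90-day TTL
RedisHandler.delete_states([state_key])
```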
worker/state/mixins.py
ADDED
@@ -0,0 +1,111 @@
+import os
+import sys
+from functools import cached_property
+from typing import List, Optional, Union
+
+import simplejson as json
+
+from worker.data.json_encoder import JsonEncoder
+from worker.mixins.logging import LoggingMixin
+from worker.mixins.rollbar import RollbarMixin
+from worker.state import RedisHandler
+
+
+class RedisMixin(LoggingMixin, RollbarMixin):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.state = {}
+
+    @cached_property
+    def _size_limits(self):
+        state_storage_limit_fatal = os.getenv("STATE_STORAGE_CRITICAL_LIMIT", 1_000.0)  # 1 MB
+        state_storage_limit_warning = os.getenv("STATE_STORAGE_WARNING_LIMIT", 100.0)  # 100 kB
+        size_limits = {"fatal": float(state_storage_limit_fatal), "warning": float(state_storage_limit_warning)}
+        return size_limits
+
+    def load_state(self, state_key: Union[str, None] = None, raise_warnings: bool = False) -> dict:
+        """
+        Load the state from redis
+        :param state_key:
+        :param raise_warnings: Set to True if size-limit warnings should be printed
+        :return:
+        """
+        if not state_key:
+            state_key = self.get_formatted_state_key(self.asset_id, self.app_key, self.module_key)
+
+        state = RedisHandler.load_state(state_key)
+        size_object = self.check_state_size(state, state_key, raise_warnings=raise_warnings)
+
+        self.debug(self.asset_id, f"Retrieved state of size {size_object} kb")
+        self.state = state
+        return state
+
+    def save_state(self, state_key: Union[str, None] = None, raise_warnings: bool = True, **kwargs) -> None:
+        """
+        Save the state to redis
+        :param state_key:
+        :param raise_warnings: Set to True if size-limit warnings should be printed
+        :return:
+        """
+        if not state_key:
+            state_key = self.get_formatted_state_key(self.asset_id, self.app_key, self.module_key)
+
+        size_object = self.check_state_size(self.state, state_key, raise_warnings=raise_warnings)
+        RedisHandler.save_state(self.state, state_key, **kwargs)
+        self.debug(self.asset_id, f"Saved state of size {size_object} kb")
+
+    def delete_states(self, state_keys: Union[List[str], str]):
+        """
+        Delete the states for the given key or keys
+        :param state_keys:
+        :return:
+        """
+        if not isinstance(state_keys, list):
+            state_keys = [state_keys]
+
+        RedisHandler.delete_states(state_keys)
+        self.debug(None, "Deleted state from Redis")
+
+    @staticmethod
+    def get_formatted_state_key(asset_id: int, app_key: str, module_key: Optional[str] = None) -> str:
+        """
+        Returns the state key in the Corva naming format
+        :param asset_id:
+        :param app_key:
+        :param module_key:
+        :return:
+        """
+        state_key = "corva/{0}.{1}".format(asset_id, app_key)
+
+        if module_key:
+            return "{0}.{1}".format(state_key, module_key)
+
+        return state_key
+
+    def check_state_size(self, state, state_key, raise_warnings=True):
+        """
+        Check the size of the state dictionary and generate warnings if necessary
+        :param state:
+        :param state_key:
+        :param raise_warnings:
+        :return:
+        """
+        size_object = sys.getsizeof(json.dumps(state, cls=JsonEncoder, ignore_nan=True)) / 1024
+
+        if not raise_warnings:
+            return size_object
+
+        size_limit = self._size_limits["fatal"]
+        if size_object > size_limit:
+            message = f"State_key {state_key} is of size {size_object} kb > {size_limit} kb."
+            self.fatal(self.asset_id, message)
+            self.track_message(message, level="critical")
+            return size_object
+
+        size_limit = self._size_limits["warning"]
+        if size_object > size_limit:
+            message = f"State_key {state_key} is of size {size_object} kb > {size_limit} kb."
+            self.warn(self.asset_id, message)
+            return size_object
+
+        return size_object
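Since `load_state` and `save_state` derive their default key from `self.asset_id`, `self.app_key`, and `self.module_key`, a host class is expected to provide those attributes. A sketch, assuming the logging and Rollbar mixins initialize without extra arguments:

```python
from worker.state.mixins import RedisMixin


class StateAwareModule(RedisMixin):
    # hypothetical host class: the mixin reads these attributes when building the default key
    asset_id = 16886
    app_key = "my-app"
    module_key = "my-module"


module = StateAwareModule()
state = module.load_state()                     # key: "corva/16886.my-app.my-module"
state["counter"] = state.get("counter", 0) + 1  # load_state stored this same dict on module.state
module.save_state()                             # persists module.state, size-checked against the limits
```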
worker/state/state.py
ADDED
@@ -0,0 +1,46 @@
+import simplejson as json
+
+from worker.data.json_encoder import JsonEncoder
+
+
+class State(dict):
+    def __init__(self, fields: dict, state: dict = None, *args, **kwargs):
+        """
+        :param fields: a dictionary that holds the state items; the value of each key is the data type.
+        :param state:
+        :param args:
+        :param kwargs:
+        """
+        self.fields = fields
+
+        super().__init__(*args, **kwargs)
+        if not state:
+            state = {}
+
+        for field, conversion in self.fields.items():
+            self.set_field_if_exists(field, state, conversion)
+
+    def set_field_if_exists(self, field, state, conversion):
+        if not field:
+            return
+
+        value = None
+
+        if field in state and state[field] is not None:
+            value = state[field]
+            if conversion:
+                value = conversion(value)
+
+        self[field] = value
+
+    def to_json(self):
+        output = {field: self.get(field, None) for field in self.fields.keys()}
+        output = json.dumps(output, cls=JsonEncoder, ignore_nan=True)
+
+        return output
+
+    def get(self, key, default=None):
+        value = super().get(key, None)
+        if value is None:
+            value = default
+        return value
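A short sketch of how `State` applies the per-field conversions (the field names and values are made up):

```python
from worker.state.state import State

# each field maps to a conversion callable applied on load; None keeps the raw value
fields = {"bit_depth": float, "is_active": bool, "last_survey": None}

state = State(fields, state={"bit_depth": "8450.5", "is_active": 1})
print(state["bit_depth"])    # 8450.5 (converted by float)
print(state["last_survey"])  # None: fields absent from the input are initialized to None
print(state.to_json())       # serializes exactly the declared fields
```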
worker/test/lambda_function_test_run.py
ADDED
@@ -0,0 +1,196 @@
+"""
+This file is used to trigger lambda functions locally and test
+the results of the app on actual data to make sure of the results.
+An example of using this functionality is the one shown in
+'app_test.py'; that file can then be run from the command line.
+
+Note: make sure you run your tests on the QA environment.
+
+== app_test.py file
+# add the main directory as the first path
+from os.path import dirname
+import sys
+parent = dirname(dirname(__file__))
+sys.path.insert(0, parent)
+
+if __name__ == '__main__':
+    collections = ['collection_to_delete']
+    app = AppTestRun(lambda_function.lambda_handler, collections)
+    app.run()
+
+> python app_test -a 16886 -d True
+"""
+
+import argparse
+from typing import List
+
+from dotenv import load_dotenv
+from tqdm import tqdm
+
+from worker import constants
+from worker.data.operations import (
+    delete_collection_data_of_asset_id,
+    gather_data_for_period,
+    get_one_data_record,
+    point_main_envs,
+)
+from worker.state.mixins import RedisMixin
+from worker.test.utils import create_scheduler_events, get_last_processed_timestamp
+
+load_dotenv(override=True)
+
+
+def strtobool(val):
+    """Convert a string representation of truth to true (1) or false (0).
+
+    True values are 'y', 'yes', 't', 'true', 'on', and '1'.
+    False values are 'n', 'no', 'f', 'false', 'off', and '0'.
+    Raises ValueError if 'val' is anything else.
+    """
+    val = val.lower()
+    if val in ("y", "yes", "t", "true", "on", "1"):
+        return 1
+    elif val in ("n", "no", "f", "false", "off", "0"):
+        return 0
+    else:
+        raise ValueError("invalid truth value %r" % (val,))
+
+
+def generate_runner_parser():
+    """
+    Create the supporting arguments
+    :return:
+    """
+    parser = argparse.ArgumentParser(description="Run your tests on an asset.")
+    parser.add_argument(
+        "-v",
+        "--environment",
+        "--env",
+        type=str,
+        required=False,
+        help="environment, options: 'local', 'qa', 'staging', 'production'",
+    )
+    parser.add_argument("-a", "--asset_id", "--id", type=int, required=True, help="set asset_id")
+    parser.add_argument(
+        "-s", "--start_timestamp", "--start", type=int, required=False, default=None, help="start timestamp"
+    )
+    parser.add_argument("-e", "--end_timestamp", "--end", type=int, required=False, default=None, help="end timestamp")
+    parser.add_argument(
+        "-i",
+        "--timestep",
+        "--step",
+        type=int,
+        required=False,
+        default=60,
+        help="trigger the lambda function once every step",
+    )
+    parser.add_argument(
+        "-d",
+        "--to_delete",
+        "--delete",
+        type=strtobool,
+        required=False,
+        default=False,
+        help="delete the state and data",
+    )
+    return parser
+
+
+class AppTestRun:
+    def __init__(self, lambda_handler, collections: List[str], args: List[str] = None):
+        """
+        :param lambda_handler: lambda handler function to run
+        :param collections: collections to erase in case to_delete is on
+        :param args: arguments for the run
+        """
+        self.lambda_handler = lambda_handler
+        self.collections = collections
+
+        self.event_type = None
+        self.progress = None
+
+        if args is None:
+            parser = generate_runner_parser()
+            args = parser.parse_args()
+
+        self.initialize(args)
+
+    def initialize(self, args):
+        environment = args.environment
+
+        # directing the main environment variables to the provided env: API_KEY, API_ROOT_URL, CACHE_URL
+        point_main_envs(environment)
+
+        asset_id = args.asset_id
+
+        start_timestamp = args.start_timestamp
+        end_timestamp = args.end_timestamp
+        step = args.timestep
+        self.event_type = constants.get("global.event-type")
+        to_delete = args.to_delete
+
+        state_keys = construct_state_keys(asset_id)
+
+        if not start_timestamp:
+            start_timestamp = get_one_data_record(asset_id, timestamp_sort=+1).get("timestamp")
+
+        if not end_timestamp:
+            end_timestamp = get_one_data_record(asset_id, timestamp_sort=-1).get("timestamp")
+
+        if to_delete:
+            delete_state_data_of_asset_id(state_keys)
+            delete_collection_data_of_asset_id(asset_id, self.collections)
+            print("Deleted relevant Redis states and collections for this asset!")
+
+        start_timestamp = get_last_processed_timestamp(asset_id, state_keys[0]) or start_timestamp
+        print(f"asset_id: {asset_id}, timestamp interval: [{start_timestamp}, {end_timestamp}]")
+
+        events = create_scheduler_events(asset_id, start_timestamp, end_timestamp, step)
+
+        self.progress = tqdm(events, ncols=150)
+
+    def run(self):
+        print("\nRunning the main module started ...")
+        for event in self.progress:
+            schedule_time = str(int(event[0][0]["schedule_start"] / 1000))
+            if self.event_type == "wits_stream":
+                wits = gather_data_for_period(
+                    int(event[0][0]["asset_id"]),
+                    int(event[0][0]["schedule_start"] / 1000),
+                    int(event[0][0]["schedule_end"] / 1000),
+                )
+                if not wits:
+                    continue
+
+                event = [{"metadata": {}, "records": wits}]
+
+            self.lambda_handler(event, None)
+            self.progress.set_description(schedule_time)
+
+
+def construct_state_keys(asset_id: int) -> List[str]:
+    """
+    Get the constants dict of an app and construct the storage keys.
+    Note: the first key is the global app key.
+    :param asset_id: well asset id
+    :return: a list of storage keys
+    """
+    app_key = constants.get("global.app-key")
+    module_keys = list(constants.get(app_key).keys())
+    state_app = RedisMixin()
+    global_state_key = state_app.get_formatted_state_key(asset_id, app_key)
+
+    module_state_keys = [state_app.get_formatted_state_key(asset_id, app_key, module) for module in module_keys]
+
+    return [global_state_key, *module_state_keys]
+
+
+def delete_state_data_of_asset_id(state_keys: List[str]):
+    """
+    Delete the states for the given keys
+    :param state_keys:
+    :return:
+    """
+    RedisMixin().delete_states(state_keys)
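The module docstring above shows the CLI route (`python app_test -a 16886 -d True`); the runner can also be driven programmatically by passing a parsed `argparse.Namespace` (note that despite the `List[str]` annotation, `args` is consumed as a namespace). The `lambda_function` import below is an assumption about the host app's layout:

```python
# a sketch of driving the runner programmatically; `lambda_function` is the app's own
# handler module and an assumption about the host repository's layout
import lambda_function

from worker.test.lambda_function_test_run import AppTestRun, generate_runner_parser

parser = generate_runner_parser()
args = parser.parse_args(["-v", "qa", "-a", "16886", "-d", "true"])  # parsed Namespace

app = AppTestRun(lambda_function.lambda_handler, collections=["collection_to_delete"], args=args)
app.run()
```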