corva-worker-python 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corva_worker_python-2.0.0.dist-info/METADATA +30 -0
- corva_worker_python-2.0.0.dist-info/RECORD +63 -0
- corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
- corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
- worker/__init__.py +5 -0
- worker/app/__init__.py +291 -0
- worker/app/modules/__init__.py +265 -0
- worker/app/modules/activity_module.py +141 -0
- worker/app/modules/connection_module.py +21 -0
- worker/app/modules/depth_activity_module.py +21 -0
- worker/app/modules/scheduler.py +44 -0
- worker/app/modules/time_activity_module.py +21 -0
- worker/app/modules/trigger.py +43 -0
- worker/constants.py +51 -0
- worker/data/__init__.py +0 -0
- worker/data/activity/__init__.py +132 -0
- worker/data/activity/activity_grouping.py +242 -0
- worker/data/alert.py +89 -0
- worker/data/api.py +155 -0
- worker/data/enums.py +141 -0
- worker/data/json_encoder.py +18 -0
- worker/data/math.py +104 -0
- worker/data/operations.py +477 -0
- worker/data/serialization.py +110 -0
- worker/data/task_handler.py +82 -0
- worker/data/two_way_dict.py +17 -0
- worker/data/unit_conversions.py +5 -0
- worker/data/wits.py +323 -0
- worker/event/__init__.py +53 -0
- worker/event/event_handler.py +90 -0
- worker/event/scheduled.py +64 -0
- worker/event/stream.py +48 -0
- worker/exceptions.py +26 -0
- worker/mixins/__init__.py +0 -0
- worker/mixins/logging.py +119 -0
- worker/mixins/rollbar.py +87 -0
- worker/partial_rerun_merge/__init__.py +0 -0
- worker/partial_rerun_merge/merge.py +500 -0
- worker/partial_rerun_merge/models.py +91 -0
- worker/partial_rerun_merge/progress.py +241 -0
- worker/state/__init__.py +96 -0
- worker/state/mixins.py +111 -0
- worker/state/state.py +46 -0
- worker/test/__init__.py +3 -0
- worker/test/lambda_function_test_run.py +196 -0
- worker/test/local_testing/__init__.py +0 -0
- worker/test/local_testing/to_local_transfer.py +360 -0
- worker/test/utils.py +51 -0
- worker/wellbore/__init__.py +0 -0
- worker/wellbore/factory.py +496 -0
- worker/wellbore/measured_depth_finder.py +12 -0
- worker/wellbore/model/__init__.py +0 -0
- worker/wellbore/model/ann.py +103 -0
- worker/wellbore/model/annulus.py +113 -0
- worker/wellbore/model/drillstring.py +196 -0
- worker/wellbore/model/drillstring_components.py +439 -0
- worker/wellbore/model/element.py +102 -0
- worker/wellbore/model/enums.py +92 -0
- worker/wellbore/model/hole.py +297 -0
- worker/wellbore/model/hole_section.py +51 -0
- worker/wellbore/model/riser.py +22 -0
- worker/wellbore/sections_mixin.py +64 -0
- worker/wellbore/wellbore.py +289 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import simplejson as json
|
|
4
|
+
|
|
5
|
+
from worker import API
|
|
6
|
+
from worker.data.enums import LambdaStates
|
|
7
|
+
from worker.data.json_encoder import JsonEncoder
|
|
8
|
+
from worker.mixins.logging import LoggingMixin
|
|
9
|
+
from worker.mixins.rollbar import RollbarMixin
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TaskHandler(LoggingMixin, RollbarMixin):
    """
    Base handler for task-type lambda events.

    Subclasses override ``process`` with the app-specific logic; callers then
    invoke ``run_process`` which drives the workflow:
    fetch task -> (skip if test mode on AWS) -> process -> report success/failure.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Allow an API client to be injected via kwargs (useful for testing);
        # pop it so it is not forwarded to the mixin constructors.
        self._api = kwargs.pop("api", API())
        super().__init__(*args, **kwargs)
        self.event = {}       # raw lambda event, set by run_process
        self.task_id = ""     # task id extracted from the event
        self.task = {}        # task document fetched from the API

    def process(self, *args, **kwargs):
        """
        Provides a placeholder for actual logic code for each app.
        e.g. Instantiate the app and run it
        This method must be overridden by each app
        :return: app-specific result data (reported back to the API)
        """
        raise NotImplementedError("process method must be defined before instantiating this class")

    def run_process(self, event: dict, *args, **kwargs):
        """
        Runs the task handler workflow.
        This method would be called after instantiation of the TaskHandler class
        :param event: the lambda handler event; must contain a "task_id"
        :return: the data returned by ``process`` on success
        :raises Exception: re-raises whatever ``process`` raised, after marking the task failed
        """
        self.event = event
        self.task_id = self.event.get("task_id")
        self.get_task(self.task_id)

        # AWS_EXECUTION_ENV is a reserved environment variable it will hold the
        # runtime environment eg python3.6, java11 etc.
        # We do not have to set it on our lambda functions, it will be set by AWS
        # We can use this to check if we are running on hosted or local
        is_hosted = os.environ.get("AWS_EXECUTION_ENV", False)

        test_mode = self.task.get("properties", {}).get("test_mode", False)

        # Test-mode tasks are meant to run locally only; skip them on AWS.
        if test_mode and is_hosted:
            asset_id = self.task.get("asset_id", 0)
            self.warn(asset_id, f"Not processing on AWS due to test mode. task_id: {self.task_id} maybe running.")
            return

        try:
            data = self.process(self.event, *args, **kwargs)
            request_body = {"task": {"payload": {"data": data, "message": LambdaStates.SUCCEEDED.value}}}
            self.update_task(self.task_id, "success", request_body)
            return data

        except Exception as exception:
            request_body = {"task": {"fail_reason": f"Message: {LambdaStates.FAILED.value}, Exception: {exception}"}}
            self.update_task(self.task_id, "fail", request_body)
            # Bare raise re-raises the active exception with its original traceback intact.
            raise

    def get_task(self, task_id: str) -> None:
        """
        Gets the task from API endpoint for the given task_id
        :param task_id: the task identifier
        :return: None; the fetched document is stored on ``self.task``
        """
        path = f"/v2/tasks/{task_id}"
        self.task = self._api.get(path).data

    def update_task(self, task_id: str, status: str, request_body: dict) -> None:
        """
        Updates the task status on the API endpoint in case of success or failure
        :param task_id: the task identifier
        :param status: [success, fail]
        :param request_body: payload posted to the status endpoint
        :return: None; the API response document is stored on ``self.task``
        """
        path = f"/v2/tasks/{task_id}/{status}"
        self.task = self._api.put(path, data=json.dumps(request_body, cls=JsonEncoder, ignore_nan=True)).data
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
class TwoWayDict(dict):
    """A dict that keeps a bidirectional mapping: storing ``k -> v`` also stores ``v -> k``."""

    def __setitem__(self, key, value):
        # Tear down any stale pairing that either endpoint currently belongs to,
        # so each element participates in at most one connection.
        for endpoint in (key, value):
            if endpoint in self:
                del self[endpoint]
        super().__setitem__(key, value)
        super().__setitem__(value, key)

    def __delitem__(self, key):
        # Remove both directions of the connection: the partner entry first,
        # then the key's own entry.
        super().__delitem__(self[key])
        super().__delitem__(key)

    def __len__(self):
        """Returns the number of connections (each occupies two underlying slots)."""
        return super().__len__() // 2
|
worker/data/wits.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
from typing import Dict, Union
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from worker.data import operations
|
|
6
|
+
from worker.data.activity import Activity
|
|
7
|
+
from worker.data.enums import ChannelStatus, DataStatus
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Channel(float):
    """
    A float subclass that carries a data status alongside its numeric value.

    Construction returns ``None`` (not a Channel) when the value is None or
    cannot be converted to float, so callers can treat missing channels uniformly.
    """

    def __new__(cls, value=None, status=None):
        # If only a valid value is passed, data status is automatically set to VALID
        # If None is passed as value, then data status is automatically set to MISSING
        # If both value and data status are passed, then it uses the passed data status
        if value is None:
            return None

        if not isinstance(value, float):
            try:
                value = float(value)
            # BUG FIX: `except ValueError or TypeError` evaluates to `except ValueError`
            # only, so non-castable types (e.g. a list) leaked a TypeError.
            except (ValueError, TypeError):
                return None

        if value is not None and not status:
            status = DataStatus.VALID

        ch = float.__new__(cls, value)
        ch._status = status
        return ch

    @property
    def status(self) -> DataStatus:
        """
        - Status can be valid of missing
        - Data status is only available for non None wits attributes
        - Missing represents a value that has been forward filled or interpolated

        :return: DataStatus
        """
        return self._status
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class WITS:
    """
    - Create a wits object by WITS.set_wits(dataset: Union[list, dict])
    - Access any wits data attribute by wits.hole_depth
    - Access any data status of a wits data attribute by wits.hole_depth.status
    - Data status is only available for non None wits attributes
    """

    # Expected channels and the type each value is coerced to in __setattr__.
    data = {
        "entry_at": int,
        "hole_depth": Channel,
        "bit_depth": Channel,
        "block_height": Channel,
        "hook_load": Channel,
        "weight_on_bit": Channel,
        "rop": Channel,
        "rotary_rpm": Channel,
        "rotary_torque": Channel,
        "mud_flow_in": Channel,
        "mud_flow_out_percent": Channel,
        "standpipe_pressure": Channel,
        "diff_press": Channel,
        "pump_spm_1": Channel,
        "pump_spm_2": Channel,
        "pump_spm_3": Channel,
        "pump_spm_4": Channel,
        "pump_spm_total": Channel,
        "trip_tank_volume_1": Channel,
        "trip_tank_volume_2": Channel,
        "active_pit_volume": Channel,
        "lag_depth": Channel,
        "state": Activity,
        "boost_pump_flow_in": Channel,
        "annular_back_pressure": Channel,
    }
    metadata = {"drillstring": str}
    spm_channels = ["pump_spm_1", "pump_spm_2", "pump_spm_3", "pump_spm_4"]
    # NOTE(review): empty by default; subclasses presumably extend this — verify.
    status = {}

    def __init__(self, wits_json: dict = None, **kwargs):
        if not wits_json:
            wits_json = {}

        # Serialize functionality in Worker unpacks all the attributes. If that's how the class is called, repack.
        if kwargs:
            wits_json.update(dict(kwargs))
            wits_json.pop("_wits_", None)

        self.wits_json = wits_json

        data = wits_json.get("data", {})
        data_raw = wits_json.get("data_raw", {})

        metadata = wits_json.get("metadata", {})
        status = wits_json.get("status", {})

        for key in self.data.keys():
            # If type is not Channel or if the value is None, just setattr
            if self.data.get(key) != Channel or data.get(key, None) is None:
                setattr(self, key, data.get(key, None))
                continue

            key_data_status = DataStatus.VALID.value
            # If key is in data_raw, there are 2 possibilities - Missing (Value is None or -999.25) or Overridden
            if key in data_raw:
                key_data_status = DataStatus.OVERRIDDEN.value
                # If key in data_raw and value is null (None or -999.25), then we just setattr None
                if operations.is_null(data_raw.get(key, None)):
                    setattr(self, key, data.get(key, None))
                    continue

            # If type is Channel and (key not in data_raw or (key in data_raw and value is not null))
            setattr(self, key, Channel(data.get(key), DataStatus(data.get(f"{key}_data_status", key_data_status))))

        for key in self.metadata.keys():
            setattr(self, key, metadata.get(key, None))
        for key in self.status.keys():
            setattr(self, key, status.get(key, None))

        # BUG FIX: `wits_json.get("data")` is None when "data" is absent (e.g. WITS()
        # or WITS({})), so the chained `.get("entry_at")` raised AttributeError.
        # Use the already-defaulted local `data` dict instead.
        setattr(self, "timestamp", data.get("entry_at"))
        setattr(self, "timestamp_read", wits_json.get("timestamp_read"))

        if getattr(self, "pump_spm_total") is None:
            self.set_pump_spm_total()

        setattr(self, "total_trip_tank_volume", self.get_total_trip_tank_volume())

    def __getattr__(self, item):
        # Any attribute not explicitly set resolves to None instead of raising
        # AttributeError (beware: this also silences typos in attribute names).
        return None

    def __setattr__(self, key, value):
        """
        When trying to set an attribute it automatically tries to typecast the value.
        :param key: key
        :param value: value
        :return:
        """
        if key not in {**self.data, **self.status}.keys():
            self.__dict__[key] = value
            return

        data_type = {**self.data, **self.status}.get(key)
        try:
            # Channel instances already carry a status; don't re-cast (which would drop it).
            if not isinstance(value, Channel):
                value = data_type(value)
            self.__dict__[key] = value
        except (TypeError, ValueError):
            # Un-castable values are stored as None rather than raising.
            self.__dict__[key] = None

    def __iter__(self):
        # A single record iterates as itself so code can treat one record or a
        # list of records uniformly.
        yield self

    @classmethod
    def set_wits(cls, dataset: Union[list, dict]):
        """
        Class method to create WITS objects using a dict or a list of dicts

        :param dataset: wits record or a list of wits records
        :return: WITS object or a list of WITS objects (None for other types)
        """
        if isinstance(dataset, dict):
            return cls(dataset)

        if isinstance(dataset, list):
            return [cls(record) for record in dataset]

        return None

    def get_as_dict(self) -> Union[dict, None]:
        """
        Get the wits record as a dictionary

        :return: wits record as a dict (None when this object was built from an empty record)
        """
        if not self.wits_json:
            return None

        self.wits_json.pop("data_raw", None)
        self.wits_json["data"] = {key: getattr(self, key) for key in self.data}
        self.wits_json["data"].update(
            {
                f"{key}_data_status": getattr(self, key).status.value
                for key in self.data
                if self.data.get(key) == Channel and getattr(self, key) is not None
            }
        )
        # convert state from Activity type to str
        self.wits_json["data"]["state"] = self.wits_json["data"]["state"].value
        self.wits_json["status"] = {key: getattr(self, key) for key in self.status}

        return self.wits_json

    def get_wits_data(self, columns=None) -> dict:
        """
        Get the data part of the wits record as a dictionary

        :param columns: Specify if you only want a select list of columns
        :return: data part of wits record as a dict
        """
        if not columns:
            return {key: getattr(self, key) for key in self.data}

        return {key: getattr(self, key) for key in columns}

    def spm_status(self, low_spm_threshold: float) -> Union[ChannelStatus, None]:
        """
        Get spm status. Compares total of all SPM channels against SPM threshold.

        :param low_spm_threshold: threshold
        :return: MISSING if channels are unavailable, OFF if less than threshold and ON if greater
        """

        # If all SPM channels are empty, return MISSING
        if not any(operations.is_number(getattr(self, spm_channel)) for spm_channel in self.spm_channels):
            return ChannelStatus.MISSING

        if getattr(self, "pump_spm_total") > low_spm_threshold:
            return ChannelStatus.ON

        return ChannelStatus.OFF

    def mud_flow_in_status(self, low_mud_flow_in_threshold: float) -> Union[ChannelStatus, None]:
        """
        Get flow rate status. Compares mud flow in channel against flow rate threshold.

        :param low_mud_flow_in_threshold: threshold
        :return: MISSING if channel is unavailable, OFF if less than threshold and ON if greater
        """
        if not operations.is_number(self.mud_flow_in):
            return ChannelStatus.MISSING

        if self.mud_flow_in > low_mud_flow_in_threshold:
            return ChannelStatus.ON

        return ChannelStatus.OFF

    def check_channel_availability(self, required_wits_channels: list) -> bool:
        """
        This function checks if all the required channels are available in the wits record

        :param required_wits_channels: List of required channels
        :return: Boolean value. True if all the required channels are available, else False
        """
        return all(
            operations.is_in_and_not_none(self.get_wits_data(required_wits_channels), channel)
            for channel in required_wits_channels
        )

    def set_pump_spm_total(self):
        """This function sets the pump_spm_total attribute value based on individual spm values"""

        # Negative SPM readings are sensor noise and are excluded from the total.
        pump_spm_total = sum(
            getattr(self, spm_channel)
            for spm_channel in self.spm_channels
            if (getattr(self, spm_channel) and getattr(self, spm_channel) >= 0)
        )

        setattr(self, "pump_spm_total", pump_spm_total)

    def get_total_trip_tank_volume(self) -> float:
        """
        This function returns the total trip tank volume if more than one trip tanks are available

        :return: Total trip tank volume
        """
        ttk1 = self.trip_tank_volume_1 or 0
        ttk2 = self.trip_tank_volume_2 or 0

        return ttk1 + ttk2

    def validate_range(self, channel_range_map: Dict[str, Dict]):
        """
        This method can be used to validate the range of the given channels and apply the proper operation on the data
        :param channel_range_map: a dictionary of channel to operation and range ([min, max]) mapping
        example:
        {
            "mud_flow_out_percent": {"range": [0, 100], "operation": "clip"}
        }
        options for 'operation': ['clip', None]
        :return: None
        :raises ValueError: when an unknown operation is provided
        """
        for channel, properties in channel_range_map.items():
            orig_value = getattr(self, channel, None)
            if orig_value is None:
                continue

            valid_range = properties.get("range")
            operation = properties.get("operation", None)

            new_value = None

            if operation == "clip":
                new_value = np.clip(orig_value, *valid_range)
            elif operation is None:
                # No operation: keep the value only if it lies inside the range,
                # otherwise it is replaced with None.
                if valid_range[0] <= orig_value <= valid_range[-1]:
                    new_value = orig_value
            else:
                raise ValueError("Invalid operation provided!")

            setattr(self, channel, new_value)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def serialize(obj: Union[list, WITS]) -> Union[list, dict, None]:
    """
    This function is used to serialize a WITS object or a list of WITS objects.
    Use WITS.set_wits(dataset: Union[list, dict]) to deserialize.

    :param obj: WITS object(s) to serialize.
    :return: return a serialized object
    """
    # A value is either a WITS instance, a list of them, or unsupported —
    # the branches are mutually exclusive.
    if isinstance(obj, WITS):
        return obj.get_as_dict()

    if isinstance(obj, list):
        return [record.get_as_dict() for record in obj]

    return None
|
worker/event/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from functools import cached_property
|
|
3
|
+
|
|
4
|
+
import simplejson as json
|
|
5
|
+
|
|
6
|
+
from worker import constants
|
|
7
|
+
from worker.data.enums import EventType
|
|
8
|
+
from worker.data.json_encoder import JsonEncoder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Event(abc.ABC):
    """An event class that holds the events of a single asset_id."""

    def __init__(self, event_type: EventType):
        self.event_type: EventType = event_type

        self.asset_id: int = None
        self.records = []
        self.app_connection_id = None

    def add_records(self, new_records: list) -> None:
        """Append the given records to this event."""
        self.records.extend(new_records)

    @abc.abstractmethod
    def complete_event(self, api) -> None:
        """Finalize the event; concrete subclasses define what completion means."""
        raise NotImplementedError

    def __len__(self):
        # An event's length is its record count.
        return len(self.records)

    def __getitem__(self, index: int):
        # Index straight into the underlying records.
        return self.records[index]

    @cached_property
    def is_posting_to_message_producer(self) -> bool:
        """
        Whether to post to message producer for the wits stream and scheduler events or not.
        If there is no lambda app following your app then this should be false,
        because this process is interacting with the API which slows the process down.
        """
        return constants.get("global.post-to-message-producer", False)

    @abc.abstractmethod
    def build_message_producer_payload(self) -> dict:
        """Build the payload posted to the message producer; subclass-specific."""
        raise NotImplementedError

    def post_to_message_producer(self, api):
        """Forward this event to the message producer when enabled and connected."""
        # Evaluation order preserved: the cached flag is checked before the connection id.
        if self.is_posting_to_message_producer and self.app_connection_id:
            payload = self.build_message_producer_payload()
            api.post(path="/v1/message_producer", data=json.dumps(payload, cls=JsonEncoder, ignore_nan=True))
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from typing import Dict, Union
|
|
3
|
+
|
|
4
|
+
from worker.app import App
|
|
5
|
+
from worker.data.api import API
|
|
6
|
+
from worker.data.enums import EventType
|
|
7
|
+
from worker.data.operations import get_cleaned_event_and_type
|
|
8
|
+
from worker.event import Event
|
|
9
|
+
from worker.event.scheduled import ScheduledEvent, SingleScheduledEvent
|
|
10
|
+
from worker.event.stream import StreamEvent
|
|
11
|
+
from worker.exceptions import EventFormatError
|
|
12
|
+
from worker.partial_rerun_merge.merge import PartialRerunMerge
|
|
13
|
+
|
|
14
|
+
api = API()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EventHandler:
    """Drives a lambda event through cleaning, grouping by asset, and app execution."""

    def __init__(self, app: App, merger: PartialRerunMerge):
        self.app: App = app
        self.event_type: EventType = None
        self.event_by_asset_id: Dict[int, Event] = None
        self.merger: PartialRerunMerge = merger

    def process(self, event: Union[str, dict]):
        """
        The whole process that is performed on an event including the
        loading, handling state, running, and completing the event.

        Args:
            event (Union[str, dict]): lambda handler event
        """
        self._load(event)
        self._run()

    def _load(self, event: Union[str, dict]):
        """
        Cleaning the event and group events based on their asset ids

        Args:
            event (Union[str, dict]): lambda handler event
        """
        cleaned_event, detected_type = get_cleaned_event_and_type(event)
        self.event_type = detected_type
        self.event_by_asset_id = self.format_event(detected_type, cleaned_event)

    def _run(self):
        """Full run of the events based on their asset id"""
        # Keys (asset ids) are not needed here; iterate the grouped events directly.
        for event in self.event_by_asset_id.values():
            # Partial-rerun events bypass the app and go straight to the merger.
            if self.event_type == EventType.PARTIAL_RERUN:
                self.merger.perform_merge(event.get("data", {}))
                continue

            self.app.load(self.event_type, event)
            self.app.run_modules()
            self.app.save_state()
            event.complete_event(api)

    @staticmethod
    def format_event(event_type: EventType, event: Union[list, dict]) -> dict:
        """
        validate the wits_stream event, flatten and organize the data into a desired format
        :param event_type: type of event
        :param event: the wits or scheduler json event
        :return: a dict of records that are grouped by the asset_ids
        """
        if event_type == EventType.PARTIAL_RERUN:
            if not isinstance(event, dict):
                raise EventFormatError("Invalid event!")

            return {event["data"]["asset_id"]: event.copy()}

        if event_type == EventType.SCHEDULER:
            if not isinstance(event[0], list):
                raise EventFormatError("Invalid event!")

            # Scheduler events type is 'list of lists'; flattening into a single list
            single_events = [SingleScheduledEvent(item) for sublist in event for item in sublist]
            merge = ScheduledEvent
        else:  # 'wits_stream'
            single_events = [StreamEvent(each) for each in event]
            merge = StreamEvent.merge

        # sorting is required otherwise we only capture the last group of each asset_id
        single_events.sort(key=lambda single_event: single_event.asset_id)
        grouped = itertools.groupby(single_events, key=lambda single_event: single_event.asset_id)

        return {asset_id: merge(list(group)) for asset_id, group in grouped}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
|
|
3
|
+
from worker.event import Event, EventType
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SingleScheduledEvent:
    """One scheduler record, with schedule times normalized from epoch milliseconds to seconds."""

    def __init__(self, single_stream_event: dict):
        get = single_stream_event.get

        self.asset_id = get("asset_id") or 0

        # schedule_start / schedule_end arrive in milliseconds; keep seconds internally.
        self.start_time = get("schedule_start", 0) / 1000
        self.end_time = get("schedule_end", 0) / 1000
        self.schedule_id = get("schedule")
        self.app_connection_id = get("app_connection")

    def complete_event(self, api) -> None:
        """Sets schedule as completed."""
        # Nothing to mark when this record carries no schedule id.
        if self.schedule_id:
            api.post(path=f"/scheduler/{self.schedule_id}/completed")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ScheduledEvent(Event):
    """A scheduled event class that holds the events of a single asset_id."""

    def __init__(self, new_records: Union[SingleScheduledEvent, List[SingleScheduledEvent]]):
        super().__init__(EventType.SCHEDULER)

        if new_records:
            self.add_records(new_records)

        # The connection id of the most recent record represents the whole group.
        if self.records:
            self.app_connection_id = self.records[-1].app_connection_id

    def add_records(self, new_records: Union[SingleScheduledEvent, List[SingleScheduledEvent]]):
        """Append one record or a list of records, adopting the asset id from the first one."""
        if isinstance(new_records, SingleScheduledEvent):
            new_records = [new_records]

        # First batch establishes which asset this event belongs to.
        if not self.records:
            self.asset_id = new_records[0].asset_id

        super().add_records(new_records)

    def complete_event(self, api) -> None:
        """
        Two steps can happen for scheduler events.
        1. Sets schedule as completed; this happens for all the events.
        2. If another lambda function is following your lambda then it should
        send a message to message producer as well.

        Args:
            api (API):
        """
        # Step 1: mark all the events as completed.
        for record in self.records:
            record.complete_event(api)

        # Step 2: in case another lambda function following your lambda
        self.post_to_message_producer(api)

    def build_message_producer_payload(self) -> dict:
        """Payload carries only the latest record's start time for the downstream app."""
        return {
            "app_connection_id": self.app_connection_id,
            "asset_id": self.asset_id,
            "data": [{"timestamp": self.records[-1].start_time}],
        }
|
worker/event/stream.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
|
|
3
|
+
from worker import constants
|
|
4
|
+
from worker.data.operations import get_data_by_path
|
|
5
|
+
from worker.event import Event, EventType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StreamEvent(Event):
    """A stream event class that holds the events of a single asset_id."""

    def __init__(self, event: dict, is_posting_to_message_producer: bool = False):
        super().__init__(EventType.STREAM)

        self.metadata = event.get("metadata") or {}
        self.records = event.get("records") or []

        # Asset id comes from the first record; 0 when the event is empty.
        self.asset_id = self.records[0].get("asset_id") if self.records else 0

        app_key = constants.get("global.stream_app_key")
        self.app_connection_id = get_data_by_path(
            self.metadata, f"apps.{app_key}.app_connection_id", func=int, default=None
        )

    @classmethod
    def merge(cls, events: List["StreamEvent"]) -> Union["StreamEvent", None]:
        """Merge stream events of the same asset id into one"""
        if not events:
            return None

        # Fold all remaining events into the first one (mutated in place).
        base, *rest = events
        for other in rest:
            base.add(other)

        return base

    def add(self, other: "StreamEvent"):
        """Absorb another event's records; both must belong to the same asset."""
        if self.asset_id != other.asset_id:
            raise Exception(f"Events of different assets can not be merged; {self.asset_id} and {other.asset_id}!")

        self.add_records(other.records)

    def build_message_producer_payload(self) -> dict:
        return {"app_connection_id": self.app_connection_id, "asset_id": self.asset_id, "data": self.records}

    def complete_event(self, api) -> None:
        # Stream events have no completion endpoint; only optionally forward downstream.
        self.post_to_message_producer(api)
|