corva-worker-python 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. corva_worker_python-2.0.0.dist-info/METADATA +30 -0
  2. corva_worker_python-2.0.0.dist-info/RECORD +63 -0
  3. corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
  4. corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
  5. worker/__init__.py +5 -0
  6. worker/app/__init__.py +291 -0
  7. worker/app/modules/__init__.py +265 -0
  8. worker/app/modules/activity_module.py +141 -0
  9. worker/app/modules/connection_module.py +21 -0
  10. worker/app/modules/depth_activity_module.py +21 -0
  11. worker/app/modules/scheduler.py +44 -0
  12. worker/app/modules/time_activity_module.py +21 -0
  13. worker/app/modules/trigger.py +43 -0
  14. worker/constants.py +51 -0
  15. worker/data/__init__.py +0 -0
  16. worker/data/activity/__init__.py +132 -0
  17. worker/data/activity/activity_grouping.py +242 -0
  18. worker/data/alert.py +89 -0
  19. worker/data/api.py +155 -0
  20. worker/data/enums.py +141 -0
  21. worker/data/json_encoder.py +18 -0
  22. worker/data/math.py +104 -0
  23. worker/data/operations.py +477 -0
  24. worker/data/serialization.py +110 -0
  25. worker/data/task_handler.py +82 -0
  26. worker/data/two_way_dict.py +17 -0
  27. worker/data/unit_conversions.py +5 -0
  28. worker/data/wits.py +323 -0
  29. worker/event/__init__.py +53 -0
  30. worker/event/event_handler.py +90 -0
  31. worker/event/scheduled.py +64 -0
  32. worker/event/stream.py +48 -0
  33. worker/exceptions.py +26 -0
  34. worker/mixins/__init__.py +0 -0
  35. worker/mixins/logging.py +119 -0
  36. worker/mixins/rollbar.py +87 -0
  37. worker/partial_rerun_merge/__init__.py +0 -0
  38. worker/partial_rerun_merge/merge.py +500 -0
  39. worker/partial_rerun_merge/models.py +91 -0
  40. worker/partial_rerun_merge/progress.py +241 -0
  41. worker/state/__init__.py +96 -0
  42. worker/state/mixins.py +111 -0
  43. worker/state/state.py +46 -0
  44. worker/test/__init__.py +3 -0
  45. worker/test/lambda_function_test_run.py +196 -0
  46. worker/test/local_testing/__init__.py +0 -0
  47. worker/test/local_testing/to_local_transfer.py +360 -0
  48. worker/test/utils.py +51 -0
  49. worker/wellbore/__init__.py +0 -0
  50. worker/wellbore/factory.py +496 -0
  51. worker/wellbore/measured_depth_finder.py +12 -0
  52. worker/wellbore/model/__init__.py +0 -0
  53. worker/wellbore/model/ann.py +103 -0
  54. worker/wellbore/model/annulus.py +113 -0
  55. worker/wellbore/model/drillstring.py +196 -0
  56. worker/wellbore/model/drillstring_components.py +439 -0
  57. worker/wellbore/model/element.py +102 -0
  58. worker/wellbore/model/enums.py +92 -0
  59. worker/wellbore/model/hole.py +297 -0
  60. worker/wellbore/model/hole_section.py +51 -0
  61. worker/wellbore/model/riser.py +22 -0
  62. worker/wellbore/sections_mixin.py +64 -0
  63. worker/wellbore/wellbore.py +289 -0
@@ -0,0 +1,82 @@
1
+ import os
2
+
3
+ import simplejson as json
4
+
5
+ from worker import API
6
+ from worker.data.enums import LambdaStates
7
+ from worker.data.json_encoder import JsonEncoder
8
+ from worker.mixins.logging import LoggingMixin
9
+ from worker.mixins.rollbar import RollbarMixin
10
+
11
+
12
class TaskHandler(LoggingMixin, RollbarMixin):
    """
    Base handler for task lambdas.

    Subclasses implement `process`; `run_process` wraps it with task
    retrieval, test-mode short-circuiting on AWS, and success/failure
    reporting back to the task API.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Allow injecting the API client (useful for testing); defaults to a fresh one.
        self._api = kwargs.pop("api", API())
        super().__init__(*args, **kwargs)
        self.event = {}
        self.task_id = ""
        self.task = {}

    def process(self, *args, **kwargs):
        """
        Provides a placeholder for actual logic code for each app.
        e.g. Instantiate the app and run it
        This method must be overridden by each app
        :return: app-specific result payload
        """
        raise NotImplementedError("process method must be defined before instantiating this class")

    def run_process(self, event: dict, *args, **kwargs):
        """
        Runs the task handler workflow.
        This method would be called after instantiation of the TaskHandler class

        :param event: lambda event; expected to contain a "task_id" key
        :return: data produced by `process`, or None when skipped in test mode
        :raises Exception: re-raises whatever `process` raised, after the
            task has been marked as failed on the API
        """
        self.event = event
        self.task_id = self.event.get("task_id")
        self.get_task(self.task_id)

        # AWS_EXECUTION_ENV is a reserved environment variable it will hold the
        # runtime environment eg python3.6, java11 etc.
        # We do not have to set it on our lambda functions, it will be set by AWS
        # We can use this to check if we are running on hosted or local
        is_hosted = os.environ.get("AWS_EXECUTION_ENV", False)

        test_mode = self.task.get("properties", {}).get("test_mode", False)

        if test_mode and is_hosted:
            asset_id = self.task.get("asset_id", 0)
            self.warn(asset_id, f"Not processing on AWS due to test mode. task_id: {self.task_id} maybe running.")
            return

        try:
            data = self.process(self.event, *args, **kwargs)
            request_body = {"task": {"payload": {"data": data, "message": LambdaStates.SUCCEEDED.value}}}
            self.update_task(self.task_id, "success", request_body)
            return data

        except Exception as exception:
            request_body = {"task": {"fail_reason": f"Message: {LambdaStates.FAILED.value}, Exception: {exception}"}}
            self.update_task(self.task_id, "fail", request_body)
            # Bare raise preserves the original traceback for upstream handlers.
            raise

    def get_task(self, task_id: str) -> None:
        """
        Gets the task from API endpoint for the given task_id and caches it on `self.task`.
        :param task_id: task identifier
        :return:
        """
        path = f"/v2/tasks/{task_id}"
        self.task = self._api.get(path).data

    def update_task(self, task_id: str, status: str, request_body: dict) -> None:
        """
        Updates the task status on the API endpoint in case of success or failure
        :param task_id: task identifier
        :param status: [success, fail]
        :param request_body: payload sent to the endpoint (NaNs are dropped by the encoder)
        :return:
        """
        path = f"/v2/tasks/{task_id}/{status}"
        self.task = self._api.put(path, data=json.dumps(request_body, cls=JsonEncoder, ignore_nan=True)).data
@@ -0,0 +1,17 @@
1
class TwoWayDict(dict):
    """
    A dict maintaining a strictly one-to-one, bidirectional mapping:
    storing ``d[k] = v`` also makes ``d[v]`` resolve to ``k``.
    """

    def __setitem__(self, key, value):
        # Drop any existing pairings involving either endpoint so the
        # mapping stays one-to-one.
        for endpoint in (key, value):
            if endpoint in self:
                del self[endpoint]
        super().__setitem__(key, value)
        super().__setitem__(value, key)

    def __delitem__(self, key):
        # Remove both directions of the pairing.
        partner = self[key]
        super().__delitem__(partner)
        super().__delitem__(key)

    def __len__(self):
        """Returns the number of connections"""
        return super().__len__() // 2
@@ -0,0 +1,5 @@
1
# unit conversions
# Multiply a value in the left-hand unit by the constant to obtain the right-hand unit.
KG_M3_to_PPG = 0.00834540445319611  # kilograms per cubic meter -> pounds per US gallon
GPM_SEC_to_FT3 = (1 / 60) * 0.133681  # (US gallons per minute) * seconds -> cubic feet
US_LIQUID_GAL_to_FOOT3 = 0.133681  # US liquid gallons -> cubic feet
US_LIQUID_GAL_to_INCH3 = 231  # US liquid gallons -> cubic inches (exact by definition)
worker/data/wits.py ADDED
@@ -0,0 +1,323 @@
1
+ from typing import Dict, Union
2
+
3
+ import numpy as np
4
+
5
+ from worker.data import operations
6
+ from worker.data.activity import Activity
7
+ from worker.data.enums import ChannelStatus, DataStatus
8
+
9
+
10
class Channel(float):
    """
    A float that additionally carries a DataStatus.

    - If only a valid value is passed, data status is automatically set to VALID
    - If None is passed as value (or the value cannot be cast to float),
      the constructor returns None instead of a Channel instance
    - If both value and data status are passed, then it uses the passed data status
    """

    def __new__(cls, value=None, status=None):
        if value is None:
            return None

        if not isinstance(value, float):
            try:
                value = float(value)
            # BUG FIX: `except ValueError or TypeError` evaluated to
            # `except ValueError` only, so float() raising TypeError
            # (e.g. on a list) escaped. A tuple catches both.
            except (ValueError, TypeError):
                return None

        if value is not None and not status:
            status = DataStatus.VALID

        ch = float.__new__(cls, value)
        ch._status = status
        return ch

    @property
    def status(self) -> "DataStatus":
        """
        - Status can be valid or missing
        - Data status is only available for non None wits attributes
        - Missing represents a value that has been forward filled or interpolated

        :return: DataStatus
        """
        return self._status
41
+
42
+
43
class WITS:
    """
    - Create a wits object by WITS.set_wits(dataset: Union[list, dict])
    - Access any wits data attribute by wits.hole_depth
    - Access any data status of a wits data attribute by wits.hole_depth.status
    - Data status is only available for non None wits attributes
    """

    # Maps each supported channel name to the type used when setting it.
    data = {
        "entry_at": int,
        "hole_depth": Channel,
        "bit_depth": Channel,
        "block_height": Channel,
        "hook_load": Channel,
        "weight_on_bit": Channel,
        "rop": Channel,
        "rotary_rpm": Channel,
        "rotary_torque": Channel,
        "mud_flow_in": Channel,
        "mud_flow_out_percent": Channel,
        "standpipe_pressure": Channel,
        "diff_press": Channel,
        "pump_spm_1": Channel,
        "pump_spm_2": Channel,
        "pump_spm_3": Channel,
        "pump_spm_4": Channel,
        "pump_spm_total": Channel,
        "trip_tank_volume_1": Channel,
        "trip_tank_volume_2": Channel,
        "active_pit_volume": Channel,
        "lag_depth": Channel,
        "state": Activity,
        "boost_pump_flow_in": Channel,
        "annular_back_pressure": Channel,
    }
    metadata = {"drillstring": str}
    spm_channels = ["pump_spm_1", "pump_spm_2", "pump_spm_3", "pump_spm_4"]
    # NOTE(review): class-level mutable dict shared across instances; currently
    # always empty, so no aliasing issue in the visible code.
    status = {}

    def __init__(self, wits_json: dict = None, **kwargs):
        if not wits_json:
            wits_json = {}

        # Serialize functionality in Worker unpacks all the attributes. If that's how the class is called, repack.
        if kwargs:
            wits_json.update(dict(kwargs))
            wits_json.pop("_wits_", None)

        self.wits_json = wits_json

        data = wits_json.get("data", {})
        data_raw = wits_json.get("data_raw", {})

        metadata = wits_json.get("metadata", {})
        status = wits_json.get("status", {})

        for key in self.data.keys():
            # If type is not Channel or if the value is None, just setattr
            if self.data.get(key) != Channel or data.get(key, None) is None:
                setattr(self, key, data.get(key, None))
                continue

            key_data_status = DataStatus.VALID.value
            # If key is in data_raw, there are 2 possibilities - Missing (Value is None or -999.25) or Overridden
            if key in data_raw:
                key_data_status = DataStatus.OVERRIDDEN.value
                # If key in data_raw and value is null (None or -999.25), then we just setattr None
                if operations.is_null(data_raw.get(key, None)):
                    setattr(self, key, data.get(key, None))
                    continue

            # If type is Channel and (key not in data_raw or (key in data_raw and value is not null))
            setattr(self, key, Channel(data.get(key), DataStatus(data.get(f"{key}_data_status", key_data_status))))

        for key in self.metadata.keys():
            setattr(self, key, metadata.get(key, None))
        for key in self.status.keys():
            setattr(self, key, status.get(key, None))

        # BUG FIX: was `wits_json.get("data").get("entry_at")`, which raises
        # AttributeError when "data" is absent; the local `data` dict already
        # defaults to {}.
        setattr(self, "timestamp", data.get("entry_at"))
        setattr(self, "timestamp_read", wits_json.get("timestamp_read"))

        if getattr(self, "pump_spm_total") is None:
            self.set_pump_spm_total()

        setattr(self, "total_trip_tank_volume", self.get_total_trip_tank_volume())

    def __getattr__(self, item):
        # Any attribute not explicitly set reads as None (missing channel).
        # NOTE(review): this also silently hides misspelled attribute names.
        return None

    def __setattr__(self, key, value):
        """
        When trying to set an attribute it automatically tries to typecast the value.
        :param key: key
        :param value: value
        :return:
        """
        if key not in {**self.data, **self.status}.keys():
            self.__dict__[key] = value
            return

        data_type = {**self.data, **self.status}.get(key)
        try:
            if not isinstance(value, Channel):
                value = data_type(value)
            self.__dict__[key] = value
        # Typecast failures (e.g. Activity(None)) store None instead of raising.
        except (TypeError, ValueError):
            self.__dict__[key] = None

    def __iter__(self):
        # Yields the object itself so a single WITS can be treated as an iterable of one.
        yield self

    @classmethod
    def set_wits(cls, dataset: Union[list, dict]):
        """
        Class method to create WITS objects using a dict or a list of dicts

        :param dataset: wits record or a list of wits records
        :return: WITS object or a list of WITS objects (None for other input types)
        """
        if isinstance(dataset, dict):
            return cls(dataset)

        if isinstance(dataset, list):
            return [cls(record) for record in dataset]

    def get_as_dict(self) -> Union[dict, None]:
        """
        Get the wits record as a dictionary

        :return: wits record as a dict, or None when the record is empty
        """
        if not self.wits_json:
            return None

        self.wits_json.pop("data_raw", None)
        self.wits_json["data"] = {key: getattr(self, key) for key in self.data}
        self.wits_json["data"].update(
            {
                f"{key}_data_status": getattr(self, key).status.value
                for key in self.data
                if self.data.get(key) == Channel and getattr(self, key) is not None
            }
        )
        # convert state from Activity type to str
        # BUG FIX: guard against a missing state; `.value` on None raised AttributeError.
        state = self.wits_json["data"]["state"]
        if state is not None:
            self.wits_json["data"]["state"] = state.value
        self.wits_json["status"] = {key: getattr(self, key) for key in self.status}

        return self.wits_json

    def get_wits_data(self, columns=None) -> dict:
        """
        Get the data part of the wits record as a dictionary

        :param columns: Specify if you only want a select list of columns
        :return: data part of wits record as a dict
        """
        if not columns:
            data = {key: getattr(self, key) for key in self.data}
            return data

        data = {key: getattr(self, key) for key in columns}
        return data

    def spm_status(self, low_spm_threshold: float) -> Union[ChannelStatus, None]:
        """
        Get spm status. Compares total of all SPM channels against SPM threshold.

        :param low_spm_threshold: threshold
        :return: MISSING if all SPM channels are unavailable, OFF if total is at
            or below threshold and ON if greater
        """

        # If all SPM channels are empty, return MISSING
        if not any(operations.is_number(getattr(self, spm_channel)) for spm_channel in self.spm_channels):
            return ChannelStatus.MISSING

        if getattr(self, "pump_spm_total") > low_spm_threshold:
            return ChannelStatus.ON

        return ChannelStatus.OFF

    def mud_flow_in_status(self, low_mud_flow_in_threshold: float) -> Union[ChannelStatus, None]:
        """
        Get flow rate status. Compares mud flow in channel against flow rate threshold.

        :param low_mud_flow_in_threshold: threshold
        :return: MISSING if channel is unavailable, OFF if at or below threshold and ON if greater
        """
        if not operations.is_number(self.mud_flow_in):
            return ChannelStatus.MISSING

        if self.mud_flow_in > low_mud_flow_in_threshold:
            return ChannelStatus.ON

        return ChannelStatus.OFF

    def check_channel_availability(self, required_wits_channels: list) -> bool:
        """
        This function checks if all the required channels are available in the wits record

        :param required_wits_channels: List of required channels
        :return: Boolean value. True if all the required channels are available, else False
        """
        return all(
            operations.is_in_and_not_none(self.get_wits_data(required_wits_channels), channel)
            for channel in required_wits_channels
        )

    def set_pump_spm_total(self):
        """This function sets the pump_spm_total attribute value based on individual spm values"""

        # Negative or missing SPM readings are excluded from the total.
        pump_spm_total = sum(
            getattr(self, spm_channel)
            for spm_channel in self.spm_channels
            if (getattr(self, spm_channel) and getattr(self, spm_channel) >= 0)
        )

        setattr(self, "pump_spm_total", pump_spm_total)

    def get_total_trip_tank_volume(self) -> float:
        """
        This function returns the total trip tank volume if more than one trip tanks are available

        :return: Total trip tank volume (missing tanks count as 0)
        """
        ttk1 = self.trip_tank_volume_1 or 0
        ttk2 = self.trip_tank_volume_2 or 0

        return ttk1 + ttk2

    def validate_range(self, channel_range_map: Dict[str, Dict]):
        """
        This method can be used to validate the range of the given channels and apply the proper operation on the data
        :param channel_range_map: a dictionary of channel to operation and range ([min, max]) mapping
        example:
        {
            "mud_flow_out_percent": {"range": [0, 100], "operation": "clip"}
        }
        options for 'operation': ['clip', None]
        :raises ValueError: if an unknown operation is provided
        :return: None
        """
        for channel, properties in channel_range_map.items():
            orig_value = getattr(self, channel, None)
            if orig_value is None:
                continue

            valid_range = properties.get("range")
            operation = properties.get("operation", None)

            new_value = None

            if operation == "clip":
                new_value = np.clip(orig_value, *valid_range)
            elif operation is None:
                # No operation: keep the value only when it is inside the range,
                # otherwise it is replaced with None.
                if valid_range[0] <= orig_value <= valid_range[-1]:
                    new_value = orig_value
            else:
                raise ValueError("Invalid operation provided!")

            setattr(self, channel, new_value)
305
+
306
+
307
+ def serialize(obj: Union[list, WITS]) -> Union[list, dict, None]:
308
+ """
309
+ This function is used to serialize a WITS object or a list of WITS objects.
310
+ Use WITS.set_wits(dataset: Union[list, dict]) to deserialize.
311
+
312
+ :param obj: WITS object(s) to serialize.
313
+ :return: return a serialized object
314
+ """
315
+ if isinstance(obj, list):
316
+ s_obj = [each.get_as_dict() for each in obj]
317
+ return s_obj
318
+
319
+ if isinstance(obj, WITS):
320
+ s_obj = obj.get_as_dict()
321
+ return s_obj
322
+
323
+ return None
@@ -0,0 +1,53 @@
1
+ import abc
2
+ from functools import cached_property
3
+
4
+ import simplejson as json
5
+
6
+ from worker import constants
7
+ from worker.data.enums import EventType
8
+ from worker.data.json_encoder import JsonEncoder
9
+
10
+
11
class Event(abc.ABC):
    """An event class that holds the events of a single asset_id."""

    def __init__(self, event_type: EventType):
        # Concrete subclasses pass their own EventType (stream, scheduler, ...).
        self.event_type: EventType = event_type

        # Populated by subclasses once the records are known.
        self.asset_id: int = None
        self.records = []
        self.app_connection_id = None

    def add_records(self, new_records: list) -> None:
        """Append new_records to this event's record list."""
        self.records.extend(new_records)

    @abc.abstractmethod
    def complete_event(self, api) -> None:
        """Finalize the event against the API; subclass-specific."""
        raise NotImplementedError

    def __len__(self):
        # Length of an event is its number of records.
        return len(self.records)

    def __getitem__(self, index: int):
        return self.records[index]

    @cached_property
    def is_posting_to_message_producer(self) -> bool:
        """
        Whether to post to message producer for the wits stream and scheduler events or not.
        If there is no lambda app following your app then this should be false,
        because this process is interacting with the API which slows the process down.
        """
        # cached_property: the constants lookup happens once per event instance.
        is_posting: bool = constants.get("global.post-to-message-producer", False)
        return is_posting

    @abc.abstractmethod
    def build_message_producer_payload(self) -> dict:
        """Build the payload posted to the message producer; subclass-specific."""
        raise NotImplementedError

    def post_to_message_producer(self, api):
        # Skip when disabled by configuration or when no app connection id is known.
        if not self.is_posting_to_message_producer or not self.app_connection_id:
            return

        payload = self.build_message_producer_payload()
        api.post(path="/v1/message_producer", data=json.dumps(payload, cls=JsonEncoder, ignore_nan=True))
@@ -0,0 +1,90 @@
1
+ import itertools
2
+ from typing import Dict, Union
3
+
4
+ from worker.app import App
5
+ from worker.data.api import API
6
+ from worker.data.enums import EventType
7
+ from worker.data.operations import get_cleaned_event_and_type
8
+ from worker.event import Event
9
+ from worker.event.scheduled import ScheduledEvent, SingleScheduledEvent
10
+ from worker.event.stream import StreamEvent
11
+ from worker.exceptions import EventFormatError
12
+ from worker.partial_rerun_merge.merge import PartialRerunMerge
13
+
14
# Module-level API client shared by all EventHandler runs in this lambda.
api = API()
15
+
16
+
17
class EventHandler:
    """
    Routes a raw lambda event through the app pipeline: cleans the event,
    groups its records by asset id, and runs/completes each group.
    Partial-rerun events bypass the app and are handled by the merger.
    """

    def __init__(self, app: App, merger: PartialRerunMerge):
        self.app: App = app
        # Set by _load once the event has been cleaned.
        self.event_type: EventType = None
        self.event_by_asset_id: Dict[int, Event] = None
        self.merger: PartialRerunMerge = merger

    def process(self, event: Union[str, dict]):
        """
        The whole process that is performed on an event including the
        loading, handling state, running, and completing the event.

        Args:
            event (Union[str, dict]): lambda handler event
        """
        self._load(event)
        self._run()

    def _load(self, event: Union[str, dict]):
        """
        Cleaning the event and group events based on their asset ids

        Args:
            event (Union[str, dict]): lambda handler event
        """
        event, event_type = get_cleaned_event_and_type(event)
        self.event_type = event_type
        self.event_by_asset_id = self.format_event(self.event_type, event)

    def _run(self):
        """Full run of the events based on their asset id"""
        for _asset_id, event in self.event_by_asset_id.items():
            # Partial reruns are stored as plain dict copies (see format_event)
            # and are delegated to the merger rather than the app pipeline.
            if self.event_type == EventType.PARTIAL_RERUN:
                self.merger.perform_merge(event.get("data", {}))
                continue

            self.app.load(self.event_type, event)
            self.app.run_modules()
            self.app.save_state()
            event.complete_event(api)

    @staticmethod
    def format_event(event_type: EventType, event: Union[list, dict]) -> dict:
        """
        validate the wits_stream event, flatten and organize the data into a desired format
        :param event_type: type of event
        :param event: the wits or scheduler json event
        :return: a dict of records that are grouped by the asset_ids
        :raises EventFormatError: when the event does not match the expected shape
        """
        if event_type == EventType.PARTIAL_RERUN:
            if not isinstance(event, dict):
                raise EventFormatError("Invalid event!")

            return {event["data"]["asset_id"]: event.copy()}

        elif event_type == EventType.SCHEDULER:
            if not isinstance(event[0], list):
                raise EventFormatError("Invalid event!")

            # Scheduler events type is 'list of lists'; flattening into a single list
            events = [SingleScheduledEvent(item) for sublist in event for item in sublist]
            merging_function = ScheduledEvent

        else:  # 'wits_stream'
            events = [StreamEvent(each) for each in event]
            merging_function = StreamEvent.merge

        # sorting is required otherwise we only capture the last group of each asset_id
        # (itertools.groupby only groups consecutive runs of equal keys)
        events.sort(key=lambda single_event: single_event.asset_id)
        groups = itertools.groupby(events, key=lambda single_event: single_event.asset_id)

        events_by_asset_id = {group: merging_function(list(dataset)) for group, dataset in groups}

        return events_by_asset_id
@@ -0,0 +1,64 @@
1
+ from typing import List, Union
2
+
3
+ from worker.event import Event, EventType
4
+
5
+
6
class SingleScheduledEvent:
    """One scheduler record; schedule times arrive in milliseconds and are stored in seconds."""

    def __init__(self, single_stream_event: dict):
        fetch = single_stream_event.get

        self.asset_id = fetch("asset_id") or 0

        # Convert millisecond epochs to seconds.
        self.start_time = fetch("schedule_start", 0) / 1000
        self.end_time = fetch("schedule_end", 0) / 1000
        self.schedule_id = fetch("schedule")
        self.app_connection_id = fetch("app_connection")

    def complete_event(self, api) -> None:
        """Sets schedule as completed."""
        if not self.schedule_id:
            return

        api.post(path=f"/scheduler/{self.schedule_id}/completed")
21
+
22
+
23
class ScheduledEvent(Event):
    """A scheduled event class that holds the events of a single asset_id."""

    def __init__(self, new_records: Union[SingleScheduledEvent, List[SingleScheduledEvent]]):
        super().__init__(EventType.SCHEDULER)

        if new_records:
            self.add_records(new_records)

        # The latest record determines the app connection used downstream.
        if self.records:
            self.app_connection_id = self.records[-1].app_connection_id

    def add_records(self, new_records: Union[SingleScheduledEvent, List[SingleScheduledEvent]]):
        # Accept a single record or a batch uniformly.
        batch = [new_records] if isinstance(new_records, SingleScheduledEvent) else new_records

        # The first batch defines which asset this event belongs to.
        if not self.records:
            self.asset_id = batch[0].asset_id

        super().add_records(batch)

    def complete_event(self, api) -> None:
        """
        Two steps can happen for scheduler events.
        1. Sets schedule as completed; this happens for all the events.
        2. If another lambda function is following your lambda then it should
           send a message to message producer as well.

        Args:
            api (API):
        """
        # Step 1: mark all the events as completed.
        for record in self.records:
            record.complete_event(api)

        # Step 2: in case another lambda function following your lambda
        self.post_to_message_producer(api)

    def build_message_producer_payload(self) -> dict:
        return {
            "app_connection_id": self.app_connection_id,
            "asset_id": self.asset_id,
            "data": [{"timestamp": self.records[-1].start_time}],
        }
worker/event/stream.py ADDED
@@ -0,0 +1,48 @@
1
+ from typing import List, Union
2
+
3
+ from worker import constants
4
+ from worker.data.operations import get_data_by_path
5
+ from worker.event import Event, EventType
6
+
7
+
8
class StreamEvent(Event):
    """A stream event class that holds the events of a single asset_id."""

    def __init__(self, event: dict, is_posting_to_message_producer: bool = False):
        super().__init__(EventType.STREAM)

        self.metadata = event.get("metadata") or {}
        self.records = event.get("records") or []

        # All records of a stream event share one asset id; 0 when empty.
        self.asset_id = self.records[0].get("asset_id") if self.records else 0

        # Resolve this app's connection id from the event metadata.
        app_key = constants.get("global.stream_app_key")
        self.app_connection_id = get_data_by_path(
            self.metadata, f"apps.{app_key}.app_connection_id", func=int, default=None
        )

    @classmethod
    def merge(cls, events: List["StreamEvent"]) -> Union["StreamEvent", None]:
        """Merge stream events of the same asset id into one"""
        if not events:
            return None

        base, *rest = events
        for event in rest:
            base.add(event)

        return base

    def add(self, other: "StreamEvent"):
        # Merging across assets would corrupt the grouping; fail loudly.
        if self.asset_id != other.asset_id:
            raise Exception(f"Events of different assets can not be merged; {self.asset_id} and {other.asset_id}!")

        self.add_records(other.records)

    def complete_event(self, api) -> None:
        self.post_to_message_producer(api)

    def build_message_producer_payload(self) -> dict:
        return {"app_connection_id": self.app_connection_id, "asset_id": self.asset_id, "data": self.records}