corva-worker-python 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. corva_worker_python-2.0.0.dist-info/METADATA +30 -0
  2. corva_worker_python-2.0.0.dist-info/RECORD +63 -0
  3. corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
  4. corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
  5. worker/__init__.py +5 -0
  6. worker/app/__init__.py +291 -0
  7. worker/app/modules/__init__.py +265 -0
  8. worker/app/modules/activity_module.py +141 -0
  9. worker/app/modules/connection_module.py +21 -0
  10. worker/app/modules/depth_activity_module.py +21 -0
  11. worker/app/modules/scheduler.py +44 -0
  12. worker/app/modules/time_activity_module.py +21 -0
  13. worker/app/modules/trigger.py +43 -0
  14. worker/constants.py +51 -0
  15. worker/data/__init__.py +0 -0
  16. worker/data/activity/__init__.py +132 -0
  17. worker/data/activity/activity_grouping.py +242 -0
  18. worker/data/alert.py +89 -0
  19. worker/data/api.py +155 -0
  20. worker/data/enums.py +141 -0
  21. worker/data/json_encoder.py +18 -0
  22. worker/data/math.py +104 -0
  23. worker/data/operations.py +477 -0
  24. worker/data/serialization.py +110 -0
  25. worker/data/task_handler.py +82 -0
  26. worker/data/two_way_dict.py +17 -0
  27. worker/data/unit_conversions.py +5 -0
  28. worker/data/wits.py +323 -0
  29. worker/event/__init__.py +53 -0
  30. worker/event/event_handler.py +90 -0
  31. worker/event/scheduled.py +64 -0
  32. worker/event/stream.py +48 -0
  33. worker/exceptions.py +26 -0
  34. worker/mixins/__init__.py +0 -0
  35. worker/mixins/logging.py +119 -0
  36. worker/mixins/rollbar.py +87 -0
  37. worker/partial_rerun_merge/__init__.py +0 -0
  38. worker/partial_rerun_merge/merge.py +500 -0
  39. worker/partial_rerun_merge/models.py +91 -0
  40. worker/partial_rerun_merge/progress.py +241 -0
  41. worker/state/__init__.py +96 -0
  42. worker/state/mixins.py +111 -0
  43. worker/state/state.py +46 -0
  44. worker/test/__init__.py +3 -0
  45. worker/test/lambda_function_test_run.py +196 -0
  46. worker/test/local_testing/__init__.py +0 -0
  47. worker/test/local_testing/to_local_transfer.py +360 -0
  48. worker/test/utils.py +51 -0
  49. worker/wellbore/__init__.py +0 -0
  50. worker/wellbore/factory.py +496 -0
  51. worker/wellbore/measured_depth_finder.py +12 -0
  52. worker/wellbore/model/__init__.py +0 -0
  53. worker/wellbore/model/ann.py +103 -0
  54. worker/wellbore/model/annulus.py +113 -0
  55. worker/wellbore/model/drillstring.py +196 -0
  56. worker/wellbore/model/drillstring_components.py +439 -0
  57. worker/wellbore/model/element.py +102 -0
  58. worker/wellbore/model/enums.py +92 -0
  59. worker/wellbore/model/hole.py +297 -0
  60. worker/wellbore/model/hole_section.py +51 -0
  61. worker/wellbore/model/riser.py +22 -0
  62. worker/wellbore/sections_mixin.py +64 -0
  63. worker/wellbore/wellbore.py +289 -0
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: corva-worker-python
3
+ Version: 2.0.0
4
+ Summary: SDK for interacting with Corva
5
+ Home-page: https://github.com/corva-ai/corva-worker-python
6
+ Author: Jordan Ambra <jordan.ambra@corva.ai>, Mohammadreza Kamyab <m.kamyab@corva.ai>
7
+ License: Unlicensed
8
+ Keywords: corva,worker
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Requires-Python: >=3.13
18
+ Requires-Dist: numpy>=2.3.5
19
+ Requires-Dist: redis>=7.1.0
20
+ Requires-Dist: requests>=2.32.5
21
+ Requires-Dist: simplejson>=3.20.2
22
+ Requires-Dist: urllib3>=2.5.0
23
+ Dynamic: author
24
+ Dynamic: classifier
25
+ Dynamic: home-page
26
+ Dynamic: keywords
27
+ Dynamic: license
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
@@ -0,0 +1,63 @@
1
+ worker/__init__.py,sha256=xw-_4rRcZm65YTohJOUOZY7ZXE7WDGYwhwQxo3ng_t0,58
2
+ worker/constants.py,sha256=beZCoZk6Tl32EzHCxP6WNtH4QPsG_Tay6sfZSnddYYI,1480
3
+ worker/exceptions.py,sha256=ABjo2tay2TI40cVFSaKBE4i_ZqIC3IJZCw88qo2iLws,309
4
+ worker/app/__init__.py,sha256=-PEDBW7xCBbbc3fE6pxA5JBKM_5SJaCaVFqvzS5aNxc,11596
5
+ worker/app/modules/__init__.py,sha256=wJV7Jo_DSlkgDQqQRcmzplXGBG1SGfJIL9JHFi5hWgo,8871
6
+ worker/app/modules/activity_module.py,sha256=ot1zHSBhOP76gu9RV4OWnQY1JNAq-r3nkRLtgpwdmU0,5046
7
+ worker/app/modules/connection_module.py,sha256=qvaDgTBrf3NMA9Q1A-490fhjlhPVh_iskaMvLYK09lc,575
8
+ worker/app/modules/depth_activity_module.py,sha256=0enhx_fhDiLRaPwVh4BDcu2KkWPzG_9Bkqt1p7RzHbQ,572
9
+ worker/app/modules/scheduler.py,sha256=oZVLQH2p7aEQJUFXi4zL-QsGcfXGF859grI31TIt7bc,1448
10
+ worker/app/modules/time_activity_module.py,sha256=_M3UhF3FNdjUHMJX1SMFx_weqCT087R0KP1HaoFH2d0,583
11
+ worker/app/modules/trigger.py,sha256=OYRJCcK9wGxjvi0EcL9ZQSLZOMYR11RGLT_vynK5nF8,1438
12
+ worker/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ worker/data/alert.py,sha256=1sxY2pQMnirmAV9mNNw8MW0dllebprlHK-XgfSfAKKc,3074
14
+ worker/data/api.py,sha256=JSPVf1Ff6Cvkc1BfCpnOuhdRUuOSUVbZamAVe1Lyc1M,4799
15
+ worker/data/enums.py,sha256=Owe3Kp7UrALRdl7_C829S6ULhBK_IRWUXcw_gA_2PT8,3876
16
+ worker/data/json_encoder.py,sha256=jJcpNymA8370bgVIi_cz-EcUHjn4clC_I_t_2bK8Dqk,549
17
+ worker/data/math.py,sha256=TSDwWZD-gY6oikhPSmpt97dcxhIFEV5wZgiZcs3FcxE,2952
18
+ worker/data/operations.py,sha256=jv6w4Wqh75IDLeSPfkGNQ1Hu3cuom25d17dUSHlhRg4,12764
19
+ worker/data/serialization.py,sha256=p0Rmi9n2L8TCm7sinSBKDh9xvSJe6cg3PUk_Uq_mSwY,3058
20
+ worker/data/task_handler.py,sha256=X77REOvYqPqSOE4SLOgprwGd8kifmD9oRHXy3vNAH60,3113
21
+ worker/data/two_way_dict.py,sha256=rR4sTF_whBrRP8-PKQRfry7gdpPhsxtBjtk44dgAHQ0,535
22
+ worker/data/unit_conversions.py,sha256=UyFRYxTc1Jk2vN9Pfblq38_T-a_fBiZt7w2994ZbUag,154
23
+ worker/data/wits.py,sha256=P-3vWRZ5T28m0Lj3DHQD8Bm6aTmhFjeCeTJeDrPNMEM,11324
24
+ worker/data/activity/__init__.py,sha256=J63WWhGnkO3pYPJdhrtJfrzVS1xhjlO2EeRTdDNucd0,4807
25
+ worker/data/activity/activity_grouping.py,sha256=eDy1RqrlLCS4BlKNBb0xM7QHDX6lAw28zMLRwGcUBrE,8779
26
+ worker/event/__init__.py,sha256=7gYI4NcpO1snmrjZnrSkZgYs70xZwnlz4tXm0hzGs3E,1704
27
+ worker/event/event_handler.py,sha256=Hv9zm0z8i3vQeCfn_qTbykMaLS1j6muIz1i28qQZkOs,3401
28
+ worker/event/scheduled.py,sha256=Qrh6Upw1skQ4I8C0Q59zaWHcTSqMDlgEzvdocPEh-mM,2270
29
+ worker/event/stream.py,sha256=FTBkY1LQGgsVTSZ8_e4C_mMCMW0MyVehrvUxHVaonX0,1639
30
+ worker/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ worker/mixins/logging.py,sha256=vZrEiRZVmON0y9nMhao1gV8qqBkZ9zcpMEJkTYyhRGc,3234
32
+ worker/mixins/rollbar.py,sha256=DdmTw8PpExEofoZcPHBvmmIHfbMx4yB1lsdsMubtB5c,3089
33
+ worker/partial_rerun_merge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ worker/partial_rerun_merge/merge.py,sha256=a8RvEio_3wSZKkCqVvbpf3l0EqxP-HL2S_MEwToiw5c,17620
35
+ worker/partial_rerun_merge/models.py,sha256=UaGBf2HZkTitg4jgZtvVSyQKEFtwMSNMfND5bM-KpT4,2794
36
+ worker/partial_rerun_merge/progress.py,sha256=BPhUf8_IEc2SbzDgkN9tqmYOYfh-L9qVGg4mZ25t94I,7791
37
+ worker/state/__init__.py,sha256=X0TaxqfNqGp-lgraYgWpLHgdSDK0WlNjDYI0VWT9WOM,3069
38
+ worker/state/mixins.py,sha256=43YuxtRUfYZ2c5Ql4wcUOOxuWNVnKevElwDxEtueNCE,4044
39
+ worker/state/state.py,sha256=KEiw2-sB6xxyrweNE34JZ-iOD6C61sxXn3tsJvV6vLk,1267
40
+ worker/test/__init__.py,sha256=WCh2zc6JHZuiK8lPB41_aytnSqVcpAsNj5Tio4BARAA,82
41
+ worker/test/lambda_function_test_run.py,sha256=nbS6owG9deaHNYllxOs8klHHLR0_iSYy_jn1ZE1aB_I,6406
42
+ worker/test/utils.py,sha256=Z7tgC_lUk2bRMlR-3tY9i5d836AdYlIY4i9WFFplheg,1479
43
+ worker/test/local_testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ worker/test/local_testing/to_local_transfer.py,sha256=QYLGtT9fgDKsSK3x2QsaNDCMOpPZ5rpOsSnSl5s--WI,12343
45
+ worker/wellbore/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ worker/wellbore/factory.py,sha256=xKKpa9y3yuIQDfpps8wKSA2CiLhZuYFeAhrdZ0ZwdQM,19353
47
+ worker/wellbore/measured_depth_finder.py,sha256=I5FmkELT0_k1ajlraeBx2Q-G3noqNTAw1wEPKyUAQOw,367
48
+ worker/wellbore/sections_mixin.py,sha256=JNeEKm_fRxMBBD7SUx7LM8lRDJjKIeIcVvcSOKt4DTY,1873
49
+ worker/wellbore/wellbore.py,sha256=syvYszkbN40BhzxCQ8m6VmSBGPp2wQR9sJ6TcSu7WO8,11907
50
+ worker/wellbore/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
+ worker/wellbore/model/ann.py,sha256=4xf-YQqyuWbMXDbH8XV0ohuga5r8SVO9lJVMgdxS-7c,3483
52
+ worker/wellbore/model/annulus.py,sha256=U2JHQtJp6KPVlcAZMlO2MvrQf0I-cM2GlM969RBn8fg,3805
53
+ worker/wellbore/model/drillstring.py,sha256=om3It2UVZs_WSlaVJmpNziEIlAdVlN58tGXjcK0qMDo,6441
54
+ worker/wellbore/model/drillstring_components.py,sha256=PFs-as6b2D494gzBKnLz2jToKVghOymcVsU-4NaYamk,14769
55
+ worker/wellbore/model/element.py,sha256=6axQ1eh3zwej_9kke9o1uXNtJv4UABMTK0VmsCDfJYQ,3231
56
+ worker/wellbore/model/enums.py,sha256=DZm-o9ZCM1hSRHw_CcOK4Oy2Q_bmpDM7Gx3GlKQ_bvE,2828
57
+ worker/wellbore/model/hole.py,sha256=LYikxOMJfxhc2yoKZN12pDBIl6W8apJcAGtGR3ZuvhM,10046
58
+ worker/wellbore/model/hole_section.py,sha256=4sfP19xiY--wV1Ve2MtRoNqpya-FdPhNNmTgxLb90uk,1630
59
+ worker/wellbore/model/riser.py,sha256=ElxvQMfdFTGGpJ1Jba_DNVue3zGAsm-05zDl1HBXwDk,824
60
+ corva_worker_python-2.0.0.dist-info/METADATA,sha256=QTJwlYdwZWW7PqJNz6CYV6GY9K0mOcy5aSRz4X_Hv9Y,1017
61
+ corva_worker_python-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
+ corva_worker_python-2.0.0.dist-info/top_level.txt,sha256=KrP-SOC-I1V0U33nVzMgkHR8wO-reJYASIvWVyLyFIY,7
63
+ corva_worker_python-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ worker
worker/__init__.py ADDED
@@ -0,0 +1,5 @@
1
"""Top-level package surface for the worker SDK: re-exports the Corva API client."""
from worker.data.api import API

__all__ = ["API"]
worker/app/__init__.py ADDED
@@ -0,0 +1,291 @@
1
+ from typing import List, Optional
2
+
3
+ from worker import constants, exceptions
4
+ from worker.app.modules import Module
5
+ from worker.app.modules.time_activity_module import TimeActivityModule
6
+ from worker.data.enums import EventType, RerunMode
7
+ from worker.data.operations import gather_data_for_period
8
+ from worker.event import Event
9
+ from worker.event.scheduled import ScheduledEvent
10
+ from worker.event.stream import StreamEvent
11
+ from worker.mixins.logging import LoggingMixin
12
+ from worker.mixins.rollbar import RollbarMixin
13
+ from worker.partial_rerun_merge.models import RerunMergeCacheUpdater
14
+ from worker.state.mixins import RedisMixin
15
+ from worker.state.state import State
16
+
17
+
18
class App(RedisMixin, LoggingMixin, RollbarMixin):
    """
    Base application designed to receive the events of a single asset.

    Subclasses supply the concrete processing modules via `get_modules`; this
    class loads app state from redis, normalizes scheduler/stream events into
    a flat list of WITS records, filters out duplicates, and runs the enabled
    modules over those records.
    """

    app_key = constants.get("global.app-key")
    app_name = constants.get("global.app-name")

    # schema of the app-level state persisted in redis
    app_state_fields = {"asset_id": int, "last_processed_timestamp": int}

    def __init__(self, *args, **kwargs):
        self.module_key = None

        # set by `load`; annotated Optional because it stays None until then
        self.event_type: Optional[EventType] = None

        self.asset_id = None
        self.event = None  # event records (list of WITS dicts), set by `load`

        super().__init__(*args, **kwargs)

    def load(self, event_type: EventType, event: Event):
        """
        Normalize an incoming event into WITS records and cache them on self.

        :param event_type: type of the incoming event (scheduler or stream)
        :param event: a scheduler event or wits stream; belong to one asset
        :return: None; populates `self.event` with the filtered records
        """
        self.event_type = event_type

        self.asset_id = event.asset_id

        self.state = self.load_state()

        max_lookback_seconds = self.get_max_lookback_seconds()
        event = self.load_event(event, max_lookback_seconds)

        valid_stream_collections = self.get_valid_stream_collections()
        self.event = self.filter_event_for_collections(
            self.event_type, valid_stream_collections, event
        )  # event records

        self.log_event(self.event, max_lookback_seconds)  # event records

    def log_event(self, event: List[dict], max_lookback_seconds: int):
        """Log the received batch size and its timestamp range (no-op for empty batches)."""
        self.debug(self.asset_id, "WITS input to {0} -> {1}".format(self.app_name, event))

        if not event:
            return

        batch_size = len(event)
        start_time = event[0].get("timestamp")
        end_time = event[-1].get("timestamp")

        self.log(
            self.asset_id,
            text=(
                f"Received {batch_size} elements from {start_time} to {end_time}. "
                f"{max_lookback_seconds} seconds of initial data are lookback."
            ),
        )

    @staticmethod
    def get_valid_stream_collections():
        """
        Read the list of collections that stream events are allowed to contain.

        :return: list of collection names (a bare string is wrapped into a list)
        :raises TypeError: when the constant is neither a string nor a list
        """
        valid_collections = constants.get("global.valid-stream-collections", [])
        if not isinstance(valid_collections, (str, list)):
            raise TypeError("Incorrect type of valid-stream-collections in global constants")

        if isinstance(valid_collections, str):
            valid_collections = [valid_collections]
        return valid_collections

    def get_max_lookback_seconds(self):
        """
        For each module (mostly in time-base modules), the time of processing does not
        match the last time of the event so extra data is required to look back and get
        the data so the processing can start from where it left off.

        :return: the largest configured lookback-duration across time modules (0 if none)
        """
        time_modules = [module for module in self.get_modules() if issubclass(module, TimeActivityModule)]
        maximum_lookback = 0
        for module in time_modules:
            module_lookback = constants.get(
                "{}.{}.lookback-duration".format(self.app_key, module.module_key), default=0
            )
            maximum_lookback = max(module_lookback, maximum_lookback)

        return maximum_lookback

    def load_event(self, event: Event, max_lookback_seconds: int) -> List[dict]:
        """Dispatch event loading on event type; returns None for unknown types."""
        if self.event_type == EventType.SCHEDULER:
            return self.load_scheduler_event(self.asset_id, event, max_lookback_seconds)

        if self.event_type == EventType.STREAM:
            return self.load_wits_stream_event(self.asset_id, event, max_lookback_seconds)

        return None

    @staticmethod
    def filter_event_for_collections(event_type: EventType, valid_stream_collections: list, event: list) -> list:
        """
        This function filters the incoming event based on a list of valid collections

        :param event_type: Type of incoming event.
        :param valid_stream_collections: List of valid collections
        :param event: List of data records
        :return: List of records whose collection is one of the allowed event collections
        """

        # If event type is scheduler or event_collections is empty, return the entire event
        if event_type == EventType.SCHEDULER or not valid_stream_collections:
            return event

        # Filtering the event based on valid collections
        event = [record for record in event if record.get("collection") in valid_stream_collections]
        return event

    def filter_event_for_duplicates(self, event) -> list:
        """
        Drop records whose timestamp is not strictly newer than the last
        processed timestamp, keeping only monotonically increasing timestamps.

        :param event: list of records, assumed sorted by timestamp ascending
        :return: filtered list of records
        """
        # BUG FIX: an empty event previously passed the unique-timestamp check
        # (0 == 0) and then crashed with IndexError on `event[0]`
        if not event:
            return event

        last_processed_timestamp = self.state.get("last_processed_timestamp") or 0

        # Fast path: a single record newer than the last processed timestamp
        if len(event) == 1 and event[0].get("timestamp") > last_processed_timestamp:
            return event

        # Fast path: all timestamps are unique and the batch starts after the
        # last processed timestamp, so there is nothing to drop
        unique_timestamps = {record.get("timestamp") for record in event}
        if len(unique_timestamps) == len(event) and event[0].get("timestamp") > last_processed_timestamp:
            return event

        # Slow path: duplicates (or stale records) exist; keep only records
        # with strictly increasing timestamps
        filtered_events = []
        for each_record in event:
            if each_record.get("timestamp") > last_processed_timestamp:
                filtered_events.append(each_record)
                last_processed_timestamp = each_record.get("timestamp")

        return filtered_events

    def load_scheduler_event(self, asset_id: int, event: ScheduledEvent, max_lookback_seconds: int) -> List[dict]:
        """
        To load a scheduler event and get the wits stream data
        :param asset_id: The asset to load
        :param event: A cleaned event
        :param max_lookback_seconds: Maximum amount of time to look back prior to the scheduler event to cover gaps
        :return: list of WITS data between the last processed timestamp and the final event item timestamp
        """

        start_timestamp = self.state.get("last_processed_timestamp", event[0].start_time - 1)
        end_timestamp = event[-1].start_time

        # the event is converted from scheduler to wits stream
        scheduler_event = gather_data_for_period(
            asset_id=asset_id,
            start=start_timestamp - max_lookback_seconds,
            end=end_timestamp,
            limit=constants.get("global.query-limit"),
            fields=constants.get("global.wits_query_fields", None),
        )

        return scheduler_event

    def load_wits_stream_event(self, asset_id: int, event: StreamEvent, max_lookback_seconds: int) -> List[dict]:
        """
        To load a wits stream event and get more data if necessary
        :param asset_id: The asset to load
        :param event: A cleaned event
        :param max_lookback_seconds: Maximum amount of time to look back prior to WITS data to cover gaps
        :return: list of WITS data between the first event timestamp and the first timestamp
        """
        records = event.records

        # First filtering original event for duplicates
        records = self.filter_event_for_duplicates(records)

        # If all record timestamps are before the last processed timestamp,
        # the first timestamp will be the last processed timestamp from cache
        if not records:
            first_timestamp = self.state.get("last_processed_timestamp") or 0
        else:
            first_timestamp = records[0].get("timestamp")

        if not first_timestamp:
            return records

        if max_lookback_seconds:
            # Subtract one from the timestamp so that we don't reselect the final data item that was sent in the event
            end_timestamp = first_timestamp - 1

            previous_events = gather_data_for_period(
                asset_id=asset_id,
                start=first_timestamp - max_lookback_seconds,
                end=end_timestamp,
                limit=constants.get("global.query-limit"),
                fields=constants.get("global.wits_query_fields", None),
            )

            records = previous_events + records

        return records

    @staticmethod
    def determine_asset_id(event: list) -> int:
        """
        Extract the asset_id from the first record of a raw event list.

        :raises Exception: when the event is empty or carries no asset_id
        """
        try:
            return int(event[0]["asset_id"])
        except Exception:
            raise Exception(f"Event does not contain asset_id: {event}")

    def load_state(self, state_key: Optional[str] = None, raise_warnings: bool = False) -> dict:
        """
        Load the app state from redis and wrap it into a typed State object.

        The asset_id field is back-filled from self when absent in the cache.
        """
        previous_state = super().load_state(state_key=state_key, raise_warnings=raise_warnings)

        state = State(self.app_state_fields, previous_state)

        if not state.get("asset_id", None):
            state["asset_id"] = self.asset_id

        return state

    def get_modules(self) -> List[Module]:
        """Subclasses must return the list of module classes this app runs."""
        raise NotImplementedError("No modules found")

    def get_active_modules(self) -> List[Module]:
        """Return only the module classes whose `enabled` flag is set."""
        return [module for module in self.get_modules() if module.enabled]

    def run_modules(self):
        """
        Instantiate and run every active module over the loaded event records,
        then advance `last_processed_timestamp` in the app state.
        """
        if not self.event:  # event records
            return

        for module_type in self.get_active_modules():
            try:
                module = module_type(self.state, rollbar=self.rollbar)
            except Exception:
                raise exceptions.Misconfigured(
                    "Module {0} not able to initialize for asset_id {1}".format(module_type, self.asset_id)
                )

            try:
                module.run(self.event)  # event records
            except Exception as ex:
                message = f"Error in module {module_type.module_key}"
                ex.args += (message,)  # Adding message to existing Exception
                raise

        last_processed_timestamp = self.event[-1].get("timestamp")  # event records
        self.state["last_processed_timestamp"] = last_processed_timestamp

    @classmethod
    def update_cache(cls, merger: "PartialRerunMerge") -> None:
        """
        This function updates the cache of the original asset with the rerun asset's cache

        If in any app that is extending this class, it needs to be overridden, it can be done so.

        :param merger: PartialRerunMerge object
        :return: None
        """
        if merger.rerun_mode == RerunMode.HISTORICAL:
            return

        original_asset_id = merger.original_asset_id
        rerun_asset_id = merger.rerun_asset_id

        redis_handler = RedisMixin()
        redis_handler.asset_id = original_asset_id

        original_state_key = redis_handler.get_formatted_state_key(original_asset_id, cls.app_key)
        rerun_state_key = redis_handler.get_formatted_state_key(rerun_asset_id, cls.app_key)

        original_state = redis_handler.load_state(original_state_key)
        rerun_state = redis_handler.load_state(rerun_state_key)

        original_state = RerunMergeCacheUpdater.default_updater(original_state, rerun_state)

        redis_handler.state = original_state
        redis_handler.save_state(original_state_key)
@@ -0,0 +1,265 @@
1
+ from typing import Union
2
+
3
+ import simplejson as json
4
+
5
+ from worker import constants
6
+ from worker.data.api import API
7
+ from worker.data.enums import RerunMode
8
+ from worker.data.json_encoder import JsonEncoder
9
+ from worker.mixins.logging import LoggingMixin
10
+ from worker.mixins.rollbar import RollbarMixin
11
+ from worker.partial_rerun_merge.models import RerunMergeCacheUpdater
12
+ from worker.state.mixins import RedisMixin
13
+ from worker.state.state import State
14
+
15
+
16
class Module(RedisMixin, LoggingMixin, RollbarMixin):
    """
    This is an abstract base module that needs to be extended by an actual module.

    Concrete modules override `run` / `should_run_processor` / `run_module`;
    this base class provides redis state handling and the common Corva data
    API query/post helpers.
    """

    # module_key is used for redis access and state of this module
    app_key = constants.get("global.app-key")
    app_name = constants.get("global.app-name")
    module_key = "module"
    collection = "collection"
    module_state_fields = {"last_processed_timestamp": int}

    enabled = True

    def __init__(self, global_state, *args, **kwargs):
        # global_state is the app-level State; asset_id identifies the well
        self.asset_id = global_state.get("asset_id")

        self.global_state = global_state

        super().__init__(*args, **kwargs)

    def run(self, wits_stream: list):
        """
        :param wits_stream: a wits stream event
        :return:
        """
        # load the state
        state = self.get_state()
        self.state = self.process_module_state(state)

        # subclasses should implement their own run

    def should_run_processor(self, event):
        """Subclasses decide whether the processor should run for this event."""
        # NotImplementedError (still an Exception subclass, so existing broad
        # handlers keep working) replaces the bare Exception for clarity
        raise NotImplementedError("This method needs to be implemented by subclasses!")

    def get_state(self, state_key: Union[str, None] = None, raise_warnings: bool = False) -> dict:
        """
        Load this module's state from redis wrapped into a typed State object.

        Note: the original annotation `[str, None]` was a list literal, not a
        valid type; it is corrected to Union[str, None].
        """
        current_state = super().load_state(state_key=state_key, raise_warnings=raise_warnings)
        return State(self.module_state_fields, current_state)

    @staticmethod
    def process_module_state(state):
        """Hook for subclasses to post-process the loaded state; identity by default."""
        return state

    def set_state(self, key, value):
        """Set a single key in this module's state."""
        self.state[key] = value

    def load_dataset(self, event):
        """Hook for subclasses to transform the event into a dataset; identity by default."""
        return event

    def run_module(self, dataset: Union[list, dict], beginning_results: list) -> list:
        """Subclasses implement the actual per-dataset computation."""
        raise NotImplementedError("Not implemented")

    def get_last_exported_timestamp_from_collection(self, asset_id, query=None, less_than=None):
        """
        Query the module collection for this asset_id + module, sorted by timestamp descending,
        limit 1, grab the last item's timestamp. Default to 0 if no records found.
        @asset_id:
        @less_than: the timestamp before which you want to get
        """
        if less_than:
            # BUG FIX: previously an empty query still got an "AND" prefix,
            # producing a dangling leading "AND{...}" clause; only join with
            # "AND" when a query already exists (matches gather_collections_for_period)
            clause = "{timestamp#lt#%s}" % less_than
            query = "{0}AND{1}".format(query, clause) if query else clause

        worker = API()
        last_document = worker.get(
            path="/v1/data/corva",
            query=query,
            collection=self.collection,
            asset_id=asset_id,
            sort="{timestamp: -1}",
            limit=1,
        ).data

        if not last_document:
            return 0

        last_document = last_document[0]
        last_processed_timestamp = last_document.get("timestamp", 0)

        return last_processed_timestamp

    @staticmethod
    def gather_first_wits_timestamp_since(asset_id: int, since: int, activity_fields=None, operator="eq") -> int:
        """
        Query the Wits collection for this asset_id where state matches activity_fields
        and timestamp > since (strictly greater; the query uses the "gt" operator).
        Returns 0 when no matching record exists.
        """

        query = "{timestamp#%s#%s}" % ("gt", since)

        operator = operator.lower()

        if activity_fields:
            # a list of states with "eq" is implicitly an "in" query
            if operator == "eq" and isinstance(activity_fields, list):
                operator = "in"

            if operator in ("in", "nin"):
                if not isinstance(activity_fields, list):
                    activity_fields = [activity_fields]

                # Put each state into a formatted string for querying
                activity_fields = ["'{0}'".format(state) for state in activity_fields]
                activity_fields = "[{0}]".format(",".join(activity_fields))
            else:
                activity_fields = "'{0}'".format(activity_fields)

            query += "AND{data.state#%s#%s}" % (operator, activity_fields)

        worker = API()
        first_wits_since = worker.get(
            path="/v1/data/corva", collection="wits", asset_id=asset_id, sort="{timestamp: 1}", limit=1, query=query
        ).data

        if not first_wits_since:
            return 0

        first_wits_since = first_wits_since[0]
        first_wits_since_timestamp = first_wits_since.get("timestamp", 0)

        return first_wits_since_timestamp

    @staticmethod
    def gather_maximum_timestamp(event, start, activity_fields):
        """
        get the maximum time stamp of a stream of data
        :param event: a stream of data that the majority is wits collection
        :param start: baseline timestamp; returned unchanged if nothing newer matches
        :param activity_fields: wits states that qualify a record
        :return: the maximum qualifying timestamp (at least `start`)
        """
        maximum_timestamp = start
        for data in event:
            if data.get("collection") == "wits" and data.get("data", {}).get("state", None) in activity_fields:
                maximum_timestamp = max(data.get("timestamp", 0), maximum_timestamp)

        return maximum_timestamp

    def gather_minimum_timestamp(self, asset_id: int, event: list):
        """
        Determine where processing should start: the last exported timestamp, or
        (when none exists) 1800 seconds before the first event record.
        """
        minimum = self.get_last_exported_timestamp_from_collection(asset_id)

        if not minimum:
            # 1800 s = 30 min fallback window before the first record
            minimum = event[0]["timestamp"] - 1800

        return minimum

    def gather_collections_for_period(self, asset_id, start, end, query=None):
        """
        Fetch this module's collection records for [start, end], ascending by
        timestamp, optionally ANDed with an extra query clause.
        """
        limit = constants.get("global.query-limit")

        query = query or ""
        if query:
            query += "AND"

        query += "{timestamp#gte#%s}AND{timestamp#lte#%s}" % (start, end)

        worker = API()
        dataset = worker.get(
            path="/v1/data/corva",
            collection=self.collection,
            asset_id=asset_id,
            query=query,
            sort="{timestamp: 1}",
            limit=limit,
        ).data

        if not dataset:
            return []

        return dataset

    def store_output(self, asset_id, output):
        """
        to store/post results
        :param asset_id: asset id of the well
        :param output: an array of json objects to be posted
        :return: None
        """

        if not asset_id or not output or not self.collection:
            return

        output = self.format_output(output)

        self.debug(asset_id, "{0} output -> {1}".format(self.module_key, output))

        worker = API()
        worker.post(path="/v1/data/corva", data=output)

    def build_empty_output(self, wits: dict) -> dict:
        """
        Building an empty output result.
        :param wits: one wits record
        :return: skeleton output dict keyed off the wits record's identity fields
        """
        # NOTE(review): assumes timestamp/company_id/asset_id are present and
        # int-convertible in the wits record — int(None) would raise here
        output = {
            "timestamp": int(wits.get("timestamp")),
            "company_id": int(wits.get("company_id")),
            "asset_id": int(wits.get("asset_id")),
            "provider": str(wits.get("provider", "corva")),
            "version": 1,
            "collection": self.collection,
            "data": {},
        }

        enable_output_type_field = constants.get(
            "{}.{}.enable-output-type-field".format(self.app_key, self.module_key), False
        )

        if not enable_output_type_field:
            return output

        # If a specific module enforces type field in the output collection,
        # build_empty_output adds an additional type field to the output.
        # This can be enforced by setting "enable-output-type-field" to True in the module app constants.
        output.update({"type": self.module_key})

        return output

    @staticmethod
    def format_output(output):
        """Serialize output to JSON; ignore_nan converts NaN/Inf to null."""
        output = json.dumps(output, cls=JsonEncoder, ignore_nan=True)
        return output

    @classmethod
    def update_cache(cls, merger: "PartialRerunMerge") -> None:
        """
        This function updates the cache of the original asset with the rerun asset's cache

        If in any app that is extending this class, it needs to be overridden, it can be done so.

        :param merger: PartialRerunMerge object
        :return: None
        """
        if merger.rerun_mode == RerunMode.HISTORICAL:
            return

        original_asset_id = merger.original_asset_id
        rerun_asset_id = merger.rerun_asset_id

        redis_handler = RedisMixin()
        redis_handler.asset_id = original_asset_id

        original_state_key = redis_handler.get_formatted_state_key(original_asset_id, cls.app_key, cls.module_key)
        rerun_state_key = redis_handler.get_formatted_state_key(rerun_asset_id, cls.app_key, cls.module_key)

        original_state = redis_handler.load_state(original_state_key)
        rerun_state = redis_handler.load_state(rerun_state_key)

        original_state = RerunMergeCacheUpdater.default_updater(original_state, rerun_state)

        redis_handler.state = original_state
        redis_handler.save_state(original_state_key)