corva-worker-python 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corva_worker_python-2.0.0.dist-info/METADATA +30 -0
- corva_worker_python-2.0.0.dist-info/RECORD +63 -0
- corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
- corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
- worker/__init__.py +5 -0
- worker/app/__init__.py +291 -0
- worker/app/modules/__init__.py +265 -0
- worker/app/modules/activity_module.py +141 -0
- worker/app/modules/connection_module.py +21 -0
- worker/app/modules/depth_activity_module.py +21 -0
- worker/app/modules/scheduler.py +44 -0
- worker/app/modules/time_activity_module.py +21 -0
- worker/app/modules/trigger.py +43 -0
- worker/constants.py +51 -0
- worker/data/__init__.py +0 -0
- worker/data/activity/__init__.py +132 -0
- worker/data/activity/activity_grouping.py +242 -0
- worker/data/alert.py +89 -0
- worker/data/api.py +155 -0
- worker/data/enums.py +141 -0
- worker/data/json_encoder.py +18 -0
- worker/data/math.py +104 -0
- worker/data/operations.py +477 -0
- worker/data/serialization.py +110 -0
- worker/data/task_handler.py +82 -0
- worker/data/two_way_dict.py +17 -0
- worker/data/unit_conversions.py +5 -0
- worker/data/wits.py +323 -0
- worker/event/__init__.py +53 -0
- worker/event/event_handler.py +90 -0
- worker/event/scheduled.py +64 -0
- worker/event/stream.py +48 -0
- worker/exceptions.py +26 -0
- worker/mixins/__init__.py +0 -0
- worker/mixins/logging.py +119 -0
- worker/mixins/rollbar.py +87 -0
- worker/partial_rerun_merge/__init__.py +0 -0
- worker/partial_rerun_merge/merge.py +500 -0
- worker/partial_rerun_merge/models.py +91 -0
- worker/partial_rerun_merge/progress.py +241 -0
- worker/state/__init__.py +96 -0
- worker/state/mixins.py +111 -0
- worker/state/state.py +46 -0
- worker/test/__init__.py +3 -0
- worker/test/lambda_function_test_run.py +196 -0
- worker/test/local_testing/__init__.py +0 -0
- worker/test/local_testing/to_local_transfer.py +360 -0
- worker/test/utils.py +51 -0
- worker/wellbore/__init__.py +0 -0
- worker/wellbore/factory.py +496 -0
- worker/wellbore/measured_depth_finder.py +12 -0
- worker/wellbore/model/__init__.py +0 -0
- worker/wellbore/model/ann.py +103 -0
- worker/wellbore/model/annulus.py +113 -0
- worker/wellbore/model/drillstring.py +196 -0
- worker/wellbore/model/drillstring_components.py +439 -0
- worker/wellbore/model/element.py +102 -0
- worker/wellbore/model/enums.py +92 -0
- worker/wellbore/model/hole.py +297 -0
- worker/wellbore/model/hole_section.py +51 -0
- worker/wellbore/model/riser.py +22 -0
- worker/wellbore/sections_mixin.py +64 -0
- worker/wellbore/wellbore.py +289 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: corva-worker-python
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: SDK for interacting with Corva
|
|
5
|
+
Home-page: https://github.com/corva-ai/corva-worker-python
|
|
6
|
+
Author: Jordan Ambra <jordan.ambra@corva.ai>, Mohammadreza Kamyab <m.kamyab@corva.ai>
|
|
7
|
+
License: Unlicensed
|
|
8
|
+
Keywords: corva,worker
|
|
9
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.13
|
|
18
|
+
Requires-Dist: numpy>=2.3.5
|
|
19
|
+
Requires-Dist: redis>=7.1.0
|
|
20
|
+
Requires-Dist: requests>=2.32.5
|
|
21
|
+
Requires-Dist: simplejson>=3.20.2
|
|
22
|
+
Requires-Dist: urllib3>=2.5.0
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: home-page
|
|
26
|
+
Dynamic: keywords
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: requires-python
|
|
30
|
+
Dynamic: summary
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
worker/__init__.py,sha256=xw-_4rRcZm65YTohJOUOZY7ZXE7WDGYwhwQxo3ng_t0,58
|
|
2
|
+
worker/constants.py,sha256=beZCoZk6Tl32EzHCxP6WNtH4QPsG_Tay6sfZSnddYYI,1480
|
|
3
|
+
worker/exceptions.py,sha256=ABjo2tay2TI40cVFSaKBE4i_ZqIC3IJZCw88qo2iLws,309
|
|
4
|
+
worker/app/__init__.py,sha256=-PEDBW7xCBbbc3fE6pxA5JBKM_5SJaCaVFqvzS5aNxc,11596
|
|
5
|
+
worker/app/modules/__init__.py,sha256=wJV7Jo_DSlkgDQqQRcmzplXGBG1SGfJIL9JHFi5hWgo,8871
|
|
6
|
+
worker/app/modules/activity_module.py,sha256=ot1zHSBhOP76gu9RV4OWnQY1JNAq-r3nkRLtgpwdmU0,5046
|
|
7
|
+
worker/app/modules/connection_module.py,sha256=qvaDgTBrf3NMA9Q1A-490fhjlhPVh_iskaMvLYK09lc,575
|
|
8
|
+
worker/app/modules/depth_activity_module.py,sha256=0enhx_fhDiLRaPwVh4BDcu2KkWPzG_9Bkqt1p7RzHbQ,572
|
|
9
|
+
worker/app/modules/scheduler.py,sha256=oZVLQH2p7aEQJUFXi4zL-QsGcfXGF859grI31TIt7bc,1448
|
|
10
|
+
worker/app/modules/time_activity_module.py,sha256=_M3UhF3FNdjUHMJX1SMFx_weqCT087R0KP1HaoFH2d0,583
|
|
11
|
+
worker/app/modules/trigger.py,sha256=OYRJCcK9wGxjvi0EcL9ZQSLZOMYR11RGLT_vynK5nF8,1438
|
|
12
|
+
worker/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
worker/data/alert.py,sha256=1sxY2pQMnirmAV9mNNw8MW0dllebprlHK-XgfSfAKKc,3074
|
|
14
|
+
worker/data/api.py,sha256=JSPVf1Ff6Cvkc1BfCpnOuhdRUuOSUVbZamAVe1Lyc1M,4799
|
|
15
|
+
worker/data/enums.py,sha256=Owe3Kp7UrALRdl7_C829S6ULhBK_IRWUXcw_gA_2PT8,3876
|
|
16
|
+
worker/data/json_encoder.py,sha256=jJcpNymA8370bgVIi_cz-EcUHjn4clC_I_t_2bK8Dqk,549
|
|
17
|
+
worker/data/math.py,sha256=TSDwWZD-gY6oikhPSmpt97dcxhIFEV5wZgiZcs3FcxE,2952
|
|
18
|
+
worker/data/operations.py,sha256=jv6w4Wqh75IDLeSPfkGNQ1Hu3cuom25d17dUSHlhRg4,12764
|
|
19
|
+
worker/data/serialization.py,sha256=p0Rmi9n2L8TCm7sinSBKDh9xvSJe6cg3PUk_Uq_mSwY,3058
|
|
20
|
+
worker/data/task_handler.py,sha256=X77REOvYqPqSOE4SLOgprwGd8kifmD9oRHXy3vNAH60,3113
|
|
21
|
+
worker/data/two_way_dict.py,sha256=rR4sTF_whBrRP8-PKQRfry7gdpPhsxtBjtk44dgAHQ0,535
|
|
22
|
+
worker/data/unit_conversions.py,sha256=UyFRYxTc1Jk2vN9Pfblq38_T-a_fBiZt7w2994ZbUag,154
|
|
23
|
+
worker/data/wits.py,sha256=P-3vWRZ5T28m0Lj3DHQD8Bm6aTmhFjeCeTJeDrPNMEM,11324
|
|
24
|
+
worker/data/activity/__init__.py,sha256=J63WWhGnkO3pYPJdhrtJfrzVS1xhjlO2EeRTdDNucd0,4807
|
|
25
|
+
worker/data/activity/activity_grouping.py,sha256=eDy1RqrlLCS4BlKNBb0xM7QHDX6lAw28zMLRwGcUBrE,8779
|
|
26
|
+
worker/event/__init__.py,sha256=7gYI4NcpO1snmrjZnrSkZgYs70xZwnlz4tXm0hzGs3E,1704
|
|
27
|
+
worker/event/event_handler.py,sha256=Hv9zm0z8i3vQeCfn_qTbykMaLS1j6muIz1i28qQZkOs,3401
|
|
28
|
+
worker/event/scheduled.py,sha256=Qrh6Upw1skQ4I8C0Q59zaWHcTSqMDlgEzvdocPEh-mM,2270
|
|
29
|
+
worker/event/stream.py,sha256=FTBkY1LQGgsVTSZ8_e4C_mMCMW0MyVehrvUxHVaonX0,1639
|
|
30
|
+
worker/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
worker/mixins/logging.py,sha256=vZrEiRZVmON0y9nMhao1gV8qqBkZ9zcpMEJkTYyhRGc,3234
|
|
32
|
+
worker/mixins/rollbar.py,sha256=DdmTw8PpExEofoZcPHBvmmIHfbMx4yB1lsdsMubtB5c,3089
|
|
33
|
+
worker/partial_rerun_merge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
worker/partial_rerun_merge/merge.py,sha256=a8RvEio_3wSZKkCqVvbpf3l0EqxP-HL2S_MEwToiw5c,17620
|
|
35
|
+
worker/partial_rerun_merge/models.py,sha256=UaGBf2HZkTitg4jgZtvVSyQKEFtwMSNMfND5bM-KpT4,2794
|
|
36
|
+
worker/partial_rerun_merge/progress.py,sha256=BPhUf8_IEc2SbzDgkN9tqmYOYfh-L9qVGg4mZ25t94I,7791
|
|
37
|
+
worker/state/__init__.py,sha256=X0TaxqfNqGp-lgraYgWpLHgdSDK0WlNjDYI0VWT9WOM,3069
|
|
38
|
+
worker/state/mixins.py,sha256=43YuxtRUfYZ2c5Ql4wcUOOxuWNVnKevElwDxEtueNCE,4044
|
|
39
|
+
worker/state/state.py,sha256=KEiw2-sB6xxyrweNE34JZ-iOD6C61sxXn3tsJvV6vLk,1267
|
|
40
|
+
worker/test/__init__.py,sha256=WCh2zc6JHZuiK8lPB41_aytnSqVcpAsNj5Tio4BARAA,82
|
|
41
|
+
worker/test/lambda_function_test_run.py,sha256=nbS6owG9deaHNYllxOs8klHHLR0_iSYy_jn1ZE1aB_I,6406
|
|
42
|
+
worker/test/utils.py,sha256=Z7tgC_lUk2bRMlR-3tY9i5d836AdYlIY4i9WFFplheg,1479
|
|
43
|
+
worker/test/local_testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
worker/test/local_testing/to_local_transfer.py,sha256=QYLGtT9fgDKsSK3x2QsaNDCMOpPZ5rpOsSnSl5s--WI,12343
|
|
45
|
+
worker/wellbore/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
+
worker/wellbore/factory.py,sha256=xKKpa9y3yuIQDfpps8wKSA2CiLhZuYFeAhrdZ0ZwdQM,19353
|
|
47
|
+
worker/wellbore/measured_depth_finder.py,sha256=I5FmkELT0_k1ajlraeBx2Q-G3noqNTAw1wEPKyUAQOw,367
|
|
48
|
+
worker/wellbore/sections_mixin.py,sha256=JNeEKm_fRxMBBD7SUx7LM8lRDJjKIeIcVvcSOKt4DTY,1873
|
|
49
|
+
worker/wellbore/wellbore.py,sha256=syvYszkbN40BhzxCQ8m6VmSBGPp2wQR9sJ6TcSu7WO8,11907
|
|
50
|
+
worker/wellbore/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
+
worker/wellbore/model/ann.py,sha256=4xf-YQqyuWbMXDbH8XV0ohuga5r8SVO9lJVMgdxS-7c,3483
|
|
52
|
+
worker/wellbore/model/annulus.py,sha256=U2JHQtJp6KPVlcAZMlO2MvrQf0I-cM2GlM969RBn8fg,3805
|
|
53
|
+
worker/wellbore/model/drillstring.py,sha256=om3It2UVZs_WSlaVJmpNziEIlAdVlN58tGXjcK0qMDo,6441
|
|
54
|
+
worker/wellbore/model/drillstring_components.py,sha256=PFs-as6b2D494gzBKnLz2jToKVghOymcVsU-4NaYamk,14769
|
|
55
|
+
worker/wellbore/model/element.py,sha256=6axQ1eh3zwej_9kke9o1uXNtJv4UABMTK0VmsCDfJYQ,3231
|
|
56
|
+
worker/wellbore/model/enums.py,sha256=DZm-o9ZCM1hSRHw_CcOK4Oy2Q_bmpDM7Gx3GlKQ_bvE,2828
|
|
57
|
+
worker/wellbore/model/hole.py,sha256=LYikxOMJfxhc2yoKZN12pDBIl6W8apJcAGtGR3ZuvhM,10046
|
|
58
|
+
worker/wellbore/model/hole_section.py,sha256=4sfP19xiY--wV1Ve2MtRoNqpya-FdPhNNmTgxLb90uk,1630
|
|
59
|
+
worker/wellbore/model/riser.py,sha256=ElxvQMfdFTGGpJ1Jba_DNVue3zGAsm-05zDl1HBXwDk,824
|
|
60
|
+
corva_worker_python-2.0.0.dist-info/METADATA,sha256=QTJwlYdwZWW7PqJNz6CYV6GY9K0mOcy5aSRz4X_Hv9Y,1017
|
|
61
|
+
corva_worker_python-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
62
|
+
corva_worker_python-2.0.0.dist-info/top_level.txt,sha256=KrP-SOC-I1V0U33nVzMgkHR8wO-reJYASIvWVyLyFIY,7
|
|
63
|
+
corva_worker_python-2.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
worker
|
worker/__init__.py
ADDED
worker/app/__init__.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from worker import constants, exceptions
|
|
4
|
+
from worker.app.modules import Module
|
|
5
|
+
from worker.app.modules.time_activity_module import TimeActivityModule
|
|
6
|
+
from worker.data.enums import EventType, RerunMode
|
|
7
|
+
from worker.data.operations import gather_data_for_period
|
|
8
|
+
from worker.event import Event
|
|
9
|
+
from worker.event.scheduled import ScheduledEvent
|
|
10
|
+
from worker.event.stream import StreamEvent
|
|
11
|
+
from worker.mixins.logging import LoggingMixin
|
|
12
|
+
from worker.mixins.rollbar import RollbarMixin
|
|
13
|
+
from worker.partial_rerun_merge.models import RerunMergeCacheUpdater
|
|
14
|
+
from worker.state.mixins import RedisMixin
|
|
15
|
+
from worker.state.state import State
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class App(RedisMixin, LoggingMixin, RollbarMixin):
    """
    The app is designed to receive the events of a single asset.

    It restores persisted state, expands the incoming event with lookback
    data where needed, filters it to the valid stream collections, and
    dispatches the resulting records to the app's modules.
    """

    # Resolved once at class-definition time from the app's constants.
    app_key = constants.get("global.app-key")
    app_name = constants.get("global.app-name")

    # Fields (and their expected types) kept in this app's persisted state.
    app_state_fields = {"asset_id": int, "last_processed_timestamp": int}

    def __init__(self, *args, **kwargs):
        self.module_key = None

        # Set by load(); stays None until an event has been received.
        self.event_type: Optional[EventType] = None

        self.asset_id = None
        self.event = None  # event records

        super().__init__(*args, **kwargs)

    def load(self, event_type: EventType, event: Event):
        """
        Load an incoming event: restore state, expand the event with lookback
        data, and filter it down to the valid stream collections.

        :param event_type: type of the incoming event (scheduler or stream)
        :param event: a scheduler event or wits stream; belongs to one asset
        :return:
        """
        self.event_type = event_type

        self.asset_id = event.asset_id

        self.state = self.load_state()

        max_lookback_seconds = self.get_max_lookback_seconds()
        event = self.load_event(event, max_lookback_seconds)

        valid_stream_collections = self.get_valid_stream_collections()
        self.event = self.filter_event_for_collections(
            self.event_type, valid_stream_collections, event
        )  # event records

        self.log_event(self.event, max_lookback_seconds)  # event records

    def log_event(self, event: List[dict], max_lookback_seconds: int):
        """
        Debug-dump the full event, then log a one-line summary of the batch
        (size, first/last timestamps, and how much of it is lookback data).
        """
        self.debug(self.asset_id, "WITS input to {0} -> {1}".format(self.app_name, event))

        if not event:
            return

        batch_size = len(event)
        start_time = event[0].get("timestamp")
        end_time = event[-1].get("timestamp")

        self.log(
            self.asset_id,
            text=(
                f"Received {batch_size} elements from {start_time} to {end_time}. "
                f"{max_lookback_seconds} seconds of initial data are lookback."
            ),
        )

    @staticmethod
    def get_valid_stream_collections():
        """
        Read the list of stream collections this app accepts from constants.
        A single string is normalized to a one-element list.

        :raises TypeError: if the constant is neither a string nor a list
        """
        valid_collections = constants.get("global.valid-stream-collections", [])
        if not isinstance(valid_collections, (str, list)):
            raise TypeError("Incorrect type of valid-stream-collections in global constants")

        if isinstance(valid_collections, str):
            valid_collections = [valid_collections]
        return valid_collections

    def get_max_lookback_seconds(self):
        """
        For each module (mostly in time-based modules), the time of processing does not
        match the last time of the event, so extra data is required to look back and get
        the data so the processing can start from where it left off.

        :return: the largest configured lookback-duration over all time modules
        """

        time_modules = [module for module in self.get_modules() if issubclass(module, TimeActivityModule)]
        maximum_lookback = 0
        for module in time_modules:
            module_lookback = constants.get(
                "{}.{}.lookback-duration".format(self.app_key, module.module_key), default=0
            )
            maximum_lookback = max(module_lookback, maximum_lookback)

        return maximum_lookback

    def load_event(self, event: Event, max_lookback_seconds: int) -> List[dict]:
        """Dispatch event loading on event type; unknown types yield None."""
        if self.event_type == EventType.SCHEDULER:
            return self.load_scheduler_event(self.asset_id, event, max_lookback_seconds)

        if self.event_type == EventType.STREAM:
            return self.load_wits_stream_event(self.asset_id, event, max_lookback_seconds)

        return None

    @staticmethod
    def filter_event_for_collections(event_type: EventType, valid_stream_collections: list, event: list) -> list:
        """
        This function filters the incoming event based on a list of valid collections.

        :param event_type: Type of incoming event.
        :param valid_stream_collections: List of valid collections
        :param event: List of data records
        :return: List of records whose collection is one of the allowed event collections
        """

        # If event type is scheduler or event_collections is empty, return the entire event
        if event_type == EventType.SCHEDULER or not valid_stream_collections:
            return event

        # Filtering the event based on valid collections
        event = [record for record in event if record.get("collection") in valid_stream_collections]
        return event

    def filter_event_for_duplicates(self, event) -> list:
        """
        Drop records at or before the last processed timestamp and collapse
        repeated timestamps so only the first occurrence of each is kept.

        NOTE(review): an empty `event` list reaches `event[0]` in the second
        check below and would raise IndexError — callers appear to always
        pass non-empty lists; confirm.
        """
        last_processed_timestamp = self.state.get("last_processed_timestamp") or 0

        # If event is a single record and greater than the last processed timestamp, return the event
        if len(event) == 1 and event[0].get("timestamp") > last_processed_timestamp:
            return event

        # If length of unique timestamps is same as length of event and the first record timestamp is also greater than
        # last_processed_timestamp, then return the event
        unique_timestamps = set([record.get("timestamp") for record in event])
        if len(unique_timestamps) == len(event) and event[0].get("timestamp") > last_processed_timestamp:
            return event

        # Filtering the events for duplicates, once we identify that duplicates exist.
        filtered_events = []
        for each_record in event:
            if each_record.get("timestamp") > last_processed_timestamp:
                filtered_events.append(each_record)
                last_processed_timestamp = each_record.get("timestamp")

        return filtered_events

    def load_scheduler_event(self, asset_id: int, event: ScheduledEvent, max_lookback_seconds: int) -> List[dict]:
        """
        To load a scheduler event and get the wits stream data.

        :param asset_id: The asset to load
        :param event: A cleaned event
        :param max_lookback_seconds: Maximum amount of time to look back prior to the scheduler event to cover gaps
        :return: list of WITS data between the last processed timestamp and the final event item timestamp
        """

        # Fall back to one second before the event window when no state exists.
        start_timestamp = self.state.get("last_processed_timestamp", event[0].start_time - 1)
        end_timestamp = event[-1].start_time

        # the event is converted from scheduler to wits stream
        scheduler_event = gather_data_for_period(
            asset_id=asset_id,
            start=start_timestamp - max_lookback_seconds,
            end=end_timestamp,
            limit=constants.get("global.query-limit"),
            fields=constants.get("global.wits_query_fields", None),
        )

        return scheduler_event

    def load_wits_stream_event(self, asset_id: int, event: StreamEvent, max_lookback_seconds: int) -> List[dict]:
        """
        To load a wits stream event and get more data if necessary.

        :param asset_id: The asset to load
        :param event: A cleaned event
        :param max_lookback_seconds: Maximum amount of time to look back prior to WITS data to cover gaps
        :return: list of WITS data between the first event timestamp and the first timestamp
        """
        records = event.records

        # First filtering original event for duplicates
        records = self.filter_event_for_duplicates(records)

        # If all record timestamps are before the last processed timestamp,
        # the first timestamp will be the last processed timestamp from cache
        if not records:
            first_timestamp = self.state.get("last_processed_timestamp") or 0
        else:
            first_timestamp = records[0].get("timestamp")

        if not first_timestamp:
            return records

        if max_lookback_seconds:
            # Subtract one from the timestamp so that we don't reselect the final data item that was sent in the event
            end_timestamp = first_timestamp - 1

            previous_events = gather_data_for_period(
                asset_id=asset_id,
                start=first_timestamp - max_lookback_seconds,
                end=end_timestamp,
                limit=constants.get("global.query-limit"),
                fields=constants.get("global.wits_query_fields", None),
            )

            records = previous_events + records

        return records

    @staticmethod
    def determine_asset_id(event: list) -> int:
        """
        Extract the asset_id from the first record of a raw event list.

        :raises Exception: if the event is empty or has no usable asset_id
        """
        try:
            return int(event[0]["asset_id"])
        except Exception:
            raise Exception(f"Event does not contain asset_id: {event}")

    def load_state(self, state_key: Optional[str] = None, raise_warnings: bool = False) -> dict:
        """
        Load the persisted state and wrap it in a State object, seeding
        asset_id from the current event when it is missing.

        NOTE(review): the annotation says dict but a State instance is
        returned — presumably State is dict-like; confirm.
        """
        previous_state = super().load_state(state_key=state_key, raise_warnings=raise_warnings)

        state = State(self.app_state_fields, previous_state)

        if not state.get("asset_id", None):
            state["asset_id"] = self.asset_id

        return state

    def get_modules(self) -> List[Module]:
        # Subclasses must override this and return their module classes.
        raise NotImplementedError("No modules found")

    def get_active_modules(self) -> List[Module]:
        """Return only the module classes whose `enabled` flag is set."""
        return [module for module in self.get_modules() if module.enabled]

    def run_modules(self):
        """
        Instantiate and run each enabled module over the loaded event, then
        record the last processed timestamp in the app state.
        """
        if not self.event:  # event records
            return

        for module_type in self.get_active_modules():
            try:
                module = module_type(self.state, rollbar=self.rollbar)
            except Exception:
                raise exceptions.Misconfigured(
                    "Module {0} not able to initialize for asset_id {1}".format(module_type, self.asset_id)
                )

            try:
                module.run(self.event)  # event records
            except Exception as ex:
                message = f"Error in module {module_type.module_key}"
                ex.args += (message,)  # Adding message to existing Exception
                raise

        last_processed_timestamp = self.event[-1].get("timestamp")  # event records
        self.state["last_processed_timestamp"] = last_processed_timestamp

    @classmethod
    def update_cache(cls, merger: "PartialRerunMerge") -> None:
        """
        This function updates the cache of the original asset with the rerun asset's cache.

        If in any app that is extending this class, it needs to be overridden, it can be done so.

        :param merger: PartialRerunMerge object
        :return: None
        """
        # Historical reruns do not touch the original asset's cache.
        if merger.rerun_mode == RerunMode.HISTORICAL:
            return

        original_asset_id = merger.original_asset_id
        rerun_asset_id = merger.rerun_asset_id

        redis_handler = RedisMixin()
        redis_handler.asset_id = original_asset_id

        original_state_key = redis_handler.get_formatted_state_key(original_asset_id, cls.app_key)
        rerun_state_key = redis_handler.get_formatted_state_key(rerun_asset_id, cls.app_key)

        original_state = redis_handler.load_state(original_state_key)
        rerun_state = redis_handler.load_state(rerun_state_key)

        original_state = RerunMergeCacheUpdater.default_updater(original_state, rerun_state)

        redis_handler.state = original_state
        redis_handler.save_state(original_state_key)
@@ -0,0 +1,265 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
import simplejson as json
|
|
4
|
+
|
|
5
|
+
from worker import constants
|
|
6
|
+
from worker.data.api import API
|
|
7
|
+
from worker.data.enums import RerunMode
|
|
8
|
+
from worker.data.json_encoder import JsonEncoder
|
|
9
|
+
from worker.mixins.logging import LoggingMixin
|
|
10
|
+
from worker.mixins.rollbar import RollbarMixin
|
|
11
|
+
from worker.partial_rerun_merge.models import RerunMergeCacheUpdater
|
|
12
|
+
from worker.state.mixins import RedisMixin
|
|
13
|
+
from worker.state.state import State
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Module(RedisMixin, LoggingMixin, RollbarMixin):
    """
    This is an abstract base module that needs to be extended by an actual module.

    Subclasses override `run` (and related hooks) and set `module_key` /
    `collection` to identify their state and output collection.
    """

    # module_key is used for redis access and state of this module
    app_key = constants.get("global.app-key")
    app_name = constants.get("global.app-name")
    module_key = "module"
    collection = "collection"
    # Fields (and their expected types) kept in this module's persisted state.
    module_state_fields = {"last_processed_timestamp": int}

    # Modules with enabled=False are skipped by the app's run loop.
    enabled = True

    def __init__(self, global_state, *args, **kwargs):
        self.asset_id = global_state.get("asset_id")

        self.global_state = global_state

        super().__init__(*args, **kwargs)

    def run(self, wits_stream: list):
        """
        Load this module's state before processing; subclasses extend this
        with their actual processing of the stream.

        :param wits_stream: a wits stream event
        :return:
        """
        # load the state
        state = self.get_state()
        self.state = self.process_module_state(state)

        # subclasses should implement their own run

    def should_run_processor(self, event):
        # Abstract hook: subclasses decide whether the processor should run.
        raise Exception("This method need to be implemented by subclasses!")

    def get_state(self, state_key: Union[str, None] = None, raise_warnings: bool = False) -> dict:
        """
        Load this module's persisted state and wrap it in a State object.

        NOTE(review): the annotation says dict but a State instance is
        returned — presumably State is dict-like; confirm.
        """
        current_state = super().load_state(state_key=state_key, raise_warnings=raise_warnings)
        return State(self.module_state_fields, current_state)

    @staticmethod
    def process_module_state(state):
        # Hook for subclasses to post-process the loaded state; identity here.
        return state

    def set_state(self, key, value):
        """Set a single key in this module's state."""
        self.state[key] = value

    def load_dataset(self, event):
        # Hook for subclasses to derive their dataset; identity here.
        return event

    def run_module(self, dataset: Union[list, dict], beginning_results: list) -> list:
        # Abstract hook: subclasses implement the actual computation.
        raise Exception("Not implemented")

    def get_last_exported_timestamp_from_collection(self, asset_id, query=None, less_than=None):
        """
        Query the module collection for this asset_id + module, sorted by timestamp descending,
        limit 1, grab the last item's timestamp. Default to 0 if no records found.

        :param asset_id: asset to query
        :param query: optional pre-built query string to extend
        :param less_than: the timestamp before which you want to get
        """
        if less_than:
            query = query or ""
            query += "AND{timestamp#lt#%s}" % less_than

        worker = API()
        last_document = worker.get(
            path="/v1/data/corva",
            query=query,
            collection=self.collection,
            asset_id=asset_id,
            sort="{timestamp: -1}",
            limit=1,
        ).data

        if not last_document:
            return 0

        last_document = last_document[0]
        last_processed_timestamp = last_document.get("timestamp", 0)

        return last_processed_timestamp

    @staticmethod
    def gather_first_wits_timestamp_since(asset_id: int, since: int, activity_fields=None, operator="eq") -> int:
        """
        Query the Wits collection for this asset_id where state in wits_states and timestamp >= since.

        Returns 0 when no matching record exists.
        """

        query = "{timestamp#%s#%s}" % ("gt", since)

        operator = operator.lower()

        if activity_fields:
            # A list of states with "eq" is really a membership test.
            if operator == "eq" and isinstance(activity_fields, list):
                operator = "in"

            if operator in ("in", "nin"):
                if not isinstance(activity_fields, list):
                    activity_fields = [activity_fields]

                # Put each state into a formatted string for querying
                activity_fields = ["'{0}'".format(state) for state in activity_fields]
                activity_fields = "[{0}]".format(",".join(activity_fields))
            else:
                activity_fields = "'{0}'".format(activity_fields)

            query += "AND{data.state#%s#%s}" % (operator, activity_fields)

        worker = API()
        first_wits_since = worker.get(
            path="/v1/data/corva", collection="wits", asset_id=asset_id, sort="{timestamp: 1}", limit=1, query=query
        ).data

        if not first_wits_since:
            return 0

        first_wits_since = first_wits_since[0]
        first_wits_since_timestamp = first_wits_since.get("timestamp", 0)

        return first_wits_since_timestamp

    @staticmethod
    def gather_maximum_timestamp(event, start, activity_fields):
        """
        Get the maximum timestamp of a stream of data.

        :param event: a stream of data that the majority is wits collection
        :param start: lower bound / initial value for the maximum
        :param activity_fields: states that qualify a record for consideration
        :return: the largest qualifying timestamp, at least `start`
        """
        maximum_timestamp = start
        for data in event:
            if data.get("collection") == "wits" and data.get("data", {}).get("state", None) in activity_fields:
                maximum_timestamp = max(data.get("timestamp", 0), maximum_timestamp)

        return maximum_timestamp

    def gather_minimum_timestamp(self, asset_id: int, event: list):
        """
        Start from the last exported timestamp; if none exists, fall back to
        30 minutes (1800 s) before the first event record.
        """
        minimum = self.get_last_exported_timestamp_from_collection(asset_id)

        if not minimum:
            minimum = event[0]["timestamp"] - 1800

        return minimum

    def gather_collections_for_period(self, asset_id, start, end, query=None):
        """
        Fetch this module's collection records with start <= timestamp <= end,
        ascending, optionally AND-ed with an extra query string.
        """
        limit = constants.get("global.query-limit")

        query = query or ""
        if query:
            query += "AND"

        query += "{timestamp#gte#%s}AND{timestamp#lte#%s}" % (start, end)

        worker = API()
        dataset = worker.get(
            path="/v1/data/corva",
            collection=self.collection,
            asset_id=asset_id,
            query=query,
            sort="{timestamp: 1}",
            limit=limit,
        ).data

        if not dataset:
            return []

        return dataset

    def store_output(self, asset_id, output):
        """
        To store/post results.

        :param asset_id: asset id of the well
        :param output: an array of json objects to be posted
        :return: None
        """

        # Nothing to do without an asset, output, or a target collection.
        if not asset_id or not output or not self.collection:
            return

        output = self.format_output(output)

        self.debug(asset_id, "{0} output -> {1}".format(self.module_key, output))

        worker = API()
        worker.post(path="/v1/data/corva", data=output)

    def build_empty_output(self, wits: dict) -> dict:
        """
        Building an empty output result.

        :param wits: one wits record
        :return: a skeleton output document with empty data
        """
        output = {
            "timestamp": int(wits.get("timestamp")),
            "company_id": int(wits.get("company_id")),
            "asset_id": int(wits.get("asset_id")),
            "provider": str(wits.get("provider", "corva")),
            "version": 1,
            "collection": self.collection,
            "data": {},
        }

        enable_output_type_field = constants.get(
            "{}.{}.enable-output-type-field".format(self.app_key, self.module_key), False
        )

        if not enable_output_type_field:
            return output

        # If a specific module enforces type field in the output collection,
        # build_empty_output adds an additional type field to the output.
        # This can be enforced by setting "enable-output-type-field" to True in the module app constants.
        output.update({"type": self.module_key})

        return output

    @staticmethod
    def format_output(output):
        # Serialize with the project encoder; ignore_nan maps NaN to null.
        output = json.dumps(output, cls=JsonEncoder, ignore_nan=True)
        return output

    @classmethod
    def update_cache(cls, merger: "PartialRerunMerge") -> None:
        """
        This function updates the cache of the original asset with the rerun asset's cache.

        If in any app that is extending this class, it needs to be overridden, it can be done so.

        :param merger: PartialRerunMerge object
        :return: None
        """
        # Historical reruns do not touch the original asset's cache.
        if merger.rerun_mode == RerunMode.HISTORICAL:
            return

        original_asset_id = merger.original_asset_id
        rerun_asset_id = merger.rerun_asset_id

        redis_handler = RedisMixin()
        redis_handler.asset_id = original_asset_id

        original_state_key = redis_handler.get_formatted_state_key(original_asset_id, cls.app_key, cls.module_key)
        rerun_state_key = redis_handler.get_formatted_state_key(rerun_asset_id, cls.app_key, cls.module_key)

        original_state = redis_handler.load_state(original_state_key)
        rerun_state = redis_handler.load_state(rerun_state_key)

        original_state = RerunMergeCacheUpdater.default_updater(original_state, rerun_state)

        redis_handler.state = original_state
        redis_handler.save_state(original_state_key)