corva-worker-python 2.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. corva_worker_python-2.0.0.dist-info/METADATA +30 -0
  2. corva_worker_python-2.0.0.dist-info/RECORD +63 -0
  3. corva_worker_python-2.0.0.dist-info/WHEEL +5 -0
  4. corva_worker_python-2.0.0.dist-info/top_level.txt +1 -0
  5. worker/__init__.py +5 -0
  6. worker/app/__init__.py +291 -0
  7. worker/app/modules/__init__.py +265 -0
  8. worker/app/modules/activity_module.py +141 -0
  9. worker/app/modules/connection_module.py +21 -0
  10. worker/app/modules/depth_activity_module.py +21 -0
  11. worker/app/modules/scheduler.py +44 -0
  12. worker/app/modules/time_activity_module.py +21 -0
  13. worker/app/modules/trigger.py +43 -0
  14. worker/constants.py +51 -0
  15. worker/data/__init__.py +0 -0
  16. worker/data/activity/__init__.py +132 -0
  17. worker/data/activity/activity_grouping.py +242 -0
  18. worker/data/alert.py +89 -0
  19. worker/data/api.py +155 -0
  20. worker/data/enums.py +141 -0
  21. worker/data/json_encoder.py +18 -0
  22. worker/data/math.py +104 -0
  23. worker/data/operations.py +477 -0
  24. worker/data/serialization.py +110 -0
  25. worker/data/task_handler.py +82 -0
  26. worker/data/two_way_dict.py +17 -0
  27. worker/data/unit_conversions.py +5 -0
  28. worker/data/wits.py +323 -0
  29. worker/event/__init__.py +53 -0
  30. worker/event/event_handler.py +90 -0
  31. worker/event/scheduled.py +64 -0
  32. worker/event/stream.py +48 -0
  33. worker/exceptions.py +26 -0
  34. worker/mixins/__init__.py +0 -0
  35. worker/mixins/logging.py +119 -0
  36. worker/mixins/rollbar.py +87 -0
  37. worker/partial_rerun_merge/__init__.py +0 -0
  38. worker/partial_rerun_merge/merge.py +500 -0
  39. worker/partial_rerun_merge/models.py +91 -0
  40. worker/partial_rerun_merge/progress.py +241 -0
  41. worker/state/__init__.py +96 -0
  42. worker/state/mixins.py +111 -0
  43. worker/state/state.py +46 -0
  44. worker/test/__init__.py +3 -0
  45. worker/test/lambda_function_test_run.py +196 -0
  46. worker/test/local_testing/__init__.py +0 -0
  47. worker/test/local_testing/to_local_transfer.py +360 -0
  48. worker/test/utils.py +51 -0
  49. worker/wellbore/__init__.py +0 -0
  50. worker/wellbore/factory.py +496 -0
  51. worker/wellbore/measured_depth_finder.py +12 -0
  52. worker/wellbore/model/__init__.py +0 -0
  53. worker/wellbore/model/ann.py +103 -0
  54. worker/wellbore/model/annulus.py +113 -0
  55. worker/wellbore/model/drillstring.py +196 -0
  56. worker/wellbore/model/drillstring_components.py +439 -0
  57. worker/wellbore/model/element.py +102 -0
  58. worker/wellbore/model/enums.py +92 -0
  59. worker/wellbore/model/hole.py +297 -0
  60. worker/wellbore/model/hole_section.py +51 -0
  61. worker/wellbore/model/riser.py +22 -0
  62. worker/wellbore/sections_mixin.py +64 -0
  63. worker/wellbore/wellbore.py +289 -0
worker/partial_rerun_merge/progress.py ADDED
@@ -0,0 +1,241 @@
+ """
+ This module defines the progress of partial reruns.
+ """
+
+ from dataclasses import asdict, dataclass
+ from typing import List, Optional
+
+ import requests
+ import simplejson as json
+
+ from worker.data.api import API
+ from worker.data.enums import PartialRerunStatus
+
+
+ @dataclass
+ class DatasetProgress:
+     """
+     Represents the progress of a dataset.
+
+     Attributes:
+         dataset_id (int): The ID of the dataset.
+         processed_timestamp (int): The timestamp when the dataset was processed.
+         completed (bool): Whether the dataset processing is completed or not.
+         dataset_name (str): The name of the dataset.
+     """
+
+     dataset_id: int
+     processed_timestamp: int
+     completed: bool
+     dataset_name: str
+
+     def is_completed(self) -> bool:
+         """
+         Returns a boolean indicating whether the progress has been completed.
+         """
+         return self.completed
+
+     def is_started(self) -> bool:
+         """
+         Returns a boolean indicating whether the progress has been started.
+         """
+         return self.processed_timestamp > 0
+
+     def mark_completed_at(self, processed_timestamp: int) -> None:
+         """
+         Marks the progress as completed and sets the processed timestamp.
+
+         Args:
+             processed_timestamp (int): The timestamp when the progress was processed.
+         """
+         self.completed = True
+         self.processed_timestamp = processed_timestamp
+
+     @classmethod
+     def set_from_dict(cls, json_object: dict) -> "DatasetProgress":
+         """Set the object from a dict.
+
+         Args:
+             json_object (dict): the input json
+
+         Returns:
+             DatasetProgress: the constructed instance
+         """
+         return cls(**json_object)
+
+     def to_dict(self) -> dict:
+         """
+         Returns a dictionary representation of the DatasetProgress object.
+
+         Returns:
+             dict: A dictionary containing the dataset_id, processed_timestamp,
+             completed, and dataset_name attributes.
+         """
+         return asdict(self)
+
+     def get_trimmed_dataset_name(self) -> str:
+         """
+         Returns the trimmed dataset name by splitting the dataset name
+         on the "#" delimiter and returning the last part.
+
+         Example: "corva#circulation.volumetric" -> "circulation.volumetric"
+
+         Returns:
+             str: The trimmed dataset name.
+         """
+         return self.dataset_name.split("#")[-1]
+
+
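For illustration, a minimal round-trip through DatasetProgress (a sketch; the ID, timestamp and dataset name below are hypothetical):

# Hypothetical usage sketch; values are made up for illustration.
attributes = {
    "dataset_id": 42,
    "processed_timestamp": 0,
    "completed": False,
    "dataset_name": "corva#circulation.volumetric",
}
progress = DatasetProgress.set_from_dict(attributes)
assert not progress.is_started()            # processed_timestamp == 0
progress.mark_completed_at(1700000000)      # epoch seconds, hypothetical
assert progress.is_completed() and progress.to_dict()["completed"]
assert progress.get_trimmed_dataset_name() == "circulation.volumetric"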
+ class MergingProgress:
+     """
+     Represents the progress of merging a partial rerun for a specific app.
+
+     Attributes:
+         partial_well_rerun_id (int): The ID of the partial rerun.
+         app_id (int): The ID of the app.
+         status (PartialRerunStatus): The status of the partial rerun.
+         dataset_progresses (List[DatasetProgress]): The progress of each dataset
+             in the partial rerun.
+         api (API): The API object used to make requests to the Corva API.
+         is_cache_update_completed (bool): Whether the cache update is completed.
+     """
+
+     def __init__(self, partial_well_rerun_id: int, app_id: int, api: API):
+         """
+         Initializes a new instance of the MergingProgress class.
+
+         Args:
+             partial_well_rerun_id (int): The ID of the partial rerun.
+             app_id (int): The ID of the application.
+             api (API): The API object.
+         """
+         self.partial_well_rerun_id = partial_well_rerun_id
+         self.app_id = app_id
+         self.status = PartialRerunStatus.MERGING
+         self.dataset_progresses: List[DatasetProgress] = []
+
+         self.api = api
+
+         # get and parse the current progress from the API
+         response_data = self.api.get(self.get_url_path()).data
+         self.parse_progress(response_data)
+
+         self.is_cache_update_completed = self.determine_if_cache_update_completed()
+
+         # in case of failure, this is set to the reason for the failure
+         self.fail_reason: Optional[str] = None
+
+     def parse_progress(self, response_data: dict) -> None:
+         """
+         Parses the progress information retrieved from the API and populates the
+         'dataset_progresses' list.
+
+         Args:
+             response_data (dict): The progress information retrieved from the API.
+         """
+         progresses = response_data.get("included") or []
+
+         for progress in progresses:
+             if progress.get("type") != "partial_well_rerun_dataset_progress":
+                 continue
+
+             attributes = progress.get("attributes") or {}
+
+             dataset_progress = DatasetProgress.set_from_dict(attributes)
+             self.dataset_progresses.append(dataset_progress)
+
+     def complete_status(self) -> None:
+         """
+         Marks the partial rerun merge as completed.
+         """
+         self.status = PartialRerunStatus.COMPLETED
+
+     def fail_status(self, reason: str) -> None:
+         """
+         Sets the status of the partial rerun to FAILED and records the reason for the failure.
+
+         Args:
+             reason (str): The reason for the failure.
+         """
+         self.status = PartialRerunStatus.FAILED
+         self.fail_reason = reason
+
+     def get_dataset_progress(self, dataset_name: str) -> Optional[DatasetProgress]:
+         """
+         Gets the progress of the dataset with the given name.
+
+         Args:
+             dataset_name (str): The name of the dataset.
+
+         Returns:
+             Optional[DatasetProgress]: The progress of the dataset, or None if no
+             dataset with that name is found.
+         """
+         return next((dp for dp in self.dataset_progresses if dp.get_trimmed_dataset_name() == dataset_name), None)
+
+     def is_collection_completed(self, collection_name: str) -> bool:
+         """
+         Checks if a collection with the given name has been completed.
+
+         Args:
+             collection_name (str): The name of the collection to check.
+
+         Returns:
+             bool: True if the collection has been completed, False otherwise.
+         """
+         for dataset_progress in self.dataset_progresses:
+             if dataset_progress.get_trimmed_dataset_name() == collection_name:
+                 return dataset_progress.completed
+
+         return False
+
+     def determine_if_cache_update_completed(self) -> bool:
+         """
+         Checks if the one-time cache update has completed.
+         There is no collection named "cache", so this checks whether ANY dataset has
+         started processing: the cache update runs once, before the first dataset, so
+         a started dataset implies the cache update already ran. This is what tells
+         the merge, on its next iteration, that the update can be skipped.
+
+         Returns:
+             bool: True if the cache update has already completed, False otherwise.
+         """
+         return any(dataset_progress.is_started() for dataset_progress in self.dataset_progresses)
+
+     def get_url_path(self) -> str:
+         """
+         Returns the URL path for the API request.
+
+         Returns:
+             str: URL path for the API request.
+         """
+         return f"/v2/partial_reruns/{self.partial_well_rerun_id}/app_progress/{self.app_id}"
+
+     def to_dict(self) -> dict:
+         """
+         Converts the merging progress to a dict.
+
+         Returns:
+             dict: progress as a dict.
+         """
+         dataset_progresses = [dataset_progress.to_dict() for dataset_progress in self.dataset_progresses]
+
+         output = dict(app_progress=dict(status=self.status.value, dataset_progresses=dataset_progresses))
+
+         if self.fail_reason:
+             output["app_progress"]["fail_reason"] = self.fail_reason
+
+         return output
+
+     def update_status(self) -> requests.Response:
+         """
+         Sends the updated merging progress to the API.
+
+         Returns:
+             requests.Response: The raw HTTP response from the PATCH request.
+         """
+         url_path = self.get_url_path()
+         body = json.dumps(self.to_dict())
+         response = self.api.patch(url_path, data=body)
+         return response.response
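A sketch of the intended lifecycle of MergingProgress, assuming an authenticated API instance and a rerun/app pair that exists on the backend; all IDs and names below are hypothetical:

# Hypothetical usage sketch; IDs and dataset names are made up.
api = API()  # assumption: API() reads its credentials from the environment
merging = MergingProgress(partial_well_rerun_id=123, app_id=456, api=api)

dp = merging.get_dataset_progress("circulation.volumetric")
if dp and not dp.is_completed():
    # ... merge this dataset, then record it:
    dp.mark_completed_at(1700000000)

if all(dp.is_completed() for dp in merging.dataset_progresses):
    merging.complete_status()          # or merging.fail_status("reason") on error

merging.update_status()                # PATCHes the progress back to the API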
worker/state/__init__.py ADDED
@@ -0,0 +1,96 @@
+ import os
+ from typing import List, Union
+
+ import redis
+ import simplejson as json
+
+ from worker.data.json_encoder import JsonEncoder
+ from worker.mixins.logging import Logger
+
+
+ class RedisHandler:
+     redis_ttl = int(os.getenv("REDIS_TTL", 90 * 24 * 3600))  # 90 days by default; env values are strings
+     _redis_connection = None
+
+     @classmethod
+     def _get_redis_connection(cls) -> redis.client.Redis:
+         """
+         Set up Redis and get a connection if one does not already exist
+         :return:
+         """
+         # Use the previous connection if it is cached
+         if cls._redis_connection:
+             Logger.debug("Using existing connection")
+             return cls._redis_connection
+
+         # If a connection does not exist, create a new one
+         cls._redis_connection = cls._new_redis_connection()
+
+         return cls._redis_connection
+
+     @classmethod
+     def _new_redis_connection(cls) -> redis.client.Redis:
+         """
+         Creates a new Redis connection
+         :return:
+         """
+         cache_url = os.getenv("CACHE_URL", None)
+
+         if not cache_url:
+             raise Exception("Redis key (CACHE_URL) not found in environment variables.")
+
+         _redis_connection = redis.Redis.from_url(cache_url)
+
+         if not _redis_connection:
+             raise Exception(f"Could not connect to Redis with URL: {cache_url}")
+
+         client_id = _redis_connection.client_id()
+         Logger.log(f"Created a new connection with {client_id=}")
+         return _redis_connection
+
+     @classmethod
+     def setup_existing_connection(cls, existing_connection: Union[redis.client.Redis, None]) -> redis.client.Redis:
+         """
+         Set the internal connection variable to the existing connection provided to this function.
+         The returned output should be saved to a global variable if the intent is to cache the
+         connection across lambda invocations
+         :param existing_connection:
+         :return:
+         """
+         cls._redis_connection = existing_connection
+         return cls._get_redis_connection()
+
+     @classmethod
+     def load_state(cls, state_key: str) -> dict:
+         """
+         Load the state from Redis
+         :return:
+         """
+         state = cls._get_redis_connection().get(state_key)
+         if state:
+             return json.loads(state)
+
+         return {}
+
+     @classmethod
+     def save_state(cls, state: dict, state_key: str, **kwargs):
+         """
+         Save the state to Redis. Uses redis_ttl from kwargs if passed; falls back to the
+         REDIS_TTL env var, then to the default of 90 days
+         :param state:
+         :param state_key:
+         :return:
+         """
+         # Use the redis ttl from kwargs if it was passed in.
+         redis_ttl = kwargs.pop("redis_ttl", cls.redis_ttl)
+         cls._get_redis_connection().set(
+             state_key, value=json.dumps(state, cls=JsonEncoder, ignore_nan=True), ex=redis_ttl
+         )
+
+     @classmethod
+     def delete_states(cls, state_keys: List[str]):
+         """
+         Delete the states corresponding to the given keys
+         :param state_keys: a list of redis keys
+         :return:
+         """
+         cls._get_redis_connection().delete(*state_keys)
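A sketch of the lambda caching pattern the connection handling is built for, assuming CACHE_URL is set; the state key and payload below are hypothetical:

# Hypothetical usage sketch; the key and values are made up.
redis_connection = None  # module-level, survives across warm lambda invokes

def lambda_handler(event, context):
    global redis_connection
    redis_connection = RedisHandler.setup_existing_connection(redis_connection)

    state = RedisHandler.load_state("corva/1234.my-app")  # {} if the key is absent
    state["last_processed_timestamp"] = 1700000000
    RedisHandler.save_state(state, "corva/1234.my-app", redis_ttl=3600)  # 1-hour TTL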
worker/state/mixins.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ import sys
+ from functools import cached_property
+ from typing import List, Optional, Union
+
+ import simplejson as json
+
+ from worker.data.json_encoder import JsonEncoder
+ from worker.mixins.logging import LoggingMixin
+ from worker.mixins.rollbar import RollbarMixin
+ from worker.state import RedisHandler
+
+
+ class RedisMixin(LoggingMixin, RollbarMixin):
+     """
+     Mixin for loading/saving a module's state in Redis. The consuming class is
+     expected to provide `asset_id`, `app_key` and `module_key` attributes, which
+     are used to build the default state key.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.state = {}
+
+     @cached_property
+     def _size_limits(self):
+         state_storage_limit_fatal = os.getenv("STATE_STORAGE_CRITICAL_LIMIT", 1_000.0)  # 1,000 kB (~1 MB)
+         state_storage_limit_warning = os.getenv("STATE_STORAGE_WARNING_LIMIT", 100.0)  # 100 kB
+         size_limits = {"fatal": float(state_storage_limit_fatal), "warning": float(state_storage_limit_warning)}
+         return size_limits
+
+     def load_state(self, state_key: Union[str, None] = None, raise_warnings: bool = False) -> dict:
+         """
+         Load the state from Redis
+         :param state_key:
+         :param raise_warnings: Set to True if size-limit warnings should be printed
+         :return:
+         """
+         if not state_key:
+             state_key = self.get_formatted_state_key(self.asset_id, self.app_key, self.module_key)
+
+         state = RedisHandler.load_state(state_key)
+         size_object = self.check_state_size(state, state_key, raise_warnings=raise_warnings)
+
+         self.debug(self.asset_id, f"Retrieved state of size {size_object} kB")
+         self.state = state
+         return state
+
+     def save_state(self, state_key: Union[str, None] = None, raise_warnings: bool = True, **kwargs) -> None:
+         """
+         Save the state to Redis
+         :param state_key:
+         :param raise_warnings: Set to True if size-limit warnings should be printed
+         :return:
+         """
+         if not state_key:
+             state_key = self.get_formatted_state_key(self.asset_id, self.app_key, self.module_key)
+
+         size_object = self.check_state_size(self.state, state_key, raise_warnings=raise_warnings)
+         RedisHandler.save_state(self.state, state_key, **kwargs)
+         self.debug(self.asset_id, f"Saved state of size {size_object} kB")
+
+     def delete_states(self, state_keys: Union[List[str], str]):
+         """
+         Delete the states for the given keys
+         :param state_keys:
+         :return:
+         """
+         if not isinstance(state_keys, list):
+             state_keys = [state_keys]
+
+         RedisHandler.delete_states(state_keys)
+         self.debug(None, "Deleted state from Redis")
+
+     @staticmethod
+     def get_formatted_state_key(asset_id: int, app_key: str, module_key: Optional[str] = None) -> str:
+         """
+         Returns the state key in the Corva naming format
+         :param asset_id:
+         :param app_key:
+         :param module_key:
+         :return:
+         """
+         state_key = f"corva/{asset_id}.{app_key}"
+
+         if module_key:
+             return f"{state_key}.{module_key}"
+
+         return state_key
+
+     def check_state_size(self, state, state_key, raise_warnings=True):
+         """
+         Check the size of the state dictionary and generate warnings if necessary
+         :param state:
+         :param state_key:
+         :param raise_warnings:
+         :return:
+         """
+         size_object = sys.getsizeof(json.dumps(state, cls=JsonEncoder, ignore_nan=True)) / 1024
+
+         if not raise_warnings:
+             return size_object
+
+         size_limit = self._size_limits["fatal"]
+         if size_object > size_limit:
+             message = f"State_key {state_key} is of size {size_object} kB > {size_limit} kB."
+             self.fatal(self.asset_id, message)
+             self.track_message(message, level="critical")
+             return size_object
+
+         size_limit = self._size_limits["warning"]
+         if size_object > size_limit:
+             message = f"State_key {state_key} is of size {size_object} kB > {size_limit} kB."
+             self.warn(self.asset_id, message)
+             return size_object
+
+         return size_object
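A sketch of how a worker module might consume the mixin, assuming LoggingMixin and RollbarMixin allow no-argument construction and that the consuming class supplies the attributes named in the docstring; all identifiers below are hypothetical:

# Hypothetical sketch; asset/app/module identifiers are made up.
class MyModule(RedisMixin):
    def __init__(self):
        super().__init__()
        self.asset_id = 1234
        self.app_key = "my-app"
        self.module_key = "my-module"

module = MyModule()
state = module.load_state()            # key: corva/1234.my-app.my-module
state["last_processed_timestamp"] = 1700000000
module.state = state
module.save_state()                    # warns above 100 kB, fatal above 1,000 kB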
worker/state/state.py ADDED
@@ -0,0 +1,46 @@
+ import simplejson as json
+
+ from worker.data.json_encoder import JsonEncoder
+
+
+ class State(dict):
+     def __init__(self, fields: dict, state: dict = None, *args, **kwargs):
+         """
+         :param fields: a dictionary that holds the state items; the value of each key
+             is a conversion callable (typically the data type) applied on load
+         :param state:
+         :param args:
+         :param kwargs:
+         """
+         self.fields = fields
+
+         super().__init__(*args, **kwargs)
+         if not state:
+             state = {}
+
+         for field, conversion in self.fields.items():
+             self.set_field_if_exists(field, state, conversion)
+
+     def set_field_if_exists(self, field, state, conversion):
+         if not field:
+             return
+
+         value = None
+
+         if field in state and state[field] is not None:
+             value = state[field]
+             if conversion:
+                 value = conversion(value)
+
+         self[field] = value
+
+     def to_json(self):
+         output = {field: self.get(field, None) for field in self.fields.keys()}
+         output = json.dumps(output, cls=JsonEncoder, ignore_nan=True)
+
+         return output
+
+     def get(self, key, default=None):
+         # Like dict.get, but also falls back to the default when the stored value is None
+         value = super().get(key, None)
+         if value is None:
+             value = default
+         return value
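For illustration, how the conversion callables in `fields` behave on load (a sketch; field names and values are made up):

# Hypothetical sketch; fields and values are made up.
fields = {"last_timestamp": int, "mode": str}
state = State(fields, {"last_timestamp": "1700000000"})

assert state["last_timestamp"] == 1700000000       # converted via int()
assert state["mode"] is None                       # missing fields default to None
assert state.get("mode", "rotary") == "rotary"     # get() falls back on None
print(state.to_json())                             # {"last_timestamp": 1700000000, "mode": null}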
worker/test/__init__.py ADDED
@@ -0,0 +1,3 @@
+ # flake8: noqa: F401
+
+ from worker.test.lambda_function_test_run import AppTestRun
worker/test/lambda_function_test_run.py ADDED
@@ -0,0 +1,196 @@
+ """
+ This file is used to trigger lambda functions locally and validate the app's
+ results against actual data.
+ An example of using this functionality is shown in 'app_test.py' below;
+ that file can then be run from the command line.
+
+ Note: make sure you run your tests on the QA environment.
+
+ == app_test.py file
+ # add the main directory as the first path
+ from os.path import dirname
+ import sys
+ parent = dirname(dirname(__file__))
+ sys.path.insert(0, parent)
+
+ if __name__ == '__main__':
+     collections = ['collection_to_delete']
+     app = AppTestRun(lambda_function.lambda_handler, collections)
+     app.run()
+
+ > python app_test.py -a 16886 -d True
+ """
+
+ import argparse
+ from typing import List
+
+ from dotenv import load_dotenv
+ from tqdm import tqdm
+
+ from worker import constants
+ from worker.data.operations import (
+     delete_collection_data_of_asset_id,
+     gather_data_for_period,
+     get_one_data_record,
+     point_main_envs,
+ )
+ from worker.state.mixins import RedisMixin
+ from worker.test.utils import create_scheduler_events, get_last_processed_timestamp
+
+ load_dotenv(override=True)
+
+
+ def strtobool(val):
+     """Convert a string representation of truth to true (1) or false (0).
+
+     True values are 'y', 'yes', 't', 'true', 'on', and '1'.
+     False values are 'n', 'no', 'f', 'false', 'off', and '0'.
+     Raises ValueError if 'val' is anything else.
+     """
+     val = val.lower()
+     if val in ("y", "yes", "t", "true", "on", "1"):
+         return 1
+     elif val in ("n", "no", "f", "false", "off", "0"):
+         return 0
+     else:
+         raise ValueError("invalid truth value %r" % (val,))
+
+
+ def generate_runner_parser():
+     """
+     Create the supported command-line arguments
+     :return:
+     """
+     parser = argparse.ArgumentParser(description="Run your tests on an asset.")
+     parser.add_argument(
+         "-v",
+         "--environment",
+         "--env",
+         type=str,
+         required=False,
+         help="environment, options: 'local', 'qa', 'staging', 'production'",
+     )
+     parser.add_argument("-a", "--asset_id", "--id", type=int, required=True, help="set asset_id")
+     parser.add_argument(
+         "-s", "--start_timestamp", "--start", type=int, required=False, default=None, help="start timestamp"
+     )
+     parser.add_argument("-e", "--end_timestamp", "--end", type=int, required=False, default=None, help="end timestamp")
+     parser.add_argument(
+         "-i",
+         "--timestep",
+         "--step",
+         type=int,
+         required=False,
+         default=60,
+         help="trigger the lambda function once every step",
+     )
+     parser.add_argument(
+         "-d",
+         "--to_delete",
+         "--delete",
+         type=strtobool,
+         required=False,
+         default=False,
+         help="delete the state and data before the run",
+     )
+     return parser
+
+
+ class AppTestRun:
+     def __init__(self, lambda_handler, collections: List[str], args: List[str] = None):
+         """
+         :param lambda_handler: lambda handler function to run
+         :param collections: collections to erase in case to_delete is on
+         :param args: arguments for the run
+         """
+         self.lambda_handler = lambda_handler
+         self.collections = collections
+
+         self.event_type = None
+         self.progress = None
+
+         if args is None:
+             parser = generate_runner_parser()
+             args = parser.parse_args()
+
+         self.initialize(args)
+
+     def initialize(self, args):
+         environment = args.environment
+
+         # direct the main environment variables to the provided env: API_KEY, API_ROOT_URL, CACHE_URL
+         point_main_envs(environment)
+
+         asset_id = args.asset_id
+
+         start_timestamp = args.start_timestamp
+         end_timestamp = args.end_timestamp
+         step = args.timestep
+         self.event_type = constants.get("global.event-type")
+         to_delete = args.to_delete
+
+         state_keys = construct_state_keys(asset_id)
+
+         if not start_timestamp:
+             start_timestamp = get_one_data_record(asset_id, timestamp_sort=+1).get("timestamp")
+
+         if not end_timestamp:
+             end_timestamp = get_one_data_record(asset_id, timestamp_sort=-1).get("timestamp")
+
+         if to_delete:
+             delete_state_data_of_asset_id(state_keys)
+             delete_collection_data_of_asset_id(asset_id, self.collections)
+             print("Deleted relevant Redis states and collections for this asset!")
+
+         start_timestamp = get_last_processed_timestamp(asset_id, state_keys[0]) or start_timestamp
+         print(f"asset_id: {asset_id}, timestamp interval: [{start_timestamp}, {end_timestamp}]")
+
+         events = create_scheduler_events(asset_id, start_timestamp, end_timestamp, step)
+
+         self.progress = tqdm(events, ncols=150)
+
+     def run(self):
+         print("\nRunning the main module started ...")
+         for event in self.progress:
+             schedule_time = str(int(event[0][0]["schedule_start"] / 1000))
+             if self.event_type == "wits_stream":
+                 wits = gather_data_for_period(
+                     int(event[0][0]["asset_id"]),
+                     int(event[0][0]["schedule_start"] / 1000),
+                     int(event[0][0]["schedule_end"] / 1000),
+                 )
+                 if not wits:
+                     continue
+
+                 event = [{"metadata": {}, "records": wits}]
+
+             self.lambda_handler(event, None)
+             self.progress.set_description(schedule_time)
+
+
+ def construct_state_keys(asset_id: int) -> List[str]:
+     """
+     Get the constants dict of an app and construct the storage keys.
+     Note: the first key is the global app key.
+     :param asset_id: well asset id
+     :return: a list of storage keys
+     """
+     app_key = constants.get("global.app-key")
+     module_keys = list(constants.get(app_key).keys())
+     state_app = RedisMixin()
+     global_state_key = state_app.get_formatted_state_key(asset_id, app_key)
+
+     module_state_keys = [state_app.get_formatted_state_key(asset_id, app_key, module) for module in module_keys]
+
+     return [global_state_key, *module_state_keys]
+
+
+ def delete_state_data_of_asset_id(state_keys: List[str]):
+     """
+     Delete the states for the given keys
+     :param state_keys:
+     :return:
+     """
+     RedisMixin().delete_states(state_keys)
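For reference, run() indexes each event as event[0][0][...], so each scheduler event produced by create_scheduler_events appears to be a nested list holding one dict; a hypothetical example:

# Inferred (hypothetical) shape of one scheduler event, based on how run() indexes it.
event = [[{
    "asset_id": 16886,
    "schedule_start": 1700000000000,  # milliseconds
    "schedule_end": 1700000060000,
}]]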