runnable 0.17.1-py3-none-any.whl → 0.19.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
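To reproduce a diff like this locally, you can download both wheels and compare their unpacked trees. Below is a minimal sketch, assuming a Unix-like system with pip and diff on the PATH; everything beyond the package name and versions in the header above is illustrative:

    import subprocess
    import tempfile
    import zipfile
    from pathlib import Path

    def fetch(version: str, dest: Path) -> Path:
        # Download just the wheel (no dependencies), then unpack it.
        dest.mkdir(parents=True, exist_ok=True)
        subprocess.run(
            ["pip", "download", f"runnable=={version}", "--no-deps", "-d", str(dest)],
            check=True,
        )
        wheel = next(dest.glob("runnable-*.whl"))
        out = dest / "unpacked"
        zipfile.ZipFile(wheel).extractall(out)
        return out

    with tempfile.TemporaryDirectory() as d:
        old = fetch("0.17.1", Path(d) / "old")
        new = fetch("0.19.0", Path(d) / "new")
        subprocess.run(["diff", "-r", str(old), str(new)])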
Files changed (47)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/file_system.py +253 -0
  5. extensions/catalog/pyproject.toml +14 -0
  6. extensions/job_executor/README.md +0 -0
  7. extensions/job_executor/__init__.py +160 -0
  8. extensions/job_executor/k8s.py +484 -0
  9. extensions/job_executor/k8s_job_spec.yaml +37 -0
  10. extensions/job_executor/local.py +61 -0
  11. extensions/job_executor/local_container.py +192 -0
  12. extensions/job_executor/pyproject.toml +16 -0
  13. extensions/nodes/README.md +0 -0
  14. extensions/nodes/nodes.py +954 -0
  15. extensions/nodes/pyproject.toml +15 -0
  16. extensions/pipeline_executor/README.md +0 -0
  17. extensions/pipeline_executor/__init__.py +644 -0
  18. extensions/pipeline_executor/argo.py +1307 -0
  19. extensions/pipeline_executor/argo_specification.yaml +51 -0
  20. extensions/pipeline_executor/local.py +62 -0
  21. extensions/pipeline_executor/local_container.py +362 -0
  22. extensions/pipeline_executor/mocked.py +161 -0
  23. extensions/pipeline_executor/pyproject.toml +16 -0
  24. extensions/pipeline_executor/retry.py +180 -0
  25. extensions/run_log_store/README.md +0 -0
  26. extensions/run_log_store/__init__.py +0 -0
  27. extensions/run_log_store/chunked_fs.py +113 -0
  28. extensions/run_log_store/db/implementation_FF.py +163 -0
  29. extensions/run_log_store/db/integration_FF.py +0 -0
  30. extensions/run_log_store/file_system.py +145 -0
  31. extensions/run_log_store/generic_chunked.py +599 -0
  32. extensions/run_log_store/pyproject.toml +15 -0
  33. extensions/secrets/README.md +0 -0
  34. extensions/secrets/dotenv.py +62 -0
  35. extensions/secrets/pyproject.toml +15 -0
  36. runnable/__init__.py +1 -0
  37. runnable/catalog.py +1 -2
  38. runnable/entrypoints.py +1 -5
  39. runnable/executor.py +1 -1
  40. runnable/parameters.py +0 -9
  41. runnable/utils.py +5 -25
  42. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
  43. runnable-0.19.0.dist-info/RECORD +58 -0
  44. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
  45. runnable-0.17.1.dist-info/RECORD +0 -23
  46. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
  47. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
extensions/pipeline_executor/retry.py
@@ -0,0 +1,180 @@
+ import logging
+ from functools import cached_property
+ from typing import Any, Dict, Optional
+
+ from extensions.pipeline_executor import GenericPipelineExecutor
+ from runnable import context, defaults, exceptions
+ from runnable.datastore import RunLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class RetryExecutor(GenericPipelineExecutor):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+     Look at the function docs to understand how to use them appropriately.
+
+     For any implementation:
+     1. Who/when should the run log be set up?
+     2. Who/when should the step log be set up?
+     """
+
+     service_name: str = "retry"
+     service_type: str = "executor"
+     run_id: str
+
+     _is_local: bool = True
+     _original_run_log: Optional[RunLog] = None
+     _restart_initiated: bool = False
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     @cached_property
+     def original_run_log(self):
+         return self._context.run_log_store.get_run_log_by_id(
+             run_id=self.run_id,
+             full=True,
+         )
+
+     def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+         # Sync the previous run's catalog to this one.
+         self._context.catalog_handler.sync_between_runs(
+             previous_run_id=self.run_id, run_id=self._context.run_id
+         )
+
+         params.update(self.original_run_log.parameters)
+
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to already be present in the run log store.
+         """
+         super()._set_up_run_log(exists_ok=exists_ok)
+
+         # Should the parameters be copied from the previous execution?
+         # self._set_up_for_re_run(params=params)
+
+     def execute_from_graph(
+         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+     ):
+         """
+         This is the entry point for the graph execution.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+             * task: We can delegate to _execute_node after checking the eligibility for re-run in case of a re-run
+             * success: We can delegate to _execute_node
+             * fail: We can delegate to _execute_node
+
+         For nodes that are internally graphs:
+             * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+             * dag: Delegate the responsibility of execution to node.execute_as_graph()
+             * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+         Transpilers will NEVER call this method; it should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         step_log = self._context.run_log_store.create_step_log(
+             node.name, node._get_step_log_name(map_variable)
+         )
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+
+         # Add the step log to the database as per the situation.
+         # If it is a terminal node, complete it now.
+         if node.node_type in ["success", "fail"]:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             self._execute_node(node, map_variable=map_variable, **kwargs)
+             return
+
+         # In a retry step:
+         if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+             # If the node name does not match, we move on to the next node.
+             # If the previous run was successful, move on to the next step.
+             step_log.mock = True
+             step_log.status = defaults.SUCCESS
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             return
+
+         # We call an internal function to iterate the sub graphs and execute them.
+         if node.is_composite:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             node.execute_as_graph(map_variable=map_variable, **kwargs)
+             return
+
+         # Executor specific way to trigger a job.
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+         self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+     def _is_step_eligible_for_rerun(
+         self, node: BaseNode, map_variable: TypeMapVariable = None
+     ):
+         """
+         In case of a re-run, this method checks the step status of the previous run to determine whether a re-run is
+         necessary.
+             * True: If it is not a re-run.
+             * True: If it is a re-run and the step failed in the last run, or the corresponding logs do not exist.
+             * False: If it is a re-run and the step succeeded in the last run.
+
+         In most cases, this logic need not be touched.
+
+         Args:
+             node (Node): The node to check against re-run
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         Returns:
+             bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+         """
+
+         node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+         logger.info(
+             f"Scanning previous run logs for node logs of: {node_step_log_name}"
+         )
+
+         if self._restart_initiated:
+             return True
+
+         try:
+             previous_attempt_log, _ = (
+                 self.original_run_log.search_step_by_internal_name(node_step_log_name)
+             )
+         except exceptions.StepLogNotFoundError:
+             logger.warning(f"Did not find the node {node.name} in previous run log")
+             self._restart_initiated = True
+             return True  # We should re-run the node.
+
+         logger.info(f"The original step status: {previous_attempt_log.status}")
+
+         if previous_attempt_log.status == defaults.SUCCESS:
+             return False  # We need not run the node.
+
+         logger.info(
+             f"The new execution should start executing graph from this node {node.name}"
+         )
+         self._restart_initiated = True
+         return True
+
+     def execute_node(
+         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+     ):
+         self._execute_node(node, map_variable=map_variable, **kwargs)
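The heart of the retry behaviour above is the three-way decision in _is_step_eligible_for_rerun: steps that succeeded in the previous run are mocked and skipped, and the first failed or missing step flips _restart_initiated so that every subsequent step re-runs. A self-contained toy sketch of just that decision, with a plain dict standing in for the previous RunLog (the step names and the should_rerun helper are illustrative, not part of runnable's API):

    # Toy model of RetryExecutor's skip/re-run decision.
    previous_statuses = {"fetch": "SUCCESS", "train": "FAIL"}  # hypothetical prior run
    restart_initiated = False

    def should_rerun(step: str) -> bool:
        global restart_initiated
        if restart_initiated:  # everything after the first re-run point re-runs
            return True
        if previous_statuses.get(step) == "SUCCESS":
            return False  # succeeded last time: mock it and skip
        restart_initiated = True  # failed or missing: re-run from here on
        return True

    for step in ["fetch", "train", "report"]:
        print(step, "re-run" if should_rerun(step) else "skip (mocked)")
    # fetch -> skip (mocked), train -> re-run, report -> re-run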
extensions/run_log_store/README.md — file without changes
extensions/run_log_store/__init__.py — file without changes
extensions/run_log_store/chunked_fs.py
@@ -0,0 +1,113 @@
+ import json
+ import logging
+ from pathlib import Path
+ from string import Template
+ from typing import Any, Dict, Optional, Sequence, Union
+
+ from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+ from runnable import defaults, utils
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+ T = Union[str, Path]
+
+
+ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
+     """
+     File system run log store that chunks the run log into thread-safe pieces.
+     This enables executions to be parallel.
+     """
+
+     service_name: str = "chunked-fs"
+     log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+     def get_summary(self) -> Dict[str, Any]:
+         summary = {"Type": self.service_name, "Location": self.log_folder}
+
+         return summary
+
+     def get_matches(
+         self, run_id: str, name: str, multiple_allowed: bool = False
+     ) -> Optional[Union[Sequence[T], T]]:
+         """
+         Get the files matching the pattern name*.
+
+         Args:
+             run_id (str): The run id
+             name (str): The prefix of the file name to check in the run log store.
+         """
+         log_folder = self.log_folder_with_run_id(run_id=run_id)
+         sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+         matches = list(log_folder.glob(f"{sub_name}*"))
+
+         if matches:
+             if not multiple_allowed:
+                 if len(matches) > 1:
+                     msg = f"Multiple matches found for {name} while multiple is not allowed"
+                     raise Exception(msg)
+                 return matches[0]
+             return matches
+
+         return None
+
+     def log_folder_with_run_id(self, run_id: str) -> Path:
+         """
+         Utility function to get the log folder for a run id.
+
+         Args:
+             run_id (str): The run id
+
+         Returns:
+             Path: The path to the log folder for the run id
+         """
+         return Path(self.log_folder) / run_id
+
+     def safe_suffix_json(self, name: Union[Path, str]) -> str:
+         """
+         Safely attach a .json suffix to a file name.
+
+         Args:
+             name (Path): The name of the file, with or without the json suffix
+
+         Returns:
+             str: The name of the file with .json
+         """
+         if str(name).endswith("json"):
+             return str(name)
+
+         return str(name) + ".json"
+
+     def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+         """
+         Store the contents against the name in the run's log folder.
+
+         Args:
+             run_id (str): The run id
+             contents (dict): The dict to store
+             name (str): The name to store it as
+         """
+         if insert:
+             name = self.log_folder_with_run_id(run_id=run_id) / name
+
+         utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+         with open(self.safe_suffix_json(name), "w") as fw:
+             json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+     def _retrieve(self, name: Union[str, Path]) -> dict:
+         """
+         Does the job of retrieving from the folder.
+
+         Args:
+             name (str): The name of the file to retrieve
+
+         Returns:
+             dict: The contents
+         """
+         contents: dict = {}
+
+         with open(self.safe_suffix_json(name), "r") as fr:
+             contents = json.load(fr)
+
+         return contents
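Worth noting is how get_matches locates chunks: chunk names are string.Template patterns with a ${creation_time} placeholder, and substituting an empty string turns the pattern into a glob prefix. A standalone sketch of that trick (the parameter-${creation_time} pattern is made up for illustration; the real patterns come from ChunkedRunLogStore):

    from pathlib import Path
    from string import Template
    from tempfile import TemporaryDirectory

    name = "parameter-${creation_time}"  # hypothetical chunk-name pattern
    prefix = Template(name).safe_substitute({"creation_time": ""})  # -> "parameter-"

    with TemporaryDirectory() as d:
        folder = Path(d)
        (folder / "parameter-2024-01-01.json").touch()
        (folder / "parameter-2024-01-02.json").touch()
        matches = sorted(folder.glob(f"{prefix}*"))  # the same glob get_matches builds
        print([m.name for m in matches])
    # ['parameter-2024-01-01.json', 'parameter-2024-01-02.json']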
extensions/run_log_store/db/implementation_FF.py
@@ -0,0 +1,163 @@
+ import datetime
+ import json
+ import logging
+ from pathlib import Path
+ from string import Template
+ from typing import Any, Dict, List, Optional, Union, cast
+
+ from runnable import defaults, utils
+ from runnable.extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class DBRunLogStore(ChunkedRunLogStore):
+     """
+     Database-backed run log store that fragments the run log into thread-safe chunks.
+     This enables executions to be parallel.
+     """
+
+     service_name: str = "chunked-fs"
+     connection_string: str
+     db_name: str
+
+     _DB_LOG: Any = None
+     _engine: Any = None
+     _session: Any = None
+     _connection_string: str = ""
+     _base: Any = None
+
+     def model_post_init(self, _: Any) -> None:
+         run_context = self._context
+
+         secrets = cast(Dict[str, str], run_context.secrets_handler.get())
+         connection_string = Template(self.connection_string).safe_substitute(**secrets)
+
+         try:
+             import sqlalchemy
+             from sqlalchemy import Column, DateTime, Integer, Sequence, Text
+             from sqlalchemy.orm import declarative_base, sessionmaker
+
+             Base = declarative_base()
+
+             class DBLog(Base):
+                 """
+                 Base table for storing run logs in the database.
+
+                 In this model, we fragment the run log into logical units that are concurrency-safe.
+                 """
+
+                 __tablename__ = self.db_name
+                 pk = Column(Integer, Sequence("id_seq"), primary_key=True)
+                 run_id = Column(Text, index=True)
+                 attribute_key = Column(
+                     Text
+                 )  # run_log, step_internal_name, parameter_key, etc.
+                 attribute_type = Column(Text)  # RunLog, Step, Branch, Parameter
+                 attribute_value = Column(Text)  # The JSON string
+                 created_at = Column(DateTime, default=datetime.datetime.utcnow)
+
+             self._engine = sqlalchemy.create_engine(
+                 connection_string, pool_pre_ping=True
+             )
+             self._session = sessionmaker(bind=self._engine)
+             self._DB_LOG = DBLog
+             self._connection_string = connection_string
+             self._base = Base
+
+         except ImportError as _e:
+             logger.exception("Unable to import SQLAlchemy, is it installed?")
+             msg = "SQLAlchemy is required for this extension. Please install it."
+             raise Exception(msg) from _e
+
+     def create_tables(self):
+         import sqlalchemy
+
+         engine = sqlalchemy.create_engine(self._connection_string)
+         self._base.metadata.create_all(engine)
+
+     def get_matches(
+         self, run_id: str, name: str, multiple_allowed: bool = False
+     ) -> Optional[Union[List[Path], Path]]:
+         """
+         Get the files matching the pattern name*.
+
+         Args:
+             run_id (str): The run id
+             name (str): The prefix of the file name to check in the run log store.
+         """
+         log_folder = self.log_folder_with_run_id(run_id=run_id)
+
+         sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+         matches = list(log_folder.glob(f"{sub_name}*"))
+         if matches:
+             if not multiple_allowed:
+                 if len(matches) > 1:
+                     msg = f"Multiple matches found for {name} while multiple is not allowed"
+                     raise Exception(msg)
+                 return matches[0]
+             return matches
+
+         return None
+
+     def log_folder_with_run_id(self, run_id: str) -> Path:
+         """
+         Utility function to get the log folder for a run id.
+
+         Args:
+             run_id (str): The run id
+
+         Returns:
+             Path: The path to the log folder for the run id
+         """
+         return Path(self.log_folder) / run_id
+
+     def safe_suffix_json(self, name: Union[Path, str]) -> str:
+         """
+         Safely attach a .json suffix to a file name.
+
+         Args:
+             name (Path): The name of the file, with or without the json suffix
+
+         Returns:
+             str: The name of the file with .json
+         """
+         if str(name).endswith("json"):
+             return str(name)
+
+         return str(name) + ".json"
+
+     def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+         """
+         Store the contents against the name in the folder.
+
+         Args:
+             run_id (str): The run id
+             contents (dict): The dict to store
+             name (str): The name to store it as
+         """
+         if insert:
+             name = self.log_folder_with_run_id(run_id=run_id) / name
+
+         utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+         with open(self.safe_suffix_json(name), "w") as fw:
+             json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+     def _retrieve(self, name: Path) -> dict:
+         """
+         Does the job of retrieving from the folder.
+
+         Args:
+             name (str): The name of the file to retrieve
+
+         Returns:
+             dict: The contents
+         """
+         contents: dict = {}
+
+         with open(self.safe_suffix_json(name), "r") as fr:
+             contents = json.load(fr)
+
+         return contents
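The DBLog table above stores each logical unit of a run log as one row: attribute_key names the unit (for example, a step's internal name), attribute_type tags its kind, and attribute_value carries the JSON payload. A hedged sketch of that row model against in-memory SQLite, requiring SQLAlchemy 1.4+ (the table name and inserted payload are made up; the columns mirror the DBLog definition above):

    import datetime
    import json

    import sqlalchemy
    from sqlalchemy import Column, DateTime, Integer, Text
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class DBLog(Base):  # mirrors the DBLog definition in model_post_init above
        __tablename__ = "run_logs"  # self.db_name in the extension; fixed here
        pk = Column(Integer, primary_key=True)
        run_id = Column(Text, index=True)
        attribute_key = Column(Text)    # run_log, step_internal_name, parameter_key
        attribute_type = Column(Text)   # RunLog, Step, Branch, Parameter
        attribute_value = Column(Text)  # the JSON string
        created_at = Column(DateTime, default=datetime.datetime.utcnow)

    engine = sqlalchemy.create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    with Session() as session:  # one row per concurrency-safe fragment
        session.add(DBLog(
            run_id="run-001",
            attribute_key="train",  # a step's internal name
            attribute_type="Step",
            attribute_value=json.dumps({"status": "SUCCESS"}),  # made-up payload
        ))
        session.commit()
        row = session.query(DBLog).filter_by(run_id="run-001").one()
        print(row.attribute_type, json.loads(row.attribute_value))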
extensions/run_log_store/db/integration_FF.py — file without changes
extensions/run_log_store/file_system.py
@@ -0,0 +1,145 @@
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from runnable import defaults, exceptions, utils
+ from runnable.datastore import BaseRunLogStore, RunLog
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class FileSystemRunLogstore(BaseRunLogStore):
+     """
+     In this type of run log store, we use the file system to store the JSON run log.
+
+     Every single run is stored as a different file, which makes it compatible across other store types.
+
+     When to use:
+         When locally testing a pipeline and you need to compare across runs.
+         It's fully featured and perfectly fine if your local environment is where you would do everything.
+
+     Do not use:
+         If you need parallelization locally; this run log store does not support it.
+
+     Example config:
+
+     run_log:
+       type: file-system
+       config:
+         log_folder: The folder to put the logs in. Defaults to .run_log_store
+     """
+
+     service_name: str = "file-system"
+     log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+     @property
+     def log_folder_name(self):
+         return self.log_folder
+
+     def get_summary(self) -> Dict[str, Any]:
+         summary = {"Type": self.service_name, "Location": self.log_folder}
+
+         return summary
+
+     def write_to_folder(self, run_log: RunLog):
+         """
+         Write the run log to the folder.
+
+         Args:
+             run_log (RunLog): The run log to be added to the store
+         """
+         write_to = self.log_folder_name
+         utils.safe_make_dir(write_to)
+
+         write_to_path = Path(write_to)
+         run_id = run_log.run_id
+         json_file_path = write_to_path / f"{run_id}.json"
+
+         with json_file_path.open("w") as fw:
+             json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
+
+     def get_from_folder(self, run_id: str) -> RunLog:
+         """
+         Look into the run log folder for the run log of the run id.
+
+         If the run log does not exist, raise an exception. If it does, decode it
+         as a RunLog and return it.
+
+         Args:
+             run_id (str): The run id whose run log is requested
+
+         Raises:
+             FileNotFoundError: If the run log has not been found.
+
+         Returns:
+             RunLog: The decoded run log
+         """
+         write_to = self.log_folder_name
+
+         read_from_path = Path(write_to)
+         json_file_path = read_from_path / f"{run_id}.json"
+
+         if not json_file_path.exists():
+             raise FileNotFoundError(f"Expected {json_file_path} is not present")
+
+         with json_file_path.open("r") as fr:
+             json_str = json.load(fr)
+             run_log = RunLog(**json_str)  # pylint: disable=no-member
+         return run_log
+
+     def create_run_log(
+         self,
+         run_id: str,
+         dag_hash: str = "",
+         use_cached: bool = False,
+         tag: str = "",
+         original_run_id: str = "",
+         status: str = defaults.CREATED,
+     ) -> RunLog:
+         """
+         Creates a run log and adds it to the store.
+         """
+
+         try:
+             self.get_run_log_by_id(run_id=run_id, full=False)
+             raise exceptions.RunLogExistsError(run_id=run_id)
+         except exceptions.RunLogNotFoundError:
+             pass
+
+         logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+         run_log = RunLog(
+             run_id=run_id,
+             dag_hash=dag_hash,
+             tag=tag,
+             status=status,
+         )
+         self.write_to_folder(run_log)
+         return run_log
+
+     def get_run_log_by_id(
+         self,
+         run_id: str,
+         full: bool = False,
+     ) -> RunLog:
+         """
+         Returns the run log defined by the id; raises an exception if not found.
+         """
+         try:
+             logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+             run_log = self.get_from_folder(run_id)
+             return run_log
+         except FileNotFoundError as e:
+             raise exceptions.RunLogNotFoundError(run_id) from e
+
+     def put_run_log(self, run_log: RunLog):
+         """
+         Puts the run log into the store.
+         """
+         logger.info(
+             f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+         )
+         self.write_to_folder(run_log)
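The net effect of FileSystemRunLogstore is one pretty-printed JSON file per run at <log_folder>/<run_id>.json. A dependency-free sketch of the same round trip the store performs (the payload dict is a stand-in; the real store serializes a full RunLog model via model_dump):

    import json
    from pathlib import Path
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as d:
        log_folder = Path(d) / ".run_log_store"  # the default log_folder
        log_folder.mkdir(parents=True, exist_ok=True)  # what utils.safe_make_dir does

        run_id = "run-001"
        run_log = {"run_id": run_id, "status": "SUCCESS", "steps": {}}  # stand-in payload

        # write_to_folder: dump the model as ASCII, indented JSON
        path = log_folder / f"{run_id}.json"
        path.write_text(json.dumps(run_log, ensure_ascii=True, indent=4))

        # get_from_folder: read it back, raising if the file is absent
        if not path.exists():
            raise FileNotFoundError(f"Expected {path} is not present")
        print(json.loads(path.read_text())["status"])  # SUCCESS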