runnable 0.17.1-py3-none-any.whl → 0.19.0-py3-none-any.whl

Files changed (47)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/file_system.py +253 -0
  5. extensions/catalog/pyproject.toml +14 -0
  6. extensions/job_executor/README.md +0 -0
  7. extensions/job_executor/__init__.py +160 -0
  8. extensions/job_executor/k8s.py +484 -0
  9. extensions/job_executor/k8s_job_spec.yaml +37 -0
  10. extensions/job_executor/local.py +61 -0
  11. extensions/job_executor/local_container.py +192 -0
  12. extensions/job_executor/pyproject.toml +16 -0
  13. extensions/nodes/README.md +0 -0
  14. extensions/nodes/nodes.py +954 -0
  15. extensions/nodes/pyproject.toml +15 -0
  16. extensions/pipeline_executor/README.md +0 -0
  17. extensions/pipeline_executor/__init__.py +644 -0
  18. extensions/pipeline_executor/argo.py +1307 -0
  19. extensions/pipeline_executor/argo_specification.yaml +51 -0
  20. extensions/pipeline_executor/local.py +62 -0
  21. extensions/pipeline_executor/local_container.py +362 -0
  22. extensions/pipeline_executor/mocked.py +161 -0
  23. extensions/pipeline_executor/pyproject.toml +16 -0
  24. extensions/pipeline_executor/retry.py +180 -0
  25. extensions/run_log_store/README.md +0 -0
  26. extensions/run_log_store/__init__.py +0 -0
  27. extensions/run_log_store/chunked_fs.py +113 -0
  28. extensions/run_log_store/db/implementation_FF.py +163 -0
  29. extensions/run_log_store/db/integration_FF.py +0 -0
  30. extensions/run_log_store/file_system.py +145 -0
  31. extensions/run_log_store/generic_chunked.py +599 -0
  32. extensions/run_log_store/pyproject.toml +15 -0
  33. extensions/secrets/README.md +0 -0
  34. extensions/secrets/dotenv.py +62 -0
  35. extensions/secrets/pyproject.toml +15 -0
  36. runnable/__init__.py +1 -0
  37. runnable/catalog.py +1 -2
  38. runnable/entrypoints.py +1 -5
  39. runnable/executor.py +1 -1
  40. runnable/parameters.py +0 -9
  41. runnable/utils.py +5 -25
  42. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
  43. runnable-0.19.0.dist-info/RECORD +58 -0
  44. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
  45. runnable-0.17.1.dist-info/RECORD +0 -23
  46. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
  47. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
extensions/pipeline_executor/retry.py
@@ -0,0 +1,180 @@
+import logging
+from functools import cached_property
+from typing import Any, Dict, Optional
+
+from extensions.pipeline_executor import GenericPipelineExecutor
+from runnable import context, defaults, exceptions
+from runnable.datastore import RunLog
+from runnable.defaults import TypeMapVariable
+from runnable.nodes import BaseNode
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class RetryExecutor(GenericPipelineExecutor):
+    """
+    The skeleton of an executor class.
+    Any implementation of an executor should inherit this class and override it accordingly.
+
+    This is a loaded base class with many methods already implemented for "typical" executions.
+    Look at the function docs to understand how to use them appropriately.
+
+    For any implementation:
+    1). Who/when should the run log be set up?
+    2). Who/when should the step log be set up?
+
+    """
+
+    service_name: str = "retry"
+    service_type: str = "executor"
+    run_id: str
+
+    _is_local: bool = True
+    _original_run_log: Optional[RunLog] = None
+    _restart_initiated: bool = False
+
+    @property
+    def _context(self):
+        return context.run_context
+
+    @cached_property
+    def original_run_log(self):
+        return self._context.run_log_store.get_run_log_by_id(
+            run_id=self.run_id,
+            full=True,
+        )
+
+    def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+        # Sync the previous run log catalog to this one.
+        self._context.catalog_handler.sync_between_runs(
+            previous_run_id=self.run_id, run_id=self._context.run_id
+        )
+
+        params.update(self.original_run_log.parameters)
+
+    def _set_up_run_log(self, exists_ok=False):
+        """
+        Create a run log and put it in the run log store.
+
+        If exists_ok, we allow the run log to already be present in the run log store.
+        """
+        super()._set_up_run_log(exists_ok=exists_ok)
+
+        # Should the parameters be copied from the previous execution?
+        # self._set_up_for_re_run(params=params)
+
+    def execute_from_graph(
+        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+    ):
+        """
+        This is the entry point from the graph execution.
+
+        While self.execute_graph is responsible for traversing the graph, this function is responsible for
+        the actual execution of the node.
+
+        If the node type is:
+        * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+        * success: We can delegate to _execute_node
+        * fail: We can delegate to _execute_node
+
+        For nodes that are internally graphs:
+        * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+        * dag: Delegate the responsibility of execution to node.execute_as_graph()
+        * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+        Transpilers will NEVER use this method and will NEVER call this method.
+        This method should only be used by interactive executors.
+
+        Args:
+            node (Node): The node to execute
+            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                Defaults to None.
+        """
+        step_log = self._context.run_log_store.create_step_log(
+            node.name, node._get_step_log_name(map_variable)
+        )
+
+        self.add_code_identities(node=node, step_log=step_log)
+
+        step_log.step_type = node.node_type
+        step_log.status = defaults.PROCESSING
+
+        # Add the step log to the database as per the situation.
+        # If it is a terminal node, complete it now
+        if node.node_type in ["success", "fail"]:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            self._execute_node(node, map_variable=map_variable, **kwargs)
+            return
+
+        # In retry step
+        if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+            # If the node name does not match, we move on to the next node.
+            # If the previous run was successful, move on to the next step
+            step_log.mock = True
+            step_log.status = defaults.SUCCESS
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            return
+
+        # We call an internal function to iterate the sub graphs and execute them
+        if node.is_composite:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            node.execute_as_graph(map_variable=map_variable, **kwargs)
+            return
+
+        # Executor-specific way to trigger a job
+        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+        self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+    def _is_step_eligible_for_rerun(
+        self, node: BaseNode, map_variable: TypeMapVariable = None
+    ):
+        """
+        In case of a re-run, this method checks the previous run's step status to determine whether a re-run is
+        necessary.
+        * True: If it is not a re-run.
+        * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
+        * False: If it is a re-run and we succeeded in the last run.
+
+        In most cases, this logic need not be touched.
+
+        Args:
+            node (Node): The node to check against re-run
+            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                Defaults to None.
+
+        Returns:
+            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+        """
+
+        node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+        logger.info(
+            f"Scanning previous run logs for node logs of: {node_step_log_name}"
+        )
+
+        if self._restart_initiated:
+            return True
+
+        try:
+            previous_attempt_log, _ = (
+                self.original_run_log.search_step_by_internal_name(node_step_log_name)
+            )
+        except exceptions.StepLogNotFoundError:
+            logger.warning(f"Did not find the node {node.name} in previous run log")
+            self._restart_initiated = True
+            return True  # We should re-run the node.
+
+        logger.info(f"The original step status: {previous_attempt_log.status}")
+
+        if previous_attempt_log.status == defaults.SUCCESS:
+            return False  # We need not run the node
+
+        logger.info(
+            f"The new execution should start executing the graph from this node {node.name}"
+        )
+        self._restart_initiated = True
+        return True
+
+    def execute_node(
+        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+    ):
+        self._execute_node(node, map_variable=map_variable, **kwargs)
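The heart of this executor is _is_step_eligible_for_rerun: once any step is missing from the previous run log or did not succeed, _restart_initiated flips and every downstream step re-executes, while earlier successful steps are mocked. A standalone sketch of that decision rule (the function name and the plain "SUCCESS" string are illustrative, not runnable's API):

from typing import Dict, List

def steps_to_rerun(previous_statuses: Dict[str, str], steps_in_order: List[str]) -> List[str]:
    # previous_statuses maps step name -> status from the prior run;
    # a missing entry stands in for StepLogNotFoundError.
    rerun: List[str] = []
    restart_initiated = False
    for step in steps_in_order:
        status = previous_statuses.get(step)  # None when the step log is missing
        if restart_initiated or status != "SUCCESS":
            restart_initiated = True  # everything from here on re-runs
            rerun.append(step)
    return rerun

# Step "b" failed last time, so "b" and "c" re-run while "a" is mocked as successful.
print(steps_to_rerun({"a": "SUCCESS", "b": "FAIL"}, ["a", "b", "c"]))  # ['b', 'c']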
extensions/run_log_store/README.md (file without changes)
extensions/run_log_store/__init__.py (file without changes)
extensions/run_log_store/chunked_fs.py
@@ -0,0 +1,113 @@
+import json
+import logging
+from pathlib import Path
+from string import Template
+from typing import Any, Dict, Optional, Sequence, Union
+
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults, utils
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+T = Union[str, Path]
+
+
+class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
+    """
+    File system run log store that chunks the run log into thread-safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-fs"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+
+        return summary
+
+    def get_matches(
+        self, run_id: str, name: str, multiple_allowed: bool = False
+    ) -> Optional[Union[Sequence[T], T]]:
+        """
+        Get contents of files matching the pattern name*
+
+        Args:
+            run_id (str): The run id
+            name (str): The suffix of the file name to check in the run log store.
+        """
+        log_folder = self.log_folder_with_run_id(run_id=run_id)
+        sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+        matches = list(log_folder.glob(f"{sub_name}*"))
+
+        if matches:
+            if not multiple_allowed:
+                if len(matches) > 1:
+                    msg = f"Multiple matches found for {name} while multiple is not allowed"
+                    raise Exception(msg)
+                return matches[0]
+            return matches
+
+        return None
+
+    def log_folder_with_run_id(self, run_id: str) -> Path:
+        """
+        Utility function to get the log folder for a run id.
+
+        Args:
+            run_id (str): The run id
+
+        Returns:
+            Path: The path to the log folder with the run id
+        """
+        return Path(self.log_folder) / run_id
+
+    def safe_suffix_json(self, name: Union[Path, str]) -> str:
+        """
+        Safely attach a .json suffix to a file name.
+
+        Args:
+            name (Path): The name of the file, with or without the json suffix
+
+        Returns:
+            str: The name of the file with .json
+        """
+        if str(name).endswith("json"):
+            return str(name)
+
+        return str(name) + ".json"
+
+    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+        """
+        if insert:
+            name = self.log_folder_with_run_id(run_id=run_id) / name
+
+        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+        with open(self.safe_suffix_json(name), "w") as fw:
+            json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+    def _retrieve(self, name: Union[str, Path]) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            name (str): The name of the file to retrieve
+
+        Returns:
+            dict: The contents
+        """
+        contents: dict = {}
+
+        with open(self.safe_suffix_json(name), "r") as fr:
+            contents = json.load(fr)
+
+        return contents
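get_matches relies on string.Template to blank out the ${creation_time} placeholder before globbing, so a templated chunk name becomes a prefix match on disk. A small standalone illustration of that substitution (the chunk name below is hypothetical):

from string import Template

name = "StepLog-my_step-${creation_time}"
sub_name = Template(name).safe_substitute({"creation_time": ""})
print(sub_name)        # StepLog-my_step-
print(f"{sub_name}*")  # StepLog-my_step-* : the glob run against the run's log folder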
extensions/run_log_store/db/implementation_FF.py
@@ -0,0 +1,163 @@
+import datetime
+import json
+import logging
+from pathlib import Path
+from string import Template
+from typing import Any, Dict, List, Optional, Union, cast
+
+from runnable import defaults, utils
+from runnable.extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class DBRunLogStore(ChunkedRunLogStore):
+    """
+    Database-backed run log store that chunks the run log into thread-safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-fs"
+    connection_string: str
+    db_name: str
+
+    _DB_LOG: Any = None
+    _engine: Any = None
+    _session: Any = None
+    _connection_string: str = ""
+    _base: Any = None
+
+    def model_post_init(self, _: Any) -> None:
+        run_context = self._context
+
+        secrets = cast(Dict[str, str], run_context.secrets_handler.get())
+        connection_string = Template(self.connection_string).safe_substitute(**secrets)
+
+        try:
+            import sqlalchemy
+            from sqlalchemy import Column, DateTime, Integer, Sequence, Text
+            from sqlalchemy.orm import declarative_base, sessionmaker
+
+            Base = declarative_base()
+
+            class DBLog(Base):
+                """
+                Base table for storing run logs in the database.
+
+                In this model, we fragment the run log into logical units that are concurrency safe.
+                """
+
+                __tablename__ = self.db_name
+                pk = Column(Integer, Sequence("id_seq"), primary_key=True)
+                run_id = Column(Text, index=True)
+                attribute_key = Column(
+                    Text
+                )  # run_log, step_internal_name, parameter_key etc.
+                attribute_type = Column(Text)  # RunLog, Step, Branch, Parameter
+                attribute_value = Column(Text)  # The JSON string
+                created_at = Column(DateTime, default=datetime.datetime.utcnow)
+
+            self._engine = sqlalchemy.create_engine(
+                connection_string, pool_pre_ping=True
+            )
+            self._session = sessionmaker(bind=self._engine)
+            self._DB_LOG = DBLog
+            self._connection_string = connection_string
+            self._base = Base
+
+        except ImportError as _e:
+            logger.exception("Unable to import SQLAlchemy, is it installed?")
+            msg = "SQLAlchemy is required for this extension. Please install it"
+            raise Exception(msg) from _e
+
+    def create_tables(self):
+        import sqlalchemy
+
+        engine = sqlalchemy.create_engine(self._connection_string)
+        self._base.metadata.create_all(engine)
+
+    def get_matches(
+        self, run_id: str, name: str, multiple_allowed: bool = False
+    ) -> Optional[Union[List[Path], Path]]:
+        """
+        Get contents of files matching the pattern name*
+
+        Args:
+            run_id (str): The run id
+            name (str): The suffix of the file name to check in the run log store.
+        """
+        log_folder = self.log_folder_with_run_id(run_id=run_id)
+
+        sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+        matches = list(log_folder.glob(f"{sub_name}*"))
+        if matches:
+            if not multiple_allowed:
+                if len(matches) > 1:
+                    msg = f"Multiple matches found for {name} while multiple is not allowed"
+                    raise Exception(msg)
+                return matches[0]
+            return matches
+
+        return None
+
+    def log_folder_with_run_id(self, run_id: str) -> Path:
+        """
+        Utility function to get the log folder for a run id.
+
+        Args:
+            run_id (str): The run id
+
+        Returns:
+            Path: The path to the log folder with the run id
+        """
+        return Path(self.log_folder) / run_id
+
+    def safe_suffix_json(self, name: Union[Path, str]) -> str:
+        """
+        Safely attach a .json suffix to a file name.
+
+        Args:
+            name (Path): The name of the file, with or without the json suffix
+
+        Returns:
+            str: The name of the file with .json
+        """
+        if str(name).endswith("json"):
+            return str(name)
+
+        return str(name) + ".json"
+
+    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+        """
+        if insert:
+            name = self.log_folder_with_run_id(run_id=run_id) / name
+
+        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+        with open(self.safe_suffix_json(name), "w") as fw:
+            json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+    def _retrieve(self, name: Path) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            name (str): The name of the file to retrieve
+
+        Returns:
+            dict: The contents
+        """
+        contents: dict = {}
+
+        with open(self.safe_suffix_json(name), "r") as fr:
+            contents = json.load(fr)
+
+        return contents
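The model above fragments the run log into rows keyed by run_id, attribute_key and attribute_type, with the JSON payload in attribute_value. A self-contained sketch of the same table shape against an in-memory SQLite database (the table name and row values are illustrative; the real store substitutes secrets into connection_string and takes the table name from db_name):

import datetime

import sqlalchemy
from sqlalchemy import Column, DateTime, Integer, Text
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class DBLog(Base):
    # Mirrors the fragmented schema: one row per logical unit of the run log.
    __tablename__ = "run_logs"  # assumed name; the store reads it from db_name
    pk = Column(Integer, primary_key=True)
    run_id = Column(Text, index=True)
    attribute_key = Column(Text)    # e.g. run_log, step_internal_name
    attribute_type = Column(Text)   # e.g. RunLog, Step, Branch, Parameter
    attribute_value = Column(Text)  # the JSON payload
    created_at = Column(DateTime, default=datetime.datetime.utcnow)

engine = sqlalchemy.create_engine("sqlite:///:memory:", pool_pre_ping=True)
Base.metadata.create_all(engine)

with sessionmaker(bind=engine)() as session:
    session.add(DBLog(run_id="run-1", attribute_key="run_log",
                      attribute_type="RunLog", attribute_value="{}"))
    session.commit()
    print(session.query(DBLog).filter_by(run_id="run-1").count())  # 1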
extensions/run_log_store/db/integration_FF.py (file without changes)
extensions/run_log_store/file_system.py
@@ -0,0 +1,145 @@
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict
+
+from runnable import defaults, exceptions, utils
+from runnable.datastore import BaseRunLogStore, RunLog
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class FileSystemRunLogstore(BaseRunLogStore):
+    """
+    In this type of run log store, we use a file system to store the JSON run log.
+
+    Every single run is stored as a different file, which makes it compatible across other store types.
+
+    When to use:
+        When locally testing a pipeline and you need to compare across runs.
+        It is fully featured and perfectly fine if your local environment is where you would do everything.
+
+    Do not use:
+        If you need parallelization locally, this run log store would not support it.
+
+    Example config:
+
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to put the logs in. Defaults to .run_log_store
+
+    """
+
+    service_name: str = "file-system"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+    @property
+    def log_folder_name(self):
+        return self.log_folder
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+
+        return summary
+
+    def write_to_folder(self, run_log: RunLog):
+        """
+        Write the run log to the folder.
+
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        write_to = self.log_folder_name
+        utils.safe_make_dir(write_to)
+
+        write_to_path = Path(write_to)
+        run_id = run_log.run_id
+        json_file_path = write_to_path / f"{run_id}.json"
+
+        with json_file_path.open("w") as fw:
+            json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
+
+    def get_from_folder(self, run_id: str) -> RunLog:
+        """
+        Look into the run log folder for the run log for the run id.
+
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it.
+
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+
+        Raises:
+            FileNotFoundError: If the run log has not been found.
+
+        Returns:
+            RunLog: The decoded run log
+        """
+        write_to = self.log_folder_name
+
+        read_from_path = Path(write_to)
+        json_file_path = read_from_path / f"{run_id}.json"
+
+        if not json_file_path.exists():
+            raise FileNotFoundError(f"Expected {json_file_path} is not present")
+
+        with json_file_path.open("r") as fr:
+            json_str = json.load(fr)
+            run_log = RunLog(**json_str)  # pylint: disable=no-member
+        return run_log
+
+    def create_run_log(
+        self,
+        run_id: str,
+        dag_hash: str = "",
+        use_cached: bool = False,
+        tag: str = "",
+        original_run_id: str = "",
+        status: str = defaults.CREATED,
+    ) -> RunLog:
+        """
+        Creates a run log and adds it to the db.
+        """
+
+        try:
+            self.get_run_log_by_id(run_id=run_id, full=False)
+            raise exceptions.RunLogExistsError(run_id=run_id)
+        except exceptions.RunLogNotFoundError:
+            pass
+
+        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+        run_log = RunLog(
+            run_id=run_id,
+            dag_hash=dag_hash,
+            tag=tag,
+            status=status,
+        )
+        self.write_to_folder(run_log)
+        return run_log
+
+    def get_run_log_by_id(
+        self,
+        run_id: str,
+        full: bool = False,
+    ) -> RunLog:
+        """
+        Returns the run log defined by id; raises an exception if not found.
+        """
+        try:
+            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+            run_log = self.get_from_folder(run_id)
+            return run_log
+        except FileNotFoundError as e:
+            raise exceptions.RunLogNotFoundError(run_id) from e
+
+    def put_run_log(self, run_log: RunLog):
+        """
+        Puts the run log into the database.
+        """
+        logger.info(
+            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+        )
+        self.write_to_folder(run_log)
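The store boils down to one <run_id>.json file per run inside the log folder (.run_log_store by default). A minimal sketch of that round trip with plain json and pathlib, independent of runnable's classes (the run-log dict here is illustrative):

import json
from pathlib import Path

# One <run_id>.json per run inside the log folder, as write_to_folder does.
log_folder = Path(".run_log_store")
log_folder.mkdir(parents=True, exist_ok=True)

run_log = {"run_id": "demo-run", "status": "CREATED", "steps": {}}
(log_folder / "demo-run.json").write_text(json.dumps(run_log, indent=4))

# get_from_folder in miniature: read the file back and decode it.
loaded = json.loads((log_folder / "demo-run.json").read_text())
print(loaded["status"])  # CREATED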