runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,119 @@
1
+ import logging
2
+ import shlex
3
+ import subprocess
4
+ import sys
5
+ from typing import Optional
6
+
7
+ from pydantic import PrivateAttr
8
+
9
+ from extensions.pipeline_executor import GenericPipelineExecutor
10
+ from runnable import defaults
11
+ from runnable.defaults import IterableParameterModel
12
+ from runnable.nodes import BaseNode
13
+
14
+ logger = logging.getLogger(defaults.LOGGER_NAME)
15
+
16
+
17
class Emulator(GenericPipelineExecutor):
    """
    Executes every node of the pipeline on the local computer, but forks each
    node into a fresh Python process (``python -m runnable.cli ...``) — i.e.
    it emulates how remote executors trigger nodes through the CLI.

    This has some serious implications on the amount of time it would take to
    complete the run. Also ensure that the local compute is good enough for
    the compute to happen of all the steps.

    Example config:

    ```yaml
    pipeline-executor:
      type: emulator
    ```

    """

    service_name: str = "emulator"

    # The run log is set up by this executor at graph traversal time.
    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Prepare for the node execution and execute the node in a forked
        process via the CLI.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        command = self._context.get_node_callable_command(
            node, iter_variable=iter_variable
        )

        # Execute the command in a forked process.
        self.run_click_command(command)

        # The forked process recorded its status in the run log store;
        # verify it and mark the step as failed if it did not succeed.
        step_log = self._context.run_log_store.get_step_log(
            node._get_step_log_name(iter_variable), self._context.run_id
        )
        if step_log.status != defaults.SUCCESS:
            msg = "Node execution inside the emulate failed. Please check the logs.\n"
            logger.error(msg)
            step_log.status = defaults.FAIL
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        For local execution, we just execute the node in-process.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self._execute_node(node=node, iter_variable=iter_variable)

    def run_click_command(self, command: str) -> str:
        """
        Execute a Click-based CLI command in the current virtual environment,
        streaming its output as it runs.

        Args:
            command (str): Shell-style command string. The first token (the
                executable name) is dropped and the remaining tokens are
                passed to ``python -m runnable.cli``.

        Returns:
            str: Combined stdout/stderr output of the subprocess.

        Raises:
            subprocess.CalledProcessError: If the subprocess exits non-zero;
                the combined output is attached to the exception.
        """
        # Run the CLI module directly with the current interpreter so the
        # child process uses the same virtual environment as the parent.
        sub_command = [sys.executable, "-m", "runnable.cli"] + shlex.split(command)[1:]

        output = []
        # The context manager guarantees the pipe is closed and the child is
        # waited on (returncode populated), even if streaming raises.
        with subprocess.Popen(
            sub_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        ) as process:
            assert process.stdout is not None  # guaranteed by stdout=PIPE
            for line in process.stdout:
                # Mirror the child's output live while also capturing it.
                print(line, end="")
                output.append(line)

        if process.returncode != 0:
            raise subprocess.CalledProcessError(
                process.returncode, command, "".join(output)
            )

        return "".join(output)
@@ -0,0 +1,226 @@
1
+ import logging
2
+ import os
3
+ from typing import List, Optional
4
+
5
+ from pydantic import Field, PrivateAttr
6
+
7
+ from extensions.pipeline_executor import GenericPipelineExecutor
8
+ from runnable import console, defaults
9
+ from runnable.datastore import DataCatalog
10
+ from runnable.defaults import IterableParameterModel
11
+ from runnable.graph import Graph
12
+ from runnable.nodes import BaseNode
13
+
14
+ logger = logging.getLogger(defaults.LOGGER_NAME)
15
+
16
+
17
class LocalExecutor(GenericPipelineExecutor):
    """
    In the mode of local execution, we run everything on the local computer.

    This has some serious implications on the amount of time it would take to complete the run.
    Also ensure that the local compute is good enough for the compute to happen of all the steps.

    Example config:

    ```yaml
    pipeline-executor:
      type: local
      config:
        enable_parallel: false # Enable parallel execution for parallel/map nodes
    ```

    """

    service_name: str = "local"
    # Whether parallel/map branches are allowed to run concurrently.
    enable_parallel: bool = Field(default=False)

    # TODO: Not fully done
    # When False, object (de)serialisation of parameters is switched off on
    # the context (see execute_from_graph / execute_from_graph_async).
    object_serialisation: bool = Field(default=True)

    # Marks this executor as running in-process on the local machine.
    _is_local: bool = PrivateAttr(default=True)

    def execute_from_graph(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Entry point for a node during synchronous graph traversal.

        Propagates the object_serialisation switch to the context, then
        delegates to the generic implementation.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        if not self.object_serialisation:
            self._context.object_serialisation = False

        super().execute_from_graph(node=node, iter_variable=iter_variable)

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        In this mode of execution, we prepare for the node execution and execute the node.

        Locally there is no remote hand-off, so triggering is a direct call
        to execute_node.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self.execute_node(node=node, iter_variable=iter_variable)

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        For local execution, we just execute the node in-process.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self._execute_node(node=node, iter_variable=iter_variable)

    # ═══════════════════════════════════════════════════════════════
    # Async Path - implement async methods for local execution
    # ═══════════════════════════════════════════════════════════════

    async def execute_graph_async(
        self,
        dag: Graph,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async graph traversal.

        Walks the dag from its start node, executing each node via the async
        path and following the status-determined edge until a terminal
        (success/fail) node is reached.

        Args:
            dag (Graph): The graph (or branch sub-graph) to traverse.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.

        Raises:
            Exception: If traversal would visit the same node twice in a row
                (guard against infinite loops).
        """
        current_node = dag.start_at
        previous_node = None
        logger.info(f"Running async execution with {current_node}")

        branch_task_name: str = ""
        if dag.internal_branch_name:
            # Resolve map placeholders so logs show the concrete branch name.
            branch_task_name = BaseNode._resolve_map_placeholders(
                dag.internal_branch_name or "Graph",
                iter_variable,
            )
            console.print(
                f":runner: Executing the branch {branch_task_name} ... ",
                style="bold color(208)",
            )

        while True:
            working_on = dag.get_node_by_name(current_node)
            task_name = working_on._resolve_map_placeholders(
                working_on.internal_name, iter_variable
            )

            # A node that resolves to itself as the next node would loop forever.
            if previous_node == current_node:
                raise Exception("Potentially running in an infinite loop")
            previous_node = current_node

            try:
                await self.execute_from_graph_async(
                    working_on, iter_variable=iter_variable
                )
                # Sync helper - no await needed
                status, next_node_name = self._get_status_and_next_node_name(
                    current_node=working_on, dag=dag, iter_variable=iter_variable
                )

                if status == defaults.SUCCESS:
                    console.print(f":white_check_mark: Node {task_name} succeeded")
                else:
                    console.print(f":x: Node {task_name} failed")
            except Exception as e:
                console.print(":x: Error during execution", style="bold red")
                console.print(e, style=defaults.error_style)
                logger.exception(e)
                raise

            console.rule(style="[dark orange]")

            # Terminal node types end the traversal; otherwise follow the edge.
            if working_on.node_type in ["success", "fail"]:
                break
            current_node = next_node_name

        # Sync helper - no await needed
        self._finalize_graph_execution(working_on, dag, iter_variable)

    async def execute_from_graph_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async node execution entry point.

        Mirrors execute_from_graph: propagates the serialisation switch,
        prepares the step log (which may decide to skip the node), then
        dispatches terminal, composite and task nodes to their respective
        async paths.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        if not self.object_serialisation:
            self._context.object_serialisation = False

        # Sync helper - no await needed; returns None when the node is skipped.
        step_log = self._prepare_node_for_execution(node, iter_variable)
        if step_log is None:
            return  # Skipped

        logger.info(f"Executing node: {node.get_summary()}")

        if node.node_type in ["success", "fail"]:
            await self._execute_node_async(node, iter_variable=iter_variable)
            return

        # Composite nodes (e.g. parallel/map) traverse their own sub-graph.
        if node.is_composite:
            await node.execute_as_graph_async(iter_variable=iter_variable)
            return

        task_name = node._resolve_map_placeholders(node.internal_name, iter_variable)
        console.print(
            f":runner: Executing the node {task_name} ... ", style="bold color(208)"
        )
        await self.trigger_node_execution_async(node=node, iter_variable=iter_variable)

    async def trigger_node_execution_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async trigger for node execution; locally this is a direct call.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        await self._execute_node_async(node=node, iter_variable=iter_variable)

    async def _execute_node_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
        mock: bool = False,
    ):
        """
        Async node execution wrapper.

        Performs the catalog "get" stage, awaits the node's own async
        execution, then performs the catalog "put" stage and records the
        step log in the run log store.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
            mock (bool): Passed through to node.execute_async.
                Defaults to False.
        """
        current_attempt_number = self._calculate_attempt_number(node, iter_variable)
        # Publish the attempt number via the environment under
        # defaults.ATTEMPT_NUMBER (presumably read by the task code — confirm).
        os.environ[defaults.ATTEMPT_NUMBER] = str(current_attempt_number)

        logger.info(
            f"Trying to execute node: {node.internal_name}, attempt: {current_attempt_number}"
        )

        self._context_node = node

        # Sync - catalog get
        data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(stage="get")
        logger.debug(f"data_catalogs_get: {data_catalogs_get}")

        # ASYNC - execute the node
        step_log = await node.execute_async(
            iter_variable=iter_variable,
            attempt_number=current_attempt_number,
            mock=mock,
        )

        # Sync - catalog put and finalization.
        # A failed step may not have produced its outputs; per the flag name,
        # missing files are tolerated in that case.
        allow_file_not_found_exc = step_log.status != defaults.SUCCESS
        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            stage="put", allow_file_no_found_exc=allow_file_not_found_exc
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")
        step_log.add_data_catalogs(data_catalogs_put or [])
        step_log.add_data_catalogs(data_catalogs_get or [])

        console.print(f"Summary of the step: {step_log.internal_name}")
        console.print(step_log.get_summary(), style=defaults.info_style)

        self.add_task_log_to_catalog(
            name=self._context_node.internal_name, iter_variable=iter_variable
        )
        self._context_node = None

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)