runnable 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runnable/sdk.py CHANGED
@@ -3,11 +3,10 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  from abc import ABC, abstractmethod
6
- from typing import Any, Dict, List, Optional, Union
6
+ from typing import Any, Callable, Dict, List, Optional, Union
7
7
 
8
- from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator
8
+ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
9
9
  from rich import print
10
- from ruamel.yaml import YAML
11
10
  from typing_extensions import Self
12
11
 
13
12
  from runnable import defaults, entrypoints, graph, utils
@@ -16,11 +15,8 @@ from runnable.nodes import TraversalNode
16
15
 
17
16
  logger = logging.getLogger(defaults.LOGGER_NAME)
18
17
 
19
- StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"]
20
- TraversalTypes = Union["Stub", "Task", "Parallel", "Map"]
21
-
22
-
23
- ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
18
+ StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
19
+ TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]
24
20
 
25
21
 
26
22
  class Catalog(BaseModel):
@@ -33,7 +29,7 @@ class Catalog(BaseModel):
33
29
  put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
34
30
 
35
31
  Examples:
36
- >>> from magnus import Catalog, Task
32
+ >>> from runnable import Catalog, Task
37
33
  >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
38
34
 
39
35
  >>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
@@ -107,7 +103,7 @@ class BaseTraversal(ABC, BaseModel):
107
103
  ...
108
104
 
109
105
 
110
- class Task(BaseTraversal):
106
+ class BaseTask(BaseTraversal):
111
107
  """
112
108
  An execution node of the pipeline.
113
109
  Please refer to [concepts](concepts/task.md) for more information.
@@ -133,10 +129,10 @@ class Task(BaseTraversal):
133
129
  executor:
134
130
  type: local-container
135
131
  config:
136
- docker_image: "magnus/magnus:latest"
132
+ docker_image: "runnable/runnable:latest"
137
133
  overrides:
138
134
  custom_docker_image:
139
- docker_image: "magnus/magnus:custom"
135
+ docker_image: "runnable/runnable:custom"
140
136
  ```
141
137
  ### Task specific configuration
142
138
  ```python
@@ -148,48 +144,173 @@ class Task(BaseTraversal):
148
144
  optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
149
145
  Only used when command_type is 'notebook', defaults to {}
150
146
  output_cell_tag (Optional[str]): The tag of the output cell.
151
- Only used when command_type is 'notebook', defaults to "magnus_output"
147
+ Only used when command_type is 'notebook', defaults to "runnable_output"
152
148
  terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
153
149
  terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
154
150
  on_failure (str): The name of the node to execute if the step fails.
155
151
 
156
152
  """
157
153
 
158
- command: str = Field(alias="command")
159
- command_type: str = Field(default="python")
160
154
  catalog: Optional[Catalog] = Field(default=None, alias="catalog")
161
155
  overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
162
156
 
157
+ def create_node(self) -> TaskNode:
158
+ if not self.next_node:
159
+ if not (self.terminate_with_failure or self.terminate_with_success):
160
+ raise AssertionError("A node not being terminated must have a user defined next node")
161
+
162
+ print(self.model_dump(exclude_none=True))
163
+ return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
164
+
165
+
166
+ class PythonTask(BaseTask):
167
+ """
168
+ An execution node of the pipeline of python functions.
169
+ Please refer to [concepts](concepts/task.md) for more information.
170
+
171
+ Attributes:
172
+ name (str): The name of the node.
173
+ function (callable): The function to execute.
174
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
175
+ Please see Catalog about the structure of the catalog.
176
+ overrides (Dict[str, Any]): Any overrides to the command.
177
+ Individual tasks can override the global configuration config by referring to the
178
+ specific override.
179
+
180
+ For example,
181
+ ### Global configuration
182
+ ```yaml
183
+ executor:
184
+ type: local-container
185
+ config:
186
+ docker_image: "runnable/runnable:latest"
187
+ overrides:
188
+ custom_docker_image:
189
+ docker_image: "runnable/runnable:custom"
190
+ ```
191
+ ### Task specific configuration
192
+ ```python
193
+ task = PythonTask(name="task", function=function,
194
+ overrides={'local-container': custom_docker_image})
195
+ ```
196
+
197
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
198
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
199
+ on_failure (str): The name of the node to execute if the step fails.
200
+
201
+ """
202
+
203
+ function: Callable = Field(exclude=True)
204
+
205
+ @computed_field
206
+ def command_type(self) -> str:
207
+ return "python"
208
+
209
+ @computed_field
210
+ def command(self) -> str:
211
+ module = self.function.__module__
212
+ name = self.function.__name__
213
+
214
+ return f"{module}.{name}"
215
+
216
+
217
+ class NotebookTask(BaseTask):
218
+ """
219
+ An execution node of the pipeline of type notebook.
220
+ Please refer to [concepts](concepts/task.md) for more information.
221
+
222
+ Attributes:
223
+ name (str): The name of the node.
224
+ notebook: The path to the notebook
225
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
226
+ Please see Catalog about the structure of the catalog.
227
+ returns: A list of the names of variables to return from the notebook.
228
+ overrides (Dict[str, Any]): Any overrides to the command.
229
+ Individual tasks can override the global configuration config by referring to the
230
+ specific override.
231
+
232
+ For example,
233
+ ### Global configuration
234
+ ```yaml
235
+ executor:
236
+ type: local-container
237
+ config:
238
+ docker_image: "runnable/runnable:latest"
239
+ overrides:
240
+ custom_docker_image:
241
+ docker_image: "runnable/runnable:custom"
242
+ ```
243
+ ### Task specific configuration
244
+ ```python
245
+ task = NotebookTask(name="task", notebook="evaluation.ipynb",
246
+ overrides={'local-container': custom_docker_image})
247
+ ```
248
+ notebook_output_path (Optional[str]): The path to save the notebook output.
249
+ Only used when command_type is 'notebook', defaults to command+_out.ipynb
250
+ optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
251
+ Only used when command_type is 'notebook', defaults to {}
252
+
253
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
254
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
255
+ on_failure (str): The name of the node to execute if the step fails.
256
+
257
+ """
258
+
259
+ notebook: str = Field(alias="command")
260
+
163
261
  notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
164
262
  optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
165
- output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag")
263
+ returns: List[str] = Field(default_factory=list, alias="returns")
166
264
 
167
- @field_validator("command_type", mode="before")
168
- @classmethod
169
- def validate_command_type(cls, value: str) -> str:
170
- if value not in ALLOWED_COMMAND_TYPES:
171
- raise ValueError(f"Invalid command_type: {value}")
172
- return value
265
+ @computed_field
266
+ def command_type(self) -> str:
267
+ return "notebook"
173
268
 
174
- @model_validator(mode="after")
175
- def check_notebook_args(self) -> "Task":
176
- if self.command_type != "notebook":
177
- assert (
178
- self.notebook_output_path is None
179
- ), "Only command_types of 'notebook' can be used with notebook_output_path"
180
269
 
181
- assert (
182
- self.optional_ploomber_args is None
183
- ), "Only command_types of 'notebook' can be used with optional_ploomber_args"
270
+ class ShellTask(BaseTask):
271
+ """
272
+ An execution node of the pipeline of type shell.
273
+ Please refer to [concepts](concepts/task.md) for more information.
184
274
 
185
- assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag"
186
- return self
275
+ Attributes:
276
+ name (str): The name of the node.
277
+ command: The shell command to execute.
278
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
279
+ Please see Catalog about the structure of the catalog.
280
+ returns: A list of the names of variables to capture from environment variables of shell.
281
+ overrides (Dict[str, Any]): Any overrides to the command.
282
+ Individual tasks can override the global configuration config by referring to the
283
+ specific override.
187
284
 
188
- def create_node(self) -> TaskNode:
189
- if not self.next_node:
190
- if not (self.terminate_with_failure or self.terminate_with_success):
191
- raise AssertionError("A node not being terminated must have a user defined next node")
192
- return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
285
+ For example,
286
+ ### Global configuration
287
+ ```yaml
288
+ executor:
289
+ type: local-container
290
+ config:
291
+ docker_image: "runnable/runnable:latest"
292
+ overrides:
293
+ custom_docker_image:
294
+ docker_image: "runnable/runnable:custom"
295
+ ```
296
+ ### Task specific configuration
297
+ ```python
298
+ task = ShellTask(name="task", command="exit 0",
299
+ overrides={'local-container': custom_docker_image})
300
+ ```
301
+
302
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
303
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
304
+ on_failure (str): The name of the node to execute if the step fails.
305
+
306
+ """
307
+
308
+ command: str = Field(alias="command")
309
+ returns: List[str] = Field(default_factory=list, alias="returns")
310
+
311
+ @computed_field
312
+ def command_type(self) -> str:
313
+ return "shell"
193
314
 
194
315
 
195
316
  class Stub(BaseTraversal):
@@ -341,7 +462,8 @@ class Pipeline(BaseModel):
341
462
  A Pipeline is a directed acyclic graph of Steps that define a workflow.
342
463
 
343
464
  Attributes:
344
- steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline.
465
+ steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
466
+ A list of Steps that make up the Pipeline.
345
467
  start_at (Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map): The name of the first Step in the Pipeline.
346
468
  name (str, optional): The name of the Pipeline. Defaults to "".
347
469
  description (str, optional): A description of the Pipeline. Defaults to "".
@@ -385,6 +507,9 @@ class Pipeline(BaseModel):
385
507
 
386
508
  self._dag.check_graph()
387
509
 
510
+ def return_dag(self) -> graph.Graph:
511
+ return self._dag
512
+
388
513
  def execute(
389
514
  self,
390
515
  configuration_file: str = "",
@@ -393,7 +518,6 @@ class Pipeline(BaseModel):
393
518
  parameters_file: str = "",
394
519
  use_cached: str = "",
395
520
  log_level: str = defaults.LOG_LEVEL,
396
- output_pipeline_definition: str = "magnus-pipeline.yaml",
397
521
  ):
398
522
  """
399
523
  *Execute* the Pipeline.
@@ -408,7 +532,7 @@ class Pipeline(BaseModel):
408
532
 
409
533
  Args:
410
534
  configuration_file (str, optional): The path to the configuration file. Defaults to "".
411
- The configuration file can be overridden by the environment variable MAGNUS_CONFIGURATION_FILE.
535
+ The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
412
536
 
413
537
  run_id (str, optional): The ID of the run. Defaults to "".
414
538
  tag (str, optional): The tag of the run. Defaults to "".
@@ -419,18 +543,18 @@ class Pipeline(BaseModel):
419
543
  Provide the run_id of the older execution to recover.
420
544
 
421
545
  log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
422
- output_pipeline_definition (str, optional): The path to the output pipeline definition file.
423
- Defaults to "magnus-pipeline.yaml".
424
-
425
- Only applicable for the execution via SDK for non ```local``` executors.
426
546
  """
427
- from runnable.extensions.executor.local.implementation import LocalExecutor
428
- from runnable.extensions.executor.mocked.implementation import MockedExecutor
547
+
548
+ # py_to_yaml is used by non local executors to generate the yaml representation of the pipeline.
549
+ py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
550
+
551
+ if py_to_yaml == "true":
552
+ return
429
553
 
430
554
  logger.setLevel(log_level)
431
555
 
432
556
  run_id = utils.generate_run_id(run_id=run_id)
433
- configuration_file = os.environ.get("MAGNUS_CONFIGURATION_FILE", configuration_file)
557
+ configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
434
558
  run_context = entrypoints.prepare_configurations(
435
559
  configuration_file=configuration_file,
436
560
  run_id=run_id,
@@ -440,7 +564,7 @@ class Pipeline(BaseModel):
440
564
  )
441
565
 
442
566
  run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
443
- utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
567
+ utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
444
568
 
445
569
  dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
446
570
 
@@ -449,17 +573,14 @@ class Pipeline(BaseModel):
449
573
  print("Working with context:")
450
574
  print(run_context)
451
575
 
452
- if not (isinstance(run_context.executor, LocalExecutor) or isinstance(run_context.executor, MockedExecutor)):
453
- logger.debug(run_context.dag.model_dump(by_alias=True))
454
- yaml = YAML()
576
+ if not run_context.executor._local:
577
+ # We are working with non local executor
578
+ import inspect
455
579
 
456
- with open(output_pipeline_definition, "w", encoding="utf-8") as f:
457
- yaml.dump(
458
- {"dag": run_context.dag.model_dump(by_alias=True, exclude_none=True)},
459
- f,
460
- )
580
+ caller_stack = inspect.stack()[1]
581
+ module_to_call = f"{caller_stack.filename.replace('/', '.').replace('.py', '')}.{caller_stack.function}"
461
582
 
462
- return
583
+ run_context.pipeline_file = f"{module_to_call}.py"
463
584
 
464
585
  # Prepare for graph execution
465
586
  run_context.executor.prepare_for_graph_execution()
@@ -467,4 +588,5 @@ class Pipeline(BaseModel):
467
588
  logger.info("Executing the graph")
468
589
  run_context.executor.execute_graph(dag=run_context.dag)
469
590
 
470
- return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)
591
+ if run_context.executor._local:
592
+ return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)