runnable 0.13.0-py3-none-any.whl → 0.16.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. runnable/__init__.py +1 -12
  2. runnable/catalog.py +29 -5
  3. runnable/cli.py +268 -215
  4. runnable/context.py +10 -3
  5. runnable/datastore.py +212 -53
  6. runnable/defaults.py +13 -55
  7. runnable/entrypoints.py +270 -183
  8. runnable/exceptions.py +28 -2
  9. runnable/executor.py +133 -86
  10. runnable/graph.py +37 -13
  11. runnable/nodes.py +50 -22
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +230 -66
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +99 -41
  17. runnable/utils.py +59 -39
  18. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
  19. runnable-0.16.0.dist-info/RECORD +23 -0
  20. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
  21. runnable-0.16.0.dist-info/entry_points.txt +45 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local.py +0 -69
  37. runnable/extensions/executor/local_container/__init__.py +0 -0
  38. runnable/extensions/executor/local_container/implementation.py +0 -446
  39. runnable/extensions/executor/mocked/__init__.py +0 -0
  40. runnable/extensions/executor/mocked/implementation.py +0 -154
  41. runnable/extensions/executor/retry/__init__.py +0 -0
  42. runnable/extensions/executor/retry/implementation.py +0 -168
  43. runnable/extensions/nodes.py +0 -870
  44. runnable/extensions/run_log_store/__init__.py +0 -0
  45. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  47. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  50. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  51. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  52. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  53. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  54. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  55. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  56. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  57. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  58. runnable/extensions/secrets/__init__.py +0 -0
  59. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  61. runnable/integration.py +0 -192
  62. runnable-0.13.0.dist-info/RECORD +0 -63
  63. runnable-0.13.0.dist-info/entry_points.txt +0 -41
  64. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/entrypoints.py CHANGED
@@ -9,12 +9,16 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
  from rich.table import Column
 
  import runnable.context as context
- from runnable import console, defaults, graph, task_console, utils
+ from runnable import console, defaults, graph, task_console, tasks, utils
  from runnable.defaults import RunnableConfig, ServiceConfig
+ from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 
  logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
+ print("") # removes the buffer print
+
+
  def get_default_configs() -> RunnableConfig:
  """
  User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -29,19 +33,18 @@ def get_default_configs() -> RunnableConfig:
  def prepare_configurations(
  run_id: str,
  configuration_file: str = "",
- pipeline_file: str = "",
  tag: str = "",
  parameters_file: str = "",
- force_local_executor: bool = False,
+ is_job: bool = False,
  ) -> context.Context:
  """
+ Sets up everything needed
  Replace the placeholders in the dag/config against the variables file.
 
  Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it.
 
  Args:
  variables_file (str): The variables file, if used or None
- pipeline_file (str): The config/dag file
  run_id (str): The run id of the run.
  tag (str): If a tag is provided at the run time
 
@@ -53,45 +56,87 @@ def prepare_configurations(
  variables = utils.gather_variables()
 
  templated_configuration = {}
- configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+ configuration_file = os.environ.get(
+ "RUNNABLE_CONFIGURATION_FILE", configuration_file
+ )
 
  if configuration_file:
- templated_configuration = utils.load_yaml(configuration_file) or {}
+ templated_configuration = utils.load_yaml(configuration_file)
 
- configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
+ # apply variables
+ configuration = cast(
+ RunnableConfig, utils.apply_variables(templated_configuration, variables)
+ )
+
+ # Since all the services (run_log_store, catalog, secrets, executor) are
+ # dynamically loaded via stevedore, we cannot validate the configuration
+ # before they are passed to the service.
 
  logger.info(f"Resolved configurations: {configuration}")
 
  # Run log settings, configuration over-rides everything
- run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
+ # The user config has run-log-store while internally we use run_log_store
+ run_log_config: Optional[ServiceConfig] = configuration.get("run-log-store", None) # type: ignore
  if not run_log_config:
- run_log_config = cast(ServiceConfig, runnable_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE))
+ run_log_config = cast(
+ ServiceConfig,
+ runnable_defaults.get("run-log-store", defaults.DEFAULT_RUN_LOG_STORE),
+ )
  run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)
 
  # Catalog handler settings, configuration over-rides everything
  catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None)
  if not catalog_config:
- catalog_config = cast(ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG))
+ catalog_config = cast(
+ ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG)
+ )
  catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config)
 
  # Secret handler settings, configuration over-rides everything
  secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None)
  if not secrets_config:
- secrets_config = cast(ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS))
+ secrets_config = cast(
+ ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS)
+ )
  secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config)
 
  # pickler
- pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
+ pickler_config = cast(
+ ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER)
+ )
  pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
 
- # executor configurations, configuration over rides everything
- executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
- if force_local_executor:
- executor_config = ServiceConfig(type="local", config={})
-
- if not executor_config:
- executor_config = cast(ServiceConfig, runnable_defaults.get("executor", defaults.DEFAULT_EXECUTOR))
- configured_executor = utils.get_provider_by_name_and_type("executor", executor_config)
+ if not is_job:
+ # executor configurations, configuration over rides everything
+ executor_config: Optional[ServiceConfig] = configuration.get(
+ "pipeline-executor", None
+ ) # type: ignore
+ # as pipeline-executor is not a valid key
+ if not executor_config:
+ executor_config = cast(
+ ServiceConfig,
+ runnable_defaults.get(
+ "pipeline-executor", defaults.DEFAULT_PIPELINE_EXECUTOR
+ ),
+ )
+ configured_executor = utils.get_provider_by_name_and_type(
+ "pipeline_executor", executor_config
+ )
+ else:
+ # executor configurations, configuration over rides everything
+ job_executor_config: Optional[ServiceConfig] = configuration.get(
+ "job-executor", None
+ ) # type: ignore
+ if not job_executor_config:
+ executor_config = cast(
+ ServiceConfig,
+ runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
+ )
+
+ assert job_executor_config, "Job executor is not provided"
+ configured_executor = utils.get_provider_by_name_and_type(
+ "job_executor", job_executor_config
+ )
 
  # Construct the context
  run_context = context.Context(
@@ -107,38 +152,45 @@ def prepare_configurations(
  parameters_file=parameters_file,
  )
 
- if pipeline_file:
- if pipeline_file.endswith(".py"):
- # converting a pipeline defined in python to a dag in yaml
- module_file = pipeline_file.strip(".py")
- module, func = utils.get_module_and_attr_names(module_file)
- sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
- imported_module = importlib.import_module(module)
+ context.run_context = run_context
 
- os.environ["RUNNABLE_PY_TO_YAML"] = "true"
- dag = getattr(imported_module, func)().return_dag()
+ return run_context
 
- else:
- pipeline_config = utils.load_yaml(pipeline_file)
 
- logger.info("The input pipeline:")
- logger.info(json.dumps(pipeline_config, indent=4))
+ def set_pipeline_spec_from_yaml(run_context: context.Context, pipeline_file: str):
+ """
+ Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+ """
+ pipeline_config = utils.load_yaml(pipeline_file)
+ logger.info("The input pipeline:")
+ logger.info(json.dumps(pipeline_config, indent=4))
 
- dag_config = pipeline_config["dag"]
+ dag_config = pipeline_config["dag"]
 
- dag_hash = utils.get_dag_hash(dag_config)
- dag = graph.create_graph(dag_config)
- run_context.dag_hash = dag_hash
+ dag_hash = utils.get_dag_hash(dag_config)
+ dag = graph.create_graph(dag_config)
+ run_context.dag_hash = dag_hash
 
- run_context.pipeline_file = pipeline_file
- run_context.dag = dag
+ run_context.pipeline_file = pipeline_file
+ run_context.dag = dag
 
- context.run_context = run_context
 
- return run_context
+ def set_pipeline_spec_from_python(run_context: context.Context, python_module: str):
+ # Call the SDK to get the dag
+ # Import the module and call the function to get the dag
+ module_file = python_module.strip(".py")
+ module, func = utils.get_module_and_attr_names(module_file)
+ sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
+ imported_module = importlib.import_module(module)
 
+ run_context.from_sdk = True
+ dag = getattr(imported_module, func)().return_dag()
 
- def execute(
+ run_context.pipeline_file = python_module
+ run_context.dag = dag
+
+
+ def execute_pipeline_yaml_spec(
  pipeline_file: str,
  configuration_file: str = "",
  tag: str = "",
@@ -147,66 +199,82 @@ def execute(
  ):
  # pylint: disable=R0914,R0913
  """
- The entry point to runnable execution. This method would prepare the configurations and delegates traversal to the
- executor
-
- Args:
- pipeline_file (str): The config/dag file
- run_id (str): The run id of the run.
- tag (str): If a tag is provided at the run time
- parameters_file (str): The parameters being sent in to the application
+ The entry point to runnable execution for any YAML based spec.
+ The result could:
+ - Execution of the pipeline if its local executor
+ - Rendering of the spec in the case of non local executor
  """
  run_id = utils.generate_run_id(run_id=run_id)
 
  run_context = prepare_configurations(
  configuration_file=configuration_file,
- pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
  )
 
- console.print("Working with context:")
- console.print(run_context)
- console.rule(style="[dark orange]")
+ assert isinstance(run_context.executor, BasePipelineExecutor)
 
+ set_pipeline_spec_from_yaml(run_context, pipeline_file)
  executor = run_context.executor
 
- run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-
- utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+ utils.set_runnable_environment_variables(
+ run_id=run_id, configuration_file=configuration_file, tag=tag
+ )
 
  # Prepare for graph execution
- executor.prepare_for_graph_execution()
+ executor._set_up_run_log(exists_ok=False)
+
+ console.print("Working with context:")
+ console.print(run_context)
+ console.rule(style="[dark orange]")
 
  logger.info(f"Executing the graph: {run_context.dag}")
  with Progress(
- TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
+ TextColumn(
+ "[progress.description]{task.description}", table_column=Column(ratio=2)
+ ),
  BarColumn(table_column=Column(ratio=1), style="dark_orange"),
  TimeElapsedColumn(table_column=Column(ratio=1)),
  console=console,
  expand=True,
  ) as progress:
- pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
+ pipeline_execution_task = progress.add_task(
+ "[dark_orange] Starting execution .. ", total=1
+ )
  try:
  run_context.progress = progress
  executor.execute_graph(dag=run_context.dag) # type: ignore
 
- # Non local executors have no run logs
- if not executor._local:
+ if not executor._is_local:
+ # Non local executors only traverse the graph and do not execute the nodes
  executor.send_return_code(stage="traversal")
  return
 
- run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+ run_log = run_context.run_log_store.get_run_log_by_id(
+ run_id=run_context.run_id, full=False
+ )
 
  if run_log.status == defaults.SUCCESS:
- progress.update(pipeline_execution_task, description="[green] Success", completed=True)
+ progress.update(
+ pipeline_execution_task,
+ description="[green] Success",
+ completed=True,
+ )
  else:
- progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
+ progress.update(
+ pipeline_execution_task, description="[red] Failed", completed=True
+ )
  except Exception as e: # noqa: E722
  console.print(e, style=defaults.error_style)
- progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
- run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+ progress.update(
+ pipeline_execution_task,
+ description="[red] Errored execution",
+ completed=True,
+ )
+ run_log = run_context.run_log_store.get_run_log_by_id(
+ run_id=run_context.run_id, full=False
+ )
  run_log.status = defaults.FAIL
  run_context.run_log_store.add_branch_log(run_log, run_context.run_id)
  raise e
@@ -219,62 +287,64 @@ def execute_single_node(
  pipeline_file: str,
  step_name: str,
  map_variable: str,
+ mode: str,
  run_id: str,
  tag: str = "",
  parameters_file: str = "",
  ):
  """
- The entry point into executing a single node of runnable. Orchestration modes should extensively use this
- entry point.
-
- It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
-
- Args:
- variables_file (str): The variables file, if used or None
- step_name : The name of the step to execute in dot path convention
- pipeline_file (str): The config/dag file
- run_id (str): The run id of the run.
- tag (str): If a tag is provided at the run time
- parameters_file (str): The parameters being sent in to the application
+ This entry point is triggered during the execution of the pipeline
+ - non local execution environments
 
+ The mode defines how the pipeline spec is provided to the runnable
+ - yaml
+ - python
  """
  from runnable import nodes
 
- task_console.print(f"Executing the single node: {step_name} with map variable: {map_variable}")
+ task_console.print(
+ f"Executing the single node: {step_name} with map variable: {map_variable}"
+ )
 
- configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+ configuration_file = os.environ.get(
+ "RUNNABLE_CONFIGURATION_FILE", configuration_file
+ )
 
  run_context = prepare_configurations(
  configuration_file=configuration_file,
- pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
  )
+ assert isinstance(run_context.executor, BasePipelineExecutor)
+
+ if mode == "yaml":
+ # Load the yaml file
+ set_pipeline_spec_from_yaml(run_context, pipeline_file)
+ elif mode == "python":
+ # Call the SDK to get the dag
+ set_pipeline_spec_from_python(run_context, pipeline_file)
+
+ assert run_context.dag
+
  task_console.print("Working with context:")
  task_console.print(run_context)
  task_console.rule(style="[dark orange]")
 
  executor = run_context.executor
- run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
- utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-
- executor.prepare_for_node_execution()
-
- # TODO: may be make its own entry point
- # if not run_context.dag:
- # # There are a few entry points that make graph dynamically and do not have a dag defined statically.
- # run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
- # run_context.dag = graph.create_graph(run_log.run_config["pipeline"])
- assert run_context.dag
+ utils.set_runnable_environment_variables(
+ run_id=run_id, configuration_file=configuration_file, tag=tag
+ )
 
  map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
  step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
- node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name)
+ node_to_execute, _ = graph.search_node_by_internal_name(
+ run_context.dag, step_internal_name
+ )
 
  logger.info("Executing the single node of : %s", node_to_execute)
- ## This step is where we save the log file
+ ## This step is where we save output of the function/shell command
  try:
  executor.execute_node(node=node_to_execute, map_variable=map_variable_dict)
  finally:
@@ -288,23 +358,15 @@ def execute_single_node(
  run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)
  os.remove(log_file_name)
 
- # executor.send_return_code(stage="execution")
-
 
- def execute_notebook(
- entrypoint: str,
- notebook_file: str,
- catalog_config: dict,
- configuration_file: str,
- notebook_output_path: str = "",
+ def execute_job_yaml_spec(
+ job_definition_file: str,
+ configuration_file: str = "",
  tag: str = "",
  run_id: str = "",
  parameters_file: str = "",
  ):
- """
- The entry point to runnable execution of a notebook. This method would prepare the configurations and
- delegates traversal to the executor
- """
+ # A job and task are internally the same.
  run_id = utils.generate_run_id(run_id=run_id)
 
  run_context = prepare_configurations(
@@ -312,61 +374,87 @@ def execute_notebook(
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
+ is_job=True,
  )
 
+ assert isinstance(run_context.executor, BaseJobExecutor)
+
  executor = run_context.executor
- run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
- utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+ utils.set_runnable_environment_variables(
+ run_id=run_id, configuration_file=configuration_file, tag=tag
+ )
+
+ run_context.job_definition_file = job_definition_file
+
+ job_config = utils.load_yaml(job_definition_file)
+ logger.info(
+ "Executing the job from the user."
+ f"job definition: {job_definition_file}, config: {job_config}"
+ )
+ assert job_config.get("type"), "Job type is not provided"
 
  console.print("Working with context:")
  console.print(run_context)
  console.rule(style="[dark orange]")
 
- step_config = {
- "command": notebook_file,
- "command_type": "notebook",
- "notebook_output_path": notebook_output_path,
- "type": "task",
- "next": "success",
- "catalog": catalog_config,
- }
- node = graph.create_node(name="executing job", step_config=step_config)
+ # A hack where we create a task node and get our job/catalog settings
+ catalog_config: list[str] = job_config.pop("catalog", {})
 
- if entrypoint == defaults.ENTRYPOINT.USER.value:
- # Prepare for graph execution
- executor.prepare_for_graph_execution()
+ # rename the type to command_type of task
+ job_config["command_type"] = job_config.pop("type")
+ job = tasks.create_task(job_config)
 
- logger.info("Executing the job from the user. We are still in the caller's compute environment")
- executor.execute_job(node=node)
+ logger.info(
+ "Executing the job from the user. We are still in the caller's compute environment"
+ )
 
- elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
- executor.prepare_for_node_execution()
- logger.info("Executing the job from the system. We are in the config's compute environment")
- executor.execute_node(node=node)
+ assert isinstance(executor, BaseJobExecutor)
+ executor.submit_job(job, catalog_settings=catalog_config)
 
- # Update the status of the run log
- step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
- run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+ executor.send_return_code()
 
- else:
- raise ValueError(f"Invalid entrypoint {entrypoint}")
 
- executor.send_return_code()
+ def set_job_spec_from_yaml(run_context: context.Context, job_definition_file: str):
+ """
+ Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+ """
+ job_config = utils.load_yaml(job_definition_file)
+ logger.info("The input job definition file:")
+ logger.info(json.dumps(job_config, indent=4))
 
+ catalog_config: list[str] = job_config.pop("catalog", {})
 
- def execute_function(
- entrypoint: str,
- command: str,
- catalog_config: dict,
- configuration_file: str,
+ job_config["command_type"] = job_config.pop("type")
+
+ run_context.job_definition_file = job_definition_file
+ run_context.job = tasks.create_task(job_config)
+ run_context.job_catalog_settings = catalog_config
+
+
+ def set_job_spec_from_python(run_context: context.Context, python_module: str):
+ # Import the module and call the function to get the task
+ module_file = python_module.strip(".py")
+ module, func = utils.get_module_and_attr_names(module_file)
+ sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
+ imported_module = importlib.import_module(module)
+
+ run_context.from_sdk = True
+ task = getattr(imported_module, func)().return_task()
+ catalog_settings = getattr(imported_module, func)().return_catalog_settings()
+
+ run_context.job_definition_file = python_module
+ run_context.job = task
+ run_context.job_catalog_settings = catalog_settings
+
+
+ def execute_job_non_local(
+ job_definition_file: str,
+ configuration_file: str = "",
+ mode: str = "yaml",
  tag: str = "",
  run_id: str = "",
  parameters_file: str = "",
  ):
- """
- The entry point to runnable execution of a function. This method would prepare the configurations and
- delegates traversal to the executor
- """
  run_id = utils.generate_run_id(run_id=run_id)
 
  run_context = prepare_configurations(
@@ -374,47 +462,33 @@ def execute_function(
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
+ is_job=True,
  )
 
- executor = run_context.executor
+ assert isinstance(run_context.executor, BaseJobExecutor)
 
- run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
- utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+ if mode == "yaml":
+ # Load the yaml file
+ set_job_spec_from_yaml(run_context, job_definition_file)
+ elif mode == "python":
+ # Call the SDK to get the task
+ set_job_spec_from_python(run_context, job_definition_file)
+
+ assert run_context.job
 
  console.print("Working with context:")
  console.print(run_context)
  console.rule(style="[dark orange]")
 
- # Prepare the graph with a single node
- step_config = {
- "command": command,
- "command_type": "python",
- "type": "task",
- "next": "success",
- "catalog": catalog_config,
- }
- node = graph.create_node(name="executing job", step_config=step_config)
-
- if entrypoint == defaults.ENTRYPOINT.USER.value:
- # Prepare for graph execution
- executor.prepare_for_graph_execution()
-
- logger.info("Executing the job from the user. We are still in the caller's compute environment")
- executor.execute_job(node=node)
-
- elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
- executor.prepare_for_node_execution()
- logger.info("Executing the job from the system. We are in the config's compute environment")
- executor.execute_node(node=node)
-
- # Update the status of the run log
- step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
- run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+ logger.info(
+ "Executing the job from the user. We are still in the caller's compute environment"
+ )
 
- else:
- raise ValueError(f"Invalid entrypoint {entrypoint}")
+ run_context.executor.execute_job(
+ run_context.job, catalog_settings=run_context.job_catalog_settings
+ )
 
- executor.send_return_code()
+ run_context.executor.send_return_code()
 
 
  def fan(
@@ -444,27 +518,40 @@ def fan(
  """
  from runnable import nodes
 
- configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+ configuration_file = os.environ.get(
+ "RUNNABLE_CONFIGURATION_FILE", configuration_file
+ )
 
  run_context = prepare_configurations(
  configuration_file=configuration_file,
- pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
  )
+
+ assert isinstance(run_context.executor, BasePipelineExecutor)
+
+ if mode == "yaml":
+ # Load the yaml file
+ set_pipeline_spec_from_yaml(run_context, pipeline_file)
+ elif mode == "python":
+ # Call the SDK to get the dag
+ set_pipeline_spec_from_python(run_context, pipeline_file)
+
  console.print("Working with context:")
  console.print(run_context)
  console.rule(style="[dark orange]")
 
  executor = run_context.executor
- run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
- utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-
- executor.prepare_for_node_execution()
+ utils.set_runnable_environment_variables(
+ run_id=run_id, configuration_file=configuration_file, tag=tag
+ )
 
  step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
- node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name) # type: ignore
+ node_to_execute, _ = graph.search_node_by_internal_name(
+ run_context.dag, # type: ignore
+ step_internal_name,
+ )
 
  map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
@@ -478,6 +565,6 @@ def fan(
  raise ValueError(f"Invalid mode {mode}")
 
 
- if __name__ == "__main__":
- # This is only for perf testing purposes.
- prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
+ # if __name__ == "__main__":
+ # # This is only for perf testing purposes.
+ # prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")