runnable-0.13.0-py3-none-any.whl → runnable-0.16.0-py3-none-any.whl

Files changed (64)
  1. runnable/__init__.py +1 -12
  2. runnable/catalog.py +29 -5
  3. runnable/cli.py +268 -215
  4. runnable/context.py +10 -3
  5. runnable/datastore.py +212 -53
  6. runnable/defaults.py +13 -55
  7. runnable/entrypoints.py +270 -183
  8. runnable/exceptions.py +28 -2
  9. runnable/executor.py +133 -86
  10. runnable/graph.py +37 -13
  11. runnable/nodes.py +50 -22
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +230 -66
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +99 -41
  17. runnable/utils.py +59 -39
  18. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
  19. runnable-0.16.0.dist-info/RECORD +23 -0
  20. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
  21. runnable-0.16.0.dist-info/entry_points.txt +45 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local.py +0 -69
  37. runnable/extensions/executor/local_container/__init__.py +0 -0
  38. runnable/extensions/executor/local_container/implementation.py +0 -446
  39. runnable/extensions/executor/mocked/__init__.py +0 -0
  40. runnable/extensions/executor/mocked/implementation.py +0 -154
  41. runnable/extensions/executor/retry/__init__.py +0 -0
  42. runnable/extensions/executor/retry/implementation.py +0 -168
  43. runnable/extensions/nodes.py +0 -870
  44. runnable/extensions/run_log_store/__init__.py +0 -0
  45. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  47. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  50. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  51. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  52. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  53. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  54. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  55. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  56. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  57. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  58. runnable/extensions/secrets/__init__.py +0 -0
  59. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  61. runnable/integration.py +0 -192
  62. runnable-0.13.0.dist-info/RECORD +0 -63
  63. runnable-0.13.0.dist-info/entry_points.txt +0 -41
  64. {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/entrypoints.py CHANGED
@@ -9,12 +9,16 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
 from rich.table import Column
 
 import runnable.context as context
-from runnable import console, defaults, graph, task_console, utils
+from runnable import console, defaults, graph, task_console, tasks, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
+print("")  # removes the buffer print
+
+
 def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -29,19 +33,18 @@ def get_default_configs() -> RunnableConfig:
 def prepare_configurations(
     run_id: str,
     configuration_file: str = "",
-    pipeline_file: str = "",
     tag: str = "",
     parameters_file: str = "",
-    force_local_executor: bool = False,
+    is_job: bool = False,
 ) -> context.Context:
     """
+    Sets up everything needed
     Replace the placeholders in the dag/config against the variables file.
 
     Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it.
 
     Args:
         variables_file (str): The variables file, if used or None
-        pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time
 
@@ -53,45 +56,87 @@ def prepare_configurations(
     variables = utils.gather_variables()
 
     templated_configuration = {}
-    configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     if configuration_file:
-        templated_configuration = utils.load_yaml(configuration_file) or {}
+        templated_configuration = utils.load_yaml(configuration_file)
 
-    configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
+    # apply variables
+    configuration = cast(
+        RunnableConfig, utils.apply_variables(templated_configuration, variables)
+    )
+
+    # Since all the services (run_log_store, catalog, secrets, executor) are
+    # dynamically loaded via stevedore, we cannot validate the configuration
+    # before they are passed to the service.
 
     logger.info(f"Resolved configurations: {configuration}")
 
     # Run log settings, configuration over-rides everything
-    run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
+    # The user config has run-log-store while internally we use run_log_store
+    run_log_config: Optional[ServiceConfig] = configuration.get("run-log-store", None)  # type: ignore
     if not run_log_config:
-        run_log_config = cast(ServiceConfig, runnable_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE))
+        run_log_config = cast(
+            ServiceConfig,
+            runnable_defaults.get("run-log-store", defaults.DEFAULT_RUN_LOG_STORE),
+        )
     run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)
 
     # Catalog handler settings, configuration over-rides everything
     catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None)
     if not catalog_config:
-        catalog_config = cast(ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG))
+        catalog_config = cast(
+            ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG)
+        )
     catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config)
 
     # Secret handler settings, configuration over-rides everything
     secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None)
     if not secrets_config:
-        secrets_config = cast(ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS))
+        secrets_config = cast(
+            ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS)
+        )
     secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config)
 
     # pickler
-    pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
+    pickler_config = cast(
+        ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER)
+    )
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
 
-    # executor configurations, configuration over rides everything
-    executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
-    if force_local_executor:
-        executor_config = ServiceConfig(type="local", config={})
-
-    if not executor_config:
-        executor_config = cast(ServiceConfig, runnable_defaults.get("executor", defaults.DEFAULT_EXECUTOR))
-    configured_executor = utils.get_provider_by_name_and_type("executor", executor_config)
+    if not is_job:
+        # executor configurations, configuration over rides everything
+        executor_config: Optional[ServiceConfig] = configuration.get(
+            "pipeline-executor", None
+        )  # type: ignore
+        # as pipeline-executor is not a valid key
+        if not executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get(
+                    "pipeline-executor", defaults.DEFAULT_PIPELINE_EXECUTOR
+                ),
+            )
+        configured_executor = utils.get_provider_by_name_and_type(
+            "pipeline_executor", executor_config
+        )
+    else:
+        # executor configurations, configuration over rides everything
+        job_executor_config: Optional[ServiceConfig] = configuration.get(
+            "job-executor", None
+        )  # type: ignore
+        if not job_executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
+            )
+
+        assert job_executor_config, "Job executor is not provided"
+        configured_executor = utils.get_provider_by_name_and_type(
+            "job_executor", job_executor_config
+        )
 
     # Construct the context
     run_context = context.Context(
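For reference, prepare_configurations now reads hyphenated keys from the user-facing configuration while the stevedore provider lookups keep the underscored names. Below is a minimal sketch of a resolved configuration as the code above would see it; the "type" values are illustrative placeholders, not a list of shipped plugins:

```python
# Hypothetical resolved configuration (what utils.load_yaml + utils.apply_variables could yield).
# Key names mirror the lookups above; the "type" values are placeholders.
configuration = {
    "run-log-store": {"type": "file-system", "config": {}},
    "catalog": {"type": "file-system", "config": {}},
    "secrets": {"type": "do-nothing", "config": {}},
    "pipeline-executor": {"type": "local", "config": {}},  # consulted when is_job is False
    "job-executor": {"type": "local", "config": {}},       # consulted when is_job is True
}
```

Any key that is absent falls back to runnable_defaults and ultimately to the defaults.DEFAULT_* values referenced above.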
@@ -107,38 +152,45 @@ def prepare_configurations(
         parameters_file=parameters_file,
     )
 
-    if pipeline_file:
-        if pipeline_file.endswith(".py"):
-            # converting a pipeline defined in python to a dag in yaml
-            module_file = pipeline_file.strip(".py")
-            module, func = utils.get_module_and_attr_names(module_file)
-            sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
-            imported_module = importlib.import_module(module)
+    context.run_context = run_context
 
-            os.environ["RUNNABLE_PY_TO_YAML"] = "true"
-            dag = getattr(imported_module, func)().return_dag()
+    return run_context
 
-        else:
-            pipeline_config = utils.load_yaml(pipeline_file)
 
-            logger.info("The input pipeline:")
-            logger.info(json.dumps(pipeline_config, indent=4))
+def set_pipeline_spec_from_yaml(run_context: context.Context, pipeline_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    pipeline_config = utils.load_yaml(pipeline_file)
+    logger.info("The input pipeline:")
+    logger.info(json.dumps(pipeline_config, indent=4))
 
-            dag_config = pipeline_config["dag"]
+    dag_config = pipeline_config["dag"]
 
-            dag_hash = utils.get_dag_hash(dag_config)
-            dag = graph.create_graph(dag_config)
-            run_context.dag_hash = dag_hash
+    dag_hash = utils.get_dag_hash(dag_config)
+    dag = graph.create_graph(dag_config)
+    run_context.dag_hash = dag_hash
 
-        run_context.pipeline_file = pipeline_file
-        run_context.dag = dag
+    run_context.pipeline_file = pipeline_file
+    run_context.dag = dag
 
-    context.run_context = run_context
 
-    return run_context
+def set_pipeline_spec_from_python(run_context: context.Context, python_module: str):
+    # Call the SDK to get the dag
+    # Import the module and call the function to get the dag
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
 
+    run_context.from_sdk = True
+    dag = getattr(imported_module, func)().return_dag()
 
-def execute(
+    run_context.pipeline_file = python_module
+    run_context.dag = dag
+
+
+def execute_pipeline_yaml_spec(
     pipeline_file: str,
     configuration_file: str = "",
     tag: str = "",
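set_pipeline_spec_from_python only requires that the imported attribute, once called, returns an object exposing return_dag(). A rough sketch of such a module follows, assuming the Pipeline and PythonTask classes from runnable's SDK (runnable/sdk.py, also changed in this release but not shown here); the module, function, and step names are made up, and the exact module/function path handling is left to utils.get_module_and_attr_names:

```python
# my_pipeline.py -- hypothetical module; the entrypoint would import it and
# call main().return_dag() on the returned Pipeline object.
from runnable import Pipeline, PythonTask


def hello():
    print("hello from a step")


def main() -> Pipeline:
    # A single-step pipeline built through the SDK; terminate_with_success marks
    # this step as the last one before the success node.
    step = PythonTask(name="hello", function=hello, terminate_with_success=True)
    return Pipeline(steps=[step])
```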
@@ -147,66 +199,82 @@ def execute(
 ):
     # pylint: disable=R0914,R0913
     """
-    The entry point to runnable execution. This method would prepare the configurations and delegates traversal to the
-    executor
-
-    Args:
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    The entry point to runnable execution for any YAML based spec.
+    The result could:
+    - Execution of the pipeline if its local executor
+    - Rendering of the spec in the case of non local executor
     """
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
 
-    console.print("Working with context:")
-    console.print(run_context)
-    console.rule(style="[dark orange]")
+    assert isinstance(run_context.executor, BasePipelineExecutor)
 
+    set_pipeline_spec_from_yaml(run_context, pipeline_file)
     executor = run_context.executor
 
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-
-    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     # Prepare for graph execution
-    executor.prepare_for_graph_execution()
+    executor._set_up_run_log(exists_ok=False)
+
+    console.print("Working with context:")
+    console.print(run_context)
+    console.rule(style="[dark orange]")
 
     logger.info(f"Executing the graph: {run_context.dag}")
     with Progress(
-        TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
+        TextColumn(
+            "[progress.description]{task.description}", table_column=Column(ratio=2)
+        ),
         BarColumn(table_column=Column(ratio=1), style="dark_orange"),
         TimeElapsedColumn(table_column=Column(ratio=1)),
         console=console,
         expand=True,
     ) as progress:
-        pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
+        pipeline_execution_task = progress.add_task(
+            "[dark_orange] Starting execution .. ", total=1
+        )
         try:
             run_context.progress = progress
             executor.execute_graph(dag=run_context.dag)  # type: ignore
 
-            # Non local executors have no run logs
-            if not executor._local:
+            if not executor._is_local:
+                # Non local executors only traverse the graph and do not execute the nodes
                 executor.send_return_code(stage="traversal")
                 return
 
-            run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )
 
             if run_log.status == defaults.SUCCESS:
-                progress.update(pipeline_execution_task, description="[green] Success", completed=True)
+                progress.update(
+                    pipeline_execution_task,
+                    description="[green] Success",
+                    completed=True,
+                )
             else:
-                progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
+                progress.update(
+                    pipeline_execution_task, description="[red] Failed", completed=True
+                )
         except Exception as e:  # noqa: E722
             console.print(e, style=defaults.error_style)
-            progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
-            run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+            progress.update(
+                pipeline_execution_task,
+                description="[red] Errored execution",
+                completed=True,
+            )
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )
             run_log.status = defaults.FAIL
             run_context.run_log_store.add_branch_log(run_log, run_context.run_id)
             raise e
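As a usage sketch, the renamed entry point can be driven directly; the paths below are illustrative, and in practice runnable/cli.py is what wires command-line arguments into this call:

```python
# Hypothetical direct invocation of the renamed entry point.
from runnable import entrypoints

entrypoints.execute_pipeline_yaml_spec(
    pipeline_file="examples/mocking.yaml",  # illustrative YAML spec path
    configuration_file="",                  # falls back to RUNNABLE_CONFIGURATION_FILE / defaults
    run_id="",                              # generated by utils.generate_run_id when left empty
)
```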
@@ -219,62 +287,64 @@ def execute_single_node(
     pipeline_file: str,
     step_name: str,
     map_variable: str,
+    mode: str,
     run_id: str,
     tag: str = "",
     parameters_file: str = "",
 ):
     """
-    The entry point into executing a single node of runnable. Orchestration modes should extensively use this
-    entry point.
-
-    It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
-
-    Args:
-        variables_file (str): The variables file, if used or None
-        step_name : The name of the step to execute in dot path convention
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    This entry point is triggered during the execution of the pipeline
+        - non local execution environments
 
+    The mode defines how the pipeline spec is provided to the runnable
+        - yaml
+        - python
     """
     from runnable import nodes
 
-    task_console.print(f"Executing the single node: {step_name} with map variable: {map_variable}")
+    task_console.print(
+        f"Executing the single node: {step_name} with map variable: {map_variable}"
+    )
 
-    configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
+    assert run_context.dag
+
     task_console.print("Working with context:")
     task_console.print(run_context)
     task_console.rule(style="[dark orange]")
 
     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-
-    executor.prepare_for_node_execution()
-
-    # TODO: may be make its own entry point
-    # if not run_context.dag:
-    #     # There are a few entry points that make graph dynamically and do not have a dag defined statically.
-    #     run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
-    #     run_context.dag = graph.create_graph(run_log.run_config["pipeline"])
-    assert run_context.dag
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name)
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag, step_internal_name
+    )
 
     logger.info("Executing the single node of : %s", node_to_execute)
-    ## This step is where we save the log file
+    ## This step is where we save output of the function/shell command
     try:
         executor.execute_node(node=node_to_execute, map_variable=map_variable_dict)
     finally:
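For orientation, a hedged sketch of how a non local executor might reach this entry point once per scheduled node; the values are placeholders, and real callers render this through the CLI rather than importing it directly:

```python
# Hypothetical per-node invocation, as a non local executor might issue it.
from runnable import entrypoints

entrypoints.execute_single_node(
    configuration_file="",                  # or taken from RUNNABLE_CONFIGURATION_FILE
    pipeline_file="examples/mocking.yaml",  # illustrative spec path
    step_name="hello",                      # illustrative step name, dot path convention
    map_variable="{}",                      # JSON-encoded map variable for map branches
    mode="yaml",                            # "yaml" or "python", matching how the spec was written
    run_id="abc",
)
```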
@@ -288,23 +358,15 @@ def execute_single_node(
         run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)
         os.remove(log_file_name)
 
-    # executor.send_return_code(stage="execution")
-
 
-def execute_notebook(
-    entrypoint: str,
-    notebook_file: str,
-    catalog_config: dict,
-    configuration_file: str,
-    notebook_output_path: str = "",
+def execute_job_yaml_spec(
+    job_definition_file: str,
+    configuration_file: str = "",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-    """
-    The entry point to runnable execution of a notebook. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
+    # A job and task are internally the same.
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
@@ -312,61 +374,87 @@ def execute_notebook(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )
 
+    assert isinstance(run_context.executor, BaseJobExecutor)
+
     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
-    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
+
+    run_context.job_definition_file = job_definition_file
+
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info(
+        "Executing the job from the user."
+        f"job definition: {job_definition_file}, config: {job_config}"
+    )
+    assert job_config.get("type"), "Job type is not provided"
 
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
-    step_config = {
-        "command": notebook_file,
-        "command_type": "notebook",
-        "notebook_output_path": notebook_output_path,
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
+    # A hack where we create a task node and get our job/catalog settings
+    catalog_config: list[str] = job_config.pop("catalog", {})
 
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
+    # rename the type to command_type of task
+    job_config["command_type"] = job_config.pop("type")
+    job = tasks.create_task(job_config)
 
-        logger.info("Executing the job from the user. We are still in the caller's compute environment")
-        executor.execute_job(node=node)
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )
 
-    elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
-        executor.prepare_for_node_execution()
-        logger.info("Executing the job from the system. We are in the config's compute environment")
-        executor.execute_node(node=node)
+    assert isinstance(executor, BaseJobExecutor)
+    executor.submit_job(job, catalog_settings=catalog_config)
 
-        # Update the status of the run log
-        step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-        run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    executor.send_return_code()
 
-    else:
-        raise ValueError(f"Invalid entrypoint {entrypoint}")
 
-    executor.send_return_code()
+def set_job_spec_from_yaml(run_context: context.Context, job_definition_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info("The input job definition file:")
+    logger.info(json.dumps(job_config, indent=4))
 
+    catalog_config: list[str] = job_config.pop("catalog", {})
 
-def execute_function(
-    entrypoint: str,
-    command: str,
-    catalog_config: dict,
-    configuration_file: str,
+    job_config["command_type"] = job_config.pop("type")
+
+    run_context.job_definition_file = job_definition_file
+    run_context.job = tasks.create_task(job_config)
+    run_context.job_catalog_settings = catalog_config
+
+
+def set_job_spec_from_python(run_context: context.Context, python_module: str):
+    # Import the module and call the function to get the task
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
+
+    run_context.from_sdk = True
+    task = getattr(imported_module, func)().return_task()
+    catalog_settings = getattr(imported_module, func)().return_catalog_settings()
+
+    run_context.job_definition_file = python_module
+    run_context.job = task
+    run_context.job_catalog_settings = catalog_settings
+
+
+def execute_job_non_local(
+    job_definition_file: str,
+    configuration_file: str = "",
+    mode: str = "yaml",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-    """
-    The entry point to runnable execution of a function. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
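To make the job spec handling above concrete, here is a small sketch of the dictionary transformation that both execute_job_yaml_spec and set_job_spec_from_yaml perform before handing off to tasks.create_task; the command and catalog values are invented for illustration:

```python
# Hypothetical job definition, as utils.load_yaml might return it.
job_config = {
    "type": "python",                    # renamed to command_type below
    "command": "my_module.my_function",  # illustrative command
    "catalog": ["data/output.csv"],      # illustrative catalog settings
}

catalog_config = job_config.pop("catalog", {})       # pulled out as the catalog settings
job_config["command_type"] = job_config.pop("type")  # the rename performed above
# job = tasks.create_task(job_config)                # then submitted or stored on the run context
```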
@@ -374,47 +462,33 @@ def execute_function(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )
 
-    executor = run_context.executor
+    assert isinstance(run_context.executor, BaseJobExecutor)
 
-    run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
-    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    if mode == "yaml":
+        # Load the yaml file
+        set_job_spec_from_yaml(run_context, job_definition_file)
+    elif mode == "python":
+        # Call the SDK to get the task
+        set_job_spec_from_python(run_context, job_definition_file)
+
+    assert run_context.job
 
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
-    # Prepare the graph with a single node
-    step_config = {
-        "command": command,
-        "command_type": "python",
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
-
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
-
-        logger.info("Executing the job from the user. We are still in the caller's compute environment")
-        executor.execute_job(node=node)
-
-    elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
-        executor.prepare_for_node_execution()
-        logger.info("Executing the job from the system. We are in the config's compute environment")
-        executor.execute_node(node=node)
-
-        # Update the status of the run log
-        step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-        run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )
 
-    else:
-        raise ValueError(f"Invalid entrypoint {entrypoint}")
+    run_context.executor.execute_job(
+        run_context.job, catalog_settings=run_context.job_catalog_settings
+    )
 
-    executor.send_return_code()
+    run_context.executor.send_return_code()
 
 
 def fan(
@@ -444,27 +518,40 @@ def fan(
     """
     from runnable import nodes
 
-    configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-
-    executor.prepare_for_node_execution()
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name)  # type: ignore
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag,  # type: ignore
+        step_internal_name,
+    )
 
     map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
@@ -478,6 +565,6 @@ def fan(
         raise ValueError(f"Invalid mode {mode}")
 
 
-if __name__ == "__main__":
-    # This is only for perf testing purposes.
-    prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
+# if __name__ == "__main__":
+#     # This is only for perf testing purposes.
+#     prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")