argo-kedro 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/PKG-INFO +2 -1
  2. argo_kedro-0.1.7/argo_kedro/framework/cli/cli.py +400 -0
  3. argo_kedro-0.1.7/argo_kedro/framework/hooks/argo_hook.py +66 -0
  4. argo_kedro-0.1.7/argo_kedro/pipeline/__init__.py +4 -0
  5. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/pipeline/fused_pipeline.py +9 -4
  6. argo_kedro-0.1.7/argo_kedro/pipeline/node.py +26 -0
  7. argo_kedro-0.1.7/argo_kedro/templates/argo.yml +9 -0
  8. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/templates/argo_wf_spec.tmpl +25 -1
  9. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/PKG-INFO +2 -1
  10. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/SOURCES.txt +9 -1
  11. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/entry_points.txt +3 -0
  12. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/requires.txt +1 -0
  13. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/pyproject.toml +6 -2
  14. argo_kedro-0.1.7/tests/__init__.py +0 -0
  15. argo_kedro-0.1.7/tests/cli/__init__.py +0 -0
  16. argo_kedro-0.1.7/tests/cli/test_cli.py +206 -0
  17. argo_kedro-0.1.7/tests/pipeline/__init__.py +0 -0
  18. argo_kedro-0.1.7/tests/pipeline/test_fused_pipeline.py +45 -0
  19. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/uv.lock +23 -0
  20. argo_kedro-0.1.5/argo_kedro/framework/cli/cli.py +0 -242
  21. argo_kedro-0.1.5/argo_kedro/pipeline/__init__.py +0 -3
  22. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/.gitignore +0 -0
  23. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/.python-version +0 -0
  24. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/LICENSE +0 -0
  25. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/MANIFEST.in +0 -0
  26. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/README.md +0 -0
  27. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/framework/__init__.py +0 -0
  28. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/framework/cli/__init__.py +0 -0
  29. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/runners/__init__.py +0 -0
  30. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro/runners/fuse_runner.py +0 -0
  31. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/dependency_links.txt +0 -0
  32. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/argo_kedro.egg-info/top_level.txt +0 -0
  33. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/locally_building_guide.md +0 -0
  34. {argo_kedro-0.1.5 → argo_kedro-0.1.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argo-kedro
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Kedro plugin for running pipelines on Argo Workflows
5
5
  Author-email: Laurens Vijnck <laurens@everycure.org>, Nelson Alfonso <nelson@everycure.org>
6
6
  License: MIT
@@ -26,6 +26,7 @@ Requires-Dist: kedro
26
26
  Requires-Dist: pyyaml>=6.0.2
27
27
  Requires-Dist: jinja2>=3.0.0
28
28
  Requires-Dist: kubernetes>=35.0.0
29
+ Requires-Dist: pydantic>=2.0.0
29
30
  Dynamic: license-file
30
31
 
31
32
  # argo-kedro
@@ -0,0 +1,400 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Iterable, Union
4
+ from logging import getLogger
5
+
6
+ import click
7
+ import yaml
8
+ from kubernetes import config
9
+ from kubernetes.dynamic import DynamicClient
10
+ from jinja2 import Environment, FileSystemLoader
11
+ from kedro.framework.cli.utils import CONTEXT_SETTINGS, KedroCliError
12
+ from kedro.framework.project import pipelines, settings
13
+ from kedro.framework.session import KedroSession
14
+ from kedro.framework.startup import bootstrap_project
15
+ from kedro.utils import find_kedro_project, is_kedro_project
16
+ from kedro.framework.cli.project import (
17
+ ASYNC_ARG_HELP,
18
+ CONF_SOURCE_HELP,
19
+ FROM_INPUTS_HELP,
20
+ FROM_NODES_HELP,
21
+ LOAD_VERSION_HELP,
22
+ NODE_ARG_HELP,
23
+ PARAMS_ARG_HELP,
24
+ PIPELINE_ARG_HELP,
25
+ RUNNER_ARG_HELP,
26
+ TAG_ARG_HELP,
27
+ TO_NODES_HELP,
28
+ TO_OUTPUTS_HELP,
29
+ project_group,
30
+ )
31
+ from kedro.framework.project import pipelines as kedro_pipelines
32
+ from kedro.pipeline import Pipeline
33
+ from kedro.pipeline.node import Node
34
+ from kedro.runner.sequential_runner import SequentialRunner
35
+ from argo_kedro.runners.fuse_runner import FusedRunner
36
+ from argo_kedro.framework.hooks.argo_hook import MachineType
37
+ from argo_kedro.pipeline.node import ArgoNode
38
+
39
+ LOGGER = getLogger(__name__)
40
+ ARGO_TEMPLATES_DIR_PATH = Path(__file__).parent.parent.parent / "templates"
41
+
42
+
43
+ def render_jinja_template(
44
+ src: Union[str, Path], **kwargs
45
+ ) -> str:
46
+ """This functions enable to copy a file and render the
47
+ tags (identified by {{ my_tag }}) with the values provided in kwargs.
48
+
49
+ Arguments:
50
+ src {Union[str, Path]} -- The path to the template which should be rendered
51
+
52
+ Returns:
53
+ str -- The rendered template content with all tags replaced.
54
+ """
55
+ src = Path(src)
56
+ template_loader = FileSystemLoader(searchpath=src.parent.as_posix())
57
+ template_env = Environment(loader=template_loader, keep_trailing_newline=True)
58
+ template = template_env.get_template(src.name)
59
+ return template.render(**kwargs)
60
+
61
+
62
+ def write_jinja_template(
63
+ src: Union[str, Path], dst: Union[str, Path], **kwargs
64
+ ) -> None:
65
+ """Write a template file and replace tis jinja's tags
66
+ (identified by {{ my_tag }}) with the values provided in kwargs.
67
+
68
+ Arguments:
69
+ src {Union[str, Path]} -- Path to the template which should be rendered
70
+ dst {Union[str, Path]} -- Path where the rendered template should be saved
71
+ """
72
+ dst = Path(dst)
73
+ parsed_template = render_jinja_template(src, **kwargs)
74
+ with open(dst, "w") as file_handler:
75
+ file_handler.write(parsed_template)
76
+
77
+
78
+ @click.group(context_settings=CONTEXT_SETTINGS)
79
+ def cli():
80
+ pass
81
+
82
+ @cli.command(name="run")
83
+ @click.option("--pipeline", "-p", type=str, default="__default__", help="Name of the pipeline to execute")
84
+ @click.option("--env", "-e", type=str, default=None, help="Kedro environment to run the pipeline in")
85
+ @click.option("--config", "-c", type=str, multiple=True, help="Extra config to pass to KedroContext")
86
+ @click.option("--params", type=str, multiple=True, help="Override parameters")
87
+ @click.option("--tags", "-t", type=str, multiple=True, help=TAG_ARG_HELP)
88
+ @click.option("--nodes", "-n", type=str, multiple=True, help="Run only nodes with specified names")
89
+ @click.option("--to-nodes", type=str, multiple=True, help="Run a sub-pipeline up to certain nodes")
90
+ @click.option("--from-nodes", type=str, multiple=True, help="Run a sub-pipeline starting from certain nodes")
91
+ @click.option("--from-inputs", type=str, multiple=True, help="Run a sub-pipeline starting from nodes that produce these inputs")
92
+ @click.option("--to-outputs", type=str, multiple=True, help="Run a sub-pipeline up to nodes that produce these outputs")
93
+ @click.option("--load-version", type=str, multiple=True, help="Specify a particular dataset version")
94
+ @click.option("--namespaces", type=str, multiple=True, help="Namespaces of the pipeline")
95
+ @click.pass_obj
96
+ def _run_command_impl(
97
+ ctx,
98
+ pipeline: str,
99
+ env: str,
100
+ config: tuple,
101
+ params: tuple,
102
+ tags: list[str],
103
+ nodes: tuple,
104
+ to_nodes: tuple,
105
+ from_nodes: tuple,
106
+ from_inputs: tuple,
107
+ to_outputs: tuple,
108
+ load_version: tuple,
109
+ namespaces: Iterable[str],
110
+ ):
111
+ """Run the pipeline with the FusedRunner."""
112
+
113
+ LOGGER.warning(f"Using plugin entrypoint")
114
+
115
+ load_versions = None
116
+ if load_version:
117
+ load_versions = {}
118
+ for version_spec in load_version:
119
+ if ":" in version_spec:
120
+ dataset, version = version_spec.split(":", 1)
121
+ load_versions[dataset] = version
122
+
123
+ conf_source = getattr(ctx, "conf_source", None)
124
+ env_value = env or getattr(ctx, "env", None)
125
+
126
+ with KedroSession.create(
127
+ env=env_value,
128
+ conf_source=conf_source,
129
+ ) as session:
130
+
131
+ session.run(
132
+ pipeline_name=pipeline,
133
+ tags=tags,
134
+ runner=FusedRunner(pipeline_name=pipeline),
135
+ node_names=list(nodes) if nodes else None,
136
+ from_nodes=list(from_nodes) if from_nodes else None,
137
+ to_nodes=list(to_nodes) if to_nodes else None,
138
+ from_inputs=list(from_inputs) if from_inputs else None,
139
+ to_outputs=list(to_outputs) if to_outputs else None,
140
+ load_versions=load_versions,
141
+ namespaces=namespaces,
142
+ )
143
+
144
+ class KedroClickGroup(click.Group):
145
+ def reset_commands(self):
146
+ self.commands = {}
147
+
148
+ # add commands on the fly based on conditions
149
+ if is_kedro_project(find_kedro_project(Path.cwd())):
150
+ self.add_command(init)
151
+ self.add_command(submit)
152
+
153
+ def list_commands(self, ctx):
154
+ self.reset_commands()
155
+ commands_list = sorted(self.commands)
156
+ return commands_list
157
+
158
+ def get_command(self, ctx, cmd_name):
159
+ self.reset_commands()
160
+ return self.commands.get(cmd_name)
161
+
162
+ @click.group(name="argo")
163
+ def commands():
164
+ pass
165
+
166
+ @commands.command(name="argo", cls=KedroClickGroup)
167
+ def argo_commands():
168
+ """Use mlflow-specific commands inside kedro project."""
169
+ pass # pragma: no cover
170
+
171
+ @argo_commands.command()
172
+ @click.option(
173
+ "--env",
174
+ "-e",
175
+ default="base",
176
+ help="The name of the kedro environment where the 'argo.yml' should be created. Default to 'base'",
177
+ )
178
+ @click.option(
179
+ "--force",
180
+ "-f",
181
+ is_flag=True,
182
+ default=False,
183
+ help="Update the template without any checks.",
184
+ )
185
+ @click.option(
186
+ "--silent",
187
+ "-s",
188
+ is_flag=True,
189
+ default=False,
190
+ help="Should message be logged when files are modified?",
191
+ )
192
+ def init(env: str, force: bool, silent: bool):
193
+ """Updates the template of a kedro project.
194
+ Running this command is mandatory to use argo-kedro.
195
+ This adds "conf/base/argo.yml": This is a configuration file
196
+ used for run parametrization when calling the "kedro run" command.
197
+ """
198
+
199
+ # get constants
200
+ argo_yml = "argo.yml"
201
+ project_path = find_kedro_project(Path.cwd()) or Path.cwd()
202
+ project_metadata = bootstrap_project(project_path)
203
+ argo_yml_path = project_path / settings.CONF_SOURCE / env / argo_yml
204
+
205
+ # argo.yml is just a static file,
206
+ # rendered from the packaged template for the chosen environment
207
+ if argo_yml_path.is_file() and not force:
208
+ click.secho(
209
+ click.style(
210
+ f"A 'argo.yml' already exists at '{argo_yml_path}' You can use the ``--force`` option to override it.",
211
+ fg="red",
212
+ )
213
+ )
214
+ else:
215
+ try:
216
+ write_jinja_template(
217
+ src=ARGO_TEMPLATES_DIR_PATH / argo_yml,
218
+ is_cookiecutter=False,
219
+ dst=argo_yml_path,
220
+ python_package=project_metadata.package_name,
221
+ )
222
+ if not silent:
223
+ click.secho(
224
+ click.style(
225
+ f"'{settings.CONF_SOURCE}/{env}/{argo_yml}' successfully updated.",
226
+ fg="green",
227
+ )
228
+ )
229
+ except FileNotFoundError:
230
+ click.secho(
231
+ click.style(
232
+ f"No env '{env}' found. Please check this folder exists inside '{settings.CONF_SOURCE}' folder.",
233
+ fg="red",
234
+ )
235
+ )
236
+
237
+ @argo_commands.command(name="submit")
238
+ @click.option("--pipeline", "-p", type=str, default="__default__", help="Specify which pipeline to execute")
239
+ @click.option("--environment", "-e", type=str, default="base", help="Kedro environment to execute in")
240
+ @click.option("--image", type=str, required=True, help="Image to execute")
241
+ @click.pass_obj
242
+ def submit(
243
+ ctx,
244
+ pipeline: str,
245
+ image: str,
246
+ environment: str
247
+ ):
248
+ """Submit the pipeline to Argo."""
249
+ LOGGER.info("Loading spec template..")
250
+
251
+ loader = FileSystemLoader(searchpath=ARGO_TEMPLATES_DIR_PATH)
252
+ template_env = Environment(loader=loader, trim_blocks=True, lstrip_blocks=True)
253
+ template = template_env.get_template("argo_wf_spec.tmpl")
254
+
255
+ LOGGER.info("Rendering Argo spec...")
256
+
257
+ project_path = find_kedro_project(Path.cwd()) or Path.cwd()
258
+ bootstrap_project(project_path)
259
+ with KedroSession.create(
260
+ project_path=project_path,
261
+ env=environment,
262
+ ) as session:
263
+ context = session.load_context()
264
+ pipeline_tasks = get_argo_dag(
265
+ kedro_pipelines[pipeline],
266
+ machine_types=context.argo.machine_types,
267
+ default_machine_type=context.argo.default_machine_type
268
+ )
269
+
270
+ # Render the template
271
+ rendered_template = template.render(
272
+ pipeline_tasks=[task.to_dict() for task in pipeline_tasks.values()],
273
+ pipeline_name=pipeline,
274
+ image=image,
275
+ namespace=context.argo.namespace,
276
+ environment=environment
277
+ )
278
+
279
+ # Load as yaml
280
+ yaml_data = yaml.safe_load(rendered_template)
281
+ yaml_without_anchors = yaml.dump(yaml_data, sort_keys=False, default_flow_style=False)
282
+ save_argo_template(
283
+ yaml_without_anchors,
284
+ )
285
+
286
+ # Use kubeconfig to submit to kubernetes
287
+ config.load_kube_config()
288
+ client = DynamicClient(config.new_client_from_config())
289
+
290
+ resource = client.resources.get(
291
+ api_version=yaml_data["apiVersion"],
292
+ kind=yaml_data["kind"],
293
+ )
294
+
295
+ response = resource.create(
296
+ body=yaml_data,
297
+ namespace=context.argo.namespace
298
+ )
299
+
300
+ workflow_name = response.metadata.name
301
+ LOGGER.info(f"Workflow submitted successfully: {workflow_name}")
302
+ LOGGER.info(f"View workflow at: https://argo.ai-platform.dev.everycure.org/workflows/{context.argo.namespace}/{workflow_name}")
303
+
304
+ return workflow_name
305
+
306
+
307
+ def save_argo_template(argo_template: str) -> str:
308
+ file_path = Path("templates") / "argo-workflow-template.yml"
309
+ file_path.parent.mkdir(parents=True, exist_ok=True)
310
+ with open(file_path, "w") as f:
311
+ f.write(argo_template)
312
+ return str(file_path)
313
+
314
+
315
+ class ArgoTask:
316
+ """Class to model an Argo task.
317
+
318
+ Argo's operating model slightly differs from Kedro's, i.e., while Kedro uses dataset
319
+ dependencies to model relationships, Argo uses task dependencies."""
320
+
321
+ def __init__(self, node: Node, machine_type: MachineType):
322
+ self._node = node
323
+ self._parents = []
324
+ self._machine_type = machine_type
325
+
326
+ @property
327
+ def node(self):
328
+ return self._node
329
+
330
+ def add_parents(self, nodes: List[Node]):
331
+ self._parents.extend(nodes)
332
+
333
+ def to_dict(self):
334
+ return {
335
+ "name": clean_name(self._node.name),
336
+ "nodes": self._node.name,
337
+ "deps": [clean_name(parent.name) for parent in sorted(self._parents)],
338
+ "mem": self._machine_type.mem,
339
+ "cpu": self._machine_type.cpu,
340
+ "num_gpu": self._machine_type.num_gpu,
341
+ }
342
+
343
+
344
+ def get_argo_dag(
345
+ pipeline: Pipeline,
346
+ machine_types: dict[str, MachineType],
347
+ default_machine_type: str,
348
+ ) -> List[Dict[str, Any]]:
349
+ """Function to convert the Kedro pipeline into Argo Tasks. The function
350
+ iterates the nodes of the pipeline and generates Argo tasks with dependencies.
351
+ These dependencies are inferred based on the input and output datasets for
352
+ each node.
353
+
354
+ NOTE: This function is now agnostic to the fact that nodes might be fused. The nodes
355
+ returned as part of the pipeline may optionally contain FusedNodes, which have correct
356
+ inputs and outputs from the perspective of the Argo Task.
357
+ """
358
+ tasks = {}
359
+
360
+ # The `grouped_nodes` property returns the nodes list in topological order,
361
+ # allowing us to easily translate the Kedro DAG to an Argo WF.
362
+ for group in pipeline.grouped_nodes:
363
+ for target_node in group:
364
+ requested_type = (
+     target_node.machine_type
+     if isinstance(target_node, ArgoNode) and target_node.machine_type is not None
+     else default_machine_type
+ )
+ try:
+     task = ArgoTask(target_node, machine_types[requested_type])
+ except KeyError as e:
+     LOGGER.error(f"Machine type `{requested_type}` not found for node `{target_node.name}`")
+     raise KeyError(f"Machine type `{requested_type}` not found for node `{target_node.name}`") from e
369
+
370
+ task.add_parents(
371
+ [
372
+ parent.node
373
+ for parent in tasks.values()
374
+ if set(clean_dependencies(target_node.inputs)) & set(clean_dependencies(parent.node.outputs))
375
+ ]
376
+ )
377
+
378
+ tasks[target_node.name] = task
379
+
380
+ return tasks
381
+
382
+
383
+ def clean_name(name: str) -> str:
384
+ """Function to clean the node name.
385
+
386
+ Args:
387
+ name: name of the node
388
+ Returns:
389
+ Clean node name, according to Argo's requirements
390
+ """
391
+ return re.sub(r"[\W_]+", "-", name).strip("-")
392
+
393
+
394
+ def clean_dependencies(elements) -> List[str]:
395
+ """Function to clean node dependencies.
396
+
397
+ Operates by removing `params:` from the list and dismissing
398
+ the transcoding operator.
399
+ """
400
+ return [el.split("@")[0] for el in elements if not el.startswith("params:")]
@@ -0,0 +1,66 @@
1
+
2
+ import os
3
+ import re
4
+ from logging import Logger, getLogger
5
+ from pathlib import Path
6
+ from tempfile import TemporaryDirectory
7
+ from typing import Any, Union
8
+
9
+ from kedro.config import MissingConfigException
10
+ from kedro.framework.context import KedroContext
11
+ from kedro.framework.hooks import hook_impl
12
+ from kedro.framework.startup import _get_project_metadata
13
+ from kedro.io import CatalogProtocol, DataCatalog
14
+ from kedro.pipeline import Pipeline
15
+ from kedro.pipeline.node import Node
16
+ from omegaconf import OmegaConf
17
+
18
+
19
+ from pydantic import BaseModel
20
+
21
+ class MachineType(BaseModel):
22
+ mem: int
23
+ cpu: int
24
+ num_gpu: int
25
+
26
+ class ArgoConfig(BaseModel):
27
+ namespace: str
28
+ machine_types: dict[str, MachineType]
29
+ default_machine_type: str
30
+
31
+
32
+ class ArgoHook:
33
+ @property
34
+ def _logger(self) -> Logger:
35
+ return getLogger(__name__)
36
+
37
+ @hook_impl
38
+ def after_context_created(
39
+ self,
40
+ context: KedroContext,
41
+ ) -> None:
42
+ """Hooks to be invoked after a `KedroContext` is created. This is the earliest
43
+ hook triggered within a Kedro run. The `KedroContext` stores useful information
44
+ such as `credentials`, `config_loader` and `env`.
45
+ Args:
46
+ context: The context that was created.
47
+ """
48
+ try:
49
+ if "argo" not in context.config_loader.config_patterns.keys():
50
+ context.config_loader.config_patterns.update(
51
+ {"argo": ["argo*", "argo*/**", "**/argo*"]}
52
+ )
53
+ conf_argo_yml = context.config_loader["argo"]
54
+ except MissingConfigException:
55
+ self._logger.warning(
56
+ "No 'argo.yml' config file found in environment. Default configuration will be used. Use ``kedro argo init`` command in CLI to customize the configuration."
57
+ )
58
+ # we create an empty dict to have the same behaviour when the argo.yml
59
+ # is commented out. In this situation there is no MissingConfigException
60
+ # but we got an empty dict
61
+ conf_argo_yml = {}
62
+
63
+ conf_argo_yml = ArgoConfig.model_validate(conf_argo_yml)
64
+ context.__setattr__("argo", conf_argo_yml)
65
+
66
+ argo_hook = ArgoHook()
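
A minimal sketch of how the new pydantic models validate an `argo.yml`-style mapping (assumes pydantic v2; the field names mirror the `MachineType` and `ArgoConfig` models added above):

    from pydantic import BaseModel

    class MachineType(BaseModel):
        mem: int
        cpu: int
        num_gpu: int

    class ArgoConfig(BaseModel):
        namespace: str
        machine_types: dict[str, MachineType]
        default_machine_type: str

    conf = ArgoConfig.model_validate({
        "namespace": "argo-workflows",
        "machine_types": {"default": {"mem": 16, "cpu": 4, "num_gpu": 0}},
        "default_machine_type": "default",
    })
    print(conf.machine_types["default"].cpu)  # 4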
@@ -0,0 +1,4 @@
1
+ from .fused_pipeline import FusedPipeline
2
+ from .node import ArgoNode
3
+
4
+ __all__ = ["FusedPipeline", "ArgoNode"]
@@ -1,14 +1,16 @@
1
1
  from typing import Iterable, List
2
- from kedro.pipeline import Pipeline, Node
2
+ from kedro.pipeline import Pipeline
3
3
  from functools import cached_property
4
+ from argo_kedro.pipeline.node import ArgoNode
5
+ from kedro.pipeline.node import Node
4
6
 
5
- class FusedNode(Node):
7
+ class FusedNode(ArgoNode):
6
8
  """FusedNode is an extension of Kedro's internal node. The FusedNode
7
9
  wraps a set of nodes, and correctly sets its `inputs` and `outputs`
8
10
  allowing it to act as a single unit for execution.
9
11
  """
10
12
 
11
- def __init__(self, nodes: List[Node], name: str):
13
+ def __init__(self, nodes: List[Node], name: str, machine_type: str | None = None):
12
14
  self._nodes = nodes
13
15
  self._name = name
14
16
  self._namespace = None
@@ -17,6 +19,7 @@ class FusedNode(Node):
17
19
  self._confirms = []
18
20
  self._func = lambda: None
19
21
  self._tags = []
22
+ self._machine_type = machine_type
20
23
 
21
24
  for node in nodes:
22
25
  self._inputs.extend(node.inputs)
@@ -49,10 +52,12 @@ class FusedPipeline(Pipeline):
49
52
  name: str,
50
53
  *,
51
54
  tags: str | Iterable[str] | None = None,
55
+ machine_type: str | None = None,
52
56
  ):
53
57
  self._name = name
58
+ self._machine_type = machine_type
54
59
  super().__init__(nodes, tags=tags)
55
60
 
56
61
  @property
57
62
  def nodes(self) -> list[Node]:
58
- return [FusedNode(self._nodes, name=self._name)]
63
+ return [FusedNode(self._nodes, name=self._name, machine_type=self._machine_type)]
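
A brief usage sketch of the updated `FusedPipeline`, showing the new `machine_type` argument being forwarded to the single `FusedNode` it exposes (illustrative node names; assumes kedro and argo_kedro are installed):

    from kedro.pipeline import node
    from argo_kedro.pipeline import FusedPipeline

    fused = FusedPipeline(
        [
            node(lambda x: x, inputs="data", outputs="model", name="train"),
            node(lambda m: m, inputs="model", outputs="scores", name="evaluate"),
        ],
        name="fused_modelling",
        machine_type="n1-standard-8",  # hypothetical machine_types key from argo.yml
    )

    # The fused pipeline collapses to one FusedNode carrying the machine type.
    assert len(fused.nodes) == 1
    assert fused.nodes[0].machine_type == "n1-standard-8"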
@@ -0,0 +1,26 @@
1
+ from kedro.pipeline import Node
2
+ from typing import Callable, Iterable
3
+
4
+ class ArgoNode(Node):
5
+ """ArgoNode is an extension of the Kedro node class, aimed at allowing
6
+ the node to be allocated to a specific machine type.
7
+ """
8
+ def __init__(
9
+ self,
10
+ func: Callable,
11
+ inputs: str | list[str] | dict[str, str] | None,
12
+ outputs: str | list[str] | dict[str, str] | None,
13
+ *,
14
+ name: str | None = None,
15
+ machine_type: str | None = None,
16
+ tags: str | Iterable[str] | None = None,
17
+ confirms: str | list[str] | None = None,
18
+ namespace: str | None = None,
19
+ ):
20
+
21
+ super().__init__(func, inputs, outputs, name=name, tags=tags, confirms=confirms, namespace=namespace)
22
+ self._machine_type = machine_type
23
+
24
+ @property
25
+ def machine_type(self) -> str:
26
+ return self._machine_type
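
A short sketch of declaring per-node machine requirements with `ArgoNode` (the function and dataset names are illustrative; the `machine_type` value must match a key under `machine_types` in argo.yml):

    from argo_kedro.pipeline import ArgoNode

    def train_model(features):
        return features

    train = ArgoNode(
        func=train_model,
        inputs="features",
        outputs="model",
        name="train_model",
        machine_type="n1-standard-8",  # hypothetical machine_types key
    )

    assert train.machine_type == "n1-standard-8"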
@@ -0,0 +1,9 @@
1
+ namespace: argo-workflows
2
+
3
+ machine_types:
4
+ default:
5
+ mem: 16
6
+ cpu: 4
7
+ num_gpu: 0
8
+
9
+ default_machine_type: default
@@ -18,6 +18,25 @@ spec:
18
18
  parameters:
19
19
  - name: pipeline
20
20
  - name: kedro_nodes
21
+ podSpecPatch: |
22
+ containers:
23
+ - name: main
24
+ # Add tolerations for large memory nodes and GPU nodes
25
+ resources:
26
+ requests:
27
+ memory: {% raw %} "{{inputs.parameters.mem}}Gi"
28
+ {% endraw %}
29
+ cpu: {% raw %} "{{inputs.parameters.cpu}}"
30
+ {% endraw %}
31
+ nvidia.com/gpu: {% raw %} "{{inputs.parameters.num_gpu}}"
32
+ {% endraw %}
33
+ limits:
34
+ memory: {% raw %} "{{inputs.parameters.mem}}Gi"
35
+ {% endraw %}
36
+ cpu: {% raw %} "{{inputs.parameters.cpu}}"
37
+ {% endraw %}
38
+ nvidia.com/gpu: {% raw %} "{{inputs.parameters.num_gpu}}"
39
+ {% endraw %}
21
40
  container:
22
41
  image: {{ image }}
23
42
  command: ["kedro"]
@@ -49,5 +68,10 @@ spec:
49
68
  value: {{ pipeline_name }}
50
69
  - name: kedro_nodes
51
70
  value: {{ task.nodes }}
52
-
71
+ - name: num_gpu
72
+ value: {{ task.num_gpu }}
73
+ - name: mem
74
+ value: {{ task.mem }}
75
+ - name: cpu
76
+ value: {{ task.cpu }}
53
77
  {% endfor %}
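
The `{% raw %}` guards are needed because both Jinja and Argo use `{{ ... }}` delimiters: Jinja must emit the Argo parameter expressions literally so Argo can substitute them at runtime. A small sketch of the effect (plain jinja2, illustrative values):

    from jinja2 import Environment

    env = Environment(trim_blocks=True, lstrip_blocks=True)
    snippet = 'memory: {% raw %}"{{inputs.parameters.mem}}Gi"{% endraw %}\ncpu: "{{ cpu }}"'
    print(env.from_string(snippet).render(cpu=4))
    # memory: "{{inputs.parameters.mem}}Gi"
    # cpu: "4"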
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argo-kedro
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Kedro plugin for running pipelines on Argo Workflows
5
5
  Author-email: Laurens Vijnck <laurens@everycure.org>, Nelson Alfonso <nelson@everycure.org>
6
6
  License: MIT
@@ -26,6 +26,7 @@ Requires-Dist: kedro
26
26
  Requires-Dist: pyyaml>=6.0.2
27
27
  Requires-Dist: jinja2>=3.0.0
28
28
  Requires-Dist: kubernetes>=35.0.0
29
+ Requires-Dist: pydantic>=2.0.0
29
30
  Dynamic: license-file
30
31
 
31
32
  # argo-kedro
@@ -15,8 +15,16 @@ argo_kedro.egg-info/top_level.txt
15
15
  argo_kedro/framework/__init__.py
16
16
  argo_kedro/framework/cli/__init__.py
17
17
  argo_kedro/framework/cli/cli.py
18
+ argo_kedro/framework/hooks/argo_hook.py
18
19
  argo_kedro/pipeline/__init__.py
19
20
  argo_kedro/pipeline/fused_pipeline.py
21
+ argo_kedro/pipeline/node.py
20
22
  argo_kedro/runners/__init__.py
21
23
  argo_kedro/runners/fuse_runner.py
22
- argo_kedro/templates/argo_wf_spec.tmpl
24
+ argo_kedro/templates/argo.yml
25
+ argo_kedro/templates/argo_wf_spec.tmpl
26
+ tests/__init__.py
27
+ tests/cli/__init__.py
28
+ tests/cli/test_cli.py
29
+ tests/pipeline/__init__.py
30
+ tests/pipeline/test_fused_pipeline.py
@@ -1,5 +1,8 @@
1
1
  [kedro.global_commands]
2
2
  run = argo_kedro.framework.cli.cli:cli
3
3
 
4
+ [kedro.hooks]
5
+ argo_hook = argo_kedro.framework.hooks.argo_hook:argo_hook
6
+
4
7
  [kedro.project_commands]
5
8
  argo = argo_kedro.framework.cli.cli:commands
@@ -2,3 +2,4 @@ kedro
2
2
  pyyaml>=6.0.2
3
3
  jinja2>=3.0.0
4
4
  kubernetes>=35.0.0
5
+ pydantic>=2.0.0
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "argo-kedro"
8
- version = "0.1.5"
8
+ version = "0.1.7"
9
9
  description = "Kedro plugin for running pipelines on Argo Workflows"
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.10"
@@ -33,6 +33,7 @@ dependencies = [
33
33
  "pyyaml>=6.0.2",
34
34
  "jinja2>=3.0.0",
35
35
  "kubernetes>=35.0.0",
36
+ "pydantic>=2.0.0",
36
37
  ]
37
38
 
38
39
  [project.urls]
@@ -46,6 +47,9 @@ run = "argo_kedro.framework.cli.cli:cli"
46
47
  [project.entry-points."kedro.project_commands"]
47
48
  argo = "argo_kedro.framework.cli.cli:commands"
48
49
 
50
+ [project.entry-points."kedro.hooks"]
51
+ argo_hook = "argo_kedro.framework.hooks.argo_hook:argo_hook"
52
+
49
53
  [tool.setuptools]
50
54
  include-package-data = true
51
55
 
@@ -53,4 +57,4 @@ include-package-data = true
53
57
  include = ["argo_kedro*"]
54
58
 
55
59
  [tool.setuptools.package-data]
56
- argo_kedro = ["**/*.tmpl"]
60
+ argo_kedro = ["**/*.tmpl", "**/*.yml"]
@@ -0,0 +1,206 @@
1
+ import pytest
2
+
3
+ from kedro.pipeline import Pipeline, node
4
+ from argo_kedro.pipeline import FusedPipeline, ArgoNode
5
+ from argo_kedro.framework.cli.cli import get_argo_dag, MachineType
6
+
7
+ @pytest.fixture
8
+ def machine_types() -> dict[str, MachineType]:
9
+ return {
10
+ "default": MachineType(mem=16, cpu=2, num_gpu=0),
11
+ "n1-standard-4": MachineType(mem=16, cpu=4, num_gpu=0),
12
+ "n1-standard-8": MachineType(mem=16, cpu=8, num_gpu=0),
13
+ }
14
+
15
+ @pytest.fixture
16
+ def default_machine_type() -> str:
17
+ return "default"
18
+
19
+ @pytest.fixture
20
+ def pipeline() -> Pipeline:
21
+ return Pipeline(
22
+ [
23
+ ArgoNode(
24
+ func=lambda x: x,
25
+ inputs="raw_data",
26
+ outputs="data",
27
+ tags=["preprocessing"],
28
+ name="preprocess_fun",
29
+ machine_type="n1-standard-4",
30
+ ),
31
+ ArgoNode(
32
+ func=lambda x: x,
33
+ inputs="data",
34
+ outputs="model",
35
+ tags=["training"],
36
+ name="train_fun",
37
+ machine_type="n1-standard-8",
38
+ ),
39
+ ]
40
+ )
41
+
42
+ @pytest.fixture
43
+ def fused_pipeline() -> Pipeline:
44
+ return Pipeline(
45
+ [
46
+ node(
47
+ func=lambda x: x,
48
+ inputs="raw_data",
49
+ outputs="data",
50
+ tags=["preprocessing"],
51
+ name="preprocess_fun",
52
+ ),
53
+ FusedPipeline(
54
+ [
55
+ node(
56
+ func=lambda x: x,
57
+ inputs="data",
58
+ outputs="model",
59
+ tags=["training"],
60
+ name="train_fun",
61
+ ),
62
+ node(
63
+ func=lambda x: x,
64
+ inputs="model",
65
+ outputs="predictions",
66
+ tags=["predictions"],
67
+ name="create_predictions",
68
+ ),
69
+ ],
70
+ name="fused_modelling",
71
+ machine_type="n1-standard-8",
72
+ ),
73
+ ]
74
+ )
75
+
76
+
77
+ @pytest.fixture
78
+ def fused_pipeline_complex() -> Pipeline:
79
+ return Pipeline(
80
+ [
81
+ node(
82
+ func=lambda x: x,
83
+ inputs="raw_data",
84
+ outputs="data",
85
+ tags=["preprocessing"],
86
+ name="preprocess_fun",
87
+ ),
88
+ node(
89
+ func=lambda x: x,
90
+ inputs="raw_customers",
91
+ outputs="customers",
92
+ tags=["preprocessing"],
93
+ name="preprocess_customers",
94
+ ),
95
+ FusedPipeline(
96
+ [
97
+ node(
98
+ func=lambda x: x,
99
+ inputs="data",
100
+ outputs="model",
101
+ tags=["training"],
102
+ name="train_fun",
103
+ ),
104
+ node(
105
+ func=lambda x, y: x,
106
+ inputs=["model", "customers"],
107
+ outputs="predictions",
108
+ tags=["predictions"],
109
+ name="create_predictions",
110
+ ),
111
+ ],
112
+ name="fused_modelling",
113
+ machine_type="n1-standard-8",
114
+ ),
115
+ ]
116
+ )
117
+
118
+
119
+ def test_get_argo_dag(pipeline: Pipeline, machine_types: dict[str, MachineType], default_machine_type: str):
120
+
121
+ # When generating the argo DAG
122
+ argo_dag = get_argo_dag(pipeline, machine_types, default_machine_type)
123
+ expected = {
124
+ "preprocess_fun": {
125
+ "name": "preprocess-fun",
126
+ "nodes": "preprocess_fun",
127
+ "deps": [],
128
+ "mem": 16,
129
+ "cpu": 4,
130
+ "num_gpu": 0,
131
+ },
132
+ "train_fun": {
133
+ "name": "train-fun",
134
+ "nodes": "train_fun",
135
+ "deps": ["preprocess-fun"],
136
+ "mem": 16,
137
+ "cpu": 8,
138
+ "num_gpu": 0,
139
+ }
140
+ }
141
+
142
+ # Assert resulting argo dag is correct
143
+ assert {key: task.to_dict() for key,task in argo_dag.items()} == expected
144
+
145
+
146
+ def test_get_argo_dag_fused(fused_pipeline: Pipeline, machine_types: dict[str, MachineType], default_machine_type: str):
147
+
148
+ # When generating the argo DAG
149
+ argo_dag = get_argo_dag(fused_pipeline, machine_types, default_machine_type)
150
+ expected = {
151
+ "preprocess_fun": {
152
+ "name": "preprocess-fun",
153
+ "nodes": "preprocess_fun",
154
+ "deps": [],
155
+ "mem": 16,
156
+ "cpu": 2,
157
+ "num_gpu": 0,
158
+ },
159
+ "fused_modelling": {
160
+ "name": "fused-modelling",
161
+ "nodes": "fused_modelling",
162
+ "deps": ["preprocess-fun"],
163
+ "mem": 16,
164
+ "cpu": 8,
165
+ "num_gpu": 0,
166
+ }
167
+ }
168
+
169
+ # Assert resulting argo dag is correct
170
+ assert {key: task.to_dict() for key,task in argo_dag.items()} == expected
171
+
172
+
173
+ def test_get_argo_dag_fused_complex(fused_pipeline_complex: Pipeline, machine_types: dict[str, MachineType], default_machine_type: str):
174
+
175
+ # When generating the argo DAG
176
+ argo_dag = get_argo_dag(fused_pipeline_complex, machine_types, default_machine_type)
177
+ expected = {
178
+ "preprocess_fun": {
179
+ "name": "preprocess-fun",
180
+ "nodes": "preprocess_fun",
181
+ "deps": [],
182
+ "mem": 16,
183
+ "cpu": 2,
184
+ "num_gpu": 0,
185
+ },
186
+ "preprocess_customers": {
187
+ "name": "preprocess-customers",
188
+ "nodes": "preprocess_customers",
189
+ "deps": [],
190
+ "mem": 16,
191
+ "cpu": 2,
192
+ "num_gpu": 0,
193
+ },
194
+ "fused_modelling": {
195
+ "name": "fused-modelling",
196
+ "nodes": "fused_modelling",
197
+ "deps": ["preprocess-customers", "preprocess-fun"],
198
+ "mem": 16,
199
+ "cpu": 8,
200
+ "num_gpu": 0,
201
+ }
202
+ }
203
+
204
+ # Assert resulting argo dag is correct
205
+ assert {key: task.to_dict() for key,task in argo_dag.items()} == expected
206
+
@@ -0,0 +1,45 @@
1
+ import pytest
2
+
3
+ from kedro.pipeline import Pipeline, node
4
+ from argo_kedro.pipeline.fused_pipeline import FusedPipeline, FusedNode
5
+
6
+ @pytest.fixture
7
+ def pipeline() -> Pipeline:
8
+ return Pipeline(
9
+ [
10
+ node(
11
+ func=lambda x: x,
12
+ inputs="raw_data",
13
+ outputs="data",
14
+ tags=["preprocessing"],
15
+ name="preprocess_fun",
16
+ ),
17
+ node(
18
+ func=lambda x: x,
19
+ inputs="data",
20
+ outputs="model",
21
+ tags=["training"],
22
+ name="train_fun",
23
+ ),
24
+ ]
25
+ )
26
+
27
+
28
+ def test_fused_node_inputs(pipeline: Pipeline):
29
+
30
+ # Wrap pipeline in FusedNode
31
+ fused_node = FusedNode(pipeline.nodes, name="fused_node")
32
+
33
+ # Assert that the fused node inputs are the pure inputs of the pipeline, i.e.,
34
+ # all inputs not produced as part of intermediate nodes.
35
+ assert set(fused_node.inputs) == set(["raw_data"])
36
+
37
+
38
+ def test_fused_pipeline_nodes(pipeline: Pipeline):
39
+
40
+ # Wrap pipeline in FusedPipeline
41
+ fused_pipeline = FusedPipeline(pipeline.nodes, name="fused_pipeline")
42
+
43
+ # Assert that the fused pipeline nodes are the same as the pipeline nodes
44
+ assert len(fused_pipeline.nodes) == 1
45
+ assert isinstance(fused_pipeline.nodes[0], FusedNode)
@@ -356,6 +356,26 @@ wheels = [
356
356
  ]
357
357
 
358
358
  [[package]]
359
+ <<<<<<< HEAD:kedro-argo/uv.lock
360
+ name = "kedro-argo"
361
+ version = "0.1.0"
362
+ source = { editable = "." }
363
+ dependencies = [
364
+ { name = "jinja2" },
365
+ { name = "kedro" },
366
+ { name = "pyyaml" },
367
+ ]
368
+
369
+ [package.metadata]
370
+ requires-dist = [
371
+ { name = "jinja2", specifier = ">=3.0.0" },
372
+ { name = "kedro" },
373
+ { name = "pyyaml", specifier = ">=6.0.2" },
374
+ ]
375
+
376
+ [[package]]
377
+ =======
378
+ >>>>>>> refs/remotes/origin/main:argo-kedro/uv.lock
359
379
  name = "kedro-telemetry"
360
380
  version = "0.6.5"
361
381
  source = { registry = "https://pypi.org/simple" }
@@ -820,6 +840,8 @@ wheels = [
820
840
  ]
821
841
 
822
842
  [[package]]
843
+ <<<<<<< HEAD:kedro-argo/uv.lock
844
+ =======
823
845
  name = "websocket-client"
824
846
  version = "1.9.0"
825
847
  source = { registry = "https://pypi.org/simple" }
@@ -829,6 +851,7 @@ wheels = [
829
851
  ]
830
852
 
831
853
  [[package]]
854
+ >>>>>>> refs/remotes/origin/main:argo-kedro/uv.lock
832
855
  name = "zipp"
833
856
  version = "3.23.0"
834
857
  source = { registry = "https://pypi.org/simple" }
@@ -1,242 +0,0 @@
1
- import re
2
- from pathlib import Path
3
- from typing import Any, Dict, List, Iterable
4
- from logging import getLogger
5
-
6
- import click
7
- import yaml
8
- from kubernetes import config
9
- from kubernetes.dynamic import DynamicClient
10
- from jinja2 import Environment, FileSystemLoader
11
- from kedro.framework.cli.utils import CONTEXT_SETTINGS, KedroCliError
12
- from kedro.framework.session import KedroSession
13
- from kedro.framework.cli.project import (
14
- ASYNC_ARG_HELP,
15
- CONF_SOURCE_HELP,
16
- FROM_INPUTS_HELP,
17
- FROM_NODES_HELP,
18
- LOAD_VERSION_HELP,
19
- NODE_ARG_HELP,
20
- PARAMS_ARG_HELP,
21
- PIPELINE_ARG_HELP,
22
- RUNNER_ARG_HELP,
23
- TAG_ARG_HELP,
24
- TO_NODES_HELP,
25
- TO_OUTPUTS_HELP,
26
- project_group,
27
- )
28
- from kedro.framework.project import pipelines as kedro_pipelines
29
- from kedro.pipeline import Pipeline
30
- from kedro.pipeline.node import Node
31
- from kedro.runner.sequential_runner import SequentialRunner
32
- from argo_kedro.runners.fuse_runner import FusedRunner
33
-
34
- LOGGER = getLogger(__name__)
35
- ARGO_TEMPLATES_DIR_PATH = Path(__file__).parent.parent.parent / "templates"
36
-
37
-
38
- @click.group(context_settings=CONTEXT_SETTINGS)
39
- def cli():
40
- pass
41
-
42
- @cli.command(name="run")
43
- @click.option("--pipeline", "-p", type=str, default="__default__", help="Name of the pipeline to execute")
44
- @click.option("--env", "-e", type=str, default=None, help="Kedro environment to run the pipeline in")
45
- @click.option("--config", "-c", type=str, multiple=True, help="Extra config to pass to KedroContext")
46
- @click.option("--params", type=str, multiple=True, help="Override parameters")
47
- @click.option("--tags", "-t", type=str, multiple=True, help=TAG_ARG_HELP)
48
- @click.option("--nodes", "-n", type=str, multiple=True, help="Run only nodes with specified names")
49
- @click.option("--to-nodes", type=str, multiple=True, help="Run a sub-pipeline up to certain nodes")
50
- @click.option("--from-nodes", type=str, multiple=True, help="Run a sub-pipeline starting from certain nodes")
51
- @click.option("--from-inputs", type=str, multiple=True, help="Run a sub-pipeline starting from nodes that produce these inputs")
52
- @click.option("--to-outputs", type=str, multiple=True, help="Run a sub-pipeline up to nodes that produce these outputs")
53
- @click.option("--load-version", type=str, multiple=True, help="Specify a particular dataset version")
54
- @click.option("--namespaces", type=str, multiple=True, help="Namespaces of the pipeline")
55
- @click.pass_obj
56
- def _run_command_impl(
57
- ctx,
58
- pipeline: str,
59
- env: str,
60
- config: tuple,
61
- params: tuple,
62
- tags: list[str],
63
- nodes: tuple,
64
- to_nodes: tuple,
65
- from_nodes: tuple,
66
- from_inputs: tuple,
67
- to_outputs: tuple,
68
- load_version: tuple,
69
- namespaces: Iterable[str],
70
- ):
71
- """Run the pipeline with the FusedRunner."""
72
-
73
- LOGGER.warning(f"Using plugin entrypoint")
74
-
75
- load_versions = None
76
- if load_version:
77
- load_versions = {}
78
- for version_spec in load_version:
79
- if ":" in version_spec:
80
- dataset, version = version_spec.split(":", 1)
81
- load_versions[dataset] = version
82
-
83
- conf_source = getattr(ctx, "conf_source", None)
84
- env_value = env or getattr(ctx, "env", None)
85
-
86
- with KedroSession.create(
87
- env=env_value,
88
- conf_source=conf_source,
89
- ) as session:
90
-
91
- session.run(
92
- pipeline_name=pipeline,
93
- tags=tags,
94
- runner=FusedRunner(pipeline_name=pipeline),
95
- node_names=list(nodes) if nodes else None,
96
- from_nodes=list(from_nodes) if from_nodes else None,
97
- to_nodes=list(to_nodes) if to_nodes else None,
98
- from_inputs=list(from_inputs) if from_inputs else None,
99
- to_outputs=list(to_outputs) if to_outputs else None,
100
- load_versions=load_versions,
101
- namespaces=namespaces,
102
- )
103
-
104
- @click.group(name="argo")
105
- def commands():
106
- pass
107
-
108
- @commands.command(name="submit")
109
- @click.option("--pipeline", "-p", type=str, default="__default__", help="Specify which pipeline to execute")
110
- @click.option("--environment", "-e", type=str, default="base", help="Kedro environment to execute in")
111
- @click.option("--image", type=str, required=True, help="Image to execute")
112
- @click.option("--namespace", "-n", type=str, required=True, help="Namespace to execute in")
113
- @click.pass_obj
114
- def submit(
115
- ctx,
116
- pipeline: str,
117
- image: str,
118
- namespace: str,
119
- environment: str
120
- ):
121
- """Submit the pipeline to Argo."""
122
- LOGGER.info("Loading spec template..")
123
-
124
- loader = FileSystemLoader(searchpath=ARGO_TEMPLATES_DIR_PATH)
125
- template_env = Environment(loader=loader, trim_blocks=True, lstrip_blocks=True)
126
- template = template_env.get_template("argo_wf_spec.tmpl")
127
-
128
- pipeline_tasks = get_argo_dag(kedro_pipelines[pipeline])
129
-
130
- LOGGER.info("Rendering Argo spec...")
131
-
132
- # Render the template
133
- rendered_template = template.render(
134
- pipeline_tasks=[task.to_dict() for task in pipeline_tasks.values()],
135
- pipeline_name=pipeline,
136
- image=image,
137
- namespace=namespace,
138
- environment=environment
139
- )
140
-
141
- # Load as yaml
142
- yaml_data = yaml.safe_load(rendered_template)
143
- yaml_without_anchors = yaml.dump(yaml_data, sort_keys=False, default_flow_style=False)
144
- save_argo_template(
145
- yaml_without_anchors,
146
- )
147
-
148
- # Use kubeconfig to submit to kubernetes
149
- config.load_kube_config()
150
- client = DynamicClient(config.new_client_from_config())
151
-
152
- resource = client.resources.get(
153
- api_version=yaml_data["apiVersion"],
154
- kind=yaml_data["kind"],
155
- )
156
-
157
- resource.create(
158
- body=yaml_data,
159
- namespace=namespace
160
- )
161
-
162
-
163
- def save_argo_template(argo_template: str) -> str:
164
- file_path = Path("templates") / "argo-workflow-template.yml"
165
- with open(file_path, "w") as f:
166
- f.write(argo_template)
167
- return str(file_path)
168
-
169
-
170
- class ArgoTask:
171
- """Class to model an Argo task.
172
-
173
- Argo's operating model slightly differs from Kedro's, i.e., while Kedro uses dataset
174
- dependecies to model relationships, Argo uses task dependencies."""
175
-
176
- def __init__(self, node: Node):
177
- self._node = node
178
- self._parents = []
179
-
180
- @property
181
- def node(self):
182
- return self._node
183
-
184
- def add_parents(self, nodes: List[Node]):
185
- self._parents.extend(nodes)
186
-
187
- def to_dict(self):
188
- return {
189
- "name": clean_name(self._node.name),
190
- "nodes": self._node.name,
191
- "deps": [clean_name(parent.name) for parent in sorted(self._parents)],
192
- }
193
-
194
-
195
- def get_argo_dag(pipeline: Pipeline) -> List[Dict[str, Any]]:
196
- """Function to convert the Kedro pipeline into Argo Tasks. The function
197
- iterates the nodes of the pipeline and generates Argo tasks with dependencies.
198
- These dependencies are inferred based on the input and output datasets for
199
- each node.
200
-
201
- NOTE: This function is now agnostic to the fact that nodes might be fused. The nodes
202
- returned as part of the pipeline may optionally contain FusedNodes, which have correct
203
- inputs and outputs for the perspective of the Argo Task.
204
- """
205
- tasks = {}
206
-
207
- # The `grouped_nodes` property returns the nodes list, in a toplogical order,
208
- # allowing us to easily translate the Kedro DAG to an Argo WF.
209
- for group in pipeline.grouped_nodes:
210
- for target_node in group:
211
- task = ArgoTask(target_node)
212
- task.add_parents(
213
- [
214
- parent.node
215
- for parent in tasks.values()
216
- if set(clean_dependencies(target_node.inputs)) & set(clean_dependencies(parent.node.outputs))
217
- ]
218
- )
219
-
220
- tasks[target_node.name] = task
221
-
222
- return tasks
223
-
224
-
225
- def clean_name(name: str) -> str:
226
- """Function to clean the node name.
227
-
228
- Args:
229
- name: name of the node
230
- Returns:
231
- Clean node name, according to Argo's requirements
232
- """
233
- return re.sub(r"[\W_]+", "-", name).strip("-")
234
-
235
-
236
- def clean_dependencies(elements) -> List[str]:
237
- """Function to clean node dependencies.
238
-
239
- Operates by removing `params:` from the list and dismissing
240
- the transcoding operator.
241
- """
242
- return [el.split("@")[0] for el in elements if not el.startswith("params:")]
@@ -1,3 +0,0 @@
1
- from .fused_pipeline import FusedPipeline
2
-
3
- __all__ = ["FusedPipeline", ]