ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.

This version of ob-metaflow has been flagged as potentially problematic.

Files changed (96)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cards.py +1 -0
  3. metaflow/cli.py +185 -717
  4. metaflow/cli_args.py +17 -0
  5. metaflow/cli_components/__init__.py +0 -0
  6. metaflow/cli_components/dump_cmd.py +96 -0
  7. metaflow/cli_components/init_cmd.py +51 -0
  8. metaflow/cli_components/run_cmds.py +362 -0
  9. metaflow/cli_components/step_cmd.py +176 -0
  10. metaflow/cli_components/utils.py +140 -0
  11. metaflow/cmd/develop/stub_generator.py +9 -2
  12. metaflow/datastore/flow_datastore.py +2 -2
  13. metaflow/decorators.py +63 -2
  14. metaflow/exception.py +8 -2
  15. metaflow/extension_support/plugins.py +42 -27
  16. metaflow/flowspec.py +176 -23
  17. metaflow/graph.py +28 -27
  18. metaflow/includefile.py +50 -22
  19. metaflow/lint.py +35 -20
  20. metaflow/metadata_provider/heartbeat.py +23 -8
  21. metaflow/metaflow_config.py +10 -1
  22. metaflow/multicore_utils.py +31 -14
  23. metaflow/package.py +17 -3
  24. metaflow/parameters.py +97 -25
  25. metaflow/plugins/__init__.py +22 -0
  26. metaflow/plugins/airflow/airflow.py +18 -17
  27. metaflow/plugins/airflow/airflow_cli.py +1 -0
  28. metaflow/plugins/argo/argo_client.py +0 -2
  29. metaflow/plugins/argo/argo_workflows.py +195 -132
  30. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  31. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  32. metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
  33. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  34. metaflow/plugins/aws/aws_utils.py +6 -1
  35. metaflow/plugins/aws/batch/batch_client.py +1 -3
  36. metaflow/plugins/aws/batch/batch_decorator.py +13 -13
  37. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  38. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  39. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  40. metaflow/plugins/aws/step_functions/step_functions.py +33 -1
  41. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  42. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  43. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
  44. metaflow/plugins/cards/card_cli.py +7 -2
  45. metaflow/plugins/cards/card_creator.py +1 -0
  46. metaflow/plugins/cards/card_decorator.py +79 -8
  47. metaflow/plugins/cards/card_modules/basic.py +56 -5
  48. metaflow/plugins/cards/card_modules/card.py +16 -1
  49. metaflow/plugins/cards/card_modules/components.py +64 -16
  50. metaflow/plugins/cards/card_modules/main.js +27 -25
  51. metaflow/plugins/cards/card_modules/test_cards.py +4 -4
  52. metaflow/plugins/cards/component_serializer.py +1 -1
  53. metaflow/plugins/datatools/s3/s3.py +12 -4
  54. metaflow/plugins/datatools/s3/s3op.py +3 -3
  55. metaflow/plugins/events_decorator.py +338 -186
  56. metaflow/plugins/kubernetes/kube_utils.py +84 -1
  57. metaflow/plugins/kubernetes/kubernetes.py +40 -92
  58. metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
  59. metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
  60. metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
  61. metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
  62. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  63. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  64. metaflow/plugins/parallel_decorator.py +4 -1
  65. metaflow/plugins/project_decorator.py +33 -5
  66. metaflow/plugins/pypi/bootstrap.py +249 -81
  67. metaflow/plugins/pypi/conda_decorator.py +20 -10
  68. metaflow/plugins/pypi/conda_environment.py +83 -27
  69. metaflow/plugins/pypi/micromamba.py +82 -37
  70. metaflow/plugins/pypi/pip.py +9 -6
  71. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  72. metaflow/plugins/pypi/utils.py +4 -2
  73. metaflow/plugins/timeout_decorator.py +2 -2
  74. metaflow/runner/click_api.py +240 -50
  75. metaflow/runner/deployer.py +1 -1
  76. metaflow/runner/deployer_impl.py +12 -11
  77. metaflow/runner/metaflow_runner.py +68 -34
  78. metaflow/runner/nbdeploy.py +2 -0
  79. metaflow/runner/nbrun.py +1 -1
  80. metaflow/runner/subprocess_manager.py +61 -10
  81. metaflow/runner/utils.py +208 -44
  82. metaflow/runtime.py +216 -112
  83. metaflow/sidecar/sidecar_worker.py +1 -1
  84. metaflow/tracing/tracing_modules.py +4 -1
  85. metaflow/user_configs/__init__.py +0 -0
  86. metaflow/user_configs/config_decorators.py +563 -0
  87. metaflow/user_configs/config_options.py +548 -0
  88. metaflow/user_configs/config_parameters.py +436 -0
  89. metaflow/util.py +22 -0
  90. metaflow/version.py +1 -1
  91. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
  92. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
  93. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
  94. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
  95. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
  96. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/argo_workflows.py

@@ -7,12 +7,11 @@ import sys
 from collections import defaultdict
 from hashlib import sha1
 from math import inf
-from typing import List, Tuple

 from metaflow import JSONType, current
 from metaflow.decorators import flow_decorators
 from metaflow.exception import MetaflowException
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.includefile import FilePathClass
 from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT,
@@ -39,9 +38,7 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
-    KUBERNETES_NODE_SELECTOR,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     KUBERNETES_SECRETS,
     S3_ENDPOINT_URL,
@@ -54,12 +51,11 @@ from metaflow.metaflow_config import (
 from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
-from metaflow.plugins.kubernetes.kubernetes import (
-    parse_kube_keyvalue_list,
-    validate_kube_labels,
-)
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
+
 from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import (
     compress_list,
     dict_to_cli_options,
@@ -168,10 +164,12 @@ class ArgoWorkflows(object):
         self.enable_heartbeat_daemon = enable_heartbeat_daemon
         self.enable_error_msg_capture = enable_error_msg_capture
         self.parameters = self._process_parameters()
+        self.config_parameters = self._process_config_parameters()
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()

-        self.kubernetes_labels = self._get_kubernetes_labels()
+        self._base_labels = self._base_kubernetes_labels()
+        self._base_annotations = self._base_kubernetes_annotations()
         self._workflow_template = self._compile_workflow_template()
         self._sensor = self._compile_sensor()

@@ -308,7 +306,7 @@ class ArgoWorkflows(object):
         try:
             # Check that the workflow was deployed through Metaflow
             workflow_template["metadata"]["annotations"]["metaflow/owner"]
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -322,18 +320,42 @@
         except Exception as e:
             raise ArgoWorkflowsException(str(e))

-    @staticmethod
-    def _get_kubernetes_labels():
+    def _base_kubernetes_labels(self):
         """
-        Get Kubernetes labels from environment variable.
-        Parses the string into a dict and validates that values adhere to Kubernetes restrictions.
+        Get shared Kubernetes labels for Argo resources.
         """
-        if not KUBERNETES_LABELS:
-            return {}
-        env_labels = KUBERNETES_LABELS.split(",")
-        env_labels = parse_kube_keyvalue_list(env_labels, False)
-        validate_kube_labels(env_labels)
-        return env_labels
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        labels = {"app.kubernetes.io/part-of": "metaflow"}
+
+        return labels
+
+    def _base_kubernetes_annotations(self):
+        """
+        Get shared Kubernetes annotations for Argo resources.
+        """
+        from datetime import datetime, timezone
+
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        # base annotations
+        annotations = {
+            "metaflow/production_token": self.production_token,
+            "metaflow/owner": self.username,
+            "metaflow/user": "argo-workflows",
+            "metaflow/flow_name": self.flow.name,
+            "metaflow/deployment_timestamp": str(
+                datetime.now(timezone.utc).isoformat()
+            ),
+        }
+
+        if current.get("project_name"):
+            annotations.update(
+                {
+                    "metaflow/project_name": current.project_name,
+                    "metaflow/branch_name": current.branch_name,
+                    "metaflow/project_flow_name": current.project_flow_name,
+                }
+            )
+        return annotations

     def _get_schedule(self):
         schedule = self.flow._flow_decorators.get("schedule")
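Illustration (not part of the diff): for a hypothetical flow TrainingFlow deployed by user alice with @project(name="demo"), the two new helpers above would return metadata along these lines; all values below are invented for the example:

    base_labels = {"app.kubernetes.io/part-of": "metaflow"}
    base_annotations = {
        "metaflow/production_token": "demo-token-123",  # illustrative value
        "metaflow/owner": "alice",
        "metaflow/user": "argo-workflows",
        "metaflow/flow_name": "TrainingFlow",
        "metaflow/deployment_timestamp": "2025-01-01T00:00:00+00:00",
        # added only because @project metadata is present:
        "metaflow/project_name": "demo",
        "metaflow/branch_name": "user.alice",
        "metaflow/project_flow_name": "demo.user.alice.TrainingFlow",
    }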
@@ -409,7 +431,7 @@
                     "metaflow/production_token"
                 ],
             )
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -455,6 +477,10 @@
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             extra_attrs = {}
             if param.kwargs.get("type") == JSONType:
@@ -488,6 +514,7 @@
             # execution - which needs to be fixed imminently.
             if not is_required or default_value is not None:
                 default_value = json.dumps(default_value)
+
             parameters[param.name] = dict(
                 name=param.name,
                 value=default_value,
@@ -498,6 +525,27 @@
             )
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _process_triggers(self):
         # Impute triggers for Argo Workflow Template specified through @trigger and
         # @trigger_on_finish decorators
@@ -520,10 +568,17 @@
             # convert them to lower case since Metaflow parameters are case
             # insensitive.
             seen = set()
+            # NOTE: We skip config parameters as their values can not be set through event payloads
             params = set(
-                [param.name.lower() for var, param in self.flow._get_parameters()]
+                [
+                    param.name.lower()
+                    for var, param in self.flow._get_parameters()
+                    if not param.IS_CONFIG_PARAMETER
+                ]
             )
-            for event in self.flow._flow_decorators.get("trigger")[0].triggers:
+            trigger_deco = self.flow._flow_decorators.get("trigger")[0]
+            trigger_deco.format_deploytime_value()
+            for event in trigger_deco.triggers:
                 parameters = {}
                 # TODO: Add a check to guard against names starting with numerals(?)
                 if not re.match(r"^[A-Za-z0-9_.-]+$", event["name"]):
@@ -563,11 +618,23 @@

         # @trigger_on_finish decorator
         if self.flow._flow_decorators.get("trigger_on_finish"):
-            for event in self.flow._flow_decorators.get("trigger_on_finish")[
-                0
-            ].triggers:
+            trigger_on_finish_deco = self.flow._flow_decorators.get(
+                "trigger_on_finish"
+            )[0]
+            trigger_on_finish_deco.format_deploytime_value()
+            for event in trigger_on_finish_deco.triggers:
                 # Actual filters are deduced here since we don't have access to
                 # the current object in the @trigger_on_finish decorator.
+                project_name = event.get("project") or current.get("project_name")
+                branch_name = event.get("branch") or current.get("branch_name")
+                # validate that we have complete project info for an event name
+                if project_name or branch_name:
+                    if not (project_name and branch_name):
+                        # if one of the two is missing, we would end up listening to an event that will never be broadcast.
+                        raise ArgoWorkflowsException(
+                            "Incomplete project info. Please specify both 'project' and 'project_branch' or use the @project decorator"
+                        )
+
                 triggers.append(
                     {
                         # Make sure this remains consistent with the event name format
@@ -576,18 +643,16 @@
                         % ".".join(
                             v
                             for v in [
-                                event.get("project") or current.get("project_name"),
-                                event.get("branch") or current.get("branch_name"),
+                                project_name,
+                                branch_name,
                                 event["flow"],
                             ]
                             if v
                         ),
                         "filters": {
                             "auto-generated-by-metaflow": True,
-                            "project_name": event.get("project")
-                            or current.get("project_name"),
-                            "branch_name": event.get("branch")
-                            or current.get("branch_name"),
+                            "project_name": project_name,
+                            "branch_name": branch_name,
                             # TODO: Add a time filters to guard against cached events
                         },
                         "type": "run",
@@ -639,18 +704,7 @@
         # generate container templates at the top level (in WorkflowSpec) and maintain
         # references to them within the DAGTask.

-        from datetime import datetime, timezone
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-            "metaflow/deployment_timestamp": str(
-                datetime.now(timezone.utc).isoformat()
-            ),
-        }
-
+        annotations = {}
         if self._schedule is not None:
             # timezone is an optional field and json dumps on None will result in null
             # hence configuring it to an empty string
@@ -662,15 +716,6 @@
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})

-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
         # Some more annotations to populate the Argo UI nicely
         if self.tags:
             annotations.update({"metaflow/tags": json.dumps(self.tags)})
@@ -707,6 +752,17 @@
                 )
             }
         )
+        try:
+            # Build the DAG based on the DAGNodes given by the FlowGraph for the found FlowSpec class.
+            _steps_info, graph_structure = self.graph.output_steps()
+            graph_info = {
+                # for the time being, we only need the graph_structure. Being mindful of annotation size limits we do not include anything extra.
+                "graph_structure": graph_structure
+            }
+        except Exception:
+            graph_info = None
+
+        dag_annotation = {"metaflow/dag": json.dumps(graph_info)}

         return (
             WorkflowTemplate()
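Illustration (not part of the diff): assuming FlowGraph.output_steps() encodes branches as nested lists of step names (an assumption here, not something the diff confirms), the new metaflow/dag annotation would carry something like:

    import json

    # hypothetical flow: start -> (a | b) -> join -> end
    graph_info = {"graph_structure": ["start", [["a"], ["b"]], "join", "end"]}
    dag_annotation = {"metaflow/dag": json.dumps(graph_info)}
    # falls back to {"metaflow/dag": "null"} if output_steps() raises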
@@ -718,9 +774,11 @@
                 # is released, we should be able to support multi-namespace /
                 # multi-cluster scheduling.
                 .namespace(KUBERNETES_NAMESPACE)
-                .label("app.kubernetes.io/name", "metaflow-flow")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(annotations)
+                .annotations(self._base_annotations)
+                .labels(self._base_labels)
+                .label("app.kubernetes.io/name", "metaflow-flow")
+                .annotations(dag_annotation)
             )
             .spec(
                 WorkflowSpec()
@@ -750,10 +808,14 @@
             # Set workflow metadata
             .workflow_metadata(
                 Metadata()
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-run")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(
-                    {**annotations, **{"metaflow/run_id": "argo-{{workflow.name}}"}}
+                    {
+                        **annotations,
+                        **self._base_annotations,
+                        **{"metaflow/run_id": "argo-{{workflow.name}}"},
+                    }
                 )
                 # TODO: Set dynamic labels using labels_from. Ideally, we would
                 # want to expose run_id as a label. It's easy to add labels,
@@ -786,10 +848,17 @@
             # Set common pod metadata.
             .pod_metadata(
                 Metadata()
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-task")
-                .label("app.kubernetes.io/part-of", "metaflow")
-                .annotations(annotations)
-                .labels(self.kubernetes_labels)
+                .annotations(
+                    {
+                        **annotations,
+                        **self._base_annotations,
+                        **{
+                            "metaflow/run_id": "argo-{{workflow.name}}"
+                        },  # we want pods of the workflow to have the run_id as an annotation as well
+                    }
+                )
             )
             # Set the entrypoint to flow name
             .entrypoint(self.flow.name)
@@ -1652,6 +1721,7 @@
                     },
                     **{
                         # Some optional values for bookkeeping
+                        "METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0]),
                         "METAFLOW_FLOW_NAME": self.flow.name,
                         "METAFLOW_STEP_NAME": node.name,
                         "METAFLOW_RUN_ID": run_id,
@@ -1716,6 +1786,13 @@
             metaflow_version["production_token"] = self.production_token
             env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+        # map config values
+        cfg_env = {
+            param["name"]: param["kv_name"] for param in self.config_parameters
+        }
+        if cfg_env:
+            env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
         # Set the template inputs and outputs for passing state. Very simply,
         # the container template takes in input-paths as input and outputs
         # the task-id (which feeds in as input-paths to the subsequent task).
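Illustration (not part of the diff): each config parameter collected by _process_config_parameters contributes one name-to-key entry, and the whole map is serialized into a single environment variable. With a hypothetical flow that has one config named "config" (the kv_name shape below is illustrative; the real value comes from ConfigInput.make_key_name):

    import json

    config_parameters = [{"name": "config", "kv_name": "kv.config"}]  # illustrative
    cfg_env = {p["name"]: p["kv_name"] for p in config_parameters}
    env = {}
    if cfg_env:
        env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
    # env["METAFLOW_FLOW_CONFIG_VALUE"] == '{"config": "kv.config"}'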
@@ -1824,7 +1901,7 @@

         # get initial configs
         initial_configs = init_config()
-        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_SECRETS_METADATA_URL"]:
+        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
             if entry not in initial_configs:
                 raise ArgoWorkflowsException(
                     f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Ourebounds UI or reach out to your Outerbounds support team."
@@ -1832,8 +1909,8 @@

         additional_obp_configs = {
             "OBP_PERIMETER": initial_configs["OBP_PERIMETER"],
-            "OBP_INTEGRATIONS_SECRETS_METADATA_URL": initial_configs[
-                "OBP_INTEGRATIONS_SECRETS_METADATA_URL"
+            "OBP_INTEGRATIONS_URL": initial_configs[
+                "OBP_INTEGRATIONS_URL"
             ],
         }

@@ -1854,20 +1931,19 @@
             if tmpfs_enabled and tmpfs_tempdir:
                 env["METAFLOW_TEMPDIR"] = tmpfs_path

+            qos_requests, qos_limits = qos_requests_and_limits(
+                resources["qos"],
+                resources["cpu"],
+                resources["memory"],
+                resources["disk"],
+            )
+
             # Create a ContainerTemplate for this node. Ideally, we would have
             # liked to inline this ContainerTemplate and avoid scanning the workflow
             # twice, but due to issues with variable substitution, we will have to
             # live with this routine.
             if node.parallel_step:
-                # Explicitly add the task-id-hint label. This is important because this label
-                # is returned as an Output parameter of this step and is used subsequently as an
-                # an input in the join step.
-                kubernetes_labels = self.kubernetes_labels.copy()
                 jobset_name = "{{inputs.parameters.jobset-name}}"
-                kubernetes_labels["task_id_entropy"] = (
-                    "{{inputs.parameters.task-id-entropy}}"
-                )
-                kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
                 jobset = KubernetesArgoJobSet(
                     kubernetes_sdk=kubernetes_sdk,
                     name=jobset_name,
@@ -1917,13 +1993,28 @@
                     persistent_volume_claims=resources["persistent_volume_claims"],
                     shared_memory=shared_memory,
                     port=port,
+                    qos=resources["qos"],
                 )

                 for k, v in env.items():
                     jobset.environment_variable(k, v)

-                for k, v in kubernetes_labels.items():
-                    jobset.label(k, v)
+                # Set labels. Do not allow user-specified task labels to override internal ones.
+                #
+                # Explicitly add the task-id-hint label. This is important because this label
+                # is returned as an Output parameter of this step and is used subsequently as an
+                # an input in the join step.
+                kubernetes_labels = {
+                    "task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
+                    "num_parallel": "{{inputs.parameters.num-parallel}}",
+                }
+                jobset.labels(
+                    {
+                        **resources["labels"],
+                        **self._base_labels,
+                        **kubernetes_labels,
+                    }
+                )

                 jobset.environment_variable(
                     "MF_MASTER_ADDR", jobset.jobset_control_addr
@@ -1952,27 +2043,23 @@
                         "TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
                     }
                 )
+
+                # Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
                 annotations = {
                     # setting annotations explicitly as they wont be
                     # passed down from WorkflowTemplate level
                     "metaflow/step_name": node.name,
                     "metaflow/attempt": str(retry_count),
                     "metaflow/run_id": run_id,
-                    "metaflow/production_token": self.production_token,
-                    "metaflow/owner": self.username,
-                    "metaflow/user": "argo-workflows",
-                    "metaflow/flow_name": self.flow.name,
                 }
-                if current.get("project_name"):
-                    annotations.update(
-                        {
-                            "metaflow/project_name": current.project_name,
-                            "metaflow/branch_name": current.branch_name,
-                            "metaflow/project_flow_name": current.project_flow_name,
-                        }
-                    )
-                for k, v in annotations.items():
-                    jobset.annotation(k, v)
+
+                jobset.annotations(
+                    {
+                        **resources["annotations"],
+                        **self._base_annotations,
+                        **annotations,
+                    }
+                )

                 jobset.control.replicas(1)
                 jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
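Illustration (not part of the diff): the merge order in jobset.labels(...) and jobset.annotations(...) relies on Python dict unpacking, where later entries win. That is what enforces the "do not allow user-specified values to override internal ones" comments:

    user = {"team": "ml", "task_id_entropy": "boom"}  # hypothetical user-specified labels
    base = {"app.kubernetes.io/part-of": "metaflow"}
    internal = {"task_id_entropy": "{{inputs.parameters.task-id-entropy}}"}
    merged = {**user, **base, **internal}
    # internal wins because it is unpacked last:
    assert merged["task_id_entropy"] == "{{inputs.parameters.task-id-entropy}}"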
@@ -2029,13 +2116,16 @@
                     minutes_between_retries=minutes_between_retries,
                 )
                 .metadata(
-                    ObjectMeta().annotation("metaflow/step_name", node.name)
+                    ObjectMeta()
+                    .annotation("metaflow/step_name", node.name)
                     # Unfortunately, we can't set the task_id since it is generated
                     # inside the pod. However, it can be inferred from the annotation
                     # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
                     # the field 'task-id' in 'parameters'
                     # .annotation("metaflow/task_id", ...)
                     .annotation("metaflow/attempt", retry_count)
+                    .annotations(resources["annotations"])
+                    .labels(resources["labels"])
                 )
                 # Set emptyDir volume for state management
                 .empty_dir_volume("out")
@@ -2109,17 +2199,17 @@
                             image=resources["image"],
                             image_pull_policy=resources["image_pull_policy"],
                             resources=kubernetes_sdk.V1ResourceRequirements(
-                                requests={
-                                    "cpu": str(resources["cpu"]),
-                                    "memory": "%sM" % str(resources["memory"]),
-                                    "ephemeral-storage": "%sM"
-                                    % str(resources["disk"]),
-                                },
+                                requests=qos_requests,
                                 limits={
-                                    "%s.com/gpu".lower()
-                                    % resources["gpu_vendor"]: str(resources["gpu"])
-                                    for k in [0]
-                                    if resources["gpu"] is not None
+                                    **qos_limits,
+                                    **{
+                                        "%s.com/gpu".lower()
+                                        % resources["gpu_vendor"]: str(
+                                            resources["gpu"]
+                                        )
+                                        for k in [0]
+                                        if resources["gpu"] is not None
+                                    },
                                 },
                             ),
                             # Configure secrets
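Illustration (not part of the diff): qos_requests_and_limits comes from the new kube_utils import at the top of the file. The sketch below shows one plausible behavior (an assumption, not the actual implementation) plus the single-entry dict comprehension used above for the optional GPU limit:

    # illustrative only; the real helper lives in metaflow/plugins/kubernetes/kube_utils.py
    def qos_requests_and_limits(qos, cpu, memory, disk):
        requests = {
            "cpu": str(cpu),
            "memory": "%sM" % memory,
            "ephemeral-storage": "%sM" % disk,
        }
        # Kubernetes "Guaranteed" QoS requires limits to equal requests.
        limits = dict(requests) if str(qos).lower() == "guaranteed" else {}
        return requests, limits

    gpu, gpu_vendor = 2, "nvidia"
    gpu_limit = {
        "%s.com/gpu".lower() % gpu_vendor: str(gpu)
        for k in [0]  # dummy one-element iteration
        if gpu is not None  # yields an empty dict when no GPU is requested
    }
    # gpu_limit == {"nvidia.com/gpu": "2"}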
@@ -2356,7 +2446,7 @@
                                     "memory": "500Mi",
                                 },
                             ),
-                        )
+                        ).to_dict()
                     )
                 ),
                 Template("capture-error-hook-fn-preflight").steps(
@@ -2715,7 +2805,7 @@
                         },
                     ),
                 )
-            )
+            ).to_dict()
         )
     )

@@ -2818,33 +2908,6 @@
                 "sdk (https://pypi.org/project/kubernetes/) first."
             )

-        labels = {"app.kubernetes.io/part-of": "metaflow"}
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-        }
-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
-        # Useful to paint the UI
-        trigger_annotations = {
-            "metaflow/triggered_by": json.dumps(
-                [
-                    {key: trigger.get(key) for key in ["name", "type"]}
-                    for trigger in self.triggers
-                ]
-            )
-        }
-
         return (
             Sensor()
             .metadata(
@@ -2852,10 +2915,9 @@
                 ObjectMeta()
                 .name(ArgoWorkflows._sensor_name(self.name))
                 .namespace(KUBERNETES_NAMESPACE)
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-sensor")
-                .label("app.kubernetes.io/part-of", "metaflow")
-                .labels(self.kubernetes_labels)
-                .annotations(annotations)
+                .annotations(self._base_annotations)
             )
             .spec(
                 SensorSpec().template(
@@ -2865,7 +2927,7 @@
                         ObjectMeta()
                         .label("app.kubernetes.io/name", "metaflow-sensor")
                        .label("app.kubernetes.io/part-of", "metaflow")
-                        .annotations(annotations)
+                        .annotations(self._base_annotations)
                     )
                     .container(
                         # Run sensor in guaranteed QoS. The sensor isn't doing a lot
@@ -2885,7 +2947,7 @@
                                     "memory": "250Mi",
                                 },
                             ),
-                        )
+                        ).to_dict()
                     )
                 )
                 .service_account_name(ARGO_EVENTS_SERVICE_ACCOUNT)
@@ -2912,6 +2974,7 @@
                     "metadata": {
                         "generateName": "%s-" % self.name,
                         "namespace": KUBERNETES_NAMESPACE,
+                        # Useful to paint the UI
                         "annotations": {
                             "metaflow/triggered_by": json.dumps(
                                 [
metaflow/plugins/argo/argo_workflows_cli.py

@@ -8,7 +8,6 @@ from time import sleep

 from metaflow import JSONType, Run, current, decorators, parameters
 from metaflow._vendor import click
-from metaflow.client.core import get_metadata
 from metaflow.exception import (
     MetaflowException,
     MetaflowInternalError,
@@ -470,6 +469,7 @@ def make_flow(
     decorators._attach_decorators(
         obj.flow, [KubernetesDecorator.name, EnvironmentDecorator.name]
     )
+    decorators._init(obj.flow)

     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
metaflow/plugins/argo/argo_workflows_decorator.py

@@ -1,14 +1,12 @@
 import json
 import os
-import time


 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata_provider import MetaDatum
-from metaflow.metaflow_config import ARGO_EVENTS_WEBHOOK_URL
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent

@@ -42,7 +40,7 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         if payload != "null":  # Argo-Workflow's None
             try:
                 payload = json.loads(payload)
-            except (TypeError, ValueError) as e:
+            except (TypeError, ValueError):
                 # There could be arbitrary events that Metaflow doesn't know of
                 payload = {}
             triggers.append(