ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@ from metaflow.exception import (
15
15
  )
16
16
  from metaflow.metaflow_config import (
17
17
  ARGO_WORKFLOWS_UI_URL,
18
+ FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
18
19
  KUBERNETES_NAMESPACE,
19
20
  SERVICE_VERSION_CHECK,
20
21
  UI_URL,
@@ -32,9 +33,11 @@ from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
32
33
  from metaflow.tagging_util import validate_tags
33
34
  from metaflow.util import get_username, to_bytes, to_unicode, version_parse
34
35
 
35
- from .argo_workflows import ArgoWorkflows
36
+ from .argo_workflows import ArgoWorkflows, ArgoWorkflowsException
36
37
 
37
- VALID_NAME = re.compile(r"^[a-z0-9]([a-z0-9\.\-]*[a-z0-9])?$")
38
+ NEW_ARGO_NAMELENGTH_METAFLOW_VERSION = "2.17"
39
+
40
+ VALID_NAME = re.compile(r"^[a-z]([a-z0-9\.\-]*[a-z0-9])?$")
38
41
 
39
42
  unsupported_decorators = {
40
43
  "snowpark": "Step *%s* is marked for execution on Snowpark with Argo Workflows which isn't currently supported.",
@@ -85,7 +88,16 @@ def argo_workflows(obj, name=None):
85
88
  obj.workflow_name,
86
89
  obj.token_prefix,
87
90
  obj.is_project,
88
- ) = resolve_workflow_name(obj, name)
91
+ obj._is_workflow_name_modified,
92
+ obj._exception_on_create, # exception_on_create is used to prevent deploying new flows with too long names via --name
93
+ ) = resolve_workflow_name_v2(obj, name)
94
+ # Backward compatibility for Metaflow versions <=2.16 because of
95
+ # change in name length restrictions in Argo Workflows from 253 to 52
96
+ # characters.
97
+ (
98
+ obj._v1_workflow_name,
99
+ obj._v1_is_workflow_name_modified,
100
+ ) = resolve_workflow_name_v1(obj, name)
89
101
 
90
102
 
91
103
  @argo_workflows.command(help="Deploy a new version of this workflow to Argo Workflows.")
@@ -215,6 +227,18 @@ def argo_workflows(obj, name=None):
215
227
  show_default=True,
216
228
  help="Capture stack trace of first failed task in exit hook.",
217
229
  )
230
+ @click.option(
231
+ "--workflow-title",
232
+ default=None,
233
+ type=str,
234
+ help="Custom title for the workflow displayed in Argo Workflows UI. Defaults to `project_flow_name`. Supports markdown formatting.",
235
+ )
236
+ @click.option(
237
+ "--workflow-description",
238
+ default=None,
239
+ type=str,
240
+ help="Custom description for the workflow displayed in Argo Workflows UI. Defaults to the flow's docstring if available. Supports markdown formatting and multi-line text.",
241
+ )
218
242
  @click.pass_obj
219
243
  def create(
220
244
  obj,
@@ -236,9 +260,16 @@ def create(
236
260
  incident_io_alert_source_config_id=None,
237
261
  incident_io_metadata=None,
238
262
  enable_heartbeat_daemon=True,
263
+ workflow_title=None,
264
+ workflow_description=None,
239
265
  deployer_attribute_file=None,
240
266
  enable_error_msg_capture=False,
241
267
  ):
268
+ # check if we are supposed to block deploying the flow due to name length constraints.
269
+ if obj._exception_on_create is not None:
270
+ raise obj._exception_on_create
271
+
272
+ # TODO: Remove this once we have a proper validator system in place
242
273
  for node in obj.graph:
243
274
  for decorator, error_message in unsupported_decorators.items():
244
275
  if any([d.name == decorator for d in node.decorators]):
@@ -257,7 +288,7 @@ def create(
257
288
  f,
258
289
  )
259
290
 
260
- obj.echo("Deploying *%s* to Argo Workflows..." % obj.workflow_name, bold=True)
291
+ obj.echo("Deploying *%s* to Argo Workflows..." % obj.flow.name, bold=True)
261
292
 
262
293
  if SERVICE_VERSION_CHECK:
263
294
  # TODO: Consider dispensing with this check since it's been 2 years since the
@@ -295,6 +326,8 @@ def create(
295
326
  incident_io_metadata,
296
327
  enable_heartbeat_daemon,
297
328
  enable_error_msg_capture,
329
+ workflow_title,
330
+ workflow_description,
298
331
  )
299
332
 
300
333
  if only_json:
@@ -304,7 +337,7 @@ def create(
304
337
  flow.deploy()
305
338
  obj.echo(
306
339
  "Workflow *{workflow_name}* "
307
- "for flow *{name}* pushed to "
340
+ "for flow *{name}* deployed to "
308
341
  "Argo Workflows successfully.\n".format(
309
342
  workflow_name=obj.workflow_name, name=current.flow_name
310
343
  ),
@@ -313,8 +346,40 @@ def create(
313
346
  if obj._is_workflow_name_modified:
314
347
  obj.echo(
315
348
  "Note that the flow was deployed with a modified name "
316
- "due to Kubernetes naming conventions\non Argo Workflows. The "
317
- "original flow name is stored in the workflow annotation.\n"
349
+ "due to Kubernetes naming conventions on Argo Workflows. The "
350
+ "original flow name is stored in the workflow annotations.\n",
351
+ wrap=True,
352
+ )
353
+
354
+ if obj.workflow_name != obj._v1_workflow_name:
355
+ # Delete the old workflow if it exists
356
+ try:
357
+ ArgoWorkflows.delete(obj._v1_workflow_name)
358
+ obj.echo("Important!", bold=True, nl=False)
359
+ obj.echo(
360
+ " To comply with new naming restrictions on Argo "
361
+ "Workflows, this deployment replaced the previously "
362
+ "deployed workflow {v1_workflow_name}.\n".format(
363
+ v1_workflow_name=obj._v1_workflow_name
364
+ ),
365
+ wrap=True,
366
+ )
367
+ except ArgoWorkflowsException as e:
368
+ # TODO: Catch a more specific exception
369
+ pass
370
+
371
+ obj.echo("Warning! ", bold=True, nl=False)
372
+ obj.echo(
373
+ "Due to new naming restrictions on Argo Workflows, "
374
+ "re-deploying this flow with older versions of Metaflow (<{version}) "
375
+ "will result in the flow being deployed with a different name -\n"
376
+ "*{v1_workflow_name}* without replacing the version you just deployed. "
377
+ "This may result in duplicate executions of this flow. To avoid this issue, "
378
+ "always deploy this flow using Metaflow ≥{version} or specify the flow name with --name.".format(
379
+ v1_workflow_name=obj._v1_workflow_name,
380
+ version=NEW_ARGO_NAMELENGTH_METAFLOW_VERSION,
381
+ ),
382
+ wrap=True,
318
383
  )
319
384
 
320
385
  if ARGO_WORKFLOWS_UI_URL:
@@ -344,20 +409,20 @@ def create(
344
409
 
345
410
 
346
411
  def check_python_version(obj):
347
- # argo-workflows integration for Metaflow isn't supported for Py versions below 3.5.
412
+ # argo-workflows integration for Metaflow isn't supported for Py versions below 3.6.
348
413
  # This constraint can very well be lifted if desired.
349
- if sys.version_info < (3, 5):
414
+ if sys.version_info < (3, 6):
350
415
  obj.echo("")
351
416
  obj.echo(
352
417
  "Metaflow doesn't support Argo Workflows for Python %s right now."
353
418
  % platform.python_version()
354
419
  )
355
420
  obj.echo(
356
- "Please upgrade your Python interpreter to version 3.5 (or higher) or "
421
+ "Please upgrade your Python interpreter to version 3.6 (or higher) or "
357
422
  "reach out to us at slack.outerbounds.co for more help."
358
423
  )
359
424
  raise UnsupportedPythonVersion(
360
- "Try again with a more recent version of Python (>=3.5)."
425
+ "Try again with a more recent version of Python (>=3.6)."
361
426
  )
362
427
 
363
428
 
@@ -394,9 +459,108 @@ def check_metadata_service_version(obj):
394
459
  )
395
460
 
396
461
 
397
- def resolve_workflow_name(obj, name):
462
+ # Argo Workflows has a few restrictions on workflow names:
463
+ # - Argo Workflow Template names can't be longer than 253 characters since
464
+ # they follow DNS Subdomain name restrictions.
465
+ # - Argo Workflows stores workflow template names as a label in the workflow
466
+ # template metadata - workflows.argoproj.io/workflow-template, which follows
467
+ # RFC 1123, which is a strict subset of DNS Subdomain names and allows for
468
+ # 63 characters.
469
+ # - Argo Workflows appends a unix timestamp to the workflow name when the workflow
470
+ # is created (-1243856725) from a workflow template deployed as a cron workflow template
471
+ # reducing the number of characters available to 52.
472
+ # - TODO: Check naming restrictions for Argo Events.
473
+
474
+ # In summary -
475
+ # - We truncate the workflow name to 45 characters to leave enough room for future
476
+ # enhancements to the Argo Workflows integration.
477
+ # - We remove any underscores since Argo Workflows doesn't allow them.
478
+ # - We convert the name to lower case.
479
+ # - We remove + and @ as not allowed characters, which can be part of the
480
+ # project branch due to using email addresses as user names.
481
+ # - We append a hash of the workflow name to the end to make it unique.
482
+
483
+ # A complication here is that in previous versions of Metaflow (<=2.16), the limit was a
484
+ # rather lax 253 characters - so we have two issues to contend with:
485
+ # 1. Replacing any equivalent flows deployed using previous versions of Metaflow which
486
+ # adds a bit of complexity to the business logic.
487
+ # 2. Breaking Metaflow users who have multiple versions of Metaflow floating in their
488
+ # organization. Imagine a scenario, where metaflow-v1 (253 chars) deploys the same
489
+ # flow which was previously deployed using the new metaflow-v2 (45 chars) - the user
490
+ # will end up with two workflow templates instead of one since metaflow-v1 has no
491
+ # awareness of the new name truncation logic introduced by metaflow-v2. Unfortunately,
492
+ # there is no way to avoid this scenario - so we will do our best to message to the
493
+ # user to not use an older version of Metaflow to redeploy affected flows.
494
+ # ------------------------------------------------------------------------------------------
495
+ # | metaflow-v1 (253 chars) | metaflow-v2 (45 chars) | Result |
496
+ # ------------------------------------------------------------------------------------------
497
+ # | workflow_name_modified = True | workflow_name_modified = False | Not possible |
498
+ # ------------------------------------------------------------------------------------------
499
+ # | workflow_name_modified = False | workflow_name_modified = True | Messaging needed |
500
+ # ------------------------------------------------------------------------------------------
501
+ # | workflow_name_modified = False | workflow_name_modified = False | No message needed |
502
+ # ------------------------------------------------------------------------------------------
503
+ # | workflow_name_modified = True | workflow_name_modified = True | Messaging needed |
504
+ # ------------------------------------------------------------------------------------------
505
+
506
+
507
+ def resolve_workflow_name_v1(obj, name):
508
+ # models the workflow_name calculation logic in Metaflow versions <=2.16
509
+ # important!! - should stay static including any future bugs
510
+ project = current.get("project_name")
511
+ is_workflow_name_modified = False
512
+ if project:
513
+ if name:
514
+ return None, False # not possible in versions =<2.16
515
+ workflow_name = current.project_flow_name
516
+ if len(workflow_name) > 253:
517
+ name_hash = to_unicode(
518
+ base64.b32encode(sha1(to_bytes(workflow_name)).digest())
519
+ )[:8].lower()
520
+ workflow_name = "%s-%s" % (workflow_name[:242], name_hash)
521
+ is_workflow_name_modified = True
522
+ if not VALID_NAME.search(workflow_name):
523
+ workflow_name = (
524
+ re.compile(r"^[^A-Za-z0-9]+")
525
+ .sub("", workflow_name)
526
+ .replace("_", "")
527
+ .replace("@", "")
528
+ .replace("+", "")
529
+ .lower()
530
+ )
531
+ is_workflow_name_modified = True
532
+ else:
533
+ if name and not VALID_NAME.search(name):
534
+ return None, False # not possible in versions =<2.16
535
+ workflow_name = name if name else current.flow_name
536
+ if len(workflow_name) > 253:
537
+ return None, False # not possible in versions =<2.16
538
+ if not VALID_NAME.search(workflow_name):
539
+ # Note - since the original name sanitization was a surjective
540
+ # mapping, using it here is a bug, but we leave this in
541
+ # place since the usage of v1_workflow_name is to generate
542
+ # historical workflow names, so we need to replicate all
543
+ # the bugs too :'(
544
+
545
+ workflow_name = (
546
+ re.compile(r"^[^A-Za-z0-9]+")
547
+ .sub("", workflow_name)
548
+ .replace("_", "")
549
+ .replace("@", "")
550
+ .replace("+", "")
551
+ .lower()
552
+ )
553
+ is_workflow_name_modified = True
554
+ return workflow_name, is_workflow_name_modified
555
+
556
+
557
+ def resolve_workflow_name_v2(obj, name):
558
+ # current logic for imputing workflow_name
559
+ limit = 45
398
560
  project = current.get("project_name")
399
- obj._is_workflow_name_modified = False
561
+ is_workflow_name_modified = False
562
+ exception_on_create = None
563
+
400
564
  if project:
401
565
  if name:
402
566
  raise MetaflowException(
@@ -409,48 +573,86 @@ def resolve_workflow_name(obj, name):
409
573
  % to_unicode(base64.b32encode(sha1(project_branch).digest()))[:16]
410
574
  )
411
575
  is_project = True
412
- # Argo Workflow names can't be longer than 253 characters, so we truncate
413
- # by default. Also, while project and branch allow for underscores, Argo
414
- # Workflows doesn't (DNS Subdomain names as defined in RFC 1123) - so we will
415
- # remove any underscores as well as convert the name to lower case.
416
- # Also remove + and @ as not allowed characters, which can be part of the
417
- # project branch due to using email addresses as user names.
418
- if len(workflow_name) > 253:
576
+
577
+ if len(workflow_name) > limit:
419
578
  name_hash = to_unicode(
420
579
  base64.b32encode(sha1(to_bytes(workflow_name)).digest())
421
- )[:8].lower()
422
- workflow_name = "%s-%s" % (workflow_name[:242], name_hash)
423
- obj._is_workflow_name_modified = True
424
- if not VALID_NAME.search(workflow_name):
425
- workflow_name = sanitize_for_argo(workflow_name)
426
- obj._is_workflow_name_modified = True
580
+ )[:5].lower()
581
+
582
+ # Generate a meaningful short name
583
+ project_name = project
584
+ branch_name = current.branch_name
585
+ flow_name = current.flow_name
586
+ parts = [project_name, branch_name, flow_name]
587
+ max_name_len = limit - 6
588
+ min_each = 7
589
+ total_len = sum(len(p) for p in parts)
590
+ remaining = max_name_len - 3 * min_each
591
+ extras = [int(remaining * len(p) / total_len) for p in parts]
592
+ while sum(extras) < remaining:
593
+ extras[extras.index(min(extras))] += 1
594
+ budgets = [min_each + e for e in extras]
595
+ proj_budget = budgets[0]
596
+ if len(project_name) <= proj_budget:
597
+ proj_str = project_name
598
+ else:
599
+ h = proj_budget // 2
600
+ t = proj_budget - h
601
+ proj_str = project_name[:h] + project_name[-t:]
602
+ branch_budget = budgets[1]
603
+ branch_str = branch_name[:branch_budget]
604
+ flow_budget = budgets[2]
605
+ if len(flow_name) <= flow_budget:
606
+ flow_str = flow_name
607
+ else:
608
+ h = flow_budget // 2
609
+ t = flow_budget - h
610
+ flow_str = flow_name[:h] + flow_name[-t:]
611
+ descriptive_name = sanitize_for_argo(
612
+ "%s.%s.%s" % (proj_str, branch_str, flow_str)
613
+ )
614
+ workflow_name = "%s-%s" % (descriptive_name, name_hash)
615
+ is_workflow_name_modified = True
427
616
  else:
428
617
  if name and not VALID_NAME.search(name):
429
618
  raise MetaflowException(
430
619
  "Name '%s' contains invalid characters. The "
431
620
  "name must consist of lower case alphanumeric characters, '-' or '.'"
432
- ", and must start and end with an alphanumeric character." % name
621
+ ", and must start with an alphabetic character, "
622
+ "and end with an alphanumeric character." % name
433
623
  )
434
-
435
624
  workflow_name = name if name else current.flow_name
436
625
  token_prefix = workflow_name
437
626
  is_project = False
438
627
 
439
- if len(workflow_name) > 253:
440
- msg = (
441
- "The full name of the workflow:\n*%s*\nis longer than 253 "
628
+ if len(workflow_name) > limit:
629
+ # NOTE: We could have opted for truncating names specified by --name and flow_name
630
+ # as well, but chose to error instead due to the expectation that users would
631
+ # be intentionally explicit in their naming, and truncating these would lose
632
+ # information they intended to encode in the deployment.
633
+ exception_on_create = ArgoWorkflowsNameTooLong(
634
+ "The full name of the workflow:\n*%s*\nis longer than %s "
442
635
  "characters.\n\n"
443
636
  "To deploy this workflow to Argo Workflows, please "
444
637
  "assign a shorter name\nusing the option\n"
445
- "*argo-workflows --name <name> create*." % workflow_name
638
+ "*argo-workflows --name <name> create*." % (name, limit)
446
639
  )
447
- raise ArgoWorkflowsNameTooLong(msg)
448
640
 
449
- if not VALID_NAME.search(workflow_name):
450
- workflow_name = sanitize_for_argo(workflow_name)
451
- obj._is_workflow_name_modified = True
641
+ if not VALID_NAME.search(workflow_name):
642
+ # NOTE: Even though sanitize_for_argo is surjective which can result in collisions,
643
+ # we still use it here since production tokens guard against name collisions
644
+ # and if we made it injective, metaflow 2.17 will result in every deployed
645
+ # flow's name changing, significantly increasing the blast radius of the change.
646
+ workflow_name = sanitize_for_argo(workflow_name)
647
+ is_workflow_name_modified = True
452
648
 
453
- return workflow_name, token_prefix.lower(), is_project
649
+ return (
650
+ workflow_name,
651
+ token_prefix.lower(),
652
+ is_project,
653
+ is_workflow_name_modified,
654
+ exception_on_create,
655
+ )
454
656
 
455
657
 
456
658
  def make_flow(
@@ -472,6 +674,8 @@ def make_flow(
472
674
  incident_io_metadata,
473
675
  enable_heartbeat_daemon,
474
676
  enable_error_msg_capture,
677
+ workflow_title,
678
+ workflow_description,
475
679
  ):
476
680
  # TODO: Make this check less specific to Amazon S3 as we introduce
477
681
  # support for more cloud object stores.
@@ -514,20 +718,32 @@ def make_flow(
514
718
  decorators._init_step_decorators(
515
719
  obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
516
720
  )
721
+ obj.graph = obj.flow._graph
517
722
 
518
723
  # Save the code package in the flow datastore so that both user code and
519
724
  # metaflow package can be retrieved during workflow execution.
520
725
  obj.package = MetaflowPackage(
521
- obj.flow, obj.environment, obj.echo, obj.package_suffixes
726
+ obj.flow,
727
+ obj.environment,
728
+ obj.echo,
729
+ suffixes=obj.package_suffixes,
730
+ flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
522
731
  )
523
- package_url, package_sha = obj.flow_datastore.save_data(
524
- [obj.package.blob], len_hint=1
525
- )[0]
732
+
733
+ # This blocks until the package is created
734
+ if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
735
+ package_url = obj.package.package_url()
736
+ package_sha = obj.package.package_sha()
737
+ else:
738
+ package_url, package_sha = obj.flow_datastore.save_data(
739
+ [obj.package.blob], len_hint=1
740
+ )[0]
526
741
 
527
742
  return ArgoWorkflows(
528
743
  name,
529
744
  obj.graph,
530
745
  obj.flow,
746
+ obj.package.package_metadata,
531
747
  package_sha,
532
748
  package_url,
533
749
  token,
@@ -552,6 +768,8 @@ def make_flow(
552
768
  incident_io_metadata=incident_io_metadata,
553
769
  enable_heartbeat_daemon=enable_heartbeat_daemon,
554
770
  enable_error_msg_capture=enable_error_msg_capture,
771
+ workflow_title=workflow_title,
772
+ workflow_description=workflow_description,
555
773
  )
556
774
 
557
775
 
@@ -688,7 +906,28 @@ def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
688
906
  if kwargs.get(param.name.replace("-", "_").lower()) is not None
689
907
  }
690
908
 
691
- response = ArgoWorkflows.trigger(obj.workflow_name, params)
909
+ workflow_name_to_deploy = obj.workflow_name
910
+ # For users that upgraded the client but did not redeploy their flow,
911
+ # we fallback to old workflow names in case of a conflict.
912
+ if obj.workflow_name != obj._v1_workflow_name:
913
+ # use the old name only if there exists a deployment.
914
+ if ArgoWorkflows.get_existing_deployment(obj._v1_workflow_name):
915
+ obj.echo("Warning! ", bold=True, nl=False)
916
+ obj.echo(
917
+ "Found a deployment of this flow with an old style name, defaulted to triggering *%s*."
918
+ % obj._v1_workflow_name,
919
+ wrap=True,
920
+ )
921
+ obj.echo(
922
+ "Due to new naming restrictions on Argo Workflows, "
923
+ "this flow will have a shorter name with newer versions of Metaflow (>=%s) "
924
+ "which will allow it to be triggered through Argo UI as well. "
925
+ % NEW_ARGO_NAMELENGTH_METAFLOW_VERSION,
926
+ wrap=True,
927
+ )
928
+ obj.echo("re-deploy your flow in order to get rid of this message.")
929
+ workflow_name_to_deploy = obj._v1_workflow_name
930
+ response = ArgoWorkflows.trigger(workflow_name_to_deploy, params)
692
931
  run_id = "argo-" + response["metadata"]["name"]
693
932
 
694
933
  if run_id_file:
@@ -699,7 +938,7 @@ def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
699
938
  with open(deployer_attribute_file, "w") as f:
700
939
  json.dump(
701
940
  {
702
- "name": obj.workflow_name,
941
+ "name": workflow_name_to_deploy,
703
942
  "metadata": obj.metadata.metadata_str(),
704
943
  "pathspec": "/".join((obj.flow.name, run_id)),
705
944
  },
@@ -708,7 +947,7 @@ def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
708
947
 
709
948
  obj.echo(
710
949
  "Workflow *{name}* triggered on Argo Workflows "
711
- "(run-id *{run_id}*).".format(name=obj.workflow_name, run_id=run_id),
950
+ "(run-id *{run_id}*).".format(name=workflow_name_to_deploy, run_id=run_id),
712
951
  bold=True,
713
952
  )
714
953
 
@@ -750,26 +989,57 @@ def delete(obj, authorize=None):
750
989
  "about production tokens."
751
990
  )
752
991
 
753
- validate_token(obj.workflow_name, obj.token_prefix, authorize, _token_instructions)
754
- obj.echo("Deleting workflow *{name}*...".format(name=obj.workflow_name), bold=True)
992
+ # Cases and expected behaviours:
993
+ # old name exists, new name does not exist -> delete old and do not fail on missing new
994
+ # old name exists, new name exists -> delete both
995
+ # old name does not exist, new name exists -> only try to delete new
996
+ # old name does not exist, new name does not exist -> keep previous behaviour where missing deployment raises error for the new name.
997
+ def _delete(workflow_name):
998
+ validate_token(workflow_name, obj.token_prefix, authorize, _token_instructions)
999
+ obj.echo("Deleting workflow *{name}*...".format(name=workflow_name), bold=True)
1000
+
1001
+ schedule_deleted, sensor_deleted, workflow_deleted = ArgoWorkflows.delete(
1002
+ workflow_name
1003
+ )
755
1004
 
756
- schedule_deleted, sensor_deleted, workflow_deleted = ArgoWorkflows.delete(
757
- obj.workflow_name
758
- )
1005
+ if schedule_deleted:
1006
+ obj.echo(
1007
+ "Deleting cronworkflow *{name}*...".format(name=workflow_name),
1008
+ bold=True,
1009
+ )
759
1010
 
760
- if schedule_deleted:
761
- obj.echo(
762
- "Deleting cronworkflow *{name}*...".format(name=obj.workflow_name),
763
- bold=True,
764
- )
1011
+ if sensor_deleted:
1012
+ obj.echo(
1013
+ "Deleting sensor *{name}*...".format(name=workflow_name),
1014
+ bold=True,
1015
+ )
1016
+ return workflow_deleted
1017
+
1018
+ workflows_deleted = False
1019
+ cleanup_old_name = False
1020
+ if obj.workflow_name != obj._v1_workflow_name:
1021
+ # Only delete the old name if a deployment with that name exists.
1022
+ # This is due to the way validate_token is tied to an existing deployment.
1023
+ if ArgoWorkflows.get_existing_deployment(obj._v1_workflow_name) is not None:
1024
+ cleanup_old_name = True
1025
+ obj.echo(
1026
+ "This flow has been deployed with another name in the past due to a limitation with Argo Workflows. "
1027
+ "Will also delete the older deployment.",
1028
+ wrap=True,
1029
+ )
1030
+ _delete(obj._v1_workflow_name)
1031
+ workflows_deleted = True
765
1032
 
766
- if sensor_deleted:
767
- obj.echo(
768
- "Deleting sensor *{name}*...".format(name=obj.workflow_name),
769
- bold=True,
770
- )
1033
+ # Always try to delete the current name.
1034
+ # Do not raise exception if we deleted old name before this.
1035
+ try:
1036
+ _delete(obj.workflow_name)
1037
+ workflows_deleted = True
1038
+ except ArgoWorkflowsException:
1039
+ if not cleanup_old_name:
1040
+ raise
771
1041
 
772
- if workflow_deleted:
1042
+ if workflows_deleted:
773
1043
  obj.echo(
774
1044
  "Deleting Kubernetes resources may take a while. "
775
1045
  "Deploying the flow again to Argo Workflows while the delete is in-flight will fail."
@@ -808,17 +1078,21 @@ def suspend(obj, run_id, authorize=None):
808
1078
  "about production tokens."
809
1079
  )
810
1080
 
811
- validate_run_id(
812
- obj.workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
813
- )
1081
+ workflows = _get_existing_workflow_names(obj)
814
1082
 
815
- # Trim prefix from run_id
816
- name = run_id[5:]
1083
+ for workflow_name in workflows:
1084
+ validate_run_id(
1085
+ workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
1086
+ )
1087
+
1088
+ # Trim prefix from run_id
1089
+ name = run_id[5:]
817
1090
 
818
- workflow_suspended = ArgoWorkflows.suspend(name)
1091
+ workflow_suspended = ArgoWorkflows.suspend(name)
819
1092
 
820
- if workflow_suspended:
821
- obj.echo("Suspended execution of *%s*" % run_id)
1093
+ if workflow_suspended:
1094
+ obj.echo("Suspended execution of *%s*" % run_id)
1095
+ break # no need to try out all workflow_names if we found the running one.
822
1096
 
823
1097
 
824
1098
  @argo_workflows.command(help="Unsuspend flow execution on Argo Workflows.")
@@ -852,17 +1126,21 @@ def unsuspend(obj, run_id, authorize=None):
852
1126
  "about production tokens."
853
1127
  )
854
1128
 
855
- validate_run_id(
856
- obj.workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
857
- )
1129
+ workflows = _get_existing_workflow_names(obj)
858
1130
 
859
- # Trim prefix from run_id
860
- name = run_id[5:]
1131
+ for workflow_name in workflows:
1132
+ validate_run_id(
1133
+ workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
1134
+ )
861
1135
 
862
- workflow_suspended = ArgoWorkflows.unsuspend(name)
1136
+ # Trim prefix from run_id
1137
+ name = run_id[5:]
863
1138
 
864
- if workflow_suspended:
865
- obj.echo("Unsuspended execution of *%s*" % run_id)
1139
+ workflow_suspended = ArgoWorkflows.unsuspend(name)
1140
+
1141
+ if workflow_suspended:
1142
+ obj.echo("Unsuspended execution of *%s*" % run_id)
1143
+ break # no need to try all workflow_names if we found one.
866
1144
 
867
1145
 
868
1146
  def validate_token(name, token_prefix, authorize, instructions_fn=None):
@@ -970,22 +1248,26 @@ def terminate(obj, run_id, authorize=None):
970
1248
  "about production tokens."
971
1249
  )
972
1250
 
973
- validate_run_id(
974
- obj.workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
975
- )
1251
+ workflows = _get_existing_workflow_names(obj)
976
1252
 
977
- # Trim prefix from run_id
978
- name = run_id[5:]
979
- obj.echo(
980
- "Terminating run *{run_id}* for {flow_name} ...".format(
981
- run_id=run_id, flow_name=obj.flow.name
982
- ),
983
- bold=True,
984
- )
1253
+ for workflow_name in workflows:
1254
+ validate_run_id(
1255
+ workflow_name, obj.token_prefix, authorize, run_id, _token_instructions
1256
+ )
985
1257
 
986
- terminated = ArgoWorkflows.terminate(obj.flow.name, name)
987
- if terminated:
988
- obj.echo("\nRun terminated.")
1258
+ # Trim prefix from run_id
1259
+ name = run_id[5:]
1260
+ obj.echo(
1261
+ "Terminating run *{run_id}* for {flow_name} ...".format(
1262
+ run_id=run_id, flow_name=obj.flow.name
1263
+ ),
1264
+ bold=True,
1265
+ )
1266
+
1267
+ terminated = ArgoWorkflows.terminate(obj.flow.name, name)
1268
+ if terminated:
1269
+ obj.echo("\nRun terminated.")
1270
+ break # no need to try all workflow_names if we found the running one.
989
1271
 
990
1272
 
991
1273
  @argo_workflows.command(help="List Argo Workflow templates for the flow.")
@@ -998,8 +1280,7 @@ def terminate(obj, run_id, authorize=None):
998
1280
  )
999
1281
  @click.pass_obj
1000
1282
  def list_workflow_templates(obj, all=None):
1001
- templates = ArgoWorkflows.list_templates(obj.flow.name, all)
1002
- for template_name in templates:
1283
+ for template_name in ArgoWorkflows.list_templates(obj.flow.name, all):
1003
1284
  obj.echo_always(template_name)
1004
1285
 
1005
1286
 
@@ -1089,11 +1370,26 @@ def validate_run_id(
1089
1370
  return True
1090
1371
 
1091
1372
 
1373
+ def _get_existing_workflow_names(obj):
1374
+ """
1375
+ Construct a list of the current workflow name and possible existing deployments of old workflow names
1376
+ """
1377
+ workflows = [obj.workflow_name]
1378
+ if obj.workflow_name != obj._v1_workflow_name:
1379
+ # Only add the old name if there exists a deployment with such name.
1380
+ # This is due to the way validate_token is tied to an existing deployment.
1381
+ if ArgoWorkflows.get_existing_deployment(obj._v1_workflow_name) is not None:
1382
+ workflows.append(obj._v1_workflow_name)
1383
+
1384
+ return workflows
1385
+
1386
+
1092
1387
  def sanitize_for_argo(text):
1093
1388
  """
1094
- Sanitizes a string so it does not contain characters that are not permitted in Argo Workflow resource names.
1389
+ Sanitizes a string so it does not contain characters that are not permitted in
1390
+ Argo Workflow resource names.
1095
1391
  """
1096
- return (
1392
+ sanitized = (
1097
1393
  re.compile(r"^[^A-Za-z0-9]+")
1098
1394
  .sub("", text)
1099
1395
  .replace("_", "")
@@ -1101,6 +1397,12 @@ def sanitize_for_argo(text):
1101
1397
  .replace("+", "")
1102
1398
  .lower()
1103
1399
  )
1400
+ # This is added in order to get sanitized and truncated project branch names to adhere to RFC 1123 subdomain requirements
1401
+ # e.g. after truncation a project flow name might be project.branch-cut-short-.flowname
1402
+ # sanitize around the . separators by removing any non-alphanumeric characters
1403
+ sanitized = re.compile(r"[^a-z0-9]*\.[^a-z0-9]*").sub(".", sanitized)
1404
+
1405
+ return sanitized
1104
1406
 
1105
1407
 
1106
1408
  def remap_status(status):