ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,16 @@
1
+ from metaflow.plugins.metadata_providers.local import LocalMetadataProvider
2
+ from metaflow.metaflow_config import DATASTORE_SPIN_LOCAL_DIR
3
+
4
+
5
+ class SpinMetadataProvider(LocalMetadataProvider):
6
+ TYPE = "spin"
7
+ DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR # ".metaflow_spin"
8
+
9
+ @classmethod
10
+ def _get_storage_class(cls):
11
+ from metaflow.plugins.datastores.spin_storage import SpinStorage
12
+
13
+ return SpinStorage
14
+
15
+ def version(self):
16
+ return "spin"
@@ -9,49 +9,61 @@ def cli():
9
9
 
10
10
 
11
11
  @cli.group(help="Commands related to code packages.")
12
+ @click.option(
13
+ "--timeout", default=60, help="Timeout for package operations in seconds."
14
+ )
12
15
  @click.pass_obj
13
- def package(obj):
16
+ def package(obj, timeout):
14
17
  # Prepare the package before any of the sub-commands are invoked.
18
+ # We explicitly will *not* upload it to the datastore.
15
19
  obj.package = MetaflowPackage(
16
- obj.flow, obj.environment, obj.echo, obj.package_suffixes
20
+ obj.flow,
21
+ obj.environment,
22
+ obj.echo,
23
+ suffixes=obj.package_suffixes,
24
+ flow_datastore=None,
17
25
  )
26
+ obj.package_op_timeout = timeout
18
27
 
19
28
 
20
- @package.command(help="Output information about the current code package.")
29
+ @package.command(help="Output information about the code package.")
21
30
  @click.pass_obj
22
31
  def info(obj):
23
- obj.echo("Status of the current working directory:", fg="magenta", bold=False)
24
- obj.echo_always(
25
- "Hash: *%s*" % sha1(obj.package.blob).hexdigest(),
26
- highlight="green",
27
- highlight_bold=False,
28
- )
29
- obj.echo_always(
30
- "Package size: *%d* KB" % (len(obj.package.blob) / 1024),
31
- highlight="green",
32
- highlight_bold=False,
33
- )
34
- num = sum(1 for _ in obj.package.path_tuples())
35
- obj.echo_always(
36
- "Number of files: *%d*" % num, highlight="green", highlight_bold=False
37
- )
32
+ obj.echo_always(obj.package.show())
38
33
 
39
34
 
40
- @package.command(help="List files included in the code package.")
35
+ @package.command(help="List all files included in the code package.")
36
+ @click.option(
37
+ "--archive/--no-archive",
38
+ default=False,
39
+ help="If True, lists the file paths as present in the code package archive; "
40
+ "otherwise, lists the files on your filesystem included in the code package",
41
+ show_default=True,
42
+ )
41
43
  @click.pass_obj
42
- def list(obj):
44
+ def list(obj, archive=False):
45
+ _ = obj.package.blob_with_timeout(timeout=obj.package_op_timeout)
46
+ # We now have all the information about the blob
43
47
  obj.echo(
44
- "Files included in the code package " "(change with --package-suffixes):",
48
+ "Files included in the code package (change with --package-suffixes):",
45
49
  fg="magenta",
46
50
  bold=False,
47
51
  )
48
- obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples()))
52
+ if archive:
53
+ obj.echo_always("\n".join(path for _, path in obj.package.path_tuples()))
54
+ else:
55
+ obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples()))
49
56
 
50
57
 
51
- @package.command(help="Save the current code package in a tar file")
58
+ @package.command(help="Save the current code package to a file.")
52
59
  @click.argument("path")
53
60
  @click.pass_obj
54
61
  def save(obj, path):
55
62
  with open(path, "wb") as f:
56
63
  f.write(obj.package.blob)
57
- obj.echo("Code package saved in *%s*." % path, fg="magenta", bold=False)
64
+ obj.echo(
65
+ "Code package saved in *%s* with metadata: %s"
66
+ % (path, obj.package.package_metadata),
67
+ fg="magenta",
68
+ bold=False,
69
+ )
@@ -1,30 +1,145 @@
1
+ from collections import namedtuple
1
2
  from metaflow.decorators import StepDecorator
2
- from metaflow.unbounded_foreach import UBF_CONTROL
3
+ from metaflow.unbounded_foreach import UBF_CONTROL, CONTROL_TASK_TAG
3
4
  from metaflow.exception import MetaflowException
5
+ from metaflow.metadata_provider import MetaDatum
6
+ from metaflow.metaflow_current import current, Parallel
4
7
  import os
5
8
  import sys
6
9
 
7
10
 
8
11
  class ParallelDecorator(StepDecorator):
12
+ """
13
+ MF Add To Current
14
+ -----------------
15
+ parallel -> metaflow.metaflow_current.Parallel
16
+ Returns a namedtuple with relevant information about the parallel task.
17
+
18
+ @@ Returns
19
+ -------
20
+ Parallel
21
+ `namedtuple` with the following fields:
22
+ - main_ip (`str`)
23
+ The IP address of the control task.
24
+ - num_nodes (`int`)
25
+ The total number of tasks created by @parallel
26
+ - node_index (`int`)
27
+ The index of the current task in all the @parallel tasks.
28
+ - control_task_id (`Optional[str]`)
29
+ The task ID of the control task. Available to all tasks.
30
+
31
+ is_parallel -> bool
32
+ True if the current step is a @parallel step.
33
+ """
34
+
9
35
  name = "parallel"
10
36
  defaults = {}
11
37
  IS_PARALLEL = True
12
38
 
13
- def __init__(self, attributes=None, statically_defined=False):
14
- super(ParallelDecorator, self).__init__(attributes, statically_defined)
39
+ def __init__(self, attributes=None, statically_defined=False, inserted_by=None):
40
+ super(ParallelDecorator, self).__init__(
41
+ attributes, statically_defined, inserted_by
42
+ )
15
43
 
16
44
  def runtime_step_cli(
17
45
  self, cli_args, retry_count, max_user_code_retries, ubf_context
18
46
  ):
19
-
20
47
  if ubf_context == UBF_CONTROL:
21
48
  num_parallel = cli_args.task.ubf_iter.num_parallel
22
49
  cli_args.command_options["num-parallel"] = str(num_parallel)
50
+ if os.environ.get("METAFLOW_RUNTIME_ENVIRONMENT", "local") == "local":
51
+ cli_args.command_options["split_index"] = "0"
23
52
 
24
53
  def step_init(
25
54
  self, flow, graph, step_name, decorators, environment, flow_datastore, logger
26
55
  ):
56
+ # TODO: This can be supported in the future, but for the time being we disable the transition as it leads to
57
+ # a UBF exception during runtime when the actual parallel-join step is conditional (switching between different join implementations from the @parallel step).
58
+ if graph[step_name].type == "split-switch":
59
+ raise MetaflowException(
60
+ "A @parallel step can not be a conditional switch step. Please add a join step after *%s*"
61
+ % step_name
62
+ )
27
63
  self.environment = environment
64
+ # Previously, the `parallel` property was a hardcoded, static property within `current`.
65
+ # Whenever `current.parallel` was called, it returned a named tuple with values coming from
66
+ # environment variables, loaded dynamically at runtime.
67
+ # Now, many of these environment variables are set by compute-related decorators in `task_pre_step`.
68
+ # This necessitates ensuring the correct ordering of the `parallel` and compute decorators if we want to
69
+ # statically set the namedtuple via `current._update_env` in `task_pre_step`. Hence we avoid using
70
+ # `current._update_env` since:
71
+ # - it will set a static named tuple, resolving environment variables only once (at the time of calling `current._update_env`).
72
+ # - we cannot guarantee the order of calling the decorator's `task_pre_step` (calling `current._update_env` may not set
73
+ # the named tuple with the correct values).
74
+ # Therefore, we explicitly set the property in `step_init` to ensure the property can resolve the appropriate values in the named tuple
75
+ # when accessed at runtime.
76
+ setattr(
77
+ current.__class__,
78
+ "parallel",
79
+ property(
80
+ fget=lambda _: Parallel(
81
+ main_ip=os.environ.get("MF_PARALLEL_MAIN_IP", "127.0.0.1"),
82
+ num_nodes=int(os.environ.get("MF_PARALLEL_NUM_NODES", "1")),
83
+ node_index=int(os.environ.get("MF_PARALLEL_NODE_INDEX", "0")),
84
+ control_task_id=os.environ.get("MF_PARALLEL_CONTROL_TASK_ID", None),
85
+ )
86
+ ),
87
+ )
88
+
89
+ def task_pre_step(
90
+ self,
91
+ step_name,
92
+ task_datastore,
93
+ metadata,
94
+ run_id,
95
+ task_id,
96
+ flow,
97
+ graph,
98
+ retry_count,
99
+ max_user_code_retries,
100
+ ubf_context,
101
+ inputs,
102
+ ):
103
+ from metaflow import current
104
+
105
+ # Set `is_parallel` to `True` in `current` just like we do
106
+ # with `is_production` in the project decorator.
107
+ current._update_env(
108
+ {
109
+ "is_parallel": True,
110
+ }
111
+ )
112
+
113
+ self.input_paths = [obj.pathspec for obj in inputs]
114
+ task_metadata_list = [
115
+ MetaDatum(
116
+ field="parallel-world-size",
117
+ value=flow._parallel_ubf_iter.num_parallel,
118
+ type="parallel-world-size",
119
+ tags=["attempt_id:{0}".format(0)],
120
+ )
121
+ ]
122
+ if ubf_context == UBF_CONTROL:
123
+ # A Task's tags are now those of its ancestral Run, so we are not able
124
+ # to rely on a task's tags to indicate the presence of a control task
125
+ # so, on top of adding the tags above, we also add a task metadata
126
+ # entry indicating that this is a "control task".
127
+ #
128
+ # Here we will also add a task metadata entry to indicate "control
129
+ # task". Within the metaflow repo, the only dependency of such a
130
+ # "control task" indicator is in the integration test suite (see
131
+ # Step.control_tasks() in client API).
132
+ task_metadata_list += [
133
+ MetaDatum(
134
+ field="internal_task_type",
135
+ value=CONTROL_TASK_TAG,
136
+ type="internal_task_type",
137
+ tags=["attempt_id:{0}".format(0)],
138
+ )
139
+ ]
140
+ flow._control_task_is_mapper_zero = True
141
+
142
+ metadata.register_metadata(run_id, step_name, task_id, task_metadata_list)
28
143
 
29
144
  def task_decorate(
30
145
  self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
@@ -47,6 +162,7 @@ class ParallelDecorator(StepDecorator):
47
162
  env_to_use,
48
163
  _step_func_with_setup,
49
164
  retry_count,
165
+ ",".join(self.input_paths),
50
166
  )
51
167
  else:
52
168
  return _step_func_with_setup
@@ -56,7 +172,9 @@ class ParallelDecorator(StepDecorator):
56
172
  pass
57
173
 
58
174
 
59
- def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_count):
175
+ def _local_multinode_control_task_step_func(
176
+ flow, env_to_use, step_func, retry_count, input_paths
177
+ ):
60
178
  """
61
179
  Used as multinode UBF control task when run in local mode.
62
180
  """
@@ -76,14 +194,12 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
76
194
  num_parallel = foreach_iter.num_parallel
77
195
  os.environ["MF_PARALLEL_NUM_NODES"] = str(num_parallel)
78
196
  os.environ["MF_PARALLEL_MAIN_IP"] = "127.0.0.1"
197
+ os.environ["MF_PARALLEL_CONTROL_TASK_ID"] = str(current.task_id)
79
198
 
80
199
  run_id = current.run_id
81
200
  step_name = current.step_name
82
201
  control_task_id = current.task_id
83
-
84
- (_, split_step_name, split_task_id) = control_task_id.split("-")[1:]
85
202
  # UBF handling for multinode case
86
- top_task_id = control_task_id.replace("control-", "") # chop "-0"
87
203
  mapper_task_ids = [control_task_id]
88
204
  # If we are running inside Conda, we use the base executable FIRST;
89
205
  # the conda environment will then be used when runtime_step_cli is
@@ -93,12 +209,13 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
93
209
  script = sys.argv[0]
94
210
 
95
211
  # start workers
212
+ # TODO: Logs for worker processes are assigned to control process as of today, which
213
+ # should be fixed at some point
96
214
  subprocesses = []
97
215
  for node_index in range(1, num_parallel):
98
- task_id = "%s_node_%d" % (top_task_id, node_index)
216
+ task_id = "%s_node_%d" % (control_task_id, node_index)
99
217
  mapper_task_ids.append(task_id)
100
218
  os.environ["MF_PARALLEL_NODE_INDEX"] = str(node_index)
101
- input_paths = "%s/%s/%s" % (run_id, split_step_name, split_task_id)
102
219
  # Override specific `step` kwargs.
103
220
  kwargs = cli_args.step_kwargs
104
221
  kwargs["split_index"] = str(node_index)
@@ -109,6 +226,7 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
109
226
  kwargs["retry_count"] = str(retry_count)
110
227
 
111
228
  cmd = cli_args.step_command(executable, script, step_name, step_kwargs=kwargs)
229
+
112
230
  p = subprocess.Popen(cmd)
113
231
  subprocesses.append(p)
114
232
 
@@ -116,7 +234,6 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
116
234
  "%s/%s/%s" % (run_id, step_name, mapper_task_id)
117
235
  for mapper_task_id in mapper_task_ids
118
236
  ]
119
- flow._control_task_is_mapper_zero = True
120
237
 
121
238
  # run the step function ourselves
122
239
  os.environ["MF_PARALLEL_NODE_INDEX"] = "0"
@@ -0,0 +1,16 @@
1
+ from metaflow._vendor import yaml
2
+
3
+
4
+ def yaml_parser(content: str) -> dict:
5
+ """
6
+ Parse YAML content to a dictionary.
7
+
8
+ Parameters
9
+ ----------
10
+ content : str
11
+
12
+ Returns
13
+ -------
14
+ dict
15
+ """
16
+ return yaml.safe_load(content)
@@ -26,6 +26,24 @@ class ProjectDecorator(FlowDecorator):
26
26
  projects that use the same production scheduler. The name may
27
27
  contain only lowercase alphanumeric characters and underscores.
28
28
 
29
+ branch : Optional[str], default None
30
+ The branch to use. If not specified, the branch is set to
31
+ `user.<username>` unless `production` is set to `True`. This can
32
+ also be set on the command line using `--branch` as a top-level option.
33
+ It is an error to specify `branch` in the decorator and on the command line.
34
+
35
+ production : bool, default False
36
+ Whether or not the branch is the production branch. This can also be set on the
37
+ command line using `--production` as a top-level option. It is an error to specify
38
+ `production` in the decorator and on the command line.
39
+ The project branch name will be:
40
+ - if `branch` is specified:
41
+ - if `production` is True: `prod.<branch>`
42
+ - if `production` is False: `test.<branch>`
43
+ - if `branch` is not specified:
44
+ - if `production` is True: `prod`
45
+ - if `production` is False: `user.<username>`
46
+
29
47
  MF Add To Current
30
48
  -----------------
31
49
  project_name -> str
@@ -72,7 +90,6 @@ class ProjectDecorator(FlowDecorator):
72
90
  """
73
91
 
74
92
  name = "project"
75
- defaults = {"name": None}
76
93
 
77
94
  options = {
78
95
  "production": dict(
@@ -91,19 +108,48 @@ class ProjectDecorator(FlowDecorator):
91
108
  ),
92
109
  }
93
110
 
111
+ defaults = {"name": None, **{k: v["default"] for k, v in options.items()}}
112
+
94
113
  def flow_init(
95
114
  self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
96
115
  ):
97
116
  self._option_values = options
98
117
  project_name = self.attributes.get("name")
118
+ for op in options:
119
+ if (
120
+ op in self._user_defined_attributes
121
+ and options[op] != self.defaults[op]
122
+ and self.attributes[op] != options[op]
123
+ ):
124
+ # Exception if:
125
+ # - the user provides a value in the attributes field
126
+ # - AND the user provided a value in the command line (non default)
127
+ # - AND the values are different
128
+ # Note that this won't raise an error if the user provided the default
129
+ # value on the command line and provided one in the attribute; although
130
+ # slightly inconsistent, it is not incorrect.
131
+ raise MetaflowException(
132
+ "You cannot pass %s as both a command-line argument and an attribute "
133
+ "of the @project decorator." % op
134
+ )
135
+ if "branch" in self._user_defined_attributes:
136
+ project_branch = self.attributes["branch"]
137
+ else:
138
+ project_branch = options["branch"]
139
+
140
+ if "production" in self._user_defined_attributes:
141
+ project_production = self.attributes["production"]
142
+ else:
143
+ project_production = options["production"]
144
+
99
145
  project_flow_name, branch_name = format_name(
100
146
  flow.name,
101
147
  project_name,
102
- options["production"],
103
- options["branch"],
148
+ project_production,
149
+ project_branch,
104
150
  get_username(),
105
151
  )
106
- is_user_branch = options["branch"] is None and not options["production"]
152
+ is_user_branch = project_branch is None and not project_production
107
153
  echo(
108
154
  "Project: *%s*, Branch: *%s*" % (project_name, branch_name),
109
155
  fg="magenta",
@@ -114,7 +160,7 @@ class ProjectDecorator(FlowDecorator):
114
160
  "project_name": project_name,
115
161
  "branch_name": branch_name,
116
162
  "is_user_branch": is_user_branch,
117
- "is_production": options["production"],
163
+ "is_production": project_production,
118
164
  "project_flow_name": project_flow_name,
119
165
  }
120
166
  )