ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/cmd/develop/stubs.py CHANGED
@@ -12,23 +12,13 @@ from . import develop
 from .stub_generator import StubGenerator
 
 _py_ver = sys.version_info[:2]
-_metadata_package = None
 
-
-def _check_stubs_supported():
-    global _metadata_package
-    if _metadata_package is not None:
-        return _metadata_package
-    else:
-        if _py_ver >= (3, 4):
-            if _py_ver >= (3, 8):
-                from importlib import metadata
-            elif _py_ver >= (3, 6):
-                from metaflow._vendor.v3_6 import importlib_metadata as metadata
-            else:
-                from metaflow._vendor.v3_5 import importlib_metadata as metadata
-            _metadata_package = metadata
-    return _metadata_package
+if _py_ver >= (3, 8):
+    from importlib import metadata
+elif _py_ver >= (3, 7):
+    from metaflow._vendor.v3_7 import importlib_metadata as metadata
+else:
+    from metaflow._vendor.v3_6 import importlib_metadata as metadata
 
 
 @develop.group(short_help="Stubs management")
@@ -43,12 +33,6 @@ def stubs(ctx: Any):
     This CLI provides utilities to check and generate stubs for your current Metaflow
     installation.
     """
-    if _check_stubs_supported() is None:
-        raise click.UsageError(
-            "Building and installing stubs are not supported on Python %d.%d "
-            "(3.4 minimum required)" % _py_ver,
-            ctx=ctx,
-        )
 
 
 @stubs.command(short_help="Check validity of stubs")
@@ -168,7 +152,7 @@ def install(ctx: Any, force: bool):
             "Metaflow stubs are already installed and valid -- use --force to reinstall"
         )
         return
-    mf_version, _ = get_mf_version()
+    mf_version, _ = get_mf_version(True)
    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(os.path.join(tmp_dir, "setup.py"), "w") as f:
            f.write(
@@ -185,7 +169,7 @@ setup(
    packages=find_namespace_packages(),
    package_data={{"metaflow-stubs": ["generated_for.txt", "py.typed", "**/*.pyi"]}},
    install_requires=["metaflow=={mf_version}"],
-    python_requires=">=3.5.2",
+    python_requires=">=3.6.1",
 )
 """
         )
@@ -259,10 +243,10 @@ def split_version(vers: str) -> Tuple[str, Optional[str]]:
     return vers_split[0], vers_split[1]
 
 
-def get_mf_version() -> Tuple[str, Optional[str]]:
+def get_mf_version(public: bool = False) -> Tuple[str, Optional[str]]:
     from metaflow.metaflow_version import get_version
 
-    return split_version(get_version())
+    return split_version(get_version(public))
 
 
 def get_stubs_version(stubs_root_path: Optional[str]) -> Tuple[str, Optional[str]]:
@@ -328,14 +312,14 @@ def get_packages_for_stubs() -> Tuple[List[Tuple[str, str]], List[str]]:
     # some reason it shows up multiple times.
     interesting_dists = [
         d
-        for d in _metadata_package.distributions()
+        for d in metadata.distributions()
        if any(
            [
                p == "metaflow-stubs"
                for p in (d.read_text("top_level.txt") or "").split()
            ]
        )
-        and isinstance(d, _metadata_package.PathDistribution)
+        and isinstance(d, metadata.PathDistribution)
    ]

    for dist in interesting_dists:
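
For context, the new import block above is a version-gated import: the stdlib importlib.metadata is used on Python 3.8+, with a vendored backport as the fallback, so the rest of the module can call metadata.distributions() unconditionally instead of going through a feature-check helper. A minimal sketch of the same pattern (the fallback module name below is illustrative, not Metaflow's vendored path):

import sys

if sys.version_info[:2] >= (3, 8):
    from importlib import metadata
else:
    import importlib_metadata as metadata  # assumed backport name on older Pythons


def stub_distributions():
    # No feature-check helper needed; `metadata` is always bound at import time.
    return [
        d
        for d in metadata.distributions()
        if "metaflow-stubs" in (d.read_text("top_level.txt") or "").split()
    ]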
metaflow/cmd/main_cli.py CHANGED
@@ -12,7 +12,7 @@ import metaflow.tracing as tracing
 
 
 @click.group()
-@tracing.cli_entrypoint("cli/main")
+@tracing.cli("cli/main")
 def main():
     pass
 
@@ -67,6 +67,7 @@ CMDS_DESC = [
     ("configure", ".configure_cmd.cli"),
     ("tutorials", ".tutorials_cmd.cli"),
     ("develop", ".develop.cli"),
+    ("code", ".code.cli"),
 ]
 
 process_cmds(globals())
@@ -84,15 +85,16 @@ def start(ctx):
 
     import metaflow
 
+    version = get_version()
     echo("Metaflow ", fg="magenta", bold=True, nl=False)
 
     if ctx.invoked_subcommand is None:
-        echo("(%s): " % get_version(), fg="magenta", bold=False, nl=False)
+        echo("(%s): " % version, fg="magenta", bold=False, nl=False)
     else:
-        echo("(%s)\n" % get_version(), fg="magenta", bold=False)
+        echo("(%s)\n" % version, fg="magenta", bold=False)
 
     if ctx.invoked_subcommand is None:
-        echo("More data science, less engineering\n", fg="magenta")
+        echo("More AI, less engineering\n", fg="magenta")
 
     lnk_sz = max(len(lnk) for lnk in CONTACT_INFO.values()) + 1
     for what, lnk in CONTACT_INFO.items():
metaflow/cmd/make_wrapper.py ADDED
@@ -0,0 +1,78 @@
+import sys
+import subprocess
+from pathlib import Path
+import sysconfig
+import site
+
+
+def find_makefile():
+    possible_dirs = []
+
+    # 1) The standard sysconfig-based location
+    data_dir = sysconfig.get_paths()["data"]
+    possible_dirs.append(Path(data_dir) / "share" / "metaflow" / "devtools")
+
+    # 2) The user base (e.g. ~/.local on many systems)
+    user_base = site.getuserbase()  # e.g. /home/runner/.local
+    possible_dirs.append(Path(user_base) / "share" / "metaflow" / "devtools")
+
+    # 3) site-packages can vary, we can guess share/.. near each site-packages
+    # (Works if pip actually placed devtools near site-packages.)
+    for p in site.getsitepackages():
+        possible_dirs.append(Path(p).parent / "share" / "metaflow" / "devtools")
+    user_site = site.getusersitepackages()
+    possible_dirs.append(Path(user_site).parent / "share" / "metaflow" / "devtools")
+
+    for candidate_dir in possible_dirs:
+        makefile_candidate = candidate_dir / "Makefile"
+        if makefile_candidate.is_file():
+            return makefile_candidate
+
+    # 4) When developing, Metaflow might be installed with --editable, which means the devtools will not be located within site-packages.
+    # We read the actual location from package metadata in this case, but only do this heavier operation if the above lookups fail.
+    try:
+        import json
+        from importlib.metadata import Distribution
+
+        direct_url = Distribution.from_name("metaflow").read_text("direct_url.json")
+        if direct_url:
+            content = json.loads(direct_url)
+            url = content.get("url", "")
+            if not url.startswith("file://"):
+                return None
+
+            makefile_candidate = (
+                Path(url.replace("file://", "")) / "devtools" / "Makefile"
+            )
+            if makefile_candidate.is_file():
+                return makefile_candidate
+        else:
+            # No dist metadata found. This is tied to the version of pip being used
+            # Do not bother with .egg-link installs due to the handling of the file contents being a headache due to lack of a unified spec.
+            print(
+                "Could not locate an installation of Metaflow. No package metadata found."
+            )
+            print(
+                "If Metaflow is installed as editable, try upgrading the version of pip and reinstalling in order to generate proper package metadata.\n"
+            )
+    except Exception:
+        return None
+
+    return None
+
+
+def main():
+    makefile_path = find_makefile()
+    if not makefile_path:
+        print("ERROR: Could not find executable in any known location.")
+        sys.exit(1)
+    cmd = ["make", "-f", str(makefile_path)] + sys.argv[1:]
+
+    try:
+        completed = subprocess.run(cmd, check=True)
+        sys.exit(completed.returncode)
+    except subprocess.CalledProcessError as ex:
+        sys.exit(ex.returncode)
+    except KeyboardInterrupt:
+        print("Process interrupted by user. Exiting cleanly.")
+        sys.exit(1)
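
The new make_wrapper module appears to back the console entry point added in entry_points.txt (that file gains one line in this release): main() locates the Makefile shipped under share/metaflow/devtools and forwards all CLI arguments to make -f <Makefile>. A quick, hedged way to see which Makefile would be picked up in a given environment:

from metaflow.cmd.make_wrapper import find_makefile

# Prints the resolved devtools Makefile path, or a notice if none is installed.
path = find_makefile()
print(path if path else "no devtools Makefile found in this environment")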
metaflow/datastore/__init__.py CHANGED
@@ -2,3 +2,4 @@ from .inputs import Inputs
 from .flow_datastore import FlowDataStore
 from .datastore_set import TaskDataStoreSet
 from .task_datastore import TaskDataStore
+from .spin_datastore import SpinTaskDatastore
metaflow/datastore/content_addressed_store.py CHANGED
@@ -38,7 +38,7 @@ class ContentAddressedStore(object):
     def set_blob_cache(self, blob_cache):
         self._blob_cache = blob_cache
 
-    def save_blobs(self, blob_iter, raw=False, len_hint=0):
+    def save_blobs(self, blob_iter, raw=False, len_hint=0, is_transfer=False):
         """
         Saves blobs of data to the datastore
 
@@ -60,11 +60,16 @@ class ContentAddressedStore(object):
 
         Parameters
         ----------
-        blob_iter : Iterator over bytes objects to save
-        raw : bool, optional
+        blob_iter : Iterator
+            Iterator over bytes objects to save
+        raw : bool, default False
             Whether to save the bytes directly or process them, by default False
-        len_hint : Hint of the number of blobs that will be produced by the
+        len_hint : int, default 0
+            Hint of the number of blobs that will be produced by the
             iterator, by default 0
+        is_transfer : bool, default False
+            If True, this indicates we are saving blobs directly from the output of another
+            content addressed store's
 
         Returns
         -------
@@ -76,6 +81,20 @@ class ContentAddressedStore(object):
 
         def packing_iter():
             for blob in blob_iter:
+                if is_transfer:
+                    key, blob_data, meta = blob
+                    path = self._storage_impl.path_join(self._prefix, key[:2], key)
+                    # Transfer data is always raw/decompressed, so mark it as such
+                    meta_corrected = {"cas_raw": True, "cas_version": 1}
+
+                    results.append(
+                        self.save_blobs_result(
+                            uri=self._storage_impl.full_uri(path),
+                            key=key,
+                        )
+                    )
+                    yield path, (BytesIO(blob_data), meta_corrected)
+                    continue
                 sha = sha1(blob).hexdigest()
                 path = self._storage_impl.path_join(self._prefix, sha[:2], sha)
                 results.append(
@@ -100,7 +119,7 @@ class ContentAddressedStore(object):
         self._storage_impl.save_bytes(packing_iter(), overwrite=True, len_hint=len_hint)
         return results
 
-    def load_blobs(self, keys, force_raw=False):
+    def load_blobs(self, keys, force_raw=False, is_transfer=False):
         """
         Mirror function of save_blobs
 
@@ -111,15 +130,20 @@ class ContentAddressedStore(object):
         ----------
         keys : List of string
             Key describing the object to load
-        force_raw : bool, optional
+        force_raw : bool, default False
             Support for backward compatibility with previous datastores. If
             True, this will force the key to be loaded as is (raw). By default,
             False
+        is_transfer : bool, default False
+            If True, this indicates we are loading blobs to transfer them directly
+            to another datastore. We will, in this case, also transfer the metadata
+            and do minimal processing. This is for internal use only.
 
         Returns
         -------
         Returns an iterator of (string, bytes) tuples; the iterator may return keys
-        in a different order than were passed in.
+        in a different order than were passed in. If is_transfer is True, the tuple
+        has three elements with the third one being the metadata.
         """
         load_paths = []
         for key in keys:
@@ -127,13 +151,17 @@ class ContentAddressedStore(object):
            if self._blob_cache:
                blob = self._blob_cache.load_key(key)
                if blob is not None:
-                    yield key, blob
+                    if is_transfer:
+                        # Cached blobs are decompressed/processed bytes regardless of original format
+                        yield key, blob, {"cas_raw": False, "cas_version": 1}
+                    else:
+                        yield key, blob
            else:
                path = self._storage_impl.path_join(self._prefix, key[:2], key)
                load_paths.append((key, path))

        with self._storage_impl.load_bytes([p for _, p in load_paths]) as loaded:
-            for (path_key, file_path, meta) in loaded:
+            for path_key, file_path, meta in loaded:
                key = self._storage_impl.path_split(path_key)[-1]
                # At this point, we either return the object as is (if raw) or
                # decode it according to the encoding version
@@ -169,7 +197,10 @@ class ContentAddressedStore(object):
            if self._blob_cache:
                self._blob_cache.store_key(key, blob)

-            yield key, blob
+            if is_transfer:
+                yield key, blob, meta  # Preserve exact original metadata from storage
+            else:
+                yield key, blob
 
     def _unpack_backward_compatible(self, blob):
         # This is the backward compatible unpack
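
Taken together, the new is_transfer flag on load_blobs and save_blobs lets blobs be copied from one content-addressed store into another along with their metadata, without re-hashing or re-encoding. A hedged sketch of that round trip, assuming src and dst are ContentAddressedStore instances backed by different storage roots:

def transfer_blobs(src, dst, keys):
    # load_blobs(..., is_transfer=True) yields (key, blob_bytes, metadata) triples,
    # which matches the shape save_blobs(..., is_transfer=True) unpacks per item.
    triples = src.load_blobs(keys, is_transfer=True)
    return dst.save_blobs(triples, is_transfer=True, len_hint=len(keys))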
metaflow/datastore/datastore_set.py CHANGED
@@ -21,9 +21,18 @@ class TaskDataStoreSet(object):
         pathspecs=None,
         prefetch_data_artifacts=None,
         allow_not_done=False,
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
-        self.task_datastores = flow_datastore.get_latest_task_datastores(
-            run_id, steps=steps, pathspecs=pathspecs, allow_not_done=allow_not_done
+        self.task_datastores = flow_datastore.get_task_datastores(
+            run_id,
+            steps=steps,
+            pathspecs=pathspecs,
+            allow_not_done=allow_not_done,
+            join_type=join_type,
+            orig_flow_datastore=orig_flow_datastore,
+            spin_artifacts=spin_artifacts,
         )
 
         if prefetch_data_artifacts:
metaflow/datastore/flow_datastore.py CHANGED
@@ -1,10 +1,13 @@
 import itertools
 import json
+from abc import ABC, abstractmethod
 
 from .. import metaflow_config
 
 from .content_addressed_store import ContentAddressedStore
 from .task_datastore import TaskDataStore
+from .spin_datastore import SpinTaskDatastore
+from ..metaflow_profile import from_start
 
 
 class FlowDataStore(object):
@@ -13,7 +16,7 @@ class FlowDataStore(object):
     def __init__(
         self,
         flow_name,
-        environment,
+        environment=None,
         metadata=None,
         event_logger=None,
         monitor=None,
@@ -31,7 +34,7 @@ class FlowDataStore(object):
        ----------
        flow_name : str
            The name of the flow
-        environment : MetaflowEnvironment
+        environment : MetaflowEnvironment, optional
            Environment this datastore is operating in
        metadata : MetadataProvider, optional
            The metadata provider to use and update if needed, by default None
@@ -63,12 +66,28 @@ class FlowDataStore(object):
             self._storage_impl.path_join(self.flow_name, "data"), self._storage_impl
         )
 
+        # Private
+        self._metadata_cache = None
+
     @property
     def datastore_root(self):
         return self._storage_impl.datastore_root
 
-    def get_latest_task_datastores(
-        self, run_id=None, steps=None, pathspecs=None, allow_not_done=False
+    def set_metadata_cache(self, cache):
+        self._metadata_cache = cache
+
+    def get_task_datastores(
+        self,
+        run_id=None,
+        steps=None,
+        pathspecs=None,
+        allow_not_done=False,
+        attempt=None,
+        include_prior=False,
+        mode="r",
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
         """
         Return a list of TaskDataStore for a subset of the tasks.
@@ -88,11 +107,27 @@
             Steps to get the tasks from. If run_id is specified, this
             must also be specified, by default None
         pathspecs : List[str], optional
-            Full task specs (run_id/step_name/task_id). Can be used instead of
+            Full task specs (run_id/step_name/task_id[/attempt]). Can be used instead of
             specifying run_id and steps, by default None
         allow_not_done : bool, optional
             If True, returns the latest attempt of a task even if that attempt
             wasn't marked as done, by default False
+        attempt : int, optional
+            Attempt number of the tasks to return. If not provided, returns latest attempt.
+        include_prior : boolean, default False
+            If True, returns all attempts up to and including attempt.
+        mode : str, default "r"
+            Mode to initialize the returned TaskDataStores in.
+        join_type : str, optional, default None
+            If specified, the join type for the task. This is used to determine
+            the user specified artifacts for the task in case of a spin task.
+        orig_flow_datastore : MetadataProvider, optional, default None
+            The metadata provider in case of a spin task. If provided, the
+            returned TaskDataStore will be a SpinTaskDatastore instead of a
+            TaskDataStore.
+        spin_artifacts : Dict[str, Any], optional, default None
+            Artifacts provided by user that can override the artifacts fetched via the
+            spin pathspec.
 
         Returns
         -------
@@ -126,8 +161,20 @@
                 if task.is_file is False
             ]
             urls = []
+            # parse content urls for specific attempt only, or for all attempts in max range
+            attempt_range = range(metaflow_config.MAX_ATTEMPTS)
+            # we have no reason to check for attempts greater than MAX_ATTEMPTS, as they do not exist.
+            if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
+                attempt_range = range(attempt + 1) if include_prior else [attempt]
             for task_url in task_urls:
-                for attempt in range(metaflow_config.MAX_ATTEMPTS):
+                # task_url can have a trailing slash, so strip this to avoid empty strings in the split
+                task_splits = task_url.rstrip("/").split("/")
+                # Usually it is flow, run, step, task (so 4 components) -- if we have a
+                # fifth one, there is a specific attempt number listed as well.
+                task_attempt_range = attempt_range
+                if len(task_splits) == 5:
+                    task_attempt_range = [int(task_splits[4])]
+                for attempt in task_attempt_range:
                     for suffix in [
                         TaskDataStore.METADATA_DATA_SUFFIX,
                         TaskDataStore.METADATA_ATTEMPT_SUFFIX,
@@ -168,11 +215,30 @@
             for (run, step, task), attempt in latest_started_attempts.items()
         )
         if allow_not_done:
-            latest_to_fetch = latest_started_attempts
+            latest_to_fetch = (
+                done_attempts.union(latest_started_attempts)
+                if include_prior
+                else latest_started_attempts
+            )
         else:
-            latest_to_fetch = latest_started_attempts & done_attempts
+            latest_to_fetch = (
+                done_attempts
+                if include_prior
+                else (latest_started_attempts & done_attempts)
+            )
         latest_to_fetch = [
-            (v[0], v[1], v[2], v[3], data_objs.get(v), "r", allow_not_done)
+            (
+                v[0],
+                v[1],
+                v[2],
+                v[3],
+                data_objs.get(v),
+                mode,
+                allow_not_done,
+                join_type,
+                orig_flow_datastore,
+                spin_artifacts,
+            )
             for v in latest_to_fetch
         ]
         return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
@@ -186,8 +252,63 @@
         data_metadata=None,
         mode="r",
         allow_not_done=False,
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
+        persist=True,
     ):
-        return TaskDataStore(
+        if orig_flow_datastore is not None:
+            # In spin step subprocess, use SpinTaskDatastore for accessing artifacts
+            if join_type is not None:
+                # If join_type is specified, we need to use the artifacts corresponding
+                # to that particular join index, specified by the parent task pathspec.
+                spin_artifacts = spin_artifacts.get(
+                    f"{run_id}/{step_name}/{task_id}", {}
+                )
+            from_start(
+                "FlowDataStore: get_task_datastore for spin task for type %s %s metadata"
+                % (self.TYPE, "without" if data_metadata is None else "with")
+            )
+            # Get the task datastore for the spun task.
+            orig_datastore = orig_flow_datastore.get_task_datastore(
+                run_id,
+                step_name,
+                task_id,
+                attempt=attempt,
+                data_metadata=data_metadata,
+                mode=mode,
+                allow_not_done=allow_not_done,
+                persist=persist,
+            )
+
+            return SpinTaskDatastore(
+                self.flow_name,
+                run_id,
+                step_name,
+                task_id,
+                orig_datastore,
+                spin_artifacts,
+            )
+
+        cache_hit = False
+        if (
+            self._metadata_cache is not None
+            and data_metadata is None
+            and attempt is not None
+            and allow_not_done is False
+        ):
+            # If we have a metadata cache, we can try to load the metadata
+            # from the cache if it is not provided.
+            data_metadata = self._metadata_cache.load_metadata(
+                run_id, step_name, task_id, attempt
+            )
+            cache_hit = data_metadata is not None
+
+        from_start(
+            "FlowDataStore: get_task_datastore for regular task for type %s %s metadata"
+            % (self.TYPE, "without" if data_metadata is None else "with")
+        )
+        task_datastore = TaskDataStore(
             self,
             run_id,
             step_name,
@@ -196,8 +317,23 @@
             data_metadata=data_metadata,
             mode=mode,
             allow_not_done=allow_not_done,
+            persist=persist,
         )
 
+        # Only persist in cache if it is non-changing (so done only) and we have
+        # a non-None attempt
+        if (
+            not cache_hit
+            and self._metadata_cache is not None
+            and allow_not_done is False
+            and attempt is not None
+        ):
+            self._metadata_cache.store_metadata(
+                run_id, step_name, task_id, attempt, task_datastore.ds_metadata
+            )
+
+        return task_datastore
+
     def save_data(self, data_iter, len_hint=0):
         """Saves data to the underlying content-addressed store
 
@@ -239,3 +375,13 @@
         """
         for key, blob in self.ca_store.load_blobs(keys, force_raw=force_raw):
             yield key, blob
+
+
+class MetadataCache(ABC):
+    @abstractmethod
+    def load_metadata(self, run_id, step_name, task_id, attempt):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
+        raise NotImplementedError()
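
The new MetadataCache ABC pairs with FlowDataStore.set_metadata_cache() above: when a cache is set, task metadata for completed attempts is looked up before reading the datastore and stored back after a miss. A minimal in-memory implementation, shown only as an illustrative sketch:

from metaflow.datastore.flow_datastore import MetadataCache


class InMemoryMetadataCache(MetadataCache):
    def __init__(self):
        self._entries = {}

    def load_metadata(self, run_id, step_name, task_id, attempt):
        # Returning None signals a miss; FlowDataStore then reads the metadata
        # from the datastore and hands the result to store_metadata.
        return self._entries.get((run_id, step_name, task_id, attempt))

    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        self._entries[(run_id, step_name, task_id, attempt)] = metadata_dict

# Usage: flow_datastore.set_metadata_cache(InMemoryMetadataCache())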
metaflow/datastore/spin_datastore.py ADDED
@@ -0,0 +1,91 @@
+from typing import Dict, Any
+from .task_datastore import TaskDataStore, require_mode
+from ..metaflow_profile import from_start
+
+
+class SpinTaskDatastore(object):
+    def __init__(
+        self,
+        flow_name: str,
+        run_id: str,
+        step_name: str,
+        task_id: str,
+        orig_datastore: TaskDataStore,
+        spin_artifacts: Dict[str, Any],
+    ):
+        """
+        SpinTaskDatastore is a datastore for a task that is used to retrieve
+        artifacts and attributes for a spin step. It uses the task pathspec
+        from a previous execution of the step to access the artifacts and attributes.
+
+        Parameters:
+        -----------
+        flow_name : str
+            Name of the flow
+        run_id : str
+            Run ID of the flow
+        step_name : str
+            Name of the step
+        task_id : str
+            Task ID of the step
+        orig_datastore : TaskDataStore
+            The datastore for the underlying task that is being spun.
+        spin_artifacts : Dict[str, Any]
+            User provided artifacts that are to be used in the spin task. This is a dictionary
+            where keys are artifact names and values are the actual data or metadata.
+        """
+        self.flow_name = flow_name
+        self.run_id = run_id
+        self.step_name = step_name
+        self.task_id = task_id
+        self.orig_datastore = orig_datastore
+        self.spin_artifacts = spin_artifacts
+        self._task = None
+
+        # Update _objects and _info in order to persist artifacts
+        # See `persist` method in `TaskDatastore` for more details
+        self._objects = self.orig_datastore._objects.copy()
+        self._info = self.orig_datastore._info.copy()
+
+        # We strip out some of the control ones
+        for key in ("_transition",):
+            if key in self._objects:
+                del self._objects[key]
+                del self._info[key]
+
+        from_start("SpinTaskDatastore: Initialized artifacts")
+
+    @require_mode(None)
+    def __getitem__(self, name):
+        try:
+            # Check if it's an artifact in the spin_artifacts
+            return self.spin_artifacts[name]
+        except KeyError:
+            try:
+                # Check if it's an attribute of the task
+                # _foreach_stack, _foreach_index, ...
+                return self.orig_datastore[name]
+            except (KeyError, AttributeError) as e:
+                raise KeyError(
+                    f"Attribute '{name}' not found in the previous execution "
+                    f"of the tasks for `{self.step_name}`."
+                ) from e
+
+    @require_mode(None)
+    def is_none(self, name):
+        val = self.__getitem__(name)
+        return val is None
+
+    @require_mode(None)
+    def __contains__(self, name):
+        try:
+            _ = self.__getitem__(name)
+            return True
+        except KeyError:
+            return False
+
+    @require_mode(None)
+    def items(self):
+        if self._objects:
+            return self._objects.items()
+        return {}
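
Lookup order in SpinTaskDatastore.__getitem__ is: user-supplied spin_artifacts first, then the original task's datastore, with a KeyError if neither has the name. A hedged usage sketch (flow, step, and artifact names below are made up; orig is assumed to be a TaskDataStore from a prior run of the same step):

from metaflow.datastore.spin_datastore import SpinTaskDatastore

spin_ds = SpinTaskDatastore(
    "MyFlow", "123", "train", "456",
    orig_datastore=orig,            # TaskDataStore of the task being spun
    spin_artifacts={"alpha": 0.5},  # overrides the stored 'alpha', if any
)
assert spin_ds["alpha"] == 0.5      # served from spin_artifacts
other = spin_ds["some_artifact"]    # falls back to the original task's artifact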