ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import errno
1
2
  import json
2
3
  import os
3
4
  import re
@@ -17,11 +18,12 @@ from metaflow.metaflow_config import (
17
18
  DATATOOLS_S3ROOT,
18
19
  S3_RETRY_COUNT,
19
20
  S3_TRANSIENT_RETRY_COUNT,
21
+ S3_LOG_TRANSIENT_RETRIES,
20
22
  S3_SERVER_SIDE_ENCRYPTION,
23
+ S3_WORKER_COUNT,
21
24
  TEMPDIR,
22
25
  )
23
26
  from metaflow.util import (
24
- namedtuple_with_defaults,
25
27
  is_stringish,
26
28
  to_bytes,
27
29
  to_unicode,
@@ -29,6 +31,7 @@ from metaflow.util import (
29
31
  url_quote,
30
32
  url_unquote,
31
33
  )
34
+ from metaflow.tuple_util import namedtuple_with_defaults
32
35
  from metaflow.exception import MetaflowException
33
36
  from metaflow.debug import debug
34
37
  import metaflow.tracing as tracing
@@ -136,6 +139,10 @@ class MetaflowS3InvalidRange(MetaflowException):
136
139
  headline = "S3 invalid range"
137
140
 
138
141
 
142
+ class MetaflowS3InsufficientDiskSpace(MetaflowException):
143
+ headline = "Insufficient disk space"
144
+
145
+
139
146
  class S3Object(object):
140
147
  """
141
148
  This object represents a path or an object in S3,
@@ -492,18 +499,22 @@ class S3(object):
492
499
 
493
500
  Parameters
494
501
  ----------
495
- tmproot : str, default: '.'
502
+ tmproot : str, default '.'
496
503
  Where to store the temporary directory.
497
- bucket : str, optional
504
+ bucket : str, optional, default None
498
505
  Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
499
- prefix : str, optional
506
+ prefix : str, optional, default None
500
507
  Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
501
- run : FlowSpec or Run, optional
508
+ run : FlowSpec or Run, optional, default None
502
509
  Derive path prefix from the current or a past run ID, e.g. S3(run=self).
503
- s3root : str, optional
510
+ s3root : str, optional, default None
504
511
  If `run` is not specified, use this as the S3 prefix.
512
+ encryption : str, optional, default None
513
+ Server-side encryption to use when uploading objects to S3.
505
514
  """
506
515
 
516
+ TYPE = "s3"
517
+
507
518
  @classmethod
508
519
  def get_root_from_config(cls, echo, create_on_absent=True):
509
520
  return DATATOOLS_S3ROOT
@@ -570,7 +581,13 @@ class S3(object):
570
581
  self._s3_inject_failures = kwargs.get(
571
582
  "inject_failure_rate", TEST_INJECT_RETRYABLE_FAILURES
572
583
  )
573
- self._tmpdir = mkdtemp(dir=tmproot, prefix="metaflow.s3.")
584
+ # Storing tmproot, bucket, ... as members to allow easier reconstruction
585
+ # during JSON deserialization
586
+ self._tmproot = tmproot
587
+ self._bucket = bucket
588
+ self._prefix = prefix
589
+ self._run = run
590
+ self._tmpdir = mkdtemp(dir=self._tmproot, prefix="metaflow.s3.")
574
591
  self._encryption = encryption
575
592
 
576
593
  def __enter__(self) -> "S3":
@@ -598,7 +615,9 @@ class S3(object):
598
615
  # returned are Unicode.
599
616
  key = getattr(key_value, "key", key_value)
600
617
  if self._s3root is None:
601
- parsed = urlparse(to_unicode(key))
618
+ # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
619
+ # Without allow_fragments=False the parsed.path for an object name with fragments is incomplete.
620
+ parsed = urlparse(to_unicode(key), allow_fragments=False)
602
621
  if parsed.scheme == "s3" and parsed.path:
603
622
  return key
604
623
  else:
@@ -619,7 +638,9 @@ class S3(object):
619
638
  "Don't use absolute S3 URLs when the S3 client is "
620
639
  "initialized with a prefix. URL: %s" % key
621
640
  )
622
- return os.path.join(self._s3root, key)
641
+ # Strip leading slashes to ensure os.path.join works correctly
642
+ # os.path.join discards the first argument if the second starts with '/'
643
+ return os.path.join(self._s3root, key.lstrip("/"))
623
644
  else:
624
645
  return self._s3root
625
646
 
@@ -763,7 +784,9 @@ class S3(object):
763
784
  """
764
785
 
765
786
  url = self._url(key)
766
- src = urlparse(url)
787
+ # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
788
+ # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
789
+ src = urlparse(url, allow_fragments=False)
767
790
 
768
791
  def _info(s3, tmp):
769
792
  resp = s3.head_object(Bucket=src.netloc, Key=src.path.lstrip('/"'))
@@ -889,7 +912,9 @@ class S3(object):
889
912
  DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
890
913
 
891
914
  url, r = self._url_and_range(key)
892
- src = urlparse(url)
915
+ # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
916
+ # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
917
+ src = urlparse(url, allow_fragments=False)
893
918
 
894
919
  def _download(s3, tmp):
895
920
  if r:
@@ -1171,7 +1196,9 @@ class S3(object):
1171
1196
  blob.close = lambda: None
1172
1197
 
1173
1198
  url = self._url(key)
1174
- src = urlparse(url)
1199
+ # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
1200
+ # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
1201
+ src = urlparse(url, allow_fragments=False)
1175
1202
  extra_args = None
1176
1203
  if content_type or metadata or self._encryption:
1177
1204
  extra_args = {}
@@ -1245,12 +1272,12 @@ class S3(object):
1245
1272
 
1246
1273
  def _store():
1247
1274
  for key_obj in key_objs:
1248
- if isinstance(key_obj, tuple):
1249
- key = key_obj[0]
1250
- obj = key_obj[1]
1251
- else:
1275
+ if isinstance(key_obj, S3PutObject):
1252
1276
  key = key_obj.key
1253
1277
  obj = key_obj.value
1278
+ else:
1279
+ key = key_obj[0]
1280
+ obj = key_obj[1]
1254
1281
  store_info = {
1255
1282
  "key": key,
1256
1283
  "content_type": getattr(key_obj, "content_type", None),
@@ -1319,12 +1346,12 @@ class S3(object):
1319
1346
 
1320
1347
  def _check():
1321
1348
  for key_path in key_paths:
1322
- if isinstance(key_path, tuple):
1323
- key = key_path[0]
1324
- path = key_path[1]
1325
- else:
1349
+ if isinstance(key_path, S3PutObject):
1326
1350
  key = key_path.key
1327
1351
  path = key_path.path
1352
+ else:
1353
+ key = key_path[0]
1354
+ path = key_path[1]
1328
1355
  store_info = {
1329
1356
  "key": key,
1330
1357
  "content_type": getattr(key_path, "content_type", None),
@@ -1366,8 +1393,13 @@ class S3(object):
1366
1393
  elif error_code == "NoSuchBucket":
1367
1394
  raise MetaflowS3URLException("Specified S3 bucket doesn't exist.")
1368
1395
  error = str(err)
1396
+ except OSError as e:
1397
+ if e.errno == errno.ENOSPC:
1398
+ raise MetaflowS3InsufficientDiskSpace(str(e))
1399
+ except MetaflowException as ex:
1400
+ # Re-raise Metaflow exceptions (including TimeoutException)
1401
+ raise
1369
1402
  except Exception as ex:
1370
- # TODO specific error message for out of disk space
1371
1403
  error = str(ex)
1372
1404
  if tmp:
1373
1405
  os.unlink(tmp.name)
@@ -1380,9 +1412,31 @@ class S3(object):
1380
1412
  )
1381
1413
 
1382
1414
  # add some jitter to make sure retries are not synchronized
1383
- def _jitter_sleep(self, trynum, multiplier=2):
1384
- interval = multiplier**trynum + random.randint(0, 10)
1385
- time.sleep(interval)
1415
+ def _jitter_sleep(
1416
+ self, trynum: int, base: int = 2, cap: int = 360, jitter: float = 0.1
1417
+ ) -> None:
1418
+ """
1419
+ Sleep for an exponentially increasing interval with added jitter.
1420
+
1421
+ Parameters
1422
+ ----------
1423
+ trynum: The current retry attempt number.
1424
+ base: The base multiplier for the exponential backoff.
1425
+ cap: The maximum interval to sleep.
1426
+ jitter: The maximum jitter percentage to add to the interval.
1427
+ """
1428
+ # Calculate the exponential backoff interval
1429
+ interval = min(cap, base**trynum)
1430
+
1431
+ # Add random jitter
1432
+ jitter_value = interval * jitter * random.uniform(-1, 1)
1433
+ interval_with_jitter = interval + jitter_value
1434
+
1435
+ # Ensure the interval is not negative
1436
+ interval_with_jitter = max(0, interval_with_jitter)
1437
+
1438
+ # Sleep for the calculated interval
1439
+ time.sleep(interval_with_jitter)
1386
1440
 
1387
1441
  # NOTE: re: _read_many_files and _put_many_files
1388
1442
  # All file IO is through binary files - we write bytes, we read
@@ -1470,20 +1524,17 @@ class S3(object):
1470
1524
  # - a known transient failure (SlowDown for example) in which case we will
1471
1525
  # retry *only* the inputs that have this transient failure.
1472
1526
  # - an unknown failure (something went wrong but we cannot say if it was
1473
- # a known permanent failure or something else). In this case, we retry
1474
- # the operation completely.
1475
- #
1476
- # There are therefore two retry counts:
1477
- # - the transient failure retry count: how many times do we try on known
1478
- # transient errors
1479
- # - the top-level retry count: how many times do we try on unknown failures
1527
+ # a known permanent failure or something else). In this case, we assume
1528
+ # it's a transient failure and retry only those inputs (same as above).
1480
1529
  #
1481
- # Note that, if the operation runs out of transient failure retries, it will
1482
- # count as an "unknown" failure (ie: it will be retried according to the
1483
- # outer top-level retry count). In other words, you can potentially have
1484
- # transient_retry_count * retry_count tries).
1485
- # Finally, if on transient failures, we make NO progress (ie: no input is
1486
- # successfully processed), that counts as an "unknown" failure.
1530
+ # NOTES(npow): 2025-05-13
1531
+ # Previously, this code would also retry the fatal failures, including no_progress
1532
+ # and unknown failures, from the beginning. This is not ideal because:
1533
+ # 1. Fatal errors are not supposed to be retried.
1534
+ # 2. Retrying from the beginning does not improve the situation, and is
1535
+ # wasteful since we have already uploaded some files.
1536
+ # 3. The number of transient errors is far more than fatal errors, so we
1537
+ # can be optimistic and assume the unknown errors are transient.
1487
1538
  cmdline = [sys.executable, os.path.abspath(s3op.__file__), mode]
1488
1539
  recursive_get = False
1489
1540
  for key, value in options.items():
@@ -1518,7 +1569,6 @@ class S3(object):
1518
1569
  # Otherwise, we cap the failure rate at 90%
1519
1570
  return min(90, self._s3_inject_failures)
1520
1571
 
1521
- retry_count = 0 # Number of retries (excluding transient failures)
1522
1572
  transient_retry_count = 0 # Number of transient retries (per top-level retry)
1523
1573
  inject_failures = _inject_failure_rate()
1524
1574
  out_lines = [] # List to contain the lines returned by _s3op_with_retries
@@ -1585,7 +1635,12 @@ class S3(object):
1585
1635
  # things, this will shrink more and more until we are doing a
1586
1636
  # single operation at a time. If things start going better, it
1587
1637
  # will increase by 20% every round.
1588
- max_count = min(int(last_ok_count * 1.2), len(pending_retries))
1638
+ #
1639
+ # If we made no progress (last_ok_count == 0) we retry at most
1640
+ # 2*S3_WORKER_COUNT from whatever is left in `pending_retries`
1641
+ max_count = min(
1642
+ int(last_ok_count * 1.2), len(pending_retries)
1643
+ ) or min(2 * S3_WORKER_COUNT, len(pending_retries))
1589
1644
  tmp_input.writelines(pending_retries[:max_count])
1590
1645
  tmp_input.flush()
1591
1646
  debug.s3client_exec(
@@ -1702,53 +1757,49 @@ class S3(object):
1702
1757
  _update_out_lines(out_lines, ok_lines, resize=loop_count == 0)
1703
1758
  return 0, 0, inject_failures, err_out
1704
1759
 
1705
- while retry_count <= S3_RETRY_COUNT:
1760
+ while transient_retry_count <= S3_TRANSIENT_RETRY_COUNT:
1706
1761
  (
1707
1762
  last_ok_count,
1708
1763
  last_retry_count,
1709
1764
  inject_failures,
1710
1765
  err_out,
1711
1766
  ) = try_s3_op(last_ok_count, pending_retries, out_lines, inject_failures)
1712
- if err_out or (
1713
- last_retry_count != 0
1714
- and (
1715
- last_ok_count == 0
1716
- or transient_retry_count > S3_TRANSIENT_RETRY_COUNT
1717
- )
1718
- ):
1719
- # We had a fatal failure (err_out is not None)
1720
- # or we made no progress (last_ok_count is 0)
1721
- # or we are out of transient retries
1722
- # so we will restart from scratch (being very conservative)
1723
- retry_count += 1
1724
- err_msg = err_out
1725
- if err_msg is None and last_ok_count == 0:
1726
- err_msg = "No progress"
1727
- if err_msg is None:
1728
- err_msg = "Too many transient errors"
1729
- print(
1730
- "S3 non-transient error (attempt #%d): %s" % (retry_count, err_msg)
1731
- )
1732
- _reset()
1733
- if retry_count <= S3_RETRY_COUNT:
1734
- self._jitter_sleep(retry_count)
1735
- continue
1736
- elif last_retry_count != 0:
1767
+ if err_out:
1768
+ break
1769
+ if last_retry_count != 0:
1737
1770
  # During our last try, we did not manage to process everything we wanted
1738
1771
  # due to a transient failure so we try again.
1739
1772
  transient_retry_count += 1
1740
1773
  total_ok_count += last_ok_count
1741
- print(
1742
- "Transient S3 failure (attempt #%d) -- total success: %d, "
1743
- "last attempt %d/%d -- remaining: %d"
1744
- % (
1745
- transient_retry_count,
1746
- total_ok_count,
1747
- last_ok_count,
1748
- last_ok_count + last_retry_count,
1749
- len(pending_retries),
1774
+
1775
+ if S3_LOG_TRANSIENT_RETRIES:
1776
+ # Extract transient error type from pending retry lines
1777
+ error_info = ""
1778
+ if pending_retries:
1779
+ try:
1780
+ # Parse the first line to get transient error type
1781
+ first_retry = json.loads(
1782
+ pending_retries[0].decode("utf-8").strip()
1783
+ )
1784
+ if "transient_error_type" in first_retry:
1785
+ error_info = (
1786
+ " (%s)" % first_retry["transient_error_type"]
1787
+ )
1788
+ except (json.JSONDecodeError, IndexError, KeyError):
1789
+ pass
1790
+
1791
+ print(
1792
+ "Transient S3 failure (attempt #%d) -- total success: %d, "
1793
+ "last attempt %d/%d -- remaining: %d%s"
1794
+ % (
1795
+ transient_retry_count,
1796
+ total_ok_count,
1797
+ last_ok_count,
1798
+ last_ok_count + last_retry_count,
1799
+ len(pending_retries),
1800
+ error_info,
1801
+ )
1750
1802
  )
1751
- )
1752
1803
  if inject_failures == 0:
1753
1804
  # Don't sleep when we are "faking" the failures
1754
1805
  self._jitter_sleep(transient_retry_count)