metaflow 2.18.13__py2.py3-none-any.whl → 2.19.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/metaflow_config.py +41 -0
  19. metaflow/metaflow_profile.py +18 -0
  20. metaflow/packaging_sys/utils.py +2 -39
  21. metaflow/packaging_sys/v1.py +63 -16
  22. metaflow/plugins/__init__.py +2 -0
  23. metaflow/plugins/argo/argo_client.py +1 -0
  24. metaflow/plugins/argo/argo_workflows.py +3 -1
  25. metaflow/plugins/cards/card_datastore.py +9 -3
  26. metaflow/plugins/cards/card_decorator.py +1 -0
  27. metaflow/plugins/cards/card_modules/basic.py +9 -3
  28. metaflow/plugins/datastores/local_storage.py +12 -6
  29. metaflow/plugins/datastores/spin_storage.py +12 -0
  30. metaflow/plugins/datatools/s3/s3.py +29 -10
  31. metaflow/plugins/datatools/s3/s3op.py +90 -62
  32. metaflow/plugins/metadata_providers/local.py +76 -82
  33. metaflow/plugins/metadata_providers/spin.py +16 -0
  34. metaflow/runner/metaflow_runner.py +210 -19
  35. metaflow/runtime.py +348 -21
  36. metaflow/task.py +61 -12
  37. metaflow/user_configs/config_parameters.py +2 -4
  38. metaflow/user_decorators/mutable_flow.py +1 -1
  39. metaflow/user_decorators/user_step_decorator.py +10 -1
  40. metaflow/util.py +191 -1
  41. metaflow/version.py +1 -1
  42. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Makefile +10 -0
  43. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/METADATA +2 -4
  44. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/RECORD +50 -47
  45. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  46. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  47. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/WHEEL +0 -0
  48. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/entry_points.txt +0 -0
  49. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/licenses/LICENSE +0 -0
  50. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/top_level.txt +0 -0
metaflow/flowspec.py CHANGED
@@ -307,7 +307,8 @@ class FlowSpec(metaclass=FlowSpecMeta):
                     % (deco._flow_cls.__name__, cls.__name__)
                 )
             debug.userconf_exec(
-                "Evaluating flow level decorator %s" % deco.__class__.__name__
+                "Evaluating flow level decorator %s (pre-mutate)"
+                % deco.__class__.__name__
             )
             deco.pre_mutate(mutable_flow)
             # We reset cached_parameters on the very off chance that the user added
@@ -324,7 +325,7 @@ class FlowSpec(metaclass=FlowSpecMeta):
             if isinstance(deco, StepMutator):
                 inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
                 debug.userconf_exec(
-                    "Evaluating step level decorator %s for %s"
+                    "Evaluating step level decorator %s for %s (pre-mutate)"
                     % (deco.__class__.__name__, step.name)
                 )
                 deco.pre_mutate(
metaflow/metaflow_config.py CHANGED
@@ -21,6 +21,7 @@ if sys.platform == "darwin":
 
 # Path to the local directory to store artifacts for 'local' datastore.
 DATASTORE_LOCAL_DIR = ".metaflow"
+DATASTORE_SPIN_LOCAL_DIR = ".metaflow_spin"
 
 # Local configuration file (in .metaflow) containing overrides per-project
 LOCAL_CONFIG_FILE = "config.json"
@@ -47,6 +48,38 @@ DEFAULT_FROM_DEPLOYMENT_IMPL = from_conf(
     "DEFAULT_FROM_DEPLOYMENT_IMPL", "argo-workflows"
 )
 
+###
+# Spin configuration
+###
+# Essentially a whitelist of decorators that are allowed in Spin steps
+SPIN_ALLOWED_DECORATORS = from_conf(
+    "SPIN_ALLOWED_DECORATORS",
+    [
+        "conda",
+        "pypi",
+        "conda_base",
+        "pypi_base",
+        "environment",
+        "project",
+        "timeout",
+        "conda_env_internal",
+        "card",
+    ],
+)
+
+# Essentially a blacklist of decorators that are not allowed in Spin steps
+# Note: decorators not in either SPIN_ALLOWED_DECORATORS or SPIN_DISALLOWED_DECORATORS
+# are simply ignored in Spin steps
+SPIN_DISALLOWED_DECORATORS = from_conf(
+    "SPIN_DISALLOWED_DECORATORS",
+    [
+        "parallel",
+    ],
+)
+
+# Default value for persist option in spin command
+SPIN_PERSIST = from_conf("SPIN_PERSIST", False)
+
 ###
 # User configuration
 ###
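These Spin settings follow the same from_conf() pattern as the rest of this module, so they can be overridden per user or deployment. A hedged sketch of an override (METAFLOW_<NAME> environment variables are Metaflow's usual convention for from_conf(); exact parsing of non-string values may differ):

    import os

    # Set before metaflow is imported so from_conf() sees the override.
    os.environ["METAFLOW_SPIN_PERSIST"] = "true"

    from metaflow.metaflow_config import SPIN_ALLOWED_DECORATORS, SPIN_PERSIST

    print(SPIN_PERSIST, SPIN_ALLOWED_DECORATORS)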
@@ -57,6 +90,7 @@ USER = from_conf("USER")
 # Datastore configuration
 ###
 DATASTORE_SYSROOT_LOCAL = from_conf("DATASTORE_SYSROOT_LOCAL")
+DATASTORE_SYSROOT_SPIN = from_conf("DATASTORE_SYSROOT_SPIN")
 # S3 bucket and prefix to store artifacts for 's3' datastore.
 DATASTORE_SYSROOT_S3 = from_conf("DATASTORE_SYSROOT_S3")
 # Azure Blob Storage container and blob prefix
@@ -109,6 +143,9 @@ S3_WORKER_COUNT = from_conf("S3_WORKER_COUNT", 64)
 # top-level retries)
 S3_TRANSIENT_RETRY_COUNT = from_conf("S3_TRANSIENT_RETRY_COUNT", 20)
 
+# Whether to log transient retry messages to stdout
+S3_LOG_TRANSIENT_RETRIES = from_conf("S3_LOG_TRANSIENT_RETRIES", False)
+
 # S3 retry configuration used in the aws client
 # Use the adaptive retry strategy by default
 S3_CLIENT_RETRY_CONFIG = from_conf(
@@ -461,6 +498,10 @@ ESCAPE_HATCH_WARNING = from_conf("ESCAPE_HATCH_WARNING", True)
 ###
 FEAT_ALWAYS_UPLOAD_CODE_PACKAGE = from_conf("FEAT_ALWAYS_UPLOAD_CODE_PACKAGE", False)
 ###
+# Profile
+###
+PROFILE_FROM_START = from_conf("PROFILE_FROM_START", False)
+###
 # Debug configuration
 ###
 DEBUG_OPTIONS = [
metaflow/metaflow_profile.py CHANGED
@@ -2,6 +2,24 @@ import time
 
 from contextlib import contextmanager
 
+from .metaflow_config import PROFILE_FROM_START
+
+init_time = None
+
+
+if PROFILE_FROM_START:
+
+    def from_start(msg: str):
+        global init_time
+        if init_time is None:
+            init_time = time.time()
+        print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))
+
+else:
+
+    def from_start(_msg: str):
+        pass
+
 
 @contextmanager
 def profile(label, stats_dict=None):
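With PROFILE_FROM_START enabled, from_start() can be sprinkled through startup code to report wall-clock time since its first call, next to the existing profile() context manager which times a single block. A minimal usage sketch (labels are illustrative):

    from metaflow.metaflow_profile import from_start, profile

    from_start("imports done")      # no-op unless PROFILE_FROM_START is enabled

    with profile("build list"):     # times just this block
        data = list(range(1_000_000))

    from_start("data built")        # elapsed time since the first from_start() call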
metaflow/packaging_sys/utils.py CHANGED
@@ -2,45 +2,7 @@ import os
 from contextlib import contextmanager
 from typing import Callable, Generator, List, Optional, Tuple
 
-from ..util import to_unicode
-
-
-# this is os.walk(follow_symlinks=True) with cycle detection
-def walk_without_cycles(
-    top_root: str,
-    exclude_dirs: Optional[List[str]] = None,
-) -> Generator[Tuple[str, List[str]], None, None]:
-    seen = set()
-
-    default_skip_dirs = ["__pycache__"]
-
-    def _recurse(root, skip_dirs):
-        for parent, dirs, files in os.walk(root):
-            dirs[:] = [d for d in dirs if d not in skip_dirs]
-            for d in dirs:
-                path = os.path.join(parent, d)
-                if os.path.islink(path):
-                    # Breaking loops: never follow the same symlink twice
-                    #
-                    # NOTE: this also means that links to sibling links are
-                    # not followed. In this case:
-                    #
-                    # x -> y
-                    # y -> oo
-                    # oo/real_file
-                    #
-                    # real_file is only included twice, not three times
-                    reallink = os.path.realpath(path)
-                    if reallink not in seen:
-                        seen.add(reallink)
-                        for x in _recurse(path, default_skip_dirs):
-                            yield x
-            yield parent, files
-
-    skip_dirs = set(default_skip_dirs + (exclude_dirs or []))
-    for x in _recurse(top_root, skip_dirs):
-        skip_dirs = default_skip_dirs
-        yield x
+from ..util import to_unicode, walk_without_cycles
 
 
 def walk(
@@ -53,6 +15,7 @@ def walk(
     prefixlen = len("%s/" % os.path.dirname(root))
     for (
         path,
+        _,
         files,
     ) in walk_without_cycles(root, exclude_tl_dirs):
         if exclude_hidden and "/." in path:
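The walker itself now lives in metaflow.util (part of the +191-line change to metaflow/util.py listed above), and the extra "_" in the loop shows that the relocated version yields (parent, dirs, files) triples instead of the old (parent, files) pairs. A hedged usage sketch mirroring the call above (the second argument and directory names are illustrative; the full signature is in metaflow/util.py, which this diff does not show):

    import os

    from metaflow.util import walk_without_cycles

    # Walk a tree, following symlinks but never following the same link twice.
    for parent, _dirs, files in walk_without_cycles(".", ["__pycache__"]):
        for f in files:
            print(os.path.join(parent, f))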
metaflow/packaging_sys/v1.py CHANGED
@@ -16,7 +16,7 @@ from ..exception import MetaflowException
 from ..metaflow_version import get_version
 from ..user_decorators.user_flow_decorator import FlowMutatorMeta
 from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
-from ..util import get_metaflow_root
+from ..util import get_metaflow_root, walk_without_cycles
 from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
 from .distribution_support import _ModuleInfo, modules_to_distributions
 from .utils import suffix_filter, walk
@@ -269,12 +269,50 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
         # If the module is a single file, we handle this here by looking at __file__
         # which will point to the single file. If it is an actual module, __path__
         # will contain the path(s) to the module
+        if hasattr(module, "__file__") and module.__file__:
+            root_paths = [Path(module.__file__).resolve().as_posix()]
+        else:
+            root_paths = []
+            seen_path_values = set()
+            new_paths = module.__spec__.submodule_search_locations
+            while new_paths:
+                paths = new_paths
+                new_paths = []
+                for p in paths:
+                    if p in seen_path_values:
+                        continue
+                    if os.path.isdir(p):
+                        root_paths.append(Path(p).resolve().as_posix())
+                    elif p in sys.path_importer_cache:
+                        # We have a path hook that we likely need to call to get the actual path
+                        addl_spec = sys.path_importer_cache[p].find_spec(name)
+                        if (
+                            addl_spec is not None
+                            and addl_spec.submodule_search_locations
+                        ):
+                            new_paths.extend(addl_spec.submodule_search_locations)
+                    else:
+                        # This may not be as required since it is likely the importer cache has
+                        # everything already but just in case, we will also go through the
+                        # path hooks and see if we find another one
+                        for path_hook in sys.path_hooks:
+                            try:
+                                finder = path_hook(p)
+                                addl_spec = finder.find_spec(name)
+                                if (
+                                    addl_spec is not None
+                                    and addl_spec.submodule_search_locations
+                                ):
+                                    new_paths.extend(
+                                        addl_spec.submodule_search_locations
+                                    )
+                                    break
+                            except ImportError:
+                                continue
+                    seen_path_values.add(p)
         self._modules[name] = _ModuleInfo(
             name,
-            set(
-                Path(p).resolve().as_posix()
-                for p in getattr(module, "__path__", [module.__file__])
-            ),
+            set(root_paths),
             module,
             False,  # This is not a Metaflow module (added by the user manually)
         )
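The new resolution walks a module's __spec__.submodule_search_locations and, when a path entry is not a plain directory (e.g. some editable installs), asks the finders behind sys.path_importer_cache or sys.path_hooks for further locations. A minimal illustration of the underlying stdlib machinery, separate from Metaflow's own packaging code (any installed package works in place of 'logging'):

    import importlib

    pkg = importlib.import_module("logging")

    # A regular package reports a single search location; a namespace package
    # can report several, one per installed portion.
    print(pkg.__file__)                                   # .../logging/__init__.py
    print(list(pkg.__spec__.submodule_search_locations))  # on-disk root(s) of the package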
@@ -417,15 +455,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
                     % (dist_name, name)
                 )
             dist_root = str(dist.locate_file(name))
-            if dist_root not in paths:
-                # This is an error because it means that this distribution is
-                # not contributing to the module.
-                raise RuntimeError(
-                    "Distribution '%s' is not contributing to module '%s' as "
-                    "expected (got '%s' when expected one of %s)"
-                    % (dist.metadata["Name"], name, dist_root, paths)
-                )
-            paths.discard(dist_root)
+            has_file_in_root = False
             if dist_name not in self._distmetainfo:
                 # Possible that a distribution contributes to multiple modules
                 self._distmetainfo[dist_name] = {
@@ -438,13 +468,30 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
             for file in dist.files or []:
                 # Skip files that do not belong to this module (distribution may
                 # provide multiple modules)
-                if file.parts[: len(prefix_parts)] != prefix_parts:
+                if (
+                    file.parts[: len(prefix_parts)] != prefix_parts
+                    or file.suffix == ".pth"
+                    or str(file).startswith("__editable__")
+                ):
                     continue
                 if file.parts[len(prefix_parts)] == "__init__.py":
                     has_init = True
+                has_file_in_root = True
+                # At this point, we know that we are seeing actual files in the
+                # dist_root so we make sure it is as expected
+                if dist_root not in paths:
+                    # This is an error because it means that this distribution is
+                    # not contributing to the module.
+                    raise RuntimeError(
+                        "Distribution '%s' is not contributing to module '%s' as "
+                        "expected (got '%s' when expected one of %s)"
+                        % (dist.metadata["Name"], name, dist_root, paths)
+                    )
                 yield str(
                     dist.locate_file(file).resolve().as_posix()
                 ), os.path.join(self._code_dir, *prefix_parts, *file.parts[1:])
+            if has_file_in_root:
+                paths.discard(dist_root)
 
         # Now if there are more paths left in paths, it means there is a non-distribution
         # component to this package which we also include.
@@ -460,7 +507,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
                 )
                 has_init = True
             else:
-                for root, _, files in os.walk(path):
+                for root, _, files in walk_without_cycles(path):
                     for file in files:
                         if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
                             continue
metaflow/plugins/__init__.py CHANGED
@@ -83,11 +83,13 @@ ENVIRONMENTS_DESC = [
 METADATA_PROVIDERS_DESC = [
     ("service", ".metadata_providers.service.ServiceMetadataProvider"),
     ("local", ".metadata_providers.local.LocalMetadataProvider"),
+    ("spin", ".metadata_providers.spin.SpinMetadataProvider"),
 ]
 
 # Add datastore here
 DATASTORES_DESC = [
     ("local", ".datastores.local_storage.LocalStorage"),
+    ("spin", ".datastores.spin_storage.SpinStorage"),
     ("s3", ".datastores.s3_storage.S3Storage"),
     ("azure", ".datastores.azure_storage.AzureStorage"),
     ("gs", ".datastores.gs_storage.GSStorage"),
metaflow/plugins/argo/argo_client.py CHANGED
@@ -325,6 +325,7 @@ class ArgoClient(object):
                 "failedJobsHistoryLimit": 10000,  # default is unfortunately 1
                 "successfulJobsHistoryLimit": 10000,  # default is unfortunately 3
                 "workflowSpec": {"workflowTemplateRef": {"name": name}},
+                "startingDeadlineSeconds": 3540,  # configuring this to 59 minutes so a failed trigger of cron workflow can succeed at most 59 mins after scheduled execution
             },
         }
         try:
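For context, the cron spec fragment that ArgoClient submits now has roughly this shape (only the keys visible in the hunk above are shown; the template name is illustrative):

    cron_spec = {
        "failedJobsHistoryLimit": 10000,
        "successfulJobsHistoryLimit": 10000,
        "workflowSpec": {"workflowTemplateRef": {"name": "myflow-template"}},
        # Let a trigger that failed to fire on schedule start up to 59 minutes late.
        "startingDeadlineSeconds": 3540,
    }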
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -2136,6 +2136,8 @@ class ArgoWorkflows(object):
                     foreach_step,
                 )
             )
+        # NOTE: input-paths might be extremely lengthy so we dump these to disk instead of passing them directly to the cmd
+        step_cmds.append("echo %s >> /tmp/mf-input-paths" % input_paths)
         step = [
             "step",
             node.name,
@@ -2143,7 +2145,7 @@
             "--task-id %s" % task_id,
             "--retry-count %s" % retry_count,
             "--max-user-code-retries %d" % user_code_retries,
-            "--input-paths %s" % input_paths,
+            "--input-paths-filename /tmp/mf-input-paths",
         ]
         if node.parallel_step:
             step.append(
metaflow/plugins/cards/card_datastore.py CHANGED
@@ -9,6 +9,7 @@ from metaflow.metaflow_config import (
     CARD_S3ROOT,
     CARD_LOCALROOT,
     DATASTORE_LOCAL_DIR,
+    DATASTORE_SPIN_LOCAL_DIR,
     CARD_SUFFIX,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -58,12 +59,17 @@ class CardDatastore(object):
             return CARD_AZUREROOT
         elif storage_type == "gs":
             return CARD_GSROOT
-        elif storage_type == "local":
+        elif storage_type == "local" or storage_type == "spin":
             # Borrowing some of the logic from LocalStorage.get_storage_root
             result = CARD_LOCALROOT
+            local_dir = (
+                DATASTORE_SPIN_LOCAL_DIR
+                if storage_type == "spin"
+                else DATASTORE_LOCAL_DIR
+            )
             if result is None:
                 current_path = os.getcwd()
-                check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+                check_dir = os.path.join(current_path, local_dir)
                 check_dir = os.path.realpath(check_dir)
                 orig_path = check_dir
                 while not os.path.isdir(check_dir):
@@ -73,7 +79,7 @@
                     # return the top level path
                     return os.path.join(orig_path, CARD_SUFFIX)
                 current_path = new_path
-                check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+                check_dir = os.path.join(current_path, local_dir)
             return os.path.join(check_dir, CARD_SUFFIX)
         else:
             # Let's make it obvious we need to update this block for each new datastore backend...
metaflow/plugins/cards/card_decorator.py CHANGED
@@ -171,6 +171,7 @@ class CardDecorator(StepDecorator):
         self._flow_datastore = flow_datastore
         self._environment = environment
         self._logger = logger
+
         self.card_options = None
 
         # We check for configuration options. We do this here before they are
metaflow/plugins/cards/card_modules/basic.py CHANGED
@@ -496,9 +496,15 @@ class TaskInfoComponent(MetaflowCardComponent):
         )
 
         # ignore the name as a parameter
-        param_ids = [
-            p.id for p in self._task.parent.parent["_parameters"].task if p.id != "name"
-        ]
+        if "_parameters" not in self._task.parent.parent:
+            # In case of spin steps, there is no _parameters task
+            param_ids = []
+        else:
+            param_ids = [
+                p.id
+                for p in self._task.parent.parent["_parameters"].task
+                if p.id != "name"
+            ]
         if len(param_ids) > 0:
             # Extract parameter from the Parameter Task. That is less brittle.
             parameter_data = TaskToDict(
metaflow/plugins/datastores/local_storage.py CHANGED
@@ -1,24 +1,29 @@
 import json
 import os
 
-from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, DATASTORE_SYSROOT_LOCAL
+from metaflow.metaflow_config import (
+    DATASTORE_LOCAL_DIR,
+    DATASTORE_SYSROOT_LOCAL,
+)
 from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage
 
 
 class LocalStorage(DataStoreStorage):
     TYPE = "local"
     METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_LOCAL_DIR  # ".metaflow"
+    SYSROOT_VAR = DATASTORE_SYSROOT_LOCAL
 
     @classmethod
     def get_datastore_root_from_config(cls, echo, create_on_absent=True):
-        result = DATASTORE_SYSROOT_LOCAL
+        result = cls.SYSROOT_VAR
         if result is None:
             try:
                 # Python2
                 current_path = os.getcwdu()
             except:  # noqa E722
                 current_path = os.getcwd()
-            check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+            check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             check_dir = os.path.realpath(check_dir)
             orig_path = check_dir
             top_level_reached = False
@@ -28,12 +33,13 @@ class LocalStorage(DataStoreStorage):
                     top_level_reached = True
                     break  # We are no longer making upward progress
                 current_path = new_path
-                check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+                check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             if top_level_reached:
                 if create_on_absent:
                     # Could not find any directory to use so create a new one
                     echo(
-                        "Creating local datastore in current directory (%s)" % orig_path
+                        "Creating %s datastore in current directory (%s)"
+                        % (cls.TYPE, orig_path)
                     )
                     os.mkdir(orig_path)
                     result = orig_path
@@ -42,7 +48,7 @@ class LocalStorage(DataStoreStorage):
             else:
                 result = check_dir
         else:
-            result = os.path.join(result, DATASTORE_LOCAL_DIR)
+            result = os.path.join(result, cls.DATASTORE_DIR)
         return result
 
     @staticmethod
metaflow/plugins/datastores/spin_storage.py ADDED
@@ -0,0 +1,12 @@
+from metaflow.metaflow_config import (
+    DATASTORE_SPIN_LOCAL_DIR,
+    DATASTORE_SYSROOT_SPIN,
+)
+from metaflow.plugins.datastores.local_storage import LocalStorage
+
+
+class SpinStorage(LocalStorage):
+    TYPE = "spin"
+    METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR  # ".metaflow_spin"
+    SYSROOT_VAR = DATASTORE_SYSROOT_SPIN
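Because LocalStorage now reads its directory name and sysroot from class attributes, a new local-style backend is just a subclass that overrides them, which is all spin_storage.py does. A sketch of the same pattern with a hypothetical backend (ScratchStorage and ".metaflow_scratch" are made up for illustration):

    from metaflow.plugins.datastores.local_storage import LocalStorage


    class ScratchStorage(LocalStorage):  # hypothetical backend, illustration only
        TYPE = "scratch"
        METADATA_DIR = "_meta"
        DATASTORE_DIR = ".metaflow_scratch"  # directory discovered or created by get_datastore_root_from_config()
        SYSROOT_VAR = None                   # no sysroot override; fall back to directory discovery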
metaflow/plugins/datatools/s3/s3.py CHANGED
@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    S3_LOG_TRANSIENT_RETRIES,
     S3_SERVER_SIDE_ENCRYPTION,
     S3_WORKER_COUNT,
     TEMPDIR,
@@ -1760,17 +1761,35 @@ class S3(object):
                 # due to a transient failure so we try again.
                 transient_retry_count += 1
                 total_ok_count += last_ok_count
-                print(
-                    "Transient S3 failure (attempt #%d) -- total success: %d, "
-                    "last attempt %d/%d -- remaining: %d"
-                    % (
-                        transient_retry_count,
-                        total_ok_count,
-                        last_ok_count,
-                        last_ok_count + last_retry_count,
-                        len(pending_retries),
+
+                if S3_LOG_TRANSIENT_RETRIES:
+                    # Extract transient error type from pending retry lines
+                    error_info = ""
+                    if pending_retries:
+                        try:
+                            # Parse the first line to get transient error type
+                            first_retry = json.loads(
+                                pending_retries[0].decode("utf-8").strip()
+                            )
+                            if "transient_error_type" in first_retry:
+                                error_info = (
+                                    " (%s)" % first_retry["transient_error_type"]
+                                )
+                        except (json.JSONDecodeError, IndexError, KeyError):
+                            pass
+
+                    print(
+                        "Transient S3 failure (attempt #%d) -- total success: %d, "
+                        "last attempt %d/%d -- remaining: %d%s"
+                        % (
+                            transient_retry_count,
+                            total_ok_count,
+                            last_ok_count,
+                            last_ok_count + last_retry_count,
+                            len(pending_retries),
+                            error_info,
+                        )
                     )
-                )
                 if inject_failures == 0:
                     # Don't sleep when we are "faking" the failures
                     self._jitter_sleep(transient_retry_count)
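When S3_LOG_TRANSIENT_RETRIES is enabled, the retry message is suffixed with the transient error type parsed from the first pending-retry record. A minimal sketch of that parsing in isolation (the sample record and the "SlowDown" error type are illustrative, not actual s3op output):

    import json

    pending_retries = [b'{"transient_error_type": "SlowDown", "url": "s3://bucket/key"}\n']

    error_info = ""
    if pending_retries:
        try:
            first_retry = json.loads(pending_retries[0].decode("utf-8").strip())
            if "transient_error_type" in first_retry:
                error_info = " (%s)" % first_retry["transient_error_type"]
        except (json.JSONDecodeError, IndexError, KeyError):
            pass

    print("Transient S3 failure (attempt #1) -- remaining: 1%s" % error_info)
    # -> Transient S3 failure (attempt #1) -- remaining: 1 (SlowDown)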