ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -6,11 +6,12 @@ import time
6
6
 
7
7
  from functools import wraps
8
8
  from io import BufferedIOBase, FileIO, RawIOBase
9
+ from typing import List, Optional
9
10
  from types import MethodType, FunctionType
10
11
 
11
12
  from .. import metaflow_config
12
13
  from ..exception import MetaflowInternalError
13
- from ..metadata import DataArtifact, MetaDatum
14
+ from ..metadata_provider import DataArtifact, MetaDatum
14
15
  from ..parameters import Parameter
15
16
  from ..util import Path, is_stringish, to_fileobj
16
17
 
@@ -98,8 +99,8 @@ class TaskDataStore(object):
98
99
  data_metadata=None,
99
100
  mode="r",
100
101
  allow_not_done=False,
102
+ persist=True,
101
103
  ):
102
-
103
104
  self._storage_impl = flow_datastore._storage_impl
104
105
  self.TYPE = self._storage_impl.TYPE
105
106
  self._ca_store = flow_datastore.ca_store
@@ -114,11 +115,12 @@ class TaskDataStore(object):
114
115
  self._attempt = attempt
115
116
  self._metadata = flow_datastore.metadata
116
117
  self._parent = flow_datastore
118
+ self._persist = persist
117
119
 
118
120
  # The GZIP encodings are for backward compatibility
119
121
  self._encodings = {"pickle-v2", "gzip+pickle-v2"}
120
122
  ver = sys.version_info[0] * 10 + sys.version_info[1]
121
- if ver >= 34:
123
+ if ver >= 36:
122
124
  self._encodings.add("pickle-v4")
123
125
  self._encodings.add("gzip+pickle-v4")
124
126
 
@@ -149,6 +151,8 @@ class TaskDataStore(object):
149
151
  )
150
152
  if self.has_metadata(check_meta, add_attempt=False):
151
153
  max_attempt = i
154
+ elif max_attempt is not None:
155
+ break
152
156
  if self._attempt is None:
153
157
  self._attempt = max_attempt
154
158
  elif max_attempt is None or self._attempt > max_attempt:
@@ -173,6 +177,26 @@ class TaskDataStore(object):
173
177
  if data_obj is not None:
174
178
  self._objects = data_obj.get("objects", {})
175
179
  self._info = data_obj.get("info", {})
180
+ elif self._mode == "d":
181
+ self._objects = {}
182
+ self._info = {}
183
+
184
+ if self._attempt is None:
185
+ for i in range(metaflow_config.MAX_ATTEMPTS):
186
+ check_meta = self._metadata_name_for_attempt(
187
+ self.METADATA_ATTEMPT_SUFFIX, i
188
+ )
189
+ if self.has_metadata(check_meta, add_attempt=False):
190
+ self._attempt = i
191
+
192
+ # Do not allow destructive operations on the datastore if attempt is still in flight
193
+ # and we explicitly did not allow operating on running tasks.
194
+ if not allow_not_done and not self.has_metadata(self.METADATA_DONE_SUFFIX):
195
+ raise DataException(
196
+ "No completed attempts of the task was found for task '%s'"
197
+ % self._path
198
+ )
199
+
176
200
  else:
177
201
  raise DataException("Unknown datastore mode: '%s'" % self._mode)
178
202
 
@@ -203,6 +227,9 @@ class TaskDataStore(object):
203
227
  @property
204
228
  def pathspec_index(self):
205
229
  idxstr = ",".join(map(str, (f.index for f in self["_foreach_stack"])))
230
+ if "_iteration_stack" in self:
231
+ itrstr = ",".join(map(str, (f for f in self["_iteration_stack"])))
232
+ return "%s/%s[%s][%s]" % (self._run_id, self._step_name, idxstr, itrstr)
206
233
  return "%s/%s[%s]" % (self._run_id, self._step_name, idxstr)
207
234
 
208
235
  @property
@@ -233,7 +260,73 @@ class TaskDataStore(object):
233
260
 
234
261
  @only_if_not_done
235
262
  @require_mode("w")
236
- def save_artifacts(self, artifacts_iter, force_v4=False, len_hint=0):
263
+ def transfer_artifacts(
264
+ self, other_datastore: "TaskDataStore", names: Optional[List[str]] = None
265
+ ):
266
+ """
267
+ Copies the blobs from other_datastore to this datastore if the datastore roots
268
+ are different.
269
+
270
+ This is used specifically for spin so we can bring in artifacts from the original
271
+ datastore.
272
+
273
+ Parameters
274
+ ----------
275
+ other_datastore : TaskDataStore
276
+ Other datastore from which to copy artifacts from
277
+ names : List[str], optional, default None
278
+ If provided, only transfer the artifacts with these names. If None,
279
+ transfer all artifacts from the other datastore.
280
+ """
281
+ if (
282
+ other_datastore.TYPE == self.TYPE
283
+ and other_datastore._storage_impl.datastore_root
284
+ == self._storage_impl.datastore_root
285
+ ):
286
+ # Nothing to transfer -- artifacts are already saved properly
287
+ return
288
+
289
+ # Determine which artifacts need to be transferred
290
+ if names is None:
291
+ # Transfer all artifacts from other datastore
292
+ artifacts_to_transfer = list(other_datastore._objects.keys())
293
+ else:
294
+ # Transfer only specified artifacts
295
+ artifacts_to_transfer = [
296
+ name for name in names if name in other_datastore._objects
297
+ ]
298
+
299
+ if not artifacts_to_transfer:
300
+ return
301
+
302
+ # Get SHA keys for artifacts to transfer
303
+ shas_to_transfer = [
304
+ other_datastore._objects[name] for name in artifacts_to_transfer
305
+ ]
306
+
307
+ # Check which blobs are missing locally
308
+ missing_shas = []
309
+ for sha in shas_to_transfer:
310
+ local_path = self._ca_store._storage_impl.path_join(
311
+ self._ca_store._prefix, sha[:2], sha
312
+ )
313
+ if not self._ca_store._storage_impl.is_file([local_path])[0]:
314
+ missing_shas.append(sha)
315
+
316
+ if not missing_shas:
317
+ return # All blobs already exist locally
318
+
319
+ # Load blobs from other datastore in transfer mode
320
+ transfer_blobs = other_datastore._ca_store.load_blobs(
321
+ missing_shas, is_transfer=True
322
+ )
323
+
324
+ # Save blobs to local datastore in transfer mode
325
+ self._ca_store.save_blobs(transfer_blobs, is_transfer=True)
326
+
327
+ @only_if_not_done
328
+ @require_mode("w")
329
+ def save_artifacts(self, artifacts_iter, len_hint=0):
237
330
  """
238
331
  Saves Metaflow Artifacts (Python objects) to the datastore and stores
239
332
  any relevant metadata needed to retrieve them.
@@ -249,11 +342,6 @@ class TaskDataStore(object):
249
342
  artifacts : Iterator[(string, object)]
250
343
  Iterator over the human-readable name of the object to save
251
344
  and the object itself
252
- force_v4 : boolean or Dict[string -> boolean]
253
- Indicates whether the artifact should be pickled using the v4
254
- version of pickle. If a single boolean, applies to all artifacts.
255
- If a dictionary, applies to the object named only. Defaults to False
256
- if not present or not specified
257
345
  len_hint: integer
258
346
  Estimated number of items in artifacts_iter
259
347
  """
@@ -261,40 +349,24 @@ class TaskDataStore(object):
261
349
 
262
350
  def pickle_iter():
263
351
  for name, obj in artifacts_iter:
264
- do_v4 = (
265
- force_v4 and force_v4
266
- if isinstance(force_v4, bool)
267
- else force_v4.get(name, False)
268
- )
269
- if do_v4:
270
- encode_type = "gzip+pickle-v4"
271
- if encode_type not in self._encodings:
272
- raise DataException(
273
- "Artifact *%s* requires a serialization encoding that "
274
- "requires Python 3.4 or newer." % name
275
- )
352
+ encode_type = "gzip+pickle-v4"
353
+ if encode_type in self._encodings:
276
354
  try:
277
355
  blob = pickle.dumps(obj, protocol=4)
278
356
  except TypeError as e:
279
- raise UnpicklableArtifactException(name)
357
+ raise UnpicklableArtifactException(name) from e
280
358
  else:
281
359
  try:
282
360
  blob = pickle.dumps(obj, protocol=2)
283
361
  encode_type = "gzip+pickle-v2"
284
- except (SystemError, OverflowError):
285
- encode_type = "gzip+pickle-v4"
286
- if encode_type not in self._encodings:
287
- raise DataException(
288
- "Artifact *%s* is very large (over 2GB). "
289
- "You need to use Python 3.4 or newer if you want to "
290
- "serialize large objects." % name
291
- )
292
- try:
293
- blob = pickle.dumps(obj, protocol=4)
294
- except TypeError as e:
295
- raise UnpicklableArtifactException(name)
362
+ except (SystemError, OverflowError) as e:
363
+ raise DataException(
364
+ "Artifact *%s* is very large (over 2GB). "
365
+ "You need to use Python 3.6 or newer if you want to "
366
+ "serialize large objects." % name
367
+ ) from e
296
368
  except TypeError as e:
297
- raise UnpicklableArtifactException(name)
369
+ raise UnpicklableArtifactException(name) from e
298
370
 
299
371
  self._info[name] = {
300
372
  "size": len(blob),
@@ -353,7 +425,7 @@ class TaskDataStore(object):
353
425
  encode_type = "gzip+pickle-v2"
354
426
  if encode_type not in self._encodings:
355
427
  raise DataException(
356
- "Python 3.4 or later is required to load artifact '%s'" % name
428
+ "Python 3.6 or later is required to load artifact '%s'" % name
357
429
  )
358
430
  else:
359
431
  to_load[self._objects[name]].append(name)
@@ -361,7 +433,7 @@ class TaskDataStore(object):
361
433
  # We assume that if we have one "old" style artifact, all of them are
362
434
  # like that which is an easy assumption to make since artifacts are all
363
435
  # stored by the same implementation of the datastore for a given task.
364
- for (key, blob) in self._ca_store.load_blobs(to_load.keys()):
436
+ for key, blob in self._ca_store.load_blobs(to_load.keys()):
365
437
  names = to_load[key]
366
438
  for name in names:
367
439
  # We unpickle everytime to have fully distinct objects (the user
@@ -682,14 +754,16 @@ class TaskDataStore(object):
682
754
  flow : FlowSpec
683
755
  Flow to persist
684
756
  """
757
+ if not self._persist:
758
+ return
685
759
 
686
760
  if flow._datastore:
687
761
  self._objects.update(flow._datastore._objects)
688
762
  self._info.update(flow._datastore._info)
689
763
 
690
- # we create a list of valid_artifacts in advance, outside of
691
- # artifacts_iter, so we can provide a len_hint below
764
+ # Scan flow object FIRST
692
765
  valid_artifacts = []
766
+ current_artifact_names = set()
693
767
  for var in dir(flow):
694
768
  if var.startswith("__") or var in flow._EPHEMERAL:
695
769
  continue
@@ -706,6 +780,16 @@ class TaskDataStore(object):
706
780
  or isinstance(val, Parameter)
707
781
  ):
708
782
  valid_artifacts.append((var, val))
783
+ current_artifact_names.add(var)
784
+
785
+ # Transfer ONLY artifacts that aren't being overridden
786
+ if hasattr(flow._datastore, "orig_datastore"):
787
+ parent_artifacts = set(flow._datastore._objects.keys())
788
+ unchanged_artifacts = parent_artifacts - current_artifact_names
789
+ if unchanged_artifacts:
790
+ self.transfer_artifacts(
791
+ flow._datastore.orig_datastore, names=list(unchanged_artifacts)
792
+ )
709
793
 
710
794
  def artifacts_iter():
711
795
  # we consume the valid_artifacts list destructively to
@@ -721,6 +805,7 @@ class TaskDataStore(object):
721
805
  delattr(flow, var)
722
806
  yield var, val
723
807
 
808
+ # Save current artifacts
724
809
  self.save_artifacts(artifacts_iter(), len_hint=len(valid_artifacts))
725
810
 
726
811
  @only_if_not_done
@@ -750,6 +835,36 @@ class TaskDataStore(object):
750
835
  to_store_dict[n] = data
751
836
  self._save_file(to_store_dict)
752
837
 
838
+ @require_mode("d")
839
+ def scrub_logs(self, logsources, stream, attempt_override=None):
840
+ path_logsources = {
841
+ self._metadata_name_for_attempt(
842
+ self._get_log_location(s, stream),
843
+ attempt_override=attempt_override,
844
+ ): s
845
+ for s in logsources
846
+ }
847
+
848
+ # Legacy log paths
849
+ legacy_log = self._metadata_name_for_attempt(
850
+ "%s.log" % stream, attempt_override
851
+ )
852
+ path_logsources[legacy_log] = stream
853
+
854
+ existing_paths = [
855
+ path
856
+ for path in path_logsources.keys()
857
+ if self.has_metadata(path, add_attempt=False)
858
+ ]
859
+
860
+ # Replace log contents with [REDACTED source stream]
861
+ to_store_dict = {
862
+ path: bytes("[REDACTED %s %s]" % (path_logsources[path], stream), "utf-8")
863
+ for path in existing_paths
864
+ }
865
+
866
+ self._save_file(to_store_dict, add_attempt=False, allow_overwrite=True)
867
+
753
868
  @require_mode("r")
754
869
  def load_log_legacy(self, stream, attempt_override=None):
755
870
  """
metaflow/debug.py CHANGED
@@ -42,6 +42,11 @@ class Debug(object):
42
42
  filename = inspect.stack()[1][1]
43
43
  print("debug[%s %s:%s]: %s" % (typ, filename, lineno, s), file=sys.stderr)
44
44
 
45
+ def __getattr__(self, name):
46
+ # Small piece of code to get pyright and other linters to recognize that there
47
+ # are dynamic attributes.
48
+ return getattr(self, name)
49
+
45
50
  def noop(self, args):
46
51
  pass
47
52