ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,24 @@
1
1
  from __future__ import print_function
2
2
  from collections import OrderedDict
3
+ import json
3
4
  import os
4
5
  import sys
5
6
  import time
6
7
  from tempfile import NamedTemporaryFile
7
8
  from hashlib import sha1
8
9
 
10
+ from urllib.parse import urlparse
11
+
9
12
  from metaflow.datastore import FlowDataStore
10
13
  from metaflow.datastore.content_addressed_store import BlobCache
14
+ from metaflow.datastore.flow_datastore import MetadataCache
11
15
  from metaflow.exception import MetaflowException
12
16
  from metaflow.metaflow_config import (
13
17
  CLIENT_CACHE_PATH,
14
18
  CLIENT_CACHE_MAX_SIZE,
15
19
  CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT,
16
- CLIENT_CACHE_MAX_TASKDATASTORE_COUNT,
17
20
  )
21
+ from metaflow.metaflow_profile import from_start
18
22
 
19
23
  from metaflow.plugins import DATASTORES
20
24
 
@@ -61,8 +65,8 @@ class FileCache(object):
61
65
  # when querying for sizes of artifacts. Once we have queried for the size
62
66
  # of one artifact in a TaskDatastore, caching this means that any
63
67
  # queries on that same TaskDatastore will be quick (since we already
64
- # have all the metadata)
65
- self._task_metadata_caches = OrderedDict()
68
+ # have all the metadata). We keep track of this in a file so it persists
69
+ # across processes.
66
70
 
67
71
  @property
68
72
  def cache_dir(self):
@@ -83,10 +87,9 @@ class FileCache(object):
83
87
  def get_log_legacy(
84
88
  self, ds_type, location, logtype, attempt, flow_name, run_id, step_name, task_id
85
89
  ):
86
-
87
90
  ds_cls = self._get_datastore_storage_impl(ds_type)
88
91
  ds_root = ds_cls.path_join(*ds_cls.path_split(location)[:-5])
89
- cache_id = self._flow_ds_id(ds_type, ds_root, flow_name)
92
+ cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
90
93
 
91
94
  token = (
92
95
  "%s.cached"
@@ -310,13 +313,25 @@ class FileCache(object):
310
313
  self._objects = sorted(objects, reverse=False)
311
314
 
312
315
  @staticmethod
313
- def _flow_ds_id(ds_type, ds_root, flow_name):
314
- return ".".join([ds_type, ds_root, flow_name])
316
+ def flow_ds_id(ds_type, ds_root, flow_name):
317
+ p = urlparse(ds_root)
318
+ sanitized_root = (p.netloc + p.path).replace("/", "_")
319
+ return ".".join([ds_type, sanitized_root, flow_name])
315
320
 
316
321
  @staticmethod
317
- def _task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
322
+ def task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
323
+ p = urlparse(ds_root)
324
+ sanitized_root = (p.netloc + p.path).replace("/", "_")
318
325
  return ".".join(
319
- [ds_type, ds_root, flow_name, run_id, step_name, task_id, str(attempt)]
326
+ [
327
+ ds_type,
328
+ sanitized_root,
329
+ flow_name,
330
+ run_id,
331
+ step_name,
332
+ task_id,
333
+ str(attempt),
334
+ ]
320
335
  )
321
336
 
322
337
  def _garbage_collect(self):
@@ -352,7 +367,7 @@ class FileCache(object):
352
367
  return storage_impl[0]
353
368
 
354
369
  def _get_flow_datastore(self, ds_type, ds_root, flow_name):
355
- cache_id = self._flow_ds_id(ds_type, ds_root, flow_name)
370
+ cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
356
371
  cached_flow_datastore = self._store_caches.get(cache_id)
357
372
 
358
373
  if cached_flow_datastore:
@@ -367,9 +382,14 @@ class FileCache(object):
367
382
  ds_root=ds_root,
368
383
  )
369
384
  blob_cache = self._blob_caches.setdefault(
370
- cache_id, FileBlobCache(self, cache_id)
385
+ cache_id,
386
+ (
387
+ FileBlobCache(self, cache_id),
388
+ TaskMetadataCache(self, ds_type, ds_root, flow_name),
389
+ ),
371
390
  )
372
- cached_flow_datastore.ca_store.set_blob_cache(blob_cache)
391
+ cached_flow_datastore.ca_store.set_blob_cache(blob_cache[0])
392
+ cached_flow_datastore.set_metadata_cache(blob_cache[1])
373
393
  self._store_caches[cache_id] = cached_flow_datastore
374
394
  if len(self._store_caches) > CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT:
375
395
  cache_id_to_remove, _ = self._store_caches.popitem(last=False)
@@ -380,32 +400,52 @@ class FileCache(object):
380
400
  self, ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
381
401
  ):
382
402
  flow_ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
383
- cached_metadata = None
384
- if attempt is not None:
385
- cache_id = self._task_ds_id(
386
- ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
403
+
404
+ return flow_ds.get_task_datastore(run_id, step_name, task_id, attempt=attempt)
405
+
406
+
407
+ class TaskMetadataCache(MetadataCache):
408
+ def __init__(self, filecache, ds_type, ds_root, flow_name):
409
+ self._filecache = filecache
410
+ self._ds_type = ds_type
411
+ self._ds_root = ds_root
412
+ self._flow_name = flow_name
413
+
414
+ def _path(self, run_id, step_name, task_id, attempt):
415
+ if attempt is None:
416
+ raise MetaflowException(
417
+ "Attempt number must be specified to use task metadata cache. Raise an issue "
418
+ "on Metaflow GitHub if you see this message.",
387
419
  )
388
- cached_metadata = self._task_metadata_caches.get(cache_id)
389
- if cached_metadata:
390
- od_move_to_end(self._task_metadata_caches, cache_id)
391
- return flow_ds.get_task_datastore(
392
- run_id,
393
- step_name,
394
- task_id,
395
- attempt=attempt,
396
- data_metadata=cached_metadata,
397
- )
398
- # If we are here, we either have attempt=None or nothing in the cache
399
- task_ds = flow_ds.get_task_datastore(
400
- run_id, step_name, task_id, attempt=attempt
420
+ cache_id = self._filecache.task_ds_id(
421
+ self._ds_type,
422
+ self._ds_root,
423
+ self._flow_name,
424
+ run_id,
425
+ step_name,
426
+ task_id,
427
+ attempt,
401
428
  )
402
- cache_id = self._task_ds_id(
403
- ds_type, ds_root, flow_name, run_id, step_name, task_id, task_ds.attempt
429
+ token = (
430
+ "%s.cached"
431
+ % sha1(
432
+ os.path.join(
433
+ run_id, step_name, task_id, str(attempt), "metadata"
434
+ ).encode("utf-8")
435
+ ).hexdigest()
436
+ )
437
+ return os.path.join(self._filecache.cache_dir, cache_id, token[:2], token)
438
+
439
+ def load_metadata(self, run_id, step_name, task_id, attempt):
440
+ d = self._filecache.read_file(self._path(run_id, step_name, task_id, attempt))
441
+ if d:
442
+ return json.loads(d)
443
+
444
+ def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
445
+ self._filecache.create_file(
446
+ self._path(run_id, step_name, task_id, attempt),
447
+ json.dumps(metadata_dict).encode("utf-8"),
404
448
  )
405
- self._task_metadata_caches[cache_id] = task_ds.ds_metadata
406
- if len(self._task_metadata_caches) > CLIENT_CACHE_MAX_TASKDATASTORE_COUNT:
407
- self._task_metadata_caches.popitem(last=False)
408
- return task_ds
409
449
 
410
450
 
411
451
  class FileBlobCache(BlobCache):
metaflow/clone_util.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import time
2
- from .metadata import MetaDatum
2
+ from .metadata_provider import MetaDatum
3
3
 
4
4
 
5
5
  def clone_task_helper(
@@ -66,6 +66,12 @@ def clone_task_helper(
66
66
  type="attempt",
67
67
  tags=metadata_tags,
68
68
  ),
69
+ MetaDatum(
70
+ field="attempt_ok",
71
+ value="True", # During clone, the task is always considered successful.
72
+ type="internal_attempt_status",
73
+ tags=metadata_tags,
74
+ ),
69
75
  ],
70
76
  )
71
77
  output.done()
@@ -0,0 +1,231 @@
1
+ import os
2
+ import shutil
3
+ import sys
4
+ from subprocess import PIPE, CompletedProcess, run
5
+ from tempfile import TemporaryDirectory
6
+ from typing import Any, Callable, List, Mapping, Optional, cast
7
+
8
+ from metaflow import Run
9
+ from metaflow.util import walk_without_cycles
10
+ from metaflow._vendor import click
11
+ from metaflow.cli import echo_always
12
+
13
+
14
+ @click.group()
15
+ def cli():
16
+ pass
17
+
18
+
19
+ @cli.group(help="Access, compare, and manage code associated with Metaflow runs.")
20
+ def code():
21
+ pass
22
+
23
+
24
+ def echo(line: str) -> None:
25
+ echo_always(line, err=True, fg="magenta")
26
+
27
+
28
+ def extract_code_package(runspec: str) -> TemporaryDirectory:
29
+ try:
30
+ mf_run = Run(runspec, _namespace_check=False)
31
+ echo(f"✅ Run *{runspec}* found, downloading code..")
32
+ except Exception as e:
33
+ echo(f"❌ Run **{runspec}** not found")
34
+ raise e
35
+
36
+ if mf_run.code is None:
37
+ echo(
38
+ f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
39
+ )
40
+ raise RuntimeError("no code package found")
41
+
42
+ return mf_run.code.extract()
43
+
44
+
45
+ def perform_diff(
46
+ source_dir: str,
47
+ target_dir: Optional[str] = None,
48
+ output: bool = False,
49
+ **kwargs: Mapping[str, Any],
50
+ ) -> Optional[List[str]]:
51
+ if target_dir is None:
52
+ target_dir = os.getcwd()
53
+
54
+ diffs = []
55
+ for dirpath, _, filenames in walk_without_cycles(source_dir):
56
+ for fname in filenames:
57
+ # NOTE: the paths below need to be set up carefully
58
+ # for the `patch` command to work. Better not to touch
59
+ # the directories below. If you must, test that patches
60
+ # work after your changes.
61
+ #
62
+ # target_file is the git repo in the current working directory
63
+ rel = os.path.relpath(dirpath, source_dir)
64
+ target_file = os.path.join(rel, fname)
65
+ # source_file is the run file loaded in a tmp directory
66
+ source_file = os.path.join(dirpath, fname)
67
+
68
+ if sys.stdout.isatty() and not output:
69
+ color = ["--color"]
70
+ else:
71
+ color = ["--no-color"]
72
+
73
+ if os.path.exists(os.path.join(target_dir, target_file)):
74
+ cmd = (
75
+ ["git", "diff", "--no-index", "--exit-code"]
76
+ + color
77
+ + [
78
+ target_file,
79
+ source_file,
80
+ ]
81
+ )
82
+ result: CompletedProcess = run(
83
+ cmd, text=True, stdout=PIPE, cwd=target_dir
84
+ )
85
+ if result.returncode == 0:
86
+ if not output:
87
+ echo(f"✅ {target_file} is identical, skipping")
88
+ continue
89
+
90
+ if output:
91
+ diffs.append(result.stdout)
92
+ else:
93
+ run(["less", "-R"], input=result.stdout, text=True)
94
+ else:
95
+ if not output:
96
+ echo(f"❗ {target_file} not in the target directory, skipping")
97
+ return diffs if output else None
98
+
99
+
100
+ def run_op(
101
+ runspec: str, op: Callable[..., Optional[List[str]]], **op_args: Mapping[str, Any]
102
+ ) -> Optional[List[str]]:
103
+ tmp = None
104
+ try:
105
+ tmp = extract_code_package(runspec)
106
+ return op(tmp.name, **op_args)
107
+ finally:
108
+ if tmp and os.path.exists(tmp.name):
109
+ shutil.rmtree(tmp.name)
110
+
111
+
112
+ def run_op_diff_runs(
113
+ source_run_pathspec: str, target_run_pathspec: str, **op_args: Mapping[str, Any]
114
+ ) -> Optional[List[str]]:
115
+ source_tmp = None
116
+ target_tmp = None
117
+ try:
118
+ source_tmp = extract_code_package(source_run_pathspec)
119
+ target_tmp = extract_code_package(target_run_pathspec)
120
+ return perform_diff(source_tmp.name, target_tmp.name, **op_args)
121
+ finally:
122
+ for d in [source_tmp, target_tmp]:
123
+ if d and os.path.exists(d.name):
124
+ shutil.rmtree(d.name)
125
+
126
+
127
+ def op_diff(tmpdir: str, **kwargs: Mapping[str, Any]) -> Optional[List[str]]:
128
+ kwargs_dict = dict(kwargs)
129
+ target_dir = cast(Optional[str], kwargs_dict.pop("target_dir", None))
130
+ output: bool = bool(kwargs_dict.pop("output", False))
131
+ op_args: Mapping[str, Any] = {**kwargs_dict}
132
+ return perform_diff(tmpdir, target_dir=target_dir, output=output, **op_args)
133
+
134
+
135
+ def op_pull(tmpdir: str, dst: str, **op_args: Mapping[str, Any]) -> None:
136
+ if os.path.exists(dst):
137
+ echo(f"❌ Directory *{dst}* already exists")
138
+ else:
139
+ shutil.move(tmpdir, dst)
140
+ echo(f"Code downloaded to *{dst}*")
141
+
142
+
143
+ def op_patch(tmpdir: str, dst: str, **kwargs: Mapping[str, Any]) -> None:
144
+ diffs = perform_diff(tmpdir, output=True) or []
145
+ with open(dst, "w", encoding="utf-8") as f:
146
+ for out in diffs:
147
+ out = out.replace(tmpdir, "/.")
148
+ out = out.replace("+++ b/./", "+++ b/")
149
+ out = out.replace("--- b/./", "--- b/")
150
+ out = out.replace("--- a/./", "--- a/")
151
+ out = out.replace("+++ a/./", "+++ a/")
152
+ f.write(out)
153
+ echo(f"Patch saved in *{dst}*")
154
+ path = run(
155
+ ["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE
156
+ ).stdout.strip()
157
+ if path:
158
+ diropt = f" --directory={path.rstrip('/')}"
159
+ else:
160
+ diropt = ""
161
+ echo("Apply the patch by running:")
162
+ echo_always(
163
+ f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True
164
+ )
165
+
166
+
167
+ @code.command()
168
+ @click.argument("run_pathspec")
169
+ def diff(run_pathspec: str, **kwargs: Mapping[str, Any]) -> None:
170
+ """
171
+ Do a 'git diff' of the current directory and a Metaflow run.
172
+ """
173
+ _ = run_op(run_pathspec, op_diff, **kwargs)
174
+
175
+
176
+ @code.command()
177
+ @click.argument("source_run_pathspec")
178
+ @click.argument("target_run_pathspec")
179
+ def diff_runs(
180
+ source_run_pathspec: str, target_run_pathspec: str, **kwargs: Mapping[str, Any]
181
+ ) -> None:
182
+ """
183
+ Do a 'git diff' between two Metaflow runs.
184
+ """
185
+ _ = run_op_diff_runs(source_run_pathspec, target_run_pathspec, **kwargs)
186
+
187
+
188
+ @code.command()
189
+ @click.argument("run_pathspec")
190
+ @click.option(
191
+ "--dir", help="Destination directory (default: {run_pathspec}_code)", default=None
192
+ )
193
+ def pull(
194
+ run_pathspec: str, dir: Optional[str] = None, **kwargs: Mapping[str, Any]
195
+ ) -> None:
196
+ """
197
+ Pull the code of a Metaflow run.
198
+ """
199
+ if dir is None:
200
+ dir = run_pathspec.lower().replace("/", "_") + "_code"
201
+ op_args: Mapping[str, Any] = {**kwargs, "dst": dir}
202
+ run_op(run_pathspec, op_pull, **op_args)
203
+
204
+
205
+ @code.command()
206
+ @click.argument("run_pathspec")
207
+ @click.option(
208
+ "--file_path",
209
+ help="Patch file name. If not provided, defaults to a sanitized version of RUN_PATHSPEC "
210
+ "with slashes replaced by underscores, plus '.patch'.",
211
+ show_default=False,
212
+ )
213
+ @click.option(
214
+ "--overwrite", is_flag=True, help="Overwrite the patch file if it exists."
215
+ )
216
+ def patch(
217
+ run_pathspec: str,
218
+ file_path: Optional[str] = None,
219
+ overwrite: bool = False,
220
+ **kwargs: Mapping[str, Any],
221
+ ) -> None:
222
+ """
223
+ Create a patch by comparing current dir with a Metaflow run.
224
+ """
225
+ if file_path is None:
226
+ file_path = run_pathspec.lower().replace("/", "_") + ".patch"
227
+ if os.path.exists(file_path) and not overwrite:
228
+ echo(f"File *{file_path}* already exists. To overwrite, specify --overwrite.")
229
+ return
230
+ op_args: Mapping[str, Any] = {**kwargs, "dst": file_path}
231
+ run_op(run_pathspec, op_patch, **op_args)