ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import bz2
2
+ import concurrent.futures
2
3
  import io
3
4
  import json
4
5
  import os
@@ -6,133 +7,384 @@ import shutil
6
7
  import subprocess
7
8
  import sys
8
9
  import tarfile
9
-
10
- from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
10
+ import time
11
+ import platform
12
+ from urllib.error import URLError
13
+ from urllib.request import urlopen
14
+ from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_USE_FAST_INIT
15
+ from metaflow.packaging_sys import MetaflowCodeContent, ContentType
11
16
  from metaflow.plugins import DATASTORES
17
+ from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
12
18
  from metaflow.util import which
19
+ from urllib.request import Request
20
+ import warnings
13
21
 
14
22
  from . import MAGIC_FILE, _datastore_packageroot
15
23
 
24
+ FAST_INIT_BIN_URL = "https://fast-flow-init.outerbounds.sh/{platform}/latest"
25
+
16
26
  # Bootstraps a valid conda virtual environment composed of conda and pypi packages
17
27
 
28
+
29
+ def timer(func):
30
+ def wrapper(*args, **kwargs):
31
+ start_time = time.time()
32
+ result = func(*args, **kwargs)
33
+ duration = time.time() - start_time
34
+ # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
35
+ return result
36
+
37
+ return wrapper
38
+
39
+
18
40
  if __name__ == "__main__":
19
- if len(sys.argv) != 5:
20
- print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
21
- sys.exit(1)
22
- _, flow_name, id_, datastore_type, architecture = sys.argv
23
-
24
- # TODO: Detect architecture on the fly when dealing with arm architectures.
25
- # ARCH=$(uname -m)
26
- # OS=$(uname)
27
-
28
- # if [[ "$OS" == "Linux" ]]; then
29
- # PLATFORM="linux"
30
- # if [[ "$ARCH" == "aarch64" ]]; then
31
- # ARCH="aarch64";
32
- # elif [[ $ARCH == "ppc64le" ]]; then
33
- # ARCH="ppc64le";
34
- # else
35
- # ARCH="64";
36
- # fi
37
- # fi
38
-
39
- # if [[ "$OS" == "Darwin" ]]; then
40
- # PLATFORM="osx";
41
- # if [[ "$ARCH" == "arm64" ]]; then
42
- # ARCH="arm64";
43
- # else
44
- # ARCH="64"
45
- # fi
46
- # fi
47
-
48
- prefix = os.path.join(os.getcwd(), architecture, id_)
49
- pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
50
- manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
51
-
52
- datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
53
- if not datastores:
54
- print(f"No datastore found for type: {datastore_type}")
55
- sys.exit(1)
56
41
 
57
- storage = datastores[0](
58
- _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
59
- )
42
+ def run_cmd(cmd, stdin_str=None):
43
+ result = subprocess.run(
44
+ cmd,
45
+ shell=True,
46
+ input=stdin_str,
47
+ stdout=subprocess.PIPE,
48
+ stderr=subprocess.PIPE,
49
+ text=True,
50
+ )
51
+ if result.returncode != 0:
52
+ print(f"Bootstrap failed while executing: {cmd}")
53
+ print("Stdout:", result.stdout)
54
+ print("Stderr:", result.stderr)
55
+ sys.exit(1)
56
+
57
+ @timer
58
+ def install_fast_initializer(architecture):
59
+ import gzip
60
+
61
+ fast_initializer_path = os.path.join(
62
+ os.getcwd(), "fast-initializer", "bin", "fast-initializer"
63
+ )
64
+
65
+ if which("fast-initializer"):
66
+ return which("fast-initializer")
67
+ if os.path.exists(fast_initializer_path):
68
+ os.environ["PATH"] += os.pathsep + os.path.dirname(fast_initializer_path)
69
+ return fast_initializer_path
70
+
71
+ url = FAST_INIT_BIN_URL.format(platform=architecture)
60
72
 
61
- # Move MAGIC_FILE inside local datastore.
62
- os.makedirs(manifest_dir, exist_ok=True)
63
- shutil.move(
64
- os.path.join(os.getcwd(), MAGIC_FILE),
65
- os.path.join(manifest_dir, MAGIC_FILE),
66
- )
73
+ # Prepare directory once
74
+ os.makedirs(os.path.dirname(fast_initializer_path), exist_ok=True)
67
75
 
68
- with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
69
- env = json.load(f)[id_][architecture]
76
+ # Download and decompress in one go
77
+ def _download_and_extract(url):
78
+ headers = {
79
+ "Accept-Encoding": "gzip, deflate, br",
80
+ "Connection": "keep-alive",
81
+ "User-Agent": "python-urllib",
82
+ }
70
83
 
71
- # Download Conda packages.
72
- conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
73
- with storage.load_bytes([package["path"] for package in env["conda"]]) as results:
74
- for key, tmpfile, _ in results:
84
+ max_retries = 3
85
+ for attempt in range(max_retries):
86
+ try:
87
+ req = Request(url, headers=headers)
88
+ with urlopen(req) as response:
89
+ with gzip.GzipFile(fileobj=response) as gz:
90
+ with open(fast_initializer_path, "wb") as f:
91
+ f.write(gz.read())
92
+ break
93
+ except (URLError, IOError) as e:
94
+ if attempt == max_retries - 1:
95
+ raise Exception(
96
+ f"Failed to download fast-initializer after {max_retries} attempts: {e}"
97
+ )
98
+ time.sleep(2**attempt)
99
+
100
+ _download_and_extract(url)
101
+
102
+ # Set executable permission
103
+ os.chmod(fast_initializer_path, 0o755)
104
+
105
+ # Update PATH only once at the end
106
+ os.environ["PATH"] += os.pathsep + os.path.dirname(fast_initializer_path)
107
+ return fast_initializer_path
108
+
109
+ @timer
110
+ def install_micromamba(architecture):
111
+ micromamba_dir = os.path.join(os.getcwd(), "micromamba")
112
+ micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
113
+
114
+ if which("micromamba"):
115
+ return which("micromamba")
116
+ if os.path.exists(micromamba_path):
117
+ os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
118
+ return micromamba_path
119
+
120
+ # Download and extract in one go
121
+ url = MICROMAMBA_URL.format(platform=architecture, version="2.0.4")
122
+ mirror_url = MICROMAMBA_MIRROR_URL.format(
123
+ platform=architecture, version="2.0.4"
124
+ )
125
+
126
+ # Prepare directory once
127
+ os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
128
+
129
+ # Download and decompress in one go
130
+ def _download_and_extract(url):
131
+ headers = {
132
+ "Accept-Encoding": "gzip, deflate, br",
133
+ "Connection": "keep-alive",
134
+ "User-Agent": "python-urllib",
135
+ }
136
+
137
+ max_retries = 3
138
+ for attempt in range(max_retries):
139
+ try:
140
+ req = Request(url, headers=headers)
141
+
142
+ with urlopen(req) as response:
143
+ decompressor = bz2.BZ2Decompressor()
144
+ with warnings.catch_warnings():
145
+ warnings.filterwarnings(
146
+ "ignore", category=DeprecationWarning
147
+ )
148
+ with tarfile.open(
149
+ fileobj=io.BytesIO(
150
+ decompressor.decompress(response.read())
151
+ ),
152
+ mode="r:",
153
+ ) as tar:
154
+ member = tar.getmember("bin/micromamba")
155
+ tar.extract(member, micromamba_dir)
156
+ break
157
+ except (URLError, IOError) as e:
158
+ if attempt == max_retries - 1:
159
+ raise Exception(
160
+ f"Failed to download micromamba after {max_retries} attempts: {e}"
161
+ )
162
+ time.sleep(2**attempt)
163
+
164
+ try:
165
+ # first try from mirror
166
+ _download_and_extract(mirror_url)
167
+ except Exception:
168
+ # download from mirror failed, try official source before failing.
169
+ _download_and_extract(url)
170
+
171
+ # Set executable permission
172
+ os.chmod(micromamba_path, 0o755)
173
+
174
+ # Update PATH only once at the end
175
+ os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
176
+ return micromamba_path
177
+
178
+ @timer
179
+ def download_conda_packages(storage, packages, dest_dir):
180
+ def process_conda_package(args):
75
181
  # Ensure that conda packages go into architecture specific folders.
76
182
  # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
77
183
  # Micromamba into believing that all packages are coming from a local
78
184
  # channel - the only hurdle is ensuring that packages are organised
79
185
  # properly.
80
-
81
- # TODO: consider RAM disk
82
- dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
186
+ key, tmpfile, dest_dir = args
187
+ dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
83
188
  os.makedirs(os.path.dirname(dest), exist_ok=True)
84
189
  shutil.move(tmpfile, dest)
85
190
 
86
- # Create Conda environment.
87
- cmds = [
88
- # TODO: check if mamba or conda are already available on the image
89
- # TODO: micromamba installation can be pawned off to micromamba.py
90
- f"""set -e;
91
- if ! command -v micromamba >/dev/null 2>&1; then
92
- mkdir micromamba;
93
- python -c "import requests, bz2, sys; data = requests.get('https://micro.mamba.pm/api/micromamba/{architecture}/latest').content; sys.stdout.buffer.write(bz2.decompress(data))" | tar -xv -C $(pwd)/micromamba bin/micromamba --strip-components 1;
191
+ os.makedirs(dest_dir, exist_ok=True)
192
+ with storage.load_bytes([package["path"] for package in packages]) as results:
193
+ with concurrent.futures.ThreadPoolExecutor() as executor:
194
+ executor.map(
195
+ process_conda_package,
196
+ [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
197
+ )
198
+ # for key, tmpfile, _ in results:
199
+
200
+ # # TODO: consider RAM disk
201
+ # dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
202
+ # os.makedirs(os.path.dirname(dest), exist_ok=True)
203
+ # shutil.move(tmpfile, dest)
204
+ return dest_dir
205
+
206
+ @timer
207
+ def download_pypi_packages(storage, packages, dest_dir):
208
+ def process_pypi_package(args):
209
+ key, tmpfile, dest_dir = args
210
+ dest = os.path.join(dest_dir, os.path.basename(key))
211
+ shutil.move(tmpfile, dest)
212
+
213
+ os.makedirs(dest_dir, exist_ok=True)
214
+ with storage.load_bytes([package["path"] for package in packages]) as results:
215
+ with concurrent.futures.ThreadPoolExecutor() as executor:
216
+ executor.map(
217
+ process_pypi_package,
218
+ [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
219
+ )
220
+ # for key, tmpfile, _ in results:
221
+ # dest = os.path.join(dest_dir, os.path.basename(key))
222
+ # shutil.move(tmpfile, dest)
223
+ return dest_dir
224
+
225
+ @timer
226
+ def create_conda_environment(prefix, conda_pkgs_dir):
227
+ cmd = f'''set -e;
228
+ tmpfile=$(mktemp);
229
+ echo "@EXPLICIT" > "$tmpfile";
230
+ ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
94
231
  export PATH=$PATH:$(pwd)/micromamba;
95
- if ! command -v micromamba >/dev/null 2>&1; then
96
- echo "Failed to install Micromamba!";
97
- exit 1;
98
- fi;
99
- fi""",
100
- # Create a conda environment through Micromamba.
101
- f'''set -e;
102
- tmpfile=$(mktemp);
103
- echo "@EXPLICIT" > "$tmpfile";
104
- ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
105
- export PATH=$PATH:$(pwd)/micromamba;
106
- micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
107
- rm "$tmpfile"''',
108
- ]
109
-
110
- # Download PyPI packages.
111
- if "pypi" in env:
232
+ export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
233
+ export MAMBA_NO_LOW_SPEED_LIMIT=1;
234
+ export MAMBA_USE_INDEX_CACHE=1;
235
+ export MAMBA_NO_PROGRESS_BARS=1;
236
+ export CONDA_FETCH_THREADS=1;
237
+ micromamba create --yes --offline --no-deps \
238
+ --safety-checks=disabled --no-extra-safety-checks \
239
+ --prefix {prefix} --file "$tmpfile" \
240
+ --no-pyc --no-rc --always-copy;
241
+ rm "$tmpfile"'''
242
+ run_cmd(cmd)
243
+
244
+ @timer
245
+ def install_pypi_packages(prefix, pypi_pkgs_dir):
246
+ cmd = f"""set -e;
247
+ export PATH=$PATH:$(pwd)/micromamba;
248
+ export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
249
+ micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
250
+ install --root-user-action=ignore --no-compile --no-index \
251
+ --no-cache-dir --no-deps --prefer-binary \
252
+ --find-links={pypi_pkgs_dir} --no-user \
253
+ --no-warn-script-location --no-input \
254
+ {pypi_pkgs_dir}/*.whl
255
+ """
256
+ run_cmd(cmd)
257
+
258
+ @timer
259
+ def setup_environment(
260
+ architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
261
+ ):
262
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
263
+ # install micromamba, download conda and pypi packages in parallel
264
+ futures = {
265
+ "micromamba": executor.submit(install_micromamba, architecture),
266
+ "conda_pkgs": executor.submit(
267
+ download_conda_packages, storage, env["conda"], conda_pkgs_dir
268
+ ),
269
+ }
270
+ if "pypi" in env:
271
+ futures["pypi_pkgs"] = executor.submit(
272
+ download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
273
+ )
274
+
275
+ # create conda environment after micromamba is installed and conda packages are downloaded
276
+ done, _ = concurrent.futures.wait(
277
+ [futures["micromamba"], futures["conda_pkgs"]],
278
+ return_when=concurrent.futures.ALL_COMPLETED,
279
+ )
280
+
281
+ for future in done:
282
+ future.result()
283
+
284
+ # start conda environment creation
285
+ futures["conda_env"] = executor.submit(
286
+ create_conda_environment, prefix, conda_pkgs_dir
287
+ )
288
+
289
+ if "pypi" in env:
290
+ # install pypi packages after conda environment is created and pypi packages are downloaded
291
+ done, _ = concurrent.futures.wait(
292
+ [futures["conda_env"], futures["pypi_pkgs"]],
293
+ return_when=concurrent.futures.ALL_COMPLETED,
294
+ )
295
+
296
+ for future in done:
297
+ future.result()
298
+
299
+ # install pypi packages
300
+ futures["pypi_install"] = executor.submit(
301
+ install_pypi_packages, prefix, pypi_pkgs_dir
302
+ )
303
+ # wait for pypi packages to be installed
304
+ futures["pypi_install"].result()
305
+ else:
306
+ # wait for conda environment to be created
307
+ futures["conda_env"].result()
308
+
309
+ @timer
310
+ def fast_setup_environment(architecture, storage, env, prefix, pkgs_dir):
311
+ install_fast_initializer(architecture)
312
+
313
+ # Get package urls
314
+ conda_pkgs = env["conda"]
315
+ pypi_pkgs = env.get("pypi", [])
316
+ conda_pkg_urls = [package["path"] for package in conda_pkgs]
317
+ pypi_pkg_urls = [package["path"] for package in pypi_pkgs]
318
+
319
+ # Create string with package URLs
320
+ all_package_urls = ""
321
+ for url in conda_pkg_urls:
322
+ all_package_urls += f"{storage.datastore_root}/{url}\n"
323
+ all_package_urls += "---\n"
324
+ for url in pypi_pkg_urls:
325
+ all_package_urls += f"{storage.datastore_root}/{url}\n"
326
+
327
+ # Initialize environment
328
+ # NOTE: For the time being the fast-initializer only works for the S3 datastore implementation
329
+ cmd = f"fast-initializer --prefix {prefix} --packages-dir {pkgs_dir}"
330
+ run_cmd(cmd, all_package_urls)
331
+
332
+ if len(sys.argv) != 4:
333
+ print("Usage: bootstrap.py <flow_name> <id> <datastore_type>")
334
+ sys.exit(1)
335
+
336
+ try:
337
+ _, flow_name, id_, datastore_type = sys.argv
338
+
339
+ system = platform.system().lower()
340
+ arch_machine = platform.machine().lower()
341
+
342
+ if system == "darwin" and arch_machine == "arm64":
343
+ architecture = "osx-arm64"
344
+ elif system == "darwin":
345
+ architecture = "osx-64"
346
+ elif system == "linux" and arch_machine == "aarch64":
347
+ architecture = "linux-aarch64"
348
+ else:
349
+ # default fallback
350
+ architecture = "linux-64"
351
+
352
+ prefix = os.path.join(os.getcwd(), architecture, id_)
353
+ pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
354
+ conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
112
355
  pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
113
- with storage.load_bytes(
114
- [package["path"] for package in env["pypi"]]
115
- ) as results:
116
- for key, tmpfile, _ in results:
117
- dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
118
- os.makedirs(os.path.dirname(dest), exist_ok=True)
119
- shutil.move(tmpfile, dest)
120
-
121
- # Install PyPI packages.
122
- cmds.extend(
123
- [
124
- f"""set -e;
125
- export PATH=$PATH:$(pwd)/micromamba;
126
- micromamba run --prefix {prefix} pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl"""
127
- ]
356
+ manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
357
+
358
+ datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
359
+ if not datastores:
360
+ print(f"No datastore found for type: {datastore_type}")
361
+ sys.exit(1)
362
+
363
+ storage = datastores[0](
364
+ _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
128
365
  )
129
366
 
130
- for cmd in cmds:
131
- result = subprocess.run(
132
- cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
367
+ # Move MAGIC_FILE inside local datastore.
368
+ os.makedirs(manifest_dir, exist_ok=True)
369
+ path_to_manifest = MetaflowCodeContent.get_filename(
370
+ MAGIC_FILE, ContentType.OTHER_CONTENT
133
371
  )
134
- if result.returncode != 0:
135
- print(f"Bootstrap failed while executing: {cmd}")
136
- print("Stdout:", result.stdout.decode())
137
- print("Stderr:", result.stderr.decode())
138
- sys.exit(1)
372
+ if path_to_manifest is None:
373
+ raise RuntimeError(f"Cannot find {MAGIC_FILE} in the package")
374
+ shutil.move(
375
+ path_to_manifest,
376
+ os.path.join(manifest_dir, MAGIC_FILE),
377
+ )
378
+ with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
379
+ env = json.load(f)[id_][architecture]
380
+
381
+ if CONDA_USE_FAST_INIT:
382
+ fast_setup_environment(architecture, storage, env, prefix, pkgs_dir)
383
+ else:
384
+ setup_environment(
385
+ architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
386
+ )
387
+
388
+ except Exception as e:
389
+ print(f"Error: {str(e)}", file=sys.stderr)
390
+ sys.exit(1)