ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,527 @@
1
+ import json
2
+ import os
3
+ import sys
4
+ from pathlib import Path
5
+ from types import ModuleType
6
+ from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Union
7
+
8
+ from ..debug import debug
9
+ from ..extension_support import (
10
+ EXT_EXCLUDE_SUFFIXES,
11
+ extension_info,
12
+ package_mfext_all,
13
+ package_mfext_all_descriptions,
14
+ )
15
+ from ..exception import MetaflowException
16
+ from ..metaflow_version import get_version
17
+ from ..user_decorators.user_flow_decorator import FlowMutatorMeta
18
+ from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
19
+ from ..util import get_metaflow_root, walk_without_cycles
20
+ from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
21
+ from .distribution_support import _ModuleInfo, modules_to_distributions
22
+ from .utils import suffix_filter, walk
23
+
24
+
25
+ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
26
+ METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"]
27
+
28
def __init__(
    self,
    code_dir: str = MetaflowCodeContentV1Base._code_dir,
    other_dir: str = MetaflowCodeContentV1Base._other_dir,
    criteria: Callable[[ModuleType], bool] = lambda x: True,
):
    """
    Set up the bookkeeping for a V1 Metaflow code package.

    Takes a snapshot of `sys.modules`, keeps the modules accepted by `criteria`
    (only the top-most module of each dotted hierarchy), records their root
    paths and the files they contribute, and caches the list of Metaflow and
    Metaflow-extension files.

    Parameters
    ----------
    code_dir : str
        Forwarded to the base class constructor
        (presumably stored as `self._code_dir` -- confirm in the base class).
    other_dir : str
        Forwarded to the base class constructor
        (presumably stored as `self._other_dir` -- confirm in the base class).
    criteria : Callable[[ModuleType], bool]
        Predicate applied to every module found in `sys.modules`; modules for
        which it returns True are candidates for packaging. The default accepts
        every module.
    """
    super().__init__(code_dir, other_dir)

    self._metaflow_root = get_metaflow_root()
    self._metaflow_version = get_version()

    self._criteria = criteria

    # We try to find the modules we need to package. We will first look at all modules
    # and apply the criteria to them. Then we will use the most parent module that
    # fits the criteria as the module to package

    # Make a copy since sys.modules could be modified while we load other
    # modules. See https://github.com/Netflix/metaflow/issues/2489
    all_modules = dict(sys.modules)
    modules = filter(lambda x: criteria(x[1]), all_modules.items())
    # Ensure that we see the parent modules first
    modules = sorted(modules, key=lambda x: x[0])
    if modules:
        last_prefix = modules[0][0]
        new_modules = [modules[0]]
        for name, mod in modules[1:]:
            if name.startswith(last_prefix + "."):
                # This is a submodule of the last module, we can skip it
                continue
            # Otherwise, we have a new top-level module
            last_prefix = name
            new_modules.append((name, mod))
    else:
        new_modules = []

    self._modules = {}  # type: Dict[str, _ModuleInfo]
    # We do this explicitly module by module to harden it against misbehaving
    # modules like the one in:
    # https://github.com/Netflix/metaflow/issues/2512
    # We will silently ignore modules that are not well built.
    for name, mod in new_modules:
        try:
            minfo = _ModuleInfo(
                name,
                set(
                    Path(p).resolve().as_posix()
                    for p in getattr(mod, "__path__", [mod.__file__])
                ),
                mod,
                True,  # This is a Metaflow module (see filter below)
            )
        except Exception:
            # Best effort: skip modules we cannot introspect. We deliberately do
            # not catch BaseException so that KeyboardInterrupt and SystemExit
            # still propagate.
            continue
        self._modules[name] = minfo

    # Contain metadata information regarding the distributions packaged.
    # This allows Metaflow to "fake" distribution information when packaged
    self._distmetainfo = {}  # type: Dict[str, Dict[str, str]]

    # Maps an absolute path on the filesystem to the path of the file in the
    # archive.
    self._files = {}  # type: Dict[str, str]
    self._files_from_modules = {}  # type: Dict[str, str]

    self._other_files = {}  # type: Dict[str, str]
    self._other_content = {}  # type: Dict[str, bytes]

    debug.package_exec(f"Used system modules found: {str(self._modules)}")

    # Populate with files from the third party modules
    for k, v in self._modules.items():
        self._files_from_modules.update(self._module_files(k, v.root_paths))

    # Figure out the files to package for Metaflow and extensions
    self._cached_metaflow_files = list(self._metaflow_distribution_files())
    self._cached_metaflow_files.extend(list(self._metaflow_extension_files()))
105
+
106
def create_mfcontent_info(self) -> Dict[str, Any]:
    """
    Build the dictionary describing this content.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the format version (1) under "version" and, under
        "module_files", the list of values stored in `self._files_from_modules`.
    """
    module_file_names = list(self._files_from_modules.values())
    return {"version": 1, "module_files": module_file_names}
108
+
109
def get_excluded_tl_entries(self) -> List[str]:
    """
    List the top-level entries that this content inserts into a package.

    When Metaflow is packaged from within an already executing Metaflow flow,
    these files or top-level directories may need to be excluded so that they
    are not packaged again.

    Returns
    -------
    List[str]
        The code directory followed by the other-content directory
    """
    excluded = [self._code_dir]
    excluded.append(self._other_dir)
    return excluded
122
+
123
def content_names(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[str, str], None, None]:
    """
    Enumerate everything present in this MetaflowCodeContent.

    Both real files and generated entries (the INFO or CONFIG data for example)
    are listed. Generated entries are not materialized.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to list. If None, all content is listed.

    Yields
    ------
    Tuple[str, str]
        Path on the filesystem and the name in the archive
    """
    names_only = self._content(content_types, generate_value=False)
    yield from names_only
141
+
142
def contents(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[Union[bytes, str], str], None, None]:
    """
    Enumerate everything present in this MetaflowCodeContent, with values.

    Works like `content_names` except that entries that are not backed by a file
    are returned with their actual content as bytes. Entries backed by a file are
    returned exactly as `content_names` returns them.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to return. If None, all content is returned.

    Yields
    ------
    Tuple[Union[str, bytes], str]
        Path on the filesystem (or generated bytes) and the name in the archive
    """
    with_values = self._content(content_types, generate_value=True)
    yield from with_values
160
+
161
def show(self) -> str:
    """
    Returns a more human-readable string representation of the content of this
    MetaflowCodeContent. This will not, for example, list all files but summarize what
    is included at a more high level.

    The summary contains, when available: the Metaflow version, the installed
    Metaflow extensions, and the packaged modules (split between Metaflow modules
    and the others). For Metaflow modules, the user step decorators and flow
    mutators whose defining module is that module or one of its submodules are
    listed as well. Root paths are listed in sorted order so that the output is
    stable from one run to the next.

    Returns
    -------
    str
        A human-readable string representation of the content of this MetaflowCodeContent
    """

    def _names_by_module(decorators):
        # Group decorator names by the module that defines them
        grouped = {}
        for deco_name, deco in decorators.items():
            grouped.setdefault(
                getattr(deco, "_original_module", deco.__module__), []
            ).append(deco_name)
        return grouped

    def _names_provided_by(grouped, module_name):
        # One comma-joined string per defining module that is `module_name`
        # or one of its submodules
        return [
            ", ".join(names)
            for defining_module, names in grouped.items()
            if defining_module == module_name
            or defining_module.startswith(module_name + ".")
        ]

    all_user_step_decorators = _names_by_module(
        UserStepDecoratorMeta.all_decorators()
    )
    all_user_flow_decorators = _names_by_module(FlowMutatorMeta.all_decorators())

    result = []
    if self._metaflow_version:
        result.append(f"\nMetaflow version: {self._metaflow_version}")
    installed_extensions = extension_info()["installed"]
    if installed_extensions:
        result.append("\nMetaflow extensions packaged:")
        # Use a dedicated loop variable; do not shadow the dictionary iterated on
        for ext_name, ext_details in installed_extensions.items():
            result.append(
                f" - {ext_name} ({ext_details['extension_name']}) @ {ext_details['dist_version']}"
            )

    if self._modules:
        mf_modules = []
        other_modules = []
        for name, info in self._modules.items():
            # root_paths is a set: sort it to get a deterministic listing
            root_paths = ", ".join(sorted(info.root_paths))
            if info.metaflow_module:
                mf_modules.append(f" - {name} @ {root_paths}")
                module_user_step_decorators = _names_provided_by(
                    all_user_step_decorators, info.name
                )
                module_user_flow_decorators = _names_provided_by(
                    all_user_flow_decorators, info.name
                )
                if module_user_step_decorators:
                    mf_modules.append(
                        f" - Provides step decorators: {', '.join(module_user_step_decorators)}"
                    )
                if module_user_flow_decorators:
                    mf_modules.append(
                        f" - Provides flow mutators: {', '.join(module_user_flow_decorators)}"
                    )
            else:
                other_modules.append(f" - {name} @ {root_paths}")
        if mf_modules:
            result.append("\nMetaflow modules:")
            result.extend(mf_modules)
        if other_modules:
            result.append("\nNon-Metaflow packaged modules:")
            result.extend(other_modules)

    return "\n".join(result)
229
+
230
def add_info(self, info: Dict[str, Any]) -> None:
    """
    Register the INFO file as generated content of the Metaflow content

    The dictionary is serialized to JSON and stored UTF-8 encoded.

    Parameters
    ----------
    info: Dict[str, Any]
        The content of the INFO file

    Raises
    ------
    MetaflowException
        If an INFO file was already added
    """
    archive_path = os.path.join(self._other_dir, self._info_file)
    already_added = archive_path in self._other_content
    if already_added:
        raise MetaflowException("INFO file already present in the MF environment")
    serialized = json.dumps(info)
    self._other_content[archive_path] = serialized.encode("utf-8")
243
+
244
def add_config(self, config: Dict[str, Any]) -> None:
    """
    Register the CONFIG file as generated content of the Metaflow content

    The dictionary is serialized to JSON and stored UTF-8 encoded.

    Parameters
    ----------
    config: Dict[str, Any]
        The content of the CONFIG file

    Raises
    ------
    MetaflowException
        If a CONFIG file was already added
    """
    archive_path = os.path.join(self._other_dir, self._config_file)
    already_added = archive_path in self._other_content
    if already_added:
        raise MetaflowException("CONFIG file already present in the MF environment")
    serialized = json.dumps(config)
    self._other_content[archive_path] = serialized.encode("utf-8")
257
+
258
def add_module(self, module: ModuleType) -> None:
    """
    Add a python module to the Metaflow content

    The module is registered as a non-Metaflow module (added manually by the
    user) and the files it provides are added to the files coming from modules.

    Parameters
    ----------
    module: ModuleType
        The module to add
    """
    name = module.__name__
    debug.package_exec(f"Adding module {name} to the MF content")
    # If the module is a single file, we handle this here by looking at __file__
    # which will point to the single file. If it is an actual module, __path__
    # will contain the path(s) to the module
    if hasattr(module, "__file__") and module.__file__:
        root_paths = [Path(module.__file__).resolve().as_posix()]
    else:
        root_paths = []
        seen_path_values = set()
        new_paths = module.__spec__.submodule_search_locations
        # Search locations may not be directories: resolve them level by level
        # until we reach directories (or run out of new locations to explore).
        while new_paths:
            paths = new_paths
            new_paths = []
            for p in paths:
                if p in seen_path_values:
                    continue
                if os.path.isdir(p):
                    root_paths.append(Path(p).resolve().as_posix())
                elif p in sys.path_importer_cache:
                    # We have a path hook that we likely need to call to get the actual path
                    cached_finder = sys.path_importer_cache[p]
                    # The cache stores None when no path hook could provide a
                    # finder for this path; there is nothing to query then.
                    addl_spec = (
                        cached_finder.find_spec(name)
                        if cached_finder is not None
                        else None
                    )
                    if (
                        addl_spec is not None
                        and addl_spec.submodule_search_locations
                    ):
                        new_paths.extend(addl_spec.submodule_search_locations)
                else:
                    # This may not be as required since it is likely the importer cache has
                    # everything already but just in case, we will also go through the
                    # path hooks and see if we find another one
                    for path_hook in sys.path_hooks:
                        try:
                            finder = path_hook(p)
                            addl_spec = finder.find_spec(name)
                            if (
                                addl_spec is not None
                                and addl_spec.submodule_search_locations
                            ):
                                new_paths.extend(
                                    addl_spec.submodule_search_locations
                                )
                                break
                        except ImportError:
                            # This hook does not handle this path; try the next one
                            continue
                seen_path_values.add(p)
    self._modules[name] = _ModuleInfo(
        name,
        set(root_paths),
        module,
        False,  # This is not a Metaflow module (added by the user manually)
    )
    self._files_from_modules.update(
        self._module_files(name, self._modules[name].root_paths)
    )
322
+
323
def add_code_file(self, file_path: str, file_name: str) -> None:
    """
    Register a code file in the Metaflow content

    The file is stored in the archive under the code directory. Leading "/" are
    removed from `file_name` before it is joined to that directory. Adding the
    same file again under the same name is a no-op.

    Parameters
    ----------
    file_path: str
        The path to the code file to add (on the filesystem); it is resolved
        with `os.path.realpath`
    file_name: str
        The path in the archive to add the code file to

    Raises
    ------
    MetaflowException
        If the file was already added under a different archive name
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding code file {file_path} as {file_name} to the MF content"
    )

    archive_name = os.path.join(self._code_dir, file_name.lstrip("/"))
    if file_path in self._files:
        existing_name = self._files[file_path]
        if existing_name != archive_name:
            raise MetaflowException(
                "File '%s' is already present in the MF content with a different name: '%s'"
                % (file_path, existing_name)
            )
    self._files[file_path] = archive_name
347
+
348
def add_other_file(self, file_path: str, file_name: str) -> None:
    """
    Register a non-python file in the Metaflow content

    The file is stored in the archive under the other-content directory. Leading
    "/" are removed from `file_name` before it is joined to that directory.
    Adding the same file again under the same name is a no-op.

    Parameters
    ----------
    file_path: str
        The path to the file to add (on the filesystem); it is resolved with
        `os.path.realpath`
    file_name: str
        The path in the archive to add the file to

    Raises
    ------
    MetaflowException
        If the file was already added under a different archive name
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding other file {file_path} as {file_name} to the MF content"
    )
    archive_name = os.path.join(self._other_dir, file_name.lstrip("/"))
    if file_path in self._other_files:
        existing_name = self._other_files[file_path]
        if existing_name != archive_name:
            raise MetaflowException(
                "File %s is already present in the MF content with a different name: %s"
                % (file_path, existing_name)
            )
    self._other_files[file_path] = archive_name
373
+
374
def _content(
    self, content_types: Optional[int] = None, generate_value: bool = False
) -> Generator[Tuple[Union[str, bytes], str], None, None]:
    """
    Common implementation behind `content_names` and `contents`.

    Parameters
    ----------
    content_types : Optional[int]
        Bit mask built from `ContentType` values selecting what to yield. If
        None, `ContentType.ALL_CONTENT` is used.
    generate_value : bool, default False
        If True, generated (non-file) entries are yielded with their bytes
        content; if False, they are yielded with a "<generated ... content>"
        placeholder string instead.

    Yields
    ------
    Tuple[Union[str, bytes], str]
        For files: path on the filesystem and name in the archive. For generated
        entries: the content (or its placeholder) and the name in the archive.
    """
    # NOTE(review): MetaflowPackage is not referenced anywhere in this method;
    # confirm whether this import is still needed (e.g. for its side effects).
    from ..package import MetaflowPackage  # Prevent circular dependency

    if content_types is None:
        content_types = ContentType.ALL_CONTENT.value

    if content_types & ContentType.CODE_CONTENT.value:
        # Metaflow and extension files first, then explicitly added code files
        yield from self._cached_metaflow_files
        yield from self._files.items()
    if content_types & ContentType.MODULE_CONTENT.value:
        yield from self._files_from_modules.items()
    if content_types & ContentType.OTHER_CONTENT.value:
        yield from self._other_files.items()
        if generate_value:
            # _other_content maps archive name -> bytes; yield as (content, name)
            for k, v in self._other_content.items():
                yield v, k
            # Include the distribution file too
            yield json.dumps(self._distmetainfo).encode("utf-8"), os.path.join(
                self._other_dir, self._dist_info_file
            )
            # The marker file carries the output of create_mfcontent_info()
            yield json.dumps(self.create_mfcontent_info()).encode(
                "utf-8"
            ), MFCONTENT_MARKER
        else:
            # Same entries as above, in the same order, but with placeholders
            for k in self._other_content.keys():
                yield "<generated %s content>" % (os.path.basename(k)), k
            yield "<generated %s content>" % (
                os.path.basename(self._dist_info_file)
            ), os.path.join(self._other_dir, self._dist_info_file)
            yield "<generated %s content>" % MFCONTENT_MARKER, MFCONTENT_MARKER
406
+
407
def _metaflow_distribution_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Enumerate the files of the Metaflow distribution to package.

    Walks the "metaflow" directory located under the Metaflow root, keeping the
    files selected by a suffix filter built from `METAFLOW_SUFFIXES_LIST`.

    Yields
    ------
    Tuple[str, str]
        First element produced by `walk` and the second one joined to the code
        directory (presumably the filesystem path and the name in the archive)
    """
    debug.package_exec("Including Metaflow from '%s'" % self._metaflow_root)
    metaflow_dir = os.path.join(self._metaflow_root, "metaflow")
    keep_file = suffix_filter(self.METAFLOW_SUFFIXES_LIST)
    for entry in walk(metaflow_dir, exclude_hidden=False, file_filter=keep_file):
        archive_name = os.path.join(self._code_dir, entry[1])
        yield entry[0], archive_name
415
+
416
def _metaflow_extension_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Enumerate the files of the Metaflow extensions to package.

    Once all files have been yielded, and only if package debugging is enabled,
    the root paths of the packaged extensions are logged.

    Yields
    ------
    Tuple[str, str]
        First element produced by `package_mfext_all` and the second one joined
        to the code directory (presumably the filesystem path and the name in
        the archive)
    """
    # Metaflow extensions; for now, we package *all* extensions but this may change
    # at a later date; it is possible to call `package_mfext_package` instead of
    # `package_mfext_all` but in that case, make sure to also add a
    # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions
    # package and prevent other extensions from being loaded that may be
    # present in the rest of the system
    for entry in package_mfext_all():
        archive_name = os.path.join(self._code_dir, entry[1])
        yield entry[0], archive_name
    if debug.package:
        # Only the root paths are of interest in the debug output
        logged_keys = ("root_paths",)
        ext_info = {
            ext_name: {
                key: value for key, value in details.items() if key in logged_keys
            }
            for ext_name, details in package_mfext_all_descriptions().items()
        }
        debug.package_exec(f"Metaflow extensions packaged: {ext_info}")
432
+
433
def _module_files(
    self, name: str, paths: Set[str]
) -> Generator[Tuple[str, str], None, None]:
    """
    Yield the files that make up module `name`.

    Files are first gathered from the distributions that
    `modules_to_distributions()` associates with `name`. Any entry of `paths`
    that was not accounted for by a distribution is then included directly
    (either as a single file or by walking the directory). Finally, if no
    top-level `__init__.py` was seen, an empty one is added to close the
    module and prevent leaks from possible namespace packages.

    As a side effect, the METADATA and RECORD contents of every distribution
    encountered are stored in `self._distmetainfo`.

    Parameters
    ----------
    name : str
        Name of the module. It is split on "." and the resulting components
        are used as the destination directory under `self._code_dir`.
    paths : Set[str]
        Locations of the module. The set is copied and never modified.

    Yields
    ------
    Tuple[str, str]
        Path of the file on disk and its destination path under
        `self._code_dir`.

    Raises
    ------
    RuntimeError
        If a distribution lists files for this module but the location it
        reports for the module is not one of `paths`.
    """
    debug.package_exec(
        " Looking for distributions for module %s in %s" % (name, paths)
    )
    paths = set(paths)  # Do not modify external paths
    has_init = False
    distributions = modules_to_distributions().get(name)
    prefix_parts = tuple(name.split("."))
    prefix_len = len(prefix_parts)

    seen_distributions = set()
    for dist in distributions or []:
        dist_name = dist.metadata["Name"]  # dist.name not always present
        if dist_name in seen_distributions:
            # For some reason, sometimes the same distribution appears twice. We
            # don't need to process twice.
            continue
        seen_distributions.add(dist_name)
        debug.package_exec(
            " Including distribution '%s' for module '%s'" % (dist_name, name)
        )
        dist_root = str(dist.locate_file(name))
        has_file_in_root = False
        if dist_name not in self._distmetainfo:
            # Possible that a distribution contributes to multiple modules
            self._distmetainfo[dist_name] = {
                # We can add more if needed but these are likely the most
                # useful (captures, name, version, etc and files which can
                # be used to find non-python files in the distribution).
                "METADATA": dist.read_text("METADATA") or "",
                "RECORD": dist.read_text("RECORD") or "",
            }
        for file in dist.files or []:
            # Skip files that do not belong to this module (distribution may
            # provide multiple modules). A path that is exactly the module
            # prefix has nothing below it, so it is skipped as well instead of
            # failing on the index lookup below.
            if (
                len(file.parts) <= prefix_len
                or file.parts[:prefix_len] != prefix_parts
                or file.suffix == ".pth"
                or str(file).startswith("__editable__")
            ):
                continue
            if file.parts[prefix_len] == "__init__.py":
                has_init = True
            has_file_in_root = True
            # At this point, we know that we are seeing actual files in the
            # dist_root so we make sure it is as expected
            if dist_root not in paths:
                # This is an error because it means that this distribution is
                # not contributing to the module.
                raise RuntimeError(
                    "Distribution '%s' is not contributing to module '%s' as "
                    "expected (got '%s' when expected one of %s)"
                    % (dist_name, name, dist_root, paths)
                )
            # Strip exactly the components matched above so that the module
            # prefix is not duplicated in the destination for dotted names.
            yield dist.locate_file(file).resolve().as_posix(), os.path.join(
                self._code_dir, *prefix_parts, *file.parts[prefix_len:]
            )
        if has_file_in_root:
            paths.discard(dist_root)

    # Now if there are more paths left in paths, it means there is a non-distribution
    # component to this package which we also include.
    debug.package_exec(
        " Looking for non-distribution files for module '%s' in %s" % (name, paths)
    )
    excluded_suffixes = tuple(EXT_EXCLUDE_SUFFIXES)
    for path in paths:
        if not Path(path).is_dir():
            # Single file for the module -- this will be something like <name>.py
            yield path, os.path.join(
                self._code_dir, *prefix_parts[:-1], f"{prefix_parts[-1]}.py"
            )
            has_init = True
            continue
        for root, _, files in walk_without_cycles(path):
            for file in files:
                if file.endswith(excluded_suffixes):
                    continue
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, path)
                if rel_path == "__init__.py":
                    has_init = True
                # Use the split module name (not the raw dotted name) so the
                # destination agrees with the other branches and with the
                # closing __init__.py below.
                yield full_path, os.path.join(
                    self._code_dir, *prefix_parts, rel_path
                )
    # We now include an empty __init__.py file to close the module and prevent
    # leaks from possible namespace packages
    if not has_init:
        yield os.path.join(
            self._metaflow_root, "metaflow", "extension_support", "_empty_file.py"
        ), os.path.join(self._code_dir, *prefix_parts, "__init__.py")