ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,17 @@
1
+ import json
1
2
  import os
2
3
  import platform
3
4
  import sys
4
5
 
5
6
  from .util import get_username
6
7
  from . import metaflow_version
8
+ from . import metaflow_git
7
9
  from metaflow.exception import MetaflowException
8
10
  from metaflow.extension_support import dump_module_info
9
- from metaflow.mflog import BASH_MFLOG
10
- from . import R
11
+ from metaflow.mflog import BASH_MFLOG, BASH_FLUSH_LOGS
12
+ from metaflow.package import MetaflowPackage
11
13
 
12
- version_cache = None
14
+ from . import R
13
15
 
14
16
 
15
17
  class InvalidEnvironmentException(MetaflowException):
@@ -50,8 +52,36 @@ class MetaflowEnvironment(object):
50
52
 
51
53
  def add_to_package(self):
52
54
  """
53
- A list of tuples (file, arcname) to add to the job package.
54
- `arcname` is an alternative name for the file in the job package.
55
+ Called to add custom files needed for this environment. This hook will be
56
+ called in the `MetaflowPackage` class where metaflow compiles the code package
57
+ tarball. This hook can return one of two things (the first is for backwards
58
+ compatibility -- move to the second):
59
+ - a generator yielding a tuple of `(file_path, arcname)` to add files to
60
+ the code package. `file_path` is the path to the file on the local filesystem
61
+ and `arcname` is the path relative to the packaged code.
62
+ - a generator yielding a tuple of `(content, arcname, type)` where:
63
+ - type is one of
64
+ ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}
65
+ - for USER_CONTENT:
66
+ - the file will be included relative to the directory containing the
67
+ user's flow file.
68
+ - content: path to the file to include
69
+ - arcname: path relative to the directory containing the user's flow file
70
+ - for CODE_CONTENT:
71
+ - the file will be included relative to the code directory in the package.
72
+ This will be the directory containing `metaflow`.
73
+ - content: path to the file to include
74
+ - arcname: path relative to the code directory in the package
75
+ - for MODULE_CONTENT:
76
+ - the module will be added to the code package as a python module. It will
77
+ be accessible as usual (import <module_name>)
78
+ - content: name of the module
79
+ - arcname: None (ignored)
80
+ - for OTHER_CONTENT:
81
+ - the file will be included relative to any other configuration/metadata
82
+ files for the flow
83
+ - content: path to the file to include
84
+ - arcname: path relative to the config directory in the package
55
85
  """
56
86
  return []
57
87
 
@@ -89,10 +119,18 @@ class MetaflowEnvironment(object):
89
119
  It should work silently if everything goes well.
90
120
  """
91
121
  if datastore_type == "s3":
92
- return (
93
- '%s -m awscli ${METAFLOW_S3_ENDPOINT_URL:+--endpoint-url=\\"${METAFLOW_S3_ENDPOINT_URL}\\"} '
94
- + "s3 cp %s job.tar"
95
- ) % (self._python(), code_package_url)
122
+ from .plugins.aws.aws_utils import parse_s3_full_path
123
+
124
+ bucket, s3_object = parse_s3_full_path(code_package_url)
125
+ # NOTE: the script quoting is extremely sensitive due to the way shlex.split operates and this being inserted
126
+ # into a quoted command elsewhere.
127
+ # NOTE: The extra conditionals in the script are needed because
128
+ # Boto3 does not play well with passing None or an empty string to endpoint_url
129
+ return "{python} -c '{script}'".format(
130
+ python=self._python(),
131
+ script='import boto3, os; ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\"); boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {})).download_file(\\"%s\\", \\"%s\\", \\"job.tar\\")'
132
+ % (bucket, s3_object),
133
+ )
96
134
  elif datastore_type == "azure":
97
135
  from .plugins.azure.azure_utils import parse_azure_full_path
98
136
 
@@ -119,54 +157,98 @@ class MetaflowEnvironment(object):
119
157
  )
120
158
 
121
159
  def _get_install_dependencies_cmd(self, datastore_type):
122
- cmds = ["%s -m pip install requests" % self._python()]
123
- if datastore_type == "s3":
124
- cmds.append("%s -m pip install awscli boto3" % self._python())
125
- elif datastore_type == "azure":
126
- cmds.append(
127
- "%s -m pip install azure-identity azure-storage-blob simple-azure-blob-downloader -qqq"
128
- % self._python()
160
+ base_cmd = "{} -m pip install -qqq --no-compile --no-cache-dir --disable-pip-version-check".format(
161
+ self._python()
162
+ )
163
+
164
+ datastore_packages = {
165
+ "s3": ["boto3"],
166
+ "azure": [
167
+ "azure-identity",
168
+ "azure-storage-blob",
169
+ "azure-keyvault-secrets",
170
+ "simple-azure-blob-downloader",
171
+ ],
172
+ "gs": [
173
+ "google-cloud-storage",
174
+ "google-auth",
175
+ "simple-gcp-object-downloader",
176
+ "google-cloud-secret-manager",
177
+ "packaging",
178
+ ],
179
+ }
180
+
181
+ if datastore_type not in datastore_packages:
182
+ raise NotImplementedError(
183
+ "Unknown datastore type: {}".format(datastore_type)
129
184
  )
130
- elif datastore_type == "gs":
131
- cmds.append(
132
- "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader google-cloud-secret-manager -qqq"
133
- % self._python()
185
+
186
+ cmd = "{} {}".format(
187
+ base_cmd, " ".join(datastore_packages[datastore_type] + ["requests"])
188
+ )
189
+ # skip pip installs if we know that packages might already be available
190
+ return "if [ -z $METAFLOW_SKIP_INSTALL_DEPENDENCIES ]; then {}; fi".format(cmd)
191
+
192
+ def get_package_commands(
193
+ self, code_package_url, datastore_type, code_package_metadata=None
194
+ ):
195
+ # HACK: We want to keep forward compatibility with compute layers so that
196
+ # they can still call get_package_commands and NOT pass any metadata. If
197
+ # there is no additional information, we *assume* that it is the default
198
+ # used.
199
+ if code_package_metadata is None:
200
+ code_package_metadata = json.dumps(
201
+ {
202
+ "version": 0,
203
+ "archive_format": "tgz",
204
+ "mfcontent_version": 1,
205
+ }
134
206
  )
135
- else:
136
- raise NotImplementedError(
137
- "We don't know how to generate an install dependencies cmd for datastore %s"
138
- % datastore_type
207
+
208
+ extra_exports = []
209
+ for k, v in MetaflowPackage.get_post_extract_env_vars(
210
+ code_package_metadata, dest_dir="$(pwd)"
211
+ ).items():
212
+ if k.endswith(":"):
213
+ # If the key ends with a colon, we override the existing value
214
+ extra_exports.append("export %s=%s" % (k[:-1], v))
215
+ else:
216
+ extra_exports.append(
217
+ "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
218
+ )
219
+
220
+ cmds = (
221
+ [
222
+ BASH_MFLOG,
223
+ BASH_FLUSH_LOGS,
224
+ "mflog 'Setting up task environment.'",
225
+ self._get_install_dependencies_cmd(datastore_type),
226
+ "mkdir metaflow",
227
+ "cd metaflow",
228
+ "mkdir .metaflow", # mute local datastore creation log
229
+ "i=0; while [ $i -le 5 ]; do "
230
+ "mflog 'Downloading code package...'; "
231
+ + self._get_download_code_package_cmd(code_package_url, datastore_type)
232
+ + " && mflog 'Code package downloaded.' && break; "
233
+ "sleep 10; i=$((i+1)); "
234
+ "done",
235
+ "if [ $i -gt 5 ]; then "
236
+ "mflog 'Failed to download code package from %s "
237
+ "after 6 tries. Exiting...' && exit 1; "
238
+ "fi" % code_package_url,
239
+ ]
240
+ + MetaflowPackage.get_extract_commands(
241
+ code_package_metadata, "job.tar", dest_dir="."
139
242
  )
140
- return " && ".join(cmds)
141
-
142
- def get_package_commands(self, code_package_url, datastore_type):
143
- cmds = [
144
- BASH_MFLOG,
145
- "mflog 'Setting up task environment.'",
146
- self._get_install_dependencies_cmd(datastore_type),
147
- "mkdir metaflow",
148
- "cd metaflow",
149
- "mkdir .metaflow", # mute local datastore creation log
150
- "i=0; while [ $i -le 5 ]; do "
151
- "mflog 'Downloading code package...'; "
152
- + self._get_download_code_package_cmd(code_package_url, datastore_type)
153
- + " && mflog 'Code package downloaded.' && break; "
154
- "sleep 10; i=$((i+1)); "
155
- "done",
156
- "if [ $i -gt 5 ]; then "
157
- "mflog 'Failed to download code package from %s "
158
- "after 6 tries. Exiting...' && exit 1; "
159
- "fi" % code_package_url,
160
- "TAR_OPTIONS='--warning=no-timestamp' tar xf job.tar",
161
- "mflog 'Task is starting.'",
162
- ]
243
+ + extra_exports
244
+ + [
245
+ "mflog 'Task is starting.'",
246
+ "flush_mflogs",
247
+ ]
248
+ )
163
249
  return cmds
164
250
 
165
251
  def get_environment_info(self, include_ext_info=False):
166
- global version_cache
167
- if version_cache is None:
168
- version_cache = metaflow_version.get_version()
169
-
170
252
  # note that this dict goes into the code package
171
253
  # so variables here should be relatively stable (no
172
254
  # timestamps) so the hash won't change all the time
@@ -180,8 +262,12 @@ class MetaflowEnvironment(object):
180
262
  "use_r": R.use_r(),
181
263
  "python_version": sys.version,
182
264
  "python_version_code": "%d.%d.%d" % sys.version_info[:3],
183
- "metaflow_version": version_cache,
265
+ "metaflow_version": metaflow_version.get_version(),
184
266
  "script": os.path.basename(os.path.abspath(sys.argv[0])),
267
+ # Add git info
268
+ **metaflow_git.get_repository_info(
269
+ path=os.path.dirname(os.path.abspath(sys.argv[0]))
270
+ ),
185
271
  }
186
272
  if R.use_r():
187
273
  env["metaflow_r_version"] = R.metaflow_r_version()
@@ -191,7 +277,7 @@ class MetaflowEnvironment(object):
191
277
  # Information about extension modules (to load them in the proper order)
192
278
  ext_key, ext_val = dump_module_info()
193
279
  env[ext_key] = ext_val
194
- return env
280
+ return {k: v for k, v in env.items() if v is not None and v != ""}
195
281
 
196
282
  def executable(self, step_name, default=None):
197
283
  if default is not None:
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python
2
+ """Get git repository information for the package
3
+
4
+ Functions to retrieve git repository details like URL, branch name,
5
+ and commit SHA for Metaflow code provenance tracking.
6
+ """
7
+
8
+ import os
9
+ import subprocess
10
+ from typing import Dict, List, Optional, Tuple, Union
11
+
12
+ # Cache for git information to avoid repeated subprocess calls
13
+ _git_info_cache = None
14
+
15
+ __all__ = ("get_repository_info",)
16
+
17
+
18
+ def _call_git(
19
+ args: List[str], path=Union[str, os.PathLike]
20
+ ) -> Tuple[Optional[str], Optional[int], bool]:
21
+ """
22
+ Call git with provided args.
23
+
24
+ Returns
25
+ -------
26
+ tuple : Tuple containing
27
+ (stdout, exitcode, failure) of the call
28
+ """
29
+ try:
30
+ result = subprocess.run(
31
+ ["git", *args],
32
+ cwd=path,
33
+ capture_output=True,
34
+ text=True,
35
+ check=False,
36
+ )
37
+ return result.stdout.strip(), result.returncode, False
38
+ except (OSError, subprocess.SubprocessError):
39
+ # Covers subprocess timeouts and other errors which would not lead to an exit code
40
+ return None, None, True
41
+
42
+
43
+ def _get_repo_url(path: Union[str, os.PathLike]) -> Optional[str]:
44
+ """Get the repository URL from git config"""
45
+ stdout, returncode, _failed = _call_git(
46
+ ["config", "--get", "remote.origin.url"], path
47
+ )
48
+ if returncode == 0:
49
+ url = stdout
50
+ # Convert SSH URLs to HTTPS for clickable links
51
+ if url.startswith("git@"):
52
+ parts = url.split(":", 1)
53
+ if len(parts) == 2:
54
+ domain = parts[0].replace("git@", "")
55
+ repo_path = parts[1]
56
+ url = f"https://{domain}/{repo_path}"
57
+ return url
58
+ return None
59
+
60
+
61
+ def _get_branch_name(path: Union[str, os.PathLike]) -> Optional[str]:
62
+ """Get the current git branch name"""
63
+ stdout, returncode, _failed = _call_git(["rev-parse", "--abbrev-ref", "HEAD"], path)
64
+ return stdout if returncode == 0 else None
65
+
66
+
67
+ def _get_commit_sha(path: Union[str, os.PathLike]) -> Optional[str]:
68
+ """Get the current git commit SHA"""
69
+ stdout, returncode, _failed = _call_git(["rev-parse", "HEAD"], path)
70
+ return stdout if returncode == 0 else None
71
+
72
+
73
+ def _is_in_git_repo(path: Union[str, os.PathLike]) -> bool:
74
+ """Check if we're currently in a git repository"""
75
+ stdout, returncode, _failed = _call_git(
76
+ ["rev-parse", "--is-inside-work-tree"], path
77
+ )
78
+ return returncode == 0 and stdout == "true"
79
+
80
+
81
+ def _has_uncommitted_changes(path: Union[str, os.PathLike]) -> Optional[bool]:
82
+ """Check if the git repository has uncommitted changes"""
83
+ _stdout, returncode, failed = _call_git(
84
+ ["diff-index", "--quiet", "HEAD", "--"], path
85
+ )
86
+ if failed:
87
+ return None
88
+ return returncode != 0
89
+
90
+
91
+ def get_repository_info(path: Union[str, os.PathLike]) -> Dict[str, Union[str, bool]]:
92
+ """Get git repository information for a path
93
+
94
+ Returns:
95
+ dict: Dictionary containing:
96
+ repo_url: Repository URL (converted to HTTPS if from SSH)
97
+ branch_name: Current branch name
98
+ commit_sha: Current commit SHA
99
+ has_uncommitted_changes: Boolean indicating if there are uncommitted changes
100
+ """
101
+ global _git_info_cache
102
+
103
+ if _git_info_cache is not None:
104
+ return _git_info_cache
105
+
106
+ _git_info_cache = {}
107
+ if _is_in_git_repo(path):
108
+ _git_info_cache = {
109
+ "repo_url": _get_repo_url(path),
110
+ "branch_name": _get_branch_name(path),
111
+ "commit_sha": _get_commit_sha(path),
112
+ "has_uncommitted_changes": _has_uncommitted_changes(path),
113
+ }
114
+
115
+ return _git_info_cache
@@ -2,6 +2,24 @@ import time
2
2
 
3
3
  from contextlib import contextmanager
4
4
 
5
+ from .metaflow_config import PROFILE_FROM_START
6
+
7
+ init_time = None
8
+
9
+
10
+ if PROFILE_FROM_START:
11
+
12
+ def from_start(msg: str):
13
+ global init_time
14
+ if init_time is None:
15
+ init_time = time.time()
16
+ print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))
17
+
18
+ else:
19
+
20
+ def from_start(_msg: str):
21
+ pass
22
+
5
23
 
6
24
  @contextmanager
7
25
  def profile(label, stats_dict=None):
@@ -7,11 +7,15 @@ See the documentation of get_version for more information
7
7
 
8
8
  # This file is adapted from https://github.com/aebrahim/python-git-version
9
9
 
10
- from subprocess import check_output, CalledProcessError
11
- from os import path, name, devnull, environ, listdir
12
- import json
10
+ import subprocess
11
+ from os import path, name, environ, listdir
13
12
 
14
- from metaflow import CURRENT_DIRECTORY, INFO_FILE
13
+ from metaflow.extension_support import update_package_info
14
+ from metaflow.meta_files import read_info_file
15
+
16
+
17
+ # True/False correspond to the value of `public` in get_version
18
+ _version_cache = {True: None, False: None}
15
19
 
16
20
  __all__ = ("get_version",)
17
21
 
@@ -23,11 +27,11 @@ if name == "nt":
23
27
  """find the path to the git executable on Windows"""
24
28
  # first see if git is in the path
25
29
  try:
26
- check_output(["where", "/Q", "git"])
30
+ subprocess.check_output(["where", "/Q", "git"])
27
31
  # if this command succeeded, git is in the path
28
32
  return "git"
29
33
  # catch the exception thrown if git was not found
30
- except CalledProcessError:
34
+ except subprocess.CalledProcessError:
31
35
  pass
32
36
  # There are several locations where git.exe may be hiding
33
37
  possible_locations = []
@@ -57,87 +61,167 @@ if name == "nt":
57
61
  GIT_COMMAND = find_git_on_windows()
58
62
 
59
63
 
60
- def call_git_describe(abbrev=7):
64
+ def call_git_describe(file_to_check, abbrev=7):
61
65
  """return the string output of git describe"""
62
66
  try:
63
-
64
- # first, make sure we are actually in a Metaflow repo,
65
- # not some other repo
66
- with open(devnull, "w") as fnull:
67
- arguments = [GIT_COMMAND, "rev-parse", "--show-toplevel"]
68
- reponame = (
69
- check_output(arguments, cwd=CURRENT_DIRECTORY, stderr=fnull)
70
- .decode("ascii")
71
- .strip()
72
- )
73
- if path.basename(reponame) != "metaflow":
74
- return None
75
-
76
- with open(devnull, "w") as fnull:
77
- arguments = [GIT_COMMAND, "describe", "--tags", "--abbrev=%d" % abbrev]
78
- return (
79
- check_output(arguments, cwd=CURRENT_DIRECTORY, stderr=fnull)
80
- .decode("ascii")
81
- .strip()
82
- )
83
-
84
- except (OSError, CalledProcessError):
67
+ wd = path.dirname(file_to_check)
68
+ filename = path.basename(file_to_check)
69
+
70
+ # First check if the file is tracked in the GIT repository we are in
71
+ # We do this because in some setups and for some bizarre reason, python files
72
+ # are installed directly into a git repository (I am looking at you brew). We
73
+ # don't want to consider this a GIT install in that case.
74
+ args = [GIT_COMMAND, "ls-files", "--error-unmatch", filename]
75
+ git_return_code = subprocess.run(
76
+ args,
77
+ cwd=wd,
78
+ stderr=subprocess.DEVNULL,
79
+ stdout=subprocess.DEVNULL,
80
+ check=False,
81
+ ).returncode
82
+
83
+ if git_return_code != 0:
84
+ return None
85
+
86
+ args = [
87
+ GIT_COMMAND,
88
+ "describe",
89
+ "--tags",
90
+ "--dirty",
91
+ "--long",
92
+ "--abbrev=%d" % abbrev,
93
+ ]
94
+ return (
95
+ subprocess.check_output(args, cwd=wd, stderr=subprocess.DEVNULL)
96
+ .decode("ascii")
97
+ .strip()
98
+ )
99
+
100
+ except (OSError, subprocess.CalledProcessError):
85
101
  return None
86
102
 
87
103
 
88
- def format_git_describe(git_str, pep440=False):
104
+ def format_git_describe(git_str, public=False):
89
105
  """format the result of calling 'git describe' as a python version"""
90
106
  if git_str is None:
91
107
  return None
92
- if "-" not in git_str: # currently at a tag
93
- return git_str
108
+ splits = git_str.split("-")
109
+ if len(splits) == 4:
110
+ # Formatted as <tag>-<post>-<hash>-dirty
111
+ tag, post, h = splits[:3]
112
+ dirty = "-" + splits[3]
94
113
  else:
95
- # formatted as version-N-githash
96
- # want to convert to version.postN-githash
97
- git_str = git_str.replace("-", ".post", 1)
98
- if pep440: # does not allow git hash afterwards
99
- return git_str.split("-")[0]
100
- else:
101
- return git_str.replace("-g", "+git")
114
+ # Formatted as <tag>-<post>-<hash>
115
+ tag, post, h = splits
116
+ dirty = ""
117
+ if post == "0":
118
+ if public:
119
+ return tag
120
+ return tag + dirty
121
+
122
+ if public:
123
+ return "%s.post%s" % (tag, post)
124
+
125
+ return "%s.post%s-git%s%s" % (tag, post, h[1:], dirty)
102
126
 
103
127
 
104
128
  def read_info_version():
105
129
  """Read version information from INFO file"""
106
- try:
107
- with open(INFO_FILE, "r") as contents:
108
- return json.load(contents).get("metaflow_version")
109
- except IOError:
110
- return None
130
+ info_file = read_info_file()
131
+ if info_file:
132
+ return info_file.get("metaflow_version")
133
+ return None
111
134
 
112
135
 
113
- def get_version(pep440=False):
114
- """Tracks the version number.
136
+ def make_public_version(version_string):
137
+ """
138
+ Takes a complex version string and returns a public, PEP 440-compliant version.
139
+ It removes local version identifiers (+...) and development markers (-...).
140
+ """
141
+ base_version = version_string.split("+", 1)[0]
142
+ public_version = base_version.split("-", 1)[0]
143
+ return public_version
115
144
 
116
- pep440: bool
117
- When True, this function returns a version string suitable for
118
- a release as defined by PEP 440. When False, the githash (if
119
- available) will be appended to the version string.
120
145
 
121
- If the script is located within an active git repository,
122
- git-describe is used to get the version information.
146
+ def get_version(public=False):
147
+ """Tracks the version number.
123
148
 
124
- Otherwise, the version logged by package installer is returned.
149
+ public: bool
150
+ When True, this function returns a *public* version specification which
151
+ doesn't include any local information (dirtiness or hash). See
152
+ https://packaging.python.org/en/latest/specifications/version-specifiers/#version-scheme
125
153
 
126
- If even that information isn't available (likely when executing on a
127
- remote cloud instance), the version information is returned from INFO file
128
- in the current directory.
154
+ We first check the INFO file to see if we recorded a version of Metaflow. If there
155
+ is none, we check if we are in a GIT repository and if so, form the version
156
+ from that.
129
157
 
130
- """
158
+ Otherwise, we return the version of Metaflow that was installed.
131
159
 
132
- version = format_git_describe(call_git_describe(), pep440=pep440)
133
- version_addl = None
134
- if version is None: # not a git repository
135
- import metaflow
160
+ """
136
161
 
162
+ global _version_cache
163
+
164
+ # To get the version we do the following:
165
+ # - Check if we have a cached version. If so, return that
166
+ # - Then check if we have an INFO file present. If so, use that as it is
167
+ # the most reliable way to get the version. In particular, when running remotely,
168
+ # metaflow is installed in a directory and if any extension is using distutils to
169
+ # determine its version, this would return None and querying the version directly
170
+ # from the extension would fail to produce the correct result
171
+ # - Then check if we are in a GIT repository and, if so, use git describe
172
+ # - If we don't have an INFO file, we look at the version information that is
173
+ # populated by metaflow and the extensions.
174
+
175
+ if _version_cache[public] is not None:
176
+ return _version_cache[public]
177
+
178
+ version = (
179
+ read_info_version()
180
+ ) # Version info is cached in INFO file; includes extension info
181
+
182
+ if version:
183
+ # If we have a version from the INFO file, use it directly.
184
+ # However, if we are asked for a public version, we parse it to make sure
185
+ # that no local information is included.
186
+ if public:
187
+ version = make_public_version(version)
188
+ _version_cache[public] = version
189
+ return version
190
+
191
+ # Get the version for Metaflow, favor the GIT version
192
+ import metaflow
193
+
194
+ version = format_git_describe(
195
+ call_git_describe(file_to_check=metaflow.__file__), public=public
196
+ )
197
+ if version is None:
137
198
  version = metaflow.__version__
138
- version_addl = metaflow.__version_addl__
139
- if version is None: # not a proper python package
140
- return read_info_version()
141
- if version_addl:
142
- return "+".join([version, version_addl])
199
+
200
+ # Look for extensions and compute their versions. Properly formed extensions have
201
+ # a toplevel file which will contain a __mf_extensions__ value and a __version__
202
+ # value. We already saved the properly formed modules when loading metaflow in
203
+ # __ext_tl_modules__.
204
+ ext_versions = []
205
+ for pkg_name, extension_module in metaflow.__ext_tl_modules__:
206
+ ext_name = getattr(extension_module, "__mf_extensions__", "<unk>")
207
+ ext_version = format_git_describe(
208
+ call_git_describe(file_to_check=extension_module.__file__), public=public
209
+ )
210
+ if ext_version is None:
211
+ ext_version = getattr(extension_module, "__version__", "<unk>")
212
+ # Update the package information about reported version for the extension
213
+ # (only for the full info which is called at least once -- if we update more
214
+ # it will error out since we can only update_package_info once)
215
+ if not public:
216
+ update_package_info(
217
+ package_name=pkg_name,
218
+ extension_name=ext_name,
219
+ package_version=ext_version,
220
+ )
221
+ ext_versions.append("%s(%s)" % (ext_name, ext_version))
222
+
223
+ # We now have all the information about extensions so we can form the final string
224
+ if ext_versions:
225
+ version = version + "+" + ";".join(ext_versions)
226
+ _version_cache[public] = version
143
227
  return version