ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/cli.py CHANGED
@@ -1,55 +1,48 @@
1
+ import os
2
+ import functools
1
3
  import inspect
4
+ import os
2
5
  import sys
3
6
  import traceback
4
7
  from datetime import datetime
5
- from functools import wraps
6
- import metaflow.tracing as tracing
7
8
 
9
+ import metaflow.tracing as tracing
8
10
  from metaflow._vendor import click
9
11
 
10
- from . import lint
11
- from . import plugins
12
- from . import parameters
13
- from . import decorators
14
- from . import metaflow_version
15
- from . import namespace
16
- from .metaflow_current import current
12
+ from . import decorators, lint, metaflow_version, parameters, plugins
17
13
  from .cli_args import cli_args
18
- from .tagging_util import validate_tags
19
- from .util import (
20
- resolve_identity,
21
- decompress_list,
22
- write_latest_run_id,
23
- get_latest_run_id,
24
- )
25
- from .task import MetaflowTask
14
+ from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
15
+ from .datastore import FlowDataStore, TaskDataStoreSet
16
+ from .debug import debug
26
17
  from .exception import CommandException, MetaflowException
18
+ from .flowspec import FlowStateItems
27
19
  from .graph import FlowGraph
28
- from .datastore import FlowDataStore, TaskDataStoreSet, TaskDataStore
29
-
30
- from .runtime import NativeRuntime
31
- from .package import MetaflowPackage
32
- from .plugins import (
33
- DATASTORES,
34
- ENVIRONMENTS,
35
- LOGGING_SIDECARS,
36
- METADATA_PROVIDERS,
37
- MONITOR_SIDECARS,
38
- )
39
20
  from .metaflow_config import (
40
21
  DEFAULT_DATASTORE,
22
+ DEFAULT_DECOSPECS,
41
23
  DEFAULT_ENVIRONMENT,
42
24
  DEFAULT_EVENT_LOGGER,
43
25
  DEFAULT_METADATA,
44
26
  DEFAULT_MONITOR,
45
27
  DEFAULT_PACKAGE_SUFFIXES,
46
28
  )
29
+ from .metaflow_current import current
30
+ from .metaflow_profile import from_start
31
+ from metaflow.system import _system_monitor, _system_logger
47
32
  from .metaflow_environment import MetaflowEnvironment
33
+ from .packaging_sys import MetaflowCodeContent
34
+ from .plugins import (
35
+ DATASTORES,
36
+ ENVIRONMENTS,
37
+ LOGGING_SIDECARS,
38
+ METADATA_PROVIDERS,
39
+ MONITOR_SIDECARS,
40
+ )
48
41
  from .pylint_wrapper import PyLint
49
- from .R import use_r, metaflow_r_version
50
- from .mflog import mflog, LOG_SOURCES
51
- from .unbounded_foreach import UBF_CONTROL, UBF_TASK
52
-
42
+ from .R import metaflow_r_version, use_r
43
+ from .util import get_latest_run_id, resolve_identity, decompress_list
44
+ from .user_configs.config_options import LocalFileInput, config_options
45
+ from .user_configs.config_parameters import ConfigValue
53
46
 
54
47
  ERASE_TO_EOL = "\033[K"
55
48
  HIGHLIGHT = "red"
@@ -59,19 +52,21 @@ LOGGER_TIMESTAMP = "magenta"
59
52
  LOGGER_COLOR = "green"
60
53
  LOGGER_BAD_COLOR = "red"
61
54
 
62
- try:
63
- # Python 2
64
- import cPickle as pickle
65
- except ImportError:
66
- # Python 3
67
- import pickle
68
-
69
55
 
70
56
  def echo_dev_null(*args, **kwargs):
71
57
  pass
72
58
 
73
59
 
74
60
  def echo_always(line, **kwargs):
61
+ if kwargs.pop("wrap", False):
62
+ import textwrap
63
+
64
+ indent_str = INDENT if kwargs.get("indent", None) else ""
65
+ effective_width = 80 - len(indent_str)
66
+ wrapped = textwrap.wrap(line, width=effective_width, break_long_words=False)
67
+ line = "\n".join(indent_str + l for l in wrapped)
68
+ kwargs["indent"] = False
69
+
75
70
  kwargs["err"] = kwargs.get("err", True)
76
71
  if kwargs.pop("indent", None):
77
72
  line = "\n".join(INDENT + x for x in line.splitlines())
@@ -124,7 +119,18 @@ def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=Tru
124
119
  click.secho(body, bold=system_msg, fg=LOGGER_BAD_COLOR if bad else None, nl=nl)
125
120
 
126
121
 
127
- @click.group()
122
+ @click.group(
123
+ cls=LazyGroup,
124
+ lazy_subcommands={
125
+ "init": "metaflow.cli_components.init_cmd.init",
126
+ "dump": "metaflow.cli_components.dump_cmd.dump",
127
+ "step": "metaflow.cli_components.step_cmd.step",
128
+ "run": "metaflow.cli_components.run_cmds.run",
129
+ "resume": "metaflow.cli_components.run_cmds.resume",
130
+ "spin": "metaflow.cli_components.run_cmds.spin",
131
+ "spin-step": "metaflow.cli_components.step_cmd.spin_step",
132
+ },
133
+ )
128
134
  def cli(ctx):
129
135
  pass
130
136
 
@@ -138,7 +144,13 @@ def cli(ctx):
138
144
  )
139
145
  @click.pass_obj
140
146
  def check(obj, warnings=False):
141
- _check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings)
147
+ if obj.is_quiet:
148
+ echo = echo_dev_null
149
+ else:
150
+ echo = echo_always
151
+ _check(
152
+ echo, obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings
153
+ )
142
154
  fname = inspect.getfile(obj.flow.__class__)
143
155
  echo(
144
156
  "\n*'{cmd} show'* shows a description of this flow.\n"
@@ -153,8 +165,14 @@ def check(obj, warnings=False):
153
165
  @click.pass_obj
154
166
  def show(obj):
155
167
  echo_always("\n%s" % obj.graph.doc)
156
- for _, node in sorted((n.func_lineno, n) for n in obj.graph):
157
- echo_always("\nStep *%s*" % node.name, err=False)
168
+ for node_name in obj.graph.sorted_nodes:
169
+ echo_always("")
170
+ node = obj.graph[node_name]
171
+ for deco in node.decorators:
172
+ echo_always("@%s" % deco.name, err=False)
173
+ for deco in node.wrappers:
174
+ echo_always("@%s" % deco.decorator_name, err=False)
175
+ echo_always("Step *%s*" % node.name, err=False)
158
176
  echo_always(node.doc if node.doc else "?", indent=True, err=False)
159
177
  if node.type != "end":
160
178
  echo_always(
@@ -188,15 +206,15 @@ def output_raw(obj, json):
188
206
  else:
189
207
  _graph = str(obj.graph)
190
208
  _msg = "Internal representation of the flow:"
191
- echo(_msg, fg="magenta", bold=False)
209
+ echo_always(_msg, fg="magenta", bold=False)
192
210
  echo_always(_graph, err=False)
193
211
 
194
212
 
195
213
  @cli.command(help="Visualize the flow with Graphviz.")
196
214
  @click.pass_obj
197
215
  def output_dot(obj):
198
- echo("Visualizing the flow as a GraphViz graph", fg="magenta", bold=False)
199
- echo(
216
+ echo_always("Visualizing the flow as a GraphViz graph", fg="magenta", bold=False)
217
+ echo_always(
200
218
  "Try piping the output to 'dot -Tpng -o graph.png' to produce "
201
219
  "an actual image.",
202
220
  indent=True,
@@ -204,712 +222,31 @@ def output_dot(obj):
204
222
  echo_always(obj.graph.output_dot(), err=False)
205
223
 
206
224
 
207
- @cli.command(
208
- help="Get data artifacts of a task or all tasks in a step. "
209
- "The format for input-path is either <run_id>/<step_name> or "
210
- "<run_id>/<step_name>/<task_id>."
211
- )
212
- @click.argument("input-path")
213
- @click.option(
214
- "--private/--no-private",
215
- default=False,
216
- show_default=True,
217
- help="Show also private attributes.",
218
- )
219
- @click.option(
220
- "--max-value-size",
221
- default=1000,
222
- show_default=True,
223
- type=int,
224
- help="Show only values that are smaller than this number. "
225
- "Set to 0 to see only keys.",
226
- )
227
- @click.option(
228
- "--include",
229
- type=str,
230
- default="",
231
- help="Include only artifacts in the given comma-separated list.",
232
- )
233
- @click.option(
234
- "--file", type=str, default=None, help="Serialize artifacts in the given file."
235
- )
236
- @click.pass_obj
237
- def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
238
- output = {}
239
- kwargs = {
240
- "show_private": private,
241
- "max_value_size": max_value_size,
242
- "include": {t for t in include.split(",") if t},
243
- }
244
-
245
- # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
246
- parts = input_path.split("/")
247
- if len(parts) == 2:
248
- run_id, step_name = parts
249
- task_id = None
250
- elif len(parts) == 3:
251
- run_id, step_name, task_id = parts
252
- else:
253
- raise CommandException(
254
- "input_path should either be run_id/step_name or run_id/step_name/task_id"
255
- )
256
-
257
- datastore_set = TaskDataStoreSet(
258
- obj.flow_datastore,
259
- run_id,
260
- steps=[step_name],
261
- prefetch_data_artifacts=kwargs.get("include"),
262
- )
263
- if task_id:
264
- ds_list = [datastore_set.get_with_pathspec(input_path)]
265
- else:
266
- ds_list = list(datastore_set) # get all tasks
267
-
268
- for ds in ds_list:
269
- echo(
270
- "Dumping output of run_id=*{run_id}* "
271
- "step=*{step}* task_id=*{task_id}*".format(
272
- run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
273
- ),
274
- fg="magenta",
275
- )
276
-
277
- if file is None:
278
- echo_always(
279
- ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
280
- )
281
- else:
282
- output[ds.pathspec] = ds.to_dict(**kwargs)
283
-
284
- if file is not None:
285
- with open(file, "wb") as f:
286
- pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
287
- echo("Artifacts written to *%s*" % file)
288
-
289
-
290
- @cli.command(
291
- help="Show stdout/stderr produced by a task or all tasks in a step. "
292
- "The format for input-path is either <run_id>/<step_name> or "
293
- "<run_id>/<step_name>/<task_id>."
294
- )
295
- @click.argument("input-path")
296
- @click.option(
297
- "--stdout/--no-stdout",
298
- default=False,
299
- show_default=True,
300
- help="Show stdout of the task.",
301
- )
302
- @click.option(
303
- "--stderr/--no-stderr",
304
- default=False,
305
- show_default=True,
306
- help="Show stderr of the task.",
307
- )
308
- @click.option(
309
- "--both/--no-both",
310
- default=True,
311
- show_default=True,
312
- help="Show both stdout and stderr of the task.",
313
- )
314
- @click.option(
315
- "--timestamps/--no-timestamps",
316
- default=False,
317
- show_default=True,
318
- help="Show timestamps.",
319
- )
320
- @click.pass_obj
321
- def logs(obj, input_path, stdout=None, stderr=None, both=None, timestamps=False):
322
- types = set()
323
- if stdout:
324
- types.add("stdout")
325
- both = False
326
- if stderr:
327
- types.add("stderr")
328
- both = False
329
- if both:
330
- types.update(("stdout", "stderr"))
331
-
332
- streams = list(sorted(types, reverse=True))
333
-
334
- # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
335
- parts = input_path.split("/")
336
- if len(parts) == 2:
337
- run_id, step_name = parts
338
- task_id = None
339
- elif len(parts) == 3:
340
- run_id, step_name, task_id = parts
341
- else:
342
- raise CommandException(
343
- "input_path should either be run_id/step_name "
344
- "or run_id/step_name/task_id"
345
- )
346
-
347
- datastore_set = TaskDataStoreSet(
348
- obj.flow_datastore, run_id, steps=[step_name], allow_not_done=True
349
- )
350
- if task_id:
351
- ds_list = [
352
- TaskDataStore(
353
- obj.flow_datastore,
354
- run_id=run_id,
355
- step_name=step_name,
356
- task_id=task_id,
357
- mode="r",
358
- allow_not_done=True,
359
- )
360
- ]
361
- else:
362
- ds_list = list(datastore_set) # get all tasks
363
-
364
- if ds_list:
365
-
366
- def echo_unicode(line, **kwargs):
367
- click.secho(line.decode("UTF-8", errors="replace"), **kwargs)
368
-
369
- # old style logs are non mflog-style logs
370
- maybe_old_style = True
371
- for ds in ds_list:
372
- echo(
373
- "Dumping logs of run_id=*{run_id}* "
374
- "step=*{step}* task_id=*{task_id}*".format(
375
- run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
376
- ),
377
- fg="magenta",
378
- )
379
-
380
- for stream in streams:
381
- echo(stream, bold=True)
382
- logs = ds.load_logs(LOG_SOURCES, stream)
383
- if any(data for _, data in logs):
384
- # attempt to read new, mflog-style logs
385
- for line in mflog.merge_logs([blob for _, blob in logs]):
386
- if timestamps:
387
- ts = mflog.utc_to_local(line.utc_tstamp)
388
- tstamp = ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
389
- click.secho(tstamp + " ", fg=LOGGER_TIMESTAMP, nl=False)
390
- echo_unicode(line.msg)
391
- maybe_old_style = False
392
- elif maybe_old_style:
393
- # if they are not available, we may be looking at
394
- # a legacy run (unless we have seen new-style data already
395
- # for another stream). This return an empty string if
396
- # nothing is found
397
- log = ds.load_log_legacy(stream)
398
- if log and timestamps:
399
- raise CommandException(
400
- "We can't show --timestamps for old runs. Sorry!"
401
- )
402
- echo_unicode(log, nl=False)
403
- else:
404
- raise CommandException(
405
- "No Tasks found at the given path -- "
406
- "either none exist or none have started yet"
407
- )
408
-
409
-
410
- # TODO - move step and init under a separate 'internal' subcommand
411
-
412
-
413
- @cli.command(help="Internal command to execute a single task.", hidden=True)
414
- @click.argument("step-name")
415
- @click.option(
416
- "--run-id",
417
- default=None,
418
- required=True,
419
- help="ID for one execution of all steps in the flow.",
420
- )
421
- @click.option(
422
- "--task-id",
423
- default=None,
424
- required=True,
425
- show_default=True,
426
- help="ID for this instance of the step.",
427
- )
428
- @click.option(
429
- "--input-paths",
430
- help="A comma-separated list of pathspecs specifying inputs for this step.",
431
- )
432
- @click.option(
433
- "--input-paths-filename",
434
- type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True),
435
- help="A filename containing the argument typically passed to `input-paths`",
436
- hidden=True,
437
- )
438
- @click.option(
439
- "--split-index",
440
- type=int,
441
- default=None,
442
- show_default=True,
443
- help="Index of this foreach split.",
444
- )
445
- @click.option(
446
- "--tag",
447
- "opt_tag",
448
- multiple=True,
449
- default=None,
450
- help="Annotate this run with the given tag. You can specify "
451
- "this option multiple times to attach multiple tags in "
452
- "the task.",
453
- )
454
- @click.option(
455
- "--namespace",
456
- "opt_namespace",
457
- default=None,
458
- help="Change namespace from the default (your username) to the specified tag.",
459
- )
460
- @click.option(
461
- "--retry-count",
462
- default=0,
463
- help="How many times we have attempted to run this task.",
464
- )
465
- @click.option(
466
- "--max-user-code-retries",
467
- default=0,
468
- help="How many times we should attempt running the user code.",
469
- )
470
- @click.option(
471
- "--clone-only",
472
- default=None,
473
- help="Pathspec of the origin task for this task to clone. Do "
474
- "not execute anything.",
475
- )
476
- @click.option(
477
- "--clone-run-id",
478
- default=None,
479
- help="Run id of the origin flow, if this task is part of a flow being resumed.",
480
- )
481
- @click.option(
482
- "--with",
483
- "decospecs",
484
- multiple=True,
485
- help="Add a decorator to this task. You can specify this "
486
- "option multiple times to attach multiple decorators "
487
- "to this task.",
488
- )
489
- @click.option(
490
- "--ubf-context",
491
- default="none",
492
- type=click.Choice(["none", UBF_CONTROL, UBF_TASK]),
493
- help="Provides additional context if this task is of type unbounded foreach.",
494
- )
495
- @click.option(
496
- "--num-parallel",
497
- default=0,
498
- type=int,
499
- help="Number of parallel instances of a step. Ignored in local mode (see parallel decorator code).",
500
- )
501
- @click.pass_context
502
- def step(
503
- ctx,
504
- step_name,
505
- opt_tag=None,
506
- run_id=None,
507
- task_id=None,
508
- input_paths=None,
509
- input_paths_filename=None,
510
- split_index=None,
511
- opt_namespace=None,
512
- retry_count=None,
513
- max_user_code_retries=None,
514
- clone_only=None,
515
- clone_run_id=None,
516
- decospecs=None,
517
- ubf_context="none",
518
- num_parallel=None,
519
- ):
520
- if ubf_context == "none":
521
- ubf_context = None
522
- if opt_namespace is not None:
523
- namespace(opt_namespace or None)
524
-
525
- func = None
526
- try:
527
- func = getattr(ctx.obj.flow, step_name)
528
- except:
529
- raise CommandException("Step *%s* doesn't exist." % step_name)
530
- if not func.is_step:
531
- raise CommandException("Function *%s* is not a step." % step_name)
532
- echo("Executing a step, *%s*" % step_name, fg="magenta", bold=False)
533
-
534
- if decospecs:
535
- decorators._attach_decorators_to_step(func, decospecs)
536
-
537
- step_kwargs = ctx.params
538
- # Remove argument `step_name` from `step_kwargs`.
539
- step_kwargs.pop("step_name", None)
540
- # Remove `opt_*` prefix from (some) option keys.
541
- step_kwargs = dict(
542
- [(k[4:], v) if k.startswith("opt_") else (k, v) for k, v in step_kwargs.items()]
543
- )
544
- cli_args._set_step_kwargs(step_kwargs)
545
-
546
- ctx.obj.metadata.add_sticky_tags(tags=opt_tag)
547
- if not input_paths and input_paths_filename:
548
- with open(input_paths_filename, mode="r", encoding="utf-8") as f:
549
- input_paths = f.read().strip(" \n\"'")
550
-
551
- paths = decompress_list(input_paths) if input_paths else []
552
-
553
- task = MetaflowTask(
554
- ctx.obj.flow,
555
- ctx.obj.flow_datastore,
556
- ctx.obj.metadata,
557
- ctx.obj.environment,
558
- ctx.obj.echo,
559
- ctx.obj.event_logger,
560
- ctx.obj.monitor,
561
- ubf_context,
562
- )
563
- if clone_only:
564
- task.clone_only(
565
- step_name,
566
- run_id,
567
- task_id,
568
- clone_only,
569
- retry_count,
570
- )
571
- else:
572
- task.run_step(
573
- step_name,
574
- run_id,
575
- task_id,
576
- clone_run_id,
577
- paths,
578
- split_index,
579
- retry_count,
580
- max_user_code_retries,
581
- )
582
-
583
- echo("Success", fg="green", bold=True, indent=True)
584
-
585
-
586
- @parameters.add_custom_parameters(deploy_mode=False)
587
- @cli.command(help="Internal command to initialize a run.", hidden=True)
588
- @click.option(
589
- "--run-id",
590
- default=None,
591
- required=True,
592
- help="ID for one execution of all steps in the flow.",
593
- )
594
- @click.option(
595
- "--task-id", default=None, required=True, help="ID for this instance of the step."
596
- )
597
- @click.option(
598
- "--tag",
599
- "tags",
600
- multiple=True,
601
- default=None,
602
- help="Tags for this instance of the step.",
603
- )
604
- @click.pass_obj
605
- def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
606
- # init is a separate command instead of an option in 'step'
607
- # since we need to capture user-specified parameters with
608
- # @add_custom_parameters. Adding custom parameters to 'step'
609
- # is not desirable due to the possibility of name clashes between
610
- # user-specified parameters and our internal options. Note that
611
- # user-specified parameters are often defined as environment
612
- # variables.
613
-
614
- obj.metadata.add_sticky_tags(tags=tags)
615
-
616
- runtime = NativeRuntime(
617
- obj.flow,
618
- obj.graph,
619
- obj.flow_datastore,
620
- obj.metadata,
621
- obj.environment,
622
- obj.package,
623
- obj.logger,
624
- obj.entrypoint,
625
- obj.event_logger,
626
- obj.monitor,
627
- run_id=run_id,
628
- )
629
- obj.flow._set_constants(obj.graph, kwargs)
630
- runtime.persist_constants(task_id=task_id)
631
-
632
-
633
- def common_run_options(func):
634
- @click.option(
635
- "--tag",
636
- "tags",
637
- multiple=True,
638
- default=None,
639
- help="Annotate this run with the given tag. You can specify "
640
- "this option multiple times to attach multiple tags in "
641
- "the run.",
642
- )
643
- @click.option(
644
- "--max-workers",
645
- default=16,
646
- show_default=True,
647
- help="Maximum number of parallel processes.",
648
- )
649
- @click.option(
650
- "--max-num-splits",
651
- default=100,
652
- show_default=True,
653
- help="Maximum number of splits allowed in a foreach. This "
654
- "is a safety check preventing bugs from triggering "
655
- "thousands of steps inadvertently.",
656
- )
657
- @click.option(
658
- "--max-log-size",
659
- default=10,
660
- show_default=True,
661
- help="Maximum size of stdout and stderr captured in "
662
- "megabytes. If a step outputs more than this to "
663
- "stdout/stderr, its output will be truncated.",
664
- )
665
- @click.option(
666
- "--with",
667
- "decospecs",
668
- multiple=True,
669
- help="Add a decorator to all steps. You can specify this "
670
- "option multiple times to attach multiple decorators "
671
- "in steps.",
672
- )
673
- @click.option(
674
- "--run-id-file",
675
- default=None,
676
- show_default=True,
677
- type=str,
678
- help="Write the ID of this run to the file specified.",
679
- )
680
- @wraps(func)
681
- def wrapper(*args, **kwargs):
682
- return func(*args, **kwargs)
683
-
684
- return wrapper
685
-
686
-
687
- @click.option(
688
- "--origin-run-id",
689
- default=None,
690
- help="ID of the run that should be resumed. By default, the "
691
- "last run executed locally.",
692
- )
693
- @click.option(
694
- "--run-id",
695
- default=None,
696
- help="Run ID for the new run. By default, a new run-id will be generated",
697
- hidden=True,
698
- )
699
- @click.option(
700
- "--clone-only/--no-clone-only",
701
- default=False,
702
- show_default=True,
703
- help="Only clone tasks without continuing execution",
704
- hidden=True,
705
- )
706
- @click.option(
707
- "--reentrant/--no-reentrant",
708
- default=False,
709
- show_default=True,
710
- hidden=True,
711
- help="If specified, allows this call to be called in parallel",
712
- )
713
- @click.option(
714
- "--resume-identifier",
715
- default=None,
716
- show_default=True,
717
- hidden=True,
718
- help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
719
- )
720
- @click.argument("step-to-rerun", required=False)
721
- @cli.command(help="Resume execution of a previous run of this flow.")
722
- @common_run_options
723
- @click.pass_obj
724
- def resume(
725
- obj,
726
- tags=None,
727
- step_to_rerun=None,
728
- origin_run_id=None,
729
- run_id=None,
730
- clone_only=False,
731
- reentrant=False,
732
- max_workers=None,
733
- max_num_splits=None,
734
- max_log_size=None,
735
- decospecs=None,
736
- run_id_file=None,
737
- resume_identifier=None,
738
- ):
739
- before_run(obj, tags, decospecs + obj.environment.decospecs())
740
-
741
- if origin_run_id is None:
742
- origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
743
- if origin_run_id is None:
744
- raise CommandException(
745
- "A previous run id was not found. Specify --origin-run-id."
746
- )
747
-
748
- if step_to_rerun is None:
749
- clone_steps = set()
750
- else:
751
- # validate step name
752
- if step_to_rerun not in obj.graph.nodes:
753
- raise CommandException(
754
- "invalid step name {0} specified, must be step present in "
755
- "current form of execution graph. Valid step names include: {1}".format(
756
- step_to_rerun, ",".join(list(obj.graph.nodes.keys()))
757
- )
758
- )
759
- clone_steps = {step_to_rerun}
760
-
761
- if run_id:
762
- # Run-ids that are provided by the metadata service are always integers.
763
- # External providers of run-ids (like external schedulers) always need to
764
- # be non-integers to avoid any clashes. This condition ensures this.
765
- try:
766
- int(run_id)
767
- except:
768
- pass
769
- else:
770
- raise CommandException("run-id %s cannot be an integer" % run_id)
771
-
772
- runtime = NativeRuntime(
773
- obj.flow,
774
- obj.graph,
775
- obj.flow_datastore,
776
- obj.metadata,
777
- obj.environment,
778
- obj.package,
779
- obj.logger,
780
- obj.entrypoint,
781
- obj.event_logger,
782
- obj.monitor,
783
- run_id=run_id,
784
- clone_run_id=origin_run_id,
785
- clone_only=clone_only,
786
- reentrant=reentrant,
787
- clone_steps=clone_steps,
788
- max_workers=max_workers,
789
- max_num_splits=max_num_splits,
790
- max_log_size=max_log_size * 1024 * 1024,
791
- resume_identifier=resume_identifier,
792
- )
793
- write_run_id(run_id_file, runtime.run_id)
794
- runtime.print_workflow_info()
795
- runtime.persist_constants()
796
- if clone_only:
797
- runtime.clone_original_run()
798
- else:
799
- runtime.execute()
800
-
801
-
802
- @tracing.cli_entrypoint("cli/run")
803
- @parameters.add_custom_parameters(deploy_mode=True)
804
- @cli.command(help="Run the workflow locally.")
805
- @common_run_options
806
- @click.option(
807
- "--namespace",
808
- "user_namespace",
809
- default=None,
810
- help="Change namespace from the default (your username) to "
811
- "the specified tag. Note that this option does not alter "
812
- "tags assigned to the objects produced by this run, just "
813
- "what existing objects are visible in the client API. You "
814
- "can enable the global namespace with an empty string."
815
- "--namespace=",
816
- )
817
- @click.pass_obj
818
- def run(
819
- obj,
820
- tags=None,
821
- max_workers=None,
822
- max_num_splits=None,
823
- max_log_size=None,
824
- decospecs=None,
825
- run_id_file=None,
826
- user_namespace=None,
827
- **kwargs
828
- ):
829
- if user_namespace is not None:
830
- namespace(user_namespace or None)
831
- before_run(obj, tags, decospecs + obj.environment.decospecs())
832
-
833
- runtime = NativeRuntime(
834
- obj.flow,
835
- obj.graph,
836
- obj.flow_datastore,
837
- obj.metadata,
838
- obj.environment,
839
- obj.package,
840
- obj.logger,
841
- obj.entrypoint,
842
- obj.event_logger,
843
- obj.monitor,
844
- max_workers=max_workers,
845
- max_num_splits=max_num_splits,
846
- max_log_size=max_log_size * 1024 * 1024,
847
- )
848
- write_latest_run_id(obj, runtime.run_id)
849
- write_run_id(run_id_file, runtime.run_id)
850
-
851
- obj.flow._set_constants(obj.graph, kwargs)
852
- runtime.print_workflow_info()
853
- runtime.persist_constants()
854
- runtime.execute()
855
-
856
-
857
- def write_run_id(run_id_file, run_id):
858
- if run_id_file is not None:
859
- with open(run_id_file, "w") as f:
860
- f.write(str(run_id))
861
-
862
-
863
- def before_run(obj, tags, decospecs):
864
- validate_tags(tags)
865
-
866
- # There's a --with option both at the top-level and for the run
867
- # subcommand. Why?
868
- #
869
- # "run --with shoes" looks so much better than "--with shoes run".
870
- # This is a very common use case of --with.
871
- #
872
- # A downside is that we need to have the following decorators handling
873
- # in two places in this module and make sure _init_step_decorators
874
- # doesn't get called twice.
875
- if decospecs:
876
- decorators._attach_decorators(obj.flow, decospecs)
877
- obj.graph = FlowGraph(obj.flow.__class__)
878
- obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
879
- # obj.environment.init_environment(obj.logger)
880
-
881
- decorators._init_step_decorators(
882
- obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
883
- )
884
-
885
- obj.metadata.add_sticky_tags(tags=tags)
886
-
887
- # Package working directory only once per run.
888
- # We explicitly avoid doing this in `start` since it is invoked for every
889
- # step in the run.
890
- obj.package = MetaflowPackage(
891
- obj.flow, obj.environment, obj.echo, obj.package_suffixes
892
- )
893
-
894
-
895
225
  @cli.command(help="Print the Metaflow version")
896
226
  @click.pass_obj
897
227
  def version(obj):
898
228
  echo_always(obj.version)
899
229
 
900
230
 
901
- @tracing.cli_entrypoint("cli/start")
231
+ # NOTE: add_decorator_options should be TL because it checks to make sure
232
+ # that no option conflicts with the ones below
902
233
  @decorators.add_decorator_options
234
+ @config_options
903
235
  @click.command(
904
- cls=click.CommandCollection,
905
- sources=[cli] + plugins.get_plugin_cli(),
236
+ cls=LazyPluginCommandCollection,
237
+ sources=[cli],
238
+ lazy_sources=plugins.get_plugin_cli_path(),
906
239
  invoke_without_command=True,
907
240
  )
241
+ # Quiet is eager to make sure it is available when processing --config options since
242
+ # we need it to construct a context to pass to any DeployTimeField for the default
243
+ # value.
908
244
  @click.option(
909
245
  "--quiet/--not-quiet",
910
246
  show_default=True,
911
247
  default=False,
912
248
  help="Suppress unnecessary messages",
249
+ is_eager=True,
913
250
  )
914
251
  @click.option(
915
252
  "--metadata",
@@ -925,12 +262,22 @@ def version(obj):
925
262
  type=click.Choice(["local"] + [m.TYPE for m in ENVIRONMENTS]),
926
263
  help="Execution environment type",
927
264
  )
265
+ @click.option(
266
+ "--force-rebuild-environments/--no-force-rebuild-environments",
267
+ is_flag=True,
268
+ default=False,
269
+ hidden=True,
270
+ type=bool,
271
+ help="Explicitly rebuild the execution environments",
272
+ )
273
+ # See comment for --quiet
928
274
  @click.option(
929
275
  "--datastore",
930
276
  default=DEFAULT_DATASTORE,
931
277
  show_default=True,
932
278
  type=click.Choice([d.TYPE for d in DATASTORES]),
933
279
  help="Data backend type",
280
+ is_eager=True,
934
281
  )
935
282
  @click.option("--datastore-root", help="Root path for datastore")
936
283
  @click.option(
@@ -966,12 +313,29 @@ def version(obj):
966
313
  type=click.Choice(MONITOR_SIDECARS),
967
314
  help="Monitoring backend type",
968
315
  )
316
+ @click.option(
317
+ "--local-config-file",
318
+ type=LocalFileInput(exists=True, readable=True, dir_okay=False, resolve_path=True),
319
+ required=False,
320
+ default=None,
321
+ help="A filename containing the dumped configuration values. Internal use only.",
322
+ hidden=True,
323
+ is_eager=True,
324
+ )
325
+ @click.option(
326
+ "--mode",
327
+ type=click.Choice(["spin"]),
328
+ default=None,
329
+ help="Execution mode for metaflow CLI commands. Use 'spin' to enable "
330
+ "spin metadata and spin datastore for executions",
331
+ )
969
332
  @click.pass_context
970
333
  def start(
971
334
  ctx,
972
335
  quiet=False,
973
336
  metadata=None,
974
337
  environment=None,
338
+ force_rebuild_environments=False,
975
339
  datastore=None,
976
340
  datastore_root=None,
977
341
  decospecs=None,
@@ -979,9 +343,12 @@ def start(
979
343
  pylint=None,
980
344
  event_logger=None,
981
345
  monitor=None,
346
+ local_config_file=None,
347
+ config=None,
348
+ config_value=None,
349
+ mode=None,
982
350
  **deco_options
983
351
  ):
984
- global echo
985
352
  if quiet:
986
353
  echo = echo_dev_null
987
354
  else:
@@ -992,40 +359,27 @@ def start(
992
359
  if use_r():
993
360
  version = metaflow_r_version()
994
361
 
362
+ from_start("MetaflowCLI: Starting")
995
363
  echo("Metaflow %s" % version, fg="magenta", bold=True, nl=False)
996
364
  echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
997
365
  echo(" for *%s*" % resolve_identity(), fg="magenta")
998
366
 
367
+ # Check if we need to set up the distribution finder
368
+ dist_info = MetaflowCodeContent.get_distribution_finder()
369
+ if dist_info:
370
+ sys.meta_path.append(dist_info)
371
+
372
+ # Setup the context
999
373
  cli_args._set_top_kwargs(ctx.params)
1000
374
  ctx.obj.echo = echo
1001
375
  ctx.obj.echo_always = echo_always
1002
376
  ctx.obj.is_quiet = quiet
1003
- ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
1004
377
  ctx.obj.logger = logger
1005
- ctx.obj.check = _check
1006
378
  ctx.obj.pylint = pylint
379
+ ctx.obj.check = functools.partial(_check, echo)
1007
380
  ctx.obj.top_cli = cli
1008
381
  ctx.obj.package_suffixes = package_suffixes.split(",")
1009
- ctx.obj.reconstruct_cli = _reconstruct_cli
1010
-
1011
- ctx.obj.environment = [
1012
- e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
1013
- ][0](ctx.obj.flow)
1014
- ctx.obj.environment.validate_environment(echo, datastore)
1015
-
1016
- ctx.obj.event_logger = LOGGING_SIDECARS[event_logger](
1017
- flow=ctx.obj.flow, env=ctx.obj.environment
1018
- )
1019
- ctx.obj.event_logger.start()
1020
-
1021
- ctx.obj.monitor = MONITOR_SIDECARS[monitor](
1022
- flow=ctx.obj.flow, env=ctx.obj.environment
1023
- )
1024
- ctx.obj.monitor.start()
1025
-
1026
- ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == metadata][0](
1027
- ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
1028
- )
382
+ ctx.obj.spin_mode = mode == "spin"
1029
383
 
1030
384
  ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
1031
385
 
@@ -1042,6 +396,106 @@ def start(
1042
396
  ctx.obj.datastore_impl.datastore_root = datastore_root
1043
397
 
1044
398
  FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
399
+
400
+ # At this point, we are able to resolve the user-configuration options so we can
401
+ # process all those decorators that the user added that will modify the flow based
402
+ # on those configurations. It is important to do this as early as possible since it
403
+ # actually modifies the flow itself
404
+
405
+ # When we process the options, the first one processed will return None and the
406
+ # second one processed will return the actual options. The order of processing
407
+ # depends on what (and in what order) the user specifies on the command line.
408
+ config_options = config or config_value
409
+
410
+ if (
411
+ hasattr(ctx, "saved_args")
412
+ and ctx.saved_args
413
+ and ctx.saved_args[0] == "resume"
414
+ and getattr(ctx.obj, "has_config_options", False)
415
+ ):
416
+ # In the case of resume, we actually need to load the configurations
417
+ # from the resumed run to process them. This can be slightly onerous so check
418
+ # if we need to in the first place
419
+ if getattr(ctx.obj, "has_cl_config_options", False):
420
+ raise click.UsageError(
421
+ "Cannot specify --config or --config-value with 'resume'"
422
+ )
423
+ # We now load the config artifacts from the original run id
424
+ run_id = None
425
+ try:
426
+ idx = ctx.saved_args.index("--origin-run-id")
427
+ except ValueError:
428
+ idx = -1
429
+ if idx >= 0:
430
+ run_id = ctx.saved_args[idx + 1]
431
+ else:
432
+ run_id = get_latest_run_id(ctx.obj.echo, ctx.obj.flow.name)
433
+ if run_id is None:
434
+ raise CommandException(
435
+ "A previous run id was not found. Specify --origin-run-id."
436
+ )
437
+ # We get the name of the parameters we need to load from the datastore -- these
438
+ # are accessed using the *variable* name and not necessarily the *parameter* name
439
+ config_var_names = []
440
+ config_param_names = []
441
+ for name, param in ctx.obj.flow._get_parameters():
442
+ if not param.IS_CONFIG_PARAMETER:
443
+ continue
444
+ config_var_names.append(name)
445
+ config_param_names.append(param.name)
446
+
447
+ # We just need a task datastore that will be thrown away -- we do this so
448
+ # we don't have to create the logger, monitor, etc.
449
+ debug.userconf_exec("Loading config parameters from run %s" % run_id)
450
+ for d in TaskDataStoreSet(
451
+ FlowDataStore(ctx.obj.flow.name),
452
+ run_id,
453
+ steps=["_parameters"],
454
+ prefetch_data_artifacts=config_var_names,
455
+ ):
456
+ param_ds = d
457
+
458
+ # We can now set the CONFIGS value in the flow properly. This will overwrite
459
+ # anything that may have been passed in by default and we will use exactly what
460
+ # the original flow had. Note that these are accessed through the parameter name
461
+ ctx.obj.flow._flow_state[FlowStateItems.CONFIGS].clear()
462
+ d = ctx.obj.flow._flow_state[FlowStateItems.CONFIGS]
463
+ for param_name, var_name in zip(config_param_names, config_var_names):
464
+ val = param_ds[var_name]
465
+ debug.userconf_exec("Loaded config %s as: %s" % (param_name, val))
466
+ d[param_name] = val
467
+
468
+ elif getattr(ctx.obj, "delayed_config_exception", None):
469
+ # If we are not doing a resume, any exception we had parsing configs needs to
470
+ # be raised. For resume, since we ignore those options, we ignore the error.
471
+ raise ctx.obj.delayed_config_exception
472
+
473
+ # Init all values in the flow mutators and then process them
474
+ for decorator in ctx.obj.flow._flow_state[FlowStateItems.FLOW_MUTATORS]:
475
+ decorator.external_init()
476
+
477
+ new_cls = ctx.obj.flow._process_config_decorators(config_options)
478
+ if new_cls:
479
+ ctx.obj.flow = new_cls(use_cli=False)
480
+
481
+ ctx.obj.graph = ctx.obj.flow._graph
482
+
483
+ ctx.obj.environment = [
484
+ e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
485
+ ][0](ctx.obj.flow)
486
+ # set force rebuild flag for environments that support it.
487
+ ctx.obj.environment._force_rebuild = force_rebuild_environments
488
+ ctx.obj.environment.validate_environment(ctx.obj.logger, datastore)
489
+ ctx.obj.event_logger = LOGGING_SIDECARS[event_logger](
490
+ flow=ctx.obj.flow, env=ctx.obj.environment
491
+ )
492
+ ctx.obj.monitor = MONITOR_SIDECARS[monitor](
493
+ flow=ctx.obj.flow, env=ctx.obj.environment
494
+ )
495
+ ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == metadata][0](
496
+ ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
497
+ )
498
+
1045
499
  ctx.obj.flow_datastore = FlowDataStore(
1046
500
  ctx.obj.flow.name,
1047
501
  ctx.obj.environment,
@@ -1050,6 +504,61 @@ def start(
1050
504
  ctx.obj.monitor,
1051
505
  )
1052
506
 
507
+ ctx.obj.config_options = config_options
508
+ ctx.obj.is_spin = False
509
+ ctx.obj.skip_decorators = False
510
+
511
+ # Override values for spin steps, or if we are in spin mode
512
+ if (
513
+ hasattr(ctx, "saved_args")
514
+ and ctx.saved_args
515
+ and "spin" in ctx.saved_args[0]
516
+ or ctx.obj.spin_mode
517
+ ):
518
+ # To minimize side effects for spin, we will only use the following:
519
+ # - local metadata provider,
520
+ # - local datastore,
521
+ # - local environment,
522
+ # - null event logger,
523
+ # - null monitor
524
+ ctx.obj.is_spin = True
525
+ if "--skip-decorators" in ctx.saved_args:
526
+ ctx.obj.skip_decorators = True
527
+
528
+ ctx.obj.event_logger = LOGGING_SIDECARS["nullSidecarLogger"](
529
+ flow=ctx.obj.flow, env=ctx.obj.environment
530
+ )
531
+ ctx.obj.monitor = MONITOR_SIDECARS["nullSidecarMonitor"](
532
+ flow=ctx.obj.flow, env=ctx.obj.environment
533
+ )
534
+ # Use spin metadata, spin datastore, and spin datastore root
535
+ ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][0](
536
+ ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
537
+ )
538
+ ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == "spin"][0]
539
+ datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
540
+ ctx.obj.echo, create_on_absent=True
541
+ )
542
+ ctx.obj.datastore_impl.datastore_root = datastore_root
543
+
544
+ ctx.obj.flow_datastore = FlowDataStore(
545
+ ctx.obj.flow.name,
546
+ ctx.obj.environment, # Same environment as run/resume
547
+ ctx.obj.metadata, # local metadata
548
+ ctx.obj.event_logger, # null event logger
549
+ ctx.obj.monitor, # null monitor
550
+ storage_impl=ctx.obj.datastore_impl,
551
+ )
552
+
553
+ # Start event logger and monitor
554
+ ctx.obj.event_logger.start()
555
+ _system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)
556
+
557
+ ctx.obj.monitor.start()
558
+ _system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)
559
+
560
+ decorators._init(ctx.obj.flow)
561
+
1053
562
  # It is important to initialize flow decorators early as some of the
1054
563
  # things they provide may be used by some of the objects initialized after.
1055
564
  decorators._init_flow_decorators(
@@ -1061,49 +570,83 @@ def start(
1061
570
  ctx.obj.logger,
1062
571
  echo,
1063
572
  deco_options,
573
+ ctx.obj.is_spin,
574
+ ctx.obj.skip_decorators,
1064
575
  )
1065
576
 
1066
- if decospecs:
1067
- decorators._attach_decorators(ctx.obj.flow, decospecs)
577
+ # In the case of run/resume/spin, we will want to apply the TL decospecs
578
+ # *after* the run decospecs so that they don't take precedence. In other
579
+ # words, for the same decorator, we want `myflow.py run --with foo` to
580
+ # take precedence over any other `foo` decospec
581
+
582
+ # Note that top-level decospecs are used primarily with non run/resume
583
+ # options as well as with the airflow/argo/sfn integrations which pass
584
+ # all the decospecs (the ones from top-level but also the ones from the
585
+ # run/resume level) through the tl decospecs.
586
+ ctx.obj.tl_decospecs = list(decospecs or [])
1068
587
 
1069
588
  # initialize current and parameter context for deploy-time parameters
1070
589
  current._set_env(flow=ctx.obj.flow, is_running=False)
1071
590
  parameters.set_parameter_context(
1072
- ctx.obj.flow.name, ctx.obj.echo, ctx.obj.flow_datastore
591
+ ctx.obj.flow.name,
592
+ ctx.obj.echo,
593
+ ctx.obj.flow_datastore,
594
+ {
595
+ k: ConfigValue(v) if v is not None else None
596
+ for k, v in ctx.obj.flow.__class__._flow_state[
597
+ FlowStateItems.CONFIGS
598
+ ].items()
599
+ },
1073
600
  )
1074
601
 
1075
- if ctx.invoked_subcommand not in ("run", "resume"):
1076
- # run/resume are special cases because they can add more decorators with --with,
602
+ if (
603
+ hasattr(ctx, "saved_args")
604
+ and ctx.saved_args
605
+ and ctx.saved_args[0] not in ("run", "resume", "spin")
606
+ ):
607
+ # run/resume/spin are special cases because they can add more decorators with --with,
1077
608
  # so they have to take care of themselves.
1078
- decorators._attach_decorators(ctx.obj.flow, ctx.obj.environment.decospecs())
609
+ all_decospecs = ctx.obj.tl_decospecs + list(
610
+ ctx.obj.environment.decospecs() or []
611
+ )
612
+
613
+ # We add the default decospecs for everything except init and step since in those
614
+ # cases, the decospecs will already have been handled by either a run/resume
615
+ # or a scheduler setting them up in their own way.
616
+ if ctx.saved_args[0] not in ("step", "init"):
617
+ all_decospecs += DEFAULT_DECOSPECS.split()
618
+ elif ctx.saved_args[0] == "spin-step":
619
+ # If we are in spin-args, we will not attach any decorators
620
+ all_decospecs = []
621
+ if all_decospecs:
622
+ decorators._attach_decorators(ctx.obj.flow, all_decospecs)
623
+ decorators._init(ctx.obj.flow)
624
+ # Regenerate graph if we attached more decorators
625
+ ctx.obj.flow.__class__._init_graph()
626
+ ctx.obj.graph = ctx.obj.flow._graph
627
+
1079
628
  decorators._init_step_decorators(
1080
629
  ctx.obj.flow,
1081
630
  ctx.obj.graph,
1082
631
  ctx.obj.environment,
1083
632
  ctx.obj.flow_datastore,
1084
633
  ctx.obj.logger,
634
+ # The last two arguments are only used for spin steps
635
+ ctx.obj.is_spin,
636
+ ctx.obj.skip_decorators,
1085
637
  )
638
+
639
+ # Check the graph again (mutators may have changed it)
640
+ ctx.obj.graph = ctx.obj.flow._graph
641
+
1086
642
  # TODO (savin): Enable lazy instantiation of package
1087
643
  ctx.obj.package = None
644
+
1088
645
  if ctx.invoked_subcommand is None:
1089
646
  ctx.invoke(check)
1090
647
 
1091
648
 
1092
- def _reconstruct_cli(params):
1093
- for k, v in params.items():
1094
- if v:
1095
- if k == "decospecs":
1096
- k = "with"
1097
- k = k.replace("_", "-")
1098
- if not isinstance(v, tuple):
1099
- v = [v]
1100
- for value in v:
1101
- yield "--%s" % k
1102
- if not isinstance(value, bool):
1103
- yield str(value)
1104
-
1105
-
1106
- def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
649
+ def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs):
1107
650
  echo("Validating your flow...", fg="magenta", bold=False)
1108
651
  linter = lint.linter
1109
652
  # TODO set linter settings
@@ -1142,10 +685,13 @@ def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
1142
685
 
1143
686
  def print_metaflow_exception(ex):
1144
687
  echo_always(ex.headline, indent=True, nl=False, bold=True)
1145
- if ex.line_no is None:
1146
- echo_always(":")
1147
- else:
1148
- echo_always(" on line %d:" % ex.line_no, bold=True)
688
+ location = ""
689
+ if ex.source_file is not None:
690
+ location += " in file %s" % ex.source_file
691
+ if ex.line_no is not None:
692
+ location += " on line %d" % ex.line_no
693
+ location += ":"
694
+ echo_always(location, bold=True)
1149
695
  echo_always(ex.message, indent=True, bold=False, padding_bottom=True)
1150
696
 
1151
697