metaflow 2.15.21__py2.py3-none-any.whl → 2.16.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +7 -1
- metaflow/cli.py +16 -1
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +6 -2
- metaflow/client/core.py +22 -30
- metaflow/datastore/task_datastore.py +0 -1
- metaflow/debug.py +5 -0
- metaflow/decorators.py +230 -70
- metaflow/extension_support/__init__.py +15 -8
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +80 -53
- metaflow/graph.py +24 -2
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +7 -1
- metaflow/metaflow_config.py +5 -0
- metaflow/metaflow_environment.py +82 -25
- metaflow/metaflow_version.py +1 -1
- metaflow/package/__init__.py +664 -0
- metaflow/packaging_sys/__init__.py +870 -0
- metaflow/packaging_sys/backend.py +113 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +86 -0
- metaflow/packaging_sys/utils.py +91 -0
- metaflow/packaging_sys/v1.py +476 -0
- metaflow/plugins/airflow/airflow.py +5 -1
- metaflow/plugins/airflow/airflow_cli.py +15 -4
- metaflow/plugins/argo/argo_workflows.py +15 -4
- metaflow/plugins/argo/argo_workflows_cli.py +16 -4
- metaflow/plugins/aws/batch/batch.py +22 -3
- metaflow/plugins/aws/batch/batch_cli.py +3 -0
- metaflow/plugins/aws/batch/batch_decorator.py +13 -5
- metaflow/plugins/aws/step_functions/step_functions.py +4 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +15 -4
- metaflow/plugins/cards/card_decorator.py +0 -5
- metaflow/plugins/kubernetes/kubernetes.py +8 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +13 -5
- metaflow/plugins/package_cli.py +25 -23
- metaflow/plugins/parallel_decorator.py +4 -2
- metaflow/plugins/pypi/bootstrap.py +8 -2
- metaflow/plugins/pypi/conda_decorator.py +39 -82
- metaflow/plugins/pypi/conda_environment.py +6 -2
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +11 -0
- metaflow/plugins/uv/uv_environment.py +4 -2
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +5 -4
- metaflow/runner/subprocess_manager.py +14 -2
- metaflow/runtime.py +37 -11
- metaflow/task.py +91 -7
- metaflow/user_configs/config_options.py +13 -8
- metaflow/user_configs/config_parameters.py +0 -4
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +499 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +263 -0
- metaflow/user_decorators/user_step_decorator.py +712 -0
- metaflow/util.py +4 -1
- metaflow/version.py +1 -1
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/METADATA +2 -2
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/RECORD +71 -60
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Makefile +0 -0
- {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/WHEEL +0 -0
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,476 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import sys
|
4
|
+
from pathlib import Path
|
5
|
+
from types import ModuleType
|
6
|
+
from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Union
|
7
|
+
|
8
|
+
from ..debug import debug
|
9
|
+
from ..extension_support import (
|
10
|
+
EXT_EXCLUDE_SUFFIXES,
|
11
|
+
extension_info,
|
12
|
+
package_mfext_all,
|
13
|
+
package_mfext_all_descriptions,
|
14
|
+
)
|
15
|
+
from ..exception import MetaflowException
|
16
|
+
from ..metaflow_version import get_version
|
17
|
+
from ..user_decorators.user_flow_decorator import FlowMutatorMeta
|
18
|
+
from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
|
19
|
+
from ..util import get_metaflow_root
|
20
|
+
from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
|
21
|
+
from .distribution_support import _ModuleInfo, modules_to_distributions
|
22
|
+
from .utils import suffix_filter, walk
|
23
|
+
|
24
|
+
|
25
|
+
class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
|
26
|
+
METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"]
|
27
|
+
|
28
|
+
def __init__(
    self,
    code_dir: str = MetaflowCodeContentV1Base._code_dir,
    other_dir: str = MetaflowCodeContentV1Base._other_dir,
    criteria: Callable[[ModuleType], bool] = lambda x: True,
):
    """
    Collect everything that makes up a version 1 Metaflow code package.

    Parameters
    ----------
    code_dir : str
        Forwarded to the base class initializer.
    other_dir : str
        Forwarded to the base class initializer.
    criteria : Callable[[ModuleType], bool]
        Predicate evaluated on the modules found in `sys.modules`. Of the
        modules it accepts, only those that are not a submodule of another
        accepted module are recorded, together with the files they provide.
    """
    super().__init__(code_dir, other_dir)

    self._metaflow_root = get_metaflow_root()
    self._metaflow_version = get_version()

    self._criteria = criteria

    def _root_paths(mod):
        # Packages expose their location(s) through __path__; single-file modules
        # through __file__. Return None when neither is available (for example
        # built-in modules) since there is nothing on disk to package.
        locations = getattr(mod, "__path__", None)
        if locations is None:
            single_file = getattr(mod, "__file__", None)
            if single_file is None:
                return None
            locations = [single_file]
        return set(Path(p).resolve().as_posix() for p in locations)

    # We try to find the modules we need to package. We will first look at all modules
    # and apply the criteria to them. Then we will use the most parent module that
    # fits the criteria as the module to package.
    # Work on a snapshot: `criteria` is caller-supplied and may trigger imports,
    # which would otherwise mutate sys.modules while it is being iterated.
    candidates = [
        item for item in list(sys.modules.items()) if criteria(item[1])
    ]
    # Ensure that we see the parent modules first
    candidates.sort(key=lambda item: item[0])

    new_modules = []
    last_prefix = None
    for name, mod in candidates:
        if last_prefix is not None and name.startswith(last_prefix + "."):
            # This is a submodule of the last module, we can skip it
            continue
        # Otherwise, we have a new top-level module
        last_prefix = name
        new_modules.append((name, mod))

    self._modules = {}
    for name, mod in new_modules:
        root_paths = _root_paths(mod)
        if root_paths is None:
            debug.package_exec(
                f"Skipping module {name}: it has no location on the filesystem"
            )
            continue
        self._modules[name] = _ModuleInfo(
            name,
            root_paths,
            mod,
            True,  # This is a Metaflow module (see filter below)
        )

    # Filter the modules
    self._modules = {
        name: info for name, info in self._modules.items() if criteria(info.module)
    }

    # Contain metadata information regarding the distributions packaged.
    # This allows Metaflow to "fake" distribution information when packaged
    self._distmetainfo = {}  # type: Dict[str, Dict[str, str]]

    # Maps an absolute path on the filesystem to the path of the file in the
    # archive.
    self._files = {}  # type: Dict[str, str]
    self._files_from_modules = {}  # type: Dict[str, str]

    self._other_files = {}  # type: Dict[str, str]
    self._other_content = {}  # type: Dict[str, bytes]

    debug.package_exec(f"Used system modules found: {str(self._modules)}")

    # Populate with files from the third party modules
    for k, v in self._modules.items():
        self._files_from_modules.update(self._module_files(k, v.root_paths))

    # Figure out the files to package for Metaflow and extensions
    self._cached_metaflow_files = list(self._metaflow_distribution_files())
    self._cached_metaflow_files.extend(list(self._metaflow_extension_files()))
|
99
|
+
|
100
|
+
def create_mfcontent_info(self) -> Dict[str, Any]:
    """
    Build the dictionary stored in the content marker file.

    Returns
    -------
    Dict[str, Any]
        The format version (1) and the archive names of all files coming from modules
    """
    module_files = [
        archive_name for archive_name in self._files_from_modules.values()
    ]
    info = {"version": 1}
    info["module_files"] = module_files
    return info
|
102
|
+
|
103
|
+
def get_excluded_tl_entries(self) -> List[str]:
    """
    List the top-level entries that this content inserts into a package.

    When Metaflow is packaged from within an executing Metaflow flow, these
    entries may need to be excluded from being packaged again.

    Returns
    -------
    List[str]
        Files or directories to exclude
    """
    excluded = [self._code_dir]
    excluded.append(self._other_dir)
    return excluded
|
116
|
+
|
117
|
+
def content_names(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[str, str], None, None]:
    """
    Enumerate everything present in this MetaflowCodeContent: files as well as
    non-file entries (the INFO or CONFIG data for example).

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the names of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive
    """
    # Delegate to the shared generator without asking it to produce values
    names = self._content(content_types, generate_value=False)
    yield from names
|
135
|
+
|
136
|
+
def contents(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[Union[bytes, str], str], None, None]:
    """
    Same enumeration as content_names, except that non-file entries are
    returned with their content as bytes. Files are reported identically.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the content of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[Union[str, bytes], str], None, None]
        Content of the MF content
    """
    # Delegate to the shared generator and ask it to produce the generated values
    entries = self._content(content_types, generate_value=True)
    yield from entries
|
154
|
+
|
155
|
+
def show(self) -> str:
    """
    Summarize the content of this MetaflowCodeContent for a human reader.

    Individual files are not listed; the summary covers the Metaflow version,
    the installed extensions and the packaged modules (with the step
    decorators and flow mutators registered from them).

    Returns
    -------
    str
        A human-readable string representation of the content of this MetaflowCodeContent
    """

    def _group_by_module(decorators):
        # Map originating module name -> names of the decorators defined there
        grouped = {}
        for deco_name, deco in decorators.items():
            origin = getattr(deco, "_original_module", deco.__module__)
            grouped.setdefault(origin, []).append(deco_name)
        return grouped

    def _provided_by(grouped, module_name):
        # One comma-separated entry per originating module located in module_name
        return [
            ", ".join(deco_names)
            for origin, deco_names in grouped.items()
            if origin == module_name or origin.startswith(module_name + ".")
        ]

    step_decorators_by_module = _group_by_module(
        UserStepDecoratorMeta.all_decorators()
    )
    flow_decorators_by_module = _group_by_module(FlowMutatorMeta.all_decorators())

    lines = []
    if self._metaflow_version:
        lines.append(f"\nMetaflow version: {self._metaflow_version}")

    installed_extensions = extension_info()["installed"]
    if installed_extensions:
        lines.append("\nMetaflow extensions packaged:")
        for ext_name, ext_details in installed_extensions.items():
            lines.append(
                f" - {ext_name} ({ext_details['extension_name']}) @ {ext_details['dist_version']}"
            )

    if self._modules:
        metaflow_lines = []
        other_lines = []
        for name, info in self._modules.items():
            location_line = f" - {name} @ {', '.join(info.root_paths)}"
            if not info.metaflow_module:
                other_lines.append(location_line)
                continue

            metaflow_lines.append(location_line)
            step_decorators = _provided_by(step_decorators_by_module, info.name)
            flow_decorators = _provided_by(flow_decorators_by_module, info.name)
            if step_decorators:
                metaflow_lines.append(
                    f" - Provides step decorators: {', '.join(step_decorators)}"
                )
            if flow_decorators:
                metaflow_lines.append(
                    f" - Provides flow mutators: {', '.join(flow_decorators)}"
                )

        if metaflow_lines:
            lines.append("\nMetaflow modules:")
            lines.extend(metaflow_lines)
        if other_lines:
            lines.append("\nNon-Metaflow packaged modules:")
            lines.extend(other_lines)

    return "\n".join(lines)
|
223
|
+
|
224
|
+
def add_info(self, info: Dict[str, Any]) -> None:
    """
    Register the INFO data as generated content of the Metaflow content.

    The data is serialized to UTF-8 encoded JSON. Registering it a second
    time raises a MetaflowException.

    Parameters
    ----------
    info: Dict[str, Any]
        The content of the INFO file
    """
    archive_path = os.path.join(self._other_dir, self._info_file)
    if archive_path not in self._other_content:
        self._other_content[archive_path] = json.dumps(info).encode("utf-8")
        return
    raise MetaflowException("INFO file already present in the MF environment")
|
237
|
+
|
238
|
+
def add_config(self, config: Dict[str, Any]) -> None:
    """
    Register the CONFIG data as generated content of the Metaflow content.

    The data is serialized to UTF-8 encoded JSON. Registering it a second
    time raises a MetaflowException.

    Parameters
    ----------
    config: Dict[str, Any]
        The content of the CONFIG file
    """
    archive_path = os.path.join(self._other_dir, self._config_file)
    if archive_path not in self._other_content:
        self._other_content[archive_path] = json.dumps(config).encode("utf-8")
        return
    raise MetaflowException("CONFIG file already present in the MF environment")
|
251
|
+
|
252
|
+
def add_module(self, module: ModuleType) -> None:
    """
    Register a python module, and the files it provides, with the Metaflow content.

    The module is recorded as a non-Metaflow module (one added manually by the user).

    Parameters
    ----------
    module: ModuleType
        The module to add
    """
    name = module.__name__
    debug.package_exec(f"Adding module {name} to the MF content")

    # A single-file module is located through __file__ (which points to that
    # file); an actual package lists its path(s) in __path__.
    single_file_location = [module.__file__]
    locations = getattr(module, "__path__", single_file_location)
    root_paths = set(Path(location).resolve().as_posix() for location in locations)

    module_info = _ModuleInfo(
        name,
        root_paths,
        module,
        False,  # This is not a Metaflow module (added by the user manually)
    )
    self._modules[name] = module_info
    self._files_from_modules.update(
        self._module_files(name, module_info.root_paths)
    )
|
278
|
+
|
279
|
+
def add_code_file(self, file_path: str, file_name: str) -> None:
    """
    Register a code file with the Metaflow content.

    The file is placed under the code directory of the archive. Adding the same
    file again under the same name is accepted; adding it under a different
    name raises a MetaflowException.

    Parameters
    ----------
    file_path: str
        The path to the code file to add (on the filesystem)
    file_name: str
        The path in the archive to add the code file to
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding code file {file_path} as {file_name} to the MF content"
    )

    archive_name = os.path.join(self._code_dir, file_name.lstrip("/"))
    # A file that is not registered yet compares equal to the requested name
    if self._files.get(file_path, archive_name) != archive_name:
        raise MetaflowException(
            "File '%s' is already present in the MF content with a different name: '%s'"
            % (file_path, self._files[file_path])
        )
    self._files[file_path] = archive_name
|
303
|
+
|
304
|
+
def add_other_file(self, file_path: str, file_name: str) -> None:
    """
    Register a non-python file with the Metaflow content.

    The file is placed under the "other" directory of the archive. Adding the
    same file again under the same name is accepted; adding it under a
    different name raises a MetaflowException.

    Parameters
    ----------
    file_path: str
        The path to the file to add (on the filesystem)
    file_name: str
        The path in the archive to add the file to
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding other file {file_path} as {file_name} to the MF content"
    )

    archive_name = os.path.join(self._other_dir, file_name.lstrip("/"))
    # A file that is not registered yet compares equal to the requested name
    if self._other_files.get(file_path, archive_name) != archive_name:
        raise MetaflowException(
            "File %s is already present in the MF content with a different name: %s"
            % (file_path, self._other_files[file_path])
        )
    self._other_files[file_path] = archive_name
|
329
|
+
|
330
|
+
def _content(
    self, content_types: Optional[int] = None, generate_value: bool = False
) -> Generator[Tuple[Union[str, bytes], str], None, None]:
    """
    Shared generator behind content_names and contents.

    Parameters
    ----------
    content_types : Optional[int]
        Bitwise combination of ContentType values selecting what to yield. If
        None, ContentType.ALL_CONTENT is used.
    generate_value : bool, default False
        If True, generated entries are yielded with their content as bytes;
        otherwise a "<generated ... content>" placeholder string is yielded
        in place of the content.

    Yields
    ------
    Generator[Tuple[Union[str, bytes], str], None, None]
        For files, the path on the filesystem and the name in the archive. For
        generated entries, the content (or its placeholder) and the name in
        the archive.
    """
    # The unused function-scope import of MetaflowPackage that used to live
    # here was removed: nothing in this method references it.
    if content_types is None:
        content_types = ContentType.ALL_CONTENT.value

    if content_types & ContentType.CODE_CONTENT.value:
        yield from self._cached_metaflow_files
        yield from self._files.items()
    if content_types & ContentType.MODULE_CONTENT.value:
        yield from self._files_from_modules.items()
    if content_types & ContentType.OTHER_CONTENT.value:
        yield from self._other_files.items()
        # NOTE(review): the generated entries below are assumed to be part of the
        # OTHER content selection -- confirm against the upstream nesting.
        dist_info_name = os.path.join(self._other_dir, self._dist_info_file)
        marker_name = os.path.join(self._code_dir, MFCONTENT_MARKER)
        if generate_value:
            for k, v in self._other_content.items():
                yield v, k
            # Include the distribution file too
            yield json.dumps(self._distmetainfo).encode("utf-8"), dist_info_name
            yield json.dumps(self.create_mfcontent_info()).encode(
                "utf-8"
            ), marker_name
        else:
            for k in self._other_content.keys():
                yield "<generated %s content>" % (os.path.basename(k)), k
            yield "<generated %s content>" % (
                os.path.basename(self._dist_info_file)
            ), dist_info_name
            yield "<generated %s content>" % MFCONTENT_MARKER, marker_name
|
364
|
+
|
365
|
+
def _metaflow_distribution_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Yield the files of the Metaflow distribution itself.

    The "metaflow" directory under the Metaflow root is walked (hidden entries
    included) and only files matching METAFLOW_SUFFIXES_LIST are kept.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive (under the code directory)
    """
    debug.package_exec("Including Metaflow from '%s'" % self._metaflow_root)
    metaflow_dir = os.path.join(self._metaflow_root, "metaflow")
    wanted_suffixes = suffix_filter(self.METAFLOW_SUFFIXES_LIST)
    yield from (
        (entry[0], os.path.join(self._code_dir, entry[1]))
        for entry in walk(
            metaflow_dir,
            exclude_hidden=False,
            file_filter=wanted_suffixes,
        )
    )
|
373
|
+
|
374
|
+
def _metaflow_extension_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Yield the files of the Metaflow extensions.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive (under the code directory)
    """
    # Metaflow extensions; for now, we package *all* extensions but this may change
    # at a later date; it is possible to call `package_mfext_package` instead of
    # `package_mfext_all` but in that case, make sure to also add a
    # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions
    # package and prevent other extensions from being loaded that may be
    # present in the rest of the system
    yield from (
        (entry[0], os.path.join(self._code_dir, entry[1]))
        for entry in package_mfext_all()
    )

    if not debug.package:
        return
    # Only the root paths of each extension are of interest in the debug output
    descriptions = package_mfext_all_descriptions()
    root_paths_only = {
        ext_name: {
            key: value for key, value in details.items() if key in ("root_paths",)
        }
        for ext_name, details in descriptions.items()
    }
    debug.package_exec(f"Metaflow extensions packaged: {root_paths_only}")
|
390
|
+
|
391
|
+
def _module_files(
    self, name: str, paths: Set[str]
) -> Generator[Tuple[str, str], None, None]:
    """
    Yield the files that make up module `name`.

    Files are taken first from the distributions that
    modules_to_distributions() associates with the module, and then from any
    remaining root path that no distribution accounted for. As a side effect,
    the METADATA and RECORD of each distribution used are stored in
    self._distmetainfo.

    Parameters
    ----------
    name : str
        Name of the module (possibly dotted)
    paths : Set[str]
        Root paths of the module; callers in this class pass resolved POSIX
        paths. The set is copied and not modified.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive (under the code directory)

    Raises
    ------
    RuntimeError
        If a distribution associated with the module does not have the module
        in one of `paths`
    """
    debug.package_exec(
        " Looking for distributions for module %s in %s" % (name, paths)
    )
    paths = set(paths)  # Do not modify external paths
    has_init = False
    distributions = modules_to_distributions().get(name)
    prefix_parts = tuple(name.split("."))
    prefix_len = len(prefix_parts)
    # Location of the module relative to a root: a dotted name maps to nested
    # directories. For a non-dotted name this is the name itself.
    module_rel_path = os.path.join(*prefix_parts)
    excluded_suffixes = tuple(EXT_EXCLUDE_SUFFIXES)

    seen_distributions = set()
    if distributions:
        for dist in distributions:
            dist_name = dist.metadata["Name"]  # dist.name not always present
            if dist_name in seen_distributions:
                continue
            # For some reason, sometimes the same distribution appears twice. We
            # don't need to process twice.
            seen_distributions.add(dist_name)
            debug.package_exec(
                " Including distribution '%s' for module '%s'"
                % (dist_name, name)
            )
            # Normalize the same way the entries of `paths` are built by the
            # callers (resolved, POSIX form) so that symlinks or platform
            # separators do not cause a spurious mismatch.
            dist_root = str(
                dist.locate_file(module_rel_path).resolve().as_posix()
            )
            if dist_root not in paths:
                # This is an error because it means that this distribution is
                # not contributing to the module.
                raise RuntimeError(
                    "Distribution '%s' is not contributing to module '%s' as "
                    "expected (got '%s' when expected one of %s)"
                    % (dist.metadata["Name"], name, dist_root, paths)
                )
            paths.discard(dist_root)
            if dist_name not in self._distmetainfo:
                # Possible that a distribution contributes to multiple modules
                self._distmetainfo[dist_name] = {
                    # We can add more if needed but these are likely the most
                    # useful (captures, name, version, etc and files which can
                    # be used to find non-python files in the distribution).
                    "METADATA": dist.read_text("METADATA") or "",
                    "RECORD": dist.read_text("RECORD") or "",
                }
            for file in dist.files or []:
                # Skip files that do not belong to this module (distribution may
                # provide multiple modules). An entry without any component below
                # the module prefix cannot be a file of the module either.
                if (
                    len(file.parts) <= prefix_len
                    or file.parts[:prefix_len] != prefix_parts
                ):
                    continue
                if file.parts[prefix_len] == "__init__.py":
                    has_init = True
                # Strip the whole module prefix (not just its first component)
                # before re-rooting the file under the module in the archive.
                yield str(
                    dist.locate_file(file).resolve().as_posix()
                ), os.path.join(
                    self._code_dir, *prefix_parts, *file.parts[prefix_len:]
                )

    # Now if there are more paths left in paths, it means there is a non-distribution
    # component to this package which we also include.
    debug.package_exec(
        " Looking for non-distribution files for module '%s' in %s"
        % (name, paths)
    )
    for path in paths:
        if not Path(path).is_dir():
            # Single file for the module -- this will be something like <name>.py
            yield path, os.path.join(
                self._code_dir, *prefix_parts[:-1], f"{prefix_parts[-1]}.py"
            )
            has_init = True
        else:
            for root, _, files in os.walk(path):
                for file in files:
                    if file.endswith(excluded_suffixes):
                        continue
                    fs_path = os.path.join(root, file)
                    rel_path = os.path.relpath(fs_path, path)
                    if rel_path == "__init__.py":
                        has_init = True
                    # Use the nested module path, consistent with the other
                    # branches, so that a dotted module name stays importable.
                    yield fs_path, os.path.join(
                        self._code_dir,
                        module_rel_path,
                        rel_path,
                    )
    # We now include an empty __init__.py file to close the module and prevent
    # leaks from possible namespace packages
    if not has_init:
        yield os.path.join(
            self._metaflow_root, "metaflow", "extension_support", "_empty_file.py"
        ), os.path.join(self._code_dir, *prefix_parts, "__init__.py")
|
@@ -66,6 +66,7 @@ class Airflow(object):
|
|
66
66
|
name,
|
67
67
|
graph,
|
68
68
|
flow,
|
69
|
+
code_package_metadata,
|
69
70
|
code_package_sha,
|
70
71
|
code_package_url,
|
71
72
|
metadata,
|
@@ -87,6 +88,7 @@ class Airflow(object):
|
|
87
88
|
self.name = name
|
88
89
|
self.graph = graph
|
89
90
|
self.flow = flow
|
91
|
+
self.code_package_metadata = code_package_metadata
|
90
92
|
self.code_package_sha = code_package_sha
|
91
93
|
self.code_package_url = code_package_url
|
92
94
|
self.metadata = metadata
|
@@ -372,6 +374,7 @@ class Airflow(object):
|
|
372
374
|
# Technically the "user" is the stakeholder but should these labels be present.
|
373
375
|
}
|
374
376
|
additional_mf_variables = {
|
377
|
+
"METAFLOW_CODE_METADATA": self.code_package_metadata,
|
375
378
|
"METAFLOW_CODE_SHA": self.code_package_sha,
|
376
379
|
"METAFLOW_CODE_URL": self.code_package_url,
|
377
380
|
"METAFLOW_CODE_DS": self.flow_datastore.TYPE,
|
@@ -476,6 +479,7 @@ class Airflow(object):
|
|
476
479
|
node.name,
|
477
480
|
AIRFLOW_MACROS.create_task_id(self.contains_foreach),
|
478
481
|
AIRFLOW_MACROS.ATTEMPT,
|
482
|
+
code_package_metadata=self.code_package_metadata,
|
479
483
|
code_package_url=self.code_package_url,
|
480
484
|
step_cmds=self._step_cli(
|
481
485
|
node, input_paths, self.code_package_url, user_code_retries
|
@@ -534,7 +538,7 @@ class Airflow(object):
|
|
534
538
|
"with": [
|
535
539
|
decorator.make_decorator_spec()
|
536
540
|
for decorator in node.decorators
|
537
|
-
if not decorator.statically_defined
|
541
|
+
if not decorator.statically_defined and decorator.inserted_by is None
|
538
542
|
]
|
539
543
|
}
|
540
544
|
# FlowDecorators can define their own top-level options. They are
|
@@ -7,6 +7,7 @@ from hashlib import sha1
|
|
7
7
|
from metaflow import current, decorators
|
8
8
|
from metaflow._vendor import click
|
9
9
|
from metaflow.exception import MetaflowException, MetaflowInternalError
|
10
|
+
from metaflow.metaflow_config import FEAT_ALWAYS_UPLOAD_CODE_PACKAGE
|
10
11
|
from metaflow.package import MetaflowPackage
|
11
12
|
from metaflow.plugins.aws.step_functions.production_token import (
|
12
13
|
load_token,
|
@@ -292,16 +293,26 @@ def make_flow(
|
|
292
293
|
# Save the code package in the flow datastore so that both user code and
|
293
294
|
# metaflow package can be retrieved during workflow execution.
|
294
295
|
obj.package = MetaflowPackage(
|
295
|
-
obj.flow,
|
296
|
+
obj.flow,
|
297
|
+
obj.environment,
|
298
|
+
obj.echo,
|
299
|
+
suffixes=obj.package_suffixes,
|
300
|
+
flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
|
296
301
|
)
|
297
|
-
|
298
|
-
|
299
|
-
|
302
|
+
# This blocks until the package is created
|
303
|
+
if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
|
304
|
+
package_url = obj.package.package_url()
|
305
|
+
package_sha = obj.package.package_sha()
|
306
|
+
else:
|
307
|
+
package_url, package_sha = obj.flow_datastore.save_data(
|
308
|
+
[obj.package.blob], len_hint=1
|
309
|
+
)[0]
|
300
310
|
|
301
311
|
return Airflow(
|
302
312
|
dag_name,
|
303
313
|
obj.graph,
|
304
314
|
obj.flow,
|
315
|
+
obj.package.package_metadata,
|
305
316
|
package_sha,
|
306
317
|
package_url,
|
307
318
|
obj.metadata,
|
@@ -91,6 +91,7 @@ class ArgoWorkflows(object):
|
|
91
91
|
name,
|
92
92
|
graph: FlowGraph,
|
93
93
|
flow,
|
94
|
+
code_package_metadata,
|
94
95
|
code_package_sha,
|
95
96
|
code_package_url,
|
96
97
|
production_token,
|
@@ -143,6 +144,7 @@ class ArgoWorkflows(object):
|
|
143
144
|
self.name = name
|
144
145
|
self.graph = graph
|
145
146
|
self.flow = flow
|
147
|
+
self.code_package_metadata = code_package_metadata
|
146
148
|
self.code_package_sha = code_package_sha
|
147
149
|
self.code_package_url = code_package_url
|
148
150
|
self.production_token = production_token
|
@@ -551,7 +553,7 @@ class ArgoWorkflows(object):
|
|
551
553
|
type=param_type,
|
552
554
|
description=param.kwargs.get("help"),
|
553
555
|
is_required=is_required,
|
554
|
-
**extra_attrs
|
556
|
+
**extra_attrs,
|
555
557
|
)
|
556
558
|
return parameters
|
557
559
|
|
@@ -1495,7 +1497,9 @@ class ArgoWorkflows(object):
|
|
1495
1497
|
mflog_expr,
|
1496
1498
|
]
|
1497
1499
|
+ self.environment.get_package_commands(
|
1498
|
-
self.code_package_url,
|
1500
|
+
self.code_package_url,
|
1501
|
+
self.flow_datastore.TYPE,
|
1502
|
+
self.code_package_metadata,
|
1499
1503
|
)
|
1500
1504
|
)
|
1501
1505
|
step_cmds = self.environment.bootstrap_commands(
|
@@ -1507,6 +1511,7 @@ class ArgoWorkflows(object):
|
|
1507
1511
|
decorator.make_decorator_spec()
|
1508
1512
|
for decorator in node.decorators
|
1509
1513
|
if not decorator.statically_defined
|
1514
|
+
and decorator.inserted_by is None
|
1510
1515
|
]
|
1511
1516
|
}
|
1512
1517
|
# FlowDecorators can define their own top-level options. They are
|
@@ -1673,6 +1678,7 @@ class ArgoWorkflows(object):
|
|
1673
1678
|
**{
|
1674
1679
|
# These values are needed by Metaflow to set it's internal
|
1675
1680
|
# state appropriately.
|
1681
|
+
"METAFLOW_CODE_METADATA": self.code_package_metadata,
|
1676
1682
|
"METAFLOW_CODE_URL": self.code_package_url,
|
1677
1683
|
"METAFLOW_CODE_SHA": self.code_package_sha,
|
1678
1684
|
"METAFLOW_CODE_DS": self.flow_datastore.TYPE,
|
@@ -2476,7 +2482,9 @@ class ArgoWorkflows(object):
|
|
2476
2482
|
mflog_expr,
|
2477
2483
|
]
|
2478
2484
|
+ self.environment.get_package_commands(
|
2479
|
-
self.code_package_url,
|
2485
|
+
self.code_package_url,
|
2486
|
+
self.flow_datastore.TYPE,
|
2487
|
+
self.code_package_metadata,
|
2480
2488
|
)[:-1]
|
2481
2489
|
# Replace the line 'Task in starting'
|
2482
2490
|
# FIXME: this can be brittle.
|
@@ -2496,6 +2504,7 @@ class ArgoWorkflows(object):
|
|
2496
2504
|
env = {
|
2497
2505
|
# These values are needed by Metaflow to set it's internal
|
2498
2506
|
# state appropriately.
|
2507
|
+
"METAFLOW_CODE_METADATA": self.code_package_metadata,
|
2499
2508
|
"METAFLOW_CODE_URL": self.code_package_url,
|
2500
2509
|
"METAFLOW_CODE_SHA": self.code_package_sha,
|
2501
2510
|
"METAFLOW_CODE_DS": self.flow_datastore.TYPE,
|
@@ -2952,7 +2961,8 @@ class ArgoWorkflows(object):
|
|
2952
2961
|
mflog_expr,
|
2953
2962
|
]
|
2954
2963
|
+ self.environment.get_package_commands(
|
2955
|
-
self.code_package_url,
|
2964
|
+
self.code_package_url,
|
2965
|
+
self.flow_datastore.TYPE,
|
2956
2966
|
)[:-1]
|
2957
2967
|
# Replace the line 'Task in starting'
|
2958
2968
|
# FIXME: this can be brittle.
|
@@ -2967,6 +2977,7 @@ class ArgoWorkflows(object):
|
|
2967
2977
|
env = {
|
2968
2978
|
# These values are needed by Metaflow to set it's internal
|
2969
2979
|
# state appropriately.
|
2980
|
+
"METAFLOW_CODE_METADATA": self.code_package_metadata,
|
2970
2981
|
"METAFLOW_CODE_URL": self.code_package_url,
|
2971
2982
|
"METAFLOW_CODE_SHA": self.code_package_sha,
|
2972
2983
|
"METAFLOW_CODE_DS": self.flow_datastore.TYPE,
|
@@ -15,6 +15,7 @@ from metaflow.exception import (
|
|
15
15
|
)
|
16
16
|
from metaflow.metaflow_config import (
|
17
17
|
ARGO_WORKFLOWS_UI_URL,
|
18
|
+
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
|
18
19
|
KUBERNETES_NAMESPACE,
|
19
20
|
SERVICE_VERSION_CHECK,
|
20
21
|
UI_URL,
|
@@ -518,16 +519,27 @@ def make_flow(
|
|
518
519
|
# Save the code package in the flow datastore so that both user code and
|
519
520
|
# metaflow package can be retrieved during workflow execution.
|
520
521
|
obj.package = MetaflowPackage(
|
521
|
-
obj.flow,
|
522
|
+
obj.flow,
|
523
|
+
obj.environment,
|
524
|
+
obj.echo,
|
525
|
+
suffixes=obj.package_suffixes,
|
526
|
+
flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
|
522
527
|
)
|
523
|
-
|
524
|
-
|
525
|
-
|
528
|
+
|
529
|
+
# This blocks until the package is created
|
530
|
+
if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
|
531
|
+
package_url = obj.package.package_url()
|
532
|
+
package_sha = obj.package.package_sha()
|
533
|
+
else:
|
534
|
+
package_url, package_sha = obj.flow_datastore.save_data(
|
535
|
+
[obj.package.blob], len_hint=1
|
536
|
+
)[0]
|
526
537
|
|
527
538
|
return ArgoWorkflows(
|
528
539
|
name,
|
529
540
|
obj.graph,
|
530
541
|
obj.flow,
|
542
|
+
obj.package.package_metadata,
|
531
543
|
package_sha,
|
532
544
|
package_url,
|
533
545
|
token,
|