metaflow 2.15.20__py2.py3-none-any.whl → 2.16.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +7 -1
- metaflow/cli.py +16 -1
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +6 -2
- metaflow/client/core.py +22 -30
- metaflow/datastore/task_datastore.py +0 -1
- metaflow/debug.py +5 -0
- metaflow/decorators.py +230 -70
- metaflow/extension_support/__init__.py +15 -8
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +80 -53
- metaflow/graph.py +24 -2
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +7 -1
- metaflow/metaflow_config.py +5 -0
- metaflow/metaflow_environment.py +82 -25
- metaflow/metaflow_version.py +1 -1
- metaflow/package/__init__.py +664 -0
- metaflow/packaging_sys/__init__.py +870 -0
- metaflow/packaging_sys/backend.py +113 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +86 -0
- metaflow/packaging_sys/utils.py +91 -0
- metaflow/packaging_sys/v1.py +476 -0
- metaflow/plugins/airflow/airflow.py +5 -1
- metaflow/plugins/airflow/airflow_cli.py +15 -4
- metaflow/plugins/argo/argo_workflows.py +23 -17
- metaflow/plugins/argo/argo_workflows_cli.py +16 -4
- metaflow/plugins/aws/batch/batch.py +22 -3
- metaflow/plugins/aws/batch/batch_cli.py +3 -0
- metaflow/plugins/aws/batch/batch_decorator.py +13 -5
- metaflow/plugins/aws/step_functions/step_functions.py +4 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +15 -4
- metaflow/plugins/cards/card_decorator.py +0 -5
- metaflow/plugins/kubernetes/kubernetes.py +8 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +13 -5
- metaflow/plugins/package_cli.py +25 -23
- metaflow/plugins/parallel_decorator.py +4 -2
- metaflow/plugins/pypi/bootstrap.py +8 -2
- metaflow/plugins/pypi/conda_decorator.py +39 -82
- metaflow/plugins/pypi/conda_environment.py +6 -2
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +11 -0
- metaflow/plugins/uv/uv_environment.py +4 -2
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +5 -4
- metaflow/runner/subprocess_manager.py +14 -2
- metaflow/runtime.py +37 -11
- metaflow/task.py +91 -7
- metaflow/user_configs/config_options.py +13 -8
- metaflow/user_configs/config_parameters.py +0 -4
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +499 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +263 -0
- metaflow/user_decorators/user_step_decorator.py +712 -0
- metaflow/util.py +4 -1
- metaflow/version.py +1 -1
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/METADATA +2 -2
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/RECORD +71 -60
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Makefile +0 -0
- {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/WHEEL +0 -0
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,870 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
|
4
|
+
from enum import IntEnum
|
5
|
+
from types import ModuleType
|
6
|
+
from typing import (
|
7
|
+
Any,
|
8
|
+
Dict,
|
9
|
+
Generator,
|
10
|
+
List,
|
11
|
+
Optional,
|
12
|
+
TYPE_CHECKING,
|
13
|
+
Tuple,
|
14
|
+
Type,
|
15
|
+
Union,
|
16
|
+
)
|
17
|
+
|
18
|
+
from metaflow.packaging_sys.distribution_support import PackagedDistributionFinder
|
19
|
+
|
20
|
+
|
21
|
+
from .backend import PackagingBackend
|
22
|
+
from .tar_backend import TarPackagingBackend
|
23
|
+
|
24
|
+
from ..util import get_metaflow_root
|
25
|
+
|
26
|
+
MFCONTENT_MARKER = ".mf_install"
|
27
|
+
|
28
|
+
if TYPE_CHECKING:
|
29
|
+
import metaflow.extension_support.metadata
|
30
|
+
|
31
|
+
|
32
|
+
class ContentType(IntEnum):
    """
    Bit flags classifying a file that is added to a code package.

    Every basic flag is a distinct power of two; ALL_CONTENT is the bitwise OR
    of all of them.
    """

    # File being added is user code (ie: the directory with the flow file)
    USER_CONTENT = 1 << 0
    # File being added is non-user code (libraries, metaflow itself, ...)
    CODE_CONTENT = 1 << 1
    # File being added is a python module
    MODULE_CONTENT = 1 << 2
    # File being added is a non-python file
    OTHER_CONTENT = 1 << 3

    # Mask that selects every kind of content listed above
    ALL_CONTENT = USER_CONTENT | CODE_CONTENT | MODULE_CONTENT | OTHER_CONTENT
|
43
|
+
|
44
|
+
|
45
|
+
class MetaflowCodeContent:
|
46
|
+
"""
|
47
|
+
Base class for all Metaflow code packages (non user code).
|
48
|
+
|
49
|
+
A Metaflow code package, at a minimum, contains:
|
50
|
+
- a special INFO file (containing a bunch of metadata about the Metaflow environment)
|
51
|
+
- a special CONFIG file (containing user configurations for the flow)
|
52
|
+
|
53
|
+
Declare all other MetaflowCodeContent subclasses (versions) here to handle just the functions
|
54
|
+
that are not implemented here. In a *separate* file, declare any other
|
55
|
+
function for that specific version.
|
56
|
+
|
57
|
+
NOTE: This file must remain as dependency-free as possible as it is loaded *very*
|
58
|
+
early on. This is why you must declare a *separate* class implementing what you want
|
59
|
+
the Metaflow code package (non user) to do.
|
60
|
+
"""
|
61
|
+
|
62
|
+
_cached_mfcontent_info = {}
|
63
|
+
|
64
|
+
_mappings = {}
|
65
|
+
|
66
|
+
@classmethod
def get_info(cls) -> Optional[Dict[str, Any]]:
    """
    Read the special INFO file from the local filesystem once the code package
    has been expanded.

    The marker information is looked up first and determines which registered
    class (version) performs the actual read through its `get_info_impl`.

    Returns
    -------
    Optional[Dict[str, Any]]
        The content of the INFO file -- None if there is no such file.
    """
    marker_info = cls._extract_mfcontent_info()
    return cls._get_mfcontent_class(marker_info).get_info_impl(marker_info)
|
80
|
+
|
81
|
+
@classmethod
def get_config(cls) -> Optional[Dict[str, Any]]:
    """
    Read the special CONFIG file from the local filesystem once the code package
    has been expanded.

    The marker information is looked up first and determines which registered
    class (version) performs the actual read through its `get_config_impl`.

    Returns
    -------
    Optional[Dict[str, Any]]
        The content of the CONFIG file -- None if there is no such file.
    """
    marker_info = cls._extract_mfcontent_info()
    return cls._get_mfcontent_class(marker_info).get_config_impl(marker_info)
|
95
|
+
|
96
|
+
@classmethod
def get_filename(cls, filename: str, content_type: ContentType) -> Optional[str]:
    """
    Locate, on the local filesystem, a file that was extracted from the archive.

    `filename` is the name that was used when the archive was created and
    `content_type` is the type the file was added as. The lookup itself is
    delegated to `get_filename_impl` of the class matching the package version.

    Parameters
    ----------
    filename: str
        The name of the file on the filesystem.
    content_type: ContentType
        The type of content the file belongs to.

    Returns
    -------
    Optional[str]
        The path to the file on the local filesystem or None if not found.
    """
    marker_info = cls._extract_mfcontent_info()
    handler = cls._get_mfcontent_class(marker_info)
    return handler.get_filename_impl(marker_info, filename, content_type)
|
119
|
+
|
120
|
+
@classmethod
def get_env_vars_for_packaged_metaflow(
    cls, dest_dir: str
) -> Optional[Dict[str, str]]:
    """
    Compute the environment variables needed to run a packaged Metaflow.

    This is typically used to set the PYTHONPATH so that it includes the
    directory where the Metaflow code package has been extracted.

    Parameters
    ----------
    dest_dir: str
        Directory forwarded to `get_post_extract_env_vars_impl`.

    Returns
    -------
    Optional[Dict[str, str]]
        The environment variables that are needed to run Metaflow when it is
        packaged -- None if there are no such variables (not packaged for example)
    """
    marker_info = cls._extract_mfcontent_info()
    if marker_info is not None:
        handler = cls._get_mfcontent_class(marker_info)
        return handler.get_post_extract_env_vars_impl(dest_dir)
    # No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
    # package so no environment variables to set.
    return None
|
142
|
+
|
143
|
+
@classmethod
def get_archive_info(
    cls,
    archive: Any,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[Dict[str, Any]]:
    """
    Read the special INFO file directly from an archive.

    Parameters
    ----------
    archive: Any
        The archive to read from.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Returns
    -------
    Optional[Dict[str, Any]]
        The content of the INFO file -- None if there is no such file.
    """
    marker_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
    handler = cls._get_mfcontent_class(marker_info)
    return handler.get_archive_info_impl(marker_info, archive, packaging_backend)
|
162
|
+
|
163
|
+
@classmethod
def get_archive_config(
    cls,
    archive: Any,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[Dict[str, Any]]:
    """
    Read the special CONFIG file directly from an archive.

    Parameters
    ----------
    archive: Any
        The archive to read from.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Returns
    -------
    Optional[Dict[str, Any]]
        The content of the CONFIG file -- None if there is no such file.
    """
    marker_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
    handler = cls._get_mfcontent_class(marker_info)
    return handler.get_archive_config_impl(marker_info, archive, packaging_backend)
|
182
|
+
|
183
|
+
@classmethod
def get_archive_filename(
    cls,
    archive: Any,
    filename: str,
    content_type: ContentType,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[str]:
    """
    Find where, inside the archive, a given file is located.

    Nothing is extracted: this is the counterpart of `get_filename` for files
    that have not been extracted yet.

    Parameters
    ----------
    archive: Any
        The archive to get the filename from.
    filename: str
        The name of the file in the archive.
    content_type: ContentType
        The type of the content (e.g., code, other, etc.).
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend to use.

    Returns
    -------
    Optional[str]
        The filename in the archive or None if not found.
    """
    marker_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
    handler = cls._get_mfcontent_class(marker_info)
    return handler.get_archive_filename_impl(
        marker_info, archive, filename, content_type, packaging_backend
    )
|
217
|
+
|
218
|
+
@classmethod
def get_archive_content_names(
    cls,
    archive: Any,
    content_types: Optional[int] = None,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> List[str]:
    """
    List names of the content of an archive.

    The work is delegated to `get_archive_content_names_impl` of the class
    matching the version recorded in the archive.

    Parameters
    ----------
    archive: Any
        The archive to inspect.
    content_types: Optional[int], default None
        Forwarded unchanged to the implementation.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend to use.

    Returns
    -------
    List[str]
        Whatever the version-specific implementation returns.
    """
    marker_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
    handler = cls._get_mfcontent_class(marker_info)
    return handler.get_archive_content_names_impl(
        marker_info, archive, content_types, packaging_backend
    )
|
230
|
+
|
231
|
+
@classmethod
def get_distribution_finder(
    cls,
) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
    """
    Return the distribution finder of the Metaflow code package (if applicable).

    Some packages include distribution information to "pretend" that some
    packages are actually distributions even though they are simply included
    in the code package.

    Returns
    -------
    Optional["metaflow.extension_support.metadata.DistributionFinder"]
        The distribution finder for the Metaflow code package -- None if there is no
        such finder.
    """
    marker_info = cls._extract_mfcontent_info()
    return cls._get_mfcontent_class(marker_info).get_distribution_finder_impl(
        marker_info
    )
|
250
|
+
|
251
|
+
@classmethod
def get_post_extract_env_vars(
    cls, version_id: int, dest_dir: str = "."
) -> Dict[str, str]:
    """
    Compute the environment variables needed to access the content that has
    been extracted into dest_dir.

    This will typically involve setting PYTHONPATH.

    Parameters
    ----------
    version_id: int
        The version of MetaflowCodeContent for this package.
    dest_dir: str, default "."
        The directory where the content has been extracted to.

    Returns
    -------
    Dict[str, str]
        The post-extract environment variables that are needed to access the content
        that has been extracted into dest_dir.

    Raises
    ------
    ValueError
        If no class is registered for version_id.
    """
    if version_id in cls._mappings:
        return cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
    raise ValueError(
        "Invalid package -- unknown version %s in info: %s"
        % (version_id, cls._mappings)
    )
|
280
|
+
|
281
|
+
# Implement the _impl methods in the base subclass (in this file). These need to
|
282
|
+
# happen with as few imports as possible to prevent circular dependencies.
|
283
|
+
@classmethod
def get_info_impl(
    cls, mfcontent_info: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """
    Version-specific hook called by `get_info`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_mfcontent_info`.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_info_impl not implemented")
|
288
|
+
|
289
|
+
@classmethod
def get_config_impl(
    cls, mfcontent_info: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """
    Version-specific hook called by `get_config`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_mfcontent_info`.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_config_impl not implemented")
|
294
|
+
|
295
|
+
@classmethod
def get_filename_impl(
    cls,
    mfcontent_info: Optional[Dict[str, Any]],
    filename: str,
    content_type: ContentType,
) -> Optional[str]:
    """
    Version-specific hook called by `get_filename`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_mfcontent_info`.
    filename: str
        The name of the file to look up.
    content_type: ContentType
        The type of content the file belongs to.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_filename_impl not implemented")
|
303
|
+
|
304
|
+
@classmethod
def get_distribution_finder_impl(
    cls, mfcontent_info: Optional[Dict[str, Any]]
) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
    """
    Version-specific hook called by `get_distribution_finder`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_mfcontent_info`.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_distribution_finder_impl not implemented")
|
309
|
+
|
310
|
+
@classmethod
def get_archive_info_impl(
    cls,
    mfcontent_info: Optional[Dict[str, Any]],
    archive: Any,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[Dict[str, Any]]:
    """
    Version-specific hook called by `get_archive_info`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_archive_mfcontent_info`.
    archive: Any
        The archive to read from.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_archive_info_impl not implemented")
|
318
|
+
|
319
|
+
@classmethod
def get_archive_config_impl(
    cls,
    mfcontent_info: Optional[Dict[str, Any]],
    archive: Any,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[Dict[str, Any]]:
    """
    Version-specific hook called by `get_archive_config`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_archive_mfcontent_info`.
    archive: Any
        The archive to read from.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_archive_config_impl not implemented")
|
327
|
+
|
328
|
+
@classmethod
def get_archive_filename_impl(
    cls,
    mfcontent_info: Optional[Dict[str, Any]],
    archive: Any,
    filename: str,
    content_type: ContentType,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[str]:
    """
    Version-specific hook called by `get_archive_filename`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_archive_mfcontent_info`.
    archive: Any
        The archive to look into.
    filename: str
        The name of the file to look up.
    content_type: ContentType
        The type of content the file belongs to.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_archive_filename_impl not implemented")
|
338
|
+
|
339
|
+
@classmethod
def get_archive_content_names_impl(
    cls,
    mfcontent_info: Optional[Dict[str, Any]],
    archive: Any,
    content_types: Optional[int] = None,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> List[str]:
    """
    Version-specific hook called by `get_archive_content_names`.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Marker information as returned by `_extract_archive_mfcontent_info`.
    archive: Any
        The archive to inspect.
    content_types: Optional[int], default None
        Value passed through from `get_archive_content_names`.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_archive_content_names_impl not implemented")
|
348
|
+
|
349
|
+
@classmethod
def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
    """
    Version-specific hook called by `get_post_extract_env_vars` and
    `get_env_vars_for_packaged_metaflow`.

    Parameters
    ----------
    dest_dir: str
        Directory passed in by the caller.

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("get_post_extract_env_vars_impl not implemented")
|
352
|
+
|
353
|
+
def __init_subclass__(cls, version_id, **kwargs) -> None:
    """
    Register every subclass under the `version_id` given in its class statement.

    The subclass is stored in `MetaflowCodeContent._mappings` and remembers its
    own version in `_version_id`.

    Raises
    ------
    ValueError
        If another class is already registered for `version_id`.
    """
    super().__init_subclass__(**kwargs)
    registry = MetaflowCodeContent._mappings
    if version_id in registry:
        raise ValueError(
            "Version ID %s already exists in MetaflowCodeContent mappings "
            "-- this is a bug in Metaflow." % str(version_id)
        )
    registry[version_id] = cls
    cls._version_id = version_id
|
362
|
+
|
363
|
+
# Implement these methods in sub-classes of the base sub-classes. These methods
|
364
|
+
# are called later and can have more dependencies and so can live in other files.
|
365
|
+
def get_excluded_tl_entries(self) -> List[str]:
    """
    When packaging Metaflow from within an executing Metaflow flow, we need to
    exclude the files that are inserted by this content from being packaged (possibly).

    Use this function to return these files or top-level directories.

    Returns
    -------
    List[str]
        Files or directories to exclude
    """
    # The base implementation excludes nothing.
    return []
|
378
|
+
|
379
|
+
def content_names(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[str, str], None, None]:
    """
    Detailed list of the content of this MetaflowCodeContent. This will list all files
    (or non files -- for the INFO or CONFIG data for example) present in the archive.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the names of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("content_names not implemented")
|
397
|
+
|
398
|
+
def contents(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[Union[bytes, str], str], None, None]:
    """
    Very similar to content_names but returns the content of the non-files
    as well as bytes. For files, identical output as content_names

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the content of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[Union[str, bytes], str], None, None]
        Content of the MF content

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    # NOTE(review): the message says "content" while the method is named
    # "contents" -- confirm whether that is intentional.
    raise NotImplementedError("content not implemented")
|
416
|
+
|
417
|
+
def show(self) -> str:
    """
    Returns a more human-readable string representation of the content of this
    MetaflowCodeContent. This will not, for example, list all files but summarize what
    is included at a more high level.

    Returns
    -------
    str
        A human-readable string representation of the content of this MetaflowCodeContent

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("show not implemented")
|
429
|
+
|
430
|
+
def add_info(self, info: Dict[str, Any]) -> None:
    """
    Add the content of the INFO file to the Metaflow content

    Parameters
    ----------
    info: Dict[str, Any]
        The content of the INFO file

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("add_info not implemented")
|
440
|
+
|
441
|
+
def add_config(self, config: Dict[str, Any]) -> None:
    """
    Add the content of the CONFIG file to the Metaflow content

    Parameters
    ----------
    config: Dict[str, Any]
        The content of the CONFIG file

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("add_config not implemented")
|
451
|
+
|
452
|
+
def add_module(self, module_path: ModuleType) -> None:
    """
    Add a python module to the Metaflow content

    Parameters
    ----------
    module_path: ModuleType
        The module to add

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("add_module not implemented")
|
462
|
+
|
463
|
+
def add_code_file(self, file_path: str, file_name: str) -> None:
    """
    Add a code file to the Metaflow content

    Parameters
    ----------
    file_path: str
        The path to the code file to add (on the filesystem)
    file_name: str
        The path in the archive to add the code file to

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("add_code_file not implemented")
|
475
|
+
|
476
|
+
def add_other_file(self, file_path: str, file_name: str) -> None:
    """
    Add a non-python file to the Metaflow content

    Parameters
    ----------
    file_path: str
        The path to the file to add (on the filesystem)
    file_name: str
        The path in the archive to add the file to

    Raises
    ------
    NotImplementedError
        Always -- the base class provides no implementation.
    """
    raise NotImplementedError("add_other_file not implemented")
|
488
|
+
|
489
|
+
@classmethod
def _get_mfcontent_class(
    cls, info: Optional[Dict[str, Any]]
) -> Type["MetaflowCodeContent"]:
    """
    Pick the class that handles a package given its marker information.

    Parameters
    ----------
    info: Optional[Dict[str, Any]]
        Marker information; None selects MetaflowCodeContentV0.

    Returns
    -------
    Type[MetaflowCodeContent]
        The class registered for `info["version"]`.

    Raises
    ------
    ValueError
        If `info` has no "version" key or the version is not registered.
    """
    if info is None:
        return MetaflowCodeContentV0
    if "version" in info:
        version = info["version"]
        if version in cls._mappings:
            return cls._mappings[version]
        raise ValueError(
            "Invalid package -- unknown version %s in info: %s" % (version, info)
        )
    raise ValueError("Invalid package -- missing version in info: %s" % info)
|
504
|
+
|
505
|
+
@classmethod
def _extract_archive_mfcontent_info(
    cls,
    archive: Any,
    packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
) -> Optional[Dict[str, Any]]:
    """
    Load (and cache) the marker information stored in an archive.

    Parameters
    ----------
    archive: Any
        The archive to read the MFCONTENT_MARKER member from.
    packaging_backend: Type[PackagingBackend], default TarPackagingBackend
        The packaging backend used to access the archive.

    Returns
    -------
    Optional[Dict[str, Any]]
        The decoded JSON content of the marker -- None if the archive has no
        marker or the marker is empty.
    """
    # NOTE(review): results are cached under id(archive); Python may reuse an id
    # once the object is garbage collected -- confirm archives outlive the cache use.
    cache = cls._cached_mfcontent_info
    cache_key = id(archive)
    if cache_key in cache:
        return cache[cache_key]

    marker_info = None  # type: Optional[Dict[str, Any]]
    if packaging_backend.cls_has_member(archive, MFCONTENT_MARKER):
        # The marker is present in the archive: decode it if it has any content
        raw_marker = packaging_backend.cls_get_member(archive, MFCONTENT_MARKER)
        if raw_marker:
            marker_info = json.loads(raw_marker)
    cache[cache_key] = marker_info
    return marker_info
|
524
|
+
|
525
|
+
@classmethod
def _extract_mfcontent_info(cls) -> Optional[Dict[str, Any]]:
    """
    Load (and cache) the marker information found on the local filesystem.

    The marker is looked for under the directory returned by
    `get_metaflow_root()`; the result, including None, is cached under the
    "_local" key.

    Returns
    -------
    Optional[Dict[str, Any]]
        The decoded JSON content of the marker file -- None if it does not exist.
    """
    cache = cls._cached_mfcontent_info
    if "_local" in cache:
        return cache["_local"]

    marker_info = None  # type: Optional[Dict[str, Any]]
    marker_path = os.path.join(get_metaflow_root(), MFCONTENT_MARKER)
    if os.path.exists(marker_path):
        with open(marker_path, "r", encoding="utf-8") as marker_file:
            marker_info = json.load(marker_file)
    cache["_local"] = marker_info
    return marker_info
|
540
|
+
|
541
|
+
def get_package_version(self) -> int:
    """
    Get the version of MetaflowCodeContent for this package.

    Returns
    -------
    int
        The `_version_id` recorded for this object's class.
    """
    # _version_id is set in __init_subclass__ when the subclass is created
    return self._version_id
|
547
|
+
|
548
|
+
|
549
|
+
class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
    """
    Handler for version 0 packages, i.e. packages without marker information.

    INFO, CONFIG and every other file are looked up directly under the directory
    returned by `get_metaflow_root()` (or by their plain name in an archive).
    """

    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Return the decoded JSON content of the local INFO file, or None if the
        file does not exist. `mfcontent_info` is not used for V0.
        """
        path_to_file = os.path.join(get_metaflow_root(), "INFO")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Return the decoded JSON content of the local CONFIG file, or None if the
        file does not exist. `mfcontent_info` is not used for V0.
        """
        path_to_file = os.path.join(get_metaflow_root(), "CONFIG")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        """
        For V0, the filename is simply the filename passed in, resolved against
        the Metaflow root. Returns None if no such file exists; `content_type`
        is not used.
        """
        path_to_file = os.path.join(get_metaflow_root(), filename)
        if os.path.isfile(path_to_file):
            return path_to_file
        return None

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """V0 provides no distribution finder: always returns None."""
        return None

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Return the decoded JSON content of the "INFO" member of the archive, or
        None if the backend returns nothing for it.
        """
        info_content = packaging_backend.cls_get_member(archive, "INFO")
        if info_content:
            return json.loads(info_content)
        return None

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Return the decoded JSON content of the "CONFIG" member of the archive, or
        None if the backend returns nothing for it.
        """
        config_content = packaging_backend.cls_get_member(archive, "CONFIG")
        if config_content:
            return json.loads(config_content)
        return None

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Return `filename` unchanged if the archive has such a member, None
        otherwise. `content_type` is not used for V0.
        """
        if packaging_backend.cls_has_member(archive, filename):
            # The file is present in the archive
            return filename
        return None

    @classmethod
    def get_archive_content_names_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[str]:
        """
        For V0, we use a static list of known files to classify the content

        Parameters
        ----------
        mfcontent_info: Optional[Dict[str, Any]]
            Not used for V0.
        archive: Any
            The archive to list.
        content_types: Optional[int], default None
            Bit mask of ContentType values to select. None selects everything.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend used to list the archive.

        Returns
        -------
        List[str]
            Each selected member of the archive, listed once.
        """
        known_prefixes = {
            "metaflow/": ContentType.CODE_CONTENT.value,
            "metaflow_extensions/": ContentType.CODE_CONTENT.value,
            "INFO": ContentType.OTHER_CONTENT.value,
            "CONFIG": ContentType.OTHER_CONTENT.value,
            "conda.manifest": ContentType.OTHER_CONTENT.value,
            "uv.lock": ContentType.OTHER_CONTENT.value,
            "pyproject.toml": ContentType.OTHER_CONTENT.value,
            # Used in nflx-metaflow-extensions
            "condav2-1.cnd": ContentType.OTHER_CONTENT.value,
        }
        if content_types is None:
            # The default must not reach the bitwise tests below (None & int
            # raises TypeError); treat it as "all content".
            content_types = ContentType.ALL_CONTENT.value

        to_return = []
        for filename in packaging_backend.cls_list_members(archive):
            # Classify each file exactly once: the first known entry that
            # matches wins; everything else is user content.
            classification = ContentType.USER_CONTENT.value
            for prefix, known_classification in known_prefixes.items():
                if prefix == filename or (
                    prefix.endswith("/") and filename.startswith(prefix)
                ):
                    classification = known_classification
                    break
            if content_types & classification:
                to_return.append(filename)
        return to_return

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        """Return a PYTHONPATH entry set to `dest_dir`."""
        return {"PYTHONPATH": dest_dir}

    def get_excluded_tl_entries(self) -> List[str]:
        """
        When packaging Metaflow from within an executing Metaflow flow, we need to
        exclude the files that are inserted by this content from being packaged (possibly).

        Use this function to return these files or top-level directories.

        Returns
        -------
        List[str]
            Files or directories to exclude
        """
        return ["CONFIG", "INFO"]

    # Other non-implemented methods are OK not being implemented as they will never
    # be called as they are only used when creating the package and we are starting
    # with V1.
|
685
|
+
|
686
|
+
|
687
|
+
class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
688
|
+
_code_dir = ".mf_code"
|
689
|
+
_other_dir = ".mf_meta"
|
690
|
+
_info_file = "INFO"
|
691
|
+
_config_file = "CONFIG"
|
692
|
+
_dist_info_file = "DIST_INFO"
|
693
|
+
|
694
|
+
def __init_subclass__(cls, **kwargs) -> None:
    """
    Deliberately do nothing when a subclass of MetaflowCodeContentV1Base is created.
    """
    # Important to add this here to prevent the subclass of MetaflowCodeContentV1Base from
    # also calling __init_subclass__ in MetaflowCodeContent (which would create a problem)
    return None
|
698
|
+
|
699
|
+
def __init__(self, code_dir: str, other_dir: str) -> None:
    """
    Record the directories to use for this instance.

    Parameters
    ----------
    code_dir: str
        Stored as `_code_dir` on the instance.
    other_dir: str
        Stored as `_other_dir` on the instance.
    """
    # These instance attributes shadow the class-level attributes of the same name.
    self._code_dir = code_dir
    self._other_dir = other_dir
|
702
|
+
|
703
|
+
@classmethod
def _get_otherfile_path(
    cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
) -> str:
    """
    Build the path of a non-code file.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Not used here.
    filename: str
        Name of the file.
    in_archive: bool
        If True, `filename` is returned unchanged; otherwise the path is built
        from the Metaflow root, its parent ("..") and the class-level `_other_dir`.

    Returns
    -------
    str
        The resulting path (its existence is not checked).
    """
    # NOTE(review): the in-archive name carries no `_other_dir` prefix -- confirm
    # this matches how files are stored in the archive.
    return (
        filename
        if in_archive
        else os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)
    )
|
710
|
+
|
711
|
+
@classmethod
def _get_codefile_path(
    cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
) -> str:
    """
    Build the path of a code file.

    Parameters
    ----------
    mfcontent_info: Optional[Dict[str, Any]]
        Not used here.
    filename: str
        Name of the file.
    in_archive: bool
        If True, `filename` is returned unchanged; otherwise it is joined to
        the Metaflow root.

    Returns
    -------
    str
        The resulting path (its existence is not checked).
    """
    return filename if in_archive else os.path.join(get_metaflow_root(), filename)
|
718
|
+
|
719
|
+
@classmethod
def get_info_impl(
    cls, mfcontent_info: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """
    Return the decoded JSON content of the local INFO file (`_info_file`), as
    located by `_get_otherfile_path`, or None if that file does not exist.
    """
    info_path = cls._get_otherfile_path(
        mfcontent_info, cls._info_file, in_archive=False
    )
    if not os.path.isfile(info_path):
        return None
    with open(info_path, "r", encoding="utf-8") as info_file:
        return json.load(info_file)
|
730
|
+
|
731
|
+
@classmethod
|
732
|
+
def get_config_impl(
|
733
|
+
cls, mfcontent_info: Optional[Dict[str, Any]]
|
734
|
+
) -> Optional[Dict[str, Any]]:
|
735
|
+
path_to_file = cls._get_otherfile_path(
|
736
|
+
mfcontent_info, cls._config_file, in_archive=False
|
737
|
+
)
|
738
|
+
if os.path.isfile(path_to_file):
|
739
|
+
with open(path_to_file, "r", encoding="utf-8") as f:
|
740
|
+
return json.load(f)
|
741
|
+
return None
|
742
|
+
|
743
|
+
@classmethod
|
744
|
+
def get_filename_impl(
|
745
|
+
cls,
|
746
|
+
mfcontent_info: Optional[Dict[str, Any]],
|
747
|
+
filename: str,
|
748
|
+
content_type: ContentType,
|
749
|
+
) -> Optional[str]:
|
750
|
+
if content_type == ContentType.CODE_CONTENT:
|
751
|
+
path_to_file = cls._get_codefile_path(
|
752
|
+
mfcontent_info, filename, in_archive=False
|
753
|
+
)
|
754
|
+
elif content_type in (ContentType.OTHER_CONTENT, ContentType.MODULE_CONTENT):
|
755
|
+
path_to_file = cls._get_otherfile_path(
|
756
|
+
mfcontent_info, filename, in_archive=False
|
757
|
+
)
|
758
|
+
else:
|
759
|
+
raise ValueError(
|
760
|
+
f"Invalid content type {content_type} for filename {filename}"
|
761
|
+
)
|
762
|
+
if os.path.isfile(path_to_file):
|
763
|
+
return path_to_file
|
764
|
+
return None
|
765
|
+
|
766
|
+
@classmethod
|
767
|
+
def get_distribution_finder_impl(
|
768
|
+
cls, mfcontent_info: Optional[Dict[str, Any]]
|
769
|
+
) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
|
770
|
+
path_to_file = cls._get_otherfile_path(
|
771
|
+
mfcontent_info, cls._dist_info_file, in_archive=False
|
772
|
+
)
|
773
|
+
if os.path.isfile(path_to_file):
|
774
|
+
with open(path_to_file, "r", encoding="utf-8") as f:
|
775
|
+
return PackagedDistributionFinder(json.load(f))
|
776
|
+
return None
|
777
|
+
|
778
|
+
@classmethod
|
779
|
+
def get_archive_info_impl(
|
780
|
+
cls,
|
781
|
+
mfcontent_info: Optional[Dict[str, Any]],
|
782
|
+
archive: Any,
|
783
|
+
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
784
|
+
) -> Optional[Dict[str, Any]]:
|
785
|
+
info_file = packaging_backend.cls_get_member(
|
786
|
+
archive,
|
787
|
+
cls._get_otherfile_path(mfcontent_info, cls._info_file, in_archive=True),
|
788
|
+
)
|
789
|
+
if info_file:
|
790
|
+
return json.loads(info_file)
|
791
|
+
return None
|
792
|
+
|
793
|
+
@classmethod
|
794
|
+
def get_archive_config_impl(
|
795
|
+
cls,
|
796
|
+
mfcontent_info: Optional[Dict[str, Any]],
|
797
|
+
archive: Any,
|
798
|
+
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
799
|
+
) -> Optional[Dict[str, Any]]:
|
800
|
+
config_file = packaging_backend.cls_get_member(
|
801
|
+
archive,
|
802
|
+
cls._get_otherfile_path(mfcontent_info, cls._config_file, in_archive=True),
|
803
|
+
)
|
804
|
+
if config_file:
|
805
|
+
return json.loads(config_file)
|
806
|
+
return None
|
807
|
+
|
808
|
+
@classmethod
|
809
|
+
def get_archive_filename_impl(
|
810
|
+
cls,
|
811
|
+
mfcontent_info: Optional[Dict[str, Any]],
|
812
|
+
archive: Any,
|
813
|
+
filename: str,
|
814
|
+
content_type: ContentType,
|
815
|
+
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
816
|
+
) -> str:
|
817
|
+
if content_type == ContentType.CODE_CONTENT:
|
818
|
+
path_to_file = cls._get_codefile_path(
|
819
|
+
mfcontent_info, filename, in_archive=False
|
820
|
+
)
|
821
|
+
elif content_type in (ContentType.OTHER_CONTENT, ContentType.MODULE_CONTENT):
|
822
|
+
path_to_file = cls._get_otherfile_path(
|
823
|
+
mfcontent_info, filename, in_archive=False
|
824
|
+
)
|
825
|
+
else:
|
826
|
+
raise ValueError(
|
827
|
+
f"Invalid content type {content_type} for filename {filename}"
|
828
|
+
)
|
829
|
+
if packaging_backend.cls_has_member(archive, path_to_file):
|
830
|
+
# The file is present in the archive
|
831
|
+
return path_to_file
|
832
|
+
return None
|
833
|
+
|
834
|
+
@classmethod
|
835
|
+
def get_archive_content_names_impl(
|
836
|
+
cls,
|
837
|
+
mfcontent_info: Optional[Dict[str, Any]],
|
838
|
+
archive: Any,
|
839
|
+
content_types: Optional[int] = None,
|
840
|
+
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
841
|
+
) -> List[str]:
|
842
|
+
to_return = []
|
843
|
+
module_content = set(mfcontent_info.get("module_files", []))
|
844
|
+
for filename in packaging_backend.cls_list_members(archive):
|
845
|
+
if filename.startswith(cls._other_dir) and (
|
846
|
+
content_types & ContentType.OTHER_CONTENT.value
|
847
|
+
):
|
848
|
+
to_return.append(filename)
|
849
|
+
elif filename.startswith(cls._code_dir):
|
850
|
+
# Special case for marker which is a other content even if in code.
|
851
|
+
if filename == f"{cls._code_dir}/{MFCONTENT_MARKER}":
|
852
|
+
if content_types & ContentType.OTHER_CONTENT.value:
|
853
|
+
to_return.append(filename)
|
854
|
+
else:
|
855
|
+
continue
|
856
|
+
# Here it is either module or code
|
857
|
+
if os.path.join(cls._code_dir, filename) in module_content:
|
858
|
+
if content_types & ContentType.MODULE_CONTENT.value:
|
859
|
+
to_return.append(filename)
|
860
|
+
elif content_types & ContentType.CODE_CONTENT.value:
|
861
|
+
to_return.append(filename)
|
862
|
+
else:
|
863
|
+
if content_types & ContentType.USER_CONTENT.value:
|
864
|
+
# Everything else is user content
|
865
|
+
to_return.append(filename)
|
866
|
+
return to_return
|
867
|
+
|
868
|
+
@classmethod
|
869
|
+
def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
|
870
|
+
return {"PYTHONPATH": f"{dest_dir}/{cls._code_dir}"}
|