metaflow 2.15.21__py2.py3-none-any.whl → 2.16.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. metaflow/__init__.py +7 -1
  2. metaflow/cli.py +16 -1
  3. metaflow/cli_components/init_cmd.py +1 -0
  4. metaflow/cli_components/run_cmds.py +6 -2
  5. metaflow/client/core.py +22 -30
  6. metaflow/datastore/task_datastore.py +0 -1
  7. metaflow/debug.py +5 -0
  8. metaflow/decorators.py +230 -70
  9. metaflow/extension_support/__init__.py +15 -8
  10. metaflow/extension_support/_empty_file.py +2 -2
  11. metaflow/flowspec.py +80 -53
  12. metaflow/graph.py +24 -2
  13. metaflow/meta_files.py +13 -0
  14. metaflow/metadata_provider/metadata.py +7 -1
  15. metaflow/metaflow_config.py +5 -0
  16. metaflow/metaflow_environment.py +82 -25
  17. metaflow/metaflow_version.py +1 -1
  18. metaflow/package/__init__.py +664 -0
  19. metaflow/packaging_sys/__init__.py +870 -0
  20. metaflow/packaging_sys/backend.py +113 -0
  21. metaflow/packaging_sys/distribution_support.py +153 -0
  22. metaflow/packaging_sys/tar_backend.py +86 -0
  23. metaflow/packaging_sys/utils.py +91 -0
  24. metaflow/packaging_sys/v1.py +476 -0
  25. metaflow/plugins/airflow/airflow.py +5 -1
  26. metaflow/plugins/airflow/airflow_cli.py +15 -4
  27. metaflow/plugins/argo/argo_workflows.py +15 -4
  28. metaflow/plugins/argo/argo_workflows_cli.py +16 -4
  29. metaflow/plugins/aws/batch/batch.py +22 -3
  30. metaflow/plugins/aws/batch/batch_cli.py +3 -0
  31. metaflow/plugins/aws/batch/batch_decorator.py +13 -5
  32. metaflow/plugins/aws/step_functions/step_functions.py +4 -1
  33. metaflow/plugins/aws/step_functions/step_functions_cli.py +15 -4
  34. metaflow/plugins/cards/card_decorator.py +0 -5
  35. metaflow/plugins/kubernetes/kubernetes.py +8 -1
  36. metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
  37. metaflow/plugins/kubernetes/kubernetes_decorator.py +13 -5
  38. metaflow/plugins/package_cli.py +25 -23
  39. metaflow/plugins/parallel_decorator.py +4 -2
  40. metaflow/plugins/pypi/bootstrap.py +8 -2
  41. metaflow/plugins/pypi/conda_decorator.py +39 -82
  42. metaflow/plugins/pypi/conda_environment.py +6 -2
  43. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  44. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  45. metaflow/plugins/timeout_decorator.py +0 -1
  46. metaflow/plugins/uv/bootstrap.py +11 -0
  47. metaflow/plugins/uv/uv_environment.py +4 -2
  48. metaflow/pylint_wrapper.py +5 -1
  49. metaflow/runner/click_api.py +5 -4
  50. metaflow/runner/subprocess_manager.py +14 -2
  51. metaflow/runtime.py +37 -11
  52. metaflow/task.py +91 -7
  53. metaflow/user_configs/config_options.py +13 -8
  54. metaflow/user_configs/config_parameters.py +0 -4
  55. metaflow/user_decorators/__init__.py +0 -0
  56. metaflow/user_decorators/common.py +144 -0
  57. metaflow/user_decorators/mutable_flow.py +499 -0
  58. metaflow/user_decorators/mutable_step.py +424 -0
  59. metaflow/user_decorators/user_flow_decorator.py +263 -0
  60. metaflow/user_decorators/user_step_decorator.py +712 -0
  61. metaflow/util.py +4 -1
  62. metaflow/version.py +1 -1
  63. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/METADATA +2 -2
  64. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/RECORD +71 -60
  65. metaflow/info_file.py +0 -25
  66. metaflow/package.py +0 -203
  67. metaflow/user_configs/config_decorators.py +0 -568
  68. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Makefile +0 -0
  69. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  70. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  71. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/licenses/LICENSE +0 -0
  74. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,870 @@
1
+ import json
2
+ import os
3
+
4
+ from enum import IntEnum
5
+ from types import ModuleType
6
+ from typing import (
7
+ Any,
8
+ Dict,
9
+ Generator,
10
+ List,
11
+ Optional,
12
+ TYPE_CHECKING,
13
+ Tuple,
14
+ Type,
15
+ Union,
16
+ )
17
+
18
+ from metaflow.packaging_sys.distribution_support import PackagedDistributionFinder
19
+
20
+
21
+ from .backend import PackagingBackend
22
+ from .tar_backend import TarPackagingBackend
23
+
24
+ from ..util import get_metaflow_root
25
+
26
+ MFCONTENT_MARKER = ".mf_install"
27
+
28
+ if TYPE_CHECKING:
29
+ import metaflow.extension_support.metadata
30
+
31
+
32
class ContentType(IntEnum):
    """
    Bit values describing the kind of a file that is added to a code package.

    Each kind occupies its own bit so that several kinds can be OR-ed together
    into a plain integer mask (see ALL_CONTENT).
    """

    # File being added is user code (ie: the directory with the flow file)
    USER_CONTENT = 1 << 0
    # File being added is non-user code (libraries, metaflow itself, ...)
    CODE_CONTENT = 1 << 1
    # File being added is a python module
    MODULE_CONTENT = 1 << 2
    # File being added is a non-python file
    OTHER_CONTENT = 1 << 3

    # Mask with the bit of every content kind above set
    ALL_CONTENT = USER_CONTENT | CODE_CONTENT | MODULE_CONTENT | OTHER_CONTENT
43
+
44
+
45
class MetaflowCodeContent:
    """
    Base class for all Metaflow code packages (non user code).

    A Metaflow code package, at a minimum, contains:
      - a special INFO file (containing a bunch of metadata about the Metaflow environment)
      - a special CONFIG file (containing user configurations for the flow)

    Declare all other MetaflowCodeContent subclasses (versions) here to handle just the functions
    that are not implemented here. In a *separate* file, declare any other
    function for that specific version.

    NOTE: This file must remain as dependency-free as possible as it is loaded *very*
    early on. This is why you must declare a *separate* class implementing what you want
    the Metaflow code package (non user) to do.
    """

    # Cache of the parsed MFCONTENT_MARKER content. The key "_local" is used for
    # the package expanded on the local filesystem; id(archive) is used for archives.
    _cached_mfcontent_info = {}

    # Maps a version_id to the class handling it (filled in by __init_subclass__).
    _mappings = {}

    @classmethod
    def get_info(cls) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special INFO file on the local filesystem after
        the code package has been expanded.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the INFO file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_info_impl(mfcontent_info)

    @classmethod
    def get_config(cls) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special CONFIG file on the local filesystem after
        the code package has been expanded.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the CONFIG file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_config_impl(mfcontent_info)

    @classmethod
    def get_filename(cls, filename: str, content_type: ContentType) -> Optional[str]:
        """
        Get the path to a file extracted from the archive. The filename is the filename
        passed in when creating the archive and content_type is the type of the content.

        This function will return the local path where the file can be found after
        the package has been extracted.

        Parameters
        ----------
        filename: str
            The name of the file on the filesystem.
        content_type: ContentType
            The type of the content.

        Returns
        -------
        Optional[str]
            The path to the file on the local filesystem or None if not found.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)

    @classmethod
    def get_env_vars_for_packaged_metaflow(
        cls, dest_dir: str
    ) -> Optional[Dict[str, str]]:
        """
        Get the environment variables that are needed to run Metaflow when it is
        packaged. This is typically used to set the PYTHONPATH to include the
        directory where the Metaflow code package has been extracted.

        Parameters
        ----------
        dest_dir: str
            Directory passed on to the version-specific
            get_post_extract_env_vars_impl.

        Returns
        -------
        Optional[Dict[str, str]]
            The environment variables that are needed to run Metaflow when it is
            packaged -- None if there are no such variables (not packaged for example)
        """
        mfcontent_info = cls._extract_mfcontent_info()
        if mfcontent_info is None:
            # No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
            # package so no environment variables to set.
            return None
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_post_extract_env_vars_impl(dest_dir)

    @classmethod
    def get_archive_info(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special INFO file in the archive.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the INFO file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_info_impl(
            mfcontent_info, archive, packaging_backend
        )

    @classmethod
    def get_archive_config(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special CONFIG file in the archive.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the CONFIG file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_config_impl(
            mfcontent_info, archive, packaging_backend
        )

    @classmethod
    def get_archive_filename(
        cls,
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Get the filename of the archive. This does not do any extraction but simply
        returns where, in the archive, the file is located. This is the equivalent of
        get_filename but for files not extracted yet.

        Parameters
        ----------
        archive: Any
            The archive to get the filename from.
        filename: str
            The name of the file in the archive.
        content_type: ContentType
            The type of the content (e.g., code, other, etc.).
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        Optional[str]
            The filename of the archive or None if not found.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_filename_impl(
            mfcontent_info, archive, filename, content_type, packaging_backend
        )

    @classmethod
    def get_archive_content_names(
        cls,
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[str]:
        """
        Get the names of the members of the archive, filtered by content type.

        The work is delegated to get_archive_content_names_impl of the class
        handling the version of the archive.

        Parameters
        ----------
        archive: Any
            The archive to list.
        content_types: Optional[int], default None
            Mask of ContentType values forwarded to the version-specific
            implementation.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        List[str]
            Names of the selected members of the archive.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_content_names_impl(
            mfcontent_info, archive, content_types, packaging_backend
        )

    @classmethod
    def get_distribution_finder(
        cls,
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """
        Get the distribution finder for the Metaflow code package (if applicable).

        Some packages will include distribution information to "pretend" that some packages
        are actually distributions even if we just include them in the code package.

        Returns
        -------
        Optional["metaflow.extension_support.metadata.DistributionFinder"]
            The distribution finder for the Metaflow code package -- None if there is no
            such finder.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_distribution_finder_impl(mfcontent_info)

    @classmethod
    def get_post_extract_env_vars(
        cls, version_id: int, dest_dir: str = "."
    ) -> Dict[str, str]:
        """
        Get the post-extract environment variables that are needed to access the content
        that has been extracted into dest_dir.

        This will typically involve setting PYTHONPATH.

        Parameters
        ----------
        version_id: int
            The version of MetaflowCodeContent for this package.
        dest_dir: str, default "."
            The directory where the content has been extracted to.

        Returns
        -------
        Dict[str, str]
            The post-extract environment variables that are needed to access the content
            that has been extracted into dest_dir.

        Raises
        ------
        ValueError
            If no class is registered for version_id.
        """
        if version_id not in cls._mappings:
            raise ValueError(
                "Invalid package -- unknown version %s in info: %s"
                % (version_id, cls._mappings)
            )
        return cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)

    # Implement the _impl methods in the base subclass (in this file). These need to
    # happen with as few imports as possible to prevent circular dependencies.
    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_info_impl not implemented")

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_config_impl not implemented")

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        raise NotImplementedError("get_filename_impl not implemented")

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        raise NotImplementedError("get_distribution_finder_impl not implemented")

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_archive_info_impl not implemented")

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_archive_config_impl not implemented")

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        raise NotImplementedError("get_archive_filename_impl not implemented")

    @classmethod
    def get_archive_content_names_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[str]:
        raise NotImplementedError("get_archive_content_names_impl not implemented")

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        raise NotImplementedError("get_post_extract_env_vars_impl not implemented")

    def __init_subclass__(cls, version_id, **kwargs) -> None:
        """
        Register the subclass as the handler for version_id.

        Raises
        ------
        ValueError
            If another class is already registered for version_id.
        """
        super().__init_subclass__(**kwargs)
        if version_id in MetaflowCodeContent._mappings:
            raise ValueError(
                "Version ID %s already exists in MetaflowCodeContent mappings "
                "-- this is a bug in Metaflow." % str(version_id)
            )
        MetaflowCodeContent._mappings[version_id] = cls
        cls._version_id = version_id

    # Implement these methods in sub-classes of the base sub-classes. These methods
    # are called later and can have more dependencies and so can live in other files.
    def get_excluded_tl_entries(self) -> List[str]:
        """
        When packaging Metaflow from within an executing Metaflow flow, we need to
        exclude the files that are inserted by this content from being packaged (possibly).

        Use this function to return these files or top-level directories.

        Returns
        -------
        List[str]
            Files or directories to exclude
        """
        return []

    def content_names(
        self, content_types: Optional[int] = None
    ) -> Generator[Tuple[str, str], None, None]:
        """
        Detailed list of the content of this MetaflowCodeContent. This will list all files
        (or non files -- for the INFO or CONFIG data for example) present in the archive.

        Parameters
        ----------
        content_types : Optional[int]
            The type of content to get the names of. If None, all content is returned.

        Yields
        ------
        Generator[Tuple[str, str], None, None]
            Path on the filesystem and the name in the archive
        """
        raise NotImplementedError("content_names not implemented")

    def contents(
        self, content_types: Optional[int] = None
    ) -> Generator[Tuple[Union[bytes, str], str], None, None]:
        """
        Very similar to content_names but returns the content of the non-files
        as well as bytes. For files, identical output as content_names

        Parameters
        ----------
        content_types : Optional[int]
            The type of content to get the content of. If None, all content is returned.

        Yields
        ------
        Generator[Tuple[Union[str, bytes], str], None, None]
            Content of the MF content
        """
        raise NotImplementedError("content not implemented")

    def show(self) -> str:
        """
        Returns a more human-readable string representation of the content of this
        MetaflowCodeContent. This will not, for example, list all files but summarize what
        is included at a more high level.

        Returns
        -------
        str
            A human-readable string representation of the content of this MetaflowCodeContent
        """
        raise NotImplementedError("show not implemented")

    def add_info(self, info: Dict[str, Any]) -> None:
        """
        Add the content of the INFO file to the Metaflow content

        Parameters
        ----------
        info: Dict[str, Any]
            The content of the INFO file
        """
        raise NotImplementedError("add_info not implemented")

    def add_config(self, config: Dict[str, Any]) -> None:
        """
        Add the content of the CONFIG file to the Metaflow content

        Parameters
        ----------
        config: Dict[str, Any]
            The content of the CONFIG file
        """
        raise NotImplementedError("add_config not implemented")

    def add_module(self, module_path: ModuleType) -> None:
        """
        Add a python module to the Metaflow content

        Parameters
        ----------
        module_path: ModuleType
            The module to add
        """
        raise NotImplementedError("add_module not implemented")

    def add_code_file(self, file_path: str, file_name: str) -> None:
        """
        Add a code file to the Metaflow content

        Parameters
        ----------
        file_path: str
            The path to the code file to add (on the filesystem)
        file_name: str
            The path in the archive to add the code file to
        """
        raise NotImplementedError("add_code_file not implemented")

    def add_other_file(self, file_path: str, file_name: str) -> None:
        """
        Add a non-python file to the Metaflow content

        Parameters
        ----------
        file_path: str
            The path to the file to add (on the filesystem)
        file_name: str
            The path in the archive to add the file to
        """
        raise NotImplementedError("add_other_file not implemented")

    @classmethod
    def _get_mfcontent_class(
        cls, info: Optional[Dict[str, Any]]
    ) -> Type["MetaflowCodeContent"]:
        """
        Select the class handling the package described by info.

        Parameters
        ----------
        info: Optional[Dict[str, Any]]
            Parsed content of the MFCONTENT_MARKER file; None selects
            MetaflowCodeContentV0.

        Returns
        -------
        Type[MetaflowCodeContent]
            The class registered for info["version"].

        Raises
        ------
        ValueError
            If info has no "version" key or the version is not registered.
        """
        if info is None:
            return MetaflowCodeContentV0
        if "version" not in info:
            raise ValueError("Invalid package -- missing version in info: %s" % info)
        version = info["version"]
        if version not in cls._mappings:
            raise ValueError(
                "Invalid package -- unknown version %s in info: %s" % (version, info)
            )

        return cls._mappings[version]

    @classmethod
    def _extract_archive_mfcontent_info(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Read and parse the MFCONTENT_MARKER member of an archive.

        The result is cached per archive object for as long as that object is
        alive.

        Parameters
        ----------
        archive: Any
            The archive to inspect.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend used to access the archive.

        Returns
        -------
        Optional[Dict[str, Any]]
            The parsed JSON content of the marker -- None if the archive has no
            marker or the marker is empty.
        """
        # Imported here to keep the module-level imports of this file minimal.
        import weakref

        cache_key = id(archive)
        if cache_key in cls._cached_mfcontent_info:
            return cls._cached_mfcontent_info[cache_key]

        mfcontent_info: Optional[Dict[str, Any]] = None
        # Here we need to extract the information from the archive
        if packaging_backend.cls_has_member(archive, MFCONTENT_MARKER):
            # The MFCONTENT_MARKER file is present in the archive
            # We can extract the information from it
            extracted_info = packaging_backend.cls_get_member(archive, MFCONTENT_MARKER)
            if extracted_info:
                mfcontent_info = json.loads(extracted_info)

        # id() values are only unique among objects that are alive at the same time:
        # once this archive is collected, another archive can get the same id and
        # would be served this entry. Evict the entry when the archive goes away.
        try:
            weakref.finalize(
                archive, cls._cached_mfcontent_info.pop, cache_key, None
            )
        except TypeError:
            # The archive object does not support weak references so its lifetime
            # cannot be tracked; do not cache rather than risk a stale entry.
            return mfcontent_info

        cls._cached_mfcontent_info[cache_key] = mfcontent_info
        return mfcontent_info

    @classmethod
    def _extract_mfcontent_info(cls) -> Optional[Dict[str, Any]]:
        """
        Read and parse the MFCONTENT_MARKER file located in the Metaflow root.

        The result (including None) is cached under the "_local" key.

        Returns
        -------
        Optional[Dict[str, Any]]
            The parsed JSON content of the marker -- None if the file does not exist.
        """
        if "_local" in cls._cached_mfcontent_info:
            return cls._cached_mfcontent_info["_local"]

        mfcontent_info: Optional[Dict[str, Any]] = None
        marker_path = os.path.join(get_metaflow_root(), MFCONTENT_MARKER)
        if os.path.exists(marker_path):
            with open(marker_path, "r", encoding="utf-8") as f:
                mfcontent_info = json.load(f)
        cls._cached_mfcontent_info["_local"] = mfcontent_info
        return mfcontent_info

    def get_package_version(self) -> int:
        """
        Get the version of MetaflowCodeContent for this package.
        """
        # _version_id is set in __init_subclass__ when the subclass is created
        return self._version_id
547
+
548
+
549
class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
    """
    Handler for version 0 packages.

    This is the class MetaflowCodeContent falls back to when no MFCONTENT_MARKER
    information is available. INFO and CONFIG are looked up directly in the
    Metaflow root (or at the top level of the archive).
    """

    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Load the INFO file from the Metaflow root -- None if it does not exist.
        """
        path_to_file = os.path.join(get_metaflow_root(), "INFO")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Load the CONFIG file from the Metaflow root -- None if it does not exist.
        """
        path_to_file = os.path.join(get_metaflow_root(), "CONFIG")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        """
        For V0, the filename is simply the filename passed in.

        Returns the path of filename under the Metaflow root -- None if no such
        file exists. content_type is not used.
        """
        path_to_file = os.path.join(get_metaflow_root(), filename)
        if os.path.isfile(path_to_file):
            return path_to_file
        return None

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """
        V0 never provides a distribution finder.
        """
        return None

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Parse the "INFO" member of the archive -- None if it is missing or empty.
        """
        info_content = packaging_backend.cls_get_member(archive, "INFO")
        if info_content:
            return json.loads(info_content)
        return None

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Parse the "CONFIG" member of the archive -- None if it is missing or empty.
        """
        config_content = packaging_backend.cls_get_member(archive, "CONFIG")
        if config_content:
            return json.loads(config_content)
        return None

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Return filename if it is a member of the archive -- None otherwise.

        content_type is not used for V0.
        """
        if packaging_backend.cls_has_member(archive, filename):
            # The file is present in the archive
            return filename
        return None

    @classmethod
    def get_archive_content_names_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[str]:
        """
        For V0, we use a static list of known files to classify the content

        Parameters
        ----------
        mfcontent_info: Optional[Dict[str, Any]]
            Not used for V0.
        archive: Any
            The archive to list.
        content_types: Optional[int], default None
            Mask of ContentType values to select. If None, all content is returned.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        List[str]
            Each member of the archive whose classification is selected by
            content_types, listed once and in archive order.
        """
        if content_types is None:
            content_types = ContentType.ALL_CONTENT.value

        # Entries ending with "/" match as a prefix; the others match the exact name.
        known_prefixes = {
            "metaflow/": ContentType.CODE_CONTENT.value,
            "metaflow_extensions/": ContentType.CODE_CONTENT.value,
            "INFO": ContentType.OTHER_CONTENT.value,
            "CONFIG": ContentType.OTHER_CONTENT.value,
            "conda.manifest": ContentType.OTHER_CONTENT.value,
            "uv.lock": ContentType.OTHER_CONTENT.value,
            "pyproject.toml": ContentType.OTHER_CONTENT.value,
            # Used in nflx-metaflow-extensions
            "condav2-1.cnd": ContentType.OTHER_CONTENT.value,
        }
        to_return = []
        for filename in packaging_backend.cls_list_members(archive):
            # Everything that matches no known entry is user content. The decision
            # is made once per file, after looking at all known entries, so a file
            # is never listed more than once nor classified as user content just
            # because it failed to match one of the entries.
            classification = ContentType.USER_CONTENT.value
            for prefix, prefix_classification in known_prefixes.items():
                if (
                    prefix.endswith("/") and filename.startswith(prefix)
                ) or prefix == filename:
                    classification = prefix_classification
                    break
            if content_types & classification:
                to_return.append(filename)
        return to_return

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        """
        For V0, PYTHONPATH is set to dest_dir itself.
        """
        return {"PYTHONPATH": dest_dir}

    def get_excluded_tl_entries(self) -> List[str]:
        """
        When packaging Metaflow from within an executing Metaflow flow, we need to
        exclude the files that are inserted by this content from being packaged (possibly).

        Use this function to return these files or top-level directories.

        Returns
        -------
        List[str]
            Files or directories to exclude
        """
        return ["CONFIG", "INFO"]

    # Other non-implemented methods are OK not being implemented as they will never
    # be called as they are only used when creating the package and we are starting
    # with V1.
685
+
686
+
687
class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
    """
    Base handler for version 1 packages.

    Code lives under the _code_dir directory and the special files (INFO, CONFIG,
    DIST_INFO) live under the _other_dir directory, which is looked up next to
    the Metaflow root on the local filesystem.
    """

    _code_dir = ".mf_code"
    _other_dir = ".mf_meta"
    _info_file = "INFO"
    _config_file = "CONFIG"
    _dist_info_file = "DIST_INFO"

    def __init_subclass__(cls, **kwargs) -> None:
        # Important to add this here to prevent the subclass of MetaflowCodeContentV1Base from
        # also calling __init_subclass__ in MetaflowCodeContent (which would create a problem)
        return None

    def __init__(self, code_dir: str, other_dir: str) -> None:
        # These instance attributes shadow the class-level defaults for this instance
        # only; the classmethods below keep reading the class-level values.
        self._code_dir = code_dir
        self._other_dir = other_dir

    @classmethod
    def _get_otherfile_path(
        cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
    ) -> str:
        """
        Return where a non-code file is located.

        In the archive, filename is returned unchanged; on the local filesystem
        the file is located in _other_dir, next to the Metaflow root.
        """
        # NOTE(review): get_archive_content_names_impl expects archive members to be
        # prefixed with _other_dir while this returns the bare filename -- confirm
        # which naming the archive actually uses.
        if in_archive:
            return filename
        return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)

    @classmethod
    def _get_codefile_path(
        cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
    ) -> str:
        """
        Return where a code file is located.

        In the archive, filename is returned unchanged; on the local filesystem
        the file is located under the Metaflow root.
        """
        if in_archive:
            return filename
        return os.path.join(get_metaflow_root(), filename)

    @classmethod
    def _get_path_for_content_type(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
        in_archive: bool,
    ) -> str:
        """
        Resolve filename according to content_type, either in the archive or locally.

        Raises
        ------
        ValueError
            If content_type is not CODE_CONTENT, OTHER_CONTENT or MODULE_CONTENT.
        """
        if content_type == ContentType.CODE_CONTENT:
            return cls._get_codefile_path(
                mfcontent_info, filename, in_archive=in_archive
            )
        if content_type in (ContentType.OTHER_CONTENT, ContentType.MODULE_CONTENT):
            return cls._get_otherfile_path(
                mfcontent_info, filename, in_archive=in_archive
            )
        raise ValueError(
            f"Invalid content type {content_type} for filename {filename}"
        )

    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Load the INFO file from the local filesystem -- None if it does not exist.
        """
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._info_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Load the CONFIG file from the local filesystem -- None if it does not exist.
        """
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._config_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        """
        Return the local path of filename -- None if no such file exists.

        Raises
        ------
        ValueError
            If content_type is not CODE_CONTENT, OTHER_CONTENT or MODULE_CONTENT.
        """
        path_to_file = cls._get_path_for_content_type(
            mfcontent_info, filename, content_type, in_archive=False
        )
        if os.path.isfile(path_to_file):
            return path_to_file
        return None

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """
        Build a PackagedDistributionFinder from the DIST_INFO file -- None if the
        file does not exist.
        """
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._dist_info_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return PackagedDistributionFinder(json.load(f))
        return None

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Parse the INFO member of the archive -- None if it is missing or empty.
        """
        info_file = packaging_backend.cls_get_member(
            archive,
            cls._get_otherfile_path(mfcontent_info, cls._info_file, in_archive=True),
        )
        if info_file:
            return json.loads(info_file)
        return None

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Parse the CONFIG member of the archive -- None if it is missing or empty.
        """
        config_file = packaging_backend.cls_get_member(
            archive,
            cls._get_otherfile_path(mfcontent_info, cls._config_file, in_archive=True),
        )
        if config_file:
            return json.loads(config_file)
        return None

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Return the name of filename inside the archive -- None if it is not a member.

        Raises
        ------
        ValueError
            If content_type is not CODE_CONTENT, OTHER_CONTENT or MODULE_CONTENT.
        """
        # The lookup happens in the archive so the archive-side name is needed
        # (in_archive=True); a local filesystem path is never an archive member.
        path_to_file = cls._get_path_for_content_type(
            mfcontent_info, filename, content_type, in_archive=True
        )
        if packaging_backend.cls_has_member(archive, path_to_file):
            # The file is present in the archive
            return path_to_file
        return None

    @classmethod
    def get_archive_content_names_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[str]:
        """
        List the members of the archive selected by content_types.

        Members under _other_dir, and the MFCONTENT_MARKER file, are other content;
        members under _code_dir are module or code content; everything else is
        user content.

        Parameters
        ----------
        mfcontent_info: Optional[Dict[str, Any]]
            Parsed marker content; its "module_files" entry (if any) lists the
            module files.
        archive: Any
            The archive to list.
        content_types: Optional[int], default None
            Mask of ContentType values to select. If None, all content is returned.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        List[str]
            Each selected member of the archive, listed once and in archive order.
        """
        if content_types is None:
            content_types = ContentType.ALL_CONTENT.value

        module_content = set((mfcontent_info or {}).get("module_files", []))
        marker_name = f"{cls._code_dir}/{MFCONTENT_MARKER}"

        to_return = []
        for filename in packaging_backend.cls_list_members(archive):
            # Classify each member exactly once, then apply the mask: this keeps a
            # member from being listed twice or from being reported as user content
            # only because its real type was not requested.
            if filename.startswith(cls._other_dir):
                classification = ContentType.OTHER_CONTENT.value
            elif filename == marker_name:
                # Special case for marker which is a other content even if in code.
                classification = ContentType.OTHER_CONTENT.value
            elif filename.startswith(cls._code_dir):
                # Here it is either module or code
                # NOTE(review): filename already starts with _code_dir so this key
                # carries the prefix twice -- confirm against whatever writes
                # "module_files".
                if os.path.join(cls._code_dir, filename) in module_content:
                    classification = ContentType.MODULE_CONTENT.value
                else:
                    classification = ContentType.CODE_CONTENT.value
            else:
                # Everything else is user content
                classification = ContentType.USER_CONTENT.value

            if content_types & classification:
                to_return.append(filename)
        return to_return

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        """
        For V1, PYTHONPATH points at the _code_dir directory inside dest_dir.
        """
        return {"PYTHONPATH": f"{dest_dir}/{cls._code_dir}"}