metaflow 2.15.20__py2.py3-none-any.whl → 2.16.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. metaflow/__init__.py +7 -1
  2. metaflow/cli.py +16 -1
  3. metaflow/cli_components/init_cmd.py +1 -0
  4. metaflow/cli_components/run_cmds.py +6 -2
  5. metaflow/client/core.py +22 -30
  6. metaflow/datastore/task_datastore.py +0 -1
  7. metaflow/debug.py +5 -0
  8. metaflow/decorators.py +230 -70
  9. metaflow/extension_support/__init__.py +15 -8
  10. metaflow/extension_support/_empty_file.py +2 -2
  11. metaflow/flowspec.py +80 -53
  12. metaflow/graph.py +24 -2
  13. metaflow/meta_files.py +13 -0
  14. metaflow/metadata_provider/metadata.py +7 -1
  15. metaflow/metaflow_config.py +5 -0
  16. metaflow/metaflow_environment.py +82 -25
  17. metaflow/metaflow_version.py +1 -1
  18. metaflow/package/__init__.py +664 -0
  19. metaflow/packaging_sys/__init__.py +870 -0
  20. metaflow/packaging_sys/backend.py +113 -0
  21. metaflow/packaging_sys/distribution_support.py +153 -0
  22. metaflow/packaging_sys/tar_backend.py +86 -0
  23. metaflow/packaging_sys/utils.py +91 -0
  24. metaflow/packaging_sys/v1.py +476 -0
  25. metaflow/plugins/airflow/airflow.py +5 -1
  26. metaflow/plugins/airflow/airflow_cli.py +15 -4
  27. metaflow/plugins/argo/argo_workflows.py +23 -17
  28. metaflow/plugins/argo/argo_workflows_cli.py +16 -4
  29. metaflow/plugins/aws/batch/batch.py +22 -3
  30. metaflow/plugins/aws/batch/batch_cli.py +3 -0
  31. metaflow/plugins/aws/batch/batch_decorator.py +13 -5
  32. metaflow/plugins/aws/step_functions/step_functions.py +4 -1
  33. metaflow/plugins/aws/step_functions/step_functions_cli.py +15 -4
  34. metaflow/plugins/cards/card_decorator.py +0 -5
  35. metaflow/plugins/kubernetes/kubernetes.py +8 -1
  36. metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
  37. metaflow/plugins/kubernetes/kubernetes_decorator.py +13 -5
  38. metaflow/plugins/package_cli.py +25 -23
  39. metaflow/plugins/parallel_decorator.py +4 -2
  40. metaflow/plugins/pypi/bootstrap.py +8 -2
  41. metaflow/plugins/pypi/conda_decorator.py +39 -82
  42. metaflow/plugins/pypi/conda_environment.py +6 -2
  43. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  44. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  45. metaflow/plugins/timeout_decorator.py +0 -1
  46. metaflow/plugins/uv/bootstrap.py +11 -0
  47. metaflow/plugins/uv/uv_environment.py +4 -2
  48. metaflow/pylint_wrapper.py +5 -1
  49. metaflow/runner/click_api.py +5 -4
  50. metaflow/runner/subprocess_manager.py +14 -2
  51. metaflow/runtime.py +37 -11
  52. metaflow/task.py +91 -7
  53. metaflow/user_configs/config_options.py +13 -8
  54. metaflow/user_configs/config_parameters.py +0 -4
  55. metaflow/user_decorators/__init__.py +0 -0
  56. metaflow/user_decorators/common.py +144 -0
  57. metaflow/user_decorators/mutable_flow.py +499 -0
  58. metaflow/user_decorators/mutable_step.py +424 -0
  59. metaflow/user_decorators/user_flow_decorator.py +263 -0
  60. metaflow/user_decorators/user_step_decorator.py +712 -0
  61. metaflow/util.py +4 -1
  62. metaflow/version.py +1 -1
  63. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/METADATA +2 -2
  64. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/RECORD +71 -60
  65. metaflow/info_file.py +0 -25
  66. metaflow/package.py +0 -203
  67. metaflow/user_configs/config_decorators.py +0 -568
  68. {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Makefile +0 -0
  69. {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  70. {metaflow-2.15.20.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  71. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/licenses/LICENSE +0 -0
  74. {metaflow-2.15.20.dist-info → metaflow-2.16.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,476 @@
1
+ import json
2
+ import os
3
+ import sys
4
+ from pathlib import Path
5
+ from types import ModuleType
6
+ from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Union
7
+
8
+ from ..debug import debug
9
+ from ..extension_support import (
10
+ EXT_EXCLUDE_SUFFIXES,
11
+ extension_info,
12
+ package_mfext_all,
13
+ package_mfext_all_descriptions,
14
+ )
15
+ from ..exception import MetaflowException
16
+ from ..metaflow_version import get_version
17
+ from ..user_decorators.user_flow_decorator import FlowMutatorMeta
18
+ from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
19
+ from ..util import get_metaflow_root
20
+ from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
21
+ from .distribution_support import _ModuleInfo, modules_to_distributions
22
+ from .utils import suffix_filter, walk
23
+
24
+
25
+ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
26
+ METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"]
27
+
28
+ def __init__(
29
+ self,
30
+ code_dir: str = MetaflowCodeContentV1Base._code_dir,
31
+ other_dir: str = MetaflowCodeContentV1Base._other_dir,
32
+ criteria: Callable[[ModuleType], bool] = lambda x: True,
33
+ ):
34
+ super().__init__(code_dir, other_dir)
35
+
36
+ self._metaflow_root = get_metaflow_root()
37
+ self._metaflow_version = get_version()
38
+
39
+ self._criteria = criteria
40
+
41
+ # We try to find the modules we need to package. We will first look at all modules
42
+ # and apply the criteria to them. Then we will use the most parent module that
43
+ # fits the criteria as the module to package
44
+ modules = filter(lambda x: criteria(x[1]), sys.modules.items())
45
+ # Ensure that we see the parent modules first
46
+ modules = sorted(modules, key=lambda x: x[0])
47
+ if modules:
48
+ last_prefix = modules[0][0]
49
+ new_modules = [modules[0]]
50
+ for name, mod in modules[1:]:
51
+ if name.startswith(last_prefix + "."):
52
+ # This is a submodule of the last module, we can skip it
53
+ continue
54
+ # Otherwise, we have a new top-level module
55
+ last_prefix = name
56
+ new_modules.append((name, mod))
57
+ else:
58
+ new_modules = []
59
+
60
+ self._modules = {
61
+ name: _ModuleInfo(
62
+ name,
63
+ set(
64
+ Path(p).resolve().as_posix()
65
+ for p in getattr(mod, "__path__", [mod.__file__])
66
+ ),
67
+ mod,
68
+ True, # This is a Metaflow module (see filter below)
69
+ )
70
+ for (name, mod) in new_modules
71
+ }
72
+
73
+ # Filter the modules
74
+ self._modules = {
75
+ name: info for name, info in self._modules.items() if criteria(info.module)
76
+ }
77
+
78
+ # Contain metadata information regarding the distributions packaged.
79
+ # This allows Metaflow to "fake" distribution information when packaged
80
+ self._distmetainfo = {} # type: Dict[str, Dict[str, str]]
81
+
82
+ # Maps an absolute path on the filesystem to the path of the file in the
83
+ # archive.
84
+ self._files = {} # type: Dict[str, str]
85
+ self._files_from_modules = {} # type: Dict[str, str]
86
+
87
+ self._other_files = {} # type: Dict[str, str]
88
+ self._other_content = {} # type: Dict[str, bytes]
89
+
90
+ debug.package_exec(f"Used system modules found: {str(self._modules)}")
91
+
92
+ # Populate with files from the third party modules
93
+ for k, v in self._modules.items():
94
+ self._files_from_modules.update(self._module_files(k, v.root_paths))
95
+
96
+ # Figure out the files to package for Metaflow and extensions
97
+ self._cached_metaflow_files = list(self._metaflow_distribution_files())
98
+ self._cached_metaflow_files.extend(list(self._metaflow_extension_files()))
99
+
100
+ def create_mfcontent_info(self) -> Dict[str, Any]:
101
+ return {"version": 1, "module_files": list(self._files_from_modules.values())}
102
+
103
+ def get_excluded_tl_entries(self) -> List[str]:
104
+ """
105
+ When packaging Metaflow from within an executing Metaflow flow, we need to
106
+ exclude the files that are inserted by this content from being packaged (possibly).
107
+
108
+ Use this function to return these files or top-level directories.
109
+
110
+ Returns
111
+ -------
112
+ List[str]
113
+ Files or directories to exclude
114
+ """
115
+ return [self._code_dir, self._other_dir]
116
+
117
+ def content_names(
118
+ self, content_types: Optional[int] = None
119
+ ) -> Generator[Tuple[str, str], None, None]:
120
+ """
121
+ Detailed list of the content of this MetaflowCodeContent. This will list all files
122
+ (or non files -- for the INFO or CONFIG data for example) present in the archive.
123
+
124
+ Parameters
125
+ ----------
126
+ content_types : Optional[int]
127
+ The type of content to get the names of. If None, all content is returned.
128
+
129
+ Yields
130
+ ------
131
+ Generator[Tuple[str, str], None, None]
132
+ Path on the filesystem and the name in the archive
133
+ """
134
+ yield from self._content(content_types, generate_value=False)
135
+
136
+ def contents(
137
+ self, content_types: Optional[int] = None
138
+ ) -> Generator[Tuple[Union[bytes, str], str], None, None]:
139
+ """
140
+ Very similar to content_names but returns the content of the non-files
141
+ as well as bytes. For files, identical output as content_names
142
+
143
+ Parameters
144
+ ----------
145
+ content_types : Optional[int]
146
+ The type of content to get the content of. If None, all content is returned.
147
+
148
+ Yields
149
+ ------
150
+ Generator[Tuple[Union[str, bytes], str], None, None]
151
+ Content of the MF content
152
+ """
153
+ yield from self._content(content_types, generate_value=True)
154
+
155
+ def show(self) -> str:
156
+ """
157
+ Returns a more human-readable string representation of the content of this
158
+ MetaflowCodeContent. This will not, for example, list all files but summarize what
159
+ is included at a more high level.
160
+
161
+ Returns
162
+ -------
163
+ str
164
+ A human-readable string representation of the content of this MetaflowCodeContent
165
+ """
166
+ all_user_step_decorators = {}
167
+ for k, v in UserStepDecoratorMeta.all_decorators().items():
168
+ all_user_step_decorators.setdefault(
169
+ getattr(v, "_original_module", v.__module__), []
170
+ ).append(k)
171
+
172
+ all_user_flow_decorators = {}
173
+ for k, v in FlowMutatorMeta.all_decorators().items():
174
+ all_user_flow_decorators.setdefault(
175
+ getattr(v, "_original_module", v.__module__), []
176
+ ).append(k)
177
+
178
+ result = []
179
+ if self._metaflow_version:
180
+ result.append(f"\nMetaflow version: {self._metaflow_version}")
181
+ ext_info = extension_info()
182
+ if ext_info["installed"]:
183
+ result.append("\nMetaflow extensions packaged:")
184
+ for ext_name, ext_info in ext_info["installed"].items():
185
+ result.append(
186
+ f" - {ext_name} ({ext_info['extension_name']}) @ {ext_info['dist_version']}"
187
+ )
188
+
189
+ if self._modules:
190
+ mf_modules = []
191
+ other_modules = []
192
+ for name, info in self._modules.items():
193
+ if info.metaflow_module:
194
+ mf_modules.append(f" - {name} @ {', '.join(info.root_paths)}")
195
+ module_user_step_decorators = [
196
+ ", ".join(v)
197
+ for k, v in all_user_step_decorators.items()
198
+ if k == info.name or k.startswith(info.name + ".")
199
+ ]
200
+ module_user_flow_decorators = [
201
+ ", ".join(v)
202
+ for k, v in all_user_flow_decorators.items()
203
+ if k == info.name or k.startswith(info.name + ".")
204
+ ]
205
+ if module_user_step_decorators:
206
+ mf_modules.append(
207
+ f" - Provides step decorators: {', '.join(module_user_step_decorators)}"
208
+ )
209
+ if module_user_flow_decorators:
210
+ mf_modules.append(
211
+ f" - Provides flow mutators: {', '.join(module_user_flow_decorators)}"
212
+ )
213
+ else:
214
+ other_modules.append(f" - {name} @ {', '.join(info.root_paths)}")
215
+ if mf_modules:
216
+ result.append("\nMetaflow modules:")
217
+ result.extend(mf_modules)
218
+ if other_modules:
219
+ result.append("\nNon-Metaflow packaged modules:")
220
+ result.extend(other_modules)
221
+
222
+ return "\n".join(result)
223
+
224
+ def add_info(self, info: Dict[str, Any]) -> None:
225
+ """
226
+ Add the content of the INFO file to the Metaflow content
227
+
228
+ Parameters
229
+ ----------
230
+ info: Dict[str, Any]
231
+ The content of the INFO file
232
+ """
233
+ info_file_path = os.path.join(self._other_dir, self._info_file)
234
+ if info_file_path in self._other_content:
235
+ raise MetaflowException("INFO file already present in the MF environment")
236
+ self._other_content[info_file_path] = json.dumps(info).encode("utf-8")
237
+
238
+ def add_config(self, config: Dict[str, Any]) -> None:
239
+ """
240
+ Add the content of the CONFIG file to the Metaflow content
241
+
242
+ Parameters
243
+ ----------
244
+ config: Dict[str, Any]
245
+ The content of the CONFIG file
246
+ """
247
+ config_file_path = os.path.join(self._other_dir, self._config_file)
248
+ if config_file_path in self._other_content:
249
+ raise MetaflowException("CONFIG file already present in the MF environment")
250
+ self._other_content[config_file_path] = json.dumps(config).encode("utf-8")
251
+
252
+ def add_module(self, module: ModuleType) -> None:
253
+ """
254
+ Add a python module to the Metaflow content
255
+
256
+ Parameters
257
+ ----------
258
+ module_path: ModuleType
259
+ The module to add
260
+ """
261
+ name = module.__name__
262
+ debug.package_exec(f"Adding module {name} to the MF content")
263
+ # If the module is a single file, we handle this here by looking at __file__
264
+ # which will point to the single file. If it is an actual module, __path__
265
+ # will contain the path(s) to the module
266
+ self._modules[name] = _ModuleInfo(
267
+ name,
268
+ set(
269
+ Path(p).resolve().as_posix()
270
+ for p in getattr(module, "__path__", [module.__file__])
271
+ ),
272
+ module,
273
+ False, # This is not a Metaflow module (added by the user manually)
274
+ )
275
+ self._files_from_modules.update(
276
+ self._module_files(name, self._modules[name].root_paths)
277
+ )
278
+
279
+ def add_code_file(self, file_path: str, file_name: str) -> None:
280
+ """
281
+ Add a code file to the Metaflow content
282
+
283
+ Parameters
284
+ ----------
285
+ file_path: str
286
+ The path to the code file to add (on the filesystem)
287
+ file_name: str
288
+ The path in the archive to add the code file to
289
+ """
290
+ file_path = os.path.realpath(file_path)
291
+ debug.package_exec(
292
+ f"Adding code file {file_path} as {file_name} to the MF content"
293
+ )
294
+
295
+ if file_path in self._files and self._files[file_path] != os.path.join(
296
+ self._code_dir, file_name.lstrip("/")
297
+ ):
298
+ raise MetaflowException(
299
+ "File '%s' is already present in the MF content with a different name: '%s'"
300
+ % (file_path, self._files[file_path])
301
+ )
302
+ self._files[file_path] = os.path.join(self._code_dir, file_name.lstrip("/"))
303
+
304
+ def add_other_file(self, file_path: str, file_name: str) -> None:
305
+ """
306
+ Add a non-python file to the Metaflow content
307
+
308
+ Parameters
309
+ ----------
310
+ file_path: str
311
+ The path to the file to add (on the filesystem)
312
+ file_name: str
313
+ The path in the archive to add the file to
314
+ """
315
+ file_path = os.path.realpath(file_path)
316
+ debug.package_exec(
317
+ f"Adding other file {file_path} as {file_name} to the MF content"
318
+ )
319
+ if file_path in self._other_files and self._other_files[
320
+ file_path
321
+ ] != os.path.join(self._other_dir, file_name.lstrip("/")):
322
+ raise MetaflowException(
323
+ "File %s is already present in the MF content with a different name: %s"
324
+ % (file_path, self._other_files[file_path])
325
+ )
326
+ self._other_files[file_path] = os.path.join(
327
+ self._other_dir, file_name.lstrip("/")
328
+ )
329
+
330
+ def _content(
331
+ self, content_types: Optional[int] = None, generate_value: bool = False
332
+ ) -> Generator[Tuple[Union[str, bytes], str], None, None]:
333
+ from ..package import MetaflowPackage # Prevent circular dependency
334
+
335
+ if content_types is None:
336
+ content_types = ContentType.ALL_CONTENT.value
337
+
338
+ if content_types & ContentType.CODE_CONTENT.value:
339
+ yield from self._cached_metaflow_files
340
+ yield from self._files.items()
341
+ if content_types & ContentType.MODULE_CONTENT.value:
342
+ yield from self._files_from_modules.items()
343
+ if content_types & ContentType.OTHER_CONTENT.value:
344
+ yield from self._other_files.items()
345
+ if generate_value:
346
+ for k, v in self._other_content.items():
347
+ yield v, k
348
+ # Include the distribution file too
349
+ yield json.dumps(self._distmetainfo).encode("utf-8"), os.path.join(
350
+ self._other_dir, self._dist_info_file
351
+ )
352
+ yield json.dumps(self.create_mfcontent_info()).encode(
353
+ "utf-8"
354
+ ), os.path.join(self._code_dir, MFCONTENT_MARKER)
355
+ else:
356
+ for k in self._other_content.keys():
357
+ yield "<generated %s content>" % (os.path.basename(k)), k
358
+ yield "<generated %s content>" % (
359
+ os.path.basename(self._dist_info_file)
360
+ ), os.path.join(self._other_dir, self._dist_info_file)
361
+ yield "<generated %s content>" % MFCONTENT_MARKER, os.path.join(
362
+ self._code_dir, MFCONTENT_MARKER
363
+ )
364
+
365
+ def _metaflow_distribution_files(self) -> Generator[Tuple[str, str], None, None]:
366
+ debug.package_exec("Including Metaflow from '%s'" % self._metaflow_root)
367
+ for path_tuple in walk(
368
+ os.path.join(self._metaflow_root, "metaflow"),
369
+ exclude_hidden=False,
370
+ file_filter=suffix_filter(self.METAFLOW_SUFFIXES_LIST),
371
+ ):
372
+ yield path_tuple[0], os.path.join(self._code_dir, path_tuple[1])
373
+
374
+ def _metaflow_extension_files(self) -> Generator[Tuple[str, str], None, None]:
375
+ # Metaflow extensions; for now, we package *all* extensions but this may change
376
+ # at a later date; it is possible to call `package_mfext_package` instead of
377
+ # `package_mfext_all` but in that case, make sure to also add a
378
+ # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions
379
+ # package and prevent other extensions from being loaded that may be
380
+ # present in the rest of the system
381
+ for path_tuple in package_mfext_all():
382
+ yield path_tuple[0], os.path.join(self._code_dir, path_tuple[1])
383
+ if debug.package:
384
+ ext_info = package_mfext_all_descriptions()
385
+ ext_info = {
386
+ k: {k1: v1 for k1, v1 in v.items() if k1 in ("root_paths",)}
387
+ for k, v in ext_info.items()
388
+ }
389
+ debug.package_exec(f"Metaflow extensions packaged: {ext_info}")
390
+
391
+ def _module_files(
392
+ self, name: str, paths: Set[str]
393
+ ) -> Generator[Tuple[str, str], None, None]:
394
+ debug.package_exec(
395
+ " Looking for distributions for module %s in %s" % (name, paths)
396
+ )
397
+ paths = set(paths) # Do not modify external paths
398
+ has_init = False
399
+ distributions = modules_to_distributions().get(name)
400
+ prefix_parts = tuple(name.split("."))
401
+
402
+ seen_distributions = set()
403
+ if distributions:
404
+ for dist in distributions:
405
+ dist_name = dist.metadata["Name"] # dist.name not always present
406
+ if dist_name in seen_distributions:
407
+ continue
408
+ # For some reason, sometimes the same distribution appears twice. We
409
+ # don't need to process twice.
410
+ seen_distributions.add(dist_name)
411
+ debug.package_exec(
412
+ " Including distribution '%s' for module '%s'"
413
+ % (dist_name, name)
414
+ )
415
+ dist_root = str(dist.locate_file(name))
416
+ if dist_root not in paths:
417
+ # This is an error because it means that this distribution is
418
+ # not contributing to the module.
419
+ raise RuntimeError(
420
+ "Distribution '%s' is not contributing to module '%s' as "
421
+ "expected (got '%s' when expected one of %s)"
422
+ % (dist.metadata["Name"], name, dist_root, paths)
423
+ )
424
+ paths.discard(dist_root)
425
+ if dist_name not in self._distmetainfo:
426
+ # Possible that a distribution contributes to multiple modules
427
+ self._distmetainfo[dist_name] = {
428
+ # We can add more if needed but these are likely the most
429
+ # useful (captures, name, version, etc and files which can
430
+ # be used to find non-python files in the distribution).
431
+ "METADATA": dist.read_text("METADATA") or "",
432
+ "RECORD": dist.read_text("RECORD") or "",
433
+ }
434
+ for file in dist.files or []:
435
+ # Skip files that do not belong to this module (distribution may
436
+ # provide multiple modules)
437
+ if file.parts[: len(prefix_parts)] != prefix_parts:
438
+ continue
439
+ if file.parts[len(prefix_parts)] == "__init__.py":
440
+ has_init = True
441
+ yield str(
442
+ dist.locate_file(file).resolve().as_posix()
443
+ ), os.path.join(self._code_dir, *prefix_parts, *file.parts[1:])
444
+
445
+ # Now if there are more paths left in paths, it means there is a non-distribution
446
+ # component to this package which we also include.
447
+ debug.package_exec(
448
+ " Looking for non-distribution files for module '%s' in %s"
449
+ % (name, paths)
450
+ )
451
+ for path in paths:
452
+ if not Path(path).is_dir():
453
+ # Single file for the module -- this will be something like <name>.py
454
+ yield path, os.path.join(
455
+ self._code_dir, *prefix_parts[:-1], f"{prefix_parts[-1]}.py"
456
+ )
457
+ has_init = True
458
+ else:
459
+ for root, _, files in os.walk(path):
460
+ for file in files:
461
+ if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
462
+ continue
463
+ rel_path = os.path.relpath(os.path.join(root, file), path)
464
+ if rel_path == "__init__.py":
465
+ has_init = True
466
+ yield os.path.join(root, file), os.path.join(
467
+ self._code_dir,
468
+ name,
469
+ rel_path,
470
+ )
471
+ # We now include an empty __init__.py file to close the module and prevent
472
+ # leaks from possible namespace packages
473
+ if not has_init:
474
+ yield os.path.join(
475
+ self._metaflow_root, "metaflow", "extension_support", "_empty_file.py"
476
+ ), os.path.join(self._code_dir, *prefix_parts, "__init__.py")
@@ -66,6 +66,7 @@ class Airflow(object):
66
66
  name,
67
67
  graph,
68
68
  flow,
69
+ code_package_metadata,
69
70
  code_package_sha,
70
71
  code_package_url,
71
72
  metadata,
@@ -87,6 +88,7 @@ class Airflow(object):
87
88
  self.name = name
88
89
  self.graph = graph
89
90
  self.flow = flow
91
+ self.code_package_metadata = code_package_metadata
90
92
  self.code_package_sha = code_package_sha
91
93
  self.code_package_url = code_package_url
92
94
  self.metadata = metadata
@@ -372,6 +374,7 @@ class Airflow(object):
372
374
  # Technically the "user" is the stakeholder but should these labels be present.
373
375
  }
374
376
  additional_mf_variables = {
377
+ "METAFLOW_CODE_METADATA": self.code_package_metadata,
375
378
  "METAFLOW_CODE_SHA": self.code_package_sha,
376
379
  "METAFLOW_CODE_URL": self.code_package_url,
377
380
  "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -476,6 +479,7 @@ class Airflow(object):
476
479
  node.name,
477
480
  AIRFLOW_MACROS.create_task_id(self.contains_foreach),
478
481
  AIRFLOW_MACROS.ATTEMPT,
482
+ code_package_metadata=self.code_package_metadata,
479
483
  code_package_url=self.code_package_url,
480
484
  step_cmds=self._step_cli(
481
485
  node, input_paths, self.code_package_url, user_code_retries
@@ -534,7 +538,7 @@ class Airflow(object):
534
538
  "with": [
535
539
  decorator.make_decorator_spec()
536
540
  for decorator in node.decorators
537
- if not decorator.statically_defined
541
+ if not decorator.statically_defined and decorator.inserted_by is None
538
542
  ]
539
543
  }
540
544
  # FlowDecorators can define their own top-level options. They are
@@ -7,6 +7,7 @@ from hashlib import sha1
7
7
  from metaflow import current, decorators
8
8
  from metaflow._vendor import click
9
9
  from metaflow.exception import MetaflowException, MetaflowInternalError
10
+ from metaflow.metaflow_config import FEAT_ALWAYS_UPLOAD_CODE_PACKAGE
10
11
  from metaflow.package import MetaflowPackage
11
12
  from metaflow.plugins.aws.step_functions.production_token import (
12
13
  load_token,
@@ -292,16 +293,26 @@ def make_flow(
292
293
  # Save the code package in the flow datastore so that both user code and
293
294
  # metaflow package can be retrieved during workflow execution.
294
295
  obj.package = MetaflowPackage(
295
- obj.flow, obj.environment, obj.echo, obj.package_suffixes
296
+ obj.flow,
297
+ obj.environment,
298
+ obj.echo,
299
+ suffixes=obj.package_suffixes,
300
+ flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
296
301
  )
297
- package_url, package_sha = obj.flow_datastore.save_data(
298
- [obj.package.blob], len_hint=1
299
- )[0]
302
+ # This blocks until the package is created
303
+ if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
304
+ package_url = obj.package.package_url()
305
+ package_sha = obj.package.package_sha()
306
+ else:
307
+ package_url, package_sha = obj.flow_datastore.save_data(
308
+ [obj.package.blob], len_hint=1
309
+ )[0]
300
310
 
301
311
  return Airflow(
302
312
  dag_name,
303
313
  obj.graph,
304
314
  obj.flow,
315
+ obj.package.package_metadata,
305
316
  package_sha,
306
317
  package_url,
307
318
  obj.metadata,
@@ -91,6 +91,7 @@ class ArgoWorkflows(object):
91
91
  name,
92
92
  graph: FlowGraph,
93
93
  flow,
94
+ code_package_metadata,
94
95
  code_package_sha,
95
96
  code_package_url,
96
97
  production_token,
@@ -143,6 +144,7 @@ class ArgoWorkflows(object):
143
144
  self.name = name
144
145
  self.graph = graph
145
146
  self.flow = flow
147
+ self.code_package_metadata = code_package_metadata
146
148
  self.code_package_sha = code_package_sha
147
149
  self.code_package_url = code_package_url
148
150
  self.production_token = production_token
@@ -551,7 +553,7 @@ class ArgoWorkflows(object):
551
553
  type=param_type,
552
554
  description=param.kwargs.get("help"),
553
555
  is_required=is_required,
554
- **extra_attrs
556
+ **extra_attrs,
555
557
  )
556
558
  return parameters
557
559
 
@@ -1495,7 +1497,9 @@ class ArgoWorkflows(object):
1495
1497
  mflog_expr,
1496
1498
  ]
1497
1499
  + self.environment.get_package_commands(
1498
- self.code_package_url, self.flow_datastore.TYPE
1500
+ self.code_package_url,
1501
+ self.flow_datastore.TYPE,
1502
+ self.code_package_metadata,
1499
1503
  )
1500
1504
  )
1501
1505
  step_cmds = self.environment.bootstrap_commands(
@@ -1507,6 +1511,7 @@ class ArgoWorkflows(object):
1507
1511
  decorator.make_decorator_spec()
1508
1512
  for decorator in node.decorators
1509
1513
  if not decorator.statically_defined
1514
+ and decorator.inserted_by is None
1510
1515
  ]
1511
1516
  }
1512
1517
  # FlowDecorators can define their own top-level options. They are
@@ -1673,6 +1678,7 @@ class ArgoWorkflows(object):
1673
1678
  **{
1674
1679
  # These values are needed by Metaflow to set it's internal
1675
1680
  # state appropriately.
1681
+ "METAFLOW_CODE_METADATA": self.code_package_metadata,
1676
1682
  "METAFLOW_CODE_URL": self.code_package_url,
1677
1683
  "METAFLOW_CODE_SHA": self.code_package_sha,
1678
1684
  "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -2298,11 +2304,11 @@ class ArgoWorkflows(object):
2298
2304
  start_step = [step for step in self.graph if step.name == "start"][0]
2299
2305
  # We want to grab the base image used by the start step, as this is known to be pullable from within the cluster,
2300
2306
  # and it might contain the required libraries, allowing us to start up faster.
2301
- resources = dict(
2302
- [deco for deco in start_step.decorators if deco.name == "kubernetes"][
2303
- 0
2304
- ].attributes
2305
- )
2307
+ start_kube_deco = [
2308
+ deco for deco in start_step.decorators if deco.name == "kubernetes"
2309
+ ][0]
2310
+ resources = dict(start_kube_deco.attributes)
2311
+ kube_defaults = dict(start_kube_deco.defaults)
2306
2312
 
2307
2313
  run_id_template = "argo-{{workflow.name}}"
2308
2314
  metaflow_version = self.environment.get_environment_info()
@@ -2401,15 +2407,10 @@ class ArgoWorkflows(object):
2401
2407
  if k
2402
2408
  ],
2403
2409
  resources=kubernetes_sdk.V1ResourceRequirements(
2404
- # NOTE: base resources for this are kept to a minimum to save on running costs.
2405
2410
  requests={
2406
- "cpu": "200m",
2407
- "memory": "100Mi",
2408
- },
2409
- limits={
2410
- "cpu": "200m",
2411
- "memory": "500Mi",
2412
- },
2411
+ "cpu": str(kube_defaults["cpu"]),
2412
+ "memory": "%sM" % str(kube_defaults["memory"]),
2413
+ }
2413
2414
  ),
2414
2415
  ).to_dict()
2415
2416
  )
@@ -2481,7 +2482,9 @@ class ArgoWorkflows(object):
2481
2482
  mflog_expr,
2482
2483
  ]
2483
2484
  + self.environment.get_package_commands(
2484
- self.code_package_url, self.flow_datastore.TYPE
2485
+ self.code_package_url,
2486
+ self.flow_datastore.TYPE,
2487
+ self.code_package_metadata,
2485
2488
  )[:-1]
2486
2489
  # Replace the line 'Task in starting'
2487
2490
  # FIXME: this can be brittle.
@@ -2501,6 +2504,7 @@ class ArgoWorkflows(object):
2501
2504
  env = {
2502
2505
  # These values are needed by Metaflow to set it's internal
2503
2506
  # state appropriately.
2507
+ "METAFLOW_CODE_METADATA": self.code_package_metadata,
2504
2508
  "METAFLOW_CODE_URL": self.code_package_url,
2505
2509
  "METAFLOW_CODE_SHA": self.code_package_sha,
2506
2510
  "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -2957,7 +2961,8 @@ class ArgoWorkflows(object):
2957
2961
  mflog_expr,
2958
2962
  ]
2959
2963
  + self.environment.get_package_commands(
2960
- self.code_package_url, self.flow_datastore.TYPE
2964
+ self.code_package_url,
2965
+ self.flow_datastore.TYPE,
2961
2966
  )[:-1]
2962
2967
  # Replace the line 'Task in starting'
2963
2968
  # FIXME: this can be brittle.
@@ -2972,6 +2977,7 @@ class ArgoWorkflows(object):
2972
2977
  env = {
2973
2978
  # These values are needed by Metaflow to set it's internal
2974
2979
  # state appropriately.
2980
+ "METAFLOW_CODE_METADATA": self.code_package_metadata,
2975
2981
  "METAFLOW_CODE_URL": self.code_package_url,
2976
2982
  "METAFLOW_CODE_SHA": self.code_package_sha,
2977
2983
  "METAFLOW_CODE_DS": self.flow_datastore.TYPE,