metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/graph.py +2 -2
  19. metaflow/metaflow_config.py +41 -0
  20. metaflow/metaflow_profile.py +18 -0
  21. metaflow/packaging_sys/utils.py +2 -39
  22. metaflow/packaging_sys/v1.py +63 -16
  23. metaflow/plugins/__init__.py +2 -0
  24. metaflow/plugins/argo/argo_workflows.py +20 -25
  25. metaflow/plugins/argo/param_val.py +19 -0
  26. metaflow/plugins/cards/card_datastore.py +13 -13
  27. metaflow/plugins/cards/card_decorator.py +1 -0
  28. metaflow/plugins/cards/card_modules/basic.py +9 -3
  29. metaflow/plugins/datastores/local_storage.py +12 -6
  30. metaflow/plugins/datastores/spin_storage.py +12 -0
  31. metaflow/plugins/datatools/s3/s3.py +29 -10
  32. metaflow/plugins/datatools/s3/s3op.py +90 -62
  33. metaflow/plugins/metadata_providers/local.py +76 -82
  34. metaflow/plugins/metadata_providers/spin.py +16 -0
  35. metaflow/runner/click_api.py +4 -2
  36. metaflow/runner/metaflow_runner.py +210 -19
  37. metaflow/runtime.py +348 -21
  38. metaflow/task.py +61 -12
  39. metaflow/user_configs/config_parameters.py +2 -4
  40. metaflow/user_decorators/mutable_flow.py +1 -1
  41. metaflow/user_decorators/user_step_decorator.py +10 -1
  42. metaflow/util.py +191 -1
  43. metaflow/version.py +1 -1
  44. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
  45. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
  46. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
  47. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  48. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  49. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
  50. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
  51. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
  52. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,10 @@ from collections import defaultdict, namedtuple
11
11
  from importlib.abc import MetaPathFinder, Loader
12
12
  from itertools import chain
13
13
  from pathlib import Path
14
+ from typing import Any, Dict
14
15
 
15
16
  from metaflow.meta_files import read_info_file
17
+ from metaflow.util import walk_without_cycles
16
18
 
17
19
 
18
20
  #
@@ -82,6 +84,7 @@ EXT_CONFIG_REGEXP = re.compile(r"^mfextinit_[a-zA-Z0-9_-]+\.py$")
82
84
  EXT_META_REGEXP = re.compile(r"^mfextmeta_[a-zA-Z0-9_-]+\.py$")
83
85
  REQ_NAME = re.compile(r"^(([a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9])|[a-zA-Z0-9]).*$")
84
86
  EXT_EXCLUDE_SUFFIXES = [".pyc"]
87
+ FINDER_TRANS = str.maketrans(".-", "__")
85
88
 
86
89
  # To get verbose messages, set METAFLOW_DEBUG_EXT to 1
87
90
  DEBUG_EXT = os.environ.get("METAFLOW_DEBUG_EXT", False)
@@ -127,7 +130,6 @@ def dump_module_info(all_packages=None, pkgs_per_extension_point=None):
127
130
  if pkgs_per_extension_point is None:
128
131
  pkgs_per_extension_point = _pkgs_per_extension_point
129
132
 
130
- _filter_files_all(all_packages)
131
133
  sanitized_all_packages = dict()
132
134
  # Strip out root_paths (we don't need it and no need to expose user's dir structure)
133
135
  for k, v in all_packages.items():
@@ -135,6 +137,7 @@ def dump_module_info(all_packages=None, pkgs_per_extension_point=None):
135
137
  "root_paths": None,
136
138
  "meta_module": v["meta_module"],
137
139
  "files": v["files"],
140
+ "full_path_files": None,
138
141
  "version": v["version"],
139
142
  "package_version": v.get("package_version", "<unk>"),
140
143
  "extension_name": v.get("extension_name", "<unk>"),
@@ -187,17 +190,25 @@ def package_mfext_package(package_name):
187
190
 
188
191
  _ext_debug("Packaging '%s'" % package_name)
189
192
  pkg_info = _all_packages.get(package_name, None)
190
- _filter_files_package(pkg_info)
193
+
191
194
  if pkg_info and pkg_info.get("root_paths", None):
192
- single_path = len(pkg_info["root_paths"]) == 1
193
- for p in pkg_info["root_paths"]:
194
- root_path = to_unicode(p)
195
- for f in pkg_info["files"]:
196
- f_unicode = to_unicode(f)
197
- fp = os.path.join(root_path, f_unicode)
198
- if single_path or os.path.isfile(fp):
199
- _ext_debug(" Adding '%s'" % fp)
200
- yield fp, os.path.join(EXT_PKG, f_unicode)
195
+ if pkg_info["full_path_files"]:
196
+ # Case for initial packaging
197
+ for f, short_name in zip(pkg_info["full_path_files"], pkg_info["files"]):
198
+ f_unicode = os.path.join(EXT_PKG, to_unicode(short_name))
199
+ _ext_debug(" Adding '%s' as '%s'" % (f, f_unicode))
200
+ yield f, f_unicode
201
+ else:
202
+ # When re-packaging (ie: packaging Metaflow from a Metaflow run):
203
+ single_path = len(pkg_info["root_paths"]) == 1
204
+ for p in pkg_info["root_paths"]:
205
+ root_path = to_unicode(p)
206
+ for f in pkg_info["files"]:
207
+ f_unicode = to_unicode(f)
208
+ fp = os.path.join(root_path, f_unicode)
209
+ if single_path or os.path.isfile(fp):
210
+ _ext_debug(" Adding '%s'" % fp)
211
+ yield fp, os.path.join(EXT_PKG, f_unicode)
201
212
 
202
213
 
203
214
  def package_mfext_all():
@@ -211,8 +222,7 @@ def package_mfext_all():
211
222
  ), os.path.join(EXT_PKG, "__init__.py")
212
223
 
213
224
  for p in _all_packages:
214
- for path_tuple in package_mfext_package(p):
215
- yield path_tuple
225
+ yield from package_mfext_package(p)
216
226
 
217
227
 
218
228
  def package_mfext_all_descriptions():
@@ -395,7 +405,80 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
395
405
  # At this point, we look at all the paths and create a set. As we find distributions
396
406
  # that match it, we will remove from the set and then will be left with any
397
407
  # PYTHONPATH "packages"
398
- all_paths = set(Path(p).resolve().as_posix() for p in extensions_module.__path__)
408
+ all_paths = set()
409
+ # Records which finders provided which paths if applicable
410
+ # This is then later used to determine which paths belong
411
+ # to which distribution
412
+ finders_to_paths = dict()
413
+
414
+ # Temporary variables to support the loop below and make sure we loop through all
415
+ # the paths in the submodule_search_locations including calling the path hooks.
416
+ # We could skip calling things on the path hooks since the module was just imported
417
+ # by importlib so the values are probably already in submodule_search_locations but
418
+ # there may be cases where we need to call multiple times. This also allows us to tie
419
+ # the finders (ie: the path hooks) back to the distribution since they share a name.
420
+ # This is useful in knowing which paths we consider as belonging to a distribution so
421
+ # we know which order to load it in.
422
+ seen_path_values = set()
423
+ new_paths = extensions_module.__spec__.submodule_search_locations
424
+ _ext_debug("Found initial paths: %s" % str(new_paths))
425
+ while new_paths:
426
+ paths = new_paths
427
+ new_paths = []
428
+ for p in paths:
429
+ if p in seen_path_values:
430
+ continue
431
+ if os.path.isdir(p):
432
+ all_paths.add(Path(p).resolve().as_posix())
433
+ elif p in sys.path_importer_cache:
434
+ # We have a path hook that we likely need to call to get the actual path
435
+ addl_spec = sys.path_importer_cache[p].find_spec(EXT_PKG)
436
+ if addl_spec is not None and addl_spec.submodule_search_locations:
437
+ new_paths.extend(addl_spec.submodule_search_locations)
438
+ # Remove .__path_hook__ and add .py to match the name of the file
439
+ # installed by the distribution
440
+ finder_name = p[:-14].translate(FINDER_TRANS) + ".py"
441
+ new_dirs = [
442
+ d
443
+ for d in addl_spec.submodule_search_locations
444
+ if os.path.isdir(d)
445
+ ]
446
+ _ext_debug(
447
+ "Finder %s added directories %s"
448
+ % (finder_name, ", ".join(new_dirs))
449
+ )
450
+ finders_to_paths.setdefault(finder_name, []).extend(new_dirs)
451
+ else:
452
+ # This may not be as required since it is likely the importer cache has
453
+ # everything already but just in case, we will also go through the
454
+ # path hooks and see if we find another one
455
+ for path_hook in sys.path_hooks:
456
+ try:
457
+ finder = path_hook(p)
458
+ addl_spec = finder.find_spec(EXT_PKG)
459
+ if (
460
+ addl_spec is not None
461
+ and addl_spec.submodule_search_locations
462
+ ):
463
+ finder_name = p[:-14].translate(FINDER_TRANS) + ".py"
464
+ new_dirs = [
465
+ d
466
+ for d in addl_spec.submodule_search_locations
467
+ if os.path.isdir(d)
468
+ ]
469
+ _ext_debug(
470
+ "Finder (through hooks) %s added directories %s"
471
+ % (finder_name, ", ".join(new_dirs))
472
+ )
473
+ finders_to_paths.setdefault(finder_name, []).extend(
474
+ new_dirs
475
+ )
476
+ new_paths.extend(addl_spec.submodule_search_locations)
477
+ break
478
+ except ImportError:
479
+ continue
480
+ seen_path_values.add(p)
481
+
399
482
  _ext_debug("Found packages present at %s" % str(all_paths))
400
483
  if restrict_to_directories:
401
484
  _ext_debug(
@@ -437,18 +520,146 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
437
520
  # Same as config_to_pkg for meta files
438
521
  meta_to_pkg = defaultdict(list)
439
522
 
523
+ # The file passed to process_file has EXT_PKG as the first component
524
+ # root_dir also has EXT_PKG as the last component
525
+ def process_file(state: Dict[str, Any], root_dir: str, file: str):
526
+ parts = file.split("/")
527
+
528
+ if len(parts) > 1 and parts[0] == EXT_PKG:
529
+ # Check for top-level files (ie: meta file which specifies how to package
530
+ # the extension and __init__.py file)
531
+ if len(parts) == 2:
532
+ # Ensure that we don't have a __init__.py to force this package to
533
+ # be a NS package
534
+ if parts[1] == "__init__.py":
535
+ raise RuntimeError(
536
+ "Package '%s' providing '%s' is not an implicit namespace "
537
+ "package as required" % (state["name"], EXT_PKG)
538
+ )
539
+ # Check for any metadata; we can only have one metadata per
540
+ # distribution at most
541
+ if EXT_META_REGEXP.match(parts[1]) is not None:
542
+ potential_meta_module = ".".join([EXT_PKG, parts[1][:-3]])
543
+ if state["meta_module"]:
544
+ raise RuntimeError(
545
+ "Package '%s' defines more than one meta configuration: "
546
+ "'%s' and '%s' (at least)"
547
+ % (
548
+ state["name"],
549
+ state["meta_module"],
550
+ potential_meta_module,
551
+ )
552
+ )
553
+ state["meta_module"] = potential_meta_module
554
+ _ext_debug(
555
+ "Found meta '%s' for '%s'"
556
+ % (state["meta_module"], state["name"])
557
+ )
558
+ meta_to_pkg[state["meta_module"]].append(state["name"])
559
+
560
+ # Record the file as a candidate for inclusion when packaging if
561
+ # needed
562
+ if not any(parts[-1].endswith(suffix) for suffix in EXT_EXCLUDE_SUFFIXES):
563
+ # Strip out metaflow_extensions from the file
564
+ state["files"].append(os.path.join(*parts[1:]))
565
+ state["full_path_files"].append(os.path.join(root_dir, *parts[1:]))
566
+
567
+ if parts[1] in init_ext_points:
568
+ # This is most likely a problem as we need an intermediate
569
+ # "identifier"
570
+ raise RuntimeError(
571
+ "Package '%s' should conform to '%s.X.%s' and not '%s.%s' where "
572
+ "X is your organization's name for example"
573
+ % (
574
+ state["name"],
575
+ EXT_PKG,
576
+ parts[1],
577
+ EXT_PKG,
578
+ parts[1],
579
+ )
580
+ )
581
+
582
+ if len(parts) > 3 and parts[0] == EXT_PKG:
583
+ # We go over _extension_points *in order* to make sure we get more
584
+ # specific paths first
585
+
586
+ # To give useful errors in case multiple top-level packages in
587
+ # one package
588
+ dist_full_name = "%s[%s]" % (state["name"], parts[1])
589
+ for idx, ext_list in enumerate(list_ext_points):
590
+ if (
591
+ len(parts) > len(ext_list) + 2
592
+ and parts[2 : 2 + len(ext_list)] == ext_list
593
+ ):
594
+ # Check if this is an "init" file
595
+ config_module = None
596
+
597
+ if len(parts) == len(ext_list) + 3 and (
598
+ EXT_CONFIG_REGEXP.match(parts[-1]) is not None
599
+ or parts[-1] == "__init__.py"
600
+ ):
601
+ parts[-1] = parts[-1][:-3] # Remove the .py
602
+ config_module = ".".join(parts)
603
+
604
+ config_to_pkg[config_module].append(dist_full_name)
605
+ cur_pkg = (
606
+ extension_points_to_pkg[_extension_points[idx]]
607
+ .setdefault(state["name"], {})
608
+ .get(parts[1])
609
+ )
610
+ if cur_pkg is not None:
611
+ if (
612
+ config_module is not None
613
+ and cur_pkg.config_module is not None
614
+ ):
615
+ raise RuntimeError(
616
+ "Package '%s' defines more than one "
617
+ "configuration file for '%s': '%s' and '%s'"
618
+ % (
619
+ dist_full_name,
620
+ _extension_points[idx],
621
+ config_module,
622
+ cur_pkg.config_module,
623
+ )
624
+ )
625
+ if config_module is not None:
626
+ _ext_debug(
627
+ " Top-level '%s' found config file '%s'"
628
+ % (parts[1], config_module)
629
+ )
630
+ extension_points_to_pkg[_extension_points[idx]][
631
+ state["name"]
632
+ ][parts[1]] = MFExtPackage(
633
+ package_name=state["name"],
634
+ tl_package=parts[1],
635
+ config_module=config_module,
636
+ )
637
+ else:
638
+ _ext_debug(
639
+ " Top-level '%s' extends '%s' with config '%s'"
640
+ % (parts[1], _extension_points[idx], config_module)
641
+ )
642
+ extension_points_to_pkg[_extension_points[idx]][state["name"]][
643
+ parts[1]
644
+ ] = MFExtPackage(
645
+ package_name=state["name"],
646
+ tl_package=parts[1],
647
+ config_module=config_module,
648
+ )
649
+ break
650
+
440
651
  # 1st step: look for distributions (the common case)
441
652
  for dist in metadata.distributions():
442
653
  if any(
443
654
  [pkg == EXT_PKG for pkg in (dist.read_text("top_level.txt") or "").split()]
444
655
  ):
445
- # In all cases (whether duplicate package or not), we remove the package
446
- # from the list of locations to look in.
447
- # This is not 100% accurate because it is possible that at the same
448
- # location there is a package and a non-package, but this is extremely
449
- # unlikely so we are going to ignore this case.
656
+ # Note that locate_file does not actually make sure the file exists. It just
657
+ # appends whatever you pass in to locate_file to the folder containing the
658
+ # metadata for the distribution. We will therefore check if we are actually
659
+ # seeing files in that directory using has_file_in_dist_root.
450
660
  dist_root = dist.locate_file(EXT_PKG).resolve().as_posix()
451
- all_paths.discard(dist_root)
661
+ all_roots = []
662
+ has_file_in_dist_root = False
452
663
  dist_name = dist.metadata["Name"]
453
664
  dist_version = dist.metadata["Version"]
454
665
  if restrict_to_directories:
@@ -468,144 +679,88 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
468
679
  )
469
680
  continue
470
681
  _ext_debug(
471
- "Found extension package '%s' at '%s'..." % (dist_name, dist_root)
682
+ "Found extension package '%s' at presumptive path '%s'..."
683
+ % (dist_name, dist_root)
472
684
  )
473
685
 
474
- files_to_include = []
475
- meta_module = None
476
-
686
+ state = {
687
+ "name": dist_name,
688
+ "files": [],
689
+ "full_path_files": [],
690
+ "meta_module": None, # Meta information about the package (if applicable)
691
+ }
692
+ addl_dirs = []
477
693
  # At this point, we check to see what extension points this package
478
694
  # contributes to. This is to enable multiple namespace packages to contribute
479
695
  # to the same extension point (for example, you may have multiple packages
480
696
  # that have plugins)
481
- for f in dist.files:
482
- parts = list(f.parts)
483
-
484
- if len(parts) > 1 and parts[0] == EXT_PKG:
485
- # Ensure that we don't have a __init__.py to force this package to
486
- # be a NS package
487
- if parts[1] == "__init__.py":
488
- raise RuntimeError(
489
- "Package '%s' providing '%s' is not an implicit namespace "
490
- "package as required" % (dist_name, EXT_PKG)
491
- )
492
-
493
- # Record the file as a candidate for inclusion when packaging if
494
- # needed
495
- if not any(
496
- parts[-1].endswith(suffix) for suffix in EXT_EXCLUDE_SUFFIXES
497
- ):
498
- files_to_include.append(os.path.join(*parts[1:]))
499
-
500
- if parts[1] in init_ext_points:
501
- # This is most likely a problem as we need an intermediate
502
- # "identifier"
503
- raise RuntimeError(
504
- "Package '%s' should conform to '%s.X.%s' and not '%s.%s' where "
505
- "X is your organization's name for example"
506
- % (
507
- dist_name,
508
- EXT_PKG,
509
- parts[1],
510
- EXT_PKG,
511
- parts[1],
512
- )
513
- )
697
+ for f in dist.files or []:
698
+ if f.suffix == ".pth":
699
+ # This is a directory we need to walk to find the files
700
+ d = f.read_text().strip()
701
+ if os.path.isdir(d):
702
+ _ext_debug(" Found additional directory '%s' from .pth" % d)
703
+ addl_dirs.append(d)
704
+ elif str(f).startswith("__editable__"):
705
+ # This is a finder file because we already checked for .pth
706
+ _ext_debug(
707
+ " Added additional directories from finder '%s': %s"
708
+ % (str(f), ", ".join(finders_to_paths.get(str(f), [])))
709
+ )
710
+ addl_dirs.extend(finders_to_paths.get(str(f), []))
711
+ elif f.parts[0] == EXT_PKG:
712
+ has_file_in_dist_root = True
713
+ process_file(state, dist_root, str(f))
714
+ else:
715
+ # We ignore the file
716
+ continue
514
717
 
515
- # Check for any metadata; we can only have one metadata per
516
- # distribution at most
517
- if EXT_META_REGEXP.match(parts[1]) is not None:
518
- potential_meta_module = ".".join([EXT_PKG, parts[1][:-3]])
519
- if meta_module:
520
- raise RuntimeError(
521
- "Package '%s' defines more than one meta configuration: "
522
- "'%s' and '%s' (at least)"
523
- % (
524
- dist_name,
525
- meta_module,
526
- potential_meta_module,
527
- )
528
- )
529
- meta_module = potential_meta_module
718
+ if has_file_in_dist_root:
719
+ all_roots.append(dist_root)
720
+ all_paths.discard(dist_root)
721
+ # Now walk any additional directory for this distribution as well
722
+ for addl_dir in addl_dirs:
723
+ if restrict_to_directories:
724
+ parent_dirs = list(
725
+ p.as_posix() for p in Path(addl_dir).resolve().parents
726
+ )
727
+ if all(p not in parent_dirs for p in restrict_to_directories):
530
728
  _ext_debug(
531
- "Found meta '%s' for '%s'" % (meta_module, dist_full_name)
729
+ "Ignoring package at %s as it is not in the considered "
730
+ "directories" % addl_dir
532
731
  )
533
- meta_to_pkg[meta_module].append(dist_full_name)
534
-
535
- if len(parts) > 3 and parts[0] == EXT_PKG:
536
- # We go over _extension_points *in order* to make sure we get more
537
- # specific paths first
538
-
539
- # To give useful errors in case multiple top-level packages in
540
- # one package
541
- dist_full_name = "%s[%s]" % (dist_name, parts[1])
542
- for idx, ext_list in enumerate(list_ext_points):
543
- if (
544
- len(parts) > len(ext_list) + 2
545
- and parts[2 : 2 + len(ext_list)] == ext_list
546
- ):
547
- # Check if this is an "init" file
548
- config_module = None
549
-
550
- if len(parts) == len(ext_list) + 3 and (
551
- EXT_CONFIG_REGEXP.match(parts[-1]) is not None
552
- or parts[-1] == "__init__.py"
553
- ):
554
- parts[-1] = parts[-1][:-3] # Remove the .py
555
- config_module = ".".join(parts)
556
-
557
- config_to_pkg[config_module].append(dist_full_name)
558
- cur_pkg = (
559
- extension_points_to_pkg[_extension_points[idx]]
560
- .setdefault(dist_name, {})
561
- .get(parts[1])
562
- )
563
- if cur_pkg is not None:
564
- if (
565
- config_module is not None
566
- and cur_pkg.config_module is not None
567
- ):
568
- raise RuntimeError(
569
- "Package '%s' defines more than one "
570
- "configuration file for '%s': '%s' and '%s'"
571
- % (
572
- dist_full_name,
573
- _extension_points[idx],
574
- config_module,
575
- cur_pkg.config_module,
576
- )
577
- )
578
- if config_module is not None:
579
- _ext_debug(
580
- " Top-level '%s' found config file '%s'"
581
- % (parts[1], config_module)
582
- )
583
- extension_points_to_pkg[_extension_points[idx]][
584
- dist_name
585
- ][parts[1]] = MFExtPackage(
586
- package_name=dist_name,
587
- tl_package=parts[1],
588
- config_module=config_module,
589
- )
590
- else:
591
- _ext_debug(
592
- " Top-level '%s' extends '%s' with config '%s'"
593
- % (parts[1], _extension_points[idx], config_module)
594
- )
595
- extension_points_to_pkg[_extension_points[idx]][
596
- dist_name
597
- ][parts[1]] = MFExtPackage(
598
- package_name=dist_name,
599
- tl_package=parts[1],
600
- config_module=config_module,
601
- )
602
- break
732
+ continue
733
+ base_depth = len(addl_dir.split("/"))
734
+ # .pth files give addl_dirs that don't have EXT_PKG at the end but
735
+ # finders do so check this
736
+ if addl_dir.split("/")[-1] == EXT_PKG:
737
+ base_depth -= 1
738
+ else:
739
+ addl_dir = os.path.join(addl_dir, EXT_PKG)
740
+ all_roots.append(addl_dir)
741
+ all_paths.discard(addl_dir)
742
+ _ext_debug(" Walking additional directory '%s'" % addl_dir)
743
+ for root, _, files in walk_without_cycles(addl_dir):
744
+ relative_root = "/".join(root.split("/")[base_depth:])
745
+ for f in files:
746
+ process_file(state, addl_dir, os.path.join(relative_root, f))
603
747
  mf_ext_packages[dist_name] = {
604
- "root_paths": [dist_root],
605
- "meta_module": meta_module,
606
- "files": files_to_include,
748
+ "root_paths": all_roots,
749
+ "meta_module": state["meta_module"],
750
+ "full_path_files": state["full_path_files"],
751
+ "files": state["files"],
607
752
  "version": dist_version,
608
753
  }
754
+ if addl_dirs:
755
+ # If we have additional directories, this means that we may need to filter
756
+ # the files based on the meta information about the module since we
757
+ # walked down the directories instead of relying simply on files that
758
+ # were packaged with the distribution. We do this now so we don't have to
759
+ # do it multiple times later for packaging. This is only useful if the
760
+ # distribution does not completely specify the files that need to be
761
+ # installed. In the case where the distribution completely specifies the
762
+ # files, we ignore the meta module
763
+ _filter_files_package(mf_ext_packages[dist_name])
609
764
  # At this point, we have all the packages that contribute to EXT_PKG,
610
765
  # we now check to see if there is an order to respect based on dependencies. We will
611
766
  # return an ordered list that respects that order and is ordered alphabetically in
@@ -666,9 +821,7 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
666
821
  all_paths_list.sort()
667
822
 
668
823
  # This block of code is the equivalent of the one above for distributions except
669
- # for PYTHONPATH packages. The functionality is identical, but it looks a little
670
- # different because we construct the file list instead of having it nicely provided
671
- # to us.
824
+ # for PYTHONPATH packages.
672
825
  package_name_to_path = dict()
673
826
  if len(all_paths_list) > 0:
674
827
  _ext_debug("Non installed packages present at %s" % str(all_paths))
@@ -695,132 +848,29 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
695
848
  )
696
849
  package_name_to_path[package_name] = package_path
697
850
  base_depth = len(package_path.split("/"))
698
- files_to_include = []
699
- meta_module = None
700
- for root, dirs, files in os.walk(package_path):
701
- parts = root.split("/")
702
- cur_depth = len(parts)
703
- # relative_root strips out metaflow_extensions
704
- relative_root = "/".join(parts[base_depth:])
705
- relative_module = ".".join(parts[base_depth - 1 :])
706
- files_to_include.extend(
707
- [
708
- "/".join([relative_root, f]) if relative_root else f
709
- for f in files
710
- if not any(
711
- [f.endswith(suffix) for suffix in EXT_EXCLUDE_SUFFIXES]
712
- )
713
- ]
714
- )
715
- if cur_depth == base_depth:
716
- if "__init__.py" in files:
717
- raise RuntimeError(
718
- "'%s' at '%s' is not an implicit namespace package as required"
719
- % (EXT_PKG, root)
720
- )
721
- for d in dirs:
722
- if d in init_ext_points:
723
- raise RuntimeError(
724
- "Package at '%s' should conform to' %s.X.%s' and not "
725
- "'%s.%s' where X is your organization's name for example"
726
- % (root, EXT_PKG, d, EXT_PKG, d)
727
- )
728
- # Check for meta files for this package
729
- meta_files = [
730
- x for x in map(EXT_META_REGEXP.match, files) if x is not None
731
- ]
732
- if meta_files:
733
- # We should have one meta file at most
734
- if len(meta_files) > 1:
735
- raise RuntimeError(
736
- "Package at '%s' defines more than one meta file: %s"
737
- % (
738
- package_path,
739
- ", and ".join(
740
- ["'%s'" % x.group(0) for x in meta_files]
741
- ),
742
- )
743
- )
744
- else:
745
- meta_module = ".".join(
746
- [relative_module, meta_files[0].group(0)[:-3]]
747
- )
851
+ state = {
852
+ "name": package_name,
853
+ "files": [],
854
+ "full_path_files": [],
855
+ "meta_module": None,
856
+ }
748
857
 
749
- elif cur_depth > base_depth + 1:
750
- # We want at least a top-level name and something under
751
- tl_name = parts[base_depth]
752
- tl_fullname = "%s[%s]" % (package_path, tl_name)
753
- prefix_match = parts[base_depth + 1 :]
754
- for idx, ext_list in enumerate(list_ext_points):
755
- if prefix_match == ext_list:
756
- # We check to see if this is an actual extension point
757
- # or if we just have a directory on the way to another
758
- # extension point. To do this, we check to see if we have
759
- # any files or directories that are *not* directly another
760
- # extension point
761
- skip_extension = len(files) == 0
762
- if skip_extension:
763
- next_dir_idx = len(list_ext_points[idx])
764
- ok_subdirs = [
765
- list_ext_points[j][next_dir_idx]
766
- for j in range(0, idx)
767
- if len(list_ext_points[j]) > next_dir_idx
768
- ]
769
- skip_extension = set(dirs).issubset(set(ok_subdirs))
770
-
771
- if skip_extension:
772
- _ext_debug(
773
- " Skipping '%s' as no files/directory of interest"
774
- % _extension_points[idx]
775
- )
776
- continue
858
+ for root, _, files in walk_without_cycles(package_path):
859
+ relative_root = "/".join(root.split("/")[base_depth - 1 :])
860
+ for f in files:
861
+ process_file(state, package_path, os.path.join(relative_root, f))
777
862
 
778
- # Check for any "init" files
779
- init_files = [
780
- x.group(0)
781
- for x in map(EXT_CONFIG_REGEXP.match, files)
782
- if x is not None
783
- ]
784
- if "__init__.py" in files:
785
- init_files.append("__init__.py")
786
-
787
- config_module = None
788
- if len(init_files) > 1:
789
- raise RuntimeError(
790
- "Package at '%s' defines more than one configuration "
791
- "file for '%s': %s"
792
- % (
793
- tl_fullname,
794
- ".".join(prefix_match),
795
- ", and ".join(["'%s'" % x for x in init_files]),
796
- )
797
- )
798
- elif len(init_files) == 1:
799
- config_module = ".".join(
800
- [relative_module, init_files[0][:-3]]
801
- )
802
- config_to_pkg[config_module].append(tl_fullname)
803
-
804
- d = extension_points_to_pkg[_extension_points[idx]][
805
- package_name
806
- ] = dict()
807
- d[tl_name] = MFExtPackage(
808
- package_name=package_name,
809
- tl_package=tl_name,
810
- config_module=config_module,
811
- )
812
- _ext_debug(
813
- " Extends '%s' with config '%s'"
814
- % (_extension_points[idx], config_module)
815
- )
816
- if files_to_include:
863
+ if state["files"]:
817
864
  mf_pkg_list.append(package_name)
818
865
  mf_ext_packages[package_name] = {
819
866
  "root_paths": [package_path],
820
- "meta_module": meta_module,
821
- "files": files_to_include,
867
+ "meta_module": state["meta_module"],
868
+ "full_path_files": state["full_path_files"],
869
+ "files": state["files"],
822
870
  "version": "_local_",
823
871
  }
872
+ # Always filter here since we don't have any distribution information
873
+ _filter_files_package(mf_ext_packages[package_name])
824
874
  else:
825
875
  _ext_debug("Skipping package as no files found (empty dir?)")
826
876
 
@@ -879,9 +929,6 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None
879
929
  return mf_ext_packages, extension_points_to_pkg
880
930
 
881
931
 
882
- _all_packages, _pkgs_per_extension_point = _get_extension_packages()
883
-
884
-
885
932
  def _attempt_load_module(module_name):
886
933
  try:
887
934
  extension_module = importlib.import_module(module_name)
@@ -905,6 +952,60 @@ def _attempt_load_module(module_name):
905
952
  return extension_module
906
953
 
907
954
 
955
+ def _filter_files_package(pkg):
956
+ if pkg and pkg["root_paths"] and pkg["meta_module"]:
957
+ meta_module = _attempt_load_module(pkg["meta_module"])
958
+ if meta_module:
959
+ filter_function = meta_module.__dict__.get("filter_function")
960
+ include_suffixes = meta_module.__dict__.get("include_suffixes")
961
+ exclude_suffixes = meta_module.__dict__.get("exclude_suffixes")
962
+
963
+ # Behavior is as follows:
964
+ # - if nothing specified, include all files (so do nothing here)
965
+ # - if filter_function specified, call that function on the list of files
966
+ # and only include the files where the function returns True. Note that
967
+ # the function will always be passed a value that starts with
968
+ # metaflow_extensions/...
969
+ # - if include_suffixes, only include those suffixes
970
+ # - if *not* include_suffixes but exclude_suffixes, include everything *except*
971
+ # files ending with that suffix
972
+ new_files, new_full_path_files = [], []
973
+
974
+ if filter_function:
975
+ for short_file, full_file in zip(pkg["files"], pkg["full_path_files"]):
976
+ try:
977
+ if filter_function(os.path.join(EXT_PKG, short_file)):
978
+ new_files.append(short_file)
979
+ new_full_path_files.append(full_file)
980
+ except Exception as e:
981
+ _ext_debug(
982
+ " Exception '%s' when calling filter_function on "
983
+ "'%s', ignoring file" % (e, short_file)
984
+ )
985
+ elif include_suffixes:
986
+ for short_file, full_file in zip(pkg["files"], pkg["full_path_files"]):
987
+ if any(
988
+ [short_file.endswith(suffix) for suffix in include_suffixes]
989
+ ):
990
+ new_files.append(short_file)
991
+ new_full_path_files.append(full_file)
992
+ elif exclude_suffixes:
993
+ for short_file, full_file in zip(pkg["files"], pkg["full_path_files"]):
994
+ if not any(
995
+ [short_file.endswith(suffix) for suffix in exclude_suffixes]
996
+ ):
997
+ new_files.append(short_file)
998
+ new_full_path_files.append(full_file)
999
+ else:
1000
+ new_files = pkg["files"]
1001
+ new_full_path_files = pkg["full_path_files"]
1002
+ pkg["files"] = new_files
1003
+ pkg["full_path_files"] = new_full_path_files
1004
+
1005
+
1006
+ _all_packages, _pkgs_per_extension_point = _get_extension_packages()
1007
+
1008
+
908
1009
  def _get_extension_config(distribution_name, tl_pkg, extension_point, config_module):
909
1010
  if config_module is not None and not config_module.endswith("__init__"):
910
1011
  module_name = config_module
@@ -955,40 +1056,6 @@ def _get_extension_config(distribution_name, tl_pkg, extension_point, config_mod
955
1056
  return None
956
1057
 
957
1058
 
958
- def _filter_files_package(pkg):
959
- if pkg and pkg["root_paths"] and pkg["meta_module"]:
960
- meta_module = _attempt_load_module(pkg["meta_module"])
961
- if meta_module:
962
- include_suffixes = meta_module.__dict__.get("include_suffixes")
963
- exclude_suffixes = meta_module.__dict__.get("exclude_suffixes")
964
-
965
- # Behavior is as follows:
966
- # - if nothing specified, include all files (so do nothing here)
967
- # - if include_suffixes, only include those suffixes
968
- # - if *not* include_suffixes but exclude_suffixes, include everything *except*
969
- # files ending with that suffix
970
- if include_suffixes:
971
- new_files = [
972
- f
973
- for f in pkg["files"]
974
- if any([f.endswith(suffix) for suffix in include_suffixes])
975
- ]
976
- elif exclude_suffixes:
977
- new_files = [
978
- f
979
- for f in pkg["files"]
980
- if not any([f.endswith(suffix) for suffix in exclude_suffixes])
981
- ]
982
- else:
983
- new_files = pkg["files"]
984
- pkg["files"] = new_files
985
-
986
-
987
- def _filter_files_all(all_packages):
988
- for p in all_packages.values():
989
- _filter_files_package(p)
990
-
991
-
992
1059
  class _AliasLoader(Loader):
993
1060
  def __init__(self, alias, orig):
994
1061
  self._alias = alias