metaflow-prebuilt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. metaflow_prebuilt-0.1.0/PKG-INFO +21 -0
  2. metaflow_prebuilt-0.1.0/pyproject.toml +43 -0
  3. metaflow_prebuilt-0.1.0/setup.cfg +4 -0
  4. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/__init__.py +0 -0
  5. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/mfextinit_prebuilt.py +3 -0
  6. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/__init__.py +0 -0
  7. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/__init__.py +0 -0
  8. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/build_service.py +77 -0
  9. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/image_registry.py +99 -0
  10. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/prebuilt_build_install.py +99 -0
  11. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/prebuilt_conda_environment.py +499 -0
  12. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/prebuilt_runtime_activate.py +101 -0
  13. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/registries/__init__.py +0 -0
  14. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/registries/dockerhub_registry.py +53 -0
  15. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/registries/ecr_registry.py +64 -0
  16. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/registries/gcr_registry.py +68 -0
  17. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/registries/local_registry.py +89 -0
  18. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/services/__init__.py +0 -0
  19. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/services/buildx_service.py +80 -0
  20. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/services/codebuild_service.py +127 -0
  21. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/services/docker_service.py +88 -0
  22. metaflow_prebuilt-0.1.0/src/metaflow_extensions/prebuilt/plugins/conda/services/kaniko_service.py +215 -0
  23. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/PKG-INFO +21 -0
  24. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/SOURCES.txt +26 -0
  25. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/dependency_links.txt +1 -0
  26. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/entry_points.txt +11 -0
  27. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/requires.txt +19 -0
  28. metaflow_prebuilt-0.1.0/src/metaflow_prebuilt.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: metaflow-prebuilt
3
+ Version: 0.1.0
4
+ Summary: Metaflow extension: pre-bake conda environments into Docker images for fast cold starts
5
+ License: Apache-2.0
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: metaflow
9
+ Provides-Extra: ecr
10
+ Requires-Dist: boto3; extra == "ecr"
11
+ Provides-Extra: codebuild
12
+ Requires-Dist: boto3; extra == "codebuild"
13
+ Provides-Extra: gcr
14
+ Requires-Dist: google-cloud-storage; extra == "gcr"
15
+ Provides-Extra: kaniko
16
+ Requires-Dist: google-cloud-storage; extra == "kaniko"
17
+ Requires-Dist: kubernetes; extra == "kaniko"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=7; extra == "dev"
20
+ Requires-Dist: pytest-mock; extra == "dev"
21
+ Requires-Dist: tox>=4; extra == "dev"
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "metaflow-prebuilt"
7
+ version = "0.1.0"
8
+ description = "Metaflow extension: pre-bake conda environments into Docker images for fast cold starts"
9
+ readme = "README.md"
10
+ license = { text = "Apache-2.0" }
11
+ requires-python = ">=3.8"
12
+ dependencies = [
13
+ "metaflow",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ ecr = ["boto3"]
18
+ codebuild = ["boto3"]
19
+ gcr = ["google-cloud-storage"]
20
+ kaniko = ["google-cloud-storage", "kubernetes"]
21
+ dev = [
22
+ "pytest>=7",
23
+ "pytest-mock",
24
+ "tox>=4",
25
+ ]
26
+
27
+ [project.entry-points."metaflow_prebuilt.build_services"]
28
+ docker = "metaflow_extensions.prebuilt.plugins.conda.services.docker_service:LocalDockerBuildService"
29
+ kaniko = "metaflow_extensions.prebuilt.plugins.conda.services.kaniko_service:KanikoBuildService"
30
+ buildx = "metaflow_extensions.prebuilt.plugins.conda.services.buildx_service:BuildxBuildService"
31
+ codebuild = "metaflow_extensions.prebuilt.plugins.conda.services.codebuild_service:CodeBuildService"
32
+
33
+ [project.entry-points."metaflow_prebuilt.image_registries"]
34
+ ecr = "metaflow_extensions.prebuilt.plugins.conda.registries.ecr_registry:ECRRegistry"
35
+ gcr = "metaflow_extensions.prebuilt.plugins.conda.registries.gcr_registry:GCRRegistry"
36
+ dockerhub = "metaflow_extensions.prebuilt.plugins.conda.registries.dockerhub_registry:DockerHubRegistry"
37
+ local = "metaflow_extensions.prebuilt.plugins.conda.registries.local_registry:LocalRegistry"
38
+
39
+ [tool.setuptools.packages.find]
40
+ where = ["src"]
41
+
42
+ [tool.pytest.ini_options]
43
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ ENVIRONMENTS_DESC = [
2
+ ("prebuilt", ".plugins.conda.prebuilt_conda_environment.PrebuiltCondaEnvironment"),
3
+ ]
@@ -0,0 +1,77 @@
1
+ import importlib.metadata
2
+ import os
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Callable, Dict, TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ pass
8
+
9
+
10
def _resolve_entry_point(group: str, name: str) -> Any:
    """Load the object registered as ``name`` in the entry point ``group``.

    Args:
        group: Entry point group to search, e.g.
            ``metaflow_prebuilt.build_services``.
        name: Entry point name to look up inside ``group``.

    Returns:
        The object the entry point refers to (result of ``EntryPoint.load()``).

    Raises:
        MetaflowException: If no entry point called ``name`` is installed in
            ``group``. The message lists the installed names and names the
            environment variable that selects a backend for that group.
    """
    try:
        eps = importlib.metadata.entry_points(group=group)
    except TypeError:
        # Older Pythons (3.8/3.9): entry_points() accepts no selection
        # keywords and returns a mapping keyed by group name.
        eps = importlib.metadata.entry_points().get(group, [])  # type: ignore[assignment]
    mapping = {ep.name: ep for ep in eps}
    if name not in mapping:
        # Imported lazily so this module can be imported without metaflow
        # being resolved until an error actually has to be reported.
        from metaflow.exception import MetaflowException  # noqa: PLC0415

        # This helper serves more than one group, so the hint must name the
        # setting that belongs to the group being resolved.
        selector_by_group = {
            "metaflow_prebuilt.build_services": "METAFLOW_PREBUILT_BUILD_SERVICE",
            "metaflow_prebuilt.image_registries": "METAFLOW_PREBUILT_IMAGE_REGISTRY",
        }
        hint = selector_by_group.get(group, "the configured backend name")
        raise MetaflowException(
            "No entry point named %r in group %r. "
            "Installed: %s. "
            "Install the package that provides this backend or check "
            "%s." % (name, group, sorted(mapping), hint)
        )
    return mapping[name].load()
26
+
27
+
28
class DockerBuildService(ABC):
    """Interface for backends that build a Docker image and push it.

    A concrete backend subclasses this, implements ``build_and_push`` and is
    registered in the ``metaflow_prebuilt.build_services`` entry point group.
    Setting ``METAFLOW_PREBUILT_BUILD_SERVICE=<name>`` then selects it.
    """

    @abstractmethod
    def build_and_push(
        self,
        dockerfile: str,
        context_files: Dict[str, Any],
        image_tag: str,
        push_credentials: Dict[str, Any],
        echo: Callable[..., None],
    ) -> bool:
        """Build an image from ``dockerfile`` and push it as ``image_tag``.

        Args:
            dockerfile: The complete text of the Dockerfile.
            context_files: Extra build-context files placed next to the
                Dockerfile, keyed by filename. Each value is either ``str``
                (text content) or ``bytes`` (binary content).
            image_tag: Fully-qualified destination tag such as
                ``registry.example.com/ns/name:v28-abc123``; it comes from
                ``ImageRegistry.push_tag()``.
            push_credentials: Opaque dict produced by
                ``ImageRegistry.push_credentials()``; its schema is owned by
                the registry implementation this service is paired with.
            echo: Callable used to show progress output to the user.

        Returns:
            ``True`` when the image was built and pushed, ``False`` when a
            recoverable failure occurred. Implementations must NOT raise for
            recoverable failures: report a diagnostic through ``echo()`` and
            return ``False``. Raising is acceptable for programmer errors.
        """

    @classmethod
    def from_config(cls) -> "DockerBuildService":
        """Instantiate the build service named by the environment.

        The name is read from ``METAFLOW_PREBUILT_BUILD_SERVICE`` and falls
        back to ``docker`` when the variable is unset. The class registered
        under that name is instantiated with no arguments.

        Raises ``MetaflowException`` if the name is not registered.
        """
        selected = os.environ.get("METAFLOW_PREBUILT_BUILD_SERVICE", "docker")
        backend_factory = _resolve_entry_point(
            "metaflow_prebuilt.build_services", selected
        )
        return backend_factory()
@@ -0,0 +1,99 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Dict, TYPE_CHECKING
4
+
5
+ from .build_service import _resolve_entry_point
6
+
7
+ if TYPE_CHECKING:
8
+ from .env_descr import EnvID
9
+
10
+
11
class ImageRegistry(ABC):
    """Interface for Docker image registries.

    A concrete registry subclasses this and is registered in the
    ``metaflow_prebuilt.image_registries`` entry point group; setting
    ``METAFLOW_PREBUILT_IMAGE_REGISTRY=<name>`` then selects it.

    ``push_tag()`` and ``pull_tag()`` both fall back to ``image_tag()``, so a
    typical registry only implements ``image_tag()``. Override ``push_tag()``
    and/or ``pull_tag()`` separately only when images are pushed to a
    different address than they are pulled from (e.g. ``LocalRegistry``).
    """

    @abstractmethod
    def image_tag(self, env_id: "EnvID") -> str:
        """Return the canonical fully-qualified image tag for ``env_id``.

        This is what ``push_tag()`` and ``pull_tag()`` return unless they are
        overridden. Format: ``<registry>/<namespace>/<name>:<version>``.
        """

    def push_tag(self, env_id: "EnvID") -> str:
        """Return the tag the build service pushes to (``image_tag()`` by default)."""
        canonical = self.image_tag(env_id)
        return canonical

    def pull_tag(self, env_id: "EnvID") -> str:
        """Return the tag baked into the remote runner spec (``image_tag()`` by default)."""
        canonical = self.image_tag(env_id)
        return canonical

    def image_tag_for_named(self, name: str) -> str:
        """Return the full mutable tag for a ``@named_env(fetch_at_exec=True)`` env.

        The tag is keyed by the alias name rather than the env_id hash, so a
        later deploy under the same name overwrites the manifest, in line
        with fetch_at_exec semantics.

        The base implementation always raises ``NotImplementedError``;
        registries that support named envs must override this method.
        """
        registry_name = type(self).__name__
        raise NotImplementedError(
            "Registry %r does not implement named env tags. "
            "Override image_tag_for_named() to use @named_env(fetch_at_exec=True)."
            % registry_name
        )

    def push_tag_for_named(self, name: str) -> str:
        """Return the push-side tag for a named env (``image_tag_for_named()`` by default)."""
        named_tag = self.image_tag_for_named(name)
        return named_tag

    def pull_tag_for_named(self, name: str) -> str:
        """Return the pull-side tag for a named env (``push_tag_for_named()`` by default)."""
        named_tag = self.push_tag_for_named(name)
        return named_tag

    @abstractmethod
    def push_credentials(self) -> Dict[str, Any]:
        """Return credentials/config handed to ``DockerBuildService.build_and_push``.

        Return ``{}`` if the build service relies on ambient auth (for
        example a prior ``docker login``). The schema depends on the build
        service; the ``DockerBuildService`` contract says which keys each
        service consumes.
        """

    @abstractmethod
    def pull_config(self, pull_tag: str) -> Dict[str, Any]:
        """Return attributes to inject into the remote runner decorator so it
        can pull the image at ``pull_tag``.

        Return ``{}`` if the runner already has ambient pull credentials (for
        example an IAM role for ECR + Batch, or a service account for GKE).

        Non-empty example (private registry with K8s imagePullSecrets)::

            {"image_pull_policy": "Always", "image_pull_secrets": "my-secret"}
        """

    @classmethod
    def from_config(cls) -> "ImageRegistry":
        """Instantiate the registry named by the environment.

        The name is read from ``METAFLOW_PREBUILT_IMAGE_REGISTRY`` and falls
        back to ``dockerhub`` when the variable is unset. The class
        registered under that name is instantiated with no arguments.

        Raises ``MetaflowException`` if the name is not registered.
        """
        selected = os.environ.get("METAFLOW_PREBUILT_IMAGE_REGISTRY", "dockerhub")
        registry_factory = _resolve_entry_point(
            "metaflow_prebuilt.image_registries", selected
        )
        return registry_factory()
@@ -0,0 +1,99 @@
1
+ # pyright: strict, reportTypeCommentUsage=false, reportMissingTypeStubs=false
2
+ """Build-time conda env installer for `--environment=prebuilt`.
3
+
4
+ Runs INSIDE the Cloudbuild docker build, with the MetaflowPackage
5
+ tarball already extracted at `cwd`. The runtime task entry_point does
6
+ exactly the same setup before invoking `remote_bootstrap.bootstrap_environment`
7
+ — we just call `Conda.create_for_step` directly with the slightly
8
+ different settings the build container needs.
9
+
10
+ What the Dockerfile sets up for us:
11
+
12
+ - `METAFLOW_CONDA_REMOTE_INSTALLER=""` → `Conda._ensure_remote_conda`'s
13
+ truthy check drops into `_ensure_micromamba` (public download from
14
+ micro.mamba.pm). No AWS creds required.
15
+ - `METAFLOW_DATASTORE_SYSROOT_LOCAL=<cwd>` → `LocalStorage` reads the
16
+ resolved-env manifest from `<cwd>/.metaflow/<CONDA_MAGIC_FILE_V2>`.
17
+ We call `setup_conda_manifest()` (reused from `remote_bootstrap`)
18
+ to move the manifest from its packed location to that path.
19
+ - `METAFLOW_EXTRACTED_ROOT=<cwd>` → activates the `.mf_install` marker
20
+ bypass in `metaflow.extension_support`, so the flattened nflx-*
21
+ distributions in the code package load cleanly without tripping
22
+ the path walker's per-distribution checks.
23
+ - `PYTHONPATH=<cwd>/.mf_code` → metaflow + all extensions resolve from
24
+ the extracted code package.
25
+ - `MAMBA_ROOT_PREFIX=<conda-root>` → micromamba creates the env at the
26
+ deterministic runtime-shared path.
27
+
28
+ What's left for the shim:
29
+
30
+ 1. `setup_conda_manifest()` — move the conda manifest from the
31
+ packaging-internal location to the LocalStorage path. Identical
32
+ call to the one `remote_bootstrap.bootstrap_environment` makes.
33
+ 2. `Conda(echo, "local", mode="remote")` — `datastore_type="local"`
34
+ makes `Conda._storage = None`, which (after the upstream storage
35
+ guards in `_create` + `lazy_fetch_packages`) disables cache_info
36
+ URL emission and forces web downloads from `pkg.url`. Those
37
+ URLs go to conda.netflix.net / pypi.netflix.net over HTTPS.
38
+ 3. `create_for_step` — same call the runtime makes via
39
+ `remote_bootstrap.bootstrap_environment`.
40
+ """
41
+ import os
42
+ import sys
43
+ import time
44
+
45
+ from metaflow.cli import echo_always
46
+
47
+ from .conda import Conda
48
+ from .env_descr import EnvID
49
+ from .remote_bootstrap import setup_conda_manifest
50
+ from .utils import arch_id
51
+
52
+
53
def _echo(*args, **kwargs):
    """Forward to ``echo_always`` with ``err`` forced to ``False``.

    Any ``err`` value supplied by the caller is overridden; every other
    positional and keyword argument is passed through unchanged.
    """
    # NOTE(review): err=False presumably routes the output to stdout rather
    # than stderr -- confirm against metaflow.cli.echo_always.
    forwarded = dict(kwargs, err=False)
    echo_always(*args, **forwarded)
56
+
57
+
58
def install_env(req_id: str, full_id: str) -> str:
    """Install the resolved conda environment identified by the two ids.

    Steps, in order: call ``setup_conda_manifest()``, construct a ``Conda``
    instance with datastore type ``"local"`` in ``"remote"`` mode, request
    the ``micromamba`` binary, look up the environment for
    ``EnvID(req_id, full_id, arch_id())`` and create it via
    ``create_for_step`` without symlinking.

    Args:
        req_id: Requirement id component of the environment id.
        full_id: Full id component of the environment id.

    Returns:
        The path returned by ``Conda.create_for_step`` for the new env.

    Raises:
        RuntimeError: If the environment lookup returns ``None``.
    """
    setup_began = time.time()
    _echo(" Setting up Conda (build-time) ...", nl=False)

    setup_conda_manifest()

    conda = Conda(_echo, "local", mode="remote")
    conda.binary("micromamba")
    setup_secs = int(time.time() - setup_began)
    _echo(" done in %ds." % setup_secs)

    resolved = conda.environment(
        EnvID(req_id=req_id, full_id=full_id, arch=arch_id())
    )
    if resolved is None:
        missing_msg = (
            "Cannot find cached environment for hash %s:%s in build-time "
            "manifest. Verify the MetaflowPackage tarball was extracted at "
            "the WORKDIR and that setup_conda_manifest moved the manifest "
            "into place." % (req_id, full_id)
        )
        raise RuntimeError(missing_msg)

    install_began = time.time()
    installed_at = conda.create_for_step(
        "prebuilt_build", resolved, do_symlink=False
    )
    install_secs = int(time.time() - install_began)
    _echo(" Env installed at %s (%ds)" % (installed_at, install_secs))
    return installed_at
86
+
87
+
88
if __name__ == "__main__":
    # Command-line entry: expects exactly <req_id> and <full_id>.
    if len(sys.argv) == 3:
        path = install_env(sys.argv[1], sys.argv[2])
        # Emit the installed env path on stdout for the caller to consume.
        print(path)
        # os._exit() ends the process without normal interpreter cleanup, so
        # buffered stdout has to be flushed explicitly beforehand.
        # NOTE(review): the hard exit is presumably there to avoid hanging on
        # leftover threads/handlers -- confirm before replacing with sys.exit.
        sys.stdout.flush()
        os._exit(0)

    # Only reached on a wrong argument count (the branch above never returns).
    print(
        "Usage: python -m metaflow_extensions.prebuilt.plugins.conda."
        "prebuilt_build_install <req_id> <full_id>",
        file=sys.stderr,
    )
    sys.exit(2)