aissemble_inference_deploy-1.5.0rc3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. aissemble_inference_deploy/__init__.py +38 -0
  2. aissemble_inference_deploy/cli.py +278 -0
  3. aissemble_inference_deploy/config.py +182 -0
  4. aissemble_inference_deploy/generators/__init__.py +36 -0
  5. aissemble_inference_deploy/generators/base.py +239 -0
  6. aissemble_inference_deploy/generators/docker.py +307 -0
  7. aissemble_inference_deploy/generators/kserve.py +89 -0
  8. aissemble_inference_deploy/generators/kubernetes.py +119 -0
  9. aissemble_inference_deploy/generators/local.py +162 -0
  10. aissemble_inference_deploy/registry.py +158 -0
  11. aissemble_inference_deploy/templates/docker/.dockerignore.j2 +47 -0
  12. aissemble_inference_deploy/templates/docker/Dockerfile.j2 +59 -0
  13. aissemble_inference_deploy/templates/docker/README.md.j2 +163 -0
  14. aissemble_inference_deploy/templates/docker/docker-compose.yml.j2 +22 -0
  15. aissemble_inference_deploy/templates/kserve/README.md.j2 +278 -0
  16. aissemble_inference_deploy/templates/kserve/inference-service.yaml.j2 +14 -0
  17. aissemble_inference_deploy/templates/kserve/serving-runtime.yaml.j2 +35 -0
  18. aissemble_inference_deploy/templates/kubernetes/README.md.j2 +164 -0
  19. aissemble_inference_deploy/templates/kubernetes/deployment.yaml.j2 +50 -0
  20. aissemble_inference_deploy/templates/kubernetes/kustomization.yaml.j2 +11 -0
  21. aissemble_inference_deploy/templates/kubernetes/overlays/dev/kustomization.yaml.j2 +52 -0
  22. aissemble_inference_deploy/templates/kubernetes/overlays/prod/kustomization.yaml.j2 +36 -0
  23. aissemble_inference_deploy/templates/kubernetes/service.yaml.j2 +19 -0
  24. aissemble_inference_deploy/templates/local/run-mlserver.sh.j2 +47 -0
  25. aissemble_inference_deploy-1.5.0rc3.dist-info/METADATA +248 -0
  26. aissemble_inference_deploy-1.5.0rc3.dist-info/RECORD +29 -0
  27. aissemble_inference_deploy-1.5.0rc3.dist-info/WHEEL +4 -0
  28. aissemble_inference_deploy-1.5.0rc3.dist-info/entry_points.txt +8 -0
  29. aissemble_inference_deploy-1.5.0rc3.dist-info/licenses/LICENSE.txt +201 -0
@@ -0,0 +1,239 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ Base generator class for deployment config generation.
+ """
+
+ import json
+ import sys
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any
+
+ from jinja2 import Environment, PackageLoader, TemplateNotFound, select_autoescape
+
+
+ @dataclass
+ class ModelInfo:
+     """Information about a model discovered in the project."""
+
+     name: str
+     path: Path
+     settings: dict[str, Any] = field(default_factory=dict)
+
+     @property
+     def runtime(self) -> str | None:
+         """Get the runtime implementation class."""
+         return self.settings.get("implementation")
+
+     @property
+     def parameters(self) -> dict[str, Any]:
+         """Get model parameters."""
+         return self.settings.get("parameters", {})
+
+
+ class Generator(ABC):
+     """Abstract base class for deployment config generators."""
+
+     # Name used in CLI (e.g., 'local', 'docker', 'kubernetes')
+     name: str = ""
+
+     def __init__(self, project_dir: Path, output_dir: Path | None = None):
+         """
+         Initialize the generator.
+
+         Args:
+             project_dir: Root directory of the user's project
+             output_dir: Where to write generated configs (default: project_dir/deploy)
+         """
+         self.project_dir = project_dir.resolve()
+         self.output_dir = (output_dir or project_dir / "deploy").resolve()
+         self.jinja_env = Environment(
+             loader=PackageLoader("aissemble_inference_deploy", "templates"),
+             autoescape=select_autoescape(),
+             trim_blocks=True,
+             lstrip_blocks=True,
+         )
+
+     def get_image_name(self) -> str:
+         """
+         Get the Docker image name for this project.
+
+         Derives the image name from the project directory name,
+         ensuring consistency between Docker and Kubernetes deployments.
+
+         Returns:
+             Image name (e.g., 'aissemble-summarization-example')
+         """
+         return self.project_dir.name
+
+     def detect_models(
+         self, models_dir: Path | None = None, max_depth: int = 5
+     ) -> list[ModelInfo]:
+         """
+         Discover models in the project.
+
+         Args:
+             models_dir: Directory to search for models (default: project_dir/models)
+             max_depth: Maximum directory depth to search (prevents resource exhaustion)
+
+         Returns:
+             List of discovered ModelInfo objects
+         """
+         models_dir = models_dir or self.project_dir / "models"
+         models = []
+
+         if not models_dir.exists():
+             return models
+
+         # Use rglob with depth check to prevent abuse
+         for model_settings_path in models_dir.rglob("model-settings.json"):
+             # Check depth relative to models_dir
+             try:
+                 rel_path = model_settings_path.relative_to(models_dir)
+                 if len(rel_path.parts) > max_depth + 1:  # +1 for the file itself
+                     continue
+             except ValueError:
+                 # Path is outside models_dir somehow, skip it
+                 continue
+
+             model_dir = model_settings_path.parent
+             model_name = model_dir.name
+
+             # Skip the root models directory if it has a settings.json
+             if model_dir == models_dir:
+                 continue
+
+             try:
+                 settings_text = model_settings_path.read_text(encoding="utf-8")
+                 settings = json.loads(settings_text)
+             except json.JSONDecodeError as e:
+                 print(
+                     f"Warning: Invalid JSON in {model_settings_path}: {e}",
+                     file=sys.stderr,
+                 )
+                 settings = {}
+             except OSError as e:
+                 print(
+                     f"Warning: Cannot read {model_settings_path}: {e}",
+                     file=sys.stderr,
+                 )
+                 settings = {}
+
+             models.append(
+                 ModelInfo(
+                     name=model_name,
+                     path=model_dir,
+                     settings=settings,
+                 )
+             )
+
+         return models
+
+     @abstractmethod
+     def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
+         """
+         Generate deployment configs.
+
+         Args:
+             models: Models to generate configs for (auto-detected if None)
+
+         Returns:
+             List of paths to generated files
+         """
+         pass
+
+     def render_template(self, template_name: str, context: dict[str, Any]) -> str:
+         """
+         Render a Jinja2 template with the given context.
+
+         Args:
+             template_name: Name of the template file
+             context: Template variables
+
+         Returns:
+             Rendered template content
+
+         Raises:
+             TemplateNotFound: If template doesn't exist
+         """
+         try:
+             template = self.jinja_env.get_template(template_name)
+             return template.render(**context)
+         except TemplateNotFound:
+             raise TemplateNotFound(
+                 f"Template '{template_name}' not found. Check that the generator has the correct templates."
+             )
+
+     def write_file(self, path: Path, content: str, executable: bool = False) -> Path:
+         """
+         Write content to a file, creating directories as needed.
+
+         Args:
+             path: Path to write to (must be within output_dir)
+             content: Content to write
+             executable: Whether to make the file executable
+
+         Returns:
+             The path that was written to
+
+         Raises:
+             ValueError: If path is outside output_dir (path traversal protection)
+         """
+         # Ensure path is within output_dir (prevent path traversal attacks)
+         try:
+             resolved_path = path.resolve()
+             resolved_output = self.output_dir.resolve()
+             resolved_path.relative_to(resolved_output)
+         except ValueError:
+             raise ValueError(
+                 f"Cannot write file outside output directory. "
+                 f"Attempted: {path}, Output dir: {self.output_dir}"
+             )
+
+         path.parent.mkdir(parents=True, exist_ok=True)
+         path.write_text(content, encoding="utf-8")
+         if executable:
+             path.chmod(0o755)
+         return path
+
+     def extract_runtime_packages(self, models: list[ModelInfo]) -> list[str]:
+         """
+         Extract PyPI package names from model runtime implementations.
+
+         Args:
+             models: List of models to extract packages from
+
+         Returns:
+             List of PyPI package specifiers (e.g., ["aissemble-inference-sumy>=1.0"])
+         """
+         packages = set()
+         packages.add("mlserver>=1.6.0")
+
+         for model in models:
+             if model.runtime and "." in model.runtime:
+                 # Extract package name from "aissemble_inference_sumy.SumyRuntime"
+                 module_name = model.runtime.split(".")[0]
+                 # Convert underscores to hyphens for PyPI package names
+                 package_name = module_name.replace("_", "-")
+                 packages.add(package_name)
+
+         return sorted(packages)
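
For orientation, the sketch below shows how a concrete generator can compose the base-class helpers (detect_models, get_image_name, write_file); the ManifestGenerator class and its output path are hypothetical illustrations, not part of this package.

import json
from pathlib import Path

from aissemble_inference_deploy.generators.base import Generator, ModelInfo


class ManifestGenerator(Generator):
    """Hypothetical generator: writes a JSON manifest of the discovered models."""

    name = "manifest"

    def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
        # Auto-detect <project>/models/**/model-settings.json when not supplied
        if models is None:
            models = self.detect_models()

        manifest = {
            "image": self.get_image_name(),
            "models": [
                {"name": m.name, "runtime": m.runtime, "parameters": m.parameters}
                for m in models
            ],
        }
        # write_file() rejects any path outside output_dir (path traversal guard)
        return [
            self.write_file(
                self.output_dir / "manifest" / "models.json",
                json.dumps(manifest, indent=2) + "\n",
            )
        ]


# Usage sketch: ManifestGenerator(Path("my-project")).generate()
# -> [my-project/deploy/manifest/models.json]
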
@@ -0,0 +1,307 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ Docker deployment generator.
+
+ Generates Dockerfile and docker-compose.yml for containerized MLServer deployment.
+
+ For dev versions: Uses uv-monorepo-dependency-tool to build wheels with pinned
+ dependencies, including all transitive local path dependencies.
+
+ For release versions: Generates requirements.txt to install from PyPI.
+ """
+
+ import shutil
+ import subprocess
+ import sys
+ from pathlib import Path
+
+ import tomlkit
+
+ from .base import Generator, ModelInfo
+
+
+ class DockerGenerator(Generator):
+     """Generator for Docker deployment configurations."""
+
+     name = "docker"
+
+     def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
+         """
+         Generate Docker deployment configs.
+
+         Args:
+             models: Models to generate configs for (auto-detected if None)
+
+         Returns:
+             List of paths to generated files
+         """
+         if models is None:
+             models = self.detect_models()
+
+         generated_files = []
+         target_dir = self.output_dir / "docker"
+
+         # Extract runtime packages for documentation and requirements
+         runtime_packages = self.extract_runtime_packages(models)
+
+         # Check if this is a dev version
+         is_dev = self._is_dev_version()
+
+         if is_dev:
+             # Build wheels for local testing
+             wheels_dir = target_dir / "wheels"
+             wheel_files = self._build_all_wheels(wheels_dir)
+             generated_files.extend(wheel_files)
+             use_wheels = True
+         else:
+             # Generate requirements.txt for PyPI install
+             requirements_content = "\n".join(runtime_packages) + "\n"
+             requirements_path = self.write_file(
+                 target_dir / "requirements.txt", requirements_content
+             )
+             generated_files.append(requirements_path)
+             use_wheels = False
+
+         # Generate Dockerfile
+         dockerfile_content = self.render_template(
+             "docker/Dockerfile.j2",
+             {
+                 "python_version": "3.11",
+                 "use_wheels": use_wheels,
+             },
+         )
+         dockerfile_path = self.write_file(target_dir / "Dockerfile", dockerfile_content)
+         generated_files.append(dockerfile_path)
+
+         # Generate docker-compose.yml
+         image_name = self.get_image_name()
+         compose_content = self.render_template(
+             "docker/docker-compose.yml.j2",
+             {
+                 "image_name": image_name,
+                 "http_port": 8080,
+                 "grpc_port": 8081,
+                 "models": models,
+             },
+         )
+         compose_path = self.write_file(
+             target_dir / "docker-compose.yml", compose_content
+         )
+         generated_files.append(compose_path)
+
+         # Generate .dockerignore
+         dockerignore_content = self.render_template(
+             "docker/.dockerignore.j2",
+             {},
+         )
+         dockerignore_path = self.write_file(
+             target_dir / ".dockerignore", dockerignore_content
+         )
+         generated_files.append(dockerignore_path)
+
+         # Generate README
+         readme_content = self.render_template(
+             "docker/README.md.j2",
+             {
+                 "models": models,
+                 "runtime_packages": runtime_packages,
+                 "http_port": 8080,
+                 "grpc_port": 8081,
+                 "use_wheels": use_wheels,
+             },
+         )
+         readme_path = self.write_file(target_dir / "README.md", readme_content)
+         generated_files.append(readme_path)
+
+         return generated_files
+
+     def _is_dev_version(self) -> bool:
+         """
+         Check if the project version is a dev version.
+
+         Returns:
+             True if version contains '.dev', False otherwise
+         """
+         pyproject_path = self.project_dir / "pyproject.toml"
+         if not pyproject_path.exists():
+             return False
+
+         content = pyproject_path.read_text(encoding="utf-8")
+         doc = tomlkit.parse(content)
+
+         version = doc.get("project", {}).get("version", "")
+         return ".dev" in version
+
+     def _build_all_wheels(self, wheels_dir: Path) -> list[Path]:
+         """
+         Build wheels for this project and all local path dependencies.
+
+         Recursively finds and builds all monorepo packages that this project
+         depends on, ensuring Docker has all required wheels.
+
+         Args:
+             wheels_dir: Directory to copy built wheels to
+
+         Returns:
+             List of paths to copied wheel files
+         """
+         if shutil.which("uv") is None:
+             raise RuntimeError(
+                 "uv is not installed or not in PATH. "
+                 "Install uv: https://docs.astral.sh/uv/getting-started/installation/"
+             )
+
+         wheels_dir.mkdir(parents=True, exist_ok=True)
+
+         # Find all projects to build (this project + path dependencies)
+         projects_to_build = self._find_all_path_dependencies(self.project_dir)
+
+         print(
+             f" Building wheels for {len(projects_to_build)} project(s) (dev mode)...",
+             file=sys.stderr,
+         )
+
+         copied_wheels = []
+         for project_path in projects_to_build:
+             wheel_path = self._build_single_wheel(project_path, wheels_dir)
+             if wheel_path:
+                 copied_wheels.append(wheel_path)
+
+         if not copied_wheels:
+             raise RuntimeError(
+                 "No wheel files were built. "
+                 "Check that uv-monorepo-dependency-tool completed successfully."
+             )
+
+         print(f" Built {len(copied_wheels)} wheel(s) to {wheels_dir}", file=sys.stderr)
+         return copied_wheels
+
+     def _find_all_path_dependencies(self, start_dir: Path) -> list[Path]:
+         """
+         Recursively find all local path dependencies.
+
+         Args:
+             start_dir: Starting project directory
+
+         Returns:
+             List of project directories to build (including start_dir)
+         """
+         visited = set()
+         to_visit = [start_dir.resolve()]
+         result = []
+
+         while to_visit:
+             current = to_visit.pop(0)
+             if current in visited:
+                 continue
+             visited.add(current)
+             result.append(current)
+
+             # Find path dependencies in this project
+             path_deps = self._get_path_dependencies(current)
+             for dep_path in path_deps:
+                 resolved = (current / dep_path).resolve()
+                 if resolved.exists() and resolved not in visited:
+                     to_visit.append(resolved)
+
+         return result
+
+     def _get_path_dependencies(self, project_dir: Path) -> list[str]:
+         """
+         Extract path dependencies from a project's pyproject.toml.
+
+         Args:
+             project_dir: Project directory containing pyproject.toml
+
+         Returns:
+             List of relative path strings to dependencies
+         """
+         pyproject_path = project_dir / "pyproject.toml"
+         if not pyproject_path.exists():
+             return []
+
+         content = pyproject_path.read_text(encoding="utf-8")
+         doc = tomlkit.parse(content)
+
+         # Look for [tool.uv.sources] section
+         sources = doc.get("tool", {}).get("uv", {}).get("sources", {})
+
+         paths = []
+         for _name, source in sources.items():
+             if isinstance(source, dict) and "path" in source:
+                 paths.append(source["path"])
+
+         return paths
+
+     def _build_single_wheel(self, project_dir: Path, wheels_dir: Path) -> Path | None:
+         """
+         Build a wheel for a single project using uv-monorepo-dependency-tool.
+
+         Args:
+             project_dir: Project directory to build
+             wheels_dir: Directory to copy the wheel to
+
+         Returns:
+             Path to the copied wheel file, or None if build failed
+         """
+         project_name = project_dir.name
+         print(f" Building {project_name}...", file=sys.stderr)
+
+         result = subprocess.run(
+             [
+                 "uv",
+                 "tool",
+                 "run",
+                 "uv-monorepo-dependency-tool",
+                 "build-rewrite-path-deps",
+                 "--version-pinning-strategy=mixed",
+             ],
+             cwd=project_dir,
+             capture_output=True,
+             text=True,
+         )
+
+         if result.returncode != 0:
+             print(
+                 f" Warning: Failed to build {project_name}: {result.stderr}",
+                 file=sys.stderr,
+             )
+             return None
+
+         # Find and copy the built wheel
+         dist_dir = project_dir / "dist"
+         if not dist_dir.exists():
+             print(
+                 f" Warning: No dist/ directory for {project_name}", file=sys.stderr
+             )
+             return None
+
+         # Get the most recent wheel
+         wheels = sorted(dist_dir.glob("*.whl"), key=lambda p: p.stat().st_mtime)
+         if not wheels:
+             print(f" Warning: No wheel found for {project_name}", file=sys.stderr)
+             return None
+
+         wheel_file = wheels[-1]  # Most recent
+         dest_path = wheels_dir / wheel_file.name
+         shutil.copy2(wheel_file, dest_path)
+         print(f" Copied {wheel_file.name}", file=sys.stderr)
+         return dest_path
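
For context on the dev/release branching above, here is a minimal sketch of the pyproject.toml shape that _is_dev_version() and _get_path_dependencies() inspect, using the same tomlkit lookups as the generator; the project name, version, and dependency path are invented for illustration.

import tomlkit

# Hypothetical pyproject.toml fragment: a .dev version plus a local path
# dependency declared under [tool.uv.sources].
PYPROJECT = """
[project]
name = "aissemble-summarization-example"
version = "1.5.0.dev0"

[tool.uv.sources]
aissemble-inference-sumy = { path = "../aissemble-inference-sumy" }
"""

doc = tomlkit.parse(PYPROJECT)

# Mirrors _is_dev_version(): '.dev' in the version string selects the wheel path.
print(".dev" in doc.get("project", {}).get("version", ""))  # True

# Mirrors _get_path_dependencies(): collect 'path' entries from [tool.uv.sources].
sources = doc.get("tool", {}).get("uv", {}).get("sources", {})
paths = [str(src["path"]) for src in sources.values() if isinstance(src, dict) and "path" in src]
print(paths)  # ['../aissemble-inference-sumy']

With a dev version, generate() builds wheels into deploy/docker/wheels/ and sets use_wheels; otherwise it writes deploy/docker/requirements.txt from the extracted runtime packages.
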
@@ -0,0 +1,89 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ KServe deployment generator.
+
+ Generates KServe InferenceService configurations for deploying MLServer-based models.
+ Produces a ServingRuntime (shared runtime configuration) and InferenceService
+ (model deployment) following the DRY principle.
+ """
+
+ from pathlib import Path
+
+ from .base import Generator, ModelInfo
+
+
+ class KServeGenerator(Generator):
+     """Generator for KServe InferenceService deployments."""
+
+     name = "kserve"
+
+     def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
+         """
+         Generate KServe deployment configs (ServingRuntime + InferenceService).
+
+         Args:
+             models: Models to generate configs for (auto-detected if None)
+
+         Returns:
+             List of paths to generated files
+         """
+         if models is None:
+             models = self.detect_models()
+
+         generated_files = []
+         target_dir = self.output_dir / "kserve"
+
+         # Extract runtime packages for documentation
+         runtime_packages = self.extract_runtime_packages(models)
+
+         # Get image name (consistent with Docker generator)
+         image_name = self.get_image_name()
+
+         # Common template context
+         context = {
+             "models": models,
+             "runtime_packages": runtime_packages,
+             "image_name": image_name,
+             "app_name": image_name,
+             "http_port": 8080,
+             "grpc_port": 8081,
+         }
+
+         # Generate ServingRuntime (shared runtime configuration)
+         runtime_content = self.render_template(
+             "kserve/serving-runtime.yaml.j2", context
+         )
+         runtime_path = self.write_file(
+             target_dir / "serving-runtime.yaml", runtime_content
+         )
+         generated_files.append(runtime_path)
+
+         # Generate InferenceService (references the ServingRuntime)
+         isvc_content = self.render_template("kserve/inference-service.yaml.j2", context)
+         isvc_path = self.write_file(target_dir / "inference-service.yaml", isvc_content)
+         generated_files.append(isvc_path)
+
+         # Generate README
+         readme_content = self.render_template("kserve/README.md.j2", context)
+         readme_path = self.write_file(target_dir / "README.md", readme_content)
+         generated_files.append(readme_path)
+
+         return generated_files
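
Since every generator shares the constructor and generate() contract defined in base.py, they can be driven uniformly. A minimal programmatic sketch follows; the my-project path is hypothetical, and the packaged cli.py provides the command-line entry point, not shown here.

from pathlib import Path

from aissemble_inference_deploy.generators.docker import DockerGenerator
from aissemble_inference_deploy.generators.kserve import KServeGenerator

# Hypothetical project root containing a models/ directory with model-settings.json files
project = Path("my-project")

# Each generator writes its files under <project>/deploy/<generator name>/ by default.
for generator_cls in (DockerGenerator, KServeGenerator):
    generator = generator_cls(project)
    for path in generator.generate():
        print(f"[{generator.name}] wrote {path}")
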