aissemble_inference_deploy-1.5.0rc3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aissemble_inference_deploy/__init__.py +38 -0
- aissemble_inference_deploy/cli.py +278 -0
- aissemble_inference_deploy/config.py +182 -0
- aissemble_inference_deploy/generators/__init__.py +36 -0
- aissemble_inference_deploy/generators/base.py +239 -0
- aissemble_inference_deploy/generators/docker.py +307 -0
- aissemble_inference_deploy/generators/kserve.py +89 -0
- aissemble_inference_deploy/generators/kubernetes.py +119 -0
- aissemble_inference_deploy/generators/local.py +162 -0
- aissemble_inference_deploy/registry.py +158 -0
- aissemble_inference_deploy/templates/docker/.dockerignore.j2 +47 -0
- aissemble_inference_deploy/templates/docker/Dockerfile.j2 +59 -0
- aissemble_inference_deploy/templates/docker/README.md.j2 +163 -0
- aissemble_inference_deploy/templates/docker/docker-compose.yml.j2 +22 -0
- aissemble_inference_deploy/templates/kserve/README.md.j2 +278 -0
- aissemble_inference_deploy/templates/kserve/inference-service.yaml.j2 +14 -0
- aissemble_inference_deploy/templates/kserve/serving-runtime.yaml.j2 +35 -0
- aissemble_inference_deploy/templates/kubernetes/README.md.j2 +164 -0
- aissemble_inference_deploy/templates/kubernetes/deployment.yaml.j2 +50 -0
- aissemble_inference_deploy/templates/kubernetes/kustomization.yaml.j2 +11 -0
- aissemble_inference_deploy/templates/kubernetes/overlays/dev/kustomization.yaml.j2 +52 -0
- aissemble_inference_deploy/templates/kubernetes/overlays/prod/kustomization.yaml.j2 +36 -0
- aissemble_inference_deploy/templates/kubernetes/service.yaml.j2 +19 -0
- aissemble_inference_deploy/templates/local/run-mlserver.sh.j2 +47 -0
- aissemble_inference_deploy-1.5.0rc3.dist-info/METADATA +248 -0
- aissemble_inference_deploy-1.5.0rc3.dist-info/RECORD +29 -0
- aissemble_inference_deploy-1.5.0rc3.dist-info/WHEEL +4 -0
- aissemble_inference_deploy-1.5.0rc3.dist-info/entry_points.txt +8 -0
- aissemble_inference_deploy-1.5.0rc3.dist-info/licenses/LICENSE.txt +201 -0
aissemble_inference_deploy/generators/kubernetes.py

@@ -0,0 +1,119 @@

```python
###
# #%L
# aiSSEMBLE::Open Inference Protocol::Deploy
# %%
# Copyright (C) 2024 Booz Allen Hamilton Inc.
# %%
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# #L%
###
"""
Kubernetes deployment generator.

Generates Kubernetes manifests with Kustomize structure for MLServer deployment.
Includes base manifests and dev/prod overlays for environment-specific configuration.
"""

from pathlib import Path

from .base import Generator, ModelInfo


class KubernetesGenerator(Generator):
    """Generator for Kubernetes deployment configurations using Kustomize."""

    name = "kubernetes"

    def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
        """
        Generate Kubernetes deployment configs with Kustomize structure.

        Args:
            models: Models to generate configs for (auto-detected if None)

        Returns:
            List of paths to generated files
        """
        if models is None:
            models = self.detect_models()

        generated_files = []
        target_dir = self.output_dir / "kubernetes"

        # Extract runtime packages for documentation
        runtime_packages = self.extract_runtime_packages(models)

        # Get image name (consistent with Docker generator)
        image_name = self.get_image_name()

        # Common template context
        context = {
            "models": models,
            "runtime_packages": runtime_packages,
            "image_name": image_name,
            "app_name": image_name,
            "http_port": 8080,
            "grpc_port": 8081,
            "node_port_http": 30080,
            "node_port_grpc": 30081,
        }

        # Generate base manifests
        base_dir = target_dir / "base"

        deployment_content = self.render_template(
            "kubernetes/deployment.yaml.j2", context
        )
        deployment_path = self.write_file(
            base_dir / "deployment.yaml", deployment_content
        )
        generated_files.append(deployment_path)

        service_content = self.render_template("kubernetes/service.yaml.j2", context)
        service_path = self.write_file(base_dir / "service.yaml", service_content)
        generated_files.append(service_path)

        base_kustomization_content = self.render_template(
            "kubernetes/kustomization.yaml.j2", context
        )
        base_kustomization_path = self.write_file(
            base_dir / "kustomization.yaml", base_kustomization_content
        )
        generated_files.append(base_kustomization_path)

        # Generate dev overlay
        dev_dir = target_dir / "overlays" / "dev"
        dev_kustomization_content = self.render_template(
            "kubernetes/overlays/dev/kustomization.yaml.j2", context
        )
        dev_kustomization_path = self.write_file(
            dev_dir / "kustomization.yaml", dev_kustomization_content
        )
        generated_files.append(dev_kustomization_path)

        # Generate prod overlay
        prod_dir = target_dir / "overlays" / "prod"
        prod_kustomization_content = self.render_template(
            "kubernetes/overlays/prod/kustomization.yaml.j2", context
        )
        prod_kustomization_path = self.write_file(
            prod_dir / "kustomization.yaml", prod_kustomization_content
        )
        generated_files.append(prod_kustomization_path)

        # Generate README
        readme_content = self.render_template("kubernetes/README.md.j2", context)
        readme_path = self.write_file(target_dir / "README.md", readme_content)
        generated_files.append(readme_path)

        return generated_files
```
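A note on the rendering step above: `render_template` comes from the shared base class in `generators/base.py` (included in this release, not shown in this hunk). As a minimal sketch of what that call does with the context dict, assuming a plain Jinja2 environment and an illustrative template body (not the packaged `service.yaml.j2`):

```python
# Sketch of how the context dict above feeds a Jinja2 template.
# The real render_template lives in generators/base.py (not shown here);
# SERVICE_TEMPLATE below is illustrative, not the packaged service.yaml.j2.
from jinja2 import Environment

SERVICE_TEMPLATE = """\
apiVersion: v1
kind: Service
metadata:
  name: {{ app_name }}
spec:
  ports:
    - name: http
      port: {{ http_port }}
    - name: grpc
      port: {{ grpc_port }}
"""

context = {"app_name": "my-model-server", "http_port": 8080, "grpc_port": 8081}
rendered = Environment().from_string(SERVICE_TEMPLATE).render(**context)
print(rendered)  # plain YAML, ready to be written to base/service.yaml
```

The rendered manifests are then composed through the base `kustomization.yaml` and selected per environment via the dev/prod overlays.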
aissemble_inference_deploy/generators/local.py

@@ -0,0 +1,162 @@

````python
###
# #%L
# aiSSEMBLE::Open Inference Protocol::Deploy
# %%
# Copyright (C) 2024 Booz Allen Hamilton Inc.
# %%
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# #L%
###
"""
Local MLServer deployment generator.
"""

import re
from pathlib import Path

from .base import Generator, ModelInfo


class LocalGenerator(Generator):
    """Generator for local MLServer deployment scripts."""

    name = "local"

    def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
        """
        Generate local MLServer run script.

        Args:
            models: Models to generate configs for (auto-detected if None)

        Returns:
            List of paths to generated files
        """
        if models is None:
            models = self.detect_models()

        generated_files = []
        target_dir = self.output_dir / "local"

        # From deploy/local, go up two levels (../.. = project_dir), then down to models
        models_rel_path = "../../models"

        # Validate path is safe (no shell metacharacters)
        if not re.match(r"^[a-zA-Z0-9_/.-]+$", models_rel_path):
            raise ValueError(f"Invalid models path: {models_rel_path}")

        # Generate run script
        script_content = self.render_template(
            "local/run-mlserver.sh.j2",
            {
                "models_dir": models_rel_path,
                "models": models,
                "port": 8080,
            },
        )
        script_path = self.write_file(
            target_dir / "run-mlserver.sh",
            script_content,
            executable=True,
        )
        generated_files.append(script_path)

        # Generate README
        readme_content = self._generate_readme(models)
        readme_path = self.write_file(target_dir / "README.md", readme_content)
        generated_files.append(readme_path)

        return generated_files

    def _generate_readme(self, models: list[ModelInfo]) -> str:
        """Generate README for local deployment."""
        model_list = (
            "\n".join(f"- {m.name}" for m in models) if models else "- (none detected)"
        )

        # Extract unique runtime packages for installation instructions
        # Runtime is typically "module_name.ClassName", we need just the module
        runtime_packages = set()
        for model in models:
            if model.runtime and "." in model.runtime:
                # Extract package name from "aissemble_inference_sumy.SumyRuntime"
                package = model.runtime.split(".")[0]
                # Convert underscores to hyphens for PyPI package names
                package = package.replace("_", "-")
                runtime_packages.add(package)

        runtime_install = ""
        if runtime_packages:
            packages_str = " ".join(sorted(runtime_packages))
            runtime_install = f"""
**Model-specific runtimes:**
```bash
pip install {packages_str}
```
"""

        return f"""# Local MLServer Deployment

This directory contains scripts for running your models locally with MLServer.

## Prerequisites

**Required:**
- Python 3.11+
- MLServer:
```bash
pip install mlserver
```
{runtime_install}
**Note:** The run script will check for MLServer and display installation instructions if not found.

## Models

{model_list}

## Usage

Start MLServer:

```bash
./run-mlserver.sh
```

The server will start on http://localhost:8080

If MLServer is not installed, the script will display installation instructions.

## Testing

Once the server is running, you can test it with:

```bash
curl -X POST http://localhost:8080/v2/models/<model-name>/infer \\
  -H "Content-Type: application/json" \\
  -d '{{"inputs": [...]}}'
```

## Stopping

Press Ctrl+C to stop the server.

## Troubleshooting

**MLServer not found:**
- Ensure MLServer is installed: `pip install mlserver`
- Check that it's in your PATH: `which mlserver`

**Import errors when starting:**
- Install model-specific dependencies (see Prerequisites above)
- Verify your Python environment matches the model requirements (Python 3.11+)
"""
````
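The runtime-to-package mapping in `_generate_readme` is worth calling out: a runtime string such as `aissemble_inference_sumy.SumyRuntime` is reduced to the installable PyPI name `aissemble-inference-sumy`. A standalone demo of that logic (the runtime strings here are illustrative examples, not values shipped in the package):

```python
# Standalone demo of the package-name derivation used in _generate_readme.
# The runtime values below are hypothetical examples.
runtimes = [
    "aissemble_inference_sumy.SumyRuntime",
    "mlserver_sklearn.SKLearnModel",
]

packages = set()
for runtime in runtimes:
    if runtime and "." in runtime:
        module = runtime.split(".")[0]          # "aissemble_inference_sumy"
        packages.add(module.replace("_", "-"))  # "aissemble-inference-sumy"

print(" ".join(sorted(packages)))
# -> aissemble-inference-sumy mlserver-sklearn
```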
aissemble_inference_deploy/registry.py

@@ -0,0 +1,158 @@

```python
###
# #%L
# aiSSEMBLE::Open Inference Protocol::Deploy
# %%
# Copyright (C) 2024 Booz Allen Hamilton Inc.
# %%
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# #L%
###
"""
Generator registry with entry point discovery.

Allows custom generators to be registered via Python entry points,
enabling extensibility for air-gapped systems, custom platforms, etc.
"""

import sys
import threading
from importlib.metadata import entry_points
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .generators.base import Generator


class GeneratorRegistry:
    """
    Registry for deployment generators discovered via entry points.

    Generators register themselves via the 'inference.generators' entry point group:

        [project.entry-points."inference.generators"]
        openshift = "my_package:OpenShiftGenerator"

    This allows custom generators to be installed as separate packages
    without modifying the core aissemble-inference-deploy module.
    """

    _instance: "GeneratorRegistry | None" = None
    _lock = threading.Lock()

    def __init__(self):
        self._generators: dict[str, type["Generator"]] | None = None

    @classmethod
    def instance(cls) -> "GeneratorRegistry":
        """Get the singleton registry instance (thread-safe)."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:  # Double-check locking
                    cls._instance = cls()
        return cls._instance

    @classmethod
    def reset(cls) -> None:
        """Reset the singleton instance (useful for testing)."""
        with cls._lock:
            cls._instance = None

    def _discover_generators(self) -> dict[str, type["Generator"]]:
        """Discover generators from entry points."""
        # Import here to avoid circular imports
        from .generators.base import Generator

        generators: dict[str, type["Generator"]] = {}

        try:
            eps = entry_points(group="inference.generators")
        except Exception as e:
            # entry_points() failed - likely environment issue
            print(
                f"Warning: Failed to discover generators: {e}",
                file=sys.stderr,
            )
            return generators

        for ep in eps:
            try:
                generator_cls = ep.load()

                # Validate it's actually a Generator subclass
                if not isinstance(generator_cls, type):
                    print(
                        f"Warning: Generator '{ep.name}' is not a class: {type(generator_cls)}",
                        file=sys.stderr,
                    )
                    continue

                if not issubclass(generator_cls, Generator):
                    print(
                        f"Warning: Generator '{ep.name}' must be a subclass of Generator, got {generator_cls.__name__}",
                        file=sys.stderr,
                    )
                    continue

                generators[ep.name] = generator_cls

            except ImportError as e:
                print(
                    f"Warning: Failed to import generator '{ep.name}': {e}",
                    file=sys.stderr,
                )
            except Exception as e:
                print(
                    f"Warning: Failed to load generator '{ep.name}': {e}",
                    file=sys.stderr,
                )

        return generators

    @property
    def generators(self) -> dict[str, type["Generator"]]:
        """Get all available generators (lazy-loaded)."""
        if self._generators is None:
            self._generators = self._discover_generators()
        return self._generators

    def get(self, name: str) -> type["Generator"] | None:
        """Get a generator by name, or None if not found."""
        return self.generators.get(name)

    def list_available(self) -> list[str]:
        """List names of all available generators."""
        return sorted(self.generators.keys())

    def register(self, name: str, generator_cls: type["Generator"]) -> None:
        """
        Manually register a generator (useful for testing or runtime registration).

        Args:
            name: The target name (e.g., 'docker', 'kubernetes')
            generator_cls: The generator class

        Raises:
            TypeError: If generator_cls is not a Generator subclass
        """
        from .generators.base import Generator

        if not isinstance(generator_cls, type) or not issubclass(
            generator_cls, Generator
        ):
            raise TypeError(
                f"generator_cls must be a subclass of Generator, got {type(generator_cls)}"
            )

        if self._generators is None:
            self._generators = self._discover_generators()
        self._generators[name] = generator_cls
```
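Beyond entry-point discovery, `register` supports runtime registration. A minimal sketch of that path, assuming `generate` is the only abstract method on the `Generator` base class (defined in `generators/base.py`, not shown in this diff) and using a made-up `NoopGenerator` for illustration:

```python
# Sketch of runtime registration; NoopGenerator is a made-up example class.
from pathlib import Path

from aissemble_inference_deploy.registry import GeneratorRegistry
from aissemble_inference_deploy.generators.base import Generator, ModelInfo


class NoopGenerator(Generator):
    """Example generator that produces no files."""

    name = "noop"

    def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
        return []


registry = GeneratorRegistry.instance()
registry.register("noop", NoopGenerator)

print(registry.list_available())  # includes "noop" plus entry-point generators
print(registry.get("noop"))       # the NoopGenerator class
print(registry.get("missing"))    # None
```

Note that `register` stores the class, not an instance, matching how entry-point discovery works; instantiation is left to the caller.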
aissemble_inference_deploy/templates/docker/.dockerignore.j2

@@ -0,0 +1,47 @@

```
# Generated by aissemble-inference-deploy
# Files to exclude from Docker build context

# Version control
.git
.gitignore

# Python
__pycache__
*.py[cod]
*$py.class
*.so
.Python
.venv
venv/
ENV/
env/

# IDEs
.idea/
.vscode/
*.swp
*.swo

# Build artifacts
*.egg-info/
dist/
build/
target/

# Test artifacts
.pytest_cache/
.coverage
htmlcov/
.tox/

# Documentation
docs/_build/
*.md
!README.md

# Deployment configs (we only need models/)
deploy/

# OS files
.DS_Store
Thumbs.db
```
aissemble_inference_deploy/templates/docker/Dockerfile.j2

@@ -0,0 +1,59 @@

```dockerfile
# Generated by aissemble-inference-deploy
# Multi-stage Dockerfile for MLServer deployment
{% if use_wheels %}
# Dev mode: Dependencies built with uv-monorepo-dependency-tool
{% else %}
# Release mode: Dependencies installed from PyPI
{% endif %}

# Build stage - install dependencies using uv
FROM python:{{ python_version }}-slim AS builder

# Install uv
RUN pip install --no-cache-dir uv

WORKDIR /app

{% if use_wheels %}
# Copy pre-built wheels with pinned dependencies (dev mode)
COPY deploy/docker/wheels/ /wheels/

# Create virtual environment and install wheels
RUN uv venv .venv && \
    uv pip install --no-cache /wheels/*.whl
{% else %}
# Copy requirements file (release mode)
COPY deploy/docker/requirements.txt .

# Create virtual environment and install from PyPI
RUN uv venv .venv && \
    uv pip install --no-cache -r requirements.txt
{% endif %}

# Runtime stage - minimal image
FROM python:{{ python_version }}-slim

WORKDIR /app

# Copy virtual environment from builder
COPY --from=builder /app/.venv /app/.venv

# Add venv to PATH
ENV PATH="/app/.venv/bin:$PATH"

# Copy models directory
COPY models/ /app/models/

# Create MLServer settings to bind to all interfaces (required for Docker)
# parallel_workers=0 disables multiprocessing to avoid uvloop issues
RUN echo '{"host": "0.0.0.0", "http_port": 8080, "grpc_port": 8081, "parallel_workers": 0}' > /app/models/settings.json

# Expose MLServer ports
EXPOSE 8080 8081

# Health check using MLServer's ready endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/v2/health/ready')" || exit 1

# Start MLServer
CMD ["mlserver", "start", "/app/models"]
```
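The `HEALTHCHECK` instruction probes MLServer's V2 readiness endpoint from inside the container. The same check can be run from the host once the container is up, assuming the HTTP port is published on `localhost:8080` as in the generated docker-compose.yml:

```python
# Host-side equivalent of the container HEALTHCHECK above.
# Assumes the container's HTTP port is published on localhost:8080.
import urllib.error
import urllib.request

try:
    with urllib.request.urlopen(
        "http://localhost:8080/v2/health/ready", timeout=10
    ) as resp:
        print(f"MLServer ready (HTTP {resp.status})")
except (urllib.error.URLError, OSError) as exc:
    print(f"MLServer not ready: {exc}")
```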
aissemble_inference_deploy/templates/docker/README.md.j2

@@ -0,0 +1,163 @@

````markdown
# Docker Deployment

This directory contains Docker configuration for containerized deployment.

{% if use_wheels %}
**Mode:** Dev (local wheels)
{% else %}
**Mode:** Release (PyPI packages)
{% endif %}

## Prerequisites

- Docker Desktop or Rancher Desktop
- Docker Compose (included with Docker Desktop)

## Models

{% for model in models %}
- {{ model.name }}
{% else %}
- (none detected)
{% endfor %}

## Runtime Dependencies

{% if use_wheels %}
The following packages are installed in the container (from pre-built wheels):

{% for package in runtime_packages %}
- {{ package }}
{% endfor %}

The wheels in `wheels/` were built using [uv-monorepo-dependency-tool](https://github.com/TechnologyBrewery/uv-monorepo-dependency-tool)
to convert local path dependencies to pinned version dependencies.
{% else %}
The following packages are installed from PyPI:

{% for package in runtime_packages %}
- {{ package }}
{% endfor %}
{% endif %}

## Quick Start

Build and start the container:

```bash
docker-compose up --build
```

The server will be available at http://localhost:{{ http_port }}

## Usage

### Build Only

```bash
docker-compose build
```

### Start in Background

```bash
docker-compose up -d
```

### View Logs

```bash
docker-compose logs -f
```

### Stop

```bash
docker-compose down
```

## Testing

Once the container is running, test the endpoint:

```bash
curl -X POST http://localhost:{{ http_port }}/v2/models/<model-name>/infer \
  -H "Content-Type: application/json" \
  -d '{"inputs": [...]}'
```

## Customization

### Environment Variables

The docker-compose.yml supports these environment variables:

- `MLSERVER_HTTP_PORT`: HTTP port (default: {{ http_port }})
- `MLSERVER_GRPC_PORT`: gRPC port (default: {{ grpc_port }})

### Resource Limits

Edit docker-compose.yml to add resource constraints:

```yaml
services:
  mlserver:
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 4G
```

{% if use_wheels %}
### Rebuilding Wheels

To regenerate the wheels with updated dependencies:

```bash
uv tool run uv-monorepo-dependency-tool build-rewrite-path-deps --version-pinning-strategy=mixed
cp dist/*.whl deploy/docker/wheels/
```

Or re-run the generator:

```bash
inference deploy init --target docker
```
{% else %}
### Updating Dependencies

Edit `requirements.txt` to change package versions, then rebuild:

```bash
docker-compose build --no-cache
```
{% endif %}

## Production Considerations

For production deployments:

1. **Multi-architecture builds**: Use `docker buildx` for ARM/AMD64 support
2. **Registry**: Push to a container registry (Docker Hub, ECR, GCR, etc.)
3. **Secrets**: Use Docker secrets or environment variable injection
4. **Health checks**: The Dockerfile includes MLServer health endpoints
5. **Logging**: Configure structured logging for your platform

## Troubleshooting

**Build fails with package not found:**
{% if use_wheels %}
- Regenerate wheels: `inference deploy init --target docker`
{% else %}
- Ensure runtime packages are published to PyPI
- Check network connectivity to PyPI
{% endif %}

**Container exits immediately:**
- Check logs: `docker-compose logs`
- Verify model-settings.json files are valid JSON

**Port already in use:**
- Stop other services on port {{ http_port }}/{{ grpc_port }}
- Or change the ports in docker-compose.yml
```` 