aissemble_inference_deploy-1.5.0rc3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. aissemble_inference_deploy/__init__.py +38 -0
  2. aissemble_inference_deploy/cli.py +278 -0
  3. aissemble_inference_deploy/config.py +182 -0
  4. aissemble_inference_deploy/generators/__init__.py +36 -0
  5. aissemble_inference_deploy/generators/base.py +239 -0
  6. aissemble_inference_deploy/generators/docker.py +307 -0
  7. aissemble_inference_deploy/generators/kserve.py +89 -0
  8. aissemble_inference_deploy/generators/kubernetes.py +119 -0
  9. aissemble_inference_deploy/generators/local.py +162 -0
  10. aissemble_inference_deploy/registry.py +158 -0
  11. aissemble_inference_deploy/templates/docker/.dockerignore.j2 +47 -0
  12. aissemble_inference_deploy/templates/docker/Dockerfile.j2 +59 -0
  13. aissemble_inference_deploy/templates/docker/README.md.j2 +163 -0
  14. aissemble_inference_deploy/templates/docker/docker-compose.yml.j2 +22 -0
  15. aissemble_inference_deploy/templates/kserve/README.md.j2 +278 -0
  16. aissemble_inference_deploy/templates/kserve/inference-service.yaml.j2 +14 -0
  17. aissemble_inference_deploy/templates/kserve/serving-runtime.yaml.j2 +35 -0
  18. aissemble_inference_deploy/templates/kubernetes/README.md.j2 +164 -0
  19. aissemble_inference_deploy/templates/kubernetes/deployment.yaml.j2 +50 -0
  20. aissemble_inference_deploy/templates/kubernetes/kustomization.yaml.j2 +11 -0
  21. aissemble_inference_deploy/templates/kubernetes/overlays/dev/kustomization.yaml.j2 +52 -0
  22. aissemble_inference_deploy/templates/kubernetes/overlays/prod/kustomization.yaml.j2 +36 -0
  23. aissemble_inference_deploy/templates/kubernetes/service.yaml.j2 +19 -0
  24. aissemble_inference_deploy/templates/local/run-mlserver.sh.j2 +47 -0
  25. aissemble_inference_deploy-1.5.0rc3.dist-info/METADATA +248 -0
  26. aissemble_inference_deploy-1.5.0rc3.dist-info/RECORD +29 -0
  27. aissemble_inference_deploy-1.5.0rc3.dist-info/WHEEL +4 -0
  28. aissemble_inference_deploy-1.5.0rc3.dist-info/entry_points.txt +8 -0
  29. aissemble_inference_deploy-1.5.0rc3.dist-info/licenses/LICENSE.txt +201 -0
aissemble_inference_deploy/generators/kubernetes.py
@@ -0,0 +1,119 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ Kubernetes deployment generator.
+
+ Generates Kubernetes manifests with Kustomize structure for MLServer deployment.
+ Includes base manifests and dev/prod overlays for environment-specific configuration.
+ """
+
+ from pathlib import Path
+
+ from .base import Generator, ModelInfo
+
+
+ class KubernetesGenerator(Generator):
+     """Generator for Kubernetes deployment configurations using Kustomize."""
+
+     name = "kubernetes"
+
+     def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
+         """
+         Generate Kubernetes deployment configs with Kustomize structure.
+
+         Args:
+             models: Models to generate configs for (auto-detected if None)
+
+         Returns:
+             List of paths to generated files
+         """
+         if models is None:
+             models = self.detect_models()
+
+         generated_files = []
+         target_dir = self.output_dir / "kubernetes"
+
+         # Extract runtime packages for documentation
+         runtime_packages = self.extract_runtime_packages(models)
+
+         # Get image name (consistent with Docker generator)
+         image_name = self.get_image_name()
+
+         # Common template context
+         context = {
+             "models": models,
+             "runtime_packages": runtime_packages,
+             "image_name": image_name,
+             "app_name": image_name,
+             "http_port": 8080,
+             "grpc_port": 8081,
+             "node_port_http": 30080,
+             "node_port_grpc": 30081,
+         }
+
+         # Generate base manifests
+         base_dir = target_dir / "base"
+
+         deployment_content = self.render_template(
+             "kubernetes/deployment.yaml.j2", context
+         )
+         deployment_path = self.write_file(
+             base_dir / "deployment.yaml", deployment_content
+         )
+         generated_files.append(deployment_path)
+
+         service_content = self.render_template("kubernetes/service.yaml.j2", context)
+         service_path = self.write_file(base_dir / "service.yaml", service_content)
+         generated_files.append(service_path)
+
+         base_kustomization_content = self.render_template(
+             "kubernetes/kustomization.yaml.j2", context
+         )
+         base_kustomization_path = self.write_file(
+             base_dir / "kustomization.yaml", base_kustomization_content
+         )
+         generated_files.append(base_kustomization_path)
+
+         # Generate dev overlay
+         dev_dir = target_dir / "overlays" / "dev"
+         dev_kustomization_content = self.render_template(
+             "kubernetes/overlays/dev/kustomization.yaml.j2", context
+         )
+         dev_kustomization_path = self.write_file(
+             dev_dir / "kustomization.yaml", dev_kustomization_content
+         )
+         generated_files.append(dev_kustomization_path)
+
+         # Generate prod overlay
+         prod_dir = target_dir / "overlays" / "prod"
+         prod_kustomization_content = self.render_template(
+             "kubernetes/overlays/prod/kustomization.yaml.j2", context
+         )
+         prod_kustomization_path = self.write_file(
+             prod_dir / "kustomization.yaml", prod_kustomization_content
+         )
+         generated_files.append(prod_kustomization_path)
+
+         # Generate README
+         readme_content = self.render_template("kubernetes/README.md.j2", context)
+         readme_path = self.write_file(target_dir / "README.md", readme_content)
+         generated_files.append(readme_path)
+
+         return generated_files
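How the generated layout is consumed: a minimal sketch of driving `KubernetesGenerator` directly. The constructor arguments are an assumption, since the `Generator` base class is not part of this diff.

```python
# Sketch only: the constructor signature is assumed, not taken from this diff.
from pathlib import Path

from aissemble_inference_deploy.generators.kubernetes import KubernetesGenerator

generator = KubernetesGenerator(output_dir=Path("deploy"))  # assumed signature
for path in generator.generate():  # models are auto-detected when None
    print(f"generated: {path}")

# The Kustomize layout written under deploy/kubernetes/ is then applied with:
#   kubectl apply -k deploy/kubernetes/overlays/dev
```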
aissemble_inference_deploy/generators/local.py
@@ -0,0 +1,162 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ Local MLServer deployment generator.
+ """
+
+ import re
+ from pathlib import Path
+
+ from .base import Generator, ModelInfo
+
+
+ class LocalGenerator(Generator):
+     """Generator for local MLServer deployment scripts."""
+
+     name = "local"
+
+     def generate(self, models: list[ModelInfo] | None = None) -> list[Path]:
+         """
+         Generate local MLServer run script.
+
+         Args:
+             models: Models to generate configs for (auto-detected if None)
+
+         Returns:
+             List of paths to generated files
+         """
+         if models is None:
+             models = self.detect_models()
+
+         generated_files = []
+         target_dir = self.output_dir / "local"
+
+         # From deploy/local, go up two levels (../.. = project_dir), then down to models
+         models_rel_path = "../../models"
+
+         # Validate path is safe (no shell metacharacters)
+         if not re.match(r"^[a-zA-Z0-9_/.-]+$", models_rel_path):
+             raise ValueError(f"Invalid models path: {models_rel_path}")
+
+         # Generate run script
+         script_content = self.render_template(
+             "local/run-mlserver.sh.j2",
+             {
+                 "models_dir": models_rel_path,
+                 "models": models,
+                 "port": 8080,
+             },
+         )
+         script_path = self.write_file(
+             target_dir / "run-mlserver.sh",
+             script_content,
+             executable=True,
+         )
+         generated_files.append(script_path)
+
+         # Generate README
+         readme_content = self._generate_readme(models)
+         readme_path = self.write_file(target_dir / "README.md", readme_content)
+         generated_files.append(readme_path)
+
+         return generated_files
+
+     def _generate_readme(self, models: list[ModelInfo]) -> str:
+         """Generate README for local deployment."""
+         model_list = (
+             "\n".join(f"- {m.name}" for m in models) if models else "- (none detected)"
+         )
+
+         # Extract unique runtime packages for installation instructions.
+         # Runtime is typically "module_name.ClassName"; we need just the module.
+         runtime_packages = set()
+         for model in models:
+             if model.runtime and "." in model.runtime:
+                 # Extract package name from "aissemble_inference_sumy.SumyRuntime"
+                 package = model.runtime.split(".")[0]
+                 # Convert underscores to hyphens for PyPI package names
+                 package = package.replace("_", "-")
+                 runtime_packages.add(package)
+
+         runtime_install = ""
+         if runtime_packages:
+             packages_str = " ".join(sorted(runtime_packages))
+             runtime_install = f"""
+ **Model-specific runtimes:**
+ ```bash
+ pip install {packages_str}
+ ```
+ """
+
+         return f"""# Local MLServer Deployment
+
+ This directory contains scripts for running your models locally with MLServer.
+
+ ## Prerequisites
+
+ **Required:**
+ - Python 3.11+
+ - MLServer:
+ ```bash
+ pip install mlserver
+ ```
+ {runtime_install}
+ **Note:** The run script will check for MLServer and display installation instructions if not found.
+
+ ## Models
+
+ {model_list}
+
+ ## Usage
+
+ Start MLServer:
+
+ ```bash
+ ./run-mlserver.sh
+ ```
+
+ The server will start on http://localhost:8080
+
+ If MLServer is not installed, the script will display installation instructions.
+
+ ## Testing
+
+ Once the server is running, you can test it with:
+
+ ```bash
+ curl -X POST http://localhost:8080/v2/models/<model-name>/infer \\
+   -H "Content-Type: application/json" \\
+   -d '{{"inputs": [...]}}'
+ ```
+
+ ## Stopping
+
+ Press Ctrl+C to stop the server.
+
+ ## Troubleshooting
+
+ **MLServer not found:**
+ - Ensure MLServer is installed: `pip install mlserver`
+ - Check that it's in your PATH: `which mlserver`
+
+ **Import errors when starting:**
+ - Install model-specific dependencies (see Prerequisites above)
+ - Verify your Python environment matches the model requirements (Python 3.11+)
+ """
aissemble_inference_deploy/registry.py
@@ -0,0 +1,158 @@
+ ###
+ # #%L
+ # aiSSEMBLE::Open Inference Protocol::Deploy
+ # %%
+ # Copyright (C) 2024 Booz Allen Hamilton Inc.
+ # %%
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # #L%
+ ###
+ """
+ Generator registry with entry point discovery.
+
+ Allows custom generators to be registered via Python entry points,
+ enabling extensibility for air-gapped systems, custom platforms, etc.
+ """
+
+ import sys
+ import threading
+ from importlib.metadata import entry_points
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from .generators.base import Generator
+
+
+ class GeneratorRegistry:
+     """
+     Registry for deployment generators discovered via entry points.
+
+     Generators register themselves via the 'inference.generators' entry point group:
+
+         [project.entry-points."inference.generators"]
+         openshift = "my_package:OpenShiftGenerator"
+
+     This allows custom generators to be installed as separate packages
+     without modifying the core aissemble-inference-deploy module.
+     """
+
+     _instance: "GeneratorRegistry | None" = None
+     _lock = threading.Lock()
+
+     def __init__(self):
+         self._generators: dict[str, type["Generator"]] | None = None
+
+     @classmethod
+     def instance(cls) -> "GeneratorRegistry":
+         """Get the singleton registry instance (thread-safe)."""
+         if cls._instance is None:
+             with cls._lock:
+                 if cls._instance is None:  # Double-check locking
+                     cls._instance = cls()
+         return cls._instance
+
+     @classmethod
+     def reset(cls) -> None:
+         """Reset the singleton instance (useful for testing)."""
+         with cls._lock:
+             cls._instance = None
+
+     def _discover_generators(self) -> dict[str, type["Generator"]]:
+         """Discover generators from entry points."""
+         # Import here to avoid circular imports
+         from .generators.base import Generator
+
+         generators: dict[str, type["Generator"]] = {}
+
+         try:
+             eps = entry_points(group="inference.generators")
+         except Exception as e:
+             # entry_points() failed - likely environment issue
+             print(
+                 f"Warning: Failed to discover generators: {e}",
+                 file=sys.stderr,
+             )
+             return generators
+
+         for ep in eps:
+             try:
+                 generator_cls = ep.load()
+
+                 # Validate it's actually a Generator subclass
+                 if not isinstance(generator_cls, type):
+                     print(
+                         f"Warning: Generator '{ep.name}' is not a class: {type(generator_cls)}",
+                         file=sys.stderr,
+                     )
+                     continue
+
+                 if not issubclass(generator_cls, Generator):
+                     print(
+                         f"Warning: Generator '{ep.name}' must be a subclass of Generator, got {generator_cls.__name__}",
+                         file=sys.stderr,
+                     )
+                     continue
+
+                 generators[ep.name] = generator_cls
+
+             except ImportError as e:
+                 print(
+                     f"Warning: Failed to import generator '{ep.name}': {e}",
+                     file=sys.stderr,
+                 )
+             except Exception as e:
+                 print(
+                     f"Warning: Failed to load generator '{ep.name}': {e}",
+                     file=sys.stderr,
+                 )
+
+         return generators
+
+     @property
+     def generators(self) -> dict[str, type["Generator"]]:
+         """Get all available generators (lazy-loaded)."""
+         if self._generators is None:
+             self._generators = self._discover_generators()
+         return self._generators
+
+     def get(self, name: str) -> type["Generator"] | None:
+         """Get a generator by name, or None if not found."""
+         return self.generators.get(name)
+
+     def list_available(self) -> list[str]:
+         """List names of all available generators."""
+         return sorted(self.generators.keys())
+
+     def register(self, name: str, generator_cls: type["Generator"]) -> None:
+         """
+         Manually register a generator (useful for testing or runtime registration).
+
+         Args:
+             name: The target name (e.g., 'docker', 'kubernetes')
+             generator_cls: The generator class
+
+         Raises:
+             TypeError: If generator_cls is not a Generator subclass
+         """
+         from .generators.base import Generator
+
+         if not isinstance(generator_cls, type) or not issubclass(
+             generator_cls, Generator
+         ):
+             raise TypeError(
+                 f"generator_cls must be a subclass of Generator, got {type(generator_cls)}"
+             )
+
+         if self._generators is None:
+             self._generators = self._discover_generators()
+         self._generators[name] = generator_cls
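Putting the registry API together: a usage sketch built only from the methods above. `AirGappedGenerator` is a hypothetical stand-in for a custom generator that would normally ship in its own package.

```python
from aissemble_inference_deploy.generators.base import Generator
from aissemble_inference_deploy.registry import GeneratorRegistry

class AirGappedGenerator(Generator):  # hypothetical custom generator
    name = "airgapped"

registry = GeneratorRegistry.instance()            # thread-safe singleton
registry.register("airgapped", AirGappedGenerator) # manual runtime registration

print(registry.list_available())                   # entry-point names + "airgapped"
assert registry.get("airgapped") is AirGappedGenerator
assert registry.get("missing") is None             # unknown names return None

GeneratorRegistry.reset()                          # e.g. between tests
```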
aissemble_inference_deploy/templates/docker/.dockerignore.j2
@@ -0,0 +1,47 @@
+ # Generated by aissemble-inference-deploy
+ # Files to exclude from Docker build context
+
+ # Version control
+ .git
+ .gitignore
+
+ # Python
+ __pycache__
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ .venv
+ venv/
+ ENV/
+ env/
+
+ # IDEs
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Build artifacts
+ *.egg-info/
+ dist/
+ build/
+ target/
+
+ # Test artifacts
+ .pytest_cache/
+ .coverage
+ htmlcov/
+ .tox/
+
+ # Documentation
+ docs/_build/
+ *.md
+ !README.md
+
+ # Deployment configs (we only need models/)
+ deploy/
+
+ # OS files
+ .DS_Store
+ Thumbs.db
aissemble_inference_deploy/templates/docker/Dockerfile.j2
@@ -0,0 +1,59 @@
+ # Generated by aissemble-inference-deploy
+ # Multi-stage Dockerfile for MLServer deployment
+ {% if use_wheels %}
+ # Dev mode: Dependencies built with uv-monorepo-dependency-tool
+ {% else %}
+ # Release mode: Dependencies installed from PyPI
+ {% endif %}
+
+ # Build stage - install dependencies using uv
+ FROM python:{{ python_version }}-slim AS builder
+
+ # Install uv
+ RUN pip install --no-cache-dir uv
+
+ WORKDIR /app
+
+ {% if use_wheels %}
+ # Copy pre-built wheels with pinned dependencies (dev mode)
+ COPY deploy/docker/wheels/ /wheels/
+
+ # Create virtual environment and install wheels
+ RUN uv venv .venv && \
+     uv pip install --no-cache /wheels/*.whl
+ {% else %}
+ # Copy requirements file (release mode)
+ COPY deploy/docker/requirements.txt .
+
+ # Create virtual environment and install from PyPI
+ RUN uv venv .venv && \
+     uv pip install --no-cache -r requirements.txt
+ {% endif %}
+
+ # Runtime stage - minimal image
+ FROM python:{{ python_version }}-slim
+
+ WORKDIR /app
+
+ # Copy virtual environment from builder
+ COPY --from=builder /app/.venv /app/.venv
+
+ # Add venv to PATH
+ ENV PATH="/app/.venv/bin:$PATH"
+
+ # Copy models directory
+ COPY models/ /app/models/
+
+ # Create MLServer settings to bind to all interfaces (required for Docker)
+ # parallel_workers=0 disables multiprocessing to avoid uvloop issues
+ RUN echo '{"host": "0.0.0.0", "http_port": 8080, "grpc_port": 8081, "parallel_workers": 0}' > /app/models/settings.json
+
+ # Expose MLServer ports
+ EXPOSE 8080 8081
+
+ # Health check using MLServer's ready endpoint
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
+     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/v2/health/ready')" || exit 1
+
+ # Start MLServer
+ CMD ["mlserver", "start", "/app/models"]
aissemble_inference_deploy/templates/docker/README.md.j2
@@ -0,0 +1,163 @@
+ # Docker Deployment
+
+ This directory contains Docker configuration for containerized deployment.
+
+ {% if use_wheels %}
+ **Mode:** Dev (local wheels)
+ {% else %}
+ **Mode:** Release (PyPI packages)
+ {% endif %}
+
+ ## Prerequisites
+
+ - Docker Desktop or Rancher Desktop
+ - Docker Compose (included with Docker Desktop)
+
+ ## Models
+
+ {% for model in models %}
+ - {{ model.name }}
+ {% else %}
+ - (none detected)
+ {% endfor %}
+
+ ## Runtime Dependencies
+
+ {% if use_wheels %}
+ The following packages are installed in the container (from pre-built wheels):
+
+ {% for package in runtime_packages %}
+ - {{ package }}
+ {% endfor %}
+
+ The wheels in `wheels/` were built using [uv-monorepo-dependency-tool](https://github.com/TechnologyBrewery/uv-monorepo-dependency-tool)
+ to convert local path dependencies to pinned version dependencies.
+ {% else %}
+ The following packages are installed from PyPI:
+
+ {% for package in runtime_packages %}
+ - {{ package }}
+ {% endfor %}
+ {% endif %}
+
+ ## Quick Start
+
+ Build and start the container:
+
+ ```bash
+ docker-compose up --build
+ ```
+
+ The server will be available at http://localhost:{{ http_port }}
+
+ ## Usage
+
+ ### Build Only
+
+ ```bash
+ docker-compose build
+ ```
+
+ ### Start in Background
+
+ ```bash
+ docker-compose up -d
+ ```
+
+ ### View Logs
+
+ ```bash
+ docker-compose logs -f
+ ```
+
+ ### Stop
+
+ ```bash
+ docker-compose down
+ ```
+
+ ## Testing
+
+ Once the container is running, test the endpoint:
+
+ ```bash
+ curl -X POST http://localhost:{{ http_port }}/v2/models/<model-name>/infer \
+   -H "Content-Type: application/json" \
+   -d '{"inputs": [...]}'
+ ```
+
+ ## Customization
+
+ ### Environment Variables
+
+ The docker-compose.yml supports these environment variables:
+
+ - `MLSERVER_HTTP_PORT`: HTTP port (default: {{ http_port }})
+ - `MLSERVER_GRPC_PORT`: gRPC port (default: {{ grpc_port }})
+
+ ### Resource Limits
+
+ Edit docker-compose.yml to add resource constraints:
+
+ ```yaml
+ services:
+   mlserver:
+     deploy:
+       resources:
+         limits:
+           cpus: '2'
+           memory: 4G
+ ```
+
+ {% if use_wheels %}
+ ### Rebuilding Wheels
+
+ To regenerate the wheels with updated dependencies:
+
+ ```bash
+ uv tool run uv-monorepo-dependency-tool build-rewrite-path-deps --version-pinning-strategy=mixed
+ cp dist/*.whl deploy/docker/wheels/
+ ```
+
+ Or re-run the generator:
+
+ ```bash
+ inference deploy init --target docker
+ ```
+ {% else %}
+ ### Updating Dependencies
+
+ Edit `requirements.txt` to change package versions, then rebuild:
+
+ ```bash
+ docker-compose build --no-cache
+ ```
+ {% endif %}
+
+ ## Production Considerations
+
+ For production deployments:
+
+ 1. **Multi-architecture builds**: Use `docker buildx` for ARM/AMD64 support
+ 2. **Registry**: Push to a container registry (Docker Hub, ECR, GCR, etc.)
+ 3. **Secrets**: Use Docker secrets or environment variable injection
+ 4. **Health checks**: The Dockerfile includes a health check against MLServer's ready endpoint
+ 5. **Logging**: Configure structured logging for your platform
+
+ ## Troubleshooting
+
+ **Build fails with package not found:**
+ {% if use_wheels %}
+ - Regenerate wheels: `inference deploy init --target docker`
+ {% else %}
+ - Ensure runtime packages are published to PyPI
+ - Check network connectivity to PyPI
+ {% endif %}
+
+ **Container exits immediately:**
+ - Check logs: `docker-compose logs`
+ - Verify model-settings.json files are valid JSON
+
+ **Port already in use:**
+ - Stop other services on ports {{ http_port }}/{{ grpc_port }}
+ - Or change the ports in docker-compose.yml