anydeploy 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anydeploy-0.2.2 → anydeploy-0.2.4}/.github/workflows/ci.yml +1 -1
- anydeploy-0.2.4/PKG-INFO +241 -0
- anydeploy-0.2.4/README.md +190 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/pyproject.toml +2 -1
- anydeploy-0.2.2/PKG-INFO +0 -234
- anydeploy-0.2.2/README.md +0 -184
- {anydeploy-0.2.2 → anydeploy-0.2.4}/CHANGELOG.md +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/CONTRIBUTING.md +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/LICENSE +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/logo.svg +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/__init__.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/benchmark.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/cli.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/config.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/__init__.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/base.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/ncnn.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/onnx.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/tflite.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/export/torchscript.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/mcp.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/py.typed +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/__init__.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/base.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/docker.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/fastapi.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/llamacpp.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/serve/mcp.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/utils.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/src/anydeploy/validate.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/__init__.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_benchmark.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_cli.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_config.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_docker.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_edge_cases.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_export.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_fastapi.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_mcp.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_profiles.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_real_world.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_security.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_serve.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_utils.py +0 -0
- {anydeploy-0.2.2 → anydeploy-0.2.4}/tests/test_validate.py +0 -0
anydeploy-0.2.4/PKG-INFO
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: anydeploy
|
|
3
|
+
Version: 0.2.4
|
|
4
|
+
Summary: CLI tool and library to export ML models to production formats and containerize them with Docker
|
|
5
|
+
Project-URL: Homepage, https://www.nrl.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/vietanhdev/anydeploy
|
|
7
|
+
Project-URL: Documentation, https://github.com/vietanhdev/anydeploy#readme
|
|
8
|
+
Project-URL: Issues, https://github.com/vietanhdev/anydeploy/issues
|
|
9
|
+
Author-email: Viet-Anh Nguyen <vietanh.dev@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: deployment,docker,machine-learning,model-serving,onnx,tflite,torchscript
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Requires-Dist: click>=8.0
|
|
27
|
+
Requires-Dist: numpy>=1.20
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Provides-Extra: all
|
|
30
|
+
Requires-Dist: fastapi>=0.68; extra == 'all'
|
|
31
|
+
Requires-Dist: onnx>=1.10; extra == 'all'
|
|
32
|
+
Requires-Dist: onnxruntime>=1.10; extra == 'all'
|
|
33
|
+
Requires-Dist: tensorflow>=2.5; extra == 'all'
|
|
34
|
+
Requires-Dist: torch>=1.9; extra == 'all'
|
|
35
|
+
Requires-Dist: uvicorn>=0.15; extra == 'all'
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
40
|
+
Provides-Extra: onnx
|
|
41
|
+
Requires-Dist: onnx>=1.10; extra == 'onnx'
|
|
42
|
+
Requires-Dist: onnxruntime>=1.10; extra == 'onnx'
|
|
43
|
+
Provides-Extra: serve
|
|
44
|
+
Requires-Dist: fastapi>=0.68; extra == 'serve'
|
|
45
|
+
Requires-Dist: uvicorn>=0.15; extra == 'serve'
|
|
46
|
+
Provides-Extra: tflite
|
|
47
|
+
Requires-Dist: tensorflow>=2.5; extra == 'tflite'
|
|
48
|
+
Provides-Extra: torch
|
|
49
|
+
Requires-Dist: torch>=1.9; extra == 'torch'
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
|
|
52
|
+
<h1 align="center">anydeploy</h1>
|
|
53
|
+
<p align="center"><em>Export, serve, and containerize any ML model — plus auto-generate MCP servers for AI agents.</em></p>
|
|
54
|
+
|
|
55
|
+
<p align="center">
|
|
56
|
+
<img src="https://img.shields.io/pypi/v/anydeploy.svg" alt="PyPI">
|
|
57
|
+
<img src="https://img.shields.io/pypi/pyversions/anydeploy.svg" alt="Python">
|
|
58
|
+
<img src="https://img.shields.io/pypi/l/anydeploy.svg" alt="License">
|
|
59
|
+
</p>
|
|
60
|
+
|
|
61
|
+
**anydeploy** is the last-mile deployment toolkit for ML models. It exports PyTorch or sklearn models to ONNX, TorchScript, or TFLite with smart defaults; generates a FastAPI server with health checks and OpenAPI docs; auto-creates a Model Context Protocol (MCP) server so any AI agent (Claude Desktop, Continue, Cursor) can call your model as a tool; and produces Dockerfiles + requirements files for reproducible deployment. Three deployment profiles (`edge`, `balanced`, `quality`) pick quantization and precision for you.
|
|
62
|
+
|
|
63
|
+
Built by [Viet-Anh Nguyen](https://github.com/vietanhdev) at [NRL.ai](https://www.nrl.ai).
|
|
64
|
+
|
|
65
|
+
## Why anydeploy?
|
|
66
|
+
|
|
67
|
+
- **One-liner API** — `anydeploy.export(model, "onnx")` handles shape inference, opset, and validation
|
|
68
|
+
- **Plugin architecture** — Register custom exporters, servers, or container targets
|
|
69
|
+
- **Local-first** — Everything runs on your machine; no cloud account needed
|
|
70
|
+
- **Minimal core deps** — Base install has zero heavy deps; torch/tf are optional
|
|
71
|
+
- **Production-ready** — MCP integration, FastAPI generation, Dockerfile scaffolding
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install anydeploy
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
For optional features:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install anydeploy[onnx] # ONNX export + onnxruntime verification
|
|
83
|
+
pip install anydeploy[torch] # TorchScript export
|
|
84
|
+
pip install anydeploy[tflite] # TFLite conversion
|
|
85
|
+
pip install anydeploy[serve] # FastAPI + uvicorn server
|
|
86
|
+
pip install anydeploy[mcp] # Model Context Protocol server generation
|
|
87
|
+
pip install anydeploy[all] # everything
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Python 3.8+ supported** (tested on 3.8, 3.9, 3.10, 3.11, 3.12, 3.13)
|
|
91
|
+
|
|
92
|
+
## Quick Start
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
import anydeploy
|
|
96
|
+
import torch
|
|
97
|
+
|
|
98
|
+
model = torch.load("resnet50.pt").eval()
|
|
99
|
+
|
|
100
|
+
# 1. Export to ONNX with smart defaults (opset, dynamic axes, validation)
|
|
101
|
+
anydeploy.export(
|
|
102
|
+
model,
|
|
103
|
+
format="onnx",
|
|
104
|
+
out="resnet50.onnx",
|
|
105
|
+
example_input=torch.randn(1, 3, 224, 224),
|
|
106
|
+
profile="balanced", # edge | balanced | quality
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# 2. Generate a FastAPI server with health check + OpenAPI docs
|
|
110
|
+
anydeploy.serve("resnet50.onnx", host="0.0.0.0", port=8000)
|
|
111
|
+
|
|
112
|
+
# 3. Generate an MCP server so Claude Desktop / Cursor can call the model
|
|
113
|
+
anydeploy.mcp("resnet50.onnx", out="my_mcp_server/", name="image-classifier")
|
|
114
|
+
|
|
115
|
+
# 4. Generate a Dockerfile + requirements.txt for reproducible deployment
|
|
116
|
+
anydeploy.containerize("resnet50.onnx", out="docker/", base="python:3.11-slim")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Models & Methods
|
|
120
|
+
|
|
121
|
+
### Export formats
|
|
122
|
+
|
|
123
|
+
| Format | How it works | Notes |
|
|
124
|
+
|---|---|---|
|
|
125
|
+
| **ONNX** | `torch.onnx.export` with auto-derived dynamic axes + opset 17 defaults | Validates via onnxruntime after export |
|
|
126
|
+
| **TorchScript** | `torch.jit.trace` (default) or `torch.jit.script` | Python-free runtime |
|
|
127
|
+
| **TFLite** | `torch -> onnx -> tf -> tflite` via onnx-tf + TensorFlow converter | Mobile / embedded |
|
|
128
|
+
|
|
129
|
+
All exports include automatic **shape inference**, **input/output naming**, and a **round-trip validation step** that runs a dummy input through both the original and the exported model and compares outputs.
|
|
130
|
+
|
|
131
|
+
### Deployment profiles
|
|
132
|
+
|
|
133
|
+
| Profile | Precision | Quantization | Intended target |
|
|
134
|
+
|---|---|---|---|
|
|
135
|
+
| `edge` | int8 | Post-training static quantization | Raspberry Pi, phones, MCUs |
|
|
136
|
+
| `balanced` (default) | fp16 | Optional fp16 conversion | Laptop / workstation CPU |
|
|
137
|
+
| `quality` | fp32 | None | Server / GPU inference |
|
|
138
|
+
|
|
139
|
+
### FastAPI server generation
|
|
140
|
+
|
|
141
|
+
`anydeploy.serve(model_path)` generates and launches a FastAPI app with:
|
|
142
|
+
|
|
143
|
+
- `POST /predict` — accepts JSON or multipart image upload
|
|
144
|
+
- `GET /health` — liveness check
|
|
145
|
+
- `GET /docs` — interactive OpenAPI UI (Swagger)
|
|
146
|
+
- Automatic request/response Pydantic schemas inferred from the model's input/output shapes
|
|
147
|
+
- Optional batching, CORS, and API-key authentication
|
|
148
|
+
|
|
149
|
+
### MCP (Model Context Protocol) server generation
|
|
150
|
+
|
|
151
|
+
`anydeploy.mcp(model_path, name=...)` generates a complete MCP server implementation that exposes your model as an AI-callable tool. Any MCP-compatible client — **Claude Desktop**, **Cursor**, **Continue**, **Zed** — can then invoke your model via natural language.
|
|
152
|
+
|
|
153
|
+
The generated server:
|
|
154
|
+
|
|
155
|
+
- Exposes a `run_model` tool with a JSON schema derived from model inputs
|
|
156
|
+
- Handles image decoding, tensor conversion, and postprocessing
|
|
157
|
+
- Ships with a `claude_desktop_config.json` snippet ready to copy
|
|
158
|
+
|
|
159
|
+
### Containerization
|
|
160
|
+
|
|
161
|
+
`anydeploy.containerize(model_path)` generates:
|
|
162
|
+
|
|
163
|
+
- `Dockerfile` — minimal base image (python-slim by default) with only the runtime dependencies your model needs
|
|
164
|
+
- `requirements.txt` — pinned versions discovered from the export step
|
|
165
|
+
- `.dockerignore` — sensible defaults
|
|
166
|
+
- `docker-compose.yml` (optional) — for multi-container setups
|
|
167
|
+
|
|
168
|
+
## API Reference
|
|
169
|
+
|
|
170
|
+
| Function | Purpose |
|
|
171
|
+
|---|---|
|
|
172
|
+
| `anydeploy.export(model, format, out, **opts)` | Export to ONNX/TorchScript/TFLite |
|
|
173
|
+
| `anydeploy.serve(model_path, host, port)` | Launch a FastAPI server |
|
|
174
|
+
| `anydeploy.generate_server(model_path, out)` | Generate FastAPI code to disk |
|
|
175
|
+
| `anydeploy.mcp(model_path, out, name)` | Generate an MCP tool server |
|
|
176
|
+
| `anydeploy.containerize(model_path, out)` | Generate Dockerfile + requirements |
|
|
177
|
+
| `anydeploy.quantize(model_path, mode="int8")` | Post-training quantization |
|
|
178
|
+
| `anydeploy.benchmark(model_path)` | Measure latency + throughput |
|
|
179
|
+
|
|
180
|
+
## CLI Usage
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
# Export
|
|
184
|
+
anydeploy export model.pt --format onnx --out model.onnx --profile edge
|
|
185
|
+
|
|
186
|
+
# Serve
|
|
187
|
+
anydeploy serve model.onnx --port 8000
|
|
188
|
+
|
|
189
|
+
# Generate MCP server
|
|
190
|
+
anydeploy mcp model.onnx --out mcp_server/ --name my-model
|
|
191
|
+
|
|
192
|
+
# Containerize
|
|
193
|
+
anydeploy containerize model.onnx --out docker/
|
|
194
|
+
|
|
195
|
+
# Benchmark
|
|
196
|
+
anydeploy benchmark model.onnx --runs 100
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Examples
|
|
200
|
+
|
|
201
|
+
### Train with traincv, deploy with anydeploy
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
import traincv, anydeploy
|
|
205
|
+
|
|
206
|
+
# Train a YOLOv8 detector
|
|
207
|
+
run = traincv.train("datasets/pets/", task="detect", model="yolov8n", epochs=50)
|
|
208
|
+
|
|
209
|
+
# Export to ONNX, edge-quantized
|
|
210
|
+
anydeploy.export(run.weights_path, format="onnx",
|
|
211
|
+
out="pets.onnx", profile="edge")
|
|
212
|
+
|
|
213
|
+
# Expose as an MCP tool for Claude Desktop
|
|
214
|
+
anydeploy.mcp("pets.onnx", out="pets_mcp/", name="pet-detector")
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Auto-generate a Docker image and run it
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
import anydeploy
|
|
221
|
+
|
|
222
|
+
anydeploy.containerize("model.onnx", out="deploy/")
|
|
223
|
+
|
|
224
|
+
# Then:
|
|
225
|
+
# cd deploy && docker build -t my-model .
|
|
226
|
+
# docker run -p 8000:8000 my-model
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Benchmark before and after quantization
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
import anydeploy
|
|
233
|
+
|
|
234
|
+
print(anydeploy.benchmark("model.onnx")) # fp32 baseline
|
|
235
|
+
anydeploy.quantize("model.onnx", mode="int8", out="model_int8.onnx")
|
|
236
|
+
print(anydeploy.benchmark("model_int8.onnx")) # int8 quantized
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## License
|
|
240
|
+
|
|
241
|
+
MIT (c) Viet-Anh Nguyen
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
<h1 align="center">anydeploy</h1>
|
|
2
|
+
<p align="center"><em>Export, serve, and containerize any ML model — plus auto-generate MCP servers for AI agents.</em></p>
|
|
3
|
+
|
|
4
|
+
<p align="center">
|
|
5
|
+
<img src="https://img.shields.io/pypi/v/anydeploy.svg" alt="PyPI">
|
|
6
|
+
<img src="https://img.shields.io/pypi/pyversions/anydeploy.svg" alt="Python">
|
|
7
|
+
<img src="https://img.shields.io/pypi/l/anydeploy.svg" alt="License">
|
|
8
|
+
</p>
|
|
9
|
+
|
|
10
|
+
**anydeploy** is the last-mile deployment toolkit for ML models. It exports PyTorch or sklearn models to ONNX, TorchScript, or TFLite with smart defaults; generates a FastAPI server with health checks and OpenAPI docs; auto-creates a Model Context Protocol (MCP) server so any AI agent (Claude Desktop, Continue, Cursor) can call your model as a tool; and produces Dockerfiles + requirements files for reproducible deployment. Three deployment profiles (`edge`, `balanced`, `quality`) pick quantization and precision for you.
|
|
11
|
+
|
|
12
|
+
Built by [Viet-Anh Nguyen](https://github.com/vietanhdev) at [NRL.ai](https://www.nrl.ai).
|
|
13
|
+
|
|
14
|
+
## Why anydeploy?
|
|
15
|
+
|
|
16
|
+
- **One-liner API** — `anydeploy.export(model, "onnx")` handles shape inference, opset, and validation
|
|
17
|
+
- **Plugin architecture** — Register custom exporters, servers, or container targets
|
|
18
|
+
- **Local-first** — Everything runs on your machine; no cloud account needed
|
|
19
|
+
- **Minimal core deps** — Base install has zero heavy deps; torch/tf are optional
|
|
20
|
+
- **Production-ready** — MCP integration, FastAPI generation, Dockerfile scaffolding
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install anydeploy
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For optional features:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install anydeploy[onnx] # ONNX export + onnxruntime verification
|
|
32
|
+
pip install anydeploy[torch] # TorchScript export
|
|
33
|
+
pip install anydeploy[tflite] # TFLite conversion
|
|
34
|
+
pip install anydeploy[serve] # FastAPI + uvicorn server
|
|
35
|
+
pip install anydeploy[mcp] # Model Context Protocol server generation
|
|
36
|
+
pip install anydeploy[all] # everything
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Python 3.8+ supported** (tested on 3.8, 3.9, 3.10, 3.11, 3.12, 3.13)
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import anydeploy
|
|
45
|
+
import torch
|
|
46
|
+
|
|
47
|
+
model = torch.load("resnet50.pt").eval()
|
|
48
|
+
|
|
49
|
+
# 1. Export to ONNX with smart defaults (opset, dynamic axes, validation)
|
|
50
|
+
anydeploy.export(
|
|
51
|
+
model,
|
|
52
|
+
format="onnx",
|
|
53
|
+
out="resnet50.onnx",
|
|
54
|
+
example_input=torch.randn(1, 3, 224, 224),
|
|
55
|
+
profile="balanced", # edge | balanced | quality
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# 2. Generate a FastAPI server with health check + OpenAPI docs
|
|
59
|
+
anydeploy.serve("resnet50.onnx", host="0.0.0.0", port=8000)
|
|
60
|
+
|
|
61
|
+
# 3. Generate an MCP server so Claude Desktop / Cursor can call the model
|
|
62
|
+
anydeploy.mcp("resnet50.onnx", out="my_mcp_server/", name="image-classifier")
|
|
63
|
+
|
|
64
|
+
# 4. Generate a Dockerfile + requirements.txt for reproducible deployment
|
|
65
|
+
anydeploy.containerize("resnet50.onnx", out="docker/", base="python:3.11-slim")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Models & Methods
|
|
69
|
+
|
|
70
|
+
### Export formats
|
|
71
|
+
|
|
72
|
+
| Format | How it works | Notes |
|
|
73
|
+
|---|---|---|
|
|
74
|
+
| **ONNX** | `torch.onnx.export` with auto-derived dynamic axes + opset 17 defaults | Validates via onnxruntime after export |
|
|
75
|
+
| **TorchScript** | `torch.jit.trace` (default) or `torch.jit.script` | Python-free runtime |
|
|
76
|
+
| **TFLite** | `torch -> onnx -> tf -> tflite` via onnx-tf + TensorFlow converter | Mobile / embedded |
|
|
77
|
+
|
|
78
|
+
All exports include automatic **shape inference**, **input/output naming**, and a **round-trip validation step** that runs a dummy input through both the original and the exported model and compares outputs.
|
|
79
|
+
|
|
80
|
+
### Deployment profiles
|
|
81
|
+
|
|
82
|
+
| Profile | Precision | Quantization | Intended target |
|
|
83
|
+
|---|---|---|---|
|
|
84
|
+
| `edge` | int8 | Post-training static quantization | Raspberry Pi, phones, MCUs |
|
|
85
|
+
| `balanced` (default) | fp16 | Optional fp16 conversion | Laptop / workstation CPU |
|
|
86
|
+
| `quality` | fp32 | None | Server / GPU inference |
|
|
87
|
+
|
|
88
|
+
### FastAPI server generation
|
|
89
|
+
|
|
90
|
+
`anydeploy.serve(model_path)` generates and launches a FastAPI app with:
|
|
91
|
+
|
|
92
|
+
- `POST /predict` — accepts JSON or multipart image upload
|
|
93
|
+
- `GET /health` — liveness check
|
|
94
|
+
- `GET /docs` — interactive OpenAPI UI (Swagger)
|
|
95
|
+
- Automatic request/response Pydantic schemas inferred from the model's input/output shapes
|
|
96
|
+
- Optional batching, CORS, and API-key authentication
|
|
97
|
+
|
|
98
|
+
### MCP (Model Context Protocol) server generation
|
|
99
|
+
|
|
100
|
+
`anydeploy.mcp(model_path, name=...)` generates a complete MCP server implementation that exposes your model as an AI-callable tool. Any MCP-compatible client — **Claude Desktop**, **Cursor**, **Continue**, **Zed** — can then invoke your model via natural language.
|
|
101
|
+
|
|
102
|
+
The generated server:
|
|
103
|
+
|
|
104
|
+
- Exposes a `run_model` tool with a JSON schema derived from model inputs
|
|
105
|
+
- Handles image decoding, tensor conversion, and postprocessing
|
|
106
|
+
- Ships with a `claude_desktop_config.json` snippet ready to copy
|
|
107
|
+
|
|
108
|
+
### Containerization
|
|
109
|
+
|
|
110
|
+
`anydeploy.containerize(model_path)` generates:
|
|
111
|
+
|
|
112
|
+
- `Dockerfile` — minimal base image (python-slim by default) with only the runtime dependencies your model needs
|
|
113
|
+
- `requirements.txt` — pinned versions discovered from the export step
|
|
114
|
+
- `.dockerignore` — sensible defaults
|
|
115
|
+
- `docker-compose.yml` (optional) — for multi-container setups
|
|
116
|
+
|
|
117
|
+
## API Reference
|
|
118
|
+
|
|
119
|
+
| Function | Purpose |
|
|
120
|
+
|---|---|
|
|
121
|
+
| `anydeploy.export(model, format, out, **opts)` | Export to ONNX/TorchScript/TFLite |
|
|
122
|
+
| `anydeploy.serve(model_path, host, port)` | Launch a FastAPI server |
|
|
123
|
+
| `anydeploy.generate_server(model_path, out)` | Generate FastAPI code to disk |
|
|
124
|
+
| `anydeploy.mcp(model_path, out, name)` | Generate an MCP tool server |
|
|
125
|
+
| `anydeploy.containerize(model_path, out)` | Generate Dockerfile + requirements |
|
|
126
|
+
| `anydeploy.quantize(model_path, mode="int8")` | Post-training quantization |
|
|
127
|
+
| `anydeploy.benchmark(model_path)` | Measure latency + throughput |
|
|
128
|
+
|
|
129
|
+
## CLI Usage
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Export
|
|
133
|
+
anydeploy export model.pt --format onnx --out model.onnx --profile edge
|
|
134
|
+
|
|
135
|
+
# Serve
|
|
136
|
+
anydeploy serve model.onnx --port 8000
|
|
137
|
+
|
|
138
|
+
# Generate MCP server
|
|
139
|
+
anydeploy mcp model.onnx --out mcp_server/ --name my-model
|
|
140
|
+
|
|
141
|
+
# Containerize
|
|
142
|
+
anydeploy containerize model.onnx --out docker/
|
|
143
|
+
|
|
144
|
+
# Benchmark
|
|
145
|
+
anydeploy benchmark model.onnx --runs 100
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Examples
|
|
149
|
+
|
|
150
|
+
### Train with traincv, deploy with anydeploy
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
import traincv, anydeploy
|
|
154
|
+
|
|
155
|
+
# Train a YOLOv8 detector
|
|
156
|
+
run = traincv.train("datasets/pets/", task="detect", model="yolov8n", epochs=50)
|
|
157
|
+
|
|
158
|
+
# Export to ONNX, edge-quantized
|
|
159
|
+
anydeploy.export(run.weights_path, format="onnx",
|
|
160
|
+
out="pets.onnx", profile="edge")
|
|
161
|
+
|
|
162
|
+
# Expose as an MCP tool for Claude Desktop
|
|
163
|
+
anydeploy.mcp("pets.onnx", out="pets_mcp/", name="pet-detector")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Auto-generate a Docker image and run it
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
import anydeploy
|
|
170
|
+
|
|
171
|
+
anydeploy.containerize("model.onnx", out="deploy/")
|
|
172
|
+
|
|
173
|
+
# Then:
|
|
174
|
+
# cd deploy && docker build -t my-model .
|
|
175
|
+
# docker run -p 8000:8000 my-model
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Benchmark before and after quantization
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
import anydeploy
|
|
182
|
+
|
|
183
|
+
print(anydeploy.benchmark("model.onnx")) # fp32 baseline
|
|
184
|
+
anydeploy.quantize("model.onnx", mode="int8", out="model_int8.onnx")
|
|
185
|
+
print(anydeploy.benchmark("model_int8.onnx")) # int8 quantized
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## License
|
|
189
|
+
|
|
190
|
+
MIT (c) Viet-Anh Nguyen
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "anydeploy"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "CLI tool and library to export ML models to production formats and containerize them with Docker"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -24,6 +24,7 @@ classifiers = [
|
|
|
24
24
|
"Programming Language :: Python :: 3.10",
|
|
25
25
|
"Programming Language :: Python :: 3.11",
|
|
26
26
|
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
27
28
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
28
29
|
]
|
|
29
30
|
dependencies = [
|
anydeploy-0.2.2/PKG-INFO
DELETED
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: anydeploy
|
|
3
|
-
Version: 0.2.2
|
|
4
|
-
Summary: CLI tool and library to export ML models to production formats and containerize them with Docker
|
|
5
|
-
Project-URL: Homepage, https://www.nrl.ai
|
|
6
|
-
Project-URL: Repository, https://github.com/vietanhdev/anydeploy
|
|
7
|
-
Project-URL: Documentation, https://github.com/vietanhdev/anydeploy#readme
|
|
8
|
-
Project-URL: Issues, https://github.com/vietanhdev/anydeploy/issues
|
|
9
|
-
Author-email: Viet-Anh Nguyen <vietanh.dev@gmail.com>
|
|
10
|
-
License-Expression: MIT
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
Keywords: deployment,docker,machine-learning,model-serving,onnx,tflite,torchscript
|
|
13
|
-
Classifier: Development Status :: 3 - Alpha
|
|
14
|
-
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: Intended Audience :: Science/Research
|
|
16
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
-
Classifier: Programming Language :: Python :: 3
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
-
Requires-Python: >=3.8
|
|
25
|
-
Requires-Dist: click>=8.0
|
|
26
|
-
Requires-Dist: numpy>=1.20
|
|
27
|
-
Requires-Dist: pyyaml>=6.0
|
|
28
|
-
Provides-Extra: all
|
|
29
|
-
Requires-Dist: fastapi>=0.68; extra == 'all'
|
|
30
|
-
Requires-Dist: onnx>=1.10; extra == 'all'
|
|
31
|
-
Requires-Dist: onnxruntime>=1.10; extra == 'all'
|
|
32
|
-
Requires-Dist: tensorflow>=2.5; extra == 'all'
|
|
33
|
-
Requires-Dist: torch>=1.9; extra == 'all'
|
|
34
|
-
Requires-Dist: uvicorn>=0.15; extra == 'all'
|
|
35
|
-
Provides-Extra: dev
|
|
36
|
-
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
37
|
-
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
38
|
-
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
39
|
-
Provides-Extra: onnx
|
|
40
|
-
Requires-Dist: onnx>=1.10; extra == 'onnx'
|
|
41
|
-
Requires-Dist: onnxruntime>=1.10; extra == 'onnx'
|
|
42
|
-
Provides-Extra: serve
|
|
43
|
-
Requires-Dist: fastapi>=0.68; extra == 'serve'
|
|
44
|
-
Requires-Dist: uvicorn>=0.15; extra == 'serve'
|
|
45
|
-
Provides-Extra: tflite
|
|
46
|
-
Requires-Dist: tensorflow>=2.5; extra == 'tflite'
|
|
47
|
-
Provides-Extra: torch
|
|
48
|
-
Requires-Dist: torch>=1.9; extra == 'torch'
|
|
49
|
-
Description-Content-Type: text/markdown
|
|
50
|
-
|
|
51
|
-
<h1 align="center">anydeploy</h1>
|
|
52
|
-
<p align="center"><em>Deploy ML models anywhere</em></p>
|
|
53
|
-
|
|
54
|
-

|
|
55
|
-

|
|
56
|
-

|
|
57
|
-
|
|
58
|
-
**Export ML models to production formats (ONNX, TFLite, TorchScript) and deploy them locally or at the edge.**
|
|
59
|
-
|
|
60
|
-
`anydeploy` makes model deployment easy. Convert your trained models to optimized inference formats, benchmark performance, validate correctness, generate serving code, and containerize everything -- all from a single CLI or Python API.
|
|
61
|
-
|
|
62
|
-
**Edge-first deployment.** Supports ONNX Runtime (CPU/GPU/edge), TFLite (mobile/edge), and llama.cpp (local LLM serving). All deployment targets work completely offline.
|
|
63
|
-
|
|
64
|
-
Built and maintained by [Viet-Anh Nguyen](https://github.com/vietanhdev) at [NRL.ai](https://www.nrl.ai).
|
|
65
|
-
|
|
66
|
-
## Installation
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
# Core (CLI + config + benchmarking)
|
|
70
|
-
pip install anydeploy
|
|
71
|
-
|
|
72
|
-
# With specific framework support
|
|
73
|
-
pip install anydeploy[torch] # PyTorch + TorchScript
|
|
74
|
-
pip install anydeploy[onnx] # ONNX + ONNX Runtime
|
|
75
|
-
pip install anydeploy[tflite] # TensorFlow Lite
|
|
76
|
-
pip install anydeploy[serve] # FastAPI serving
|
|
77
|
-
|
|
78
|
-
# Everything
|
|
79
|
-
pip install anydeploy[all]
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
## Quick Start
|
|
83
|
-
|
|
84
|
-
### CLI
|
|
85
|
-
|
|
86
|
-
```bash
|
|
87
|
-
# Export a PyTorch model to ONNX
|
|
88
|
-
anydeploy export model.pt --format onnx --input-shape 1,3,224,224
|
|
89
|
-
|
|
90
|
-
# Export to TFLite
|
|
91
|
-
anydeploy export model.pt --format tflite --input-shape 1,3,224,224
|
|
92
|
-
|
|
93
|
-
# Benchmark an exported model
|
|
94
|
-
anydeploy benchmark model.onnx --runs 100
|
|
95
|
-
|
|
96
|
-
# Serve a model with FastAPI
|
|
97
|
-
anydeploy serve model.onnx --backend fastapi --port 8000
|
|
98
|
-
|
|
99
|
-
# Generate a Docker container for deployment
|
|
100
|
-
anydeploy dockerize model.onnx --base python:3.11-slim
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
### Python API
|
|
104
|
-
|
|
105
|
-
```python
|
|
106
|
-
import anydeploy
|
|
107
|
-
|
|
108
|
-
# Export a model
|
|
109
|
-
anydeploy.export(model, format="onnx", input_shape=(1, 3, 224, 224))
|
|
110
|
-
|
|
111
|
-
# Benchmark performance
|
|
112
|
-
result = anydeploy.benchmark("model.onnx", runs=100)
|
|
113
|
-
print(f"Mean latency: {result.mean_latency_ms:.2f} ms")
|
|
114
|
-
print(f"P95 latency: {result.p95_latency_ms:.2f} ms")
|
|
115
|
-
print(f"Throughput: {result.throughput:.1f} inferences/sec")
|
|
116
|
-
|
|
117
|
-
# Validate exported model against original
|
|
118
|
-
report = anydeploy.validate(original_model, "model.onnx", test_input)
|
|
119
|
-
print(f"Max difference: {report.max_diff}")
|
|
120
|
-
print(f"Passed: {report.passed}")
|
|
121
|
-
|
|
122
|
-
# Generate Dockerfile and serving code
|
|
123
|
-
from anydeploy.config import DockerConfig
|
|
124
|
-
docker_cfg = DockerConfig(base_image="python:3.11-slim")
|
|
125
|
-
anydeploy.dockerize("model.onnx", docker_cfg)
|
|
126
|
-
|
|
127
|
-
# Register a custom exporter
|
|
128
|
-
from anydeploy.export.base import BaseExporter
|
|
129
|
-
class MyExporter(BaseExporter):
|
|
130
|
-
    def export(self, model, output_path, config=None):
|
|
131
|
-
        ...
|
|
132
|
-
anydeploy.register_exporter("myformat", MyExporter)
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
## Export Format Comparison
|
|
136
|
-
|
|
137
|
-
| Format | Framework | Hardware | Optimization | File Size |
|
|
138
|
-
|-------------|-------------|-----------------|------------------|-----------|
|
|
139
|
-
| ONNX | Any (via ONNX Runtime) | CPU, GPU, Edge | Graph optimization | Medium |
|
|
140
|
-
| TFLite | TensorFlow | Mobile, Edge | Quantization | Small |
|
|
141
|
-
| TorchScript | PyTorch | CPU, GPU | JIT compilation | Large |
|
|
142
|
-
|
|
143
|
-
## Serving
|
|
144
|
-
|
|
145
|
-
`anydeploy` generates production-ready serving code for multiple backends:
|
|
146
|
-
|
|
147
|
-
```bash
|
|
148
|
-
# FastAPI server for ONNX/TFLite/TorchScript models
|
|
149
|
-
anydeploy serve model.onnx --backend fastapi --port 8000
|
|
150
|
-
|
|
151
|
-
# llama.cpp server for GGUF language models (edge LLM deployment)
|
|
152
|
-
anydeploy serve model.gguf --backend llamacpp --port 8080
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
### FastAPI Backend
|
|
156
|
-
|
|
157
|
-
Creates a FastAPI application with:
|
|
158
|
-
- `/predict` endpoint accepting JSON or binary input
|
|
159
|
-
- `/health` health check endpoint
|
|
160
|
-
- Automatic input validation
|
|
161
|
-
- Configurable batch size
|
|
162
|
-
|
|
163
|
-
### llama.cpp Backend
|
|
164
|
-
|
|
165
|
-
Creates deployment scripts for serving GGUF language models locally:
|
|
166
|
-
- Shell script to launch llama.cpp server
|
|
167
|
-
- Dockerfile for containerized LLM serving
|
|
168
|
-
- OpenAI-compatible `/v1/chat/completions` endpoint
|
|
169
|
-
- Works on CPU, GPU, and edge devices
|
|
170
|
-
|
|
171
|
-
## Docker Deployment
|
|
172
|
-
|
|
173
|
-
Generate a complete Docker setup for your model:
|
|
174
|
-
|
|
175
|
-
```bash
|
|
176
|
-
anydeploy dockerize model.onnx --base python:3.11-slim --port 8000
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
This creates:
|
|
180
|
-
- `Dockerfile` with optimized layers
|
|
181
|
-
- `serve.py` FastAPI application
|
|
182
|
-
- `requirements.txt` with pinned dependencies
|
|
183
|
-
|
|
184
|
-
## Extensibility
|
|
185
|
-
|
|
186
|
-
`anydeploy` uses a plugin architecture. You can register custom exporters and serving backends:
|
|
187
|
-
|
|
188
|
-
```python
|
|
189
|
-
import anydeploy
|
|
190
|
-
from anydeploy.export.base import BaseExporter
|
|
191
|
-
|
|
192
|
-
class CoreMLExporter(BaseExporter):
|
|
193
|
-
    format_name = "coreml"
|
|
194
|
-
|
|
195
|
-
    def export(self, model, output_path, config=None):
|
|
196
|
-
        # Your export logic
|
|
197
|
-
        ...
|
|
198
|
-
|
|
199
|
-
    def validate_model(self, model):
|
|
200
|
-
        return True
|
|
201
|
-
|
|
202
|
-
anydeploy.register_exporter("coreml", CoreMLExporter)
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
See [CONTRIBUTING.md](CONTRIBUTING.md) for details on adding new exporters and backends.
|
|
206
|
-
|
|
207
|
-
## Local-First / Edge AI
|
|
208
|
-
|
|
209
|
-
This package is designed for edge and local deployment. All export formats
|
|
210
|
-
(ONNX, TFLite, TorchScript) produce models that run completely offline.
|
|
211
|
-
The llama.cpp backend enables local LLM serving without any cloud dependencies.
|
|
212
|
-
|
|
213
|
-
```bash
|
|
214
|
-
# Export for edge deployment
|
|
215
|
-
anydeploy export model.pt --format onnx # ONNX Runtime (CPU/GPU/edge)
|
|
216
|
-
anydeploy export model.pt --format tflite # TFLite (mobile/edge)
|
|
217
|
-
|
|
218
|
-
# Serve an LLM locally
|
|
219
|
-
anydeploy serve model.gguf --backend llamacpp
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
## Contributing
|
|
223
|
-
|
|
224
|
-
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
225
|
-
|
|
226
|
-
## License
|
|
227
|
-
|
|
228
|
-
MIT License. See [LICENSE](LICENSE) for details.
|
|
229
|
-
|
|
230
|
-
## Links
|
|
231
|
-
|
|
232
|
-
- [NRL.ai](https://www.nrl.ai)
|
|
233
|
-
- [GitHub](https://github.com/vietanhdev/anydeploy)
|
|
234
|
-
- [PyPI](https://pypi.org/project/anydeploy/)
|
anydeploy-0.2.2/README.md
DELETED
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
<h1 align="center">anydeploy</h1>
|
|
2
|
-
<p align="center"><em>Deploy ML models anywhere</em></p>
|
|
3
|
-
|
|
4
|
-

|
|
5
|
-

|
|
6
|
-

|
|
7
|
-
|
|
8
|
-
**Export ML models to production formats (ONNX, TFLite, TorchScript) and deploy them locally or at the edge.**
|
|
9
|
-
|
|
10
|
-
`anydeploy` makes model deployment easy. Convert your trained models to optimized inference formats, benchmark performance, validate correctness, generate serving code, and containerize everything -- all from a single CLI or Python API.
|
|
11
|
-
|
|
12
|
-
**Edge-first deployment.** Supports ONNX Runtime (CPU/GPU/edge), TFLite (mobile/edge), and llama.cpp (local LLM serving). All deployment targets work completely offline.
|
|
13
|
-
|
|
14
|
-
Built and maintained by [Viet-Anh Nguyen](https://github.com/vietanhdev) at [NRL.ai](https://www.nrl.ai).
|
|
15
|
-
|
|
16
|
-
## Installation
|
|
17
|
-
|
|
18
|
-
```bash
|
|
19
|
-
# Core (CLI + config + benchmarking)
|
|
20
|
-
pip install anydeploy
|
|
21
|
-
|
|
22
|
-
# With specific framework support
|
|
23
|
-
pip install anydeploy[torch] # PyTorch + TorchScript
|
|
24
|
-
pip install anydeploy[onnx] # ONNX + ONNX Runtime
|
|
25
|
-
pip install anydeploy[tflite] # TensorFlow Lite
|
|
26
|
-
pip install anydeploy[serve] # FastAPI serving
|
|
27
|
-
|
|
28
|
-
# Everything
|
|
29
|
-
pip install anydeploy[all]
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
## Quick Start
|
|
33
|
-
|
|
34
|
-
### CLI
|
|
35
|
-
|
|
36
|
-
```bash
|
|
37
|
-
# Export a PyTorch model to ONNX
|
|
38
|
-
anydeploy export model.pt --format onnx --input-shape 1,3,224,224
|
|
39
|
-
|
|
40
|
-
# Export to TFLite
|
|
41
|
-
anydeploy export model.pt --format tflite --input-shape 1,3,224,224
|
|
42
|
-
|
|
43
|
-
# Benchmark an exported model
|
|
44
|
-
anydeploy benchmark model.onnx --runs 100
|
|
45
|
-
|
|
46
|
-
# Serve a model with FastAPI
|
|
47
|
-
anydeploy serve model.onnx --backend fastapi --port 8000
|
|
48
|
-
|
|
49
|
-
# Generate a Docker container for deployment
|
|
50
|
-
anydeploy dockerize model.onnx --base python:3.11-slim
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
### Python API
|
|
54
|
-
|
|
55
|
-
```python
|
|
56
|
-
import anydeploy
|
|
57
|
-
|
|
58
|
-
# Export a model
|
|
59
|
-
anydeploy.export(model, format="onnx", input_shape=(1, 3, 224, 224))
|
|
60
|
-
|
|
61
|
-
# Benchmark performance
|
|
62
|
-
result = anydeploy.benchmark("model.onnx", runs=100)
|
|
63
|
-
print(f"Mean latency: {result.mean_latency_ms:.2f} ms")
|
|
64
|
-
print(f"P95 latency: {result.p95_latency_ms:.2f} ms")
|
|
65
|
-
print(f"Throughput: {result.throughput:.1f} inferences/sec")
|
|
66
|
-
|
|
67
|
-
# Validate exported model against original
|
|
68
|
-
report = anydeploy.validate(original_model, "model.onnx", test_input)
|
|
69
|
-
print(f"Max difference: {report.max_diff}")
|
|
70
|
-
print(f"Passed: {report.passed}")
|
|
71
|
-
|
|
72
|
-
# Generate Dockerfile and serving code
|
|
73
|
-
from anydeploy.config import DockerConfig
|
|
74
|
-
docker_cfg = DockerConfig(base_image="python:3.11-slim")
|
|
75
|
-
anydeploy.dockerize("model.onnx", docker_cfg)
|
|
76
|
-
|
|
77
|
-
# Register a custom exporter
|
|
78
|
-
from anydeploy.export.base import BaseExporter
|
|
79
|
-
class MyExporter(BaseExporter):
|
|
80
|
-
    def export(self, model, output_path, config=None):
|
|
81
|
-
        ...
|
|
82
|
-
anydeploy.register_exporter("myformat", MyExporter)
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
## Export Format Comparison
|
|
86
|
-
|
|
87
|
-
| Format | Framework | Hardware | Optimization | File Size |
|
|
88
|
-
|-------------|-------------|-----------------|------------------|-----------|
|
|
89
|
-
| ONNX | Any (via ONNX Runtime) | CPU, GPU, Edge | Graph optimization | Medium |
|
|
90
|
-
| TFLite | TensorFlow | Mobile, Edge | Quantization | Small |
|
|
91
|
-
| TorchScript | PyTorch | CPU, GPU | JIT compilation | Large |
|
|
92
|
-
|
|
93
|
-
## Serving
|
|
94
|
-
|
|
95
|
-
`anydeploy` generates production-ready serving code for multiple backends:
|
|
96
|
-
|
|
97
|
-
```bash
|
|
98
|
-
# FastAPI server for ONNX/TFLite/TorchScript models
|
|
99
|
-
anydeploy serve model.onnx --backend fastapi --port 8000
|
|
100
|
-
|
|
101
|
-
# llama.cpp server for GGUF language models (edge LLM deployment)
|
|
102
|
-
anydeploy serve model.gguf --backend llamacpp --port 8080
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### FastAPI Backend
|
|
106
|
-
|
|
107
|
-
Creates a FastAPI application with:
|
|
108
|
-
- `/predict` endpoint accepting JSON or binary input
|
|
109
|
-
- `/health` health check endpoint
|
|
110
|
-
- Automatic input validation
|
|
111
|
-
- Configurable batch size
|
|
112
|
-
|
|
113
|
-
### llama.cpp Backend
|
|
114
|
-
|
|
115
|
-
Creates deployment scripts for serving GGUF language models locally:
|
|
116
|
-
- Shell script to launch llama.cpp server
|
|
117
|
-
- Dockerfile for containerized LLM serving
|
|
118
|
-
- OpenAI-compatible `/v1/chat/completions` endpoint
|
|
119
|
-
- Works on CPU, GPU, and edge devices
|
|
120
|
-
|
|
121
|
-
## Docker Deployment
|
|
122
|
-
|
|
123
|
-
Generate a complete Docker setup for your model:
|
|
124
|
-
|
|
125
|
-
```bash
|
|
126
|
-
anydeploy dockerize model.onnx --base python:3.11-slim --port 8000
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
This creates:
|
|
130
|
-
- `Dockerfile` with optimized layers
|
|
131
|
-
- `serve.py` FastAPI application
|
|
132
|
-
- `requirements.txt` with pinned dependencies
|
|
133
|
-
|
|
134
|
-
## Extensibility
|
|
135
|
-
|
|
136
|
-
`anydeploy` uses a plugin architecture. You can register custom exporters and serving backends:
|
|
137
|
-
|
|
138
|
-
```python
|
|
139
|
-
import anydeploy
|
|
140
|
-
from anydeploy.export.base import BaseExporter
|
|
141
|
-
|
|
142
|
-
class CoreMLExporter(BaseExporter):
|
|
143
|
-
    format_name = "coreml"
|
|
144
|
-
|
|
145
|
-
    def export(self, model, output_path, config=None):
|
|
146
|
-
        # Your export logic
|
|
147
|
-
        ...
|
|
148
|
-
|
|
149
|
-
    def validate_model(self, model):
|
|
150
|
-
        return True
|
|
151
|
-
|
|
152
|
-
anydeploy.register_exporter("coreml", CoreMLExporter)
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
See [CONTRIBUTING.md](CONTRIBUTING.md) for details on adding new exporters and backends.
|
|
156
|
-
|
|
157
|
-
## Local-First / Edge AI
|
|
158
|
-
|
|
159
|
-
This package is designed for edge and local deployment. All export formats
|
|
160
|
-
(ONNX, TFLite, TorchScript) produce models that run completely offline.
|
|
161
|
-
The llama.cpp backend enables local LLM serving without any cloud dependencies.
|
|
162
|
-
|
|
163
|
-
```bash
|
|
164
|
-
# Export for edge deployment
|
|
165
|
-
anydeploy export model.pt --format onnx # ONNX Runtime (CPU/GPU/edge)
|
|
166
|
-
anydeploy export model.pt --format tflite # TFLite (mobile/edge)
|
|
167
|
-
|
|
168
|
-
# Serve an LLM locally
|
|
169
|
-
anydeploy serve model.gguf --backend llamacpp
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
## Contributing
|
|
173
|
-
|
|
174
|
-
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
175
|
-
|
|
176
|
-
## License
|
|
177
|
-
|
|
178
|
-
MIT License. See [LICENSE](LICENSE) for details.
|
|
179
|
-
|
|
180
|
-
## Links
|
|
181
|
-
|
|
182
|
-
- [NRL.ai](https://www.nrl.ai)
|
|
183
|
-
- [GitHub](https://github.com/vietanhdev/anydeploy)
|
|
184
|
-
- [PyPI](https://pypi.org/project/anydeploy/)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|