mozo 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mozo/__init__.py +50 -5
- mozo/__main__.py +24 -0
- mozo/adapters/__init__.py +0 -0
- mozo/adapters/depth_anything.py +75 -0
- mozo/adapters/detectron2.py +128 -0
- mozo/adapters/qwen2_5_vl.py +170 -0
- mozo/cli.py +47 -0
- mozo/factory.py +150 -0
- mozo/manager.py +294 -0
- mozo/registry.py +235 -0
- mozo/server.py +294 -0
- mozo-0.2.0.dist-info/METADATA +343 -0
- mozo-0.2.0.dist-info/RECORD +17 -0
- mozo-0.2.0.dist-info/entry_points.txt +2 -0
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/licenses/LICENSE +2 -2
- mozo-0.1.0.dist-info/METADATA +0 -58
- mozo-0.1.0.dist-info/RECORD +0 -6
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/WHEEL +0 -0
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/top_level.txt +0 -0
mozo/server.py
ADDED
@@ -0,0 +1,294 @@
|
|
1
|
+
import io
|
2
|
+
import cv2
|
3
|
+
import json
|
4
|
+
import numpy as np
|
5
|
+
from pathlib import Path
|
6
|
+
from fastapi import FastAPI, UploadFile, File, HTTPException
|
7
|
+
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
|
8
|
+
|
9
|
+
# Import model manager and registry utilities
|
10
|
+
from .manager import ModelManager
|
11
|
+
from .registry import get_available_families, get_available_variants, get_model_info
|
12
|
+
|
13
|
+
import os
|
14
|
+
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
|
15
|
+
|
16
|
+
# --- FastAPI App ---
|
17
|
+
app = FastAPI(
|
18
|
+
title="Mozo Model Server",
|
19
|
+
description="Dynamic model serving API with lazy loading and lifecycle management.",
|
20
|
+
version="0.2.0"
|
21
|
+
)
|
22
|
+
|
23
|
+
# --- Model Manager Setup ---
|
24
|
+
@app.on_event("startup")
def setup_manager():
    """
    Startup hook: create the shared ModelManager and attach it to the app.

    Only the manager object is constructed here and stored on
    ``app.state.model_manager``; no model is requested by this hook.
    """
    print("[Server] Initializing model manager...")
    manager = ModelManager()
    app.state.model_manager = manager
    print("[Server] Model manager ready. Models will be loaded on-demand.")
|
35
|
+
|
36
|
+
# --- API Endpoints ---
|
37
|
+
@app.get("/", summary="Health Check", description="Check if the API server is ready.")
def health_check():
    """
    Report whether the server is up and which models are currently loaded.

    The check only looks for ``app.state.model_manager``; it does not load
    or probe any model.

    Returns:
        dict: ``status`` and ``message``, plus ``loaded_models`` when the
        manager is available.
    """
    if hasattr(app.state, "model_manager"):
        currently_loaded = app.state.model_manager.list_loaded_models()
        return {
            "status": "ok",
            "message": "Server is running with dynamic model management.",
            "loaded_models": currently_loaded,
        }

    # The not-ready case is reported in the body of a normal response,
    # not via an HTTPException.
    return {"status": "error", "message": "Server is starting up, model manager not yet initialized."}
|
52
|
+
|
53
|
+
|
54
|
+
# --- Test UI ---
|
55
|
+
|
56
|
+
@app.get("/test-ui", summary="Test UI", description="Serve interactive testing interface.")
def serve_test_ui():
    """
    Serve the interactive test UI for model testing.

    This provides a user-friendly web interface to:
    - Upload images
    - Select models dynamically
    - View prediction results

    Returns:
        FileResponse: ``static/test_ui.html`` located next to this module.

    Raises:
        HTTPException: 404 when the HTML file is not present on disk.
    """
    html_path = Path(__file__).parent / "static" / "test_ui.html"

    # FileResponse only notices a missing file while the response is being
    # sent, which reaches the client as a server error. Check up front and
    # answer with a clear 404 instead.
    if not html_path.is_file():
        raise HTTPException(status_code=404, detail="Test UI not found at static/test_ui.html")

    return FileResponse(html_path, media_type="text/html")
|
68
|
+
|
69
|
+
|
70
|
+
@app.get("/static/example.jpg", summary="Example Image", description="Serve example test image.")
def serve_example_image():
    """
    Return the example JPEG used for testing.

    The file is looked up at ``vision/example.jpg`` under the parent of this
    package's directory.

    Raises:
        HTTPException: 404 when that file does not exist.
    """
    package_parent = Path(__file__).parent.parent
    image_path = package_parent / "vision" / "example.jpg"

    if image_path.exists():
        return FileResponse(image_path, media_type="image/jpeg")

    raise HTTPException(status_code=404, detail="Example image not found at vision/example.jpg")
|
79
|
+
|
80
|
+
|
81
|
+
# --- Prediction Endpoints ---
|
82
|
+
|
83
|
+
@app.post("/predict/{family}/{variant}",
          summary="Run Model Prediction",
          description="Upload an image and get predictions from any available model variant.")
async def predict(
    family: str,
    variant: str,
    file: UploadFile = File(..., description="Image file to process."),
    prompt: str = "Describe this image in detail."
):
    """
    Universal prediction endpoint supporting all model families and variants.

    Args:
        family: Model family (e.g., 'detectron2', 'depth_anything', 'qwen2.5_vl')
        variant: Model variant (e.g., 'mask_rcnn_R_50_FPN_3x', 'small', '7b-instruct')
        file: Image file to process
        prompt: Text prompt for vision-language models (only passed to the
            model when family is 'qwen2.5_vl')

    Returns:
        A PNG ``StreamingResponse`` when the model result has a ``save``
        method, otherwise a ``JSONResponse`` built from ``to_dict()`` (when
        available) or from the result itself.

    Raises:
        HTTPException: 503 if the model manager is not initialized;
            400 if the upload cannot be read or decoded as an image;
            404 if the manager raises ``ValueError`` for the family/variant;
            500 if loading the model or running the prediction fails.

    Examples:
        POST /predict/detectron2/mask_rcnn_R_50_FPN_3x
        POST /predict/detectron2/faster_rcnn_X_101_32x8d_FPN_3x
        POST /predict/depth_anything/small
        POST /predict/depth_anything/large
        POST /predict/qwen2.5_vl/7b-instruct?prompt=What objects are in this image?
    """
    if not hasattr(app.state, "model_manager"):
        raise HTTPException(status_code=503, detail="Server is starting up, model manager not initialized.")

    # Read and decode image
    try:
        contents = await file.read()
        image = cv2.imdecode(np.frombuffer(contents, np.uint8), cv2.IMREAD_COLOR)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Could not read or decode the image file: {e}") from e

    # Checked outside the try block: HTTPException is itself an Exception, so
    # raising it inside would be caught above and re-wrapped with a garbled
    # detail message.
    if image is None:
        raise HTTPException(status_code=400, detail="Invalid image file.")

    # Get or load model (lazy loading happens here)
    try:
        model = app.state.model_manager.get_model(family, variant)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load model: {e}") from e

    # Run prediction
    try:
        # Vision-language models need prompt parameter
        if family == 'qwen2.5_vl':
            results = model.predict(image, prompt=prompt)
        else:
            results = model.predict(image)

        # Handle different return types
        if hasattr(results, 'save'):  # It's a PIL Image (depth map)
            buffer = io.BytesIO()
            results.save(buffer, format="PNG")
            buffer.seek(0)
            return StreamingResponse(buffer, media_type="image/png")

        elif hasattr(results, 'to_dict'):  # It's a PixelFlow Detections object
            # to_dict() is expected to yield JSON-serializable content
            return JSONResponse(content=results.to_dict())

        else:  # It's a dict (VLM results)
            return JSONResponse(content=results)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {e}") from e
|
155
|
+
|
156
|
+
|
157
|
+
# --- Model Management Endpoints ---
|
158
|
+
|
159
|
+
@app.get("/models",
         summary="List Available Models",
         description="Get all available model families and their variants.")
def list_available_models():
    """
    Build a catalog of every registered model family.

    Returns:
        dict: Maps each family name to its ``task_type``, ``description``,
        ``num_variants`` and ``variants`` as reported by the registry helpers.
    """
    catalog = {}

    for family_name in get_available_families():
        family_variants = get_available_variants(family_name)
        family_info = get_model_info(family_name)
        catalog[family_name] = dict(
            task_type=family_info['task_type'],
            description=family_info['description'],
            num_variants=len(family_variants),
            variants=family_variants,
        )

    return catalog
|
183
|
+
|
184
|
+
|
185
|
+
@app.get("/models/loaded",
         summary="List Loaded Models",
         description="Get currently loaded models in memory.")
def list_loaded_models():
    """
    Describe the models the manager currently holds.

    Returns:
        dict: ``loaded_count`` (length of the manager's loaded-model list) and
        ``models`` (whatever the manager's ``get_model_info()`` returns).

    Raises:
        HTTPException: 503 when the model manager has not been created yet.
    """
    if not hasattr(app.state, "model_manager"):
        raise HTTPException(status_code=503, detail="Model manager not initialized.")

    manager = app.state.model_manager
    loaded_ids = manager.list_loaded_models()
    usage_info = manager.get_model_info()

    return {"loaded_count": len(loaded_ids), "models": usage_info}
|
205
|
+
|
206
|
+
|
207
|
+
@app.get("/models/{family}/{variant}/info",
         summary="Get Model Info",
         description="Get detailed information about a specific model variant.")
def get_model_details(family: str, variant: str):
    """
    Look up registry information for one variant and attach its load status.

    Args:
        family: Model family name
        variant: Model variant name

    Returns:
        dict: The registry info for the variant with an added ``load_status``
        entry. When no manager exists, ``load_status`` is ``{'loaded': False}``.

    Raises:
        HTTPException: 404 when a ``ValueError`` is raised during the lookup.
    """
    try:
        details = get_model_info(family, variant)

        if not hasattr(app.state, "model_manager"):
            details['load_status'] = {'loaded': False}
        else:
            # The manager is queried with the "<family>/<variant>" identifier
            details['load_status'] = app.state.model_manager.get_model_info(f"{family}/{variant}")

        return details
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
|
235
|
+
|
236
|
+
|
237
|
+
@app.post("/models/{family}/{variant}/unload",
          summary="Unload Model",
          description="Manually unload a model to free memory.")
def unload_model(family: str, variant: str):
    """
    Ask the manager to unload one model and report the outcome.

    Args:
        family: Model family name
        variant: Model variant name

    Returns:
        dict: ``status`` is ``"unloaded"`` when the manager reports success,
        otherwise ``"not_loaded"`` together with an explanatory message.

    Raises:
        HTTPException: 503 when the model manager has not been created yet.
    """
    if not hasattr(app.state, "model_manager"):
        raise HTTPException(status_code=503, detail="Model manager not initialized.")

    if not app.state.model_manager.unload_model(family, variant):
        return {
            "status": "not_loaded",
            "family": family,
            "variant": variant,
            "message": "Model was not loaded, nothing to unload.",
        }

    return {
        "status": "unloaded",
        "family": family,
        "variant": variant,
        "model_id": f"{family}/{variant}",
    }
|
270
|
+
|
271
|
+
|
272
|
+
@app.post("/models/cleanup",
          summary="Cleanup Inactive Models",
          description="Unload models that haven't been used recently.")
def cleanup_inactive_models(inactive_seconds: int = 600):
    """
    Trigger a cleanup pass on the manager with the given inactivity threshold.

    Args:
        inactive_seconds: Time threshold in seconds (default: 600 = 10 minutes)

    Returns:
        dict: ``status``, the value returned by the manager as
        ``models_unloaded``, and the threshold that was used.

    Raises:
        HTTPException: 503 when the model manager has not been created yet.
    """
    if not hasattr(app.state, "model_manager"):
        raise HTTPException(status_code=503, detail="Model manager not initialized.")

    manager = app.state.model_manager
    unloaded_count = manager.cleanup_inactive_models(inactive_seconds)

    return dict(
        status="completed",
        models_unloaded=unloaded_count,
        inactive_threshold_seconds=inactive_seconds,
    )
|
@@ -0,0 +1,343 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: mozo
|
3
|
+
Version: 0.2.0
|
4
|
+
Summary: Universal computer vision model serving library with dynamic model management and PixelFlow integration
|
5
|
+
Home-page: https://github.com/datamarkin/mozo
|
6
|
+
Author: Emrah NAZIF
|
7
|
+
Author-email: Emrah NAZIF <emrah@datamarkin.com>, Datamarkin <support@datamarkin.com>
|
8
|
+
License: MIT
|
9
|
+
Project-URL: Homepage, https://github.com/datamarkin/mozo
|
10
|
+
Project-URL: Bug Tracker, https://github.com/datamarkin/mozo/issues
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Requires-Python: >=3.9
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: fastapi
|
23
|
+
Requires-Dist: uvicorn
|
24
|
+
Requires-Dist: requests
|
25
|
+
Requires-Dist: opencv-python
|
26
|
+
Requires-Dist: transformers
|
27
|
+
Requires-Dist: torch
|
28
|
+
Requires-Dist: torchvision
|
29
|
+
Requires-Dist: Pillow
|
30
|
+
Requires-Dist: numpy
|
31
|
+
Requires-Dist: scipy
|
32
|
+
Requires-Dist: pixelflow
|
33
|
+
Requires-Dist: click
|
34
|
+
Dynamic: author
|
35
|
+
Dynamic: home-page
|
36
|
+
Dynamic: license-file
|
37
|
+
Dynamic: requires-python
|
38
|
+
|
39
|
+
# Mozo
|
40
|
+
|
41
|
+
Universal computer vision model server with automatic memory management and multi-framework support.
|
42
|
+
|
43
|
+
Mozo provides HTTP access to 25+ pre-configured models from Detectron2, HuggingFace Transformers, and other frameworks. Models load on-demand and clean up automatically.
|
44
|
+
|
45
|
+
## Quick Start
|
46
|
+
|
47
|
+
```bash
|
48
|
+
pip install mozo
|
49
|
+
mozo start
|
50
|
+
```
|
51
|
+
|
52
|
+
Server starts on `http://localhost:8000` with all models available via REST API.
|
53
|
+
|
54
|
+
### Examples
|
55
|
+
|
56
|
+
Object detection:
|
57
|
+
```bash
|
58
|
+
curl -X POST "http://localhost:8000/predict/detectron2/mask_rcnn_R_50_FPN_3x" \
|
59
|
+
-F "file=@image.jpg"
|
60
|
+
```
|
61
|
+
|
62
|
+
Depth estimation:
|
63
|
+
```bash
|
64
|
+
curl -X POST "http://localhost:8000/predict/depth_anything/small" \
|
65
|
+
-F "file=@image.jpg" --output depth.png
|
66
|
+
```
|
67
|
+
|
68
|
+
Vision-language Q&A:
|
69
|
+
```bash
|
70
|
+
curl -X POST "http://localhost:8000/predict/qwen2.5_vl/7b-instruct?prompt=What%20is%20in%20this%20image" \
|
71
|
+
-F "file=@image.jpg"
|
72
|
+
```
|
73
|
+
|
74
|
+
List available models:
|
75
|
+
```bash
|
76
|
+
curl http://localhost:8000/models
|
77
|
+
```
|
78
|
+
|
79
|
+
## Features
|
80
|
+
|
81
|
+
- **25+ Pre-configured Models** - Detectron2, HuggingFace Transformers, custom adapters
|
82
|
+
- **Automatic Memory Management** - Lazy loading, usage tracking, automatic cleanup
|
83
|
+
- **Multi-Framework Support** - Unified API across different ML frameworks
|
84
|
+
- **PixelFlow Integration** - Detection models return unified format for filtering and annotation
|
85
|
+
- **Thread-Safe** - Concurrent request handling with per-model locks
|
86
|
+
- **Production Ready** - Multiple workers, configurable timeouts, health checks
|
87
|
+
|
88
|
+
## Installation
|
89
|
+
|
90
|
+
```bash
|
91
|
+
# Basic installation
|
92
|
+
pip install mozo
|
93
|
+
|
94
|
+
# Framework dependencies (install as needed)
|
95
|
+
pip install transformers torch torchvision
|
96
|
+
pip install 'git+https://github.com/facebookresearch/detectron2.git'
|
97
|
+
```
|
98
|
+
|
99
|
+
## Available Models
|
100
|
+
|
101
|
+
### Detectron2 (22 variants)
|
102
|
+
Object detection, instance segmentation, and keypoint detection models trained on the COCO dataset.
|
103
|
+
|
104
|
+
Popular variants:
|
105
|
+
- `mask_rcnn_R_50_FPN_3x` - Instance segmentation
|
106
|
+
- `faster_rcnn_R_50_FPN_3x` - Object detection
|
107
|
+
- `faster_rcnn_X_101_32x8d_FPN_3x` - High-accuracy detection
|
108
|
+
- `keypoint_rcnn_R_50_FPN_3x` - Keypoint detection
|
109
|
+
- `retinanet_R_50_FPN_3x` - Single-stage detector
|
110
|
+
|
111
|
+
Output: JSON with bounding boxes, class names, confidence scores (80 COCO classes)
|
112
|
+
|
113
|
+
### Depth Anything (3 variants)
|
114
|
+
Monocular depth estimation.
|
115
|
+
|
116
|
+
- `small` - Fastest, lowest memory
|
117
|
+
- `base` - Balanced performance
|
118
|
+
- `large` - Best accuracy
|
119
|
+
|
120
|
+
Output: PNG grayscale depth map
|
121
|
+
|
122
|
+
### Qwen2.5-VL (1 variant)
|
123
|
+
Vision-language understanding for VQA, captioning, and image analysis.
|
124
|
+
|
125
|
+
- `7b-instruct` - 7B parameter model (requires 16GB+ RAM)
|
126
|
+
|
127
|
+
Output: JSON with text response
|
128
|
+
|
129
|
+
## Server
|
130
|
+
|
131
|
+
```bash
|
132
|
+
# Start with defaults (0.0.0.0:8000, auto-reload enabled)
|
133
|
+
mozo start
|
134
|
+
|
135
|
+
# Custom port
|
136
|
+
mozo start --port 8080
|
137
|
+
|
138
|
+
# Production mode with multiple workers
|
139
|
+
mozo start --workers 4
|
140
|
+
|
141
|
+
# Check version
|
142
|
+
mozo version
|
143
|
+
```
|
144
|
+
|
145
|
+
## API Reference
|
146
|
+
|
147
|
+
### Run Prediction
|
148
|
+
```http
|
149
|
+
POST /predict/{family}/{variant}
|
150
|
+
Content-Type: multipart/form-data
|
151
|
+
```
|
152
|
+
|
153
|
+
Parameters:
|
154
|
+
- `family` - Model family (e.g., `detectron2`, `depth_anything`, `qwen2.5_vl`)
|
155
|
+
- `variant` - Model variant (e.g., `mask_rcnn_R_50_FPN_3x`, `small`, `7b-instruct`)
|
156
|
+
- `file` - Image file
|
157
|
+
- `prompt` - Text prompt (VLM models only)
|
158
|
+
|
159
|
+
### Health Check
|
160
|
+
```http
|
161
|
+
GET /
|
162
|
+
```
|
163
|
+
|
164
|
+
Returns server status and loaded models.
|
165
|
+
|
166
|
+
### List Models
|
167
|
+
```http
|
168
|
+
GET /models
|
169
|
+
```
|
170
|
+
|
171
|
+
Returns all available model families and variants.
|
172
|
+
|
173
|
+
### List Loaded Models
|
174
|
+
```http
|
175
|
+
GET /models/loaded
|
176
|
+
```
|
177
|
+
|
178
|
+
Returns currently loaded models with usage information.
|
179
|
+
|
180
|
+
### Get Model Info
|
181
|
+
```http
|
182
|
+
GET /models/{family}/{variant}/info
|
183
|
+
```
|
184
|
+
|
185
|
+
Returns detailed information about a specific model variant.
|
186
|
+
|
187
|
+
### Unload Model
|
188
|
+
```http
|
189
|
+
POST /models/{family}/{variant}/unload
|
190
|
+
```
|
191
|
+
|
192
|
+
Manually unload a model to free memory.
|
193
|
+
|
194
|
+
### Cleanup Inactive Models
|
195
|
+
```http
|
196
|
+
POST /models/cleanup?inactive_seconds=600
|
197
|
+
```
|
198
|
+
|
199
|
+
Unload models inactive for specified duration (default: 600 seconds).
|
200
|
+
|
201
|
+
## How It Works
|
202
|
+
|
203
|
+
**Lazy Loading**
|
204
|
+
Models load on first request, not at server startup. This keeps startup time instant regardless of available models.
|
205
|
+
|
206
|
+
**Smart Caching**
|
207
|
+
Loaded models stay in memory and are reused across requests. The first request is slower (model download + load); subsequent requests are fast.
|
208
|
+
|
209
|
+
**Usage Tracking**
|
210
|
+
Each model access updates a timestamp. Models inactive for 10+ minutes are automatically unloaded.
|
211
|
+
|
212
|
+
**Thread Safety**
|
213
|
+
Per-model locks ensure only one thread loads a given model. Other threads wait and reuse the loaded instance.
|
214
|
+
|
215
|
+
Example flow:
|
216
|
+
```bash
|
217
|
+
# Server starts instantly (no models loaded)
|
218
|
+
mozo start
|
219
|
+
|
220
|
+
# First request loads model
|
221
|
+
curl -X POST "http://localhost:8000/predict/detectron2/faster_rcnn_R_50_FPN_3x" -F "file=@test.jpg"
|
222
|
+
# Output: [ModelManager] Loading model: detectron2/faster_rcnn_R_50_FPN_3x...
|
223
|
+
|
224
|
+
# Subsequent requests reuse loaded model
|
225
|
+
curl -X POST "http://localhost:8000/predict/detectron2/faster_rcnn_R_50_FPN_3x" -F "file=@test2.jpg"
|
226
|
+
# Output: [ModelManager] Model already loaded, reusing existing instance.
|
227
|
+
|
228
|
+
# After 10 minutes of inactivity, model auto-unloads
|
229
|
+
# Output: [ModelManager] Cleanup: Unloaded 1 inactive model(s).
|
230
|
+
```
|
231
|
+
|
232
|
+
## Python SDK
|
233
|
+
|
234
|
+
For direct integration in Python applications:
|
235
|
+
|
236
|
+
```python
|
237
|
+
from mozo import ModelManager
|
238
|
+
import cv2
|
239
|
+
|
240
|
+
manager = ModelManager()
|
241
|
+
model = manager.get_model('detectron2', 'mask_rcnn_R_50_FPN_3x')
|
242
|
+
|
243
|
+
image = cv2.imread('image.jpg')
|
244
|
+
detections = model.predict(image)
|
245
|
+
|
246
|
+
# Filter results
|
247
|
+
high_confidence = detections.filter_by_confidence(0.8)
|
248
|
+
|
249
|
+
# Manual memory management
|
250
|
+
manager.unload_model('detectron2', 'mask_rcnn_R_50_FPN_3x')
|
251
|
+
manager.cleanup_inactive_models(inactive_seconds=300)
|
252
|
+
```
|
253
|
+
|
254
|
+
### PixelFlow Integration
|
255
|
+
|
256
|
+
Detection models return PixelFlow Detections objects - a unified format across all ML frameworks:
|
257
|
+
|
258
|
+
```python
|
259
|
+
# Works the same for Detectron2, YOLO, or custom models
|
260
|
+
detections = model.predict(image)
|
261
|
+
|
262
|
+
# Filter and annotate
|
263
|
+
import pixelflow as pf
|
264
|
+
filtered = detections.filter_by_confidence(0.8).filter_by_class_id([0, 2])
|
265
|
+
annotated = pf.annotate.box(image, filtered)
|
266
|
+
annotated = pf.annotate.label(annotated, filtered)
|
267
|
+
|
268
|
+
# Export
|
269
|
+
json_output = filtered.to_json()
|
270
|
+
```
|
271
|
+
|
272
|
+
Learn more: [PixelFlow](https://github.com/datamarkin/pixelflow)
|
273
|
+
|
274
|
+
## Configuration
|
275
|
+
|
276
|
+
### Environment Variables
|
277
|
+
|
278
|
+
```bash
|
279
|
+
# Enable MPS fallback for macOS (Apple Silicon)
|
280
|
+
export PYTORCH_ENABLE_MPS_FALLBACK=1
|
281
|
+
|
282
|
+
# Configure HuggingFace cache location
|
283
|
+
export HF_HOME=~/.cache/huggingface
|
284
|
+
```
|
285
|
+
|
286
|
+
### Memory Management
|
287
|
+
|
288
|
+
Models automatically unload after 10 minutes of inactivity. To run a cleanup with a different inactivity threshold:
|
289
|
+
|
290
|
+
```bash
|
291
|
+
curl -X POST "http://localhost:8000/models/cleanup?inactive_seconds=300"
|
292
|
+
```
|
293
|
+
|
294
|
+
Or in Python:
|
295
|
+
```python
|
296
|
+
manager.cleanup_inactive_models(inactive_seconds=300)
|
297
|
+
```
|
298
|
+
|
299
|
+
## Extending Mozo
|
300
|
+
|
301
|
+
Add new models in 3 steps:
|
302
|
+
|
303
|
+
1. Create adapter in `mozo/adapters/your_model.py`
|
304
|
+
2. Register in `mozo/registry.py`
|
305
|
+
3. Use via HTTP or Python API
|
306
|
+
|
307
|
+
See [CLAUDE.md](CLAUDE.md) for a detailed implementation guide.
|
308
|
+
|
309
|
+
## Architecture
|
310
|
+
|
311
|
+
```
|
312
|
+
HTTP Request → FastAPI Server → ModelManager → ModelFactory → Adapter → Framework
|
313
|
+
↓
|
314
|
+
Thread-safe cache
|
315
|
+
Usage tracking
|
316
|
+
Auto cleanup
|
317
|
+
```
|
318
|
+
|
319
|
+
Components:
|
320
|
+
- **Server** - FastAPI REST API
|
321
|
+
- **Manager** - Lifecycle management, caching, cleanup
|
322
|
+
- **Factory** - Dynamic adapter instantiation
|
323
|
+
- **Registry** - Central catalog of models
|
324
|
+
- **Adapters** - Framework-specific implementations
|
325
|
+
|
326
|
+
## Development
|
327
|
+
|
328
|
+
```bash
|
329
|
+
# Install in development mode
|
330
|
+
pip install -e .
|
331
|
+
|
332
|
+
# Start server with auto-reload
|
333
|
+
mozo start
|
334
|
+
```
|
335
|
+
|
336
|
+
## Documentation
|
337
|
+
|
338
|
+
- [Repository](https://github.com/datamarkin/mozo)
|
339
|
+
- [Issues](https://github.com/datamarkin/mozo/issues)
|
340
|
+
|
341
|
+
## License
|
342
|
+
|
343
|
+
MIT License
|
@@ -0,0 +1,17 @@
|
|
1
|
+
mozo/__init__.py,sha256=g5eQJP-xx0SvxMd0uQ1HAI4b4KRCmeo2P31rDc-5YjE,1490
|
2
|
+
mozo/__main__.py,sha256=aGVemo--o10oQnIqOW2-tsUR0KGyMzvrJgz3aBzdDos,460
|
3
|
+
mozo/cli.py,sha256=nymMNcCdTVvNoQJSy8YoHRexCEdcxpmhHeR4YuzAAuc,1211
|
4
|
+
mozo/factory.py,sha256=HB9OG2shfo6Gxn6Odc9ZiBKWuiYJlUhBNwfHbtN6heA,4826
|
5
|
+
mozo/manager.py,sha256=NPID5QxieAeL8zAWorlvk2QXWiSNYmJmHWRHDRGd0hs,9681
|
6
|
+
mozo/registry.py,sha256=RKU4LPyNgivS13PAwxJTSa9zit4v7dxMIFUO2xioErk,7744
|
7
|
+
mozo/server.py,sha256=RkFfbGBquqplBABY8b7oIiybXfdv-naamKQKdwzq7sQ,9712
|
8
|
+
mozo/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
mozo/adapters/depth_anything.py,sha256=SsmNrU5d2t0KukbSn8PMDliyaya8AUo6xHoMsjvw4k4,2632
|
10
|
+
mozo/adapters/detectron2.py,sha256=zUH3VxwMKQ-3L6SBMeDckojpc5mvaymkSDpkQktqe00,5798
|
11
|
+
mozo/adapters/qwen2_5_vl.py,sha256=8UK_pXfdbprw-OHbDT8gFdfUMEeRG3rC_obXlHlZo00,5801
|
12
|
+
mozo-0.2.0.dist-info/licenses/LICENSE,sha256=QsFuCag5g1YSVKk3WUlv4Nc6TaQwNjyO53Rl0N60CB0,1067
|
13
|
+
mozo-0.2.0.dist-info/METADATA,sha256=dGHns-S0LzSZ-x0vlAvmw06zrWn9v8MKfUJ265rFgb4,8861
|
14
|
+
mozo-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
+
mozo-0.2.0.dist-info/entry_points.txt,sha256=4KCJ-IL4STq1ib_M5Ny9Yd0dJ9QSUayoY5Cc2XSDU6w,38
|
16
|
+
mozo-0.2.0.dist-info/top_level.txt,sha256=tvn0MKjQnyc1gIlKPjBmrhF3xVUXuM0TGUrdsv2XHgI,5
|
17
|
+
mozo-0.2.0.dist-info/RECORD,,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2025
|
3
|
+
Copyright (c) 2025 Datamarkin
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
-
SOFTWARE.
|
21
|
+
SOFTWARE.
|