mozo-0.1.0-py3-none-any.whl → mozo-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mozo/__init__.py +50 -5
- mozo/__main__.py +24 -0
- mozo/adapters/__init__.py +0 -0
- mozo/adapters/depth_anything.py +75 -0
- mozo/adapters/detectron2.py +128 -0
- mozo/adapters/qwen2_5_vl.py +170 -0
- mozo/cli.py +47 -0
- mozo/factory.py +150 -0
- mozo/manager.py +294 -0
- mozo/registry.py +235 -0
- mozo/server.py +294 -0
- mozo-0.2.0.dist-info/METADATA +343 -0
- mozo-0.2.0.dist-info/RECORD +17 -0
- mozo-0.2.0.dist-info/entry_points.txt +2 -0
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/licenses/LICENSE +2 -2
- mozo-0.1.0.dist-info/METADATA +0 -58
- mozo-0.1.0.dist-info/RECORD +0 -6
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/WHEEL +0 -0
- {mozo-0.1.0.dist-info → mozo-0.2.0.dist-info}/top_level.txt +0 -0
mozo/__init__.py
CHANGED
@@ -1,9 +1,54 @@
 """
-
+Mozo - Universal Computer Vision Model Server
+
+25+ pre-configured models ready to use. No deployment, no configuration.
+Just `mozo start` and make HTTP requests.
+
+Quick Start:
+    >>> # From terminal:
+    >>> mozo start
+    >>>
+    >>> # Then use any model via HTTP:
+    >>> curl -X POST "http://localhost:8000/predict/detectron2/mask_rcnn_R_50_FPN_3x" \\
+    >>>     -F "file=@image.jpg"
+
+Advanced Usage (Python SDK):
+    >>> from mozo import ModelManager
+    >>> import cv2
+    >>>
+    >>> manager = ModelManager()
+    >>> model = manager.get_model('detectron2', 'mask_rcnn_R_50_FPN_3x')
+    >>> image = cv2.imread('example.jpg')
+    >>> detections = model.predict(image)  # Returns PixelFlow Detections
+    >>> print(f"Found {len(detections)} objects")
+
+Features:
+    - 25+ models from Detectron2, HuggingFace Transformers
+    - Zero deployment - no Docker, Kubernetes, or cloud needed
+    - Automatic memory management with lazy loading
+    - PixelFlow integration for unified detection format
+    - Thread-safe concurrent access
+
+For more information, see:
+    - Documentation: https://github.com/datamarkin/mozo
 """

-__version__ = "0.1.0"
+__version__ = "0.2.0"
+
+# Public API exports
+from mozo.manager import ModelManager
+from mozo.registry import (
+    MODEL_REGISTRY,
+    get_available_families,
+    get_available_variants,
+    get_model_info,
+)

-
-""
-
+__all__ = [
+    "ModelManager",
+    "MODEL_REGISTRY",
+    "get_available_families",
+    "get_available_variants",
+    "get_model_info",
+    "__version__",
+]
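A minimal sketch (not part of the diff) of how the newly exported public API fits together; the family and variant names are taken from the docstring and adapters below, while the full set of registry keys lives in mozo/registry.py, which is not shown here:

    >>> import cv2
    >>> import mozo
    >>> from mozo import ModelManager, get_available_families, get_available_variants
    >>> print(mozo.__version__)                      # '0.2.0'
    >>> print(get_available_families())              # e.g. includes 'detectron2', 'depth_anything'
    >>> print(get_available_variants('detectron2'))  # e.g. includes 'mask_rcnn_R_50_FPN_3x'
    >>> manager = ModelManager()
    >>> model = manager.get_model('detectron2', 'mask_rcnn_R_50_FPN_3x')
    >>> detections = model.predict(cv2.imread('example.jpg'))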
mozo/__main__.py
ADDED
@@ -0,0 +1,24 @@
+"""
+Entry point for running `python -m mozo`.
+
+This delegates to the CLI start command for consistency.
+"""
+
+from mozo.cli import cli
+import sys
+
+
+def main():
+    """
+    Entry point when running `python -m mozo`.
+    Defaults to the start command for backward compatibility.
+    """
+    # If no arguments, default to 'start --reload'
+    if len(sys.argv) == 1:
+        sys.argv.extend(['start', '--reload'])
+
+    cli()
+
+
+if __name__ == "__main__":
+    main()
mozo/adapters/__init__.py
File without changes

mozo/adapters/depth_anything.py
ADDED
@@ -0,0 +1,75 @@
+from PIL import Image
+import numpy as np
+import cv2
+
+try:
+    from transformers import pipeline
+except ImportError:
+    print("="*50)
+    print("ERROR: `transformers` is not installed.")
+    print("Please install it with: `pip install transformers`")
+    print("="*50)
+    raise
+
+class DepthAnythingPredictor:
+    """
+    Universal Depth Anything adapter - handles all model size variants.
+    Supports small, base, and large variants of Depth Anything V2.
+    """
+
+    # Registry of all supported Depth Anything model variants
+    SUPPORTED_VARIANTS = {
+        'small': 'depth-anything/Depth-Anything-V2-Small-hf',
+        'base': 'depth-anything/Depth-Anything-V2-Base-hf',
+        'large': 'depth-anything/Depth-Anything-V2-Large-hf',
+    }
+
+    def __init__(self, variant="small"):
+        """
+        Initialize Depth Anything predictor with specific model size variant.
+
+        Args:
+            variant: Model size variant - 'small', 'base', or 'large'
+                small: Fastest, lowest memory, good for real-time applications
+                base: Balanced speed and accuracy
+                large: Best accuracy, slower, higher memory usage
+
+        Raises:
+            ValueError: If variant is not supported
+        """
+        if variant not in self.SUPPORTED_VARIANTS:
+            raise ValueError(
+                f"Unsupported variant: '{variant}'. "
+                f"Choose from: {list(self.SUPPORTED_VARIANTS.keys())}"
+            )
+
+        self.variant = variant
+        model_name = self.SUPPORTED_VARIANTS[variant]
+
+        print(f"Loading Depth Anything model (variant: {variant}, model: {model_name})...")
+        self.pipe = pipeline(task="depth-estimation", model=model_name)
+        print(f"Depth Anything model loaded successfully (variant: {variant}).")
+
+    def predict(self, image: np.ndarray) -> Image.Image:
+        """
+        Runs depth estimation on an image.
+
+        Args:
+            image: A numpy array representing the input image in BGR format (from cv2).
+
+        Returns:
+            A PIL.Image object representing the depth map.
+        """
+        print("Running depth estimation...")
+        # The pipeline expects a PIL Image in RGB format.
+        # cv2 reads images as BGR, so we need to convert it.
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        pil_image = Image.fromarray(image_rgb)
+
+        result = self.pipe(pil_image)
+
+        # The pipeline returns a dictionary, the depth map is in the "depth" key
+        depth_map = result["depth"]
+
+        print("Depth estimation complete.")
+        return depth_map
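A quick usage sketch (illustrative only, not part of the diff; the image path is a placeholder):

    >>> import cv2
    >>> from mozo.adapters.depth_anything import DepthAnythingPredictor
    >>> predictor = DepthAnythingPredictor(variant='small')
    >>> image = cv2.imread('example.jpg')        # BGR array, as predict() expects
    >>> depth = predictor.predict(image)         # PIL.Image depth map
    >>> depth.save('example_depth.png')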
mozo/adapters/detectron2.py
ADDED
@@ -0,0 +1,128 @@
+import cv2
+import numpy as np
+
+try:
+    from detectron2.engine import DefaultPredictor
+    from detectron2.config import get_cfg
+    from detectron2 import model_zoo
+    from detectron2.data import MetadataCatalog
+except ImportError:
+    print("="*50)
+    print("ERROR: Detectron2 is not installed.")
+    print("Please install it following the instructions at:")
+    print("https://detectron2.readthedocs.io/en/latest/tutorials/install.html")
+    print("="*50)
+    raise
+
+try:
+    import pixelflow as pf
+except ImportError:
+    print("="*50)
+    print("ERROR: PixelFlow is not installed.")
+    print("Please install it with: pip install pixelflow")
+    print("="*50)
+    raise
+
+class Detectron2Predictor:
+    """
+    Universal Detectron2 adapter - handles ALL detectron2 model variants.
+    Supports multiple model families: Mask R-CNN, Faster R-CNN, RetinaNet, Keypoint R-CNN, etc.
+    """
+
+    # Registry of all supported detectron2 model variants
+    SUPPORTED_CONFIGS = {
+        # Mask R-CNN (Instance Segmentation)
+        'mask_rcnn_R_50_FPN_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml',
+        'mask_rcnn_R_50_C4_1x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml',
+        'mask_rcnn_R_50_C4_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml',
+        'mask_rcnn_R_50_DC5_1x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml',
+        'mask_rcnn_R_50_DC5_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml',
+        'mask_rcnn_R_50_FPN_1x': 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml',
+        'mask_rcnn_R_101_C4_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml',
+        'mask_rcnn_R_101_DC5_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml',
+        'mask_rcnn_R_101_FPN_3x': 'COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml',
+        'mask_rcnn_X_101_32x8d_FPN_3x': 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml',
+
+        # Faster R-CNN (Object Detection)
+        'faster_rcnn_R_50_C4_1x': 'COCO-Detection/faster_rcnn_R_50_C4_1x.yaml',
+        'faster_rcnn_R_50_C4_3x': 'COCO-Detection/faster_rcnn_R_50_C4_3x.yaml',
+        'faster_rcnn_R_50_DC5_1x': 'COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml',
+        'faster_rcnn_R_50_DC5_3x': 'COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml',
+        'faster_rcnn_R_50_FPN_1x': 'COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml',
+        'faster_rcnn_R_50_FPN_3x': 'COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml',
+        'faster_rcnn_R_101_C4_3x': 'COCO-Detection/faster_rcnn_R_101_C4_3x.yaml',
+        'faster_rcnn_R_101_DC5_3x': 'COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml',
+        'faster_rcnn_R_101_FPN_3x': 'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml',
+        'faster_rcnn_X_101_32x8d_FPN_3x': 'COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml',
+
+        # RetinaNet (Object Detection)
+        'retinanet_R_50_FPN_1x': 'COCO-Detection/retinanet_R_50_FPN_1x.yaml',
+        'retinanet_R_50_FPN_3x': 'COCO-Detection/retinanet_R_50_FPN_3x.yaml',
+        'retinanet_R_101_FPN_3x': 'COCO-Detection/retinanet_R_101_FPN_3x.yaml',
+
+        # Keypoint R-CNN (Keypoint Detection)
+        'keypoint_rcnn_R_50_FPN_1x': 'COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml',
+        'keypoint_rcnn_R_50_FPN_3x': 'COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml',
+        'keypoint_rcnn_R_101_FPN_3x': 'COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml',
+        'keypoint_rcnn_X_101_32x8d_FPN_3x': 'COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml',
+
+        # RPN (Region Proposal Network)
+        'rpn_R_50_C4_1x': 'COCO-Detection/rpn_R_50_C4_1x.yaml',
+        'rpn_R_50_FPN_1x': 'COCO-Detection/rpn_R_50_FPN_1x.yaml',
+
+        # Fast R-CNN
+        'fast_rcnn_R_50_FPN_1x': 'COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml',
+    }
+
+    def __init__(self, variant="mask_rcnn_R_50_FPN_3x", confidence_threshold=0.5, device="cpu"):
+        """
+        Initialize Detectron2 predictor with specific model variant.
+
+        Args:
+            variant: Model variant name (e.g., 'mask_rcnn_R_50_FPN_3x', 'faster_rcnn_X_101_32x8d_FPN_3x')
+            confidence_threshold: Detection confidence threshold (0.0-1.0)
+            device: Device to run on - 'cpu' or 'cuda'
+
+        Raises:
+            ValueError: If variant is not supported
+        """
+        if variant not in self.SUPPORTED_CONFIGS:
+            raise ValueError(
+                f"Unsupported variant: '{variant}'. "
+                f"Choose from: {list(self.SUPPORTED_CONFIGS.keys())}"
+            )
+
+        self.variant = variant
+        config_file = self.SUPPORTED_CONFIGS[variant]
+
+        print(f"Loading Detectron2 model (variant: {variant})...")
+        cfg = get_cfg()
+        cfg.merge_from_file(model_zoo.get_config_file(config_file))
+        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_file)
+        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
+        cfg.MODEL.DEVICE = device
+
+        self.predictor = DefaultPredictor(cfg)
+
+        # Get class names from metadata
+        dataset_name = cfg.DATASETS.TRAIN[0] if cfg.DATASETS.TRAIN else "coco_2017_val"
+        metadata = MetadataCatalog.get(dataset_name)
+        self.class_names = metadata.thing_classes
+
+        print(f"Detectron2 model loaded successfully (variant: {variant}).")
+
+    def predict(self, image: np.ndarray):
+        """
+        Runs inference on an image and returns PixelFlow Detections.
+
+        Returns:
+            pf.detections.Detections: PixelFlow Detections object containing all detected objects
+        """
+        print("Running prediction...")
+        outputs = self.predictor(image)
+
+        # Use PixelFlow's existing converter for Detectron2
+        detections = pf.detections.from_detectron2(outputs)
+
+        print(f"Found {len(detections)} objects.")
+        return detections
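For illustration only (not part of the diff; the image path is a placeholder), the adapter can be driven directly like this:

    >>> import cv2
    >>> from mozo.adapters.detectron2 import Detectron2Predictor
    >>> predictor = Detectron2Predictor(variant='faster_rcnn_R_50_FPN_3x',
    ...                                 confidence_threshold=0.7, device='cpu')
    >>> detections = predictor.predict(cv2.imread('example.jpg'))   # PixelFlow Detections
    >>> print(f"{len(detections)} objects; {len(predictor.class_names)} COCO classes available")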
mozo/adapters/qwen2_5_vl.py
ADDED
@@ -0,0 +1,170 @@
+from PIL import Image
+import numpy as np
+import cv2
+
+try:
+    from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+    from qwen_vl_utils import process_vision_info
+except ImportError:
+    print("="*50)
+    print("ERROR: Qwen2.5-VL dependencies not installed.")
+    print("Install with:")
+    print("  pip install git+https://github.com/huggingface/transformers")
+    print("  pip install qwen-vl-utils")
+    print("="*50)
+    raise
+
+class Qwen2_5VLPredictor:
+    """
+    Universal Qwen2.5-VL adapter for vision-language understanding.
+
+    Supports:
+    - Image understanding and description
+    - Visual question answering (VQA)
+    - Object recognition and counting
+    - Text extraction (OCR)
+    - Chart and diagram analysis
+    - Visual reasoning
+    """
+
+    # Registry of all supported Qwen2.5-VL model variants
+    SUPPORTED_VARIANTS = {
+        '7b-instruct': 'Qwen/Qwen2.5-VL-7B-Instruct',
+        # Future variants can be added here:
+        # '2b-instruct': 'Qwen/Qwen2.5-VL-2B-Instruct',
+        # '72b-instruct': 'Qwen/Qwen2.5-VL-72B-Instruct',
+    }
+
+    def __init__(self, variant="7b-instruct", device="auto", torch_dtype="auto"):
+        """
+        Initialize Qwen2.5-VL predictor with specific model variant.
+
+        Args:
+            variant: Model size variant - '7b-instruct'
+                7b-instruct: 7 billion parameters, balanced performance
+            device: Device placement - 'auto', 'cpu', 'cuda', 'mps'
+                'auto' will automatically use best available device
+            torch_dtype: Precision - 'auto', 'float16', 'bfloat16', 'float32'
+                'auto' will choose based on device capabilities
+
+        Raises:
+            ValueError: If variant is not supported
+
+        Note:
+            This is a large model (7B parameters). First load will download ~7-14GB.
+            Recommended: 16GB+ RAM and GPU/MPS for reasonable performance.
+        """
+        if variant not in self.SUPPORTED_VARIANTS:
+            raise ValueError(
+                f"Unsupported variant: '{variant}'. "
+                f"Choose from: {list(self.SUPPORTED_VARIANTS.keys())}"
+            )
+
+        self.variant = variant
+        model_name = self.SUPPORTED_VARIANTS[variant]
+
+        print(f"Loading Qwen2.5-VL model (variant: {variant}, model: {model_name})...")
+        print("Note: This is a 7B model and may take time to load on first run...")
+
+        # Load model and processor
+        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            device_map=device
+        )
+        self.processor = AutoProcessor.from_pretrained(model_name)
+
+        print(f"Qwen2.5-VL model loaded successfully (variant: {variant}).")
+        print(f"Model device: {self.model.device}")
+
+    def predict(self, image: np.ndarray, prompt: str = "Describe this image in detail.") -> dict:
+        """
+        Run vision-language understanding on an image.
+
+        Args:
+            image: Input image as numpy array (BGR format from cv2)
+            prompt: Text prompt/question about the image
+                Examples:
+                - "Describe this image in detail."
+                - "What objects are in this image?"
+                - "How many people are visible?"
+                - "What is the text in this image?"
+                - "Analyze this chart."
+
+        Returns:
+            dict: {
+                'text': str,     # Generated text response
+                'prompt': str,   # Original prompt used
+                'variant': str   # Model variant used
+            }
+
+        Example:
+            >>> predictor = Qwen2_5VLPredictor()
+            >>> result = predictor.predict(image, "What is in this image?")
+            >>> print(result['text'])
+        """
+        print(f"Running Qwen2.5-VL inference with prompt: '{prompt[:50]}...'")
+
+        # Convert BGR (OpenCV format) to RGB (PIL format)
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        pil_image = Image.fromarray(image_rgb)
+
+        # Prepare conversation in Qwen format
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": pil_image},
+                    {"type": "text", "text": prompt},
+                ],
+            }
+        ]
+
+        # Apply chat template
+        text = self.processor.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+
+        # Process vision information
+        image_inputs, video_inputs = process_vision_info(messages)
+
+        # Prepare inputs for model
+        inputs = self.processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+
+        # Move inputs to model device
+        inputs = inputs.to(self.model.device)
+
+        # Generate response
+        generated_ids = self.model.generate(
+            **inputs,
+            max_new_tokens=512  # Adjust based on expected response length
+        )
+
+        # Trim input tokens from generated output
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):]
+            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode generated text
+        output_text = self.processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )[0]
+
+        print(f"Qwen2.5-VL inference complete. Generated {len(output_text)} characters.")
+
+        return {
+            'text': output_text,
+            'prompt': prompt,
+            'variant': self.variant
+        }
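A usage sketch (illustrative only; the image path is a placeholder, and the first call downloads the model weights):

    >>> import cv2
    >>> from mozo.adapters.qwen2_5_vl import Qwen2_5VLPredictor
    >>> predictor = Qwen2_5VLPredictor(variant='7b-instruct', device='auto')
    >>> result = predictor.predict(cv2.imread('chart.png'), 'Analyze this chart.')
    >>> print(result['text'])
    >>> print(result['variant'])   # '7b-instruct'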
mozo/cli.py
ADDED
@@ -0,0 +1,47 @@
+"""
+Command-line interface for Mozo.
+
+Provides simple commands for starting the server and checking version.
+"""
+
+import click
+import uvicorn
+
+
+@click.group()
+def cli():
+    """Mozo - Universal CV Model Server"""
+    pass
+
+
+@cli.command()
+@click.option('--host', default='0.0.0.0', help='Host to bind to')
+@click.option('--port', default=8000, type=int, help='Port to bind to')
+@click.option('--reload', is_flag=True, help='Enable auto-reload on code changes')
+@click.option('--workers', default=1, type=int, help='Number of worker processes')
+def start(host, port, reload, workers):
+    """Start the Mozo model server"""
+    click.echo(f"Starting Mozo server on {host}:{port}...")
+    if reload:
+        click.echo("Auto-reload enabled (development mode)")
+    if workers > 1:
+        click.echo(f"Running with {workers} worker processes")
+
+    uvicorn.run(
+        "mozo.server:app",
+        host=host,
+        port=port,
+        reload=reload,
+        workers=workers if not reload else 1  # reload only works with 1 worker
+    )
+
+
+@cli.command()
+def version():
+    """Show Mozo version"""
+    from mozo import __version__
+    click.echo(f"Mozo version {__version__}")
+
+
+if __name__ == '__main__':
+    cli()
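With the `mozo` console script installed (declared in mozo-0.2.0.dist-info/entry_points.txt), the commands above correspond to shell invocations such as (flags illustrative):

    >>> mozo start --host 0.0.0.0 --port 8000 --reload
    >>> mozo version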
mozo/factory.py
ADDED
@@ -0,0 +1,150 @@
+"""
+Model Factory for Mozo
+
+Factory pattern implementation for dynamic model instantiation.
+Handles loading adapter classes and creating model instances from registry configurations.
+"""
+
+import importlib
+from .registry import MODEL_REGISTRY
+
+
+class ModelFactory:
+    """
+    Factory class for creating model instances dynamically.
+
+    The factory reads from MODEL_REGISTRY to instantiate the correct adapter class
+    with the appropriate parameters for the requested model variant.
+    """
+
+    def __init__(self):
+        """Initialize the model factory."""
+        self._adapter_cache = {}  # Cache for loaded adapter classes
+
+    def _get_adapter_class(self, module_path, class_name):
+        """
+        Dynamically import and return an adapter class.
+
+        Args:
+            module_path: Python module path (e.g., 'mozo.adapters.detectron2')
+            class_name: Class name to import (e.g., 'Detectron2Predictor')
+
+        Returns:
+            The adapter class
+
+        Raises:
+            ImportError: If module or class cannot be loaded
+        """
+        cache_key = f"{module_path}.{class_name}"
+
+        # Return cached class if available
+        if cache_key in self._adapter_cache:
+            return self._adapter_cache[cache_key]
+
+        try:
+            # Dynamically import the module
+            module = importlib.import_module(module_path)
+
+            # Get the class from the module
+            adapter_class = getattr(module, class_name)
+
+            # Cache for future use
+            self._adapter_cache[cache_key] = adapter_class
+
+            return adapter_class
+
+        except ImportError as e:
+            raise ImportError(
+                f"Failed to import module '{module_path}': {e}"
+            ) from e
+        except AttributeError as e:
+            raise ImportError(
+                f"Module '{module_path}' does not have class '{class_name}': {e}"
+            ) from e
+
+    def create_model(self, family, variant):
+        """
+        Create a model instance from family and variant identifiers.
+
+        Args:
+            family: Model family name (e.g., 'detectron2', 'depth_anything')
+            variant: Model variant name (e.g., 'mask_rcnn_R_50_FPN_3x', 'small')
+
+        Returns:
+            Instantiated model predictor object
+
+        Raises:
+            ValueError: If family or variant is not found in registry
+            ImportError: If adapter class cannot be loaded
+
+        Example:
+            >>> factory = ModelFactory()
+            >>> model = factory.create_model('detectron2', 'mask_rcnn_R_50_FPN_3x')
+            >>> predictions = model.predict(image)
+        """
+        # Validate family exists
+        if family not in MODEL_REGISTRY:
+            available_families = list(MODEL_REGISTRY.keys())
+            raise ValueError(
+                f"Unknown model family: '{family}'. "
+                f"Available families: {available_families}"
+            )
+
+        family_config = MODEL_REGISTRY[family]
+
+        # Validate variant exists for this family
+        if variant not in family_config['variants']:
+            available_variants = list(family_config['variants'].keys())
+            raise ValueError(
+                f"Unknown variant '{variant}' for family '{family}'. "
+                f"Available variants: {available_variants}"
+            )
+
+        # Get adapter class information
+        module_path = family_config['module']
+        class_name = family_config['adapter_class']
+
+        # Get variant-specific parameters
+        variant_params = family_config['variants'][variant]
+
+        # Load the adapter class
+        adapter_class = self._get_adapter_class(module_path, class_name)
+
+        # Instantiate the adapter with variant parameters
+        try:
+            model_instance = adapter_class(**variant_params)
+            return model_instance
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to instantiate {class_name} with parameters {variant_params}: {e}"
+            ) from e
+
+    def get_available_families(self):
+        """
+        Get list of all available model families.
+
+        Returns:
+            list: Model family names
+        """
+        return list(MODEL_REGISTRY.keys())
+
+    def get_available_variants(self, family):
+        """
+        Get list of all available variants for a model family.
+
+        Args:
+            family: Model family name
+
+        Returns:
+            list: Variant names for the family
+
+        Raises:
+            ValueError: If family not found
+        """
+        if family not in MODEL_REGISTRY:
+            raise ValueError(f"Unknown model family: '{family}'")
+        return list(MODEL_REGISTRY[family]['variants'].keys())
+
+    def clear_cache(self):
+        """Clear the adapter class cache."""
+        self._adapter_cache.clear()
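A minimal sketch (not in the diff) of driving the factory directly; it assumes the registry entries described above exist and uses a placeholder image path:

    >>> import cv2
    >>> from mozo.factory import ModelFactory
    >>> factory = ModelFactory()
    >>> print(factory.get_available_families())
    >>> print(factory.get_available_variants('detectron2'))
    >>> model = factory.create_model('detectron2', 'mask_rcnn_R_50_FPN_3x')   # weights download on first use
    >>> detections = model.predict(cv2.imread('example.jpg'))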