nexaai 1.0.4rc13 (cp310-cp310-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic.
- nexaai/__init__.py +71 -0
- nexaai/_stub.cp310-win_amd64.pyd +0 -0
- nexaai/_version.py +4 -0
- nexaai/asr.py +60 -0
- nexaai/asr_impl/__init__.py +0 -0
- nexaai/asr_impl/mlx_asr_impl.py +91 -0
- nexaai/asr_impl/pybind_asr_impl.py +43 -0
- nexaai/base.py +39 -0
- nexaai/binds/__init__.py +3 -0
- nexaai/binds/common_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/embedder_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/llm_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/nexa_bridge.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-base.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-cpu.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-cuda.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml.dll +0 -0
- nexaai/binds/nexa_llama_cpp/llama.dll +0 -0
- nexaai/binds/nexa_llama_cpp/mtmd.dll +0 -0
- nexaai/binds/nexa_llama_cpp/nexa_plugin.dll +0 -0
- nexaai/common.py +61 -0
- nexaai/cv.py +87 -0
- nexaai/cv_impl/__init__.py +0 -0
- nexaai/cv_impl/mlx_cv_impl.py +88 -0
- nexaai/cv_impl/pybind_cv_impl.py +31 -0
- nexaai/embedder.py +68 -0
- nexaai/embedder_impl/__init__.py +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +114 -0
- nexaai/embedder_impl/pybind_embedder_impl.py +91 -0
- nexaai/image_gen.py +136 -0
- nexaai/image_gen_impl/__init__.py +0 -0
- nexaai/image_gen_impl/mlx_image_gen_impl.py +291 -0
- nexaai/image_gen_impl/pybind_image_gen_impl.py +84 -0
- nexaai/llm.py +89 -0
- nexaai/llm_impl/__init__.py +0 -0
- nexaai/llm_impl/mlx_llm_impl.py +249 -0
- nexaai/llm_impl/pybind_llm_impl.py +207 -0
- nexaai/rerank.py +51 -0
- nexaai/rerank_impl/__init__.py +0 -0
- nexaai/rerank_impl/mlx_rerank_impl.py +91 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +42 -0
- nexaai/runtime.py +64 -0
- nexaai/tts.py +70 -0
- nexaai/tts_impl/__init__.py +0 -0
- nexaai/tts_impl/mlx_tts_impl.py +93 -0
- nexaai/tts_impl/pybind_tts_impl.py +42 -0
- nexaai/utils/avatar_fetcher.py +104 -0
- nexaai/utils/decode.py +18 -0
- nexaai/utils/model_manager.py +1195 -0
- nexaai/utils/progress_tracker.py +372 -0
- nexaai/vlm.py +120 -0
- nexaai/vlm_impl/__init__.py +0 -0
- nexaai/vlm_impl/mlx_vlm_impl.py +205 -0
- nexaai/vlm_impl/pybind_vlm_impl.py +228 -0
- nexaai-1.0.4rc13.dist-info/METADATA +26 -0
- nexaai-1.0.4rc13.dist-info/RECORD +59 -0
- nexaai-1.0.4rc13.dist-info/WHEEL +5 -0
- nexaai-1.0.4rc13.dist-info/top_level.txt +1 -0
nexaai/__init__.py
ADDED
@@ -0,0 +1,71 @@
"""
NexaAI Python bindings for NexaSDK C-lib backend.
"""

import sys
import os

# Add mlx_backend to Python path as individual module (only if it exists)
_current_dir = os.path.dirname(os.path.abspath(__file__))
_mlx_backend_path = os.path.join(_current_dir, "mlx_backend")
# Only add to path if the directory exists (it won't on Windows)
if os.path.exists(_mlx_backend_path) and _mlx_backend_path not in sys.path:
    sys.path.insert(0, _mlx_backend_path)

try:
    from ._version import __version__
except ImportError:
    # Fallback for development or when version file hasn't been generated yet
    __version__ = "0.0.1"

# Import common configuration classes first (no external dependencies)
from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig

# Import new feature classes (no external dependencies in base classes)
from .llm import LLM
from .embedder import Embedder, EmbeddingConfig
from .vlm import VLM
from .asr import ASR, ASRConfig, ASRResult
from .cv import CVModel, CVModelConfig, CVResult, CVResults, CVCapabilities, BoundingBox
from .rerank import Reranker, RerankConfig
from .image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
from .tts import TTS, TTSConfig, TTSSamplerConfig, TTSResult

# Build __all__ list dynamically
__all__ = [
    "__version__",
    # Common configurations (always available)
    "ModelConfig",
    "GenerationConfig",
    "ChatMessage",
    "SamplerConfig",
    "EmbeddingConfig",

    "LLM",
    "Embedder",
    "VLM",
    "ASR",
    "CVModel",
    "Reranker",
    "ImageGen",
    "TTS",

    "ASRConfig",
    "ASRResult",
    "CVModelConfig",
    "CVResult",
    "CVResults",
    "CVCapabilities",
    "BoundingBox",
    "RerankConfig",
    "ImageGenerationConfig",
    "ImageSamplerConfig",
    "SchedulerConfig",
    "Image",
    "TTSConfig",
    "TTSSamplerConfig",
    "TTSResult",
]
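Based on the exports above, a top-level import would look like the sketch below. This is illustrative only; it assumes the wheel is installed and that the listed submodules import without extra dependencies.

# Minimal import sketch (assumes the package is installed as-is)
import nexaai
from nexaai import LLM, ASR, GenerationConfig, SamplerConfig

print(nexaai.__version__)  # falls back to "0.0.1" when _version.py is absent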
nexaai/_stub.cp310-win_amd64.pyd
Binary file
nexaai/_version.py
ADDED
nexaai/asr.py
ADDED
@@ -0,0 +1,60 @@
from typing import List, Optional, Sequence, Tuple
from abc import abstractmethod
from dataclasses import dataclass

from nexaai.base import BaseModel


@dataclass
class ASRConfig:
    """Configuration for ASR."""
    timestamps: str = "none"  # "none" | "segment" | "word"
    beam_size: int = 5
    stream: bool = False


@dataclass
class ASRResult:
    """Result from ASR processing."""
    transcript: str
    confidence_scores: Sequence[float]
    timestamps: Sequence[Tuple[float, float]]


class ASR(BaseModel):
    """Abstract base class for Automatic Speech Recognition models."""

    def __init__(self):
        """Initialize base ASR class."""
        pass

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_path: Optional[str] = None,
                   language: Optional[str] = None,
                   plugin_id: str = "llama_cpp",
                   device_id: Optional[str] = None
                   ) -> 'ASR':
        """Load ASR model from local path, routing to appropriate implementation."""
        if plugin_id == "mlx":
            from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
            return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
        else:
            from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl
            return PyBindASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)

    @abstractmethod
    def transcribe(
        self,
        audio_path: str,
        language: Optional[str] = None,
        config: Optional[ASRConfig] = None,
    ) -> ASRResult:
        """Transcribe audio file to text."""
        pass

    @abstractmethod
    def list_supported_languages(self) -> List[str]:
        """List supported languages."""
        pass
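A usage sketch for the ASR interface follows. It assumes a local model path accepted by BaseModel.from_ (defined in nexaai/base.py later in this diff); the model path and audio file are placeholders. In this Windows wheel the mlx_backend directory is absent and the PyBind implementation is a stub, so this shows the intended API shape rather than a verified working path.

from nexaai import ASR, ASRConfig

# Placeholder model path; from_() resolves Hugging Face names or local paths
# via the auto_download_model decorator in nexaai/utils/model_manager.py.
asr = ASR.from_("path/to/asr-model", plugin_id="mlx")
cfg = ASRConfig(timestamps="segment", beam_size=5)
result = asr.transcribe("sample.wav", language="en", config=cfg)
print(result.transcript)
asr.eject()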
nexaai/asr_impl/__init__.py
File without changes
nexaai/asr_impl/mlx_asr_impl.py
ADDED
@@ -0,0 +1,91 @@
# Note: This code is generated by Cursor, not tested yet.

from typing import List, Optional

from nexaai.asr import ASR, ASRConfig, ASRResult
from nexaai.mlx_backend.asr.interface import MlxAsr as MLXASRInterface
from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig


class MLXASRImpl(ASR):
    def __init__(self):
        """Initialize MLX ASR implementation."""
        super().__init__()
        self._mlx_asr = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_path: Optional[str] = None,
                   language: Optional[str] = None,
                   plugin_id: str = "mlx",
                   device_id: Optional[str] = None
                   ) -> 'MLXASRImpl':
        """Load ASR model from local path using MLX backend."""
        try:
            # MLX ASR interface is already imported

            # Create instance and load MLX ASR
            instance = cls()
            instance._mlx_asr = MLXASRInterface(
                model_path=model_path,
                tokenizer_path=tokenizer_path,
                language=language,
                device=device_id
            )

            return instance
        except Exception as e:
            raise RuntimeError(f"Failed to load MLX ASR: {str(e)}")

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_asr:
            self._mlx_asr.destroy()
            self._mlx_asr = None

    def transcribe(
        self,
        audio_path: str,
        language: Optional[str] = None,
        config: Optional[ASRConfig] = None,
    ) -> ASRResult:
        """Transcribe audio file to text."""
        if not self._mlx_asr:
            raise RuntimeError("MLX ASR not loaded")

        try:
            # Convert our config to MLX format if provided
            mlx_config = None
            if config:
                from nexaai.mlx_backend.ml import ASRConfig as MLXASRConfig

                mlx_config = MLXASRConfig()
                mlx_config.timestamps = config.timestamps
                mlx_config.beam_size = config.beam_size
                mlx_config.stream = config.stream

            # Use MLX ASR transcription
            result = self._mlx_asr.transcribe(audio_path, language, mlx_config)

            # Convert MLX result to our format
            return ASRResult(
                transcript=result.transcript,
                confidence_scores=result.confidence_scores,
                timestamps=result.timestamps
            )

        except Exception as e:
            raise RuntimeError(f"Failed to transcribe audio: {str(e)}")

    def list_supported_languages(self) -> List[str]:
        """List supported languages."""
        if not self._mlx_asr:
            raise RuntimeError("MLX ASR not loaded")

        try:
            return self._mlx_asr.list_supported_languages()
        except Exception as e:
            raise RuntimeError(f"Failed to list supported languages: {str(e)}")
nexaai/asr_impl/pybind_asr_impl.py
ADDED
@@ -0,0 +1,43 @@
from typing import List, Optional

from nexaai.asr import ASR, ASRConfig, ASRResult


class PyBindASRImpl(ASR):
    def __init__(self):
        """Initialize PyBind ASR implementation."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_path: Optional[str] = None,
                   language: Optional[str] = None,
                   plugin_id: str = "llama_cpp",
                   device_id: Optional[str] = None
                   ) -> 'PyBindASRImpl':
        """Load ASR model from local path using PyBind backend."""
        # TODO: Implement PyBind ASR loading
        instance = cls()
        return instance

    def eject(self):
        """Destroy the model and free resources."""
        # TODO: Implement PyBind ASR cleanup
        pass

    def transcribe(
        self,
        audio_path: str,
        language: Optional[str] = None,
        config: Optional[ASRConfig] = None,
    ) -> ASRResult:
        """Transcribe audio file to text."""
        # TODO: Implement PyBind ASR transcription
        raise NotImplementedError("PyBind ASR transcription not yet implemented")

    def list_supported_languages(self) -> List[str]:
        """List supported languages."""
        # TODO: Implement PyBind ASR language listing
        raise NotImplementedError("PyBind ASR language listing not yet implemented")
nexaai/base.py
ADDED
@@ -0,0 +1,39 @@
from abc import ABC, abstractmethod
from nexaai.common import ProfilingData
from nexaai.utils.model_manager import auto_download_model

class BaseModel(ABC):

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.eject()

    def __del__(self):
        self.eject()

    @classmethod
    @auto_download_model
    def from_(cls, name_or_path: str, **kwargs) -> "BaseModel":
        """
        initialize model from (1) HF (2) if not found, then from local path
        """

        return cls._load_from(name_or_path, **kwargs)

    @classmethod
    @abstractmethod
    def _load_from(cls, name_or_path: str, **kwargs) -> "BaseModel":
        """
        Model-specific loading logic. Must be implemented by each model type.
        Called after model is available locally.
        """
        pass

    @abstractmethod
    def eject(self):
        pass

    def get_profiling_data(self) -> ProfilingData:
        pass
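The pattern above means each concrete model class only supplies _load_from and eject; from_ handles name resolution/download and the context-manager protocol guarantees cleanup. A hypothetical subclass, shown only to illustrate the contract (the exact behavior of auto_download_model lives in nexaai/utils/model_manager.py and is not shown in this section):

from nexaai.base import BaseModel

class DummyModel(BaseModel):
    @classmethod
    def _load_from(cls, name_or_path: str, **kwargs) -> "DummyModel":
        inst = cls()
        inst._handle = name_or_path  # stand-in for a native model handle
        return inst

    def eject(self):
        self._handle = None  # release the (pretend) handle

# from_() resolves/downloads the model, then delegates to _load_from();
# __exit__ calls eject() so resources are freed when the block ends.
with DummyModel.from_("./local-model-dir") as model:
    pass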
nexaai/binds/__init__.py
ADDED
nexaai/binds/common_bind.cp310-win_amd64.pyd
Binary file
nexaai/binds/embedder_bind.cp310-win_amd64.pyd
Binary file
nexaai/binds/llm_bind.cp310-win_amd64.pyd
Binary file
nexaai/binds/nexa_bridge.dll
Binary file
nexaai/binds/nexa_llama_cpp/ggml-base.dll
Binary file
nexaai/binds/nexa_llama_cpp/ggml-cpu.dll
Binary file
nexaai/binds/nexa_llama_cpp/ggml-cuda.dll
Binary file
nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll
Binary file
nexaai/binds/nexa_llama_cpp/ggml.dll
Binary file
nexaai/binds/nexa_llama_cpp/llama.dll
Binary file
nexaai/binds/nexa_llama_cpp/mtmd.dll
Binary file
nexaai/binds/nexa_llama_cpp/nexa_plugin.dll
Binary file
nexaai/common.py
ADDED
@@ -0,0 +1,61 @@
from dataclasses import dataclass
from typing import TypedDict, Literal, Optional, List


class ChatMessage(TypedDict):
    role: Literal["user", "assistant", "system"]
    content: str

class MultiModalMessageContent(TypedDict):
    type: Literal["text", "image", "audio", "video"]
    text: Optional[str]
    url: Optional[str]
    path: Optional[str]

class MultiModalMessage(TypedDict):
    role: Literal["user", "assistant", "system"]
    content: List[MultiModalMessageContent]


@dataclass
class SamplerConfig:
    temperature: float = 0.8
    top_p: float = 0.95
    top_k: int = 40
    repetition_penalty: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    seed: int = -1
    grammar_path: str = None
    grammar_string: str = None

@dataclass
class GenerationConfig:
    max_tokens: int = 1024
    stop_words: list[str] = None
    sampler_config: SamplerConfig = None
    image_paths: list[str] = None
    audio_paths: list[str] = None

@dataclass
class ModelConfig:
    n_ctx: int = 4096
    n_threads: int = None
    n_threads_batch: int = None
    n_batch: int = 512
    n_ubatch: int = 512
    n_seq_max: int = 1
    n_gpu_layers: int = 999
    chat_template_path: str = None
    chat_template_content: str = None


@dataclass(frozen=True)  # Read-only
class ProfilingData:
    start_time: int
    end_time: int
    prompt_start_time: int = None
    prompt_end_time: int = None
    decode_start_time: int = None
    decode_ent_time: int = None
    first_token_time: int = None
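A sketch of how these dataclasses compose for a generation call; the values are illustrative, and passing the config to LLM/VLM generate calls is assumed from the imports in __init__.py (llm.py and vlm.py are listed in this release but their contents are not shown in this section).

from nexaai.common import ModelConfig, GenerationConfig, SamplerConfig

model_cfg = ModelConfig(n_ctx=4096, n_gpu_layers=999)          # defaults made explicit
sampler = SamplerConfig(temperature=0.7, top_p=0.9, top_k=40)  # illustrative values
gen_cfg = GenerationConfig(
    max_tokens=256,
    stop_words=["</s>"],
    sampler_config=sampler,
)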
nexaai/cv.py
ADDED
@@ -0,0 +1,87 @@
from typing import List, Optional
from abc import abstractmethod
from dataclasses import dataclass

from nexaai.base import BaseModel


@dataclass
class BoundingBox:
    """Generic bounding box structure."""
    x: float       # X coordinate (normalized or pixel, depends on model)
    y: float       # Y coordinate (normalized or pixel, depends on model)
    width: float   # Width
    height: float  # Height


@dataclass
class CVResult:
    """Generic detection/classification result."""
    image_paths: Optional[List[str]] = None  # Output image paths
    image_count: int = 0                     # Number of output images
    class_id: int = 0                        # Class ID (example: ConvNext)
    confidence: float = 0.0                  # Confidence score [0.0-1.0]
    bbox: Optional[BoundingBox] = None       # Bounding box (example: YOLO)
    text: Optional[str] = None               # Text result (example: OCR)
    embedding: Optional[List[float]] = None  # Feature embedding (example: CLIP embedding)
    embedding_dim: int = 0                   # Embedding dimension


@dataclass
class CVResults:
    """Generic CV inference result."""
    results: List[CVResult]  # Array of CV results
    result_count: int        # Number of CV results


class CVCapabilities:
    """CV capabilities enum."""
    OCR = 0             # OCR
    CLASSIFICATION = 1  # Classification
    SEGMENTATION = 2    # Segmentation
    CUSTOM = 3          # Custom task


@dataclass
class CVModelConfig:
    """CV model preprocessing configuration."""
    capabilities: int  # CVCapabilities

    # MLX-OCR
    det_model_path: Optional[str] = None  # Detection model path
    rec_model_path: Optional[str] = None  # Recognition model path

    # QNN
    model_path: Optional[str] = None              # Model path
    system_library_path: Optional[str] = None     # System library path
    backend_library_path: Optional[str] = None    # Backend library path
    extension_library_path: Optional[str] = None  # Extension library path
    config_file_path: Optional[str] = None        # Config file path
    char_dict_path: Optional[str] = None          # Character dictionary path


class CVModel(BaseModel):
    """Abstract base class for generic computer vision models."""

    def __init__(self):
        """Initialize base CV model class."""
        pass

    @classmethod
    def _load_from(cls,
                   config: CVModelConfig,
                   plugin_id: str = "llama_cpp",
                   device_id: Optional[str] = None
                   ) -> 'CVModel':
        """Load CV model from configuration, routing to appropriate implementation."""
        if plugin_id == "mlx":
            from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
            return MLXCVImpl._load_from(config, plugin_id, device_id)
        else:
            from nexaai.cv_impl.pybind_cv_impl import PyBindCVImpl
            return PyBindCVImpl._load_from(config, plugin_id, device_id)

    @abstractmethod
    def infer(self, input_image_path: str) -> CVResults:
        """Perform inference on image."""
        pass
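A configuration sketch for the OCR capability described above; the file paths are placeholders, and in this release the non-MLX (PyBind) route is a stub that raises NotImplementedError on infer().

from nexaai.cv import CVModelConfig, CVCapabilities

ocr_cfg = CVModelConfig(
    capabilities=CVCapabilities.OCR,
    det_model_path="models/det.mlx",        # placeholder detection model path
    rec_model_path="models/rec.mlx",        # placeholder recognition model path
    char_dict_path="models/char_dict.txt",  # placeholder character dictionary
)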
nexaai/cv_impl/__init__.py
File without changes
nexaai/cv_impl/mlx_cv_impl.py
ADDED
@@ -0,0 +1,88 @@
# Note: This code is generated by Cursor, not tested yet.

from typing import Optional
import os

from nexaai.cv import CVModel, CVModelConfig, CVResults
from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model


class MLXCVImpl(CVModel):
    def __init__(self):
        """Initialize MLX CV implementation."""
        super().__init__()
        self._mlx_cv = None

    @classmethod
    def _load_from(cls,
                   config: CVModelConfig,
                   plugin_id: str = "mlx",
                   device_id: Optional[str] = None
                   ) -> 'MLXCVImpl':
        """Load CV model from configuration using MLX backend."""
        try:
            # Get MLX config class
            from nexaai.mlx_backend.ml import CVModelConfig as MLXCVModelConfig

            # Convert our config to MLX format
            mlx_config = MLXCVModelConfig(
                capabilities=config.capabilities,
                det_model_path=config.det_model_path,
                rec_model_path=config.rec_model_path,
                model_path=config.model_path,
                system_library_path=config.system_library_path,
                backend_library_path=config.backend_library_path,
                extension_library_path=config.extension_library_path,
                config_file_path=config.config_file_path,
                char_dict_path=config.char_dict_path
            )

            # Create instance and load MLX CV model
            instance = cls()
            instance._mlx_cv = create_cv_model(mlx_config, device_id)

            return instance
        except Exception as e:
            raise RuntimeError(f"Failed to load MLX CV: {str(e)}")

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_cv:
            self._mlx_cv.destroy()
            self._mlx_cv = None

    def infer(self, input_image_path: str) -> CVResults:
        """Perform inference on image."""
        if not self._mlx_cv:
            raise RuntimeError("MLX CV not loaded")

        try:
            # Use MLX CV inference
            result = self._mlx_cv.infer(input_image_path)

            # Convert MLX result to our format
            from nexaai.cv import CVResult

            our_results = []
            for mlx_result in result.results:
                our_result = CVResult(
                    image_paths=mlx_result.image_paths,
                    image_count=mlx_result.image_count,
                    class_id=mlx_result.class_id,
                    confidence=mlx_result.confidence,
                    bbox=mlx_result.bbox,
                    text=mlx_result.text,
                    embedding=mlx_result.embedding,
                    embedding_dim=mlx_result.embedding_dim
                )
                our_results.append(our_result)

            return CVResults(
                results=our_results,
                result_count=result.result_count
            )

        except Exception as e:
            raise RuntimeError(f"Failed to perform CV inference: {str(e)}")
nexaai/cv_impl/pybind_cv_impl.py
ADDED
@@ -0,0 +1,31 @@
from typing import Optional

from nexaai.cv import CVModel, CVModelConfig, CVResults


class PyBindCVImpl(CVModel):
    def __init__(self):
        """Initialize PyBind CV implementation."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   config: CVModelConfig,
                   plugin_id: str = "llama_cpp",
                   device_id: Optional[str] = None
                   ) -> 'PyBindCVImpl':
        """Load CV model from configuration using PyBind backend."""
        # TODO: Implement PyBind CV loading
        instance = cls()
        return instance

    def eject(self):
        """Destroy the model and free resources."""
        # TODO: Implement PyBind CV cleanup
        pass

    def infer(self, input_image_path: str) -> CVResults:
        """Perform inference on image."""
        # TODO: Implement PyBind CV inference
        raise NotImplementedError("PyBind CV inference not yet implemented")