nexaai-1.0.21rc5-cp313-cp313-win_arm64.whl → nexaai-1.0.21rc14-cp313-cp313-win_arm64.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Potentially problematic release: this version of nexaai might be problematic.
- nexaai/__init__.py +95 -95
- nexaai/_stub.cp313-win_arm64.pyd +0 -0
- nexaai/_version.py +4 -1
- nexaai/asr.py +68 -65
- nexaai/asr_impl/mlx_asr_impl.py +92 -92
- nexaai/asr_impl/pybind_asr_impl.py +127 -44
- nexaai/base.py +39 -39
- nexaai/binds/__init__.py +6 -5
- nexaai/binds/asr_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/common_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
- nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
- nexaai/binds/cpu_gpu/ggml-opencl.dll +0 -0
- nexaai/binds/cpu_gpu/ggml.dll +0 -0
- nexaai/binds/cpu_gpu/mtmd.dll +0 -0
- nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
- nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
- nexaai/binds/embedder_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/libcrypto-3-arm64.dll +0 -0
- nexaai/binds/libssl-3-arm64.dll +0 -0
- nexaai/binds/llm_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/nexa_bridge.dll +0 -0
- nexaai/binds/npu/convnext-sdk.dll +0 -0
- nexaai/binds/npu/embed-gemma-sdk.dll +0 -0
- nexaai/binds/npu/ggml-base.dll +0 -0
- nexaai/binds/npu/ggml-cpu.dll +0 -0
- nexaai/binds/npu/ggml-opencl.dll +0 -0
- nexaai/binds/npu/ggml.dll +0 -0
- nexaai/binds/npu/granite-nano-sdk.dll +0 -0
- nexaai/binds/npu/granite4-sdk.dll +0 -0
- nexaai/binds/npu/jina-rerank-sdk.dll +0 -0
- nexaai/binds/npu/liquid-sdk.dll +0 -0
- nexaai/binds/npu/llama3-3b-sdk.dll +0 -0
- nexaai/binds/npu/nexa-mm-process.dll +0 -0
- nexaai/binds/npu/nexa-sampling.dll +0 -0
- nexaai/binds/npu/nexa_plugin.dll +0 -0
- nexaai/binds/npu/omni-neural-sdk.dll +0 -0
- nexaai/binds/npu/openblas.dll +0 -0
- nexaai/binds/npu/paddleocr-sdk.dll +0 -0
- nexaai/binds/npu/parakeet-sdk.dll +0 -0
- nexaai/binds/npu/phi3-5-sdk.dll +0 -0
- nexaai/binds/npu/phi4-sdk.dll +0 -0
- nexaai/binds/npu/pyannote-sdk.dll +0 -0
- nexaai/binds/npu/qwen3-4b-sdk.dll +0 -0
- nexaai/binds/npu/qwen3vl-sdk.dll +0 -0
- nexaai/binds/npu/qwen3vl-vision.dll +0 -0
- nexaai/binds/npu/yolov12-sdk.dll +0 -0
- nexaai/binds/npu/zlib1.dll +0 -0
- nexaai/binds/rerank_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/vlm_bind.cp313-win_arm64.pyd +0 -0
- nexaai/common.py +105 -105
- nexaai/cv.py +93 -93
- nexaai/cv_impl/mlx_cv_impl.py +89 -89
- nexaai/cv_impl/pybind_cv_impl.py +32 -32
- nexaai/embedder.py +73 -73
- nexaai/embedder_impl/mlx_embedder_impl.py +118 -118
- nexaai/embedder_impl/pybind_embedder_impl.py +96 -96
- nexaai/image_gen.py +141 -141
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -292
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -85
- nexaai/llm.py +98 -98
- nexaai/llm_impl/mlx_llm_impl.py +271 -271
- nexaai/llm_impl/pybind_llm_impl.py +220 -220
- nexaai/log.py +92 -92
- nexaai/rerank.py +57 -57
- nexaai/rerank_impl/mlx_rerank_impl.py +94 -94
- nexaai/rerank_impl/pybind_rerank_impl.py +136 -136
- nexaai/runtime.py +68 -68
- nexaai/runtime_error.py +24 -24
- nexaai/tts.py +75 -75
- nexaai/tts_impl/mlx_tts_impl.py +94 -94
- nexaai/tts_impl/pybind_tts_impl.py +43 -43
- nexaai/utils/decode.py +17 -17
- nexaai/utils/manifest_utils.py +531 -531
- nexaai/utils/model_manager.py +1562 -1562
- nexaai/utils/model_types.py +49 -49
- nexaai/utils/progress_tracker.py +384 -384
- nexaai/utils/quantization_utils.py +245 -245
- nexaai/vlm.py +129 -129
- nexaai/vlm_impl/mlx_vlm_impl.py +258 -258
- nexaai/vlm_impl/pybind_vlm_impl.py +256 -256
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/METADATA +1 -1
- nexaai-1.0.21rc14.dist-info/RECORD +154 -0
- nexaai/binds/nexaml/FLAC.dll +0 -0
- nexaai/binds/nexaml/fftw3.dll +0 -0
- nexaai/binds/nexaml/fftw3f.dll +0 -0
- nexaai/binds/nexaml/ggml-base.dll +0 -0
- nexaai/binds/nexaml/ggml-cpu.dll +0 -0
- nexaai/binds/nexaml/ggml-opencl.dll +0 -0
- nexaai/binds/nexaml/ggml.dll +0 -0
- nexaai/binds/nexaml/libmp3lame.DLL +0 -0
- nexaai/binds/nexaml/mpg123.dll +0 -0
- nexaai/binds/nexaml/nexa-mm-process.dll +0 -0
- nexaai/binds/nexaml/nexa-sampling.dll +0 -0
- nexaai/binds/nexaml/nexa_plugin.dll +0 -0
- nexaai/binds/nexaml/nexaproc.dll +0 -0
- nexaai/binds/nexaml/ogg.dll +0 -0
- nexaai/binds/nexaml/opus.dll +0 -0
- nexaai/binds/nexaml/qwen3-vl.dll +0 -0
- nexaai/binds/nexaml/qwen3vl-vision.dll +0 -0
- nexaai/binds/nexaml/vorbis.dll +0 -0
- nexaai/binds/nexaml/vorbisenc.dll +0 -0
- nexaai-1.0.21rc5.dist-info/RECORD +0 -162
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/WHEEL +0 -0
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/top_level.txt +0 -0
nexaai/vlm.py
CHANGED
@@ -1,130 +1,130 @@
All 129 changed lines were removed and re-added with identical content (line 130, the final pass, is unchanged context), so the file body is shown once below:

from typing import Generator, Optional, List, Dict, Any, Union
from abc import abstractmethod
import queue
import threading
import base64
from pathlib import Path

from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
from nexaai.base import BaseModel, ProfilingData


class VLM(BaseModel):
    def __init__(self, m_cfg: ModelConfig = ModelConfig()):
        """Initialize base VLM class."""
        self._m_cfg = m_cfg
        self._cancel_event = threading.Event()  # New attribute to control cancellation

    @classmethod
    def _load_from(cls,
                   local_path: str,
                   mmproj_path: str = None,
                   model_name: Optional[str] = None,
                   m_cfg: ModelConfig = ModelConfig(),
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None,
                   **kwargs
                   ) -> 'VLM':
        """Load VLM model from local path, routing to appropriate implementation.

        Args:
            local_path: Path to the main model file
            mmproj_path: Path to the multimodal projection file
            m_cfg: Model configuration
            plugin_id: Plugin identifier
            device_id: Optional device ID (not used in current binding)

        Returns:
            VLM instance
        """
        # Check plugin_id value for routing - handle both enum and string
        plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

        if plugin_value == "mlx":
            from nexaai.vlm_impl.mlx_vlm_impl import MlxVlmImpl
            return MlxVlmImpl._load_from(local_path, mmproj_path, model_name, m_cfg, plugin_id, device_id)
        else:
            from nexaai.vlm_impl.pybind_vlm_impl import PyBindVLMImpl
            return PyBindVLMImpl._load_from(local_path, mmproj_path, model_name, m_cfg, plugin_id, device_id)

    @abstractmethod
    def eject(self):
        """Release the model from memory."""
        pass

    def cancel_generation(self):
        """Signal to cancel any ongoing stream generation."""
        self._cancel_event.set()

    def reset_cancel(self):
        """Reset the cancel event. Call before starting a new generation if needed."""
        self._cancel_event.clear()

    @abstractmethod
    def reset(self):
        """
        Reset the VLM model context and KV cache. If not reset, the model will skip the number of evaluated tokens and treat tokens after those as the new incremental tokens.
        If your past chat history changed, or you are starting a new chat, you should always reset the model before running generate.
        """
        pass

    def _process_image(self, image: Union[bytes, str, Path]) -> bytes:
        """Process image input to bytes format.

        Args:
            image: Image data as bytes, base64 string, or file path

        Returns:
            Image data as bytes
        """
        if isinstance(image, bytes):
            return image
        elif isinstance(image, str):
            # Check if it's a base64 string
            if image.startswith('data:image'):
                # Extract base64 data from data URL
                base64_data = image.split(',')[1] if ',' in image else image
                return base64.b64decode(base64_data)
            else:
                # Assume it's a file path
                with open(image, 'rb') as f:
                    return f.read()
        elif isinstance(image, Path):
            with open(image, 'rb') as f:
                return f.read()
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")


    @abstractmethod
    def apply_chat_template(
        self,
        messages: List[MultiModalMessage],
        tools: Optional[List[Dict[str, Any]]] = None,
        enable_thinking: bool = True
    ) -> str:
        """Apply the chat template to multimodal messages."""
        pass

    @abstractmethod
    def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
        """Generate text with streaming."""
        pass

    @abstractmethod
    def generate(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> str:
        """
        Generate text without streaming.

        Args:
            prompt (str): The prompt to generate text from. For chat models, this is the chat messages after chat template is applied.
            g_cfg (GenerationConfig): Generation configuration.

        Returns:
            str: The generated text.
        """
        pass

    def get_profiling_data(self) -> Optional[ProfilingData]:
        """Get profiling data from the last generation."""
        pass
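For orientation, a minimal usage sketch against the base class above. The file paths are placeholders, and loading through the private classmethod VLM._load_from (rather than a public helper the package may expose elsewhere) is an assumption; only methods defined in the source are called.

import base64
from nexaai.common import GenerationConfig
from nexaai.vlm import VLM

# Hedged sketch: paths are placeholders; calling the private _load_from
# classmethod directly is an assumption about how the package is driven.
vlm = VLM._load_from(
    local_path="models/vlm-model.gguf",   # placeholder path
    mmproj_path="models/mmproj.gguf",     # placeholder path
)  # plugin_id defaults to PluginID.LLAMA_CPP, i.e. the PyBind implementation

vlm.reset()         # clear context/KV cache before a fresh chat, per the reset() docstring
vlm.reset_cancel()  # clear any stale cancel flag before generating

# Stream tokens; cancel_generation() called from another thread stops the
# loop in implementations that honor the cancel event.
for token in vlm.generate_stream("Describe the attached image.", GenerationConfig()):
    print(token, end="", flush=True)

# _process_image normalizes raw bytes, data URLs, and file paths to bytes:
payload = base64.b64encode(b"fake-image-bytes").decode()
assert vlm._process_image(f"data:image/png;base64,{payload}") == b"fake-image-bytes"

print(vlm.get_profiling_data())  # profiling data from the last generation, if any
vlm.eject()                      # release the model from memory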