nexaai 1.0.20-cp310-cp310-macosx_13_0_x86_64.whl → 1.0.21-cp310-cp310-macosx_13_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic.
- nexaai/__init__.py +12 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/asr.py +10 -6
- nexaai/asr_impl/pybind_asr_impl.py +98 -15
- nexaai/binds/__init__.py +2 -0
- nexaai/binds/asr_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/cpu_gpu/libnexa_plugin.dylib +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/rerank_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +1 -0
- nexaai/cv.py +2 -1
- nexaai/embedder.py +4 -3
- nexaai/embedder_impl/mlx_embedder_impl.py +3 -1
- nexaai/embedder_impl/pybind_embedder_impl.py +3 -2
- nexaai/image_gen.py +2 -1
- nexaai/llm.py +5 -3
- nexaai/llm_impl/mlx_llm_impl.py +2 -0
- nexaai/llm_impl/pybind_llm_impl.py +2 -0
- nexaai/mlx_backend/vlm/interface.py +5 -2
- nexaai/rerank.py +5 -3
- nexaai/rerank_impl/mlx_rerank_impl.py +2 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +109 -16
- nexaai/runtime_error.py +24 -0
- nexaai/tts.py +2 -1
- nexaai/utils/manifest_utils.py +10 -6
- nexaai/utils/model_manager.py +139 -8
- nexaai/vlm.py +4 -2
- nexaai/vlm_impl/mlx_vlm_impl.py +3 -2
- nexaai/vlm_impl/pybind_vlm_impl.py +33 -7
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/METADATA +1 -2
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/RECORD +37 -34
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/WHEEL +0 -0
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/top_level.txt +0 -0
nexaai/__init__.py
CHANGED
@@ -24,6 +24,13 @@ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, P
 # Import logging functionality
 from .log import set_logger, get_error_message
 
+# Import runtime errors
+from .runtime_error import (
+    NexaRuntimeError,
+    ContextLengthExceededError,
+    GenerationError
+)
+
 # Create alias for PluginID to be accessible as plugin_id
 plugin_id = PluginID
 
@@ -52,6 +59,11 @@ __all__ = [
     # Logging functionality
     "set_logger",
    "get_error_message",
+
+    # Runtime errors
+    "NexaRuntimeError",
+    "ContextLengthExceededError",
+    "GenerationError",
 
     "LLM",
     "Embedder",
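As a quick sanity check of the new import surface, the sketch below assumes the 1.0.21 wheel is installed and relies only on the re-exports added above; nothing else in it comes from this diff.

import nexaai

# The three error types added in 1.0.21 are re-exported at package top level.
for name in ("NexaRuntimeError", "ContextLengthExceededError", "GenerationError"):
    cls = getattr(nexaai, name)
    print(name, "->", cls.__mro__[1].__name__)
# Expected: the two subclasses report NexaRuntimeError as their direct base,
# and NexaRuntimeError itself reports Exception.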
nexaai/_stub.cpython-310-darwin.so
Binary file
nexaai/_version.py
CHANGED
nexaai/asr.py
CHANGED
@@ -3,7 +3,7 @@ from abc import abstractmethod
 from dataclasses import dataclass
 
 from nexaai.base import BaseModel
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 
 
 @dataclass
@@ -25,17 +25,20 @@ class ASRResult:
 class ASR(BaseModel):
     """Abstract base class for Automatic Speech Recognition models."""
 
-    def __init__(self):
+    def __init__(self, m_cfg: ModelConfig = ModelConfig()):
         """Initialize base ASR class."""
-
+        self._m_cfg = m_cfg
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'ASR':
         """Load ASR model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -43,10 +46,11 @@ class ASR(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
-            return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return MLXASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
         else:
             from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl
-            return PyBindASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return PyBindASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
+
 
     @abstractmethod
     def transcribe(
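The _load_from change above threads model_name and m_cfg through to both backends; the dispatch itself is unchanged. Below is a self-contained sketch of that routing pattern — the enum here is a stand-in with assumed values, not the packaged nexaai.common.PluginID.

from enum import Enum
from typing import Union

class PluginID(Enum):          # stand-in; values assumed from the routing code
    LLAMA_CPP = "llama_cpp"
    MLX = "mlx"

def route(plugin_id: Union[PluginID, str]) -> str:
    # Normalize enum-or-string input exactly as the SDK's loaders do,
    # then pick the implementation by the normalized value.
    plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
    return "MLXASRImpl" if plugin_value == "mlx" else "PyBindASRImpl"

print(route(PluginID.MLX))   # MLXASRImpl
print(route("llama_cpp"))    # PyBindASRImpl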
nexaai/asr_impl/pybind_asr_impl.py
CHANGED
@@ -1,32 +1,78 @@
 from typing import List, Optional, Union
 
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 from nexaai.asr import ASR, ASRConfig, ASRResult
+from nexaai.binds import asr_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 
 
 class PyBindASRImpl(ASR):
-    def __init__(self):
-        """
-        super().__init__()
-        #
+    def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
+        """Private constructor, should not be called directly."""
+        super().__init__(m_cfg)
+        self._handle = handle  # This is a py::capsule
+        self._model_config = None
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
                    ) -> 'PyBindASRImpl':
         """Load ASR model from local path using PyBind backend."""
-
-
-
+        _ensure_runtime()
+
+        # Create model config
+        config = common_bind.ModelConfig()
+
+        config.n_ctx = m_cfg.n_ctx
+        if m_cfg.n_threads is not None:
+            config.n_threads = m_cfg.n_threads
+        if m_cfg.n_threads_batch is not None:
+            config.n_threads_batch = m_cfg.n_threads_batch
+        if m_cfg.n_batch is not None:
+            config.n_batch = m_cfg.n_batch
+        if m_cfg.n_ubatch is not None:
+            config.n_ubatch = m_cfg.n_ubatch
+        if m_cfg.n_seq_max is not None:
+            config.n_seq_max = m_cfg.n_seq_max
+        config.n_gpu_layers = m_cfg.n_gpu_layers
+
+        # handle chat template strings
+        if m_cfg.chat_template_path:
+            config.chat_template_path = m_cfg.chat_template_path
+
+        if m_cfg.chat_template_content:
+            config.chat_template_content = m_cfg.chat_template_content
+
+        # Convert plugin_id to string
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
+
+        # Create ASR handle using the binding
+        handle = asr_bind.ml_asr_create(
+            model_path=model_path,
+            model_name=model_name,
+            tokenizer_path=tokenizer_path,
+            model_config=config,
+            language=language,
+            plugin_id=plugin_id_str,
+            device_id=device_id,
+            license_id=None,  # Optional
+            license_key=None  # Optional
+        )
+
+        return cls(handle, m_cfg)
 
     def eject(self):
-        """
-        #
-
+        """Release the model from memory."""
+        # py::capsule handles cleanup automatically
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
 
     def transcribe(
         self,
@@ -35,10 +81,47 @@ class PyBindASRImpl(ASR):
         config: Optional[ASRConfig] = None,
     ) -> ASRResult:
         """Transcribe audio file to text."""
-
-
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+        # Convert ASRConfig to binding format if provided
+        asr_config = None
+        if config:
+            asr_config = asr_bind.ASRConfig()
+            asr_config.timestamps = config.timestamps
+            asr_config.beam_size = config.beam_size
+            asr_config.stream = config.stream
+
+        # Perform transcription using the binding
+        result_dict = asr_bind.ml_asr_transcribe(
+            handle=self._handle,
+            audio_path=audio_path,
+            language=language,
+            config=asr_config
+        )
+
+        # Convert result to ASRResult
+        transcript = result_dict.get("transcript", "")
+        confidence_scores = result_dict.get("confidence_scores")
+        timestamps = result_dict.get("timestamps")
+
+        # Convert timestamps to the expected format
+        timestamp_pairs = []
+        if timestamps:
+            for start, end in timestamps:
+                timestamp_pairs.append((float(start), float(end)))
+
+        return ASRResult(
+            transcript=transcript,
+            confidence_scores=confidence_scores or [],
+            timestamps=timestamp_pairs
+        )
 
     def list_supported_languages(self) -> List[str]:
         """List supported languages."""
-
-
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+        # Get supported languages using the binding
+        languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
+        return languages
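The config plumbing in _load_from copies optional fields onto the pybind struct only when they are set, so C++-side defaults survive. A minimal, self-contained sketch of that pattern follows — both classes are stand-ins for nexaai's ModelConfig and the common_bind.ModelConfig pybind struct, and the default values are invented for the demo.

from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelConfig:                      # Python-side config (assumed shape)
    n_ctx: int = 4096
    n_threads: Optional[int] = None
    n_batch: Optional[int] = None
    n_gpu_layers: int = 0

class BindModelConfig:                  # stand-in for common_bind.ModelConfig
    n_ctx = 0
    n_threads = 8                       # pretend C++ default
    n_batch = 512
    n_gpu_layers = 0

def to_bind_config(m_cfg: ModelConfig) -> BindModelConfig:
    config = BindModelConfig()
    config.n_ctx = m_cfg.n_ctx                      # always copied
    if m_cfg.n_threads is not None:                 # copied only if set,
        config.n_threads = m_cfg.n_threads          # else keep the C++ default
    if m_cfg.n_batch is not None:
        config.n_batch = m_cfg.n_batch
    config.n_gpu_layers = m_cfg.n_gpu_layers        # always copied
    return config

cfg = to_bind_config(ModelConfig(n_ctx=8192, n_threads=4))
print(cfg.n_ctx, cfg.n_threads, cfg.n_batch)        # 8192 4 512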
nexaai/binds/__init__.py
CHANGED
nexaai/binds/asr_bind.cpython-310-darwin.so
Binary file
nexaai/binds/cpu_gpu/libnexa_plugin.dylib
Binary file
nexaai/binds/embedder_bind.cpython-310-darwin.so
Binary file
nexaai/binds/libnexa_bridge.dylib
Binary file
nexaai/binds/llm_bind.cpython-310-darwin.so
Binary file
nexaai/binds/rerank_bind.cpython-310-darwin.so
Binary file
nexaai/binds/vlm_bind.cpython-310-darwin.so
Binary file
nexaai/common.py
CHANGED
nexaai/cv.py
CHANGED
@@ -73,7 +73,8 @@ class CVModel(BaseModel):
                    _: str,  # TODO: remove this argument, this is a hack to make api design happy
                    config: CVModelConfig,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'CVModel':
         """Load CV model from configuration, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/embedder.py
CHANGED
@@ -22,12 +22,13 @@ class Embedder(BaseModel):
         pass
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
         """
         Load an embedder from model files, routing to appropriate implementation.
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
@@ -39,10 +40,10 @@ class Embedder(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
-            return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return MLXEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
         else:
             from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
-            return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return PyBindEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
 
     @abstractmethod
     def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
nexaai/embedder_impl/mlx_embedder_impl.py
CHANGED
@@ -14,12 +14,13 @@ class MLXEmbedderImpl(Embedder):
         self._mlx_embedder = None
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
         """
         Load an embedder from model files using MLX backend.
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
 
@@ -34,6 +35,7 @@ class MLXEmbedderImpl(Embedder):
         # This will automatically detect if it's JinaV2 or generic model and route correctly
         instance._mlx_embedder = create_embedder(
             model_path=model_path,
+            # model_name=model_name,  # FIXME: For MLX Embedder, model_name is not used
             tokenizer_path=tokenizer_file
         )
 
nexaai/embedder_impl/pybind_embedder_impl.py
CHANGED
@@ -16,12 +16,13 @@ class PyBindEmbedderImpl(Embedder):
         self._handle = _handle_ptr
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
         """
         Load an embedder from model files
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
@@ -32,7 +33,7 @@ class PyBindEmbedderImpl(Embedder):
         # Convert enum to string for C++ binding
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         # New parameter order: model_path, plugin_id, tokenizer_path (optional)
-        handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
+        handle = embedder_bind.ml_embedder_create(model_path, model_name, plugin_id_str, tokenizer_file)
         return cls(handle)
 
     def eject(self):
nexaai/image_gen.py
CHANGED
@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None,
                    float16: bool = True,
-                   quantize: bool = False
+                   quantize: bool = False,
+                   **kwargs
                    ) -> 'ImageGen':
         """Load image generation model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/llm.py
CHANGED
@@ -15,10 +15,12 @@ class LLM(BaseModel):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'LLM':
         """Load model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
-            return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
         else:
             from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
-            return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
 
     def cancel_generation(self):
         """Signal to cancel any ongoing stream generation."""
nexaai/llm_impl/mlx_llm_impl.py
CHANGED
@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
         instance = cls(m_cfg)
         instance._mlx_llm = MLXLLMInterface(
             model_path=local_path,
+            # model_name=model_name,  # FIXME: For MLX LLM, model_name is not used
             tokenizer_path=tokenizer_path or local_path,
             config=mlx_config,
             device=device_id
nexaai/llm_impl/pybind_llm_impl.py
CHANGED
@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         handle = llm_bind.ml_llm_create(
             model_path=local_path,
+            model_name=model_name,
             tokenizer_path=tokenizer_path,
             model_config=config,
             plugin_id=plugin_id_str,
nexaai/mlx_backend/vlm/interface.py
CHANGED
@@ -482,8 +482,12 @@ class VLM(ProfilingMixin):
 
     def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
         """Apply chat template to messages with optional tools support."""
+        if self.model_name in ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking", "qwen3vl-8b", "qwen3vl-8b-thinking"]:
+            return apply_chat_template_qwen3_vl(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
+        if self.model_name == "qwen3vl-moe":
+            return apply_chat_template_qwen3_vl_moe(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
+
         if hasattr(self.processor, "apply_chat_template"):
-            # Convert ChatMessage objects to dictionaries for the processor
             messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
 
             parsed_tools = None
@@ -492,7 +496,6 @@ class VLM(ProfilingMixin):
 
             result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
             return result
-        # Fallback: join messages
         return "\n".join([f"{m.role}: {m.content}" for m in messages])
 
     def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
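The new qwen3vl branch above dispatches on the model name before falling back to the processor's own template. Here is a sketch of just that selection logic — the model-name strings mirror the diff, while the returned labels are placeholders for the real template functions.

QWEN3VL_DENSE = ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking",
                 "qwen3vl-8b", "qwen3vl-8b-thinking"]

def pick_template(model_name: str) -> str:
    # Dense qwen3vl variants and the MoE variant each get a dedicated
    # template function; everything else uses the processor's template.
    if model_name in QWEN3VL_DENSE:
        return "apply_chat_template_qwen3_vl"
    if model_name == "qwen3vl-moe":
        return "apply_chat_template_qwen3_vl_moe"
    return "processor.apply_chat_template"

print(pick_template("qwen3vl-8b"))   # apply_chat_template_qwen3_vl
print(pick_template("qwen3vl-moe"))  # apply_chat_template_qwen3_vl_moe
print(pick_template("llava"))        # processor.apply_chat_template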
nexaai/rerank.py
CHANGED
@@ -24,9 +24,11 @@ class Reranker(BaseModel):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'Reranker':
         """Load reranker model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -34,10 +36,10 @@ class Reranker(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
-            return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return MLXRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
         else:
             from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
-            return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return PyBindRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
 
     @abstractmethod
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
nexaai/rerank_impl/mlx_rerank_impl.py
CHANGED
@@ -17,6 +17,7 @@ class MLXRerankImpl(Reranker):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
                    device_id: Optional[str] = None
@@ -29,6 +30,7 @@ class MLXRerankImpl(Reranker):
         instance = cls()
         instance._mlx_reranker = create_reranker(
             model_path=model_path,
+            # model_name=model_name,  # FIXME: For MLX Reranker, model_name is not used
             tokenizer_path=tokenizer_file,
             device=device_id
         )
nexaai/rerank_impl/pybind_rerank_impl.py
CHANGED
@@ -1,36 +1,89 @@
 from typing import List, Optional, Sequence, Union
+import numpy as np
 
 from nexaai.common import PluginID
 from nexaai.rerank import Reranker, RerankConfig
+from nexaai.binds import rerank_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 
 
 class PyBindRerankImpl(Reranker):
-    def __init__(self):
-        """
+    def __init__(self, _handle_ptr):
+        """
+        Internal initializer
+
+        Args:
+            _handle_ptr: Capsule handle to the C++ reranker object
+        """
         super().__init__()
-
+        self._handle = _handle_ptr
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
                    ) -> 'PyBindRerankImpl':
-        """
-
-
-
+        """
+        Load reranker model from local path using PyBind backend.
+
+        Args:
+            model_path: Path to the model file
+            model_name: Name of the model (optional)
+            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
+            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
+            device_id: Device ID to use for the model (optional)
+
+        Returns:
+            PyBindRerankImpl instance
+        """
+        _ensure_runtime()
+
+        # Convert enum to string for C++ binding
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+        # Create model config
+        model_config = common_bind.ModelConfig()
+
+        # Create reranker handle with new API signature
+        handle = rerank_bind.ml_reranker_create(
+            model_path,
+            model_name,
+            tokenizer_file,
+            model_config,
+            plugin_id_str,
+            device_id
+        )
+
+        return cls(handle)
 
     def eject(self):
-        """
-
-
+        """
+        Clean up resources and destroy the reranker
+        """
+        # Destructor of the handle will unload the model correctly
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
 
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
-        """
-
-
+        """
+        Load model from path.
+
+        Note: This method is not typically used directly. Use _load_from instead.
+
+        Args:
+            model_path: Path to the model file
+            extra_data: Additional data (unused)
+
+        Returns:
+            True if successful
+        """
+        # This method is part of the BaseModel interface but typically not used
+        # directly for PyBind implementations since _load_from handles creation
+        raise NotImplementedError("Use _load_from class method to load models")
 
     def rerank(
         self,
@@ -38,6 +91,46 @@ class PyBindRerankImpl(Reranker):
         documents: Sequence[str],
         config: Optional[RerankConfig] = None,
     ) -> List[float]:
-        """
-
-
+        """
+        Rerank documents given a query.
+
+        Args:
+            query: Query text as UTF-8 string
+            documents: List of document texts to rerank
+            config: Optional reranking configuration
+
+        Returns:
+            List of ranking scores (one per document)
+        """
+        if self._handle is None:
+            raise RuntimeError("Reranker handle is None. Model may have been ejected.")
+
+        # Use default config if not provided
+        if config is None:
+            config = RerankConfig()
+
+        # Create bind config
+        bind_config = rerank_bind.RerankConfig()
+        bind_config.batch_size = config.batch_size
+        bind_config.normalize = config.normalize
+        bind_config.normalize_method = config.normalize_method
+
+        # Convert documents to list if needed
+        documents_list = list(documents)
+
+        # Call the binding which returns a dict with scores and profile_data
+        result = rerank_bind.ml_reranker_rerank(
+            self._handle,
+            query,
+            documents_list,
+            bind_config
+        )
+
+        # Extract scores from result dict
+        scores_array = result.get("scores", np.array([]))
+
+        # Convert numpy array to list of floats
+        if isinstance(scores_array, np.ndarray):
+            return scores_array.tolist()
+        else:
+            return []
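The tail of rerank() normalizes the binding's result dict into a plain list of floats. That step in isolation looks like the sketch below; the result dicts are fabricated for the demo.

import numpy as np

def extract_scores(result: dict) -> list:
    # The binding returns {"scores": np.ndarray, "profile_data": ...};
    # anything unexpected degrades to an empty list.
    scores_array = result.get("scores", np.array([]))
    if isinstance(scores_array, np.ndarray):
        return scores_array.tolist()
    return []

print(extract_scores({"scores": np.array([0.91, 0.12, 0.55])}))  # [0.91, 0.12, 0.55]
print(extract_scores({"profile_data": {}}))                      # []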
nexaai/runtime_error.py
ADDED
@@ -0,0 +1,24 @@
+"""Runtime errors for Nexa SDK operations."""
+
+
+class NexaRuntimeError(Exception):
+    """Base class for Nexa runtime errors."""
+
+    def __init__(self, message: str, error_code: int = None):
+        self.error_code = error_code
+        super().__init__(message)
+
+
+class ContextLengthExceededError(NexaRuntimeError):
+    """Raised when the input context length exceeds the model's maximum."""
+
+    def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
+        super().__init__(message, error_code)
+
+
+class GenerationError(NexaRuntimeError):
+    """Raised when generation fails."""
+
+    def __init__(self, message: str = "Generation failed", error_code: int = None):
+        super().__init__(message, error_code)
+
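A minimal sketch of consuming this hierarchy, assuming the package is installed; the error code used here matches the ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH constant (-200004) that the VLM binding maps further below.

from nexaai.runtime_error import (
    NexaRuntimeError, ContextLengthExceededError, GenerationError
)

def describe(err: NexaRuntimeError) -> str:
    # Both subclasses carry the numeric error_code from the C layer, so a
    # single NexaRuntimeError handler can still distinguish specific cases.
    try:
        raise err
    except ContextLengthExceededError as e:
        return f"prompt too long (code {e.error_code}); shorten the input"
    except NexaRuntimeError as e:
        return f"generation failed (code {e.error_code})"

print(describe(ContextLengthExceededError(error_code=-200004)))
print(describe(GenerationError("backend aborted", error_code=-1)))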
nexaai/tts.py
CHANGED
@@ -45,7 +45,8 @@ class TTS(BaseModel):
                    model_path: str,
                    vocoder_path: str,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'TTS':
         """Load TTS model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/utils/manifest_utils.py
CHANGED
@@ -157,12 +157,16 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
     # Use the new enum-based quantization extraction
     quantization_type = extract_quantization_from_filename(current_file_name)
     quant_level = quantization_type.value if quantization_type else "UNKNOWN"
-
-
-
-
-
-
+
+    # FIXME: hardcode to handle the multiple mmproj files problem
+    if quant_level == "UNKNOWN" and "mmproj" in current_file_name.lower():
+        pass
+    else:
+        model_files[quant_level] = {
+            "Name": current_file_name,
+            "Downloaded": True,
+            "Size": file_size
+        }
 
     # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
     plugin_id = kwargs.get('plugin_id')
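A self-contained sketch of the mmproj guard added above: a file whose quantization cannot be parsed is only indexed when it is not an mmproj sidecar, so multiple mmproj files no longer collide on the "UNKNOWN" key. File names and sizes below are made up for the demo.

def index_file(model_files: dict, current_file_name: str, quant_level: str, file_size: int) -> None:
    # Skip mmproj sidecars that have no recognizable quantization level.
    if quant_level == "UNKNOWN" and "mmproj" in current_file_name.lower():
        return
    model_files[quant_level] = {
        "Name": current_file_name,
        "Downloaded": True,
        "Size": file_size,
    }

files = {}
index_file(files, "model.Q4_K_M.gguf", "Q4_K_M", 4_200_000_000)
index_file(files, "mmproj-model-f16.gguf", "UNKNOWN", 600_000_000)  # skipped
print(sorted(files))  # ['Q4_K_M']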
nexaai/utils/model_manager.py
CHANGED
@@ -410,6 +410,20 @@ def _remove_specific_file(target_model: DownloadedModel, file_name: str, local_d
     except OSError:
         file_size = 0
 
+    # Check if we should remove entire folder instead (for .gguf files)
+    # If removing a .gguf file and no other non-mmproj .gguf files remain, remove entire folder
+    if file_name.endswith('.gguf'):
+        updated_files = [f for f in target_model.files if f != file_name]
+        # Find remaining .gguf files that don't contain "mmproj" in filename
+        remaining_non_mmproj_gguf = [
+            f for f in updated_files
+            if f.endswith('.gguf') and 'mmproj' not in f.lower()
+        ]
+
+        # If no non-mmproj .gguf files remain, remove entire repository
+        if len(remaining_non_mmproj_gguf) == 0:
+            return _remove_entire_repository(target_model, local_dir)
+
     # Remove the file
     try:
         os.remove(file_path)
@@ -846,6 +860,41 @@ class HuggingFaceDownloader:
             pass
         return {}
 
+    def _download_manifest_if_needed(self, repo_id: str, local_dir: str) -> bool:
+        """
+        Download nexa.manifest from the repository if it doesn't exist locally.
+
+        Args:
+            repo_id: Repository ID
+            local_dir: Local directory where the manifest should be saved
+
+        Returns:
+            bool: True if manifest was downloaded or already exists, False if not found in repo
+        """
+        manifest_path = os.path.join(local_dir, 'nexa.manifest')
+
+        # Check if manifest already exists locally
+        if os.path.exists(manifest_path):
+            return True
+
+        # Try to download nexa.manifest from the repository
+        try:
+            print(f"[INFO] Attempting to download nexa.manifest from {repo_id}...")
+            self.api.hf_hub_download(
+                repo_id=repo_id,
+                filename='nexa.manifest',
+                local_dir=local_dir,
+                local_dir_use_symlinks=False,
+                token=self.token,
+                force_download=False
+            )
+            print(f"[OK] Successfully downloaded nexa.manifest from {repo_id}")
+            return True
+        except Exception as e:
+            # Manifest doesn't exist in repo or other error - this is fine, we'll create it
+            print(f"[INFO] nexa.manifest not found in {repo_id}, will create locally")
+            return False
+
     def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
         """Fetch model info and save metadata after successful download."""
         # Initialize metadata with defaults to ensure manifest is always created
@@ -946,6 +995,9 @@ class HuggingFaceDownloader:
         if progress_tracker:
             progress_tracker.stop_tracking()
 
+        # Download nexa.manifest from repo if it doesn't exist locally
+        self._download_manifest_if_needed(repo_id, file_local_dir)
+
         # Save metadata after successful download
         self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
 
@@ -1055,6 +1107,9 @@ class HuggingFaceDownloader:
         if progress_tracker:
             progress_tracker.stop_tracking()
 
+        # Download nexa.manifest from repo if it doesn't exist locally
+        self._download_manifest_if_needed(repo_id, repo_local_dir)
+
         # Save metadata after successful download
         self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
 
@@ -1289,7 +1344,7 @@ def _download_model_if_needed(
     token: Union[bool, str, None] = None,
     is_mmproj: bool = False,
     **kwargs
-) -> str:
+) -> tuple[str, Optional[str], Optional[str]]:
     """
     Helper function to download a model from HuggingFace if it doesn't exist locally.
 
@@ -1300,15 +1355,78 @@ def _download_model_if_needed(
         token: HuggingFace authentication token for private repositories
 
     Returns:
-        str
+        tuple[str, Optional[str], Optional[str]]: Tuple of (local_path, model_name, plugin_id)
+            - local_path: Local path to the model (either existing or downloaded)
+            - model_name: ModelName from nexa.manifest if available, None otherwise
+            - plugin_id: PluginId from nexa.manifest if available, None otherwise
 
     Raises:
        RuntimeError: If download fails
    """
+    # Helper function to extract model info from manifest
+    def _extract_info_from_manifest(path: str) -> tuple[Optional[str], Optional[str], Optional[dict]]:
+        """Extract ModelName, PluginId, and full manifest from nexa.manifest if it exists."""
+        # If path is a file, check its parent directory for manifest
+        if os.path.isfile(path):
+            manifest_dir = os.path.dirname(path)
+        else:
+            manifest_dir = path
+
+        manifest_path = os.path.join(manifest_dir, 'nexa.manifest')
+        if not os.path.exists(manifest_path):
+            return None, None, None
+
+        try:
+            with open(manifest_path, 'r', encoding='utf-8') as f:
+                manifest = json.load(f)
+            return manifest.get('ModelName'), manifest.get('PluginId'), manifest
+        except (json.JSONDecodeError, IOError):
+            return None, None, None
+
+    # Helper function to get a model file path from manifest
+    # Note: This is for NPU only, because when downloading, it is a directory; when passing local path to inference, it needs to be a file.
+    def _get_model_file_from_manifest(manifest: dict, base_dir: str) -> Optional[str]:
+        """Extract a model file path from manifest's ModelFile section."""
+        if not manifest or 'ModelFile' not in manifest:
+            return None
+
+        model_files = manifest['ModelFile']
+        # Find the first valid model file (skip N/A entries and metadata files)
+        for key, file_info in model_files.items():
+            if key == 'N/A':
+                continue
+            if isinstance(file_info, dict) and 'Name' in file_info:
+                file_name = file_info['Name']
+                # Skip common non-model files
+                if file_name and not file_name.startswith('.') and file_name.endswith('.nexa'):
+                    file_path = os.path.join(base_dir, file_name)
+                    if os.path.exists(file_path):
+                        return file_path
+
+        # If no .nexa files found, try ExtraFiles for .nexa files
+        if 'ExtraFiles' in manifest:
+            for file_info in manifest['ExtraFiles']:
+                if isinstance(file_info, dict) and 'Name' in file_info:
+                    file_name = file_info['Name']
+                    if file_name and file_name.endswith('.nexa') and not file_name.startswith('.cache'):
+                        file_path = os.path.join(base_dir, file_name)
+                        if os.path.exists(file_path):
+                            return file_path
+
+        return None
+
     # Check if model_path exists locally (file or directory)
     if os.path.exists(model_path):
-        # Local path exists,
-
+        # Local path exists, try to extract model info
+        model_name, plugin_id, manifest = _extract_info_from_manifest(model_path)
+
+        # If PluginId is "npu" and path is a directory, convert to file path
+        if plugin_id == "npu" and os.path.isdir(model_path):
+            model_file_path = _get_model_file_from_manifest(manifest, model_path)
+            if model_file_path:
+                model_path = model_file_path
+
+        return model_path, model_name, plugin_id
 
     # Model path doesn't exist locally, try to download from HuggingFace
     try:
@@ -1328,7 +1446,16 @@ def _download_model_if_needed(
             **kwargs
         )
 
-
+        # Extract model info from the downloaded manifest
+        model_name, plugin_id, manifest = _extract_info_from_manifest(downloaded_path)
+
+        # If PluginId is "npu" and path is a directory, convert to file path
+        if plugin_id == "npu" and os.path.isdir(downloaded_path):
+            model_file_path = _get_model_file_from_manifest(manifest, downloaded_path)
+            if model_file_path:
+                downloaded_path = model_file_path
+
+        return downloaded_path, model_name, plugin_id
 
     except Exception as e:
         # Only handle download-related errors
@@ -1397,7 +1524,7 @@ def auto_download_model(func: Callable) -> Callable:
         # Download name_or_path if needed
         if name_or_path is not None:
             try:
-                downloaded_name_path = _download_model_if_needed(
+                downloaded_name_path, model_name, plugin_id = _download_model_if_needed(
                     name_or_path, 'name_or_path', progress_callback, token, **kwargs
                 )
 
@@ -1408,6 +1535,10 @@ def auto_download_model(func: Callable) -> Callable:
                     args = tuple(args_list)
                 else:
                     kwargs['name_or_path'] = downloaded_name_path
+
+                # Add model_name to kwargs if it exists and not already set
+                if model_name is not None and 'model_name' not in kwargs:
+                    kwargs['model_name'] = model_name
 
             except Exception as e:
                 raise e  # Re-raise the error from _download_model_if_needed
@@ -1415,7 +1546,7 @@ def auto_download_model(func: Callable) -> Callable:
         # Download mmproj_path if needed
         if mmproj_path is not None:
            try:
-                downloaded_mmproj_path = _download_model_if_needed(
+                downloaded_mmproj_path, _, _ = _download_model_if_needed(
                     mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
                 )
 
@@ -1427,5 +1558,5 @@ def auto_download_model(func: Callable) -> Callable:
 
         # Call original function with updated paths (outside try-catch to let model creation errors bubble up)
         return func(*args, **kwargs)
-
+
     return wrapper
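The manifest lookup added to _download_model_if_needed can be exercised on its own. The sketch below mirrors the _extract_info_from_manifest helper from the diff and builds a throwaway nexa.manifest to run against; the ModelName/PluginId values are invented for the demo.

import json
import os
import tempfile

def extract_info(path: str):
    # Given a model path (file or directory), read ModelName/PluginId out
    # of a sibling nexa.manifest when present; otherwise return Nones.
    manifest_dir = os.path.dirname(path) if os.path.isfile(path) else path
    manifest_path = os.path.join(manifest_dir, "nexa.manifest")
    if not os.path.exists(manifest_path):
        return None, None, None
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            manifest = json.load(f)
        return manifest.get("ModelName"), manifest.get("PluginId"), manifest
    except (json.JSONDecodeError, IOError):
        return None, None, None

with tempfile.TemporaryDirectory() as d:
    with open(os.path.join(d, "nexa.manifest"), "w", encoding="utf-8") as f:
        json.dump({"ModelName": "qwen3vl-4b", "PluginId": "mlx"}, f)
    print(extract_info(d))  # ('qwen3vl-4b', 'mlx', {...})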
nexaai/vlm.py
CHANGED
@@ -22,7 +22,8 @@ class VLM(BaseModel):
                    model_name: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'VLM':
         """Load VLM model from local path, routing to appropriate implementation.
 
@@ -99,7 +100,8 @@ class VLM(BaseModel):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         pass
nexaai/vlm_impl/mlx_vlm_impl.py
CHANGED
@@ -72,7 +72,8 @@ class MlxVlmImpl(VLM):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         if not self._mlx_vlm:
@@ -116,7 +117,7 @@ class MlxVlmImpl(VLM):
                 num_images=total_images,
                 num_audios=total_audios,
                 tools=tools,
-                enable_thinking=
+                enable_thinking=enable_thinking
             )
         else:
             # Use regular apply_chat_template for text-only messages
nexaai/vlm_impl/pybind_vlm_impl.py
CHANGED
@@ -8,6 +8,11 @@ from nexaai.binds import vlm_bind, common_bind
 from nexaai.runtime import _ensure_runtime
 from nexaai.vlm import VLM
 from nexaai.base import ProfilingData
+from nexaai.runtime_error import ContextLengthExceededError, GenerationError
+
+# Error codes from ml.h
+ML_SUCCESS = 0
+ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004
 
 
 class PyBindVLMImpl(VLM):
@@ -68,7 +73,7 @@ class PyBindVLMImpl(VLM):
         handle = vlm_bind.create_vlm(
             model_path=local_path,
             mmproj_path=mmproj_path,
-
+            model_name=model_name,
             model_config=config,
             plugin_id=plugin_id_str,
             device_id=device_id
@@ -91,7 +96,8 @@ class PyBindVLMImpl(VLM):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         payload = []
@@ -103,15 +109,14 @@ class PyBindVLMImpl(VLM):
                 t = c["type"]
                 if t == "text":
                     blocks.append({"type": "text", "text": c.get("text","") or ""})
-                elif t == "image":
-                    # Pass through the original structure - let vlm-bind.cpp handle field extraction
-                    blocks.append(c)
                 else:
-
+                    # Pass through the original structure for image, audio, and any other types
+                    # Let vlm-bind.cpp handle field extraction (text/url/path)
+                    blocks.append(c)
 
             payload.append({"role": role, "content": blocks})
 
-        result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools)
+        result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools, enable_thinking)
         return result
 
     def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
@@ -143,6 +148,18 @@ class PyBindVLMImpl(VLM):
                     on_token=on_token,
                     user_data=None
                 )
+
+                # Check for errors in result
+                error_code = result.get("error_code", ML_SUCCESS)
+                if error_code != ML_SUCCESS:
+                    error_message = result.get("error_message", "Unknown error")
+                    if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
+                        exception_container[0] = ContextLengthExceededError(error_message, error_code)
+                    else:
+                        exception_container[0] = GenerationError(error_message, error_code)
+                    token_queue.put(('end', None))
+                    return
+
                 self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
             except Exception as e:
                 exception_container[0] = e
@@ -186,6 +203,15 @@ class PyBindVLMImpl(VLM):
             user_data=None
         )
 
+        # Check for errors in result
+        error_code = result.get("error_code", ML_SUCCESS)
+        if error_code != ML_SUCCESS:
+            error_message = result.get("error_message", "Unknown error")
+            if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
+                raise ContextLengthExceededError(error_message, error_code)
+            else:
+                raise GenerationError(error_message, error_code)
+
         self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
         return result.get("text", "")
 
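The error handling added to PyBindVLMImpl turns the binding's error_code/error_message fields into the typed exceptions from nexaai.runtime_error. A self-contained sketch of that mapping follows; the constants and class shapes follow the diff, while the stand-in classes and result dicts are fabricated so it runs without the package.

ML_SUCCESS = 0
ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004

class NexaRuntimeError(Exception):          # stand-in for nexaai.runtime_error
    def __init__(self, message, error_code=None):
        self.error_code = error_code
        super().__init__(message)

class ContextLengthExceededError(NexaRuntimeError): pass
class GenerationError(NexaRuntimeError): pass

def raise_for_result(result: dict) -> None:
    # Mirror the wrapper's check: a non-zero error_code becomes a typed
    # exception, with the context-length code getting its own class.
    error_code = result.get("error_code", ML_SUCCESS)
    if error_code != ML_SUCCESS:
        error_message = result.get("error_message", "Unknown error")
        if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
            raise ContextLengthExceededError(error_message, error_code)
        raise GenerationError(error_message, error_code)

raise_for_result({"text": "ok"})  # no error raised
try:
    raise_for_result({"error_code": -200004, "error_message": "too many tokens"})
except ContextLengthExceededError as e:
    print("context overflow:", e.error_code)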
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nexaai
-Version: 1.0.20
+Version: 1.0.21
 Summary: Python bindings for NexaSDK C-lib backend
 Author-email: "Nexa AI, Inc." <dev@nexa.ai>
 Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -14,7 +14,6 @@ Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Requires-Dist: huggingface_hub
 Requires-Dist: tqdm
-Requires-Dist: hf_xet
 Requires-Dist: numpy
 Requires-Dist: httpx
 Provides-Extra: mlx
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/RECORD
CHANGED
@@ -1,46 +1,49 @@
-nexaai/__init__.py,sha256=
-nexaai/_stub.cpython-310-darwin.so,sha256=
-nexaai/_version.py,sha256=
-nexaai/asr.py,sha256=
+nexaai/__init__.py,sha256=gOd7sNsqEESopw_24xgnOSkIRENrk4Fa-RMtmVv62eA,2421
+nexaai/_stub.cpython-310-darwin.so,sha256=HbW9PeHihO3_JhDrG31qtXJ4Ru733LdQmr5EwkZKvM0,49832
+nexaai/_version.py,sha256=nWa8LYSocqThPKZF7GPMpRrb1TPnqOI4BR2IoL05toU,139
+nexaai/asr.py,sha256=wqtq71cxIMGE4KvOIYZebHdWik8dy4LyKrDI98PDvzQ,2294
 nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
-nexaai/common.py,sha256=
-nexaai/cv.py,sha256=
-nexaai/embedder.py,sha256=
-nexaai/image_gen.py,sha256=
-nexaai/llm.py,sha256
+nexaai/common.py,sha256=MRWZ6a7pnci_OUHxZRm3YqgKLAtZFD7b88STYDfeIF8,3460
+nexaai/cv.py,sha256=gpE3F__6bjh8OQKNJZs-QrBuCxqMj2eH-u6HR90vGZE,3302
+nexaai/embedder.py,sha256=lXOT16PEvd_hT23d77dZH38VHNOAk-3JvoOUdQTEaGI,2552
+nexaai/image_gen.py,sha256=MkGw1HXqqv8cJzbiGERNPKFXfq9vMOlvuq0pgekXw68,4385
+nexaai/llm.py,sha256=-agVJuj0FOaDvDiT-fFSOpoyVt-MpNudBucsod3Vp1M,3673
 nexaai/log.py,sha256=Kwo2CIfWN6iP4M4F5EUIV8KIO5hAsvz6HZAaOwJ27Og,2628
-nexaai/rerank.py,sha256=
+nexaai/rerank.py,sha256=rFKm1Y_ou__0lU82OTy4j_AYIGVBGfID0gzuZ6zXYsM,1968
 nexaai/runtime.py,sha256=JvllhlNPgYGLbgGyX2yNvmGzT0lZ5XbvTvEo8sZG_Ho,2067
-nexaai/
-nexaai/
+nexaai/runtime_error.py,sha256=sO87LyCA0qzm0hVqBrmG2FDzGQH865EMbTMop2OfZto,779
+nexaai/tts.py,sha256=jvgDZIyo47NBDny6z74IQT2SDDVo7Mpp-QZwl6YxARU,2196
+nexaai/vlm.py,sha256=LUrd1_SGHOsYpWyUymX93oEIsNJv7XzHIHo4hBZOhQA,4800
 nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
-nexaai/asr_impl/pybind_asr_impl.py,sha256=
-nexaai/binds/__init__.py,sha256=
+nexaai/asr_impl/pybind_asr_impl.py,sha256=FLOWIph37q_nIiNx8xYi-VnhQ6CrPuc4HFAJZQKc42w,4680
+nexaai/binds/__init__.py,sha256=2-Rr0NwyWygqwS8Xlxq0BJ2ltyID-WbGuzEYNlSanCI,155
+nexaai/binds/asr_bind.cpython-310-darwin.so,sha256=QmxLTY6qmHtbkdZlSyvdh7pVh0KP9j1ARtIWJDi_QMs,217096
 nexaai/binds/common_bind.cpython-310-darwin.so,sha256=BoXByRlNGDaNS1YyZyCF-s7h0vXP9NLPlJMQQ5pqusU,235488
-nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=
-nexaai/binds/libnexa_bridge.dylib,sha256=
-nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=
-nexaai/binds/
+nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=ZOJLzVvTUkbDdBBak1ylOmKx_bwHVzaPvha6RkoLpGo,202032
+nexaai/binds/libnexa_bridge.dylib,sha256=fQSsvrM4-9tWIedEkTpdHvFlFzeatI7q2Llswkrhl-4,290352
+nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=O-HyjCya-GBZnaIb_GJSxk5kBJRCaQL6nKu_qBGEZ1w,183096
+nexaai/binds/rerank_bind.cpython-310-darwin.so,sha256=seJQ1ZpYVR_RCMmBvPSHnLj5LCHX33k5VUFadUkQsvI,200384
+nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=IXM3RTJx-rii3DNZCAVY6eKxn9C8TtAMP9i5bi8qA6s,199392
 nexaai/binds/cpu_gpu/libggml-base.dylib,sha256=YDclLDlP7XlDpXiKfTOTt6mW7jgXlmwSoT_VuRrGrmM,629528
 nexaai/binds/cpu_gpu/libggml-cpu.so,sha256=cnLUQ7WdX-5iiDaH8v45u1kX1NUmK8DanpzSMGCpXPE,1039800
 nexaai/binds/cpu_gpu/libggml-metal.so,sha256=Xhhl_tLg1xmCIQVrKjqPFaLHAlx_2wUFiwDyUk0wJ-E,713680
 nexaai/binds/cpu_gpu/libggml.dylib,sha256=12Q1Z98oM81hxzT_GMQsW5rlhC8DOMsX6luWVCFQHcI,58336
 nexaai/binds/cpu_gpu/libmtmd.dylib,sha256=4-KGS82gxwwIJBNHuZ88mzzTbNZ12tqsDD46-ey6sQ4,701504
 nexaai/binds/cpu_gpu/libnexa_cpu_gpu.dylib,sha256=9qrrMOlGWM9cWUORg64GfkE_p9aQ1rjIp_z-QVfIFH8,1982280
-nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=
+nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=GiXEXNYePuJRaCtnJw1jrS2dtPcp90qr-IvnrL95dmU,2064152
 nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
 nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
 nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/embedder_impl/mlx_embedder_impl.py,sha256=
-nexaai/embedder_impl/pybind_embedder_impl.py,sha256=
+nexaai/embedder_impl/mlx_embedder_impl.py,sha256=pFPraUAjm9EVvVbwIp1cjbtXUysF5pqxEcK2CAFvcDw,4639
+nexaai/embedder_impl/pybind_embedder_impl.py,sha256=lFpf0wI2d7kfO2GUyUuUS1U2L_PyZMJVGmAvF8EuQ0g,3653
 nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
 nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
 nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/llm_impl/mlx_llm_impl.py,sha256=
-nexaai/llm_impl/pybind_llm_impl.py,sha256=
+nexaai/llm_impl/mlx_llm_impl.py,sha256=dPtaEribluHZZY_f9M114glcQhtDEckukw4Sfd5zJos,11296
+nexaai/llm_impl/pybind_llm_impl.py,sha256=XXnUuRZMr9rrEL1vM6VTwsgs0KQnKn4C3TyrHE46uw8,8139
 nexaai/mlx_backend/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
 nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
 nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -248,7 +251,7 @@ nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXq
 nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
 nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=srN8-RFv8eOeH2rdyygCJ7Yt7kW7MQzS3i50UHBVfIM,13151
 nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py,sha256=ZSbM8JjTlkxUaVO9UNZM6YSbd60am3Z4ztJJEBsnJHg,9015
-nexaai/mlx_backend/vlm/interface.py,sha256=
+nexaai/mlx_backend/vlm/interface.py,sha256=D6TCUWbiGLkgmAk_b9yMb36Y4TLGT9gFPxnTaDSaCSM,23070
 nexaai/mlx_backend/vlm/main.py,sha256=8bmSTtyebp8eyL2jL36DZbNHapOpFXNmjM2NyzCFqGs,12919
 nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -384,21 +387,21 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
 nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
 nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
 nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/rerank_impl/mlx_rerank_impl.py,sha256=
-nexaai/rerank_impl/pybind_rerank_impl.py,sha256=
+nexaai/rerank_impl/mlx_rerank_impl.py,sha256=3nbqCdzyAugc4P_6K9mowEgy4LFdfzhy7GUvn9GMpSE,3377
+nexaai/rerank_impl/pybind_rerank_impl.py,sha256=tmzrpRYCCV3ATxbE9G1Io6SUtgYPO8BFe48nTae6_xw,4490
 nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
 nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
 nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
-nexaai/utils/manifest_utils.py,sha256=
-nexaai/utils/model_manager.py,sha256=
+nexaai/utils/manifest_utils.py,sha256=OOp_BmFWH1ZHMYkS2VGAby5Rpm4f4GLCRBJEBYm-kys,21489
+nexaai/utils/model_manager.py,sha256=OnL87zCPn3cBcScCKo-bHnBUpr24-Po293QC6Bwgx1Q,66112
 nexaai/utils/model_types.py,sha256=ONWjjo8CFPdhxki6qo7MXnSZaEzjBcxa_Kkf_y5NXus,1483
 nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
 nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/vlm_impl/mlx_vlm_impl.py,sha256=
-nexaai/vlm_impl/pybind_vlm_impl.py,sha256=
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
+nexaai/vlm_impl/mlx_vlm_impl.py,sha256=sgHqnX5OCSGLccCnTuRiktIbqThNn3AAIvYE2_Dy4TI,10833
+nexaai/vlm_impl/pybind_vlm_impl.py,sha256=stJKHdhYhBuWUQkky-nHgCv625qDB_1geI3v5BLNGpM,9765
+nexaai-1.0.21.dist-info/METADATA,sha256=WTZ4KM_6xJlrJ-NOaDoQEEPwEidaxuot5bocvRHKB0k,1184
+nexaai-1.0.21.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+nexaai-1.0.21.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.21.dist-info/RECORD,,
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/WHEEL
File without changes
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/top_level.txt
File without changes