nexaai 1.0.21rc4__cp310-cp310-macosx_14_0_universal2.whl → 1.0.21rc5__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.21-rc4"
4
+ __version__ = "1.0.21-rc5"
nexaai/asr.py CHANGED
@@ -35,7 +35,8 @@ class ASR(BaseModel):
35
35
  tokenizer_path: Optional[str] = None,
36
36
  language: Optional[str] = None,
37
37
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
38
- device_id: Optional[str] = None
38
+ device_id: Optional[str] = None,
39
+ **kwargs
39
40
  ) -> 'ASR':
40
41
  """Load ASR model from local path, routing to appropriate implementation."""
41
42
  # Check plugin_id value for routing - handle both enum and string
nexaai/binds/__init__.py CHANGED
@@ -2,3 +2,4 @@ from .common_bind import *
2
2
  from .llm_bind import *
3
3
  from .embedder_bind import *
4
4
  from .vlm_bind import *
5
+ from .rerank_bind import *
Binary file
Binary file
Binary file
nexaai/common.py CHANGED
@@ -8,6 +8,7 @@ class PluginID(str, Enum):
8
8
  MLX = "mlx"
9
9
  LLAMA_CPP = "llama_cpp"
10
10
  NEXAML = "nexaml"
11
+ NPU = "npu"
11
12
 
12
13
 
13
14
  class ChatMessage(TypedDict):
nexaai/cv.py CHANGED
@@ -73,7 +73,8 @@ class CVModel(BaseModel):
73
73
  _: str, # TODO: remove this argument, this is a hack to make api design happy
74
74
  config: CVModelConfig,
75
75
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
76
- device_id: Optional[str] = None
76
+ device_id: Optional[str] = None,
77
+ **kwargs
77
78
  ) -> 'CVModel':
78
79
  """Load CV model from configuration, routing to appropriate implementation."""
79
80
  # Check plugin_id value for routing - handle both enum and string
nexaai/embedder.py CHANGED
@@ -22,12 +22,13 @@ class Embedder(BaseModel):
22
22
  pass
23
23
 
24
24
  @classmethod
25
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
25
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
26
26
  """
27
27
  Load an embedder from model files, routing to appropriate implementation.
28
28
 
29
29
  Args:
30
30
  model_path: Path to the model file
31
+ model_name: Name of the model
31
32
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
32
33
  plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
33
34
 
@@ -39,10 +40,10 @@ class Embedder(BaseModel):
39
40
 
40
41
  if plugin_value == "mlx":
41
42
  from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
42
- return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
43
+ return MLXEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
43
44
  else:
44
45
  from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
45
- return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
46
+ return PyBindEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
46
47
 
47
48
  @abstractmethod
48
49
  def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
@@ -14,12 +14,13 @@ class MLXEmbedderImpl(Embedder):
14
14
  self._mlx_embedder = None
15
15
 
16
16
  @classmethod
17
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
17
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
18
18
  """
19
19
  Load an embedder from model files using MLX backend.
20
20
 
21
21
  Args:
22
22
  model_path: Path to the model file
23
+ model_name: Name of the model
23
24
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
24
25
  plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
25
26
 
@@ -34,6 +35,7 @@ class MLXEmbedderImpl(Embedder):
34
35
  # This will automatically detect if it's JinaV2 or generic model and route correctly
35
36
  instance._mlx_embedder = create_embedder(
36
37
  model_path=model_path,
38
+ # model_name=model_name, # FIXME: For MLX Embedder, model_name is not used
37
39
  tokenizer_path=tokenizer_file
38
40
  )
39
41
 
@@ -16,12 +16,13 @@ class PyBindEmbedderImpl(Embedder):
16
16
  self._handle = _handle_ptr
17
17
 
18
18
  @classmethod
19
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
19
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
20
20
  """
21
21
  Load an embedder from model files
22
22
 
23
23
  Args:
24
24
  model_path: Path to the model file
25
+ model_name: Name of the model
25
26
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
26
27
  plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
27
28
 
@@ -32,7 +33,7 @@ class PyBindEmbedderImpl(Embedder):
32
33
  # Convert enum to string for C++ binding
33
34
  plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
34
35
  # New parameter order: model_path, plugin_id, tokenizer_path (optional)
35
- handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
36
+ handle = embedder_bind.ml_embedder_create(model_path, model_name, plugin_id_str, tokenizer_file)
36
37
  return cls(handle)
37
38
 
38
39
  def eject(self):
nexaai/image_gen.py CHANGED
@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
71
71
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
72
72
  device_id: Optional[str] = None,
73
73
  float16: bool = True,
74
- quantize: bool = False
74
+ quantize: bool = False,
75
+ **kwargs
75
76
  ) -> 'ImageGen':
76
77
  """Load image generation model from local path, routing to appropriate implementation."""
77
78
  # Check plugin_id value for routing - handle both enum and string
nexaai/llm.py CHANGED
@@ -15,10 +15,12 @@ class LLM(BaseModel):
15
15
  @classmethod
16
16
  def _load_from(cls,
17
17
  local_path: str,
18
+ model_name: Optional[str] = None,
18
19
  tokenizer_path: Optional[str] = None,
19
20
  m_cfg: ModelConfig = ModelConfig(),
20
21
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
21
- device_id: Optional[str] = None
22
+ device_id: Optional[str] = None,
23
+ **kwargs
22
24
  ) -> 'LLM':
23
25
  """Load model from local path, routing to appropriate implementation."""
24
26
  # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
26
28
 
27
29
  if plugin_value == "mlx":
28
30
  from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
29
- return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
31
+ return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
30
32
  else:
31
33
  from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
32
- return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
34
+ return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
33
35
 
34
36
  def cancel_generation(self):
35
37
  """Signal to cancel any ongoing stream generation."""
@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
16
16
  @classmethod
17
17
  def _load_from(cls,
18
18
  local_path: str,
19
+ model_name: Optional[str] = None,
19
20
  tokenizer_path: Optional[str] = None,
20
21
  m_cfg: ModelConfig = ModelConfig(),
21
22
  plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
40
41
  instance = cls(m_cfg)
41
42
  instance._mlx_llm = MLXLLMInterface(
42
43
  model_path=local_path,
44
+ # model_name=model_name, # FIXME: For MLX LLM, model_name is not used
43
45
  tokenizer_path=tokenizer_path or local_path,
44
46
  config=mlx_config,
45
47
  device=device_id
@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
19
19
  @classmethod
20
20
  def _load_from(cls,
21
21
  local_path: str,
22
+ model_name: Optional[str] = None,
22
23
  tokenizer_path: Optional[str] = None,
23
24
  m_cfg: ModelConfig = ModelConfig(),
24
25
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
55
56
  plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
56
57
  handle = llm_bind.ml_llm_create(
57
58
  model_path=local_path,
59
+ model_name=model_name,
58
60
  tokenizer_path=tokenizer_path,
59
61
  model_config=config,
60
62
  plugin_id=plugin_id_str,
nexaai/rerank.py CHANGED
@@ -24,9 +24,11 @@ class Reranker(BaseModel):
24
24
  @classmethod
25
25
  def _load_from(cls,
26
26
  model_path: str,
27
+ model_name: str = None,
27
28
  tokenizer_file: str = "tokenizer.json",
28
29
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
29
- device_id: Optional[str] = None
30
+ device_id: Optional[str] = None,
31
+ **kwargs
30
32
  ) -> 'Reranker':
31
33
  """Load reranker model from local path, routing to appropriate implementation."""
32
34
  # Check plugin_id value for routing - handle both enum and string
@@ -34,10 +36,10 @@ class Reranker(BaseModel):
34
36
 
35
37
  if plugin_value == "mlx":
36
38
  from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
37
- return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
39
+ return MLXRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
38
40
  else:
39
41
  from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
40
- return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
42
+ return PyBindRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
41
43
 
42
44
  @abstractmethod
43
45
  def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
@@ -17,6 +17,7 @@ class MLXRerankImpl(Reranker):
17
17
  @classmethod
18
18
  def _load_from(cls,
19
19
  model_path: str,
20
+ model_name: str = None,
20
21
  tokenizer_file: str = "tokenizer.json",
21
22
  plugin_id: Union[PluginID, str] = PluginID.MLX,
22
23
  device_id: Optional[str] = None
@@ -29,6 +30,7 @@ class MLXRerankImpl(Reranker):
29
30
  instance = cls()
30
31
  instance._mlx_reranker = create_reranker(
31
32
  model_path=model_path,
33
+ # model_name=model_name, # FIXME: For MLX Reranker, model_name is not used
32
34
  tokenizer_path=tokenizer_file,
33
35
  device=device_id
34
36
  )
@@ -1,36 +1,89 @@
1
1
  from typing import List, Optional, Sequence, Union
2
+ import numpy as np
2
3
 
3
4
  from nexaai.common import PluginID
4
5
  from nexaai.rerank import Reranker, RerankConfig
6
+ from nexaai.binds import rerank_bind, common_bind
7
+ from nexaai.runtime import _ensure_runtime
5
8
 
6
9
 
7
10
  class PyBindRerankImpl(Reranker):
8
- def __init__(self):
9
- """Initialize PyBind Rerank implementation."""
11
+ def __init__(self, _handle_ptr):
12
+ """
13
+ Internal initializer
14
+
15
+ Args:
16
+ _handle_ptr: Capsule handle to the C++ reranker object
17
+ """
10
18
  super().__init__()
11
- # TODO: Add PyBind-specific initialization
19
+ self._handle = _handle_ptr
12
20
 
13
21
  @classmethod
14
22
  def _load_from(cls,
15
23
  model_path: str,
24
+ model_name: str = None,
16
25
  tokenizer_file: str = "tokenizer.json",
17
26
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
18
27
  device_id: Optional[str] = None
19
28
  ) -> 'PyBindRerankImpl':
20
- """Load reranker model from local path using PyBind backend."""
21
- # TODO: Implement PyBind reranker loading
22
- instance = cls()
23
- return instance
29
+ """
30
+ Load reranker model from local path using PyBind backend.
31
+
32
+ Args:
33
+ model_path: Path to the model file
34
+ model_name: Name of the model (optional)
35
+ tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
36
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
37
+ device_id: Device ID to use for the model (optional)
38
+
39
+ Returns:
40
+ PyBindRerankImpl instance
41
+ """
42
+ _ensure_runtime()
43
+
44
+ # Convert enum to string for C++ binding
45
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
46
+
47
+ # Create model config
48
+ model_config = common_bind.ModelConfig()
49
+
50
+ # Create reranker handle with new API signature
51
+ handle = rerank_bind.ml_reranker_create(
52
+ model_path,
53
+ model_name,
54
+ tokenizer_file,
55
+ model_config,
56
+ plugin_id_str,
57
+ device_id
58
+ )
59
+
60
+ return cls(handle)
24
61
 
25
62
  def eject(self):
26
- """Destroy the model and free resources."""
27
- # TODO: Implement PyBind reranker cleanup
28
- pass
63
+ """
64
+ Clean up resources and destroy the reranker
65
+ """
66
+ # Destructor of the handle will unload the model correctly
67
+ if hasattr(self, '_handle') and self._handle is not None:
68
+ del self._handle
69
+ self._handle = None
29
70
 
30
71
  def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
31
- """Load model from path."""
32
- # TODO: Implement PyBind reranker model loading
33
- raise NotImplementedError("PyBind reranker model loading not yet implemented")
72
+ """
73
+ Load model from path.
74
+
75
+ Note: This method is not typically used directly. Use _load_from instead.
76
+
77
+ Args:
78
+ model_path: Path to the model file
79
+ extra_data: Additional data (unused)
80
+
81
+ Returns:
82
+ True if successful
83
+ """
84
+ # This method is part of the BaseModel interface but typically not used
85
+ # directly for PyBind implementations since _load_from handles creation
86
+ raise NotImplementedError("Use _load_from class method to load models")
34
87
 
35
88
  def rerank(
36
89
  self,
@@ -38,6 +91,46 @@ class PyBindRerankImpl(Reranker):
38
91
  documents: Sequence[str],
39
92
  config: Optional[RerankConfig] = None,
40
93
  ) -> List[float]:
41
- """Rerank documents given a query."""
42
- # TODO: Implement PyBind reranking
43
- raise NotImplementedError("PyBind reranking not yet implemented")
94
+ """
95
+ Rerank documents given a query.
96
+
97
+ Args:
98
+ query: Query text as UTF-8 string
99
+ documents: List of document texts to rerank
100
+ config: Optional reranking configuration
101
+
102
+ Returns:
103
+ List of ranking scores (one per document)
104
+ """
105
+ if self._handle is None:
106
+ raise RuntimeError("Reranker handle is None. Model may have been ejected.")
107
+
108
+ # Use default config if not provided
109
+ if config is None:
110
+ config = RerankConfig()
111
+
112
+ # Create bind config
113
+ bind_config = rerank_bind.RerankConfig()
114
+ bind_config.batch_size = config.batch_size
115
+ bind_config.normalize = config.normalize
116
+ bind_config.normalize_method = config.normalize_method
117
+
118
+ # Convert documents to list if needed
119
+ documents_list = list(documents)
120
+
121
+ # Call the binding which returns a dict with scores and profile_data
122
+ result = rerank_bind.ml_reranker_rerank(
123
+ self._handle,
124
+ query,
125
+ documents_list,
126
+ bind_config
127
+ )
128
+
129
+ # Extract scores from result dict
130
+ scores_array = result.get("scores", np.array([]))
131
+
132
+ # Convert numpy array to list of floats
133
+ if isinstance(scores_array, np.ndarray):
134
+ return scores_array.tolist()
135
+ else:
136
+ return []
nexaai/tts.py CHANGED
@@ -45,7 +45,8 @@ class TTS(BaseModel):
45
45
  model_path: str,
46
46
  vocoder_path: str,
47
47
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
48
- device_id: Optional[str] = None
48
+ device_id: Optional[str] = None,
49
+ **kwargs
49
50
  ) -> 'TTS':
50
51
  """Load TTS model from local path, routing to appropriate implementation."""
51
52
  # Check plugin_id value for routing - handle both enum and string
@@ -1344,7 +1344,7 @@ def _download_model_if_needed(
1344
1344
  token: Union[bool, str, None] = None,
1345
1345
  is_mmproj: bool = False,
1346
1346
  **kwargs
1347
- ) -> str:
1347
+ ) -> tuple[str, Optional[str], Optional[str]]:
1348
1348
  """
1349
1349
  Helper function to download a model from HuggingFace if it doesn't exist locally.
1350
1350
 
@@ -1355,15 +1355,78 @@ def _download_model_if_needed(
1355
1355
  token: HuggingFace authentication token for private repositories
1356
1356
 
1357
1357
  Returns:
1358
- str: Local path to the model (either existing or downloaded)
1358
+ tuple[str, Optional[str], Optional[str]]: Tuple of (local_path, model_name, plugin_id)
1359
+ - local_path: Local path to the model (either existing or downloaded)
1360
+ - model_name: ModelName from nexa.manifest if available, None otherwise
1361
+ - plugin_id: PluginId from nexa.manifest if available, None otherwise
1359
1362
 
1360
1363
  Raises:
1361
1364
  RuntimeError: If download fails
1362
1365
  """
1366
+ # Helper function to extract model info from manifest
1367
+ def _extract_info_from_manifest(path: str) -> tuple[Optional[str], Optional[str], Optional[dict]]:
1368
+ """Extract ModelName, PluginId, and full manifest from nexa.manifest if it exists."""
1369
+ # If path is a file, check its parent directory for manifest
1370
+ if os.path.isfile(path):
1371
+ manifest_dir = os.path.dirname(path)
1372
+ else:
1373
+ manifest_dir = path
1374
+
1375
+ manifest_path = os.path.join(manifest_dir, 'nexa.manifest')
1376
+ if not os.path.exists(manifest_path):
1377
+ return None, None, None
1378
+
1379
+ try:
1380
+ with open(manifest_path, 'r', encoding='utf-8') as f:
1381
+ manifest = json.load(f)
1382
+ return manifest.get('ModelName'), manifest.get('PluginId'), manifest
1383
+ except (json.JSONDecodeError, IOError):
1384
+ return None, None, None
1385
+
1386
+ # Helper function to get a model file path from manifest
1387
+ # Note: This is for NPU only, because a downloaded model is a directory, but the local path passed to inference needs to be a file.
1388
+ def _get_model_file_from_manifest(manifest: dict, base_dir: str) -> Optional[str]:
1389
+ """Extract a model file path from manifest's ModelFile section."""
1390
+ if not manifest or 'ModelFile' not in manifest:
1391
+ return None
1392
+
1393
+ model_files = manifest['ModelFile']
1394
+ # Find the first valid model file (skip N/A entries and metadata files)
1395
+ for key, file_info in model_files.items():
1396
+ if key == 'N/A':
1397
+ continue
1398
+ if isinstance(file_info, dict) and 'Name' in file_info:
1399
+ file_name = file_info['Name']
1400
+ # Skip common non-model files
1401
+ if file_name and not file_name.startswith('.') and file_name.endswith('.nexa'):
1402
+ file_path = os.path.join(base_dir, file_name)
1403
+ if os.path.exists(file_path):
1404
+ return file_path
1405
+
1406
+ # If no .nexa files found, try ExtraFiles for .nexa files
1407
+ if 'ExtraFiles' in manifest:
1408
+ for file_info in manifest['ExtraFiles']:
1409
+ if isinstance(file_info, dict) and 'Name' in file_info:
1410
+ file_name = file_info['Name']
1411
+ if file_name and file_name.endswith('.nexa') and not file_name.startswith('.cache'):
1412
+ file_path = os.path.join(base_dir, file_name)
1413
+ if os.path.exists(file_path):
1414
+ return file_path
1415
+
1416
+ return None
1417
+
1363
1418
  # Check if model_path exists locally (file or directory)
1364
1419
  if os.path.exists(model_path):
1365
- # Local path exists, return as-is
1366
- return model_path
1420
+ # Local path exists, try to extract model info
1421
+ model_name, plugin_id, manifest = _extract_info_from_manifest(model_path)
1422
+
1423
+ # If PluginId is "npu" and path is a directory, convert to file path
1424
+ if plugin_id == "npu" and os.path.isdir(model_path):
1425
+ model_file_path = _get_model_file_from_manifest(manifest, model_path)
1426
+ if model_file_path:
1427
+ model_path = model_file_path
1428
+
1429
+ return model_path, model_name, plugin_id
1367
1430
 
1368
1431
  # Model path doesn't exist locally, try to download from HuggingFace
1369
1432
  try:
@@ -1383,7 +1446,16 @@ def _download_model_if_needed(
1383
1446
  **kwargs
1384
1447
  )
1385
1448
 
1386
- return downloaded_path
1449
+ # Extract model info from the downloaded manifest
1450
+ model_name, plugin_id, manifest = _extract_info_from_manifest(downloaded_path)
1451
+
1452
+ # If PluginId is "npu" and path is a directory, convert to file path
1453
+ if plugin_id == "npu" and os.path.isdir(downloaded_path):
1454
+ model_file_path = _get_model_file_from_manifest(manifest, downloaded_path)
1455
+ if model_file_path:
1456
+ downloaded_path = model_file_path
1457
+
1458
+ return downloaded_path, model_name, plugin_id
1387
1459
 
1388
1460
  except Exception as e:
1389
1461
  # Only handle download-related errors
@@ -1452,7 +1524,7 @@ def auto_download_model(func: Callable) -> Callable:
1452
1524
  # Download name_or_path if needed
1453
1525
  if name_or_path is not None:
1454
1526
  try:
1455
- downloaded_name_path = _download_model_if_needed(
1527
+ downloaded_name_path, model_name, plugin_id = _download_model_if_needed(
1456
1528
  name_or_path, 'name_or_path', progress_callback, token, **kwargs
1457
1529
  )
1458
1530
 
@@ -1463,6 +1535,10 @@ def auto_download_model(func: Callable) -> Callable:
1463
1535
  args = tuple(args_list)
1464
1536
  else:
1465
1537
  kwargs['name_or_path'] = downloaded_name_path
1538
+
1539
+ # Add model_name to kwargs if it exists and not already set
1540
+ if model_name is not None and 'model_name' not in kwargs:
1541
+ kwargs['model_name'] = model_name
1466
1542
 
1467
1543
  except Exception as e:
1468
1544
  raise e # Re-raise the error from _download_model_if_needed
@@ -1470,7 +1546,7 @@ def auto_download_model(func: Callable) -> Callable:
1470
1546
  # Download mmproj_path if needed
1471
1547
  if mmproj_path is not None:
1472
1548
  try:
1473
- downloaded_mmproj_path = _download_model_if_needed(
1549
+ downloaded_mmproj_path, _, _ = _download_model_if_needed(
1474
1550
  mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
1475
1551
  )
1476
1552
 
@@ -1482,5 +1558,5 @@ def auto_download_model(func: Callable) -> Callable:
1482
1558
 
1483
1559
  # Call original function with updated paths (outside try-catch to let model creation errors bubble up)
1484
1560
  return func(*args, **kwargs)
1485
-
1561
+
1486
1562
  return wrapper
nexaai/vlm.py CHANGED
@@ -22,7 +22,8 @@ class VLM(BaseModel):
22
22
  model_name: Optional[str] = None,
23
23
  m_cfg: ModelConfig = ModelConfig(),
24
24
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
25
- device_id: Optional[str] = None
25
+ device_id: Optional[str] = None,
26
+ **kwargs
26
27
  ) -> 'VLM':
27
28
  """Load VLM model from local path, routing to appropriate implementation.
28
29
 
@@ -73,7 +73,7 @@ class PyBindVLMImpl(VLM):
73
73
  handle = vlm_bind.create_vlm(
74
74
  model_path=local_path,
75
75
  mmproj_path=mmproj_path,
76
- # model_name=model_name, # TODO: enable model_name in pybind later
76
+ model_name=model_name,
77
77
  model_config=config,
78
78
  plugin_id=plugin_id_str,
79
79
  device_id=device_id
@@ -109,11 +109,10 @@ class PyBindVLMImpl(VLM):
109
109
  t = c["type"]
110
110
  if t == "text":
111
111
  blocks.append({"type": "text", "text": c.get("text","") or ""})
112
- elif t == "image":
113
- # Pass through the original structure - let vlm-bind.cpp handle field extraction
114
- blocks.append(c)
115
112
  else:
116
- raise ValueError(f"Unsupported content type: {t}. Use 'text' or 'image' to match the golden reference in vlm.cpp")
113
+ # Pass through the original structure for image, audio, and any other types
114
+ # Let vlm-bind.cpp handle field extraction (text/url/path)
115
+ blocks.append(c)
117
116
 
118
117
  payload.append({"role": role, "content": blocks})
119
118
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.21rc4
3
+ Version: 1.0.21rc5
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -14,7 +14,6 @@ Requires-Python: >=3.7
14
14
  Description-Content-Type: text/markdown
15
15
  Requires-Dist: huggingface_hub
16
16
  Requires-Dist: tqdm
17
- Requires-Dist: hf_xet
18
17
  Requires-Dist: numpy
19
18
  Requires-Dist: httpx
20
19
  Provides-Extra: mlx
@@ -1,28 +1,29 @@
1
1
  nexaai/__init__.py,sha256=gOd7sNsqEESopw_24xgnOSkIRENrk4Fa-RMtmVv62eA,2421
2
- nexaai/_stub.cpython-310-darwin.so,sha256=S7HBEXSqi5I45poF3LYs2TG074WA8bKc1x8HwuoZyWs,66768
3
- nexaai/_version.py,sha256=96J46jiKFpZK_xMMQViecm1z3kZktIDNNqQCXx0kYp0,143
4
- nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=tKGwBF2vBcfDMxLzlwD0mCcn3NhgwqqkETX_6ZQhxEY,66768
3
+ nexaai/_version.py,sha256=xjLVIdWRLT2AZrjSVMMaQ9riEM81_VB5EUm6fx_kLHU,143
4
+ nexaai/asr.py,sha256=BglyvLM_SiLbQwKhihOuiyX2m1I9-Kq_gYcE7vpjBPY,2083
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
- nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
7
- nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
8
- nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
9
- nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
10
- nexaai/llm.py,sha256=S1o_k2VQoF5w2wO25f142OO1R75TP89Ii69VZv8pIGo,3567
6
+ nexaai/common.py,sha256=MRWZ6a7pnci_OUHxZRm3YqgKLAtZFD7b88STYDfeIF8,3460
7
+ nexaai/cv.py,sha256=gpE3F__6bjh8OQKNJZs-QrBuCxqMj2eH-u6HR90vGZE,3302
8
+ nexaai/embedder.py,sha256=lXOT16PEvd_hT23d77dZH38VHNOAk-3JvoOUdQTEaGI,2552
9
+ nexaai/image_gen.py,sha256=MkGw1HXqqv8cJzbiGERNPKFXfq9vMOlvuq0pgekXw68,4385
10
+ nexaai/llm.py,sha256=-agVJuj0FOaDvDiT-fFSOpoyVt-MpNudBucsod3Vp1M,3673
11
11
  nexaai/log.py,sha256=Kwo2CIfWN6iP4M4F5EUIV8KIO5hAsvz6HZAaOwJ27Og,2628
12
- nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
12
+ nexaai/rerank.py,sha256=rFKm1Y_ou__0lU82OTy4j_AYIGVBGfID0gzuZ6zXYsM,1968
13
13
  nexaai/runtime.py,sha256=JvllhlNPgYGLbgGyX2yNvmGzT0lZ5XbvTvEo8sZG_Ho,2067
14
14
  nexaai/runtime_error.py,sha256=sO87LyCA0qzm0hVqBrmG2FDzGQH865EMbTMop2OfZto,779
15
- nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
16
- nexaai/vlm.py,sha256=MreJ_S5-C0KH3haFuJwHqVtL099MrrmBQ23vK1PINCc,4771
15
+ nexaai/tts.py,sha256=jvgDZIyo47NBDny6z74IQT2SDDVo7Mpp-QZwl6YxARU,2196
16
+ nexaai/vlm.py,sha256=LUrd1_SGHOsYpWyUymX93oEIsNJv7XzHIHo4hBZOhQA,4800
17
17
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
19
19
  nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
20
- nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
20
+ nexaai/binds/__init__.py,sha256=tI15Ghxgp_PZAdob7op8_K5Ia8DNiTvXiYpFZyB4oN0,131
21
21
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=zxJuD0nSV--VZKxBfWZUavU7_bHj_JTi0FhkjvG4VJw,235264
22
- nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=tPa0c0Dv_GiW66fgmAGWGCHXRGNApznqoQS0eQx9GFM,202064
23
- nexaai/binds/libnexa_bridge.dylib,sha256=0nzFJHOy_JI55ALwJDo2cmNAFJqcEkwlLF40ESPFaJ4,272648
24
- nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=TAWfa1Hzq00TjtC1xVsiAeLp6hv2LrL5afDz4omUghc,182784
25
- nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=VpTl0U8YmiYTyp5vNlmLRxiwGD8-4am8nBvdWYY1d-I,182704
22
+ nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=nn3kINQUNyXIzmcU1olLg8RlkZYyIs_RtIlCk6OH1ds,202064
23
+ nexaai/binds/libnexa_bridge.dylib,sha256=O5cI95LKiety_NjJT21lOz5KlXxUZSY5kqgXzPQXx18,272648
24
+ nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=YShsXbe_n2N05joMmTnZXaXh9gM9LGdcmuWUxUc5plI,182784
25
+ nexaai/binds/rerank_bind.cpython-310-darwin.so,sha256=GJmffOLo9A48S_pMG2CtHyhbamtt97QikSSCXk3LqiM,183920
26
+ nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=JEoQbyOkMXVK4WyBCqKBHbyXHtTiTWT18UkZMFvMh4k,182704
26
27
  nexaai/binds/cpu_gpu/libggml-base.dylib,sha256=9urbatWTB4QJ6uariOYlB_ZN1FFci5GaD8OfGZ9oPSQ,650736
27
28
  nexaai/binds/cpu_gpu/libggml-cpu.so,sha256=qiYxbTe4Nt7n36zJVvq3zovgSZEmrN2is6gzTern7UI,677728
28
29
  nexaai/binds/cpu_gpu/libggml-metal.so,sha256=zfaX7rIBYQazH2lf-vza007BMhPTK1ASd2T0HLLIA4E,673104
@@ -202,14 +203,14 @@ nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
202
203
  nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
203
204
  nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
204
205
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTjOC1VJ9ypIgCvkK_jKNSWpswbg132rDcTzWcL5oFA,4482
206
- nexaai/embedder_impl/pybind_embedder_impl.py,sha256=9gsHuSbF64IZH9ugqv4-GTUuRpy-FJNUb0cww2QR3uA,3575
206
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=pFPraUAjm9EVvVbwIp1cjbtXUysF5pqxEcK2CAFvcDw,4639
207
+ nexaai/embedder_impl/pybind_embedder_impl.py,sha256=lFpf0wI2d7kfO2GUyUuUS1U2L_PyZMJVGmAvF8EuQ0g,3653
207
208
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
208
209
  nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
209
210
  nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
210
211
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
211
- nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
212
- nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
212
+ nexaai/llm_impl/mlx_llm_impl.py,sha256=dPtaEribluHZZY_f9M114glcQhtDEckukw4Sfd5zJos,11296
213
+ nexaai/llm_impl/pybind_llm_impl.py,sha256=XXnUuRZMr9rrEL1vM6VTwsgs0KQnKn4C3TyrHE46uw8,8139
213
214
  nexaai/mlx_backend/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
214
215
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
215
216
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -553,21 +554,21 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
553
554
  nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
554
555
  nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
555
556
  nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
556
- nexaai/rerank_impl/mlx_rerank_impl.py,sha256=h37PKSIRBY8mwzVeLeP4ix9ui3waIsg4gorzelYLJbM,3243
557
- nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5__R2W837t7c,1513
557
+ nexaai/rerank_impl/mlx_rerank_impl.py,sha256=3nbqCdzyAugc4P_6K9mowEgy4LFdfzhy7GUvn9GMpSE,3377
558
+ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=tmzrpRYCCV3ATxbE9G1Io6SUtgYPO8BFe48nTae6_xw,4490
558
559
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
559
560
  nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
560
561
  nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
561
562
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
562
563
  nexaai/utils/manifest_utils.py,sha256=OOp_BmFWH1ZHMYkS2VGAby5Rpm4f4GLCRBJEBYm-kys,21489
563
- nexaai/utils/model_manager.py,sha256=CsRflxY329DgDeZxl_PGFKis7MQW7XROkrvZUCPEpvo,62022
564
+ nexaai/utils/model_manager.py,sha256=OnL87zCPn3cBcScCKo-bHnBUpr24-Po293QC6Bwgx1Q,66112
564
565
  nexaai/utils/model_types.py,sha256=ONWjjo8CFPdhxki6qo7MXnSZaEzjBcxa_Kkf_y5NXus,1483
565
566
  nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
566
567
  nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
567
568
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
568
569
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=sgHqnX5OCSGLccCnTuRiktIbqThNn3AAIvYE2_Dy4TI,10833
569
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=MDbreWSqugakXU_PqH6mPoCxjKEEbYfQIco_NDck8_s,9905
570
- nexaai-1.0.21rc4.dist-info/METADATA,sha256=GHbnStWeUZeE2wbSl1Omhzxe6i2-LrQRH03jG3i8Yco,1209
571
- nexaai-1.0.21rc4.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
572
- nexaai-1.0.21rc4.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
573
- nexaai-1.0.21rc4.dist-info/RECORD,,
570
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=stJKHdhYhBuWUQkky-nHgCv625qDB_1geI3v5BLNGpM,9765
571
+ nexaai-1.0.21rc5.dist-info/METADATA,sha256=QJ9gbIXVYJHi45-NWVh9g5XxqImymoirQot4jMqi2tw,1187
572
+ nexaai-1.0.21rc5.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
573
+ nexaai-1.0.21rc5.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
574
+ nexaai-1.0.21rc5.dist-info/RECORD,,