nexaai 1.0.4rc15__cp310-cp310-macosx_14_0_universal2.whl → 1.0.5__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai might be problematic.
Files changed (40):
  1. nexaai/__init__.py +6 -1
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +1 -1
  4. nexaai/asr.py +7 -3
  5. nexaai/asr_impl/mlx_asr_impl.py +3 -2
  6. nexaai/asr_impl/pybind_asr_impl.py +3 -2
  7. nexaai/binds/libcrypto.dylib +0 -0
  8. nexaai/binds/libnexa_bridge.dylib +0 -0
  9. nexaai/binds/libssl.dylib +0 -0
  10. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  11. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  12. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  13. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  14. nexaai/common.py +49 -7
  15. nexaai/cv.py +7 -3
  16. nexaai/cv_impl/mlx_cv_impl.py +3 -2
  17. nexaai/cv_impl/pybind_cv_impl.py +3 -2
  18. nexaai/embedder.py +7 -3
  19. nexaai/embedder_impl/mlx_embedder_impl.py +3 -2
  20. nexaai/embedder_impl/pybind_embedder_impl.py +6 -3
  21. nexaai/image_gen.py +6 -2
  22. nexaai/image_gen_impl/mlx_image_gen_impl.py +3 -2
  23. nexaai/image_gen_impl/pybind_image_gen_impl.py +3 -2
  24. nexaai/llm.py +13 -6
  25. nexaai/llm_impl/mlx_llm_impl.py +26 -6
  26. nexaai/llm_impl/pybind_llm_impl.py +17 -6
  27. nexaai/mlx_backend/llm/interface.py +12 -12
  28. nexaai/rerank.py +7 -3
  29. nexaai/rerank_impl/mlx_rerank_impl.py +3 -2
  30. nexaai/rerank_impl/pybind_rerank_impl.py +3 -2
  31. nexaai/tts.py +7 -3
  32. nexaai/tts_impl/mlx_tts_impl.py +3 -2
  33. nexaai/tts_impl/pybind_tts_impl.py +3 -2
  34. nexaai/vlm.py +11 -4
  35. nexaai/vlm_impl/mlx_vlm_impl.py +10 -3
  36. nexaai/vlm_impl/pybind_vlm_impl.py +15 -4
  37. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/METADATA +13 -9
  38. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/RECORD +40 -38
  39. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/WHEEL +0 -0
  40. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/top_level.txt +0 -0
nexaai/__init__.py CHANGED
@@ -19,7 +19,10 @@ except ImportError:
  __version__ = "0.0.1"
 
  # Import common configuration classes first (no external dependencies)
- from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig
+ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, PluginID
+
+ # Create alias for PluginID to be accessible as plugin_id
+ plugin_id = PluginID
 
  # Import new feature classes (no external dependencies in base classes)
  from .llm import LLM
@@ -40,6 +43,8 @@ __all__ = [
  "ChatMessage",
  "SamplerConfig",
  "EmbeddingConfig",
+ "PluginID",
+ "plugin_id",
 
  "LLM",
  "Embedder",
nexaai/_stub.cpython-310-darwin.so CHANGED (binary file)
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is generated by CMake from _version.py.in
  # Do not modify this file manually - it will be overwritten
 
- __version__ = "1.0.4-rc15"
+ __version__ = "1.0.5"
nexaai/asr.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence, Tuple
+ from typing import List, Optional, Sequence, Tuple, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -33,11 +34,14 @@ class ASR(BaseModel):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'ASR':
  """Load ASR model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
  return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
  else:
nexaai/asr_impl/mlx_asr_impl.py CHANGED
@@ -1,7 +1,8 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult
  from nexaai.mlx_backend.asr.interface import MlxAsr as MLXASRInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -18,7 +19,7 @@ class MLXASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXASRImpl':
  """Load ASR model from local path using MLX backend."""
nexaai/asr_impl/pybind_asr_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult
 
 
@@ -14,7 +15,7 @@ class PyBindASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindASRImpl':
  """Load ASR model from local path using PyBind backend."""
Binary file
Binary file
Binary file
Binary file
Binary file
nexaai/common.py CHANGED
@@ -1,5 +1,12 @@
  from dataclasses import dataclass
  from typing import TypedDict, Literal, Optional, List
+ from enum import Enum
+
+
+ class PluginID(str, Enum):
+ """Enum for plugin identifiers."""
+ MLX = "mlx"
+ LLAMA_CPP = "llama_cpp"
 
 
  class ChatMessage(TypedDict):
@@ -52,10 +59,45 @@ class ModelConfig:
 
  @dataclass(frozen=True) # Read-only
  class ProfilingData:
- start_time: int
- end_time: int
- prompt_start_time: int = None
- prompt_end_time: int = None
- decode_start_time: int = None
- decode_ent_time: int = None
- first_token_time: int = None
+ """Profiling data structure for LLM/VLM performance metrics."""
+ ttft: int = 0 # Time to first token (us)
+ prompt_time: int = 0 # Prompt processing time (us)
+ decode_time: int = 0 # Token generation time (us)
+ prompt_tokens: int = 0 # Number of prompt tokens
+ generated_tokens: int = 0 # Number of generated tokens
+ audio_duration: int = 0 # Audio duration (us)
+ prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
+ decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
+ real_time_factor: float = 0.0 # Real-Time Factor (RTF)
+ stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
+
+ @classmethod
+ def from_dict(cls, data: dict) -> "ProfilingData":
+ """Create ProfilingData from dictionary."""
+ return cls(
+ ttft=data.get("ttft", 0),
+ prompt_time=data.get("prompt_time", 0),
+ decode_time=data.get("decode_time", 0),
+ prompt_tokens=data.get("prompt_tokens", 0),
+ generated_tokens=data.get("generated_tokens", 0),
+ audio_duration=data.get("audio_duration", 0),
+ prefill_speed=data.get("prefill_speed", 0.0),
+ decoding_speed=data.get("decoding_speed", 0.0),
+ real_time_factor=data.get("real_time_factor", 0.0),
+ stop_reason=data.get("stop_reason", "")
+ )
+
+ def to_dict(self) -> dict:
+ """Convert to dictionary."""
+ return {
+ "ttft": self.ttft,
+ "prompt_time": self.prompt_time,
+ "decode_time": self.decode_time,
+ "prompt_tokens": self.prompt_tokens,
+ "generated_tokens": self.generated_tokens,
+ "audio_duration": self.audio_duration,
+ "prefill_speed": self.prefill_speed,
+ "decoding_speed": self.decoding_speed,
+ "real_time_factor": self.real_time_factor,
+ "stop_reason": self.stop_reason
+ }
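
The two additions above drive most of this release: a str-based PluginID enum and a rebuilt ProfilingData with dictionary conversion helpers. A minimal sketch of why the routing code can accept either the enum or a plain string (the enum is re-declared locally for illustration rather than imported from the package):

    from enum import Enum

    class PluginID(str, Enum):
        """Enum for plugin identifiers."""
        MLX = "mlx"
        LLAMA_CPP = "llama_cpp"

    # Because PluginID subclasses str, members compare equal to their values:
    assert PluginID.MLX == "mlx"
    assert PluginID.MLX.value == "mlx"

    # The conversion pattern repeated throughout this diff unwraps enum
    # members and passes plain strings through unchanged:
    def to_plugin_value(plugin_id):
        return plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

    assert to_plugin_value(PluginID.MLX) == to_plugin_value("mlx") == "mlx"

Strictly, a str-subclass enum already satisfies equality checks like plugin_id == "mlx"; the explicit unwrap makes the intent plain and guarantees the C++ bindings receive a plain str. Note also that ProfilingData.from_dict defaults every missing key (zeros and an empty stop_reason), so a backend may report any subset of "profile_data".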
nexaai/cv.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -71,11 +72,14 @@ class CVModel(BaseModel):
  def _load_from(cls,
  _: str, # TODO: remove this argument, this is a hack to make api design happy
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'CVModel':
  """Load CV model from configuration, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
  return MLXCVImpl._load_from(config, plugin_id, device_id)
  else:
nexaai/cv_impl/mlx_cv_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import Optional
+ from typing import Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults
  from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model
 
@@ -16,7 +17,7 @@ class MLXCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXCVImpl':
  """Load CV model from configuration using MLX backend."""
nexaai/cv_impl/pybind_cv_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import Optional
+ from typing import Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults
 
 
@@ -12,7 +13,7 @@ class PyBindCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindCVImpl':
  """Load CV model from configuration using PyBind backend."""
nexaai/embedder.py CHANGED
@@ -4,6 +4,7 @@ from abc import abstractmethod
  import numpy as np
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -21,19 +22,22 @@ class Embedder(BaseModel):
  pass
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files, routing to appropriate implementation.
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
  Returns:
  Embedder instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
  return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
  else:
nexaai/embedder_impl/mlx_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np
 
+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.mlx_backend.embedding.interface import Embedder as MLXEmbedderInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -13,14 +14,14 @@ class MLXEmbedderImpl(Embedder):
  self._mlx_embedder = None
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "mlx"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
  """
  Load an embedder from model files using MLX backend.
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "mlx")
+ plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
 
  Returns:
  MLXEmbedderImpl instance
nexaai/embedder_impl/pybind_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np
 
+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.binds import embedder_bind
  from nexaai.runtime import _ensure_runtime
@@ -15,20 +16,22 @@ class PyBindEmbedderImpl(Embedder):
  self._handle = _handle_ptr
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
  Returns:
  PyBindEmbedderImpl instance
  """
  _ensure_runtime()
- handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id)
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+ handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id_str)
  return cls(handle)
 
  def eject(self):
nexaai/image_gen.py CHANGED
@@ -3,6 +3,7 @@ from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -67,13 +68,16 @@ class ImageGen(BaseModel):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
  ) -> 'ImageGen':
  """Load image generation model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.image_gen_impl.mlx_image_gen_impl import MLXImageGenImpl
  return MLXImageGenImpl._load_from(model_path, scheduler_config_path, plugin_id, device_id, float16, quantize)
  else:
nexaai/image_gen_impl/mlx_image_gen_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
  from nexaai.mlx_backend.sd.interface import ImageGen as MLXImageGenInterface
 
@@ -17,7 +18,7 @@ class MLXImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/image_gen_impl/pybind_image_gen_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
 
 
@@ -13,7 +14,7 @@ class PyBindImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/llm.py CHANGED
@@ -1,10 +1,10 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  from abc import abstractmethod
  import queue
  import threading
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
- from nexaai.base import BaseModel
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
+ from nexaai.base import BaseModel, ProfilingData
 
  class LLM(BaseModel):
  def __init__(self, m_cfg: ModelConfig = ModelConfig()):
@@ -17,11 +17,14 @@ class LLM(BaseModel):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'LLM':
  """Load model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
  return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
  else:
@@ -37,7 +40,7 @@ class LLM(BaseModel):
  self._cancel_event.clear()
 
  @abstractmethod
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  pass
 
@@ -60,6 +63,10 @@ class LLM(BaseModel):
  """
  pass
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ pass
+
  @abstractmethod
  def save_kv_cache(self, path: str):
  """
nexaai/llm_impl/mlx_llm_impl.py CHANGED
@@ -1,6 +1,7 @@
- from typing import Generator, Optional, Any
+ from typing import Generator, Optional, Any, Sequence, Union
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.llm import LLM
  from nexaai.mlx_backend.llm.interface import LLM as MLXLLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +18,7 @@ class MLXLLMImpl(LLM):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXLLMImpl':
  """Load model from local path using MLX backend."""
@@ -54,7 +55,13 @@ class MLXLLMImpl(LLM):
  self._mlx_llm.destroy()
  self._mlx_llm = None
 
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(
+ self,
+ messages: Sequence[ChatMessage],
+ tools: Optional[str] = None,
+ enable_thinking: bool = True,
+ add_generation_prompt: bool = True
+ ) -> str:
  """Apply the chat template to messages."""
  if not self._mlx_llm:
  raise RuntimeError("MLX LLM not loaded")
@@ -68,9 +75,16 @@ class MLXLLMImpl(LLM):
  def __init__(self, role, content):
  self.role = role
  self.content = content
- mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
+
+ # Handle both dict-style and attribute-style access
+ if hasattr(msg, 'role') and hasattr(msg, 'content'):
+ # Message is already an object with attributes
+ mlx_messages.append(MLXChatMessage(msg.role, msg.content))
+ else:
+ # Message is a dict
+ mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
 
- return self._mlx_llm.apply_chat_template(mlx_messages)
+ return self._mlx_llm.apply_chat_template(mlx_messages, tools=tools, enable_thinking=enable_thinking, add_generation_prompt=add_generation_prompt)
  except Exception as e:
  raise RuntimeError(f"Failed to apply chat template: {str(e)}")
 
@@ -202,6 +216,12 @@ class MLXLLMImpl(LLM):
  except Exception as e:
  raise RuntimeError(f"Failed to generate text: {str(e)}")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ if not self._mlx_llm:
+ raise RuntimeError("MLX LLM not loaded")
+ return self._mlx_llm.get_profiling_data()
+
  def save_kv_cache(self, path: str):
  """
  Save the key-value cache to the file.
nexaai/llm_impl/pybind_llm_impl.py CHANGED
@@ -1,8 +1,9 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  import queue
  import threading
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.binds import llm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.llm import LLM
@@ -13,13 +14,14 @@ class PyBindLLMImpl(LLM):
  """Private constructor, should not be called directly."""
  super().__init__(m_cfg)
  self._handle = handle # This is a py::capsule
+ self._profiling_data = None
 
  @classmethod
  def _load_from(cls,
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindLLMImpl':
  """Load model from local path."""
@@ -49,11 +51,13 @@ class PyBindLLMImpl(LLM):
  config.chat_template_content = m_cfg.chat_template_content
 
  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = llm_bind.ml_llm_create(
  model_path=local_path,
  tokenizer_path=tokenizer_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
@@ -64,7 +68,7 @@ class PyBindLLMImpl(LLM):
  del self._handle
  self._handle = None
 
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  # Convert TypedDict to list of dicts for binding
  message_dicts = [
@@ -95,13 +99,14 @@ class PyBindLLMImpl(LLM):
  # Run generation in thread
  def generate():
  try:
- llm_bind.ml_llm_generate(
+ result = llm_bind.ml_llm_generate(
  handle=self._handle,
  prompt=prompt,
  config=config,
  on_token=on_token,
  user_data=None
  )
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  except Exception as e:
  exception_container[0] = e
  finally:
@@ -143,8 +148,14 @@ class PyBindLLMImpl(LLM):
  on_token=None, # No callback for non-streaming
  user_data=None
  )
+
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  return result.get("text", "")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data."""
+ return self._profiling_data
+
  def save_kv_cache(self, path: str):
  """
  Save the key-value cache to the file.
nexaai/mlx_backend/llm/interface.py CHANGED
@@ -371,19 +371,19 @@ class LLM(BaseLLM, ProfilingMixin):
  cached_tokens = 0
 
  # Only offset prefix kv-cache at first round
- if is_first_round:
+ # if is_first_round:
 
- # Handle KV cache prefix offset if available
- if self.kv_cache is not None and len(self.kv_cache) > 0:
- # Get the offset from the first cache layer
- if hasattr(self.kv_cache[0], 'offset'):
- cached_tokens = self.kv_cache[0].offset - 1
+ # # Handle KV cache prefix offset if available
+ # if self.kv_cache is not None and len(self.kv_cache) > 0:
+ # # Get the offset from the first cache layer
+ # if hasattr(self.kv_cache[0], 'offset'):
+ # cached_tokens = self.kv_cache[0].offset - 1
 
- # Process only the non-cached tokens
- incremental_tokens = incremental_tokens[cached_tokens:] if cached_tokens > 0 else incremental_tokens
+ # # Process only the non-cached tokens
+ # incremental_tokens = incremental_tokens[cached_tokens:] if cached_tokens > 0 else incremental_tokens
 
- if len(incremental_tokens) == 0:
- raise ValueError("No tokens to process, KV cache is too long.")
+ # if len(incremental_tokens) == 0:
+ # raise ValueError("No tokens to process, KV cache is too long.")
 
  # Since apply_chat_template now returns incremental prompts, we can use the prompt directly
  # The prompt is already the incremental part based on global_n_past
@@ -467,7 +467,7 @@ class LLM(BaseLLM, ProfilingMixin):
  # We'll ignore the argument for now.
  return self.tokenizer.chat_template
 
- def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
+ def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """
  Apply chat template to messages with incremental prompt support and optional tools.
 
@@ -526,7 +526,7 @@ class LLM(BaseLLM, ProfilingMixin):
  incremental_messages,
  tokenize=False,
  enable_thinking=enable_thinking,
- add_generation_prompt=True,
+ add_generation_prompt=add_generation_prompt,
  tools=parsed_tools
  )
  except Exception as e:
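
The add_generation_prompt pass-through is the behavioral change in this file: previously the MLX interface always appended the assistant header. A hedged illustration, reusing the llm handle from the earlier sketch (the marker strings depend entirely on the model's chat template):

    messages = [{"role": "user", "content": "Hi"}]

    # Default, unchanged behaviour: the rendered prompt ends with the
    # assistant header so the model starts generating a reply.
    prompt = llm.apply_chat_template(messages, add_generation_prompt=True)

    # New: render the transcript without priming a reply, useful for
    # scoring or prefill flows rather than generation.
    transcript = llm.apply_chat_template(messages, add_generation_prompt=False)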
nexaai/rerank.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -24,11 +25,14 @@ class Reranker(BaseModel):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'Reranker':
  """Load reranker model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
  return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
  else:
nexaai/rerank_impl/mlx_rerank_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig
  from nexaai.mlx_backend.rerank.interface import Reranker as MLXRerankInterface, create_reranker
 
@@ -17,7 +18,7 @@ class MLXRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXRerankImpl':
  """Load reranker model from local path using MLX backend."""
nexaai/rerank_impl/pybind_rerank_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
 
+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig
 
 
@@ -13,7 +14,7 @@ class PyBindRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindRerankImpl':
  """Load reranker model from local path using PyBind backend."""
nexaai/tts.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -43,11 +44,14 @@ class TTS(BaseModel):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'TTS':
  """Load TTS model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
  return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
  else:
nexaai/tts_impl/mlx_tts_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult
  from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
 
@@ -17,7 +18,7 @@ class MLXTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXTTSImpl':
  """Load TTS model from local path using MLX backend."""
nexaai/tts_impl/pybind_tts_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult
 
 
@@ -13,7 +14,7 @@ class PyBindTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindTTSImpl':
  """Load TTS model from local path using PyBind backend."""
nexaai/vlm.py CHANGED
@@ -5,8 +5,8 @@ import threading
  import base64
  from pathlib import Path
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
- from nexaai.base import BaseModel
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
+ from nexaai.base import BaseModel, ProfilingData
 
 
  class VLM(BaseModel):
@@ -20,7 +20,7 @@ class VLM(BaseModel):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'VLM':
  """Load VLM model from local path, routing to appropriate implementation.
@@ -35,7 +35,10 @@ class VLM(BaseModel):
  Returns:
  VLM instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.vlm_impl.mlx_vlm_impl import MlxVlmImpl
  return MlxVlmImpl._load_from(local_path, mmproj_path, m_cfg, plugin_id, device_id)
  else:
@@ -117,4 +120,8 @@ class VLM(BaseModel):
  Returns:
  str: The generated text.
  """
+ pass
+
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
  pass
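
VLM mirrors the LLM changes: the same plugin_id widening and the same profiling accessor. A hypothetical sketch under the same caveats as the LLM example (paths illustrative, _load_from is the private loader this diff shows):

    from nexaai.vlm import VLM

    vlm = VLM._load_from(
        "/path/to/model.gguf",   # illustrative
        "/path/to/mmproj.gguf",  # illustrative
        plugin_id="llama_cpp",   # plain strings remain valid alongside PluginID members
    )
    # ...after a generation:
    profile = vlm.get_profiling_data()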
nexaai/vlm_impl/mlx_vlm_impl.py CHANGED
@@ -1,6 +1,7 @@
- from typing import Generator, Optional, List, Dict, Any
+ from typing import Generator, Optional, List, Dict, Any, Union
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.vlm import VLM
  from nexaai.mlx_backend.vlm.interface import VLM as MLXVLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +18,7 @@ class MlxVlmImpl(VLM):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MlxVlmImpl':
  """Load VLM model from local path using MLX backend.
@@ -247,3 +248,9 @@ class MlxVlmImpl(VLM):
 
  except Exception as e:
  raise RuntimeError(f"Failed to generate text: {str(e)}")
+
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ if not self._mlx_vlm:
+ raise RuntimeError("MLX VLM not loaded")
+ return self._mlx_vlm.get_profiling_data()
nexaai/vlm_impl/pybind_vlm_impl.py CHANGED
@@ -4,10 +4,11 @@ import threading
  import base64
  from pathlib import Path
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.binds import vlm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.vlm import VLM
+ from nexaai.base import ProfilingData
 
 
  class PyBindVLMImpl(VLM):
@@ -15,13 +16,14 @@ class PyBindVLMImpl(VLM):
  """Private constructor, should not be called directly."""
  super().__init__(m_cfg)
  self._handle = handle # This is a py::capsule
+ self._profiling_data = None
 
  @classmethod
  def _load_from(cls,
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindVLMImpl':
  """Load VLM model from local path.
@@ -61,11 +63,13 @@ class PyBindVLMImpl(VLM):
  config.chat_template_content = m_cfg.chat_template_content
 
  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = vlm_bind.create_vlm(
  model_path=local_path,
  mmproj_path=mmproj_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
@@ -141,13 +145,14 @@ class PyBindVLMImpl(VLM):
  # Run generation in thread
  def generate():
  try:
- vlm_bind.ml_vlm_generate(
+ result = vlm_bind.ml_vlm_generate(
  handle=self._handle,
  prompt=prompt,
  config=config,
  on_token=on_token,
  user_data=None
  )
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  except Exception as e:
  exception_container[0] = e
  finally:
@@ -189,8 +194,14 @@ class PyBindVLMImpl(VLM):
  on_token=None, # No callback for non-streaming
  user_data=None
  )
+
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  return result.get("text", "")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data."""
+ return self._profiling_data
+
  def _convert_generation_config(self, g_cfg: GenerationConfig):
  """Convert GenerationConfig to binding format."""
  config = common_bind.GenerationConfig()
{nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nexaai
- Version: 1.0.4rc15
+ Version: 1.0.5
  Summary: Python bindings for NexaSDK C-lib backend
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -17,11 +17,15 @@ Requires-Dist: tqdm
  Requires-Dist: hf_xet
  Requires-Dist: numpy
  Requires-Dist: httpx
- Requires-Dist: mlx
- Requires-Dist: mlx-lm
- Requires-Dist: scipy
- Requires-Dist: soundfile
- Requires-Dist: Pillow
- Requires-Dist: opencv-python
- Requires-Dist: shapely
- Requires-Dist: pyclipper
+ Provides-Extra: mlx
+ Requires-Dist: mlx; extra == "mlx"
+ Requires-Dist: mlx-lm; extra == "mlx"
+ Requires-Dist: mlx-vlm; extra == "mlx"
+ Requires-Dist: tokenizers; extra == "mlx"
+ Requires-Dist: safetensors; extra == "mlx"
+ Requires-Dist: Pillow; extra == "mlx"
+ Requires-Dist: scipy; extra == "mlx"
+ Requires-Dist: soundfile; extra == "mlx"
+ Requires-Dist: opencv-python; extra == "mlx"
+ Requires-Dist: shapely; extra == "mlx"
+ Requires-Dist: pyclipper; extra == "mlx"
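
This dependency reshuffle is the most user-visible packaging change: the MLX stack (and the image/audio helpers it drags in) moved behind an "mlx" extra, so a default install no longer pulls mlx, mlx-lm, opencv-python, and friends. Standard pip extras syntax applies:

    pip install nexaai            # core install, no MLX dependencies
    pip install "nexaai[mlx]"     # opt in to the MLX backend and its dependencies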
{nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/RECORD CHANGED
@@ -1,33 +1,35 @@
- nexaai/__init__.py,sha256=JTjJWdiBXHZyc_91Oe-GNOcODFp9gbUQM43bzNY7S8Q,1906
- nexaai/_stub.cpython-310-darwin.so,sha256=6cDmmUXwfQIBTowXBsoMjB7kqSQskigaWJAHEB8aaTw,66768
- nexaai/_version.py,sha256=UxLv07_TC8sCUMr5KTEXolBn9DNXJx2RUjkBcGewdXw,143
- nexaai/asr.py,sha256=Yg8Yml_nklzJYl3C_lwvEApTdNjY2czAurDaoEjkiIU,1813
+ nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
+ nexaai/_stub.cpython-310-darwin.so,sha256=z7e7YOjkyjyalKr6u9iLGZ5YHgdvh5d5pLTwGvCCmtM,66768
+ nexaai/_version.py,sha256=j-3XhaOQERPf3uculltVeo9djhrsUyWzi1EY_j4wPEc,138
+ nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
- nexaai/common.py,sha256=VPM7NaUNaLTT7quW-u4D2uOeNrQqPjvfcgJlYGS3Qy8,1525
- nexaai/cv.py,sha256=KOaiRouiQ-YFP8FL20QuiieJfHN7DzASEi5_0m6H-E0,3032
- nexaai/embedder.py,sha256=VheiZEYBuuBjhQcvLawCz26jX0I169Xk4b9VP-ERjqU,2211
- nexaai/image_gen.py,sha256=IhLQLpmPkK9KcHteUdaQdxrnTIjk6xdyekRqeJtHfWw,4122
- nexaai/llm.py,sha256=egHa6YafNWyZy5qrmZRNZlFHO8LRUejc_gkOpK0nbnw,3105
- nexaai/rerank.py,sha256=7EEm96gpvd6kXO_Q8xSrQDlLZdAYTk0MODeNWDq70WA,1631
+ nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
+ nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
+ nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
+ nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
+ nexaai/llm.py,sha256=S1o_k2VQoF5w2wO25f142OO1R75TP89Ii69VZv8pIGo,3567
+ nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
  nexaai/runtime.py,sha256=mxxHYsb5iBUAm2K_u-XJWr_U-spJ9S4eApc8kf9myjw,1957
- nexaai/tts.py,sha256=4EbC0BfFh5TLrm_3Q5vx1sXdug5gvOi-owNeX7ekbdA,1926
- nexaai/vlm.py,sha256=g65S8ChMnp_wsz_O4szjR3Z8sD_46NHaxDlfdoZoQ0c,4291
+ nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
+ nexaai/vlm.py,sha256=3voXmAVnGlXnOiwA3wcX4p0Lvmp0X1VKkQVPObJdwBY,4649
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/asr_impl/mlx_asr_impl.py,sha256=JuyxFzFbbgclK5_2Rq5pT278h0q8LztJX7Tggz0zkbM,3191
- nexaai/asr_impl/pybind_asr_impl.py,sha256=ybvthYgtVbH_JgpSsl0nxjZYvXyk8KGRSKdsJ-hLfZE,1450
+ nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
+ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
  nexaai/binds/__init__.py,sha256=T9Ua7SzHNglSeEqXlfH5ymYXRyXhNKkC9z_y_bWCNMo,80
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=hVxY76tn7hN6uHDIgM7LWNvgoudHgNZVoaygM9X1RWE,217232
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=FT8581RNciilskK89PhtnNSjw4Oh0-xk8QdbJVFmOd8,202064
- nexaai/binds/libnexa_bridge.dylib,sha256=cdriv4BpSoRUlNmFCVuSAHiWeVjnC8KKihCTiTNrJno,251256
- nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=Bv08rn9OBAHy01eAQeANiJSrCxskn1xSx4Gl1Vcrhm0,166064
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=D_mlY_PNMIFlm6mHERSLMoA7QfYHwEPVlb0UKMbl2N0,632048
+ nexaai/binds/libcrypto.dylib,sha256=aWif9WhTKVQhmZL3DmtIpMkZY5JSb_Ny6CClmUBKYM4,4710416
+ nexaai/binds/libnexa_bridge.dylib,sha256=g4nlxyGyVJ-LJV1cHMDg2m2pYF8fFTBBXGTPQV-lotg,251480
+ nexaai/binds/libssl.dylib,sha256=Q2frAdhR729oKYuCjJOEr1Ott3idFWoFp98fwNqtIaU,881616
+ nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=3Bsq0_tGkM027-bORVeJUDl6CYZxAF9sbDIn1l31XTQ,182704
+ nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=CzsTec_QHlvbBGzmx4MBQ4LUjG7aIqW1rP5p_A90Vds,632048
  nexaai/binds/nexa_llama_cpp/libggml-cpu.so,sha256=RiMhOv6IAWY1zkFTp0JCB7CYoPfOv54vBVQHvj1koBM,661120
  nexaai/binds/nexa_llama_cpp/libggml-metal.so,sha256=L4RQvaD0w4qBjexi4O05RMCH8842fof5QgBEvyx0RcA,673104
  nexaai/binds/nexa_llama_cpp/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
  nexaai/binds/nexa_llama_cpp/libllama.dylib,sha256=fDPnTG6EQ1JN6aRmnIFQzag_kmtyImRxKjMOOtaTY5Q,1746928
  nexaai/binds/nexa_llama_cpp/libmtmd.dylib,sha256=ccnBRsJNFGTCsjgW03N9PvX26wUirqpxljnxdVPINVc,587008
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=1kAoSq1w8pPiNmceOdmAZ7ehfENauFoGq8mpIwGl-kk,1806696
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=aw8if8RwXjb02CehbqGPHOeEKRUcTpzeJZLOkjTBm8A,596328
+ nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=bZOa9K6tROVv7sfBU6JaA7WEPvuLfJljkXH37um3WSU,2368632
+ nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=yjbdy0FpBE_RwgqvwGxd3czIfs3OYVoh--vWpn2H7RQ,1422888
  nexaai/binds/nexa_mlx/py-lib/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/binds/nexa_mlx/py-lib/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,17 +183,17 @@ nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py,sha256=79ddUhtT
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py,sha256=9dNmH03C46HtxwesH2DpT2oTNEG1KCZWYEKq6UQ3vfk,3536
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py,sha256=12RiOfPtSZQj5g5JM-yCJk3uGQfM3OdmRiPt5uUDE4E,35096
  nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/cv_impl/mlx_cv_impl.py,sha256=mdK4DEffPe96AgDGDXtQeHlG958hf8FO1fBZ1qjZMEE,3162
- nexaai/cv_impl/pybind_cv_impl.py,sha256=yS4JKfRSaIjjVP7hJ-CizG76pIX85bpmGLk9B9cnL24,998
+ nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
+ nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTOz34WGDnhsI9L7Ctv6fGPngvMAUc4FwEwRgBp_M9I,4317
- nexaai/embedder_impl/pybind_embedder_impl.py,sha256=AGGrOq4z0mDpQZInOvJsOIlQWflByhDjsihMu_Wjtbk,3286
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=OsDzsc_2wZkSoWu6yCOZadMkaYdBW3uyjF11hDKTaX8,4383
+ nexaai/embedder_impl/pybind_embedder_impl.py,sha256=Ga1JYauVkRq6jwAGL7Xx5HDaIx483_v9gZVoTyd3xNU,3495
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=OxSbk9zIDj7tTvsdM8bMJQDBhpn-mygBNktewd_wgtE,11153
- nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=XoSdWG5ID_g93WT9QB0qCP64a4rX-Rva0u4fQ8xpoqg,3626
+ nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
+ nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/llm_impl/mlx_llm_impl.py,sha256=HCi1uQBjccDDi64LbAgyH85lWx7qDZIW8i43dojGfF0,10210
- nexaai/llm_impl/pybind_llm_impl.py,sha256=8Us4N5KF6oi-0-K_5Dpf2rYe9smd89ZfWFrP_fWBsM4,7374
+ nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
+ nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
  nexaai/mlx_backend/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -211,7 +213,7 @@ nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
  nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
  nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/llm/generate.py,sha256=Phes0tzxbbEWA2hDylQvD0LjorMaPwvcfZq9RKCAOt0,4399
- nexaai/mlx_backend/llm/interface.py,sha256=Fx28O2jCDPaEfr0xLffWnqGIU5Gspggxr-o54-fBWj4,29257
+ nexaai/mlx_backend/llm/interface.py,sha256=SZFkuAUi2vxj_dSqj8RXf9vPTGMtpks_pZxxrF7iIe8,29330
  nexaai/mlx_backend/llm/main.py,sha256=gFDE4VZv_CLKMCTn0N521OfCKH_Ys26bHDh6g9VEFNc,1982
  nexaai/mlx_backend/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/mlx_audio/server.py,sha256=Pqy13Fafq4WX_cTuvRFz1jq89beQm2QQGpXmhK4b9jc,17547
@@ -502,19 +504,19 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
  nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
  nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
  nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/rerank_impl/mlx_rerank_impl.py,sha256=I-jumShLm1jAmKunRcDNUU4yjfWLoWClFMFONd88-Es,3177
- nexaai/rerank_impl/pybind_rerank_impl.py,sha256=FIIN96zCxXopqpqZdBd7OjuqqviFBY8HMZek1bCeoJw,1447
+ nexaai/rerank_impl/mlx_rerank_impl.py,sha256=h37PKSIRBY8mwzVeLeP4ix9ui3waIsg4gorzelYLJbM,3243
+ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5__R2W837t7c,1513
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/tts_impl/mlx_tts_impl.py,sha256=D71IFtIYWzrVdBS2y5vDBWjZ4ZAzRRjFHC0KO0pA5BU,3035
- nexaai/tts_impl/pybind_tts_impl.py,sha256=Be5QiXzDz6h1LTIQzUBd0ZyBs7rUpNA-pULCXFtt2Is,1378
+ nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
+ nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
  nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
  nexaai/utils/model_manager.py,sha256=c07ocxxw1IHCQw6esbmYK0dX2R2OajfEIGsC_2teHXo,48572
  nexaai/utils/progress_tracker.py,sha256=76HlPkyN41IMHSsH56-qdlN_aY_oBfJz50J16Cx67R0,15102
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/vlm_impl/mlx_vlm_impl.py,sha256=7gm_tFNox3LC78DQEtlMQ-eBK55zDY0xWlJghUAOP5Y,10402
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=C-3fa0AIypI33OAGuGfVxo1V7zN0wjQMgruKlDIlW4Q,8333
- nexaai-1.0.4rc15.dist-info/METADATA,sha256=izOUOhvRNpO73EELnKolgU0Kn_PK79tsJkJr3RMWBzA,883
- nexaai-1.0.4rc15.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
- nexaai-1.0.4rc15.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
- nexaai-1.0.4rc15.dist-info/RECORD,,
+ nexaai/vlm_impl/mlx_vlm_impl.py,sha256=od1R1mRoIgPG3NHC7JiDlcB_YJY8aklX8Em3ZkeHNpE,10734
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=5ZMFgDATthmMzjrd-vE5KX5ZAMoWPYbF_FTLz8DBKIk,8908
+ nexaai-1.0.5.dist-info/METADATA,sha256=fxZuww9PtpWooTJNDcf04tzwKi7AEThWyD0Z9O5a0rY,1151
+ nexaai-1.0.5.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
+ nexaai-1.0.5.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+ nexaai-1.0.5.dist-info/RECORD,,