PyPI - nexaai - Versions diffs - 1.0.4rc10__py3-none-macosx_11_0_arm64.whl - Mend

nexaai 1.0.4rc10__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic. Click here for more details.

Files changed (519) hide show

nexaai/binds/nexa_mlx/py-lib/profiling.py ADDED Viewed

@@ -0,0 +1,239 @@
+from __future__ import annotations
+import time
+from dataclasses import dataclass, field
+from typing import Any, Optional
+from enum import IntEnum
+# --------------------------------------------------------------------------------------
+# Stop reason constants matching profile.h
+# --------------------------------------------------------------------------------------
+class StopReason(IntEnum):
+    """Integer codes describing why a generation run stopped.
+    The values are declared to mirror the constants in profile.h, so they
+    should only be changed together with that header.
+    """
+    ML_STOP_REASON_UNKNOWN = 0  # default used by ProfilingData / ProfilingContext before a reason is set
+    ML_STOP_REASON_EOS = 1  # presumably: end-of-sequence token produced -- confirm against profile.h
+    ML_STOP_REASON_LENGTH = 2  # presumably: token limit reached -- confirm against profile.h
+    ML_STOP_REASON_USER = 3  # presumably: stopped by the caller -- confirm against profile.h
+    ML_STOP_REASON_STOP_SEQUENCE = 4  # presumably: a stop sequence matched -- confirm against profile.h
+    ML_STOP_REASON_COMPLETED = 5  # presumably: normal completion -- confirm against profile.h
+# --------------------------------------------------------------------------------------
+# Profiling data structure
+# --------------------------------------------------------------------------------------
+@dataclass
+class ProfilingData:
+    """Performance metrics reported for one generation run.
+    Durations are integer microseconds; ``tokens_per_second`` is a float.
+    """
+    ttft_us: int = 0             # time to first token, in microseconds
+    total_time_us: int = 0       # whole run, in microseconds
+    prompt_time_us: int = 0      # prompt-processing phase, in microseconds
+    decode_time_us: int = 0      # decode phase, in microseconds
+    tokens_per_second: float = 0.0  # decode throughput
+    total_tokens: int = 0        # prompt + generated tokens
+    prompt_tokens: int = 0       # tokens in the prompt
+    generated_tokens: int = 0    # tokens produced by the model
+    stop_reason: int = StopReason.ML_STOP_REASON_UNKNOWN  # numeric StopReason code
+    def reset(self):
+        """Put every metric back to its initial value (zeros / UNKNOWN)."""
+        integer_metrics = (
+            "ttft_us",
+            "total_time_us",
+            "prompt_time_us",
+            "decode_time_us",
+            "total_tokens",
+            "prompt_tokens",
+            "generated_tokens",
+        )
+        for metric in integer_metrics:
+            setattr(self, metric, 0)
+        self.tokens_per_second = 0.0
+        self.stop_reason = StopReason.ML_STOP_REASON_UNKNOWN
+# --------------------------------------------------------------------------------------
+# Profiling context (similar to ml_ProfilingContext in profile.h)
+# --------------------------------------------------------------------------------------
+@dataclass
+class ProfilingContext:
+    """Mutable scratch state for one profiled run: raw timestamps, flags and counters."""
+    start_time: Optional[float] = None
+    prompt_start_time: Optional[float] = None
+    prompt_end_time: Optional[float] = None
+    decode_start_time: Optional[float] = None
+    decode_end_time: Optional[float] = None
+    first_token_time: Optional[float] = None
+    end_time: Optional[float] = None
+    ttft_recorded: bool = False  # set once first_token_time has been captured
+    stop_reason: int = StopReason.ML_STOP_REASON_UNKNOWN
+    prompt_tokens: int = 0
+    generated_tokens: int = 0
+    def reset(self):
+        """Return the context to its freshly constructed state."""
+        timestamps = (
+            "start_time",
+            "prompt_start_time",
+            "prompt_end_time",
+            "decode_start_time",
+            "decode_end_time",
+            "first_token_time",
+            "end_time",
+        )
+        for stamp in timestamps:
+            setattr(self, stamp, None)
+        self.ttft_recorded = False
+        self.stop_reason = StopReason.ML_STOP_REASON_UNKNOWN
+        self.prompt_tokens = self.generated_tokens = 0
+# --------------------------------------------------------------------------------------
+# Profiling functions (similar to profile.h functions)
+# --------------------------------------------------------------------------------------
+def profiling_reset(ctx: ProfilingContext) -> None:
+    """Clear all state held by ``ctx`` (ml_profiling_reset)."""
+    ctx.reset()
+def profiling_start(ctx: ProfilingContext) -> None:
+    """Stamp the start of the run; prompt timing starts at the same instant (ml_profiling_start)."""
+    now = time.perf_counter()
+    ctx.start_time = now
+    ctx.prompt_start_time = now
+def profiling_prompt_start(ctx: ProfilingContext) -> None:
+    """Stamp the beginning of prompt processing (ml_profiling_prompt_start)."""
+    ctx.prompt_start_time = time.perf_counter()
+def profiling_prompt_end(ctx: ProfilingContext) -> None:
+    """Stamp the end of prompt processing (ml_profiling_prompt_end)."""
+    ctx.prompt_end_time = time.perf_counter()
+def profiling_decode_start(ctx: ProfilingContext) -> None:
+    """Stamp the beginning of decoding (ml_profiling_decode_start)."""
+    ctx.decode_start_time = time.perf_counter()
+def profiling_decode_end(ctx: ProfilingContext) -> None:
+    """Stamp the end of decoding (ml_profiling_decode_end)."""
+    ctx.decode_end_time = time.perf_counter()
+def profiling_record_ttft(ctx: ProfilingContext) -> None:
+    """Capture the first-token timestamp once per run (ml_profiling_record_ttft).
+    Does nothing if it was already captured or if the run has not been started.
+    """
+    if ctx.ttft_recorded or ctx.start_time is None:
+        return
+    ctx.first_token_time = time.perf_counter()
+    ctx.ttft_recorded = True
+def profiling_update_prompt_tokens(ctx: ProfilingContext, prompt_tokens: int) -> None:
+    """Store the prompt token count on ``ctx`` (ml_profiling_update_prompt_tokens)."""
+    ctx.prompt_tokens = prompt_tokens
+def profiling_update_generated_tokens(ctx: ProfilingContext, generated_tokens: int) -> None:
+    """Store the generated token count on ``ctx`` (ml_profiling_update_generated_tokens)."""
+    ctx.generated_tokens = generated_tokens
+def profiling_stop_reason(ctx: ProfilingContext, stop_reason: int) -> None:
+    """Store the stop-reason code on ``ctx`` (ml_profiling_stop_reason)."""
+    ctx.stop_reason = stop_reason
+def profiling_end(ctx: ProfilingContext) -> None:
+    """Stamp the end of the run (ml_profiling_end)."""
+    ctx.end_time = time.perf_counter()
+def profiling_gen_data(ctx: ProfilingContext) -> ProfilingData:
+    """Derive a ProfilingData snapshot from the raw values in ``ctx`` (ml_profiling_gen_data).
+    Returns an all-default ProfilingData when the run was never started or
+    never ended. Timestamp differences are multiplied by 1_000_000 and
+    truncated with ``int()`` to give whole microseconds.
+    """
+    result = ProfilingData()
+    run_start = ctx.start_time
+    if run_start is None or ctx.end_time is None:
+        return result
+    def _elapsed_us(begin: float, finish: float) -> int:
+        # Scale the difference to microseconds and truncate to an int.
+        return int((finish - begin) * 1_000_000)
+    result.total_time_us = _elapsed_us(run_start, ctx.end_time)
+    # A phase duration stays 0 unless both of its endpoints were stamped.
+    if ctx.prompt_start_time is not None and ctx.prompt_end_time is not None:
+        result.prompt_time_us = _elapsed_us(ctx.prompt_start_time, ctx.prompt_end_time)
+    if ctx.decode_start_time is not None and ctx.decode_end_time is not None:
+        result.decode_time_us = _elapsed_us(ctx.decode_start_time, ctx.decode_end_time)
+    # start_time is known to be set at this point, so only the first-token stamp needs checking.
+    if ctx.first_token_time is not None:
+        result.ttft_us = _elapsed_us(run_start, ctx.first_token_time)
+    prompt_count, generated_count = ctx.prompt_tokens, ctx.generated_tokens
+    result.prompt_tokens = prompt_count
+    result.generated_tokens = generated_count
+    result.total_tokens = prompt_count + generated_count
+    # Throughput is only defined when some decode time was measured.
+    if result.decode_time_us > 0:
+        result.tokens_per_second = (result.generated_tokens * 1_000_000.0) / result.decode_time_us
+    result.stop_reason = ctx.stop_reason
+    return result
+def stop_reason_to_string(reason: int) -> str:
+    """Return the StopReason member name for ``reason``, or ``UNKNOWN(<reason>)`` if it is not a valid code."""
+    try:
+        member = StopReason(reason)
+    except ValueError:
+        # Not one of the declared codes: keep the raw value visible in the text.
+        return f"UNKNOWN({reason})"
+    return member.name
+# --------------------------------------------------------------------------------------
+# Profiling mixin for model classes
+# --------------------------------------------------------------------------------------
+class ProfilingMixin:
+    """Mixin that gives a model class a ProfilingContext and the last ProfilingData.
+    Each underscore-prefixed method forwards to the module-level profiling_*
+    function of the same purpose, passing this object's context.
+    NOTE(review): __init__ does not call super().__init__(), so other bases that
+    follow this mixin in the MRO are not initialised through it -- confirm how
+    host classes invoke it.
+    """
+    def __init__(self):
+        """Create an empty profiling context and an all-default data record."""
+        self._profiling_context = ProfilingContext()
+        self._profiling_data = ProfilingData()
+    def _start_profiling(self) -> None:
+        """Reset the context, then stamp the start of a new run."""
+        profiling_reset(self._profiling_context)
+        profiling_start(self._profiling_context)
+    def _prompt_start(self) -> None:
+        """Stamp the beginning of prompt processing."""
+        profiling_prompt_start(self._profiling_context)
+    def _prompt_end(self) -> None:
+        """Stamp the end of prompt processing."""
+        profiling_prompt_end(self._profiling_context)
+    def _decode_start(self) -> None:
+        """Stamp the beginning of decoding."""
+        profiling_decode_start(self._profiling_context)
+    def _decode_end(self) -> None:
+        """Stamp the end of decoding."""
+        profiling_decode_end(self._profiling_context)
+    def _record_ttft(self) -> None:
+        """Capture the first-token timestamp (only the first call per run has an effect)."""
+        profiling_record_ttft(self._profiling_context)
+    def _update_prompt_tokens(self, prompt_tokens: int) -> None:
+        """Store the prompt token count in the context."""
+        profiling_update_prompt_tokens(self._profiling_context, prompt_tokens)
+    def _update_generated_tokens(self, generated_tokens: int) -> None:
+        """Store the generated token count in the context."""
+        profiling_update_generated_tokens(self._profiling_context, generated_tokens)
+    def _set_stop_reason(self, stop_reason: int) -> None:
+        """Store the stop-reason code in the context."""
+        profiling_stop_reason(self._profiling_context, stop_reason)
+    def _end_profiling(self) -> ProfilingData:
+        """Stamp the end of the run, compute the metrics, cache and return them."""
+        profiling_end(self._profiling_context)
+        self._profiling_data = profiling_gen_data(self._profiling_context)
+        return self._profiling_data
+    def get_profiling_data(self) -> ProfilingData:
+        """Return the metrics cached by the most recent _end_profiling call (defaults before any run)."""
+        return self._profiling_data
+    def reset_profiling(self) -> None:
+        """Reset the cached metrics; the profiling context itself is left untouched."""
+        self._profiling_data.reset()

nexaai/common.py ADDED Viewed

@@ -0,0 +1,61 @@
+from dataclasses import dataclass
+from typing import TypedDict, Literal, Optional, List
+class ChatMessage(TypedDict):
+    """Typed dict for a single text-only chat message."""
+    role: Literal["user", "assistant", "system"]  # who authored the message
+    content: str  # message text
+class MultiModalMessageContent(TypedDict):
+    """Typed dict for one content part of a multimodal message.
+    NOTE(review): the TypedDict is total, so type checkers require all four keys
+    even though text/url/path may be None -- confirm that is intended.
+    """
+    type: Literal["text", "image", "audio", "video"]  # kind of content carried by this part
+    text: Optional[str]  # presumably set when type == "text" -- verify against callers
+    url: Optional[str]  # presumably a remote location for media parts -- verify against callers
+    path: Optional[str]  # presumably a local file path for media parts -- verify against callers
+class MultiModalMessage(TypedDict):
+    """Typed dict for a chat message whose content is a list of multimodal parts."""
+    role: Literal["user", "assistant", "system"]  # who authored the message
+    content: List[MultiModalMessageContent]  # content parts of the message
+@dataclass
+class SamplerConfig:
+    """Sampling parameters for text generation.
+    The two grammar fields default to None and are annotated Optional so the
+    annotations agree with the defaults.
+    """
+    temperature: float = 0.8
+    top_p: float = 0.95
+    top_k: int = 40
+    repetition_penalty: float = 1.0
+    presence_penalty: float = 0.0
+    frequency_penalty: float = 0.0
+    seed: int = -1  # presumably -1 asks the backend to pick a seed -- confirm
+    grammar_path: Optional[str] = None  # None when no grammar file is supplied
+    grammar_string: Optional[str] = None  # None when no inline grammar is supplied
+@dataclass
+class GenerationConfig:
+    """Per-request generation options.
+    Every field except ``max_tokens`` defaults to None and is annotated
+    Optional so the annotations agree with the defaults.
+    """
+    max_tokens: int = 1024
+    stop_words: Optional[list[str]] = None
+    sampler_config: Optional[SamplerConfig] = None
+    image_paths: Optional[list[str]] = None
+    audio_paths: Optional[list[str]] = None
+@dataclass
+class ModelConfig:
+    """Model loading options.
+    Fields that default to None are annotated Optional so the annotations
+    agree with the defaults.
+    """
+    n_ctx: int = 4096
+    n_threads: Optional[int] = None
+    n_threads_batch: Optional[int] = None
+    n_batch: int = 512
+    n_ubatch: int = 512
+    n_seq_max: int = 1
+    n_gpu_layers: int = 999
+    chat_template_path: Optional[str] = None
+    chat_template_content: Optional[str] = None
+@dataclass(frozen=True) # Read-only
+class ProfilingData:
+    """Immutable record of raw profiling timestamps.
+    Only ``start_time`` and ``end_time`` are required; the others default to
+    None and are annotated Optional accordingly.
+    """
+    start_time: int
+    end_time: int
+    prompt_start_time: Optional[int] = None
+    prompt_end_time: Optional[int] = None
+    decode_start_time: Optional[int] = None
+    # The field name is misspelled ("ent"); it is kept so existing keyword
+    # callers keep working. Use the decode_end_time property for reads.
+    decode_ent_time: Optional[int] = None
+    first_token_time: Optional[int] = None
+    @property
+    def decode_end_time(self) -> Optional[int]:
+        """Correctly spelled read-only alias for ``decode_ent_time``."""
+        return self.decode_ent_time

nexaai/cv.py ADDED Viewed

@@ -0,0 +1,87 @@
+from typing import List, Optional
+from abc import abstractmethod
+from dataclasses import dataclass
+from nexaai.base import BaseModel
+@dataclass
+class BoundingBox:
+    """Generic bounding box given by an (x, y) position plus width and height.
+    Whether values are normalized or in pixels depends on the model.
+    """
+    x: float  # X coordinate (normalized or pixel, depends on model)
+    y: float  # Y coordinate (normalized or pixel, depends on model)
+    width: float  # Box width, in the same unit as x
+    height: float  # Box height, in the same unit as y
+@dataclass
+class CVResult:
+    """One generic detection / classification result.
+    Every field has a default, so a model fills in only the fields relevant to
+    its task.
+    """
+    image_paths: Optional[List[str]] = None  # Output image paths
+    image_count: int = 0  # Number of output images
+    class_id: int = 0  # Class ID (example: ConvNext)
+    confidence: float = 0.0  # Confidence score [0.0-1.0]
+    bbox: Optional[BoundingBox] = None  # Bounding box (example: YOLO)
+    text: Optional[str] = None  # Text result (example: OCR)
+    embedding: Optional[List[float]] = None  # Feature embedding (example: CLIP embedding)
+    embedding_dim: int = 0  # Embedding dimension
+@dataclass
+class CVResults:
+    """Container for the results of one CV inference call."""
+    results: List[CVResult]  # Individual results
+    result_count: int  # Number of results, stored separately (not derived from len(results))
+class CVCapabilities:
+    """Integer constants naming CV task types.
+    This is a plain class of ints, not an ``enum.Enum``; the values are what
+    ``CVModelConfig.capabilities`` is annotated to hold.
+    """
+    OCR = 0  # OCR
+    CLASSIFICATION = 1  # Classification
+    SEGMENTATION = 2  # Segmentation
+    CUSTOM = 3  # Custom task
+@dataclass
+class CVModelConfig:
+    """Configuration used to load a CV model.
+    ``capabilities`` is required; every path defaults to None. The fields are
+    grouped by the backend they are labelled for.
+    """
+    capabilities: int  # One of the CVCapabilities constants
+    # MLX-OCR
+    det_model_path: Optional[str] = None  # Detection model path
+    rec_model_path: Optional[str] = None  # Recognition model path
+    # QNN
+    model_path: Optional[str] = None  # Model path
+    system_library_path: Optional[str] = None  # System library path
+    backend_library_path: Optional[str] = None  # Backend library path
+    extension_library_path: Optional[str] = None  # Extension library path
+    config_file_path: Optional[str] = None  # Config file path
+    char_dict_path: Optional[str] = None  # Character dictionary path
+class CVModel(BaseModel):
+    """Abstract interface for generic computer-vision models; ``_load_from`` picks the backend."""
+    def __init__(self):
+        """Base initializer; performs no work."""
+    @classmethod
+    def _load_from(cls,
+                   config: CVModelConfig,
+                   plugin_id: str = "llama_cpp",
+                   device_id: Optional[str] = None
+        ) -> 'CVModel':
+        """Build a CV model for ``config`` using the implementation selected by ``plugin_id``.
+        ``"mlx"`` selects the MLX implementation; any other value selects the
+        PyBind implementation. Backend modules are imported lazily so that only
+        the chosen one is loaded.
+        """
+        if plugin_id != "mlx":
+            from nexaai.cv_impl.pybind_cv_impl import PyBindCVImpl
+            return PyBindCVImpl._load_from(config, plugin_id, device_id)
+        from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
+        return MLXCVImpl._load_from(config, plugin_id, device_id)
+    @abstractmethod
+    def infer(self, input_image_path: str) -> CVResults:
+        """Run inference on the image at ``input_image_path`` and return the results."""

nexaai/cv_impl/__init__.py ADDED Viewed

File without changes

nexaai/cv_impl/mlx_cv_impl.py ADDED Viewed

@@ -0,0 +1,88 @@
+# Note: This code is generated by Cursor, not tested yet.
+from typing import Optional
+import os
+from nexaai.cv import CVModel, CVModelConfig, CVResults
+from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model
+class MLXCVImpl(CVModel):
+    """CVModel implementation that delegates to the MLX backend."""
+    def __init__(self):
+        """Create an unloaded wrapper; ``_load_from`` attaches the backend model."""
+        super().__init__()
+        self._mlx_cv = None
+    @classmethod
+    def _load_from(cls,
+                   config: CVModelConfig,
+                   plugin_id: str = "mlx",
+                   device_id: Optional[str] = None
+        ) -> 'MLXCVImpl':
+        """Load a CV model through the MLX backend.
+        Args:
+            config: Capability flag and model paths; copied field by field into
+                the MLX backend's own CVModelConfig.
+            plugin_id: Accepted for signature parity with CVModel._load_from;
+                not used by this method.
+            device_id: Passed through to ``create_cv_model``.
+        Returns:
+            A new MLXCVImpl holding the created backend model.
+        Raises:
+            RuntimeError: If importing the backend config or creating the model
+                fails; the original exception is attached as the cause.
+        """
+        try:
+            # Imported inside the try so an import failure is reported as a load failure.
+            from nexaai.mlx_backend.ml import CVModelConfig as MLXCVModelConfig
+            mlx_config = MLXCVModelConfig(
+                capabilities=config.capabilities,
+                det_model_path=config.det_model_path,
+                rec_model_path=config.rec_model_path,
+                model_path=config.model_path,
+                system_library_path=config.system_library_path,
+                backend_library_path=config.backend_library_path,
+                extension_library_path=config.extension_library_path,
+                config_file_path=config.config_file_path,
+                char_dict_path=config.char_dict_path
+            )
+            instance = cls()
+            instance._mlx_cv = create_cv_model(mlx_config, device_id)
+            return instance
+        except Exception as e:
+            # Chain explicitly so the backend error is kept as __cause__.
+            raise RuntimeError(f"Failed to load MLX CV: {str(e)}") from e
+    def eject(self):
+        """Destroy the backend model, if one is loaded, and drop the reference."""
+        # Identity check: a loaded backend object must be released even if it is falsy.
+        if self._mlx_cv is not None:
+            self._mlx_cv.destroy()
+            self._mlx_cv = None
+    def infer(self, input_image_path: str) -> CVResults:
+        """Run inference on one image and convert the backend result.
+        Args:
+            input_image_path: Path of the image handed to the backend.
+        Returns:
+            CVResults with one CVResult per backend result. ``result_count`` is
+            taken from the backend result as-is.
+        Raises:
+            RuntimeError: If no model is loaded, or if inference or conversion
+                fails (the original exception is attached as the cause).
+        """
+        if self._mlx_cv is None:
+            raise RuntimeError("MLX CV not loaded")
+        try:
+            result = self._mlx_cv.infer(input_image_path)
+            from nexaai.cv import CVResult
+            # NOTE(review): bbox is forwarded unchanged from the backend result --
+            # confirm it is compatible with nexaai.cv.BoundingBox.
+            our_results = [
+                CVResult(
+                    image_paths=mlx_result.image_paths,
+                    image_count=mlx_result.image_count,
+                    class_id=mlx_result.class_id,
+                    confidence=mlx_result.confidence,
+                    bbox=mlx_result.bbox,
+                    text=mlx_result.text,
+                    embedding=mlx_result.embedding,
+                    embedding_dim=mlx_result.embedding_dim
+                )
+                for mlx_result in result.results
+            ]
+            return CVResults(
+                results=our_results,
+                result_count=result.result_count
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to perform CV inference: {str(e)}") from e

nexaai/cv_impl/pybind_cv_impl.py ADDED Viewed

@@ -0,0 +1,31 @@
+from typing import Optional
+from nexaai.cv import CVModel, CVModelConfig, CVResults
+class PyBindCVImpl(CVModel):
+    """Placeholder CVModel implementation for the PyBind backend.
+    Loading and cleanup are stubs and ``infer`` always raises
+    NotImplementedError.
+    """
+    def __init__(self):
+        """Initialize PyBind CV implementation."""
+        super().__init__()
+        # TODO: Add PyBind-specific initialization
+    @classmethod
+    def _load_from(cls,
+                   config: CVModelConfig,
+                   plugin_id: str = "llama_cpp",
+                   device_id: Optional[str] = None
+        ) -> 'PyBindCVImpl':
+        """Return a new, empty instance.
+        None of the arguments are used yet, so no model is actually loaded.
+        """
+        # TODO: Implement PyBind CV loading
+        instance = cls()
+        return instance
+    def eject(self):
+        """Destroy the model and free resources (currently a no-op)."""
+        # TODO: Implement PyBind CV cleanup
+        pass
+    def infer(self, input_image_path: str) -> CVResults:
+        """Perform inference on image.
+        Raises:
+            NotImplementedError: Always, until the PyBind path is implemented.
+        """
+        # TODO: Implement PyBind CV inference
+        raise NotImplementedError("PyBind CV inference not yet implemented")

nexaai/embedder.py ADDED Viewed

@@ -0,0 +1,68 @@
+from typing import List, Union
+from dataclasses import dataclass
+from abc import abstractmethod
+import numpy as np
+from nexaai.base import BaseModel
+@dataclass
+class EmbeddingConfig:
+    """Options for embedding generation."""
+    batch_size: int = 32  # presumably the number of texts embedded per batch -- confirm with backend
+    normalize: bool = True  # whether embeddings are normalized
+    normalize_method: str = "l2"  # name of the normalization method; accepted values are backend-defined
+class Embedder(BaseModel):
+    """Abstract interface for text-embedding models; ``_load_from`` picks the backend."""
+    def __init__(self):
+        """Internal initializer; performs no work."""
+    @classmethod
+    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+        """
+        Create an embedder using the implementation selected by ``plugin_id``.
+        Args:
+            model_path: Path to the model file
+            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
+            plugin_id: "mlx" selects the MLX implementation; any other value
+                selects the PyBind implementation (default: "llama_cpp")
+        Returns:
+            Embedder instance
+        """
+        # Backend modules are imported lazily so only the selected one is loaded.
+        if plugin_id != "mlx":
+            from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
+            return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+        from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
+        return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+    @abstractmethod
+    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
+        """
+        Produce embeddings for ``texts`` or for pre-tokenized ``input_ids``.
+        Args:
+            texts: A single string or a list of strings to embed
+            input_ids: Pre-tokenized input, either one sequence ([1, 2, 3, 4])
+                      or several sequences ([[1, 2, 3], [4, 5, 6]])
+            config: Embedding options. The default instance is created once,
+                    when this method is defined, and is shared by every call
+                    that omits the argument.
+        Returns:
+            numpy array of embeddings with shape (num_sequences, embedding_dim)
+        """
+    @abstractmethod
+    def get_embedding_dim(self) -> int:
+        """
+        Report the model's embedding dimension.
+        Returns:
+            The embedding dimension as an int
+        """

nexaai/embedder_impl/__init__.py ADDED Viewed

File without changes

nexaai/embedder_impl/mlx_embedder_impl.py ADDED Viewed

@@ -0,0 +1,114 @@
+from typing import List, Union
+import numpy as np
+from nexaai.embedder import Embedder, EmbeddingConfig
+from nexaai.mlx_backend.embedding.interface import Embedder as MLXEmbedderInterface
+from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
+class MLXEmbedderImpl(Embedder):
+    """Embedder implementation that delegates to the MLX backend."""
+    def __init__(self):
+        """Create an unloaded wrapper; ``_load_from`` attaches the backend embedder."""
+        super().__init__()
+        self._mlx_embedder = None
+    @classmethod
+    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "mlx"):
+        """
+        Load an embedder from model files using MLX backend.
+        Args:
+            model_path: Path to the model file
+            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
+            plugin_id: Accepted for signature parity with Embedder._load_from;
+                not used by this method (default: "mlx")
+        Returns:
+            MLXEmbedderImpl instance
+        Raises:
+            RuntimeError: If constructing the backend embedder fails or its
+                ``load_model`` reports failure; the original exception is
+                attached as the cause.
+        """
+        try:
+            instance = cls()
+            instance._mlx_embedder = MLXEmbedderInterface(
+                model_path=model_path,
+                tokenizer_path=tokenizer_file
+            )
+            success = instance._mlx_embedder.load_model(model_path)
+            if not success:
+                # Caught below and re-raised with the common "Failed to load" prefix.
+                raise RuntimeError("Failed to load MLX embedder model")
+            return instance
+        except Exception as e:
+            raise RuntimeError(f"Failed to load MLX Embedder: {str(e)}") from e
+    def eject(self):
+        """
+        Clean up resources and destroy the embedder
+        """
+        # Identity check: a loaded backend object must be released even if it is falsy.
+        if self._mlx_embedder is not None:
+            self._mlx_embedder.destroy()
+            self._mlx_embedder = None
+    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = None, input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
+        """
+        Generate embeddings for the given texts.
+        Args:
+            texts: List of strings or single string to embed
+            input_ids: Pre-tokenized input; not supported by this backend
+            config: Configuration for embedding generation. When omitted, the
+                    MLX backend's own EmbeddingConfig defaults are used. A
+                    fresh backend config is built on every call, so no config
+                    object is shared between calls.
+        Returns:
+            numpy array of embeddings with shape (num_sequences, embedding_dim)
+        Raises:
+            RuntimeError: If no embedder is loaded, or if embedding fails (the
+                original exception is attached as the cause).
+            ValueError: If neither texts nor input_ids is given.
+            NotImplementedError: If input_ids is given.
+        """
+        if self._mlx_embedder is None:
+            raise RuntimeError("MLX Embedder not loaded")
+        if texts is None and input_ids is None:
+            raise ValueError("Either texts or input_ids must be provided")
+        # MLX embedder currently only supports text input, not pre-tokenized input_ids
+        if input_ids is not None:
+            raise NotImplementedError("MLX embedder does not support input_ids, only text input")
+        try:
+            if isinstance(texts, str):
+                texts = [texts]
+            # Two classes named EmbeddingConfig are imported at module level and
+            # the MLX one shadows the other; import it under an explicit alias
+            # so it is unambiguous which class is instantiated.
+            from nexaai.mlx_backend.ml import EmbeddingConfig as MLXEmbeddingConfig
+            mlx_config = MLXEmbeddingConfig()
+            if config is not None:
+                mlx_config.batch_size = config.batch_size
+                mlx_config.normalize = config.normalize
+                mlx_config.normalize_method = config.normalize_method
+            embeddings = self._mlx_embedder.embed(texts, mlx_config)
+            return np.array(embeddings, dtype=np.float32)
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate embeddings: {str(e)}") from e
+    def get_embedding_dim(self) -> int:
+        """
+        Get the embedding dimension of the model
+        Returns:
+            The embedding dimension in int
+        Raises:
+            RuntimeError: If no embedder is loaded or the backend call fails.
+        """
+        if self._mlx_embedder is None:
+            raise RuntimeError("MLX Embedder not loaded")
+        try:
+            return self._mlx_embedder.embedding_dim()
+        except Exception as e:
+            raise RuntimeError(f"Failed to get embedding dimension: {str(e)}") from e