nexaai 1.0.21rc5__cp313-cp313-win_arm64.whl → 1.0.21rc14__cp313-cp313-win_arm64.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.



Files changed (105)
  1. nexaai/__init__.py +95 -95
  2. nexaai/_stub.cp313-win_arm64.pyd +0 -0
  3. nexaai/_version.py +4 -1
  4. nexaai/asr.py +68 -65
  5. nexaai/asr_impl/mlx_asr_impl.py +92 -92
  6. nexaai/asr_impl/pybind_asr_impl.py +127 -44
  7. nexaai/base.py +39 -39
  8. nexaai/binds/__init__.py +6 -5
  9. nexaai/binds/asr_bind.cp313-win_arm64.pyd +0 -0
  10. nexaai/binds/common_bind.cp313-win_arm64.pyd +0 -0
  11. nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
  12. nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
  13. nexaai/binds/cpu_gpu/ggml-opencl.dll +0 -0
  14. nexaai/binds/cpu_gpu/ggml.dll +0 -0
  15. nexaai/binds/cpu_gpu/mtmd.dll +0 -0
  16. nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
  17. nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
  18. nexaai/binds/embedder_bind.cp313-win_arm64.pyd +0 -0
  19. nexaai/binds/libcrypto-3-arm64.dll +0 -0
  20. nexaai/binds/libssl-3-arm64.dll +0 -0
  21. nexaai/binds/llm_bind.cp313-win_arm64.pyd +0 -0
  22. nexaai/binds/nexa_bridge.dll +0 -0
  23. nexaai/binds/npu/convnext-sdk.dll +0 -0
  24. nexaai/binds/npu/embed-gemma-sdk.dll +0 -0
  25. nexaai/binds/npu/ggml-base.dll +0 -0
  26. nexaai/binds/npu/ggml-cpu.dll +0 -0
  27. nexaai/binds/npu/ggml-opencl.dll +0 -0
  28. nexaai/binds/npu/ggml.dll +0 -0
  29. nexaai/binds/npu/granite-nano-sdk.dll +0 -0
  30. nexaai/binds/npu/granite4-sdk.dll +0 -0
  31. nexaai/binds/npu/jina-rerank-sdk.dll +0 -0
  32. nexaai/binds/npu/liquid-sdk.dll +0 -0
  33. nexaai/binds/npu/llama3-3b-sdk.dll +0 -0
  34. nexaai/binds/npu/nexa-mm-process.dll +0 -0
  35. nexaai/binds/npu/nexa-sampling.dll +0 -0
  36. nexaai/binds/npu/nexa_plugin.dll +0 -0
  37. nexaai/binds/npu/omni-neural-sdk.dll +0 -0
  38. nexaai/binds/npu/openblas.dll +0 -0
  39. nexaai/binds/npu/paddleocr-sdk.dll +0 -0
  40. nexaai/binds/npu/parakeet-sdk.dll +0 -0
  41. nexaai/binds/npu/phi3-5-sdk.dll +0 -0
  42. nexaai/binds/npu/phi4-sdk.dll +0 -0
  43. nexaai/binds/npu/pyannote-sdk.dll +0 -0
  44. nexaai/binds/npu/qwen3-4b-sdk.dll +0 -0
  45. nexaai/binds/npu/qwen3vl-sdk.dll +0 -0
  46. nexaai/binds/npu/qwen3vl-vision.dll +0 -0
  47. nexaai/binds/npu/yolov12-sdk.dll +0 -0
  48. nexaai/binds/npu/zlib1.dll +0 -0
  49. nexaai/binds/rerank_bind.cp313-win_arm64.pyd +0 -0
  50. nexaai/binds/vlm_bind.cp313-win_arm64.pyd +0 -0
  51. nexaai/common.py +105 -105
  52. nexaai/cv.py +93 -93
  53. nexaai/cv_impl/mlx_cv_impl.py +89 -89
  54. nexaai/cv_impl/pybind_cv_impl.py +32 -32
  55. nexaai/embedder.py +73 -73
  56. nexaai/embedder_impl/mlx_embedder_impl.py +118 -118
  57. nexaai/embedder_impl/pybind_embedder_impl.py +96 -96
  58. nexaai/image_gen.py +141 -141
  59. nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -292
  60. nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -85
  61. nexaai/llm.py +98 -98
  62. nexaai/llm_impl/mlx_llm_impl.py +271 -271
  63. nexaai/llm_impl/pybind_llm_impl.py +220 -220
  64. nexaai/log.py +92 -92
  65. nexaai/rerank.py +57 -57
  66. nexaai/rerank_impl/mlx_rerank_impl.py +94 -94
  67. nexaai/rerank_impl/pybind_rerank_impl.py +136 -136
  68. nexaai/runtime.py +68 -68
  69. nexaai/runtime_error.py +24 -24
  70. nexaai/tts.py +75 -75
  71. nexaai/tts_impl/mlx_tts_impl.py +94 -94
  72. nexaai/tts_impl/pybind_tts_impl.py +43 -43
  73. nexaai/utils/decode.py +17 -17
  74. nexaai/utils/manifest_utils.py +531 -531
  75. nexaai/utils/model_manager.py +1562 -1562
  76. nexaai/utils/model_types.py +49 -49
  77. nexaai/utils/progress_tracker.py +384 -384
  78. nexaai/utils/quantization_utils.py +245 -245
  79. nexaai/vlm.py +129 -129
  80. nexaai/vlm_impl/mlx_vlm_impl.py +258 -258
  81. nexaai/vlm_impl/pybind_vlm_impl.py +256 -256
  82. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/METADATA +1 -1
  83. nexaai-1.0.21rc14.dist-info/RECORD +154 -0
  84. nexaai/binds/nexaml/FLAC.dll +0 -0
  85. nexaai/binds/nexaml/fftw3.dll +0 -0
  86. nexaai/binds/nexaml/fftw3f.dll +0 -0
  87. nexaai/binds/nexaml/ggml-base.dll +0 -0
  88. nexaai/binds/nexaml/ggml-cpu.dll +0 -0
  89. nexaai/binds/nexaml/ggml-opencl.dll +0 -0
  90. nexaai/binds/nexaml/ggml.dll +0 -0
  91. nexaai/binds/nexaml/libmp3lame.DLL +0 -0
  92. nexaai/binds/nexaml/mpg123.dll +0 -0
  93. nexaai/binds/nexaml/nexa-mm-process.dll +0 -0
  94. nexaai/binds/nexaml/nexa-sampling.dll +0 -0
  95. nexaai/binds/nexaml/nexa_plugin.dll +0 -0
  96. nexaai/binds/nexaml/nexaproc.dll +0 -0
  97. nexaai/binds/nexaml/ogg.dll +0 -0
  98. nexaai/binds/nexaml/opus.dll +0 -0
  99. nexaai/binds/nexaml/qwen3-vl.dll +0 -0
  100. nexaai/binds/nexaml/qwen3vl-vision.dll +0 -0
  101. nexaai/binds/nexaml/vorbis.dll +0 -0
  102. nexaai/binds/nexaml/vorbisenc.dll +0 -0
  103. nexaai-1.0.21rc5.dist-info/RECORD +0 -162
  104. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/WHEEL +0 -0
  105. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/top_level.txt +0 -0
nexaai/asr_impl/pybind_asr_impl.py CHANGED
@@ -1,44 +1,127 @@
- from typing import List, Optional, Union
-
- from nexaai.common import PluginID
- from nexaai.asr import ASR, ASRConfig, ASRResult
-
-
- class PyBindASRImpl(ASR):
-     def __init__(self):
-         """Initialize PyBind ASR implementation."""
-         super().__init__()
-         # TODO: Add PyBind-specific initialization
-
-     @classmethod
-     def _load_from(cls,
-                    model_path: str,
-                    tokenizer_path: Optional[str] = None,
-                    language: Optional[str] = None,
-                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                    device_id: Optional[str] = None
-                    ) -> 'PyBindASRImpl':
-         """Load ASR model from local path using PyBind backend."""
-         # TODO: Implement PyBind ASR loading
-         instance = cls()
-         return instance
-
-     def eject(self):
-         """Destroy the model and free resources."""
-         # TODO: Implement PyBind ASR cleanup
-         pass
-
-     def transcribe(
-         self,
-         audio_path: str,
-         language: Optional[str] = None,
-         config: Optional[ASRConfig] = None,
-     ) -> ASRResult:
-         """Transcribe audio file to text."""
-         # TODO: Implement PyBind ASR transcription
-         raise NotImplementedError("PyBind ASR transcription not yet implemented")
-
-     def list_supported_languages(self) -> List[str]:
-         """List supported languages."""
-         # TODO: Implement PyBind ASR language listing
-         raise NotImplementedError("PyBind ASR language listing not yet implemented")
+ from typing import List, Optional, Union
+
+ from nexaai.common import PluginID, ModelConfig
+ from nexaai.asr import ASR, ASRConfig, ASRResult
+ from nexaai.binds import asr_bind, common_bind
+ from nexaai.runtime import _ensure_runtime
+
+
+ class PyBindASRImpl(ASR):
+     def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
+         """Private constructor, should not be called directly."""
+         super().__init__(m_cfg)
+         self._handle = handle # This is a py::capsule
+         self._model_config = None
+
+     @classmethod
+     def _load_from(cls,
+                    model_path: str,
+                    model_name: Optional[str] = None,
+                    tokenizer_path: Optional[str] = None,
+                    language: Optional[str] = None,
+                    m_cfg: ModelConfig = ModelConfig(),
+                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
+                    device_id: Optional[str] = None
+                    ) -> 'PyBindASRImpl':
+         """Load ASR model from local path using PyBind backend."""
+         _ensure_runtime()
+
+         # Create model config
+         config = common_bind.ModelConfig()
+
+         config.n_ctx = m_cfg.n_ctx
+         if m_cfg.n_threads is not None:
+             config.n_threads = m_cfg.n_threads
+         if m_cfg.n_threads_batch is not None:
+             config.n_threads_batch = m_cfg.n_threads_batch
+         if m_cfg.n_batch is not None:
+             config.n_batch = m_cfg.n_batch
+         if m_cfg.n_ubatch is not None:
+             config.n_ubatch = m_cfg.n_ubatch
+         if m_cfg.n_seq_max is not None:
+             config.n_seq_max = m_cfg.n_seq_max
+         config.n_gpu_layers = m_cfg.n_gpu_layers
+
+         # handle chat template strings
+         if m_cfg.chat_template_path:
+             config.chat_template_path = m_cfg.chat_template_path
+
+         if m_cfg.chat_template_content:
+             config.chat_template_content = m_cfg.chat_template_content
+
+         # Convert plugin_id to string
+         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
+
+         # Create ASR handle using the binding
+         handle = asr_bind.ml_asr_create(
+             model_path=model_path,
+             model_name=model_name,
+             tokenizer_path=tokenizer_path,
+             model_config=config,
+             language=language,
+             plugin_id=plugin_id_str,
+             device_id=device_id,
+             license_id=None, # Optional
+             license_key=None # Optional
+         )
+
+         return cls(handle, m_cfg)
+
+     def eject(self):
+         """Release the model from memory."""
+         # py::capsule handles cleanup automatically
+         if hasattr(self, '_handle') and self._handle is not None:
+             del self._handle
+             self._handle = None
+
+     def transcribe(
+         self,
+         audio_path: str,
+         language: Optional[str] = None,
+         config: Optional[ASRConfig] = None,
+     ) -> ASRResult:
+         """Transcribe audio file to text."""
+         if self._handle is None:
+             raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+         # Convert ASRConfig to binding format if provided
+         asr_config = None
+         if config:
+             asr_config = asr_bind.ASRConfig()
+             asr_config.timestamps = config.timestamps
+             asr_config.beam_size = config.beam_size
+             asr_config.stream = config.stream
+
+         # Perform transcription using the binding
+         result_dict = asr_bind.ml_asr_transcribe(
+             handle=self._handle,
+             audio_path=audio_path,
+             language=language,
+             config=asr_config
+         )
+
+         # Convert result to ASRResult
+         transcript = result_dict.get("transcript", "")
+         confidence_scores = result_dict.get("confidence_scores")
+         timestamps = result_dict.get("timestamps")
+
+         # Convert timestamps to the expected format
+         timestamp_pairs = []
+         if timestamps:
+             for start, end in timestamps:
+                 timestamp_pairs.append((float(start), float(end)))
+
+         return ASRResult(
+             transcript=transcript,
+             confidence_scores=confidence_scores or [],
+             timestamps=timestamp_pairs
+         )
+
+     def list_supported_languages(self) -> List[str]:
+         """List supported languages."""
+         if self._handle is None:
+             raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+         # Get supported languages using the binding
+         languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
+         return languages
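For orientation, here is a minimal usage sketch of the new backend (not part of the diff). The model path and audio file are placeholders, it assumes auto_download_model can resolve the path locally, and callers would normally reach this class through the higher-level ASR API rather than instantiating the impl directly.

# Illustrative only: exercises the new PyBind ASR path shown above.
# "path/to/asr-model" and "meeting.wav" are hypothetical placeholders.
from nexaai.common import ModelConfig, PluginID
from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl

with PyBindASRImpl.from_(
    "path/to/asr-model",             # resolved/downloaded by @auto_download_model
    m_cfg=ModelConfig(n_ctx=4096),   # copied field by field into common_bind.ModelConfig
    plugin_id=PluginID.LLAMA_CPP,
) as asr:
    result = asr.transcribe("meeting.wav", language="en")
    print(result.transcript)
    print(asr.list_supported_languages())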
nexaai/base.py CHANGED
@@ -1,39 +1,39 @@
- from abc import ABC, abstractmethod
- from nexaai.common import ProfilingData
- from nexaai.utils.model_manager import auto_download_model
-
- class BaseModel(ABC):
-
-     def __enter__(self):
-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.eject()
-
-     def __del__(self):
-         self.eject()
-
-     @classmethod
-     @auto_download_model
-     def from_(cls, name_or_path: str, **kwargs) -> "BaseModel":
-         """
-         initialize model from (1) HF (2) if not found, then from local path
-         """
-
-         return cls._load_from(name_or_path, **kwargs)
-
-     @classmethod
-     @abstractmethod
-     def _load_from(cls, name_or_path: str, **kwargs) -> "BaseModel":
-         """
-         Model-specific loading logic. Must be implemented by each model type.
-         Called after model is available locally.
-         """
-         pass
-
-     @abstractmethod
-     def eject(self):
-         pass
-
-     def get_profiling_data(self) -> ProfilingData:
-         pass
+ from abc import ABC, abstractmethod
+ from nexaai.common import ProfilingData
+ from nexaai.utils.model_manager import auto_download_model
+
+ class BaseModel(ABC):
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.eject()
+
+     def __del__(self):
+         self.eject()
+
+     @classmethod
+     @auto_download_model
+     def from_(cls, name_or_path: str, **kwargs) -> "BaseModel":
+         """
+         initialize model from (1) HF (2) if not found, then from local path
+         """
+
+         return cls._load_from(name_or_path, **kwargs)
+
+     @classmethod
+     @abstractmethod
+     def _load_from(cls, name_or_path: str, **kwargs) -> "BaseModel":
+         """
+         Model-specific loading logic. Must be implemented by each model type.
+         Called after model is available locally.
+         """
+         pass
+
+     @abstractmethod
+     def eject(self):
+         pass
+
+     def get_profiling_data(self) -> ProfilingData:
+         pass
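base.py is textually identical before and after, but it defines the contract the new ASR backend above follows: from_ (wrapped by auto_download_model) resolves the model, _load_from builds the instance, and the context-manager hooks guarantee eject. A minimal sketch of a conforming subclass; the class and the path are hypothetical, and auto_download_model would attempt to resolve the path.

# Hypothetical subclass illustrating the BaseModel contract from base.py.
from nexaai.base import BaseModel

class DummyModel(BaseModel):
    def __init__(self, path: str):
        self._path = path  # stands in for a native handle

    @classmethod
    def _load_from(cls, name_or_path: str, **kwargs) -> "DummyModel":
        # Invoked by BaseModel.from_ once auto_download_model has resolved
        # name_or_path to a local directory.
        return cls(name_or_path)

    def eject(self):
        # Called by __exit__ and __del__; release native resources here.
        self._path = None

with DummyModel.from_("path/to/model") as model:  # placeholder path
    print(type(model).__name__)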
nexaai/binds/__init__.py CHANGED
@@ -1,5 +1,6 @@
- from .common_bind import *
- from .llm_bind import *
- from .embedder_bind import *
- from .vlm_bind import *
- from .rerank_bind import *
+ from .common_bind import *
+ from .llm_bind import *
+ from .embedder_bind import *
+ from .vlm_bind import *
+ from .rerank_bind import *
+ from .asr_bind import *
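The functional change in this file is the new wildcard re-export of asr_bind, which makes its symbols available directly from nexaai.binds. A small sketch of what that enables, hedged because asr_bind's export list is not shown in this diff:

# After this change both import styles reach the compiled asr_bind extension,
# assuming asr_bind defines ml_asr_create at module level and no restrictive
# __all__ (the function name is taken from pybind_asr_impl.py above).
from nexaai.binds import asr_bind       # submodule import used by the ASR impl
from nexaai.binds import ml_asr_create  # re-exported by the new wildcard import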
Binary files changed (contents not shown).
nexaai/common.py CHANGED
@@ -1,105 +1,105 @@
- from dataclasses import dataclass
- from typing import TypedDict, Literal, Optional, List
- from enum import Enum
-
-
- class PluginID(str, Enum):
-     """Enum for plugin identifiers."""
-     MLX = "mlx"
-     LLAMA_CPP = "llama_cpp"
-     NEXAML = "nexaml"
-     NPU = "npu"
-
-
- class ChatMessage(TypedDict):
-     role: Literal["user", "assistant", "system"]
-     content: str
-
- class MultiModalMessageContent(TypedDict):
-     type: Literal["text", "image", "audio", "video"]
-     text: Optional[str]
-     url: Optional[str]
-     path: Optional[str]
-
- class MultiModalMessage(TypedDict):
-     role: Literal["user", "assistant", "system"]
-     content: List[MultiModalMessageContent]
-
-
- @dataclass
- class SamplerConfig:
-     temperature: float = 0.8
-     top_p: float = 0.95
-     top_k: int = 40
-     repetition_penalty: float = 1.0
-     presence_penalty: float = 0.0
-     frequency_penalty: float = 0.0
-     seed: int = -1
-     grammar_path: str = None
-     grammar_string: str = None
-
- @dataclass
- class GenerationConfig:
-     max_tokens: int = 1024
-     stop_words: list[str] = None
-     sampler_config: SamplerConfig = None
-     image_paths: list[str] = None
-     audio_paths: list[str] = None
-
- @dataclass
- class ModelConfig:
-     n_ctx: int = 4096
-     n_threads: int = None
-     n_threads_batch: int = None
-     n_batch: int = 512
-     n_ubatch: int = 512
-     n_seq_max: int = 1
-     n_gpu_layers: int = 999
-     chat_template_path: str = None
-     chat_template_content: str = None
-
-
- @dataclass(frozen=True) # Read-only
- class ProfilingData:
-     """Profiling data structure for LLM/VLM performance metrics."""
-     ttft: int = 0 # Time to first token (us)
-     prompt_time: int = 0 # Prompt processing time (us)
-     decode_time: int = 0 # Token generation time (us)
-     prompt_tokens: int = 0 # Number of prompt tokens
-     generated_tokens: int = 0 # Number of generated tokens
-     audio_duration: int = 0 # Audio duration (us)
-     prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
-     decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
-     real_time_factor: float = 0.0 # Real-Time Factor (RTF)
-     stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
-
-     @classmethod
-     def from_dict(cls, data: dict) -> "ProfilingData":
-         """Create ProfilingData from dictionary."""
-         return cls(
-             ttft=data.get("ttft", 0),
-             prompt_time=data.get("prompt_time", 0),
-             decode_time=data.get("decode_time", 0),
-             prompt_tokens=data.get("prompt_tokens", 0),
-             generated_tokens=data.get("generated_tokens", 0),
-             audio_duration=data.get("audio_duration", 0),
-             prefill_speed=data.get("prefill_speed", 0.0),
-             decoding_speed=data.get("decoding_speed", 0.0),
-             real_time_factor=data.get("real_time_factor", 0.0),
-             stop_reason=data.get("stop_reason", "")
-         )
-
-     def to_dict(self) -> dict:
-         """Convert to dictionary."""
-         return {
-             "ttft": self.ttft,
-             "prompt_time": self.prompt_time,
-             "decode_time": self.decode_time,
-             "prompt_tokens": self.prompt_tokens,
-             "generated_tokens": self.generated_tokens,
-             "audio_duration": self.audio_duration,
-             "prefill_speed": self.prefill_speed,
-             "decoding_speed": self.decoding_speed,
-             "real_time_factor": self.real_time_factor,
-             "stop_reason": self.stop_reason
-         }
+ from dataclasses import dataclass
+ from typing import TypedDict, Literal, Optional, List
+ from enum import Enum
+
+
+ class PluginID(str, Enum):
+     """Enum for plugin identifiers."""
+     MLX = "mlx"
+     LLAMA_CPP = "llama_cpp"
+     NEXAML = "nexaml"
+     NPU = "npu"
+
+
+ class ChatMessage(TypedDict):
+     role: Literal["user", "assistant", "system"]
+     content: str
+
+ class MultiModalMessageContent(TypedDict):
+     type: Literal["text", "image", "audio", "video"]
+     text: Optional[str]
+     url: Optional[str]
+     path: Optional[str]
+
+ class MultiModalMessage(TypedDict):
+     role: Literal["user", "assistant", "system"]
+     content: List[MultiModalMessageContent]
+
+
+ @dataclass
+ class SamplerConfig:
+     temperature: float = 0.8
+     top_p: float = 0.95
+     top_k: int = 40
+     repetition_penalty: float = 1.0
+     presence_penalty: float = 0.0
+     frequency_penalty: float = 0.0
+     seed: int = -1
+     grammar_path: str = None
+     grammar_string: str = None
+
+ @dataclass
+ class GenerationConfig:
+     max_tokens: int = 1024
+     stop_words: list[str] = None
+     sampler_config: SamplerConfig = None
+     image_paths: list[str] = None
+     audio_paths: list[str] = None
+
+ @dataclass
+ class ModelConfig:
+     n_ctx: int = 4096
+     n_threads: int = None
+     n_threads_batch: int = None
+     n_batch: int = 512
+     n_ubatch: int = 512
+     n_seq_max: int = 1
+     n_gpu_layers: int = 999
+     chat_template_path: str = None
+     chat_template_content: str = None
+
+
+ @dataclass(frozen=True) # Read-only
+ class ProfilingData:
+     """Profiling data structure for LLM/VLM performance metrics."""
+     ttft: int = 0 # Time to first token (us)
+     prompt_time: int = 0 # Prompt processing time (us)
+     decode_time: int = 0 # Token generation time (us)
+     prompt_tokens: int = 0 # Number of prompt tokens
+     generated_tokens: int = 0 # Number of generated tokens
+     audio_duration: int = 0 # Audio duration (us)
+     prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
+     decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
+     real_time_factor: float = 0.0 # Real-Time Factor (RTF)
+     stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "ProfilingData":
+         """Create ProfilingData from dictionary."""
+         return cls(
+             ttft=data.get("ttft", 0),
+             prompt_time=data.get("prompt_time", 0),
+             decode_time=data.get("decode_time", 0),
+             prompt_tokens=data.get("prompt_tokens", 0),
+             generated_tokens=data.get("generated_tokens", 0),
+             audio_duration=data.get("audio_duration", 0),
+             prefill_speed=data.get("prefill_speed", 0.0),
+             decoding_speed=data.get("decoding_speed", 0.0),
+             real_time_factor=data.get("real_time_factor", 0.0),
+             stop_reason=data.get("stop_reason", "")
+         )
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary."""
+         return {
+             "ttft": self.ttft,
+             "prompt_time": self.prompt_time,
+             "decode_time": self.decode_time,
+             "prompt_tokens": self.prompt_tokens,
+             "generated_tokens": self.generated_tokens,
+             "audio_duration": self.audio_duration,
+             "prefill_speed": self.prefill_speed,
+             "decoding_speed": self.decoding_speed,
+             "real_time_factor": self.real_time_factor,
+             "stop_reason": self.stop_reason
+         }
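common.py is likewise textually identical before and after; its dataclasses are plain containers that the backends copy field by field (as pybind_asr_impl.py does above). A short illustrative sketch of how they compose, with made-up values:

# Illustrative composition of the config dataclasses defined in common.py.
from nexaai.common import GenerationConfig, ModelConfig, ProfilingData, SamplerConfig

m_cfg = ModelConfig(n_ctx=8192, n_gpu_layers=0)  # CPU-only, larger context
g_cfg = GenerationConfig(
    max_tokens=256,
    stop_words=["</s>"],
    sampler_config=SamplerConfig(temperature=0.2, top_p=0.9, seed=42),
)
print(m_cfg.n_ctx, g_cfg.max_tokens)

# ProfilingData round-trips through plain dicts; missing keys fall back to the defaults.
profile = ProfilingData.from_dict({"ttft": 120_000, "generated_tokens": 256})
assert profile.to_dict()["ttft"] == 120_000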