nexaai 1.0.21rc5__cp313-cp313-win_arm64.whl → 1.0.21rc14__cp313-cp313-win_arm64.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.



Files changed (105)
  1. nexaai/__init__.py +95 -95
  2. nexaai/_stub.cp313-win_arm64.pyd +0 -0
  3. nexaai/_version.py +4 -1
  4. nexaai/asr.py +68 -65
  5. nexaai/asr_impl/mlx_asr_impl.py +92 -92
  6. nexaai/asr_impl/pybind_asr_impl.py +127 -44
  7. nexaai/base.py +39 -39
  8. nexaai/binds/__init__.py +6 -5
  9. nexaai/binds/asr_bind.cp313-win_arm64.pyd +0 -0
  10. nexaai/binds/common_bind.cp313-win_arm64.pyd +0 -0
  11. nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
  12. nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
  13. nexaai/binds/cpu_gpu/ggml-opencl.dll +0 -0
  14. nexaai/binds/cpu_gpu/ggml.dll +0 -0
  15. nexaai/binds/cpu_gpu/mtmd.dll +0 -0
  16. nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
  17. nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
  18. nexaai/binds/embedder_bind.cp313-win_arm64.pyd +0 -0
  19. nexaai/binds/libcrypto-3-arm64.dll +0 -0
  20. nexaai/binds/libssl-3-arm64.dll +0 -0
  21. nexaai/binds/llm_bind.cp313-win_arm64.pyd +0 -0
  22. nexaai/binds/nexa_bridge.dll +0 -0
  23. nexaai/binds/npu/convnext-sdk.dll +0 -0
  24. nexaai/binds/npu/embed-gemma-sdk.dll +0 -0
  25. nexaai/binds/npu/ggml-base.dll +0 -0
  26. nexaai/binds/npu/ggml-cpu.dll +0 -0
  27. nexaai/binds/npu/ggml-opencl.dll +0 -0
  28. nexaai/binds/npu/ggml.dll +0 -0
  29. nexaai/binds/npu/granite-nano-sdk.dll +0 -0
  30. nexaai/binds/npu/granite4-sdk.dll +0 -0
  31. nexaai/binds/npu/jina-rerank-sdk.dll +0 -0
  32. nexaai/binds/npu/liquid-sdk.dll +0 -0
  33. nexaai/binds/npu/llama3-3b-sdk.dll +0 -0
  34. nexaai/binds/npu/nexa-mm-process.dll +0 -0
  35. nexaai/binds/npu/nexa-sampling.dll +0 -0
  36. nexaai/binds/npu/nexa_plugin.dll +0 -0
  37. nexaai/binds/npu/omni-neural-sdk.dll +0 -0
  38. nexaai/binds/npu/openblas.dll +0 -0
  39. nexaai/binds/npu/paddleocr-sdk.dll +0 -0
  40. nexaai/binds/npu/parakeet-sdk.dll +0 -0
  41. nexaai/binds/npu/phi3-5-sdk.dll +0 -0
  42. nexaai/binds/npu/phi4-sdk.dll +0 -0
  43. nexaai/binds/npu/pyannote-sdk.dll +0 -0
  44. nexaai/binds/npu/qwen3-4b-sdk.dll +0 -0
  45. nexaai/binds/npu/qwen3vl-sdk.dll +0 -0
  46. nexaai/binds/npu/qwen3vl-vision.dll +0 -0
  47. nexaai/binds/npu/yolov12-sdk.dll +0 -0
  48. nexaai/binds/npu/zlib1.dll +0 -0
  49. nexaai/binds/rerank_bind.cp313-win_arm64.pyd +0 -0
  50. nexaai/binds/vlm_bind.cp313-win_arm64.pyd +0 -0
  51. nexaai/common.py +105 -105
  52. nexaai/cv.py +93 -93
  53. nexaai/cv_impl/mlx_cv_impl.py +89 -89
  54. nexaai/cv_impl/pybind_cv_impl.py +32 -32
  55. nexaai/embedder.py +73 -73
  56. nexaai/embedder_impl/mlx_embedder_impl.py +118 -118
  57. nexaai/embedder_impl/pybind_embedder_impl.py +96 -96
  58. nexaai/image_gen.py +141 -141
  59. nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -292
  60. nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -85
  61. nexaai/llm.py +98 -98
  62. nexaai/llm_impl/mlx_llm_impl.py +271 -271
  63. nexaai/llm_impl/pybind_llm_impl.py +220 -220
  64. nexaai/log.py +92 -92
  65. nexaai/rerank.py +57 -57
  66. nexaai/rerank_impl/mlx_rerank_impl.py +94 -94
  67. nexaai/rerank_impl/pybind_rerank_impl.py +136 -136
  68. nexaai/runtime.py +68 -68
  69. nexaai/runtime_error.py +24 -24
  70. nexaai/tts.py +75 -75
  71. nexaai/tts_impl/mlx_tts_impl.py +94 -94
  72. nexaai/tts_impl/pybind_tts_impl.py +43 -43
  73. nexaai/utils/decode.py +17 -17
  74. nexaai/utils/manifest_utils.py +531 -531
  75. nexaai/utils/model_manager.py +1562 -1562
  76. nexaai/utils/model_types.py +49 -49
  77. nexaai/utils/progress_tracker.py +384 -384
  78. nexaai/utils/quantization_utils.py +245 -245
  79. nexaai/vlm.py +129 -129
  80. nexaai/vlm_impl/mlx_vlm_impl.py +258 -258
  81. nexaai/vlm_impl/pybind_vlm_impl.py +256 -256
  82. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/METADATA +1 -1
  83. nexaai-1.0.21rc14.dist-info/RECORD +154 -0
  84. nexaai/binds/nexaml/FLAC.dll +0 -0
  85. nexaai/binds/nexaml/fftw3.dll +0 -0
  86. nexaai/binds/nexaml/fftw3f.dll +0 -0
  87. nexaai/binds/nexaml/ggml-base.dll +0 -0
  88. nexaai/binds/nexaml/ggml-cpu.dll +0 -0
  89. nexaai/binds/nexaml/ggml-opencl.dll +0 -0
  90. nexaai/binds/nexaml/ggml.dll +0 -0
  91. nexaai/binds/nexaml/libmp3lame.DLL +0 -0
  92. nexaai/binds/nexaml/mpg123.dll +0 -0
  93. nexaai/binds/nexaml/nexa-mm-process.dll +0 -0
  94. nexaai/binds/nexaml/nexa-sampling.dll +0 -0
  95. nexaai/binds/nexaml/nexa_plugin.dll +0 -0
  96. nexaai/binds/nexaml/nexaproc.dll +0 -0
  97. nexaai/binds/nexaml/ogg.dll +0 -0
  98. nexaai/binds/nexaml/opus.dll +0 -0
  99. nexaai/binds/nexaml/qwen3-vl.dll +0 -0
  100. nexaai/binds/nexaml/qwen3vl-vision.dll +0 -0
  101. nexaai/binds/nexaml/vorbis.dll +0 -0
  102. nexaai/binds/nexaml/vorbisenc.dll +0 -0
  103. nexaai-1.0.21rc5.dist-info/RECORD +0 -162
  104. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/WHEEL +0 -0
  105. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/top_level.txt +0 -0
nexaai/asr_impl/pybind_asr_impl.py CHANGED
@@ -1,44 +1,127 @@
- from typing import List, Optional, Union
-
- from nexaai.common import PluginID
- from nexaai.asr import ASR, ASRConfig, ASRResult
-
-
- class PyBindASRImpl(ASR):
-     def __init__(self):
-         """Initialize PyBind ASR implementation."""
-         super().__init__()
-         # TODO: Add PyBind-specific initialization
-
-     @classmethod
-     def _load_from(cls,
-                    model_path: str,
-                    tokenizer_path: Optional[str] = None,
-                    language: Optional[str] = None,
-                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                    device_id: Optional[str] = None
-                    ) -> 'PyBindASRImpl':
-         """Load ASR model from local path using PyBind backend."""
-         # TODO: Implement PyBind ASR loading
-         instance = cls()
-         return instance
-
-     def eject(self):
-         """Destroy the model and free resources."""
-         # TODO: Implement PyBind ASR cleanup
-         pass
-
-     def transcribe(
-         self,
-         audio_path: str,
-         language: Optional[str] = None,
-         config: Optional[ASRConfig] = None,
-     ) -> ASRResult:
-         """Transcribe audio file to text."""
-         # TODO: Implement PyBind ASR transcription
-         raise NotImplementedError("PyBind ASR transcription not yet implemented")
-
-     def list_supported_languages(self) -> List[str]:
-         """List supported languages."""
-         # TODO: Implement PyBind ASR language listing
-         raise NotImplementedError("PyBind ASR language listing not yet implemented")
+ from typing import List, Optional, Union
+
+ from nexaai.common import PluginID, ModelConfig
+ from nexaai.asr import ASR, ASRConfig, ASRResult
+ from nexaai.binds import asr_bind, common_bind
+ from nexaai.runtime import _ensure_runtime
+
+
+ class PyBindASRImpl(ASR):
+     def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
+         """Private constructor, should not be called directly."""
+         super().__init__(m_cfg)
+         self._handle = handle # This is a py::capsule
+         self._model_config = None
+
+     @classmethod
+     def _load_from(cls,
+                    model_path: str,
+                    model_name: Optional[str] = None,
+                    tokenizer_path: Optional[str] = None,
+                    language: Optional[str] = None,
+                    m_cfg: ModelConfig = ModelConfig(),
+                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
+                    device_id: Optional[str] = None
+                    ) -> 'PyBindASRImpl':
+         """Load ASR model from local path using PyBind backend."""
+         _ensure_runtime()
+
+         # Create model config
+         config = common_bind.ModelConfig()
+
+         config.n_ctx = m_cfg.n_ctx
+         if m_cfg.n_threads is not None:
+             config.n_threads = m_cfg.n_threads
+         if m_cfg.n_threads_batch is not None:
+             config.n_threads_batch = m_cfg.n_threads_batch
+         if m_cfg.n_batch is not None:
+             config.n_batch = m_cfg.n_batch
+         if m_cfg.n_ubatch is not None:
+             config.n_ubatch = m_cfg.n_ubatch
+         if m_cfg.n_seq_max is not None:
+             config.n_seq_max = m_cfg.n_seq_max
+         config.n_gpu_layers = m_cfg.n_gpu_layers
+
+         # handle chat template strings
+         if m_cfg.chat_template_path:
+             config.chat_template_path = m_cfg.chat_template_path
+
+         if m_cfg.chat_template_content:
+             config.chat_template_content = m_cfg.chat_template_content
+
+         # Convert plugin_id to string
+         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
+
+         # Create ASR handle using the binding
+         handle = asr_bind.ml_asr_create(
+             model_path=model_path,
+             model_name=model_name,
+             tokenizer_path=tokenizer_path,
+             model_config=config,
+             language=language,
+             plugin_id=plugin_id_str,
+             device_id=device_id,
+             license_id=None, # Optional
+             license_key=None # Optional
+         )
+
+         return cls(handle, m_cfg)
+
+     def eject(self):
+         """Release the model from memory."""
+         # py::capsule handles cleanup automatically
+         if hasattr(self, '_handle') and self._handle is not None:
+             del self._handle
+             self._handle = None
+
+     def transcribe(
+         self,
+         audio_path: str,
+         language: Optional[str] = None,
+         config: Optional[ASRConfig] = None,
+     ) -> ASRResult:
+         """Transcribe audio file to text."""
+         if self._handle is None:
+             raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+         # Convert ASRConfig to binding format if provided
+         asr_config = None
+         if config:
+             asr_config = asr_bind.ASRConfig()
+             asr_config.timestamps = config.timestamps
+             asr_config.beam_size = config.beam_size
+             asr_config.stream = config.stream
+
+         # Perform transcription using the binding
+         result_dict = asr_bind.ml_asr_transcribe(
+             handle=self._handle,
+             audio_path=audio_path,
+             language=language,
+             config=asr_config
+         )
+
+         # Convert result to ASRResult
+         transcript = result_dict.get("transcript", "")
+         confidence_scores = result_dict.get("confidence_scores")
+         timestamps = result_dict.get("timestamps")
+
+         # Convert timestamps to the expected format
+         timestamp_pairs = []
+         if timestamps:
+             for start, end in timestamps:
+                 timestamp_pairs.append((float(start), float(end)))
+
+         return ASRResult(
+             transcript=transcript,
+             confidence_scores=confidence_scores or [],
+             timestamps=timestamp_pairs
+         )
+
+     def list_supported_languages(self) -> List[str]:
+         """List supported languages."""
+         if self._handle is None:
+             raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+         # Get supported languages using the binding
+         languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
+         return languages
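For orientation, here is a minimal usage sketch of the new backend (not part of the diff). The model path and audio file are placeholders, it assumes auto_download_model can resolve the path locally, and callers would normally reach this class through the higher-level ASR API rather than instantiating the impl directly.

# Illustrative only: exercises the new PyBind ASR path shown above.
# "path/to/asr-model" and "meeting.wav" are hypothetical placeholders.
from nexaai.common import ModelConfig, PluginID
from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl

with PyBindASRImpl.from_(
    "path/to/asr-model",             # resolved/downloaded by @auto_download_model
    m_cfg=ModelConfig(n_ctx=4096),   # copied field by field into common_bind.ModelConfig
    plugin_id=PluginID.LLAMA_CPP,
) as asr:
    result = asr.transcribe("meeting.wav", language="en")
    print(result.transcript)
    print(asr.list_supported_languages())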
nexaai/base.py CHANGED
@@ -1,39 +1,39 @@
- from abc import ABC, abstractmethod
- from nexaai.common import ProfilingData
- from nexaai.utils.model_manager import auto_download_model
-
- class BaseModel(ABC):
-
-     def __enter__(self):
-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.eject()
-
-     def __del__(self):
-         self.eject()
-
-     @classmethod
-     @auto_download_model
-     def from_(cls, name_or_path: str, **kwargs) -> "BaseModel":
-         """
-         initialize model from (1) HF (2) if not found, then from local path
-         """
-
-         return cls._load_from(name_or_path, **kwargs)
-
-     @classmethod
-     @abstractmethod
-     def _load_from(cls, name_or_path: str, **kwargs) -> "BaseModel":
-         """
-         Model-specific loading logic. Must be implemented by each model type.
-         Called after model is available locally.
-         """
-         pass
-
-     @abstractmethod
-     def eject(self):
-         pass
-
-     def get_profiling_data(self) -> ProfilingData:
-         pass
+ from abc import ABC, abstractmethod
+ from nexaai.common import ProfilingData
+ from nexaai.utils.model_manager import auto_download_model
+
+ class BaseModel(ABC):
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.eject()
+
+     def __del__(self):
+         self.eject()
+
+     @classmethod
+     @auto_download_model
+     def from_(cls, name_or_path: str, **kwargs) -> "BaseModel":
+         """
+         initialize model from (1) HF (2) if not found, then from local path
+         """
+
+         return cls._load_from(name_or_path, **kwargs)
+
+     @classmethod
+     @abstractmethod
+     def _load_from(cls, name_or_path: str, **kwargs) -> "BaseModel":
+         """
+         Model-specific loading logic. Must be implemented by each model type.
+         Called after model is available locally.
+         """
+         pass
+
+     @abstractmethod
+     def eject(self):
+         pass
+
+     def get_profiling_data(self) -> ProfilingData:
+         pass
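base.py is textually identical before and after, but it defines the contract the new ASR backend above follows: from_ (wrapped by auto_download_model) resolves the model, _load_from builds the instance, and the context-manager hooks guarantee eject. A minimal sketch of a conforming subclass; the class and the path are hypothetical, and auto_download_model would attempt to resolve the path.

# Hypothetical subclass illustrating the BaseModel contract from base.py.
from nexaai.base import BaseModel

class DummyModel(BaseModel):
    def __init__(self, path: str):
        self._path = path  # stands in for a native handle

    @classmethod
    def _load_from(cls, name_or_path: str, **kwargs) -> "DummyModel":
        # Invoked by BaseModel.from_ once auto_download_model has resolved
        # name_or_path to a local directory.
        return cls(name_or_path)

    def eject(self):
        # Called by __exit__ and __del__; release native resources here.
        self._path = None

with DummyModel.from_("path/to/model") as model:  # placeholder path
    print(type(model).__name__)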
nexaai/binds/__init__.py CHANGED
@@ -1,5 +1,6 @@
- from .common_bind import *
- from .llm_bind import *
- from .embedder_bind import *
- from .vlm_bind import *
- from .rerank_bind import *
+ from .common_bind import *
+ from .llm_bind import *
+ from .embedder_bind import *
+ from .vlm_bind import *
+ from .rerank_bind import *
+ from .asr_bind import *
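The functional change in this file is the new wildcard re-export of asr_bind, which makes its symbols available directly from nexaai.binds. A small sketch of what that enables, hedged because asr_bind's export list is not shown in this diff:

# After this change both import styles reach the compiled asr_bind extension,
# assuming asr_bind defines ml_asr_create at module level and no restrictive
# __all__ (the function name is taken from pybind_asr_impl.py above).
from nexaai.binds import asr_bind       # submodule import used by the ASR impl
from nexaai.binds import ml_asr_create  # re-exported by the new wildcard import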
Binary files changed (contents not shown).
nexaai/common.py CHANGED
@@ -1,105 +1,105 @@
- from dataclasses import dataclass
- from typing import TypedDict, Literal, Optional, List
- from enum import Enum
-
-
- class PluginID(str, Enum):
-     """Enum for plugin identifiers."""
-     MLX = "mlx"
-     LLAMA_CPP = "llama_cpp"
-     NEXAML = "nexaml"
-     NPU = "npu"
-
-
- class ChatMessage(TypedDict):
-     role: Literal["user", "assistant", "system"]
-     content: str
-
- class MultiModalMessageContent(TypedDict):
-     type: Literal["text", "image", "audio", "video"]
-     text: Optional[str]
-     url: Optional[str]
-     path: Optional[str]
-
- class MultiModalMessage(TypedDict):
-     role: Literal["user", "assistant", "system"]
-     content: List[MultiModalMessageContent]
-
-
- @dataclass
- class SamplerConfig:
-     temperature: float = 0.8
-     top_p: float = 0.95
-     top_k: int = 40
-     repetition_penalty: float = 1.0
-     presence_penalty: float = 0.0
-     frequency_penalty: float = 0.0
-     seed: int = -1
-     grammar_path: str = None
-     grammar_string: str = None
-
- @dataclass
- class GenerationConfig:
-     max_tokens: int = 1024
-     stop_words: list[str] = None
-     sampler_config: SamplerConfig = None
-     image_paths: list[str] = None
-     audio_paths: list[str] = None
-
- @dataclass
- class ModelConfig:
-     n_ctx: int = 4096
-     n_threads: int = None
-     n_threads_batch: int = None
-     n_batch: int = 512
-     n_ubatch: int = 512
-     n_seq_max: int = 1
-     n_gpu_layers: int = 999
-     chat_template_path: str = None
-     chat_template_content: str = None
-
-
- @dataclass(frozen=True) # Read-only
- class ProfilingData:
-     """Profiling data structure for LLM/VLM performance metrics."""
-     ttft: int = 0 # Time to first token (us)
-     prompt_time: int = 0 # Prompt processing time (us)
-     decode_time: int = 0 # Token generation time (us)
-     prompt_tokens: int = 0 # Number of prompt tokens
-     generated_tokens: int = 0 # Number of generated tokens
-     audio_duration: int = 0 # Audio duration (us)
-     prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
-     decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
-     real_time_factor: float = 0.0 # Real-Time Factor (RTF)
-     stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
-
-     @classmethod
-     def from_dict(cls, data: dict) -> "ProfilingData":
-         """Create ProfilingData from dictionary."""
-         return cls(
-             ttft=data.get("ttft", 0),
-             prompt_time=data.get("prompt_time", 0),
-             decode_time=data.get("decode_time", 0),
-             prompt_tokens=data.get("prompt_tokens", 0),
-             generated_tokens=data.get("generated_tokens", 0),
-             audio_duration=data.get("audio_duration", 0),
-             prefill_speed=data.get("prefill_speed", 0.0),
-             decoding_speed=data.get("decoding_speed", 0.0),
-             real_time_factor=data.get("real_time_factor", 0.0),
-             stop_reason=data.get("stop_reason", "")
-         )
-
-     def to_dict(self) -> dict:
-         """Convert to dictionary."""
-         return {
-             "ttft": self.ttft,
-             "prompt_time": self.prompt_time,
-             "decode_time": self.decode_time,
-             "prompt_tokens": self.prompt_tokens,
-             "generated_tokens": self.generated_tokens,
-             "audio_duration": self.audio_duration,
-             "prefill_speed": self.prefill_speed,
-             "decoding_speed": self.decoding_speed,
-             "real_time_factor": self.real_time_factor,
-             "stop_reason": self.stop_reason
-         }
+ from dataclasses import dataclass
+ from typing import TypedDict, Literal, Optional, List
+ from enum import Enum
+
+
+ class PluginID(str, Enum):
+     """Enum for plugin identifiers."""
+     MLX = "mlx"
+     LLAMA_CPP = "llama_cpp"
+     NEXAML = "nexaml"
+     NPU = "npu"
+
+
+ class ChatMessage(TypedDict):
+     role: Literal["user", "assistant", "system"]
+     content: str
+
+ class MultiModalMessageContent(TypedDict):
+     type: Literal["text", "image", "audio", "video"]
+     text: Optional[str]
+     url: Optional[str]
+     path: Optional[str]
+
+ class MultiModalMessage(TypedDict):
+     role: Literal["user", "assistant", "system"]
+     content: List[MultiModalMessageContent]
+
+
+ @dataclass
+ class SamplerConfig:
+     temperature: float = 0.8
+     top_p: float = 0.95
+     top_k: int = 40
+     repetition_penalty: float = 1.0
+     presence_penalty: float = 0.0
+     frequency_penalty: float = 0.0
+     seed: int = -1
+     grammar_path: str = None
+     grammar_string: str = None
+
+ @dataclass
+ class GenerationConfig:
+     max_tokens: int = 1024
+     stop_words: list[str] = None
+     sampler_config: SamplerConfig = None
+     image_paths: list[str] = None
+     audio_paths: list[str] = None
+
+ @dataclass
+ class ModelConfig:
+     n_ctx: int = 4096
+     n_threads: int = None
+     n_threads_batch: int = None
+     n_batch: int = 512
+     n_ubatch: int = 512
+     n_seq_max: int = 1
+     n_gpu_layers: int = 999
+     chat_template_path: str = None
+     chat_template_content: str = None
+
+
+ @dataclass(frozen=True) # Read-only
+ class ProfilingData:
+     """Profiling data structure for LLM/VLM performance metrics."""
+     ttft: int = 0 # Time to first token (us)
+     prompt_time: int = 0 # Prompt processing time (us)
+     decode_time: int = 0 # Token generation time (us)
+     prompt_tokens: int = 0 # Number of prompt tokens
+     generated_tokens: int = 0 # Number of generated tokens
+     audio_duration: int = 0 # Audio duration (us)
+     prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
+     decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
+     real_time_factor: float = 0.0 # Real-Time Factor (RTF)
+     stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "ProfilingData":
+         """Create ProfilingData from dictionary."""
+         return cls(
+             ttft=data.get("ttft", 0),
+             prompt_time=data.get("prompt_time", 0),
+             decode_time=data.get("decode_time", 0),
+             prompt_tokens=data.get("prompt_tokens", 0),
+             generated_tokens=data.get("generated_tokens", 0),
+             audio_duration=data.get("audio_duration", 0),
+             prefill_speed=data.get("prefill_speed", 0.0),
+             decoding_speed=data.get("decoding_speed", 0.0),
+             real_time_factor=data.get("real_time_factor", 0.0),
+             stop_reason=data.get("stop_reason", "")
+         )
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary."""
+         return {
+             "ttft": self.ttft,
+             "prompt_time": self.prompt_time,
+             "decode_time": self.decode_time,
+             "prompt_tokens": self.prompt_tokens,
+             "generated_tokens": self.generated_tokens,
+             "audio_duration": self.audio_duration,
+             "prefill_speed": self.prefill_speed,
+             "decoding_speed": self.decoding_speed,
+             "real_time_factor": self.real_time_factor,
+             "stop_reason": self.stop_reason
+         }
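common.py is likewise textually identical before and after; its dataclasses are plain containers that the backends copy field by field (as pybind_asr_impl.py does above). A short illustrative sketch of how they compose, with made-up values:

# Illustrative composition of the config dataclasses defined in common.py.
from nexaai.common import GenerationConfig, ModelConfig, ProfilingData, SamplerConfig

m_cfg = ModelConfig(n_ctx=8192, n_gpu_layers=0)  # CPU-only, larger context
g_cfg = GenerationConfig(
    max_tokens=256,
    stop_words=["</s>"],
    sampler_config=SamplerConfig(temperature=0.2, top_p=0.9, seed=42),
)
print(m_cfg.n_ctx, g_cfg.max_tokens)

# ProfilingData round-trips through plain dicts; missing keys fall back to the defaults.
profile = ProfilingData.from_dict({"ttft": 120_000, "generated_tokens": 256})
assert profile.to_dict()["ttft"] == 120_000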