nexaai 1.0.21rc5__cp313-cp313-win_arm64.whl → 1.0.21rc14__cp313-cp313-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic (further details were linked from the original page).

Files changed (105):
  1. nexaai/__init__.py +95 -95
  2. nexaai/_stub.cp313-win_arm64.pyd +0 -0
  3. nexaai/_version.py +4 -1
  4. nexaai/asr.py +68 -65
  5. nexaai/asr_impl/mlx_asr_impl.py +92 -92
  6. nexaai/asr_impl/pybind_asr_impl.py +127 -44
  7. nexaai/base.py +39 -39
  8. nexaai/binds/__init__.py +6 -5
  9. nexaai/binds/asr_bind.cp313-win_arm64.pyd +0 -0
  10. nexaai/binds/common_bind.cp313-win_arm64.pyd +0 -0
  11. nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
  12. nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
  13. nexaai/binds/cpu_gpu/ggml-opencl.dll +0 -0
  14. nexaai/binds/cpu_gpu/ggml.dll +0 -0
  15. nexaai/binds/cpu_gpu/mtmd.dll +0 -0
  16. nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
  17. nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
  18. nexaai/binds/embedder_bind.cp313-win_arm64.pyd +0 -0
  19. nexaai/binds/libcrypto-3-arm64.dll +0 -0
  20. nexaai/binds/libssl-3-arm64.dll +0 -0
  21. nexaai/binds/llm_bind.cp313-win_arm64.pyd +0 -0
  22. nexaai/binds/nexa_bridge.dll +0 -0
  23. nexaai/binds/npu/convnext-sdk.dll +0 -0
  24. nexaai/binds/npu/embed-gemma-sdk.dll +0 -0
  25. nexaai/binds/npu/ggml-base.dll +0 -0
  26. nexaai/binds/npu/ggml-cpu.dll +0 -0
  27. nexaai/binds/npu/ggml-opencl.dll +0 -0
  28. nexaai/binds/npu/ggml.dll +0 -0
  29. nexaai/binds/npu/granite-nano-sdk.dll +0 -0
  30. nexaai/binds/npu/granite4-sdk.dll +0 -0
  31. nexaai/binds/npu/jina-rerank-sdk.dll +0 -0
  32. nexaai/binds/npu/liquid-sdk.dll +0 -0
  33. nexaai/binds/npu/llama3-3b-sdk.dll +0 -0
  34. nexaai/binds/npu/nexa-mm-process.dll +0 -0
  35. nexaai/binds/npu/nexa-sampling.dll +0 -0
  36. nexaai/binds/npu/nexa_plugin.dll +0 -0
  37. nexaai/binds/npu/omni-neural-sdk.dll +0 -0
  38. nexaai/binds/npu/openblas.dll +0 -0
  39. nexaai/binds/npu/paddleocr-sdk.dll +0 -0
  40. nexaai/binds/npu/parakeet-sdk.dll +0 -0
  41. nexaai/binds/npu/phi3-5-sdk.dll +0 -0
  42. nexaai/binds/npu/phi4-sdk.dll +0 -0
  43. nexaai/binds/npu/pyannote-sdk.dll +0 -0
  44. nexaai/binds/npu/qwen3-4b-sdk.dll +0 -0
  45. nexaai/binds/npu/qwen3vl-sdk.dll +0 -0
  46. nexaai/binds/npu/qwen3vl-vision.dll +0 -0
  47. nexaai/binds/npu/yolov12-sdk.dll +0 -0
  48. nexaai/binds/npu/zlib1.dll +0 -0
  49. nexaai/binds/rerank_bind.cp313-win_arm64.pyd +0 -0
  50. nexaai/binds/vlm_bind.cp313-win_arm64.pyd +0 -0
  51. nexaai/common.py +105 -105
  52. nexaai/cv.py +93 -93
  53. nexaai/cv_impl/mlx_cv_impl.py +89 -89
  54. nexaai/cv_impl/pybind_cv_impl.py +32 -32
  55. nexaai/embedder.py +73 -73
  56. nexaai/embedder_impl/mlx_embedder_impl.py +118 -118
  57. nexaai/embedder_impl/pybind_embedder_impl.py +96 -96
  58. nexaai/image_gen.py +141 -141
  59. nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -292
  60. nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -85
  61. nexaai/llm.py +98 -98
  62. nexaai/llm_impl/mlx_llm_impl.py +271 -271
  63. nexaai/llm_impl/pybind_llm_impl.py +220 -220
  64. nexaai/log.py +92 -92
  65. nexaai/rerank.py +57 -57
  66. nexaai/rerank_impl/mlx_rerank_impl.py +94 -94
  67. nexaai/rerank_impl/pybind_rerank_impl.py +136 -136
  68. nexaai/runtime.py +68 -68
  69. nexaai/runtime_error.py +24 -24
  70. nexaai/tts.py +75 -75
  71. nexaai/tts_impl/mlx_tts_impl.py +94 -94
  72. nexaai/tts_impl/pybind_tts_impl.py +43 -43
  73. nexaai/utils/decode.py +17 -17
  74. nexaai/utils/manifest_utils.py +531 -531
  75. nexaai/utils/model_manager.py +1562 -1562
  76. nexaai/utils/model_types.py +49 -49
  77. nexaai/utils/progress_tracker.py +384 -384
  78. nexaai/utils/quantization_utils.py +245 -245
  79. nexaai/vlm.py +129 -129
  80. nexaai/vlm_impl/mlx_vlm_impl.py +258 -258
  81. nexaai/vlm_impl/pybind_vlm_impl.py +256 -256
  82. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/METADATA +1 -1
  83. nexaai-1.0.21rc14.dist-info/RECORD +154 -0
  84. nexaai/binds/nexaml/FLAC.dll +0 -0
  85. nexaai/binds/nexaml/fftw3.dll +0 -0
  86. nexaai/binds/nexaml/fftw3f.dll +0 -0
  87. nexaai/binds/nexaml/ggml-base.dll +0 -0
  88. nexaai/binds/nexaml/ggml-cpu.dll +0 -0
  89. nexaai/binds/nexaml/ggml-opencl.dll +0 -0
  90. nexaai/binds/nexaml/ggml.dll +0 -0
  91. nexaai/binds/nexaml/libmp3lame.DLL +0 -0
  92. nexaai/binds/nexaml/mpg123.dll +0 -0
  93. nexaai/binds/nexaml/nexa-mm-process.dll +0 -0
  94. nexaai/binds/nexaml/nexa-sampling.dll +0 -0
  95. nexaai/binds/nexaml/nexa_plugin.dll +0 -0
  96. nexaai/binds/nexaml/nexaproc.dll +0 -0
  97. nexaai/binds/nexaml/ogg.dll +0 -0
  98. nexaai/binds/nexaml/opus.dll +0 -0
  99. nexaai/binds/nexaml/qwen3-vl.dll +0 -0
  100. nexaai/binds/nexaml/qwen3vl-vision.dll +0 -0
  101. nexaai/binds/nexaml/vorbis.dll +0 -0
  102. nexaai/binds/nexaml/vorbisenc.dll +0 -0
  103. nexaai-1.0.21rc5.dist-info/RECORD +0 -162
  104. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/WHEEL +0 -0
  105. {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/top_level.txt +0 -0
@@ -1,245 +1,245 @@
1
- """
2
- Quantization utilities for extracting quantization types from model files and configurations.
3
-
4
- This module provides utilities to extract quantization information from:
5
- - GGUF model filenames
6
- - MLX model repository IDs
7
- - MLX model config.json files
8
- """
9
-
10
- import os
11
- import json
12
- import re
13
- import logging
14
- from enum import Enum
15
- from typing import Optional
16
-
17
- # Set up logger
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- class QuantizationType(str, Enum):
22
- """Enum for GGUF and MLX model quantization types."""
23
- # GGUF quantization types
24
- BF16 = "BF16"
25
- F16 = "F16"
26
- Q2_K = "Q2_K"
27
- Q2_K_L = "Q2_K_L"
28
- Q3_K = "Q3_K"
29
- Q3_K_M = "Q3_K_M"
30
- Q3_K_S = "Q3_K_S"
31
- Q4_0 = "Q4_0"
32
- Q4_1 = "Q4_1"
33
- Q4_K = "Q4_K"
34
- Q4_K_M = "Q4_K_M"
35
- Q4_K_S = "Q4_K_S"
36
- Q5_K = "Q5_K"
37
- Q5_K_M = "Q5_K_M"
38
- Q5_K_S = "Q5_K_S"
39
- Q6_K = "Q6_K"
40
- Q8_0 = "Q8_0"
41
- MXFP4 = "MXFP4"
42
- MXFP8 = "MXFP8"
43
-
44
- # MLX bit-based quantization types
45
- BIT_1 = "1BIT"
46
- BIT_2 = "2BIT"
47
- BIT_3 = "3BIT"
48
- BIT_4 = "4BIT"
49
- BIT_5 = "5BIT"
50
- BIT_6 = "6BIT"
51
- BIT_7 = "7BIT"
52
- BIT_8 = "8BIT"
53
- BIT_16 = "16BIT"
54
-
55
-
56
- def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
57
- """
58
- Extract quantization type from filename.
59
-
60
- Args:
61
- filename: The filename to extract quantization from
62
-
63
- Returns:
64
- QuantizationType enum value or None if not found
65
- """
66
- # Define mapping from lowercase patterns to enum values
67
- # Include "." to ensure precise matching (e.g., "q4_0." not "q4_0_xl")
68
- pattern_to_enum = {
69
- 'bf16.': QuantizationType.BF16,
70
- 'f16.': QuantizationType.F16, # Add F16 support
71
- 'q2_k_l.': QuantizationType.Q2_K_L, # Check Q2_K_L before Q2_K to avoid partial match
72
- 'q2_k.': QuantizationType.Q2_K,
73
- 'q3_k.': QuantizationType.Q3_K,
74
- 'q3_k_m.': QuantizationType.Q3_K_M,
75
- 'q3_k_s.': QuantizationType.Q3_K_S,
76
- 'q4_k_m.': QuantizationType.Q4_K_M,
77
- 'q4_k_s.': QuantizationType.Q4_K_S,
78
- 'q4_0.': QuantizationType.Q4_0,
79
- 'q4_1.': QuantizationType.Q4_1,
80
- 'q4_k.': QuantizationType.Q4_K,
81
- 'q5_k.': QuantizationType.Q5_K,
82
- 'q5_k_m.': QuantizationType.Q5_K_M,
83
- 'q5_k_s.': QuantizationType.Q5_K_S,
84
- 'q6_k.': QuantizationType.Q6_K,
85
- 'q8_0.': QuantizationType.Q8_0,
86
- 'mxfp4.': QuantizationType.MXFP4,
87
- 'mxfp8.': QuantizationType.MXFP8,
88
- }
89
-
90
- filename_lower = filename.lower()
91
-
92
- # Check longer patterns first to avoid partial matches
93
- # Sort by length descending to check q2_k_l before q2_k, q4_k_m before q4_0, etc.
94
- for pattern in sorted(pattern_to_enum.keys(), key=len, reverse=True):
95
- if pattern in filename_lower:
96
- return pattern_to_enum[pattern]
97
-
98
- return None
99
-
100
-
101
- def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
102
- """
103
- Extract quantization type from repo_id for MLX models by looking for bit patterns.
104
-
105
- Args:
106
- repo_id: The repository ID to extract quantization from
107
-
108
- Returns:
109
- QuantizationType enum value or None if not found
110
- """
111
- # Define mapping from bit numbers to enum values
112
- bit_to_enum = {
113
- 1: QuantizationType.BIT_1,
114
- 2: QuantizationType.BIT_2,
115
- 3: QuantizationType.BIT_3,
116
- 4: QuantizationType.BIT_4,
117
- 5: QuantizationType.BIT_5,
118
- 6: QuantizationType.BIT_6,
119
- 7: QuantizationType.BIT_7,
120
- 8: QuantizationType.BIT_8,
121
- 16: QuantizationType.BIT_16,
122
- }
123
-
124
- # First check for patterns like "4bit", "8bit" etc. (case insensitive)
125
- pattern = r'(\d+)bit'
126
- matches = re.findall(pattern, repo_id.lower())
127
-
128
- for match in matches:
129
- try:
130
- bit_number = int(match)
131
- if bit_number in bit_to_enum:
132
- logger.debug(f"Found {bit_number}bit quantization in repo_id: {repo_id}")
133
- return bit_to_enum[bit_number]
134
- except ValueError:
135
- continue
136
-
137
- # Also check for patterns like "-q8", "_Q4" etc.
138
- q_pattern = r'[-_]q(\d+)'
139
- q_matches = re.findall(q_pattern, repo_id.lower())
140
-
141
- for match in q_matches:
142
- try:
143
- bit_number = int(match)
144
- if bit_number in bit_to_enum:
145
- logger.debug(f"Found Q{bit_number} quantization in repo_id: {repo_id}")
146
- return bit_to_enum[bit_number]
147
- except ValueError:
148
- continue
149
-
150
- return None
151
-
152
-
153
- def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
154
- """
155
- Extract quantization type from MLX model's config.json file.
156
-
157
- Args:
158
- mlx_folder_path: Path to the MLX model folder
159
-
160
- Returns:
161
- QuantizationType enum value or None if not found
162
- """
163
- config_path = os.path.join(mlx_folder_path, "config.json")
164
-
165
- if not os.path.exists(config_path):
166
- logger.debug(f"Config file not found: {config_path}")
167
- return None
168
-
169
- try:
170
- with open(config_path, 'r', encoding='utf-8') as f:
171
- config = json.load(f)
172
-
173
- # Look for quantization.bits field
174
- quantization_config = config.get("quantization", {})
175
- if isinstance(quantization_config, dict):
176
- bits = quantization_config.get("bits")
177
- if isinstance(bits, int):
178
- # Define mapping from bit numbers to enum values
179
- bit_to_enum = {
180
- 1: QuantizationType.BIT_1,
181
- 2: QuantizationType.BIT_2,
182
- 3: QuantizationType.BIT_3,
183
- 4: QuantizationType.BIT_4,
184
- 5: QuantizationType.BIT_5,
185
- 6: QuantizationType.BIT_6,
186
- 7: QuantizationType.BIT_7,
187
- 8: QuantizationType.BIT_8,
188
- 16: QuantizationType.BIT_16,
189
- }
190
-
191
- if bits in bit_to_enum:
192
- logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
193
- return bit_to_enum[bits]
194
- else:
195
- logger.debug(f"Unsupported quantization bits value: {bits}")
196
-
197
- except (json.JSONDecodeError, IOError) as e:
198
- logger.warning(f"Error reading config.json from {config_path}: {e}")
199
- except Exception as e:
200
- logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")
201
-
202
- return None
203
-
204
-
205
- def extract_gguf_quantization(filename: str) -> str:
206
- """
207
- Extract quantization level from GGUF filename using the enum-based approach.
208
-
209
- This function provides backward compatibility by returning a string representation
210
- of the quantization type.
211
-
212
- Args:
213
- filename: The GGUF filename
214
-
215
- Returns:
216
- String representation of the quantization type or "UNKNOWN" if not found
217
- """
218
- quantization_type = extract_quantization_from_filename(filename)
219
- if quantization_type:
220
- return quantization_type.value
221
- return "UNKNOWN"
222
-
223
-
224
- def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
225
- """
226
- Detect quantization for MLX models using multiple methods in priority order.
227
-
228
- Args:
229
- repo_id: The repository ID
230
- directory_path: Path to the model directory
231
-
232
- Returns:
233
- QuantizationType enum value or None if not found
234
- """
235
- # Method 1: Extract from repo_id
236
- quantization_type = extract_quantization_from_repo_id(repo_id)
237
- if quantization_type:
238
- return quantization_type
239
-
240
- # Method 2: Extract from config.json if available
241
- quantization_type = extract_quantization_from_mlx_config(directory_path)
242
- if quantization_type:
243
- return quantization_type
244
-
245
- return None
1
+ """
2
+ Quantization utilities for extracting quantization types from model files and configurations.
3
+
4
+ This module provides utilities to extract quantization information from:
5
+ - GGUF model filenames
6
+ - MLX model repository IDs
7
+ - MLX model config.json files
8
+ """
9
+
10
+ import os
11
+ import json
12
+ import re
13
+ import logging
14
+ from enum import Enum
15
+ from typing import Optional
16
+
17
+ # Set up logger
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class QuantizationType(str, Enum):
22
+ """Enum for GGUF and MLX model quantization types."""
23
+ # GGUF quantization types
24
+ BF16 = "BF16"
25
+ F16 = "F16"
26
+ Q2_K = "Q2_K"
27
+ Q2_K_L = "Q2_K_L"
28
+ Q3_K = "Q3_K"
29
+ Q3_K_M = "Q3_K_M"
30
+ Q3_K_S = "Q3_K_S"
31
+ Q4_0 = "Q4_0"
32
+ Q4_1 = "Q4_1"
33
+ Q4_K = "Q4_K"
34
+ Q4_K_M = "Q4_K_M"
35
+ Q4_K_S = "Q4_K_S"
36
+ Q5_K = "Q5_K"
37
+ Q5_K_M = "Q5_K_M"
38
+ Q5_K_S = "Q5_K_S"
39
+ Q6_K = "Q6_K"
40
+ Q8_0 = "Q8_0"
41
+ MXFP4 = "MXFP4"
42
+ MXFP8 = "MXFP8"
43
+
44
+ # MLX bit-based quantization types
45
+ BIT_1 = "1BIT"
46
+ BIT_2 = "2BIT"
47
+ BIT_3 = "3BIT"
48
+ BIT_4 = "4BIT"
49
+ BIT_5 = "5BIT"
50
+ BIT_6 = "6BIT"
51
+ BIT_7 = "7BIT"
52
+ BIT_8 = "8BIT"
53
+ BIT_16 = "16BIT"
54
+
55
+
56
+ def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
57
+ """
58
+ Extract quantization type from filename.
59
+
60
+ Args:
61
+ filename: The filename to extract quantization from
62
+
63
+ Returns:
64
+ QuantizationType enum value or None if not found
65
+ """
66
+ # Define mapping from lowercase patterns to enum values
67
+ # Include "." to ensure precise matching (e.g., "q4_0." not "q4_0_xl")
68
+ pattern_to_enum = {
69
+ 'bf16.': QuantizationType.BF16,
70
+ 'f16.': QuantizationType.F16, # Add F16 support
71
+ 'q2_k_l.': QuantizationType.Q2_K_L, # Check Q2_K_L before Q2_K to avoid partial match
72
+ 'q2_k.': QuantizationType.Q2_K,
73
+ 'q3_k.': QuantizationType.Q3_K,
74
+ 'q3_k_m.': QuantizationType.Q3_K_M,
75
+ 'q3_k_s.': QuantizationType.Q3_K_S,
76
+ 'q4_k_m.': QuantizationType.Q4_K_M,
77
+ 'q4_k_s.': QuantizationType.Q4_K_S,
78
+ 'q4_0.': QuantizationType.Q4_0,
79
+ 'q4_1.': QuantizationType.Q4_1,
80
+ 'q4_k.': QuantizationType.Q4_K,
81
+ 'q5_k.': QuantizationType.Q5_K,
82
+ 'q5_k_m.': QuantizationType.Q5_K_M,
83
+ 'q5_k_s.': QuantizationType.Q5_K_S,
84
+ 'q6_k.': QuantizationType.Q6_K,
85
+ 'q8_0.': QuantizationType.Q8_0,
86
+ 'mxfp4.': QuantizationType.MXFP4,
87
+ 'mxfp8.': QuantizationType.MXFP8,
88
+ }
89
+
90
+ filename_lower = filename.lower()
91
+
92
+ # Check longer patterns first to avoid partial matches
93
+ # Sort by length descending to check q2_k_l before q2_k, q4_k_m before q4_0, etc.
94
+ for pattern in sorted(pattern_to_enum.keys(), key=len, reverse=True):
95
+ if pattern in filename_lower:
96
+ return pattern_to_enum[pattern]
97
+
98
+ return None
99
+
100
+
101
+ def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
102
+ """
103
+ Extract quantization type from repo_id for MLX models by looking for bit patterns.
104
+
105
+ Args:
106
+ repo_id: The repository ID to extract quantization from
107
+
108
+ Returns:
109
+ QuantizationType enum value or None if not found
110
+ """
111
+ # Define mapping from bit numbers to enum values
112
+ bit_to_enum = {
113
+ 1: QuantizationType.BIT_1,
114
+ 2: QuantizationType.BIT_2,
115
+ 3: QuantizationType.BIT_3,
116
+ 4: QuantizationType.BIT_4,
117
+ 5: QuantizationType.BIT_5,
118
+ 6: QuantizationType.BIT_6,
119
+ 7: QuantizationType.BIT_7,
120
+ 8: QuantizationType.BIT_8,
121
+ 16: QuantizationType.BIT_16,
122
+ }
123
+
124
+ # First check for patterns like "4bit", "8bit" etc. (case insensitive)
125
+ pattern = r'(\d+)bit'
126
+ matches = re.findall(pattern, repo_id.lower())
127
+
128
+ for match in matches:
129
+ try:
130
+ bit_number = int(match)
131
+ if bit_number in bit_to_enum:
132
+ logger.debug(f"Found {bit_number}bit quantization in repo_id: {repo_id}")
133
+ return bit_to_enum[bit_number]
134
+ except ValueError:
135
+ continue
136
+
137
+ # Also check for patterns like "-q8", "_Q4" etc.
138
+ q_pattern = r'[-_]q(\d+)'
139
+ q_matches = re.findall(q_pattern, repo_id.lower())
140
+
141
+ for match in q_matches:
142
+ try:
143
+ bit_number = int(match)
144
+ if bit_number in bit_to_enum:
145
+ logger.debug(f"Found Q{bit_number} quantization in repo_id: {repo_id}")
146
+ return bit_to_enum[bit_number]
147
+ except ValueError:
148
+ continue
149
+
150
+ return None
151
+
152
+
153
+ def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
154
+ """
155
+ Extract quantization type from MLX model's config.json file.
156
+
157
+ Args:
158
+ mlx_folder_path: Path to the MLX model folder
159
+
160
+ Returns:
161
+ QuantizationType enum value or None if not found
162
+ """
163
+ config_path = os.path.join(mlx_folder_path, "config.json")
164
+
165
+ if not os.path.exists(config_path):
166
+ logger.debug(f"Config file not found: {config_path}")
167
+ return None
168
+
169
+ try:
170
+ with open(config_path, 'r', encoding='utf-8') as f:
171
+ config = json.load(f)
172
+
173
+ # Look for quantization.bits field
174
+ quantization_config = config.get("quantization", {})
175
+ if isinstance(quantization_config, dict):
176
+ bits = quantization_config.get("bits")
177
+ if isinstance(bits, int):
178
+ # Define mapping from bit numbers to enum values
179
+ bit_to_enum = {
180
+ 1: QuantizationType.BIT_1,
181
+ 2: QuantizationType.BIT_2,
182
+ 3: QuantizationType.BIT_3,
183
+ 4: QuantizationType.BIT_4,
184
+ 5: QuantizationType.BIT_5,
185
+ 6: QuantizationType.BIT_6,
186
+ 7: QuantizationType.BIT_7,
187
+ 8: QuantizationType.BIT_8,
188
+ 16: QuantizationType.BIT_16,
189
+ }
190
+
191
+ if bits in bit_to_enum:
192
+ logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
193
+ return bit_to_enum[bits]
194
+ else:
195
+ logger.debug(f"Unsupported quantization bits value: {bits}")
196
+
197
+ except (json.JSONDecodeError, IOError) as e:
198
+ logger.warning(f"Error reading config.json from {config_path}: {e}")
199
+ except Exception as e:
200
+ logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")
201
+
202
+ return None
203
+
204
+
205
+ def extract_gguf_quantization(filename: str) -> str:
206
+ """
207
+ Extract quantization level from GGUF filename using the enum-based approach.
208
+
209
+ This function provides backward compatibility by returning a string representation
210
+ of the quantization type.
211
+
212
+ Args:
213
+ filename: The GGUF filename
214
+
215
+ Returns:
216
+ String representation of the quantization type or "UNKNOWN" if not found
217
+ """
218
+ quantization_type = extract_quantization_from_filename(filename)
219
+ if quantization_type:
220
+ return quantization_type.value
221
+ return "UNKNOWN"
222
+
223
+
224
+ def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
225
+ """
226
+ Detect quantization for MLX models using multiple methods in priority order.
227
+
228
+ Args:
229
+ repo_id: The repository ID
230
+ directory_path: Path to the model directory
231
+
232
+ Returns:
233
+ QuantizationType enum value or None if not found
234
+ """
235
+ # Method 1: Extract from repo_id
236
+ quantization_type = extract_quantization_from_repo_id(repo_id)
237
+ if quantization_type:
238
+ return quantization_type
239
+
240
+ # Method 2: Extract from config.json if available
241
+ quantization_type = extract_quantization_from_mlx_config(directory_path)
242
+ if quantization_type:
243
+ return quantization_type
244
+
245
+ return None