nexaai 1.0.6rc1__cp310-cp310-macosx_14_0_universal2.whl → 1.0.7__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

@@ -0,0 +1,239 @@
1
+ """
2
+ Quantization utilities for extracting quantization types from model files and configurations.
3
+
4
+ This module provides utilities to extract quantization information from:
5
+ - GGUF model filenames
6
+ - MLX model repository IDs
7
+ - MLX model config.json files
8
+ """
9
+
10
+ import os
11
+ import json
12
+ import re
13
+ import logging
14
+ from enum import Enum
15
+ from typing import Optional
16
+
17
+ # Set up logger
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class QuantizationType(str, Enum):
    """
    Closed set of quantization labels recognised for GGUF and MLX models.

    Members mix in ``str``, so each member compares equal to its own value
    (for example ``QuantizationType.Q4_0 == "Q4_0"``).
    """

    # --- GGUF: labels as they appear (upper-cased) in GGUF filenames ---
    BF16 = "BF16"
    F16 = "F16"
    Q2_K = "Q2_K"
    Q2_K_L = "Q2_K_L"
    Q3_K_M = "Q3_K_M"
    Q3_K_S = "Q3_K_S"
    Q4_0 = "Q4_0"
    Q4_1 = "Q4_1"
    Q4_K_M = "Q4_K_M"
    Q4_K_S = "Q4_K_S"
    Q5_K_M = "Q5_K_M"
    Q5_K_S = "Q5_K_S"
    Q6_K = "Q6_K"
    Q8_0 = "Q8_0"
    MXFP4 = "MXFP4"
    MXFP8 = "MXFP8"

    # --- MLX: labels of the form "<bit width>BIT" ---
    BIT_1 = "1BIT"
    BIT_2 = "2BIT"
    BIT_3 = "3BIT"
    BIT_4 = "4BIT"
    BIT_5 = "5BIT"
    BIT_6 = "6BIT"
    BIT_7 = "7BIT"
    BIT_8 = "8BIT"
    BIT_16 = "16BIT"
51
+
52
+
53
def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
    """
    Extract the GGUF quantization type encoded in a model filename.

    The filename is lower-cased and searched for a known quantization tag
    immediately followed by "." (for example "q4_k_m." in
    "model-Q4_K_M.gguf"). Requiring the trailing dot prevents a tag from
    matching when it is only the prefix of a longer suffix such as
    "q4_0_xl".

    Args:
        filename: The filename to extract quantization from

    Returns:
        QuantizationType enum value or None if not found
    """
    # Lower-case tag (with trailing ".") -> enum member.
    pattern_to_enum = {
        'bf16.': QuantizationType.BF16,
        'f16.': QuantizationType.F16,
        'q2_k_l.': QuantizationType.Q2_K_L,
        'q2_k.': QuantizationType.Q2_K,
        'q3_k_m.': QuantizationType.Q3_K_M,
        # Canonical spelling; previously only the misspelt 'q3_ks.' was
        # present, so "*-Q3_K_S.gguf" files were never recognised.
        'q3_k_s.': QuantizationType.Q3_K_S,
        # Legacy spelling kept so filenames that matched before still match.
        'q3_ks.': QuantizationType.Q3_K_S,
        'q4_k_m.': QuantizationType.Q4_K_M,
        'q4_k_s.': QuantizationType.Q4_K_S,
        'q4_0.': QuantizationType.Q4_0,
        'q4_1.': QuantizationType.Q4_1,
        'q5_k_m.': QuantizationType.Q5_K_M,
        'q5_k_s.': QuantizationType.Q5_K_S,
        'q6_k.': QuantizationType.Q6_K,
        'q8_0.': QuantizationType.Q8_0,
        'mxfp4.': QuantizationType.MXFP4,
        'mxfp8.': QuantizationType.MXFP8,
    }

    filename_lower = filename.lower()

    # Try the longest tags first so that a tag which is a substring of
    # another (e.g. "f16." inside "bf16.") cannot shadow the longer one.
    for pattern in sorted(pattern_to_enum, key=len, reverse=True):
        if pattern in filename_lower:
            return pattern_to_enum[pattern]

    return None
93
+
94
+
95
def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
    """
    Infer an MLX bit-width quantization type from a repository ID.

    Two naming conventions are probed, in this order, against the
    lower-cased repo ID:

    1. "<N>bit"  (e.g. "4bit", "8bit")
    2. "-q<N>" or "_q<N>"  (e.g. "-q8", "_Q4")

    The first number found that corresponds to a supported bit width wins.

    Args:
        repo_id: The repository ID to extract quantization from

    Returns:
        QuantizationType enum value or None if not found
    """
    # Supported bit widths, keyed by their integer value.
    supported_widths = {
        width: QuantizationType(f"{width}BIT")
        for width in (1, 2, 3, 4, 5, 6, 7, 8, 16)
    }

    lowered_id = repo_id.lower()

    # (regex capturing the number, how that number is rendered in the log line)
    probes = (
        (r'(\d+)bit', lambda width: f"{width}bit"),
        (r'[-_]q(\d+)', lambda width: f"Q{width}"),
    )

    for regex, render in probes:
        for digits in re.findall(regex, lowered_id):
            try:
                width = int(digits)
            except ValueError:
                # Number could not be converted; try the next candidate.
                continue

            quantization = supported_widths.get(width)
            if quantization is not None:
                logger.debug(f"Found {render(width)} quantization in repo_id: {repo_id}")
                return quantization

    return None
145
+
146
+
147
def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
    """
    Extract quantization type from an MLX model's config.json file.

    Reads ``<mlx_folder_path>/config.json`` and maps the integer found at
    ``quantization.bits`` to the matching bit-width enum member. Every
    failure mode (missing file, unreadable or invalid JSON, unexpected
    structure, unsupported bit width) is logged and reported as None;
    this function does not raise for them.

    Args:
        mlx_folder_path: Path to the MLX model folder

    Returns:
        QuantizationType enum value or None if not found
    """
    config_path = os.path.join(mlx_folder_path, "config.json")

    if not os.path.exists(config_path):
        logger.debug(f"Config file not found: {config_path}")
        return None

    # Only the read/parse step is guarded, so the handlers below cannot
    # mask unrelated problems in the lookup logic.
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Error reading config.json from {config_path}: {e}")
        return None
    except Exception as e:
        # Best-effort helper: e.g. a decoding error must not crash callers.
        logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")
        return None

    # A valid JSON document may have a non-object root (list, string, ...).
    if not isinstance(config, dict):
        logger.warning(f"Unexpected error reading config.json from {config_path}: "
                       f"top-level JSON value is not an object")
        return None

    quantization_config = config.get("quantization", {})
    if not isinstance(quantization_config, dict):
        return None

    bits = quantization_config.get("bits")
    # bool is a subclass of int, so JSON `true` would otherwise be read as
    # a 1-bit quantization; reject booleans explicitly.
    if isinstance(bits, bool) or not isinstance(bits, int):
        return None

    # Mapping from bit numbers to enum values
    bit_to_enum = {
        1: QuantizationType.BIT_1,
        2: QuantizationType.BIT_2,
        3: QuantizationType.BIT_3,
        4: QuantizationType.BIT_4,
        5: QuantizationType.BIT_5,
        6: QuantizationType.BIT_6,
        7: QuantizationType.BIT_7,
        8: QuantizationType.BIT_8,
        16: QuantizationType.BIT_16,
    }

    quantization_type = bit_to_enum.get(bits)
    if quantization_type is None:
        logger.debug(f"Unsupported quantization bits value: {bits}")
        return None

    logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
    return quantization_type
197
+
198
+
199
def extract_gguf_quantization(filename: str) -> str:
    """
    Return the quantization label found in a GGUF filename as a plain string.

    String-returning wrapper around extract_quantization_from_filename,
    kept for backward compatibility with callers that expect text rather
    than an enum member.

    Args:
        filename: The GGUF filename

    Returns:
        The matched quantization type's value, or "UNKNOWN" if no
        quantization type was found
    """
    detected = extract_quantization_from_filename(filename)
    return detected.value if detected else "UNKNOWN"
216
+
217
+
218
def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
    """
    Detect the quantization of an MLX model, trying sources in priority order.

    The repository ID is inspected first; config.json inside the model
    directory is consulted only when the repository ID yields nothing.

    Args:
        repo_id: The repository ID
        directory_path: Path to the model directory

    Returns:
        QuantizationType enum value or None if not found
    """
    # Ordered (detector, argument) pairs; each detector runs only if every
    # earlier one came back empty.
    strategies = (
        (extract_quantization_from_repo_id, repo_id),
        (extract_quantization_from_mlx_config, directory_path),
    )

    for detector, argument in strategies:
        found = detector(argument)
        if found:
            return found

    return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.6rc1
3
+ Version: 1.0.7
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -21,6 +21,7 @@ Provides-Extra: mlx
21
21
  Requires-Dist: mlx; extra == "mlx"
22
22
  Requires-Dist: mlx-lm; extra == "mlx"
23
23
  Requires-Dist: mlx-vlm; extra == "mlx"
24
+ Requires-Dist: mlx-embeddings; extra == "mlx"
24
25
  Requires-Dist: tokenizers; extra == "mlx"
25
26
  Requires-Dist: safetensors; extra == "mlx"
26
27
  Requires-Dist: Pillow; extra == "mlx"
@@ -1,6 +1,6 @@
1
1
  nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
2
- nexaai/_stub.cpython-310-darwin.so,sha256=HD3LnNPlQm7XugP9lz_ed5o9EIZZwH5_SUfJWjeyJwg,66768
3
- nexaai/_version.py,sha256=o8WPRe-h5be83JEwTPwBVdsZ20QQ2VFyIwzifvgbiPQ,142
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=f2Z51NtVXCP4Jt6YFxsABdZxVReTE4jWqJSks_DFJtk,66768
3
+ nexaai/_version.py,sha256=HMQ_cuen1UlESzaxkeIlsIDBtPl1Uc9t60FOoMWVLcM,138
4
4
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
6
  nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
@@ -19,7 +19,7 @@ nexaai/binds/__init__.py,sha256=T9Ua7SzHNglSeEqXlfH5ymYXRyXhNKkC9z_y_bWCNMo,80
19
19
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=hVxY76tn7hN6uHDIgM7LWNvgoudHgNZVoaygM9X1RWE,217232
20
20
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=FT8581RNciilskK89PhtnNSjw4Oh0-xk8QdbJVFmOd8,202064
21
21
  nexaai/binds/libcrypto.dylib,sha256=aWif9WhTKVQhmZL3DmtIpMkZY5JSb_Ny6CClmUBKYM4,4710416
22
- nexaai/binds/libnexa_bridge.dylib,sha256=hv4zUyl0ajPO_84svUUssADt0qGeLouyMGeeyqsrWOY,251480
22
+ nexaai/binds/libnexa_bridge.dylib,sha256=9xmdJs9T2eulxIYJJ2axhnXCYeVTTFE_5b3qF9mDsLE,251480
23
23
  nexaai/binds/libssl.dylib,sha256=Q2frAdhR729oKYuCjJOEr1Ott3idFWoFp98fwNqtIaU,881616
24
24
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=3Bsq0_tGkM027-bORVeJUDl6CYZxAF9sbDIn1l31XTQ,182704
25
25
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=JM4oOkie1su0ES5hMdtILeQHlRukRzH1vTleTupUXhg,650736
@@ -186,7 +186,7 @@ nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
186
  nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
187
187
  nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
188
188
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=OsDzsc_2wZkSoWu6yCOZadMkaYdBW3uyjF11hDKTaX8,4383
189
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTjOC1VJ9ypIgCvkK_jKNSWpswbg132rDcTzWcL5oFA,4482
190
190
  nexaai/embedder_impl/pybind_embedder_impl.py,sha256=Ga1JYauVkRq6jwAGL7Xx5HDaIx483_v9gZVoTyd3xNU,3495
191
191
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
192
192
  nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
@@ -206,9 +206,9 @@ nexaai/mlx_backend/cv/interface.py,sha256=qE51ApUETEZxDMPZB4VdV098fsXcIiEg4Hj9za
206
206
  nexaai/mlx_backend/cv/main.py,sha256=hYaF2C36hKTyy7kGMNkzLrdczPiFVS73H320klzzpVM,2856
207
207
  nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py,sha256=Vpa-QTy7N5oFfGI7Emldx1dOYJWv_4nAFNRDz_5vHBI,58593
208
208
  nexaai/mlx_backend/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
- nexaai/mlx_backend/embedding/generate.py,sha256=irAbc_nBD9wMqe5z1eFgp6Gf_mONow2I3z3g-DAAbtY,5018
210
- nexaai/mlx_backend/embedding/interface.py,sha256=hW0yrtD55ol0hB-X5glcXMc4TiyKuT4U5GaI8SP-kAU,11508
211
- nexaai/mlx_backend/embedding/main.py,sha256=_kIwz69A7UXA_u0VNP6eqM2W-LH_1_1hlJtro6U_FjI,2620
209
+ nexaai/mlx_backend/embedding/generate.py,sha256=leZA0Ir78-5GV3jloPKYSAKgb04Wr5jORFJlSSVyKs0,12855
210
+ nexaai/mlx_backend/embedding/interface.py,sha256=M7AGiq_UVLNIi2Ie6H08ySnMxIjIhUlNgmV9I_rKYt4,22742
211
+ nexaai/mlx_backend/embedding/main.py,sha256=xKRebBcooKuf8DzWKwCicftes3MAcYAd1QvcT9_AAPQ,6003
212
212
  nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
213
213
  nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
214
214
  nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -511,12 +511,15 @@ nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJ
511
511
  nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
512
512
  nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
513
513
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
514
- nexaai/utils/model_manager.py,sha256=c07ocxxw1IHCQw6esbmYK0dX2R2OajfEIGsC_2teHXo,48572
515
- nexaai/utils/progress_tracker.py,sha256=76HlPkyN41IMHSsH56-qdlN_aY_oBfJz50J16Cx67R0,15102
514
+ nexaai/utils/manifest_utils.py,sha256=2waOuQErodNHhoAETQqlQgXdVes-T5A4HMb8pUIN9hg,9765
515
+ nexaai/utils/model_manager.py,sha256=xzerYqXkvRrHEqpEQvhOeg_6XQho2BvYw6ee4dlz69A,48575
516
+ nexaai/utils/model_types.py,sha256=-DER8L4lAUR_iLS99F0r57avwqWtuN21ug5pX2p24_E,1369
517
+ nexaai/utils/progress_tracker.py,sha256=mTw7kaKH8BkmecYm7iBMqRHd9uUH4Ch0S8CzbpARDCk,15404
518
+ nexaai/utils/quantization_utils.py,sha256=4gvp6UQfSO9G1FYBwnFtQspTzH9sDbi1PBXw2t1N69M,7650
516
519
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
517
520
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=od1R1mRoIgPG3NHC7JiDlcB_YJY8aklX8Em3ZkeHNpE,10734
518
521
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=5ZMFgDATthmMzjrd-vE5KX5ZAMoWPYbF_FTLz8DBKIk,8908
519
- nexaai-1.0.6rc1.dist-info/METADATA,sha256=U2gJx8JlzG3wUYtVYk7VdDN7ildkHxWTQUE5Oya_Z_s,1154
520
- nexaai-1.0.6rc1.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
521
- nexaai-1.0.6rc1.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
522
- nexaai-1.0.6rc1.dist-info/RECORD,,
522
+ nexaai-1.0.7.dist-info/METADATA,sha256=DMyi7lxZHVYv62pJQ6SemiNzIqHSGuS4-r5vHO9llJw,1197
523
+ nexaai-1.0.7.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
524
+ nexaai-1.0.7.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
525
+ nexaai-1.0.7.dist-info/RECORD,,