nexaai 1.0.16rc8-cp310-cp310-macosx_13_0_x86_64.whl → 1.0.16rc10-cp310-cp310-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



nexaai/_stub.cpython-310-darwin.so CHANGED
Binary file (no diff shown)
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is generated by CMake from _version.py.in
  # Do not modify this file manually - it will be overwritten

- __version__ = "1.0.16-rc8"
+ __version__ = "1.0.16-rc10"
nexaai/binds/libnexa_bridge.dylib CHANGED
Binary file (no diff shown)
nexaai/mlx_backend/ml.py CHANGED
@@ -1,9 +1,6 @@
  # This file defines the python interface that c-lib expects from a python backend

  from __future__ import annotations
- from typing import Optional
- from pathlib import Path
- from dataclasses import dataclass

  from abc import ABC, abstractmethod
  from dataclasses import dataclass, field
@@ -104,12 +101,9 @@ class ModelConfig:
      n_threads_batch: int = 0 # number of threads to use for batch processing
      n_batch: int = 0 # logical maximum batch size that can be submitted to llama_decode
      n_ubatch: int = 0 # physical maximum batch size
-     # max number of sequences (i.e. distinct states for recurrent models)
-     n_seq_max: int = 0
-     # path to chat template file, optional
-     chat_template_path: Optional[Path] = None
-     # content of chat template file, optional
-     chat_template_content: Optional[str] = None
+     n_seq_max: int = 0 # max number of sequences (i.e. distinct states for recurrent models)
+     chat_template_path: Optional[Path] = None # path to chat template file, optional
+     chat_template_content: Optional[str] = None # content of chat template file, optional


  @dataclass
@@ -124,8 +118,7 @@ class SamplerConfig:
      frequency_penalty: float = 0.0
      seed: int = -1 # –1 for random
      grammar_path: Optional[Path] = None
-     # Optional grammar string (BNF-like format)
-     grammar_string: Optional[str] = None
+     grammar_string: Optional[str] = None # Optional grammar string (BNF-like format)


  @dataclass
@@ -135,10 +128,8 @@ class GenerationConfig:
      stop: Sequence[str] = field(default_factory=tuple)
      n_past: int = 0
      sampler_config: Optional[SamplerConfig] = None
-     # Array of image paths for VLM (None if none)
-     image_paths: Optional[Sequence[Path]] = None
-     # Array of audio paths for VLM (None if none)
-     audio_paths: Optional[Sequence[Path]] = None
+     image_paths: Optional[Sequence[Path]] = None # Array of image paths for VLM (None if none)
+     audio_paths: Optional[Sequence[Path]] = None # Array of audio paths for VLM (None if none)


  @dataclass
@@ -179,32 +170,6 @@ class RerankConfig:
      normalize_method: str = "softmax" # "softmax" | "min-max" | "none"


- # image-gen
-
-
- @dataclass
- class ImageGenTxt2ImgInput:
-     """Input structure for text-to-image generation."""
-     prompt: str
-     config: ImageGenerationConfig
-     output_path: Optional[Path] = None
-
-
- @dataclass
- class ImageGenImg2ImgInput:
-     """Input structure for image-to-image generation."""
-     init_image_path: Path
-     prompt: str
-     config: ImageGenerationConfig
-     output_path: Optional[Path] = None
-
-
- @dataclass
- class ImageGenOutput:
-     """Output structure for image generation."""
-     output_image_path: Path
-
-
  @dataclass
  class ImageSamplerConfig:
      """Configuration for image sampling."""
@@ -215,27 +180,17 @@ class ImageSamplerConfig:
      seed: int = -1 # –1 for random


- @dataclass
- class ImageGenCreateInput:
-     """Configuration for image generation."""
-     model_name: str
-     model_path: Path
-     config: ModelConfig
-     scheduler_config_path: Path
-     plugin_id: str
-     device_id: Optional[str] = None
-
-
  @dataclass
  class ImageGenerationConfig:
      """Configuration for image generation."""
-     prompts: List[str]
-     sampler_config: ImageSamplerConfig
-     scheduler_config: SchedulerConfig
-     strength: float
-     negative_prompts: Optional[List[str]] = None
+     prompts: str | List[str]
+     negative_prompts: str | List[str] | None = None
      height: int = 512
      width: int = 512
+     sampler_config: Optional[ImageSamplerConfig] = None
+     lora_id: int = -1 # –1 for none
+     init_image: Optional[Image] = None
+     strength: float = 1.0


  @dataclass
@@ -306,7 +261,7 @@ class TTSResult:
  class BoundingBox:
      """Generic bounding box structure."""
      x: float # X coordinate (normalized or pixel, depends on model)
-     y: float # Y coordinate (normalized or pixel, depends on model)
+     y: float # Y coordinate (normalized or pixel, depends on model)
      width: float # Width
      height: float # Height

@@ -320,8 +275,7 @@ class CVResult:
      confidence: float = 0.0 # Confidence score [0.0-1.0]
      bbox: Optional[BoundingBox] = None # Bounding box (example: YOLO)
      text: Optional[str] = None # Text result (example: OCR)
-     # Feature embedding (example: CLIP embedding)
-     embedding: Optional[List[float]] = None
+     embedding: Optional[List[float]] = None # Feature embedding (example: CLIP embedding)
      embedding_dim: int = 0 # Embedding dimension


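Note: the reworked ImageGenerationConfig above now takes a single prompt string or a list, and every field except prompts has a default. A minimal usage sketch under the new definition follows; the prompt text and sizes are illustrative, not taken from the package, and it assumes ImageSamplerConfig's fields all default as the visible hunk suggests.

# Sketch only: constructing the new-style config from nexaai/mlx_backend/ml.py.
from nexaai.mlx_backend.ml import ImageGenerationConfig, ImageSamplerConfig

cfg = ImageGenerationConfig(
    prompts="a watercolor fox",           # plain str now accepted (or a List[str])
    negative_prompts=None,                # str | List[str] | None
    height=512,
    width=512,
    sampler_config=ImageSamplerConfig(),  # now Optional; defaults per the hunk above
)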
nexaai/mlx_backend/sd/modeling/model_io.py CHANGED
@@ -1,7 +1,6 @@
  # Copyright © 2023-2024 Apple Inc.

  import json
- import os
  from typing import Optional

  import mlx.core as mx
@@ -177,37 +176,19 @@ def _load_safetensor_weights(mapper, model, weight_file, float16: bool = False):


  def _check_key(key: str, part: str):
-     # Check if it's a local path
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # For local paths, we'll use a default model structure
-         return
      if key not in _MODELS:
          raise ValueError(
              f"[{part}] '{key}' model not found, choose one of {{{','.join(_MODELS.keys())}}}"
          )

- def _get_model_path(key: str, file_path: str):
-     """Get the full path for a model file, supporting both local and HuggingFace paths"""
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path
-         return os.path.join(key, file_path)
-     else:
-         # HuggingFace path
-         return hf_hub_download(key, file_path)
-

  def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
      """Load the stable diffusion UNet from Hugging Face Hub."""
      _check_key(key, "load_unet")

-     # Get the config path
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         unet_config = "unet/config.json"
-     else:
-         unet_config = _MODELS[key]["unet_config"]
-
-     with open(_get_model_path(key, unet_config)) as f:
+     # Download the config and create the model
+     unet_config = _MODELS[key]["unet_config"]
+     with open(hf_hub_download(key, unet_config)) as f:
          config = json.load(f)

      n_blocks = len(config["block_out_channels"])
@@ -238,13 +219,8 @@ def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
      )

      # Download the weights and map them into the model
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         unet_weights = "unet/diffusion_pytorch_model.safetensors"
-     else:
-         unet_weights = _MODELS[key]["unet"]
-
-     weight_file = _get_model_path(key, unet_weights)
+     unet_weights = _MODELS[key]["unet"]
+     weight_file = hf_hub_download(key, unet_weights)
      _load_safetensor_weights(map_unet_weights, model, weight_file, float16)

      return model
@@ -262,13 +238,8 @@ def load_text_encoder(
      config_key = config_key or (model_key + "_config")

      # Download the config and create the model
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         text_encoder_config = f"{model_key}/config.json"
-     else:
-         text_encoder_config = _MODELS[key][config_key]
-
-     with open(_get_model_path(key, text_encoder_config)) as f:
+     text_encoder_config = _MODELS[key][config_key]
+     with open(hf_hub_download(key, text_encoder_config)) as f:
          config = json.load(f)

      with_projection = "WithProjection" in config["architectures"][0]
@@ -286,13 +257,8 @@ def load_text_encoder(
      )

      # Download the weights and map them into the model
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         text_encoder_weights = f"{model_key}/model.safetensors"
-     else:
-         text_encoder_weights = _MODELS[key][model_key]
-
-     weight_file = _get_model_path(key, text_encoder_weights)
+     text_encoder_weights = _MODELS[key][model_key]
+     weight_file = hf_hub_download(key, text_encoder_weights)
      _load_safetensor_weights(map_clip_text_encoder_weights, model, weight_file, float16)

      return model
@@ -303,13 +269,8 @@ def load_autoencoder(key: str = _DEFAULT_MODEL, float16: bool = False):
      _check_key(key, "load_autoencoder")

      # Download the config and create the model
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         vae_config = "vae/config.json"
-     else:
-         vae_config = _MODELS[key]["vae_config"]
-
-     with open(_get_model_path(key, vae_config)) as f:
+     vae_config = _MODELS[key]["vae_config"]
+     with open(hf_hub_download(key, vae_config)) as f:
          config = json.load(f)

      model = Autoencoder(
@@ -326,13 +287,8 @@ def load_autoencoder(key: str = _DEFAULT_MODEL, float16: bool = False):
      )

      # Download the weights and map them into the model
-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         vae_weights = "vae/diffusion_pytorch_model.safetensors"
-     else:
-         vae_weights = _MODELS[key]["vae"]
-
-     weight_file = _get_model_path(key, vae_weights)
+     vae_weights = _MODELS[key]["vae"]
+     weight_file = hf_hub_download(key, vae_weights)
      _load_safetensor_weights(map_vae_weights, model, weight_file, float16)

      return model
@@ -342,13 +298,8 @@ def load_diffusion_config(key: str = _DEFAULT_MODEL):
      """Load the stable diffusion config from Hugging Face Hub."""
      _check_key(key, "load_diffusion_config")

-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         diffusion_config = "scheduler/scheduler_config.json"
-     else:
-         diffusion_config = _MODELS[key]["diffusion_config"]
-
-     with open(_get_model_path(key, diffusion_config)) as f:
+     diffusion_config = _MODELS[key]["diffusion_config"]
+     with open(hf_hub_download(key, diffusion_config)) as f:
          config = json.load(f)

      return DiffusionConfig(
@@ -366,17 +317,11 @@ def load_tokenizer(
  ):
      _check_key(key, "load_tokenizer")

-     if os.path.exists(key) or '/' in key or '\\' in key:
-         # Local path - use SDXL Turbo structure
-         vocab_file = _get_model_path(key, f"tokenizer/{vocab_key.split('_')[1]}.json")
-         merges_file = _get_model_path(key, f"tokenizer/{merges_key.split('_')[1]}.txt")
-     else:
-         vocab_file = _get_model_path(key, _MODELS[key][vocab_key])
-         merges_file = _get_model_path(key, _MODELS[key][merges_key])
-
+     vocab_file = hf_hub_download(key, _MODELS[key][vocab_key])
      with open(vocab_file, encoding="utf-8") as f:
          vocab = json.load(f)

+     merges_file = hf_hub_download(key, _MODELS[key][merges_key])
      with open(merges_file, encoding="utf-8") as f:
          bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
          bpe_merges = [tuple(m.split()) for m in bpe_merges]
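With the local-path branches removed, model_io.py now resolves every config and weight file through hf_hub_download against the module's _MODELS registry. A minimal sketch of that remaining flow; the repo id and filename below are illustrative, while hf_hub_download(repo_id, filename) is the standard huggingface_hub call.

# Sketch of the HF-only resolution that remains after this change.
import json
from huggingface_hub import hf_hub_download

def fetch_json(repo_id: str, filename: str) -> dict:
    # hf_hub_download fetches (and caches) one file from the Hub, returning its local path
    path = hf_hub_download(repo_id, filename)
    with open(path) as f:
        return json.load(f)

# Illustrative only; in model_io.py the key and filename come from _MODELS[key]
unet_config = fetch_json("stabilityai/sdxl-turbo", "unet/config.json")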
nexaai-1.0.16rc8.dist-info/METADATA → nexaai-1.0.16rc10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nexaai
- Version: 1.0.16rc8
+ Version: 1.0.16rc10
  Summary: Python bindings for NexaSDK C-lib backend
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
nexaai-1.0.16rc8.dist-info/RECORD → nexaai-1.0.16rc10.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
  nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
- nexaai/_stub.cpython-310-darwin.so,sha256=zvVd8ZOu9tG4yYAHPt-3aGryeGsJ7-5c1KTZdh2U99k,49832
- nexaai/_version.py,sha256=HYA8rSkVin3FWg68b5gJ7nJTXAuo0Wk_FZ6Xr8Fv-hQ,143
+ nexaai/_stub.cpython-310-darwin.so,sha256=bjv9gcR4eVwpi9riqlrWzsFcawoQgULf9ZcwFX0em3g,49832
+ nexaai/_version.py,sha256=rM7mysA3I6mJ-jXt3DwjydINMplIqs78L_Oid1wMpI8,144
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
  nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
@@ -18,7 +18,7 @@ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XE
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=KX_nfaQQPzxGv0GS0efcO-ByWai7y7RskMZvSAAaJWI,233960
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
- nexaai/binds/libnexa_bridge.dylib,sha256=yaKtbQlBTvXqO14Ge5Pz3XFJpAu-wbOh5Fjjs9410xY,250376
+ nexaai/binds/libnexa_bridge.dylib,sha256=haJ62A2Xnv3ZQ76GIT-HTqXh9maiMpNTSaFCCTNIJko,250376
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=aKcT2kW1PL1xPFX7vsT6Gs79ZydcVfg8bKtnEthRpI4,183008
  nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528
@@ -40,7 +40,7 @@ nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCt
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
  nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
- nexaai/mlx_backend/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
+ nexaai/mlx_backend/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
  nexaai/mlx_backend/asr/interface.py,sha256=pE5expr8sP7O9bEgWaUaJ4ITX0MsCxFovG9iVWqVDVU,4246
@@ -57,18 +57,6 @@ nexaai/mlx_backend/embedding/interface.py,sha256=M7AGiq_UVLNIi2Ie6H08ySnMxIjIhUl
  nexaai/mlx_backend/embedding/main.py,sha256=xKRebBcooKuf8DzWKwCicftes3MAcYAd1QvcT9_AAPQ,6003
  nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
- nexaai/mlx_backend/image_gen/__init__.py,sha256=8eFAF00-yWdEs0LJiszjHEsgPDAF8tSruBYvujCNgE0,42
- nexaai/mlx_backend/image_gen/generate_sd.py,sha256=8DuPsJlxVf1LlFARVThiW807G-0cWPReYONj-x8qKRo,8616
- nexaai/mlx_backend/image_gen/interface.py,sha256=7ElIiLm5gZXAfKPs497wLkn8qqLFLL5T-oV-hXVmEdw,3285
- nexaai/mlx_backend/image_gen/main.py,sha256=jKkGDfqmGIsQwhHJaL_j_CTtr04xplD6dnnGe3AcilU,8123
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py,sha256=wriLb0wA5vCBlCoQMtfKrVVWMJw8fhXCCk6R9_Nrb9c,9524
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py,sha256=feHQXi1NiGa01AMo7nK8M-sgBoZBHI95xAGMfAv64kE,3733
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py,sha256=lq2sWXevpnCk8KXOywEVOzgFT6WBNnG4xr7NLWgidII,1773
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py,sha256=ei-pdrqFxjeHLL6Ov5NZaHT5lceqZbHDyNFfUFUh_04,13755
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py,sha256=nJxiSmF79E2aDGzLqygOat4zpgaTzfR7Kp1PD0bmhWA,3397
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py,sha256=WMcmyQmb5MmhCp-NNU72Bqjq9vQvDzgR8KAfGKA2Qso,2991
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py,sha256=rYj1-baAugL7CNyYYvLch891WUgpunhAJCVBNKxfZSA,14762
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py,sha256=sQ1943x5SKyTmeL2xe3t84FL9DGO72Eab2m88r-BSe8,8044
  nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/llm/generate.py,sha256=Phes0tzxbbEWA2hDylQvD0LjorMaPwvcfZq9RKCAOt0,4399
  nexaai/mlx_backend/llm/interface.py,sha256=SZFkuAUi2vxj_dSqj8RXf9vPTGMtpks_pZxxrF7iIe8,29330
@@ -236,7 +224,7 @@ nexaai/mlx_backend/sd/main.py,sha256=cHlZhG8KHDFXbYRH-diKA7B1Qacq1euuGw0pKeXJGAI
  nexaai/mlx_backend/sd/modeling/__init__.py,sha256=wriLb0wA5vCBlCoQMtfKrVVWMJw8fhXCCk6R9_Nrb9c,9524
  nexaai/mlx_backend/sd/modeling/clip.py,sha256=feHQXi1NiGa01AMo7nK8M-sgBoZBHI95xAGMfAv64kE,3733
  nexaai/mlx_backend/sd/modeling/config.py,sha256=lq2sWXevpnCk8KXOywEVOzgFT6WBNnG4xr7NLWgidII,1773
- nexaai/mlx_backend/sd/modeling/model_io.py,sha256=xUjF5XNUfRLHQz-LtGT_D3XGQ1MI7ZQWknmdUDMpi_s,13732
+ nexaai/mlx_backend/sd/modeling/model_io.py,sha256=0jpMHJdMoTk0Jo4Uz1ZGiXekAWzPuTTD5g7ms4m4idY,11668
  nexaai/mlx_backend/sd/modeling/sampler.py,sha256=nJxiSmF79E2aDGzLqygOat4zpgaTzfR7Kp1PD0bmhWA,3397
  nexaai/mlx_backend/sd/modeling/tokenizer.py,sha256=WMcmyQmb5MmhCp-NNU72Bqjq9vQvDzgR8KAfGKA2Qso,2991
  nexaai/mlx_backend/sd/modeling/unet.py,sha256=rYj1-baAugL7CNyYYvLch891WUgpunhAJCVBNKxfZSA,14762
@@ -387,7 +375,7 @@ nexaai/utils/quantization_utils.py,sha256=4gvp6UQfSO9G1FYBwnFtQspTzH9sDbi1PBXw2t
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
- nexaai-1.0.16rc8.dist-info/METADATA,sha256=mMeCi8dxuYWGVRLrai250qYihewe89ROiq72He4RPSo,1201
- nexaai-1.0.16rc8.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
- nexaai-1.0.16rc8.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
- nexaai-1.0.16rc8.dist-info/RECORD,,
+ nexaai-1.0.16rc10.dist-info/METADATA,sha256=_p5OXiFfM3Nb6BN_grqE9--aJ4A9bz_B84UPqWYxp9I,1202
+ nexaai-1.0.16rc10.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+ nexaai-1.0.16rc10.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+ nexaai-1.0.16rc10.dist-info/RECORD,,
nexaai/mlx_backend/image_gen/__init__.py DELETED
@@ -1 +0,0 @@
- # Image generation module for MLX backend
@@ -1,244 +0,0 @@
- from __future__ import annotations
-
- from typing import (
-     List,
-     Optional,
- )
-
- import mlx.core as mx
- import numpy as np
- from PIL import Image as PILImage
- import mlx.nn as nn
- import os
-
- from .stable_diffusion import StableDiffusion, StableDiffusionXL
-
-
- class Image:
-     def __init__(self, data: List[float], width: int, height: int, channels: int) -> None:
-         """Initialize an image with pixel data"""
-         self.data = data
-         self.width = width
-         self.height = height
-         self.channels = channels
-
-     @classmethod
-     def from_numpy(cls, array: np.ndarray) -> 'Image':
-         """Create Image from numpy array (H, W, C)"""
-         height, width, channels = array.shape
-         data = array.flatten().tolist()
-         return cls(data, width, height, channels)
-
-     @classmethod
-     def from_pil(cls, pil_image: PILImage.Image) -> 'Image':
-         """Create Image from PIL Image"""
-         array = np.array(pil_image).astype(np.float32) / 255.0
-         return cls.from_numpy(array)
-
-     def to_numpy(self) -> np.ndarray:
-         """Convert to numpy array (H, W, C)"""
-         return np.array(self.data).reshape(self.height, self.width, self.channels)
-
-     def to_pil(self) -> PILImage.Image:
-         """Convert to PIL Image"""
-         array = (self.to_numpy() * 255).astype(np.uint8)
-         return PILImage.fromarray(array)
-
-
- class ImageSamplerConfig:
-     def __init__(
-         self,
-         method: str = "ddim",
-         steps: int = 4,  # SDXL Turbo typically uses fewer steps
-         guidance_scale: float = 0.0,  # SDXL Turbo works well with no guidance
-         eta: float = 0.0,
-         seed: int = -1,
-     ) -> None:
-         """Initialize sampler configuration optimized for SDXL Turbo"""
-         self.method = method
-         self.steps = steps
-         self.guidance_scale = guidance_scale
-         self.eta = eta
-         self.seed = seed
-
-
- class ImageGenerationConfig:
-     def __init__(
-         self,
-         prompts: str | List[str],
-         negative_prompts: str | List[str] | None = None,
-         height: int = 512,
-         width: int = 512,
-         sampler_config: Optional[ImageSamplerConfig] = None,
-         lora_id: int = -1,  # Not used but kept for compatibility
-         init_image: Optional[Image] = None,
-         strength: float = 1.0,
-         n_images: int = 1,
-         n_rows: int = 1,
-         decoding_batch_size: int = 1,
-     ) -> None:
-         """Initialize image generation configuration"""
-         self.prompts = prompts
-         self.negative_prompts = negative_prompts or ""
-         self.height = height
-         self.width = width
-         self.sampler_config = sampler_config or ImageSamplerConfig()
-         self.lora_id = lora_id
-         self.init_image = init_image
-         self.strength = strength
-         self.n_images = n_images
-         self.n_rows = n_rows
-         self.decoding_batch_size = decoding_batch_size
-
-
- class ImageGen:
-     def __init__(
-         self,
-         model_path: str,
-         scheduler_config_path: Optional[str] = None,
-         device: Optional[str] = None,
-         float16: bool = True,
-         quantize: bool = False,
-     ) -> None:
-         """Initialize the image generation model for SDXL Turbo"""
-         self.model_path = model_path
-         self.scheduler_config_path = scheduler_config_path
-         self.float16 = float16
-         self.quantize = quantize
-         self.model = None
-
-     @staticmethod
-     def load_model(model_path: str, float16: bool = True, quantize: bool = False) -> StableDiffusion:
-         """Load a model from the given path - following txt2img.py pattern"""
-
-         # Check if it's a local path or HuggingFace repo
-         # If it contains path separators or exists as a file/directory, treat as local
-         is_local_path = (
-             '/' in model_path or '\\' in model_path or os.path.exists(model_path))
-
-         if is_local_path:
-             # For local paths, determine model type from the path or model files
-             if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                 model = StableDiffusionXL(model_path, float16=float16)
-             else:
-                 model = StableDiffusion(model_path, float16=float16)
-         else:
-             # For HuggingFace repo names, use the original logic
-             if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                 model = StableDiffusionXL(model_path, float16=float16)
-             else:
-                 model = StableDiffusion(model_path, float16=float16)
-
-         # Apply quantization if requested - same as txt2img.py
-         if quantize:
-             if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                 nn.quantize(
-                     model.text_encoder_1, class_predicate=lambda _, m: isinstance(
-                         m, nn.Linear)
-                 )
-                 nn.quantize(
-                     model.text_encoder_2, class_predicate=lambda _, m: isinstance(
-                         m, nn.Linear)
-                 )
-             else:
-                 nn.quantize(
-                     model.text_encoder, class_predicate=lambda _, m: isinstance(
-                         m, nn.Linear)
-                 )
-             nn.quantize(model.unet, group_size=32, bits=8)
-         return model
-
-     def txt2img(self, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
-         """Generate an image from a text prompt - following txt2img.py pattern"""
-         if not self.model:
-             self.model = self.load_model(self.model_path)
-         if not self.model:
-             raise RuntimeError("Model not loaded")
-
-         sampler_config = config.sampler_config
-
-         negative_prompt = ""
-         if config.negative_prompts:
-             negative_prompt = config.negative_prompts if isinstance(
-                 config.negative_prompts, str) else config.negative_prompts[0]
-
-         # Generate latents - following txt2img.py approach
-         latents_generator = self.model.generate_latents(
-             prompt,
-             n_images=1,
-             num_steps=sampler_config.steps,
-             cfg_weight=sampler_config.guidance_scale,
-             negative_text=negative_prompt,
-             seed=sampler_config.seed if sampler_config.seed >= 0 else None
-         )
-
-         # Get final latents - following txt2img.py pattern
-         final_latents = None
-         for latents in latents_generator:
-             final_latents = latents
-             mx.eval(final_latents)
-
-         if final_latents is None:
-             raise RuntimeError("No latents generated")
-
-         # Decode to image - following txt2img.py pattern
-         decoded_image = self.model.decode(final_latents)
-         mx.eval(decoded_image)
-
-         # Convert to numpy array
-         image_array = np.array(decoded_image.squeeze(0))
-
-         if clear_cache:
-             mx.clear_cache()
-
-         return Image.from_numpy(image_array)
-
-     def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
-         """Generate an image from an initial image and a text prompt using SDXL Turbo"""
-         if not self.model:
-             self.model = self.load_model(self.model_path)
-         if not self.model:
-             raise RuntimeError("Model not loaded")
-
-         sampler_config = config.sampler_config
-
-         negative_prompt = ""
-         if config.negative_prompts:
-             negative_prompt = config.negative_prompts if isinstance(
-                 config.negative_prompts, str) else config.negative_prompts[0]
-
-         img_tensor = _prepare_image_for_sd(
-             init_image, config.width, config.height)
-
-         # Generate latents from image
-         latents_generator = self.model.generate_latents_from_image(
-             img_tensor,
-             prompt,
-             n_images=1,
-             strength=config.strength,
-             num_steps=sampler_config.steps,
-             cfg_weight=sampler_config.guidance_scale,
-             negative_text=negative_prompt,
-             seed=sampler_config.seed if sampler_config.seed >= 0 else None
-         )
-
-         # Get final latents
-         final_latents = None
-         for latents in latents_generator:
-             final_latents = latents
-             mx.eval(final_latents)
-
-         if final_latents is None:
-             raise RuntimeError("No latents generated")
-
-         # Decode to image
-         decoded_image = self.model.decode(final_latents)
-         mx.eval(decoded_image)
-
-         # Convert to numpy array
-         image_array = np.array(decoded_image.squeeze(0))
-
-         if clear_cache:
-             mx.clear_cache()
-
-         return Image.from_numpy(image_array)
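For reference, the deleted ImageGen wrapper was driven roughly as follows; this is a usage sketch reconstructed from the removed code above, and the model path is hypothetical.

# Sketch only: exercising the removed API as the deleted module defined it.
gen = ImageGen(model_path="/models/sdxl-turbo")  # hypothetical local checkpoint
cfg = ImageGenerationConfig(
    prompts="a watercolor fox",
    sampler_config=ImageSamplerConfig(steps=4, guidance_scale=0.0),  # Turbo-style settings
)
img = gen.txt2img("a watercolor fox", cfg)  # loads the model lazily, returns the Image wrapper
img.to_pil().save("out.png")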