nexaai 1.0.16rc8-cp310-cp310-macosx_13_0_x86_64.whl → 1.0.16rc10-cp310-cp310-macosx_13_0_x86_64.whl
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Warning: this release of nexaai has been flagged as potentially problematic.
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/mlx_backend/ml.py +14 -60
- nexaai/mlx_backend/sd/modeling/model_io.py +17 -72
- {nexaai-1.0.16rc8.dist-info → nexaai-1.0.16rc10.dist-info}/METADATA +1 -1
- {nexaai-1.0.16rc8.dist-info → nexaai-1.0.16rc10.dist-info}/RECORD +9 -21
- nexaai/mlx_backend/image_gen/__init__.py +0 -1
- nexaai/mlx_backend/image_gen/generate_sd.py +0 -244
- nexaai/mlx_backend/image_gen/interface.py +0 -82
- nexaai/mlx_backend/image_gen/main.py +0 -281
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +0 -306
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +0 -116
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +0 -65
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +0 -386
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +0 -105
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +0 -100
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +0 -460
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +0 -274
- {nexaai-1.0.16rc8.dist-info → nexaai-1.0.16rc10.dist-info}/WHEEL +0 -0
- {nexaai-1.0.16rc8.dist-info → nexaai-1.0.16rc10.dist-info}/top_level.txt +0 -0
nexaai/_stub.cpython-310-darwin.so CHANGED

Binary file

nexaai/_version.py CHANGED

nexaai/binds/libnexa_bridge.dylib CHANGED

Binary file

nexaai/mlx_backend/ml.py CHANGED
@@ -1,9 +1,6 @@
 # This file defines the python interface that c-lib expects from a python backend
 
 from __future__ import annotations
-from typing import Optional
-from pathlib import Path
-from dataclasses import dataclass
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
@@ -104,12 +101,9 @@ class ModelConfig:
     n_threads_batch: int = 0  # number of threads to use for batch processing
     n_batch: int = 0  # logical maximum batch size that can be submitted to llama_decode
     n_ubatch: int = 0  # physical maximum batch size
-    # max number of sequences (i.e. distinct states for recurrent models)
-
-    #
-    chat_template_path: Optional[Path] = None
-    # content of chat template file, optional
-    chat_template_content: Optional[str] = None
+    n_seq_max: int = 0  # max number of sequences (i.e. distinct states for recurrent models)
+    chat_template_path: Optional[Path] = None  # path to chat template file, optional
+    chat_template_content: Optional[str] = None  # content of chat template file, optional
 
 
 @dataclass
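Note: in rc8, ModelConfig carried the comment for the sequence-count field but not the field itself; rc10 restores n_seq_max and folds each comment onto its field. A minimal sketch of constructing the repaired dataclass (the import path is an assumption from the file location, and the remaining fields are presumed to keep their defaults):

    from pathlib import Path
    from nexaai.mlx_backend.ml import ModelConfig  # assumed import path

    cfg = ModelConfig(
        n_batch=512,
        n_seq_max=4,  # restored field: max number of sequences
        chat_template_path=Path("chat_template.jinja"),  # hypothetical template file
    )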
@@ -124,8 +118,7 @@ class SamplerConfig:
     frequency_penalty: float = 0.0
     seed: int = -1  # -1 for random
     grammar_path: Optional[Path] = None
-    # Optional grammar string (BNF-like format)
-    grammar_string: Optional[str] = None
+    grammar_string: Optional[str] = None  # Optional grammar string (BNF-like format)
 
 
 @dataclass
@@ -135,10 +128,8 @@ class GenerationConfig:
     stop: Sequence[str] = field(default_factory=tuple)
     n_past: int = 0
     sampler_config: Optional[SamplerConfig] = None
-    # Array of image paths for VLM (None if none)
-
-    # Array of audio paths for VLM (None if none)
-    audio_paths: Optional[Sequence[Path]] = None
+    image_paths: Optional[Sequence[Path]] = None  # Array of image paths for VLM (None if none)
+    audio_paths: Optional[Sequence[Path]] = None  # Array of audio paths for VLM (None if none)
 
 
 @dataclass
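Note: GenerationConfig had the same truncation, a comment announcing image paths with no image_paths field beneath it, so VLM callers had no attribute to set; rc10 restores it alongside audio_paths. A sketch (import path assumed, remaining fields presumed defaulted):

    from pathlib import Path
    from nexaai.mlx_backend.ml import GenerationConfig  # assumed import path

    gen_cfg = GenerationConfig(
        image_paths=[Path("photo.jpg")],  # restored field: image inputs for VLM
        audio_paths=None,                 # no audio inputs
    )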
@@ -179,32 +170,6 @@ class RerankConfig:
     normalize_method: str = "softmax"  # "softmax" | "min-max" | "none"
 
 
-# image-gen
-
-
-@dataclass
-class ImageGenTxt2ImgInput:
-    """Input structure for text-to-image generation."""
-    prompt: str
-    config: ImageGenerationConfig
-    output_path: Optional[Path] = None
-
-
-@dataclass
-class ImageGenImg2ImgInput:
-    """Input structure for image-to-image generation."""
-    init_image_path: Path
-    prompt: str
-    config: ImageGenerationConfig
-    output_path: Optional[Path] = None
-
-
-@dataclass
-class ImageGenOutput:
-    """Output structure for image generation."""
-    output_image_path: Path
-
-
 @dataclass
 class ImageSamplerConfig:
     """Configuration for image sampling."""
@@ -215,27 +180,17 @@ class ImageSamplerConfig:
     seed: int = -1  # -1 for random
 
 
-@dataclass
-class ImageGenCreateInput:
-    """Configuration for image generation."""
-    model_name: str
-    model_path: Path
-    config: ModelConfig
-    scheduler_config_path: Path
-    plugin_id: str
-    device_id: Optional[str] = None
-
-
 @dataclass
 class ImageGenerationConfig:
     """Configuration for image generation."""
-    prompts: List[str]
-
-    scheduler_config: SchedulerConfig
-    strength: float
-    negative_prompts: Optional[List[str]] = None
+    prompts: str | List[str]
+    negative_prompts: str | List[str] | None = None
     height: int = 512
     width: int = 512
+    sampler_config: Optional[ImageSamplerConfig] = None
+    lora_id: int = -1  # -1 for none
+    init_image: Optional[Image] = None
+    strength: float = 1.0
 
 
 @dataclass
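Note: ImageGenerationConfig is reshaped to match the constructor of the image_gen module's class of the same name (deleted at the end of this diff): prompts may now be a bare string, the required scheduler_config field is gone, and sampler_config, lora_id, init_image, and strength gain defaults. A sketch (import path assumed):

    from nexaai.mlx_backend.ml import ImageGenerationConfig, ImageSamplerConfig  # assumed import path

    img_cfg = ImageGenerationConfig(
        prompts="a watercolor fox",  # a bare str is now accepted
        height=512,
        width=512,
        sampler_config=ImageSamplerConfig(),  # optional; None by default
        strength=1.0,
    )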
@@ -306,7 +261,7 @@ class TTSResult:
 class BoundingBox:
     """Generic bounding box structure."""
     x: float  # X coordinate (normalized or pixel, depends on model)
-    y: float  # Y coordinate (normalized or pixel, depends on model)
+    y: float  # Y coordinate (normalized or pixel, depends on model)
     width: float  # Width
     height: float  # Height
 
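Note: the y line above appears to change only in whitespace (comment alignment), which is why the removed and added lines read identically here.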
@@ -320,8 +275,7 @@ class CVResult:
     confidence: float = 0.0  # Confidence score [0.0-1.0]
     bbox: Optional[BoundingBox] = None  # Bounding box (example: YOLO)
     text: Optional[str] = None  # Text result (example: OCR)
-    # Feature embedding (example: CLIP embedding)
-    embedding: Optional[List[float]] = None
+    embedding: Optional[List[float]] = None  # Feature embedding (example: CLIP embedding)
     embedding_dim: int = 0  # Embedding dimension
 
 
nexaai/mlx_backend/sd/modeling/model_io.py CHANGED

@@ -1,7 +1,6 @@
 # Copyright © 2023-2024 Apple Inc.
 
 import json
-import os
 from typing import Optional
 
 import mlx.core as mx
@@ -177,37 +176,19 @@ def _load_safetensor_weights(mapper, model, weight_file, float16: bool = False):
 
 
 def _check_key(key: str, part: str):
-    # Check if it's a local path
-    if os.path.exists(key) or '/' in key or '\\' in key:
-        # For local paths, we'll use a default model structure
-        return
     if key not in _MODELS:
         raise ValueError(
             f"[{part}] '{key}' model not found, choose one of {{{','.join(_MODELS.keys())}}}"
         )
 
-def _get_model_path(key: str, file_path: str):
-    """Get the full path for a model file, supporting both local and HuggingFace paths"""
-    if os.path.exists(key) or '/' in key or '\\' in key:
-        # Local path
-        return os.path.join(key, file_path)
-    else:
-        # HuggingFace path
-        return hf_hub_download(key, file_path)
-
 
 def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
     """Load the stable diffusion UNet from Hugging Face Hub."""
     _check_key(key, "load_unet")
 
-    #
-
-
-        unet_config = "unet/config.json"
-    else:
-        unet_config = _MODELS[key]["unet_config"]
-
-    with open(_get_model_path(key, unet_config)) as f:
+    # Download the config and create the model
+    unet_config = _MODELS[key]["unet_config"]
+    with open(hf_hub_download(key, unet_config)) as f:
         config = json.load(f)
 
     n_blocks = len(config["block_out_channels"])
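Note: the rc8 copy of model_io.py appears to have shipped truncated: the `if` headers for the local-path branches are missing, leaving orphaned `else:` blocks that cannot parse. rc10 drops the local-path handling entirely and always resolves files through the Hub. For context, hf_hub_download(repo_id, filename) from huggingface_hub returns the local cached path of a file inside a Hub repo, downloading it on first access; a sketch of the resulting flow (the repo id is illustrative):

    import json
    from huggingface_hub import hf_hub_download

    # key is a repo id registered in _MODELS, e.g. "stabilityai/stable-diffusion-2-1-base"
    config_path = hf_hub_download("stabilityai/stable-diffusion-2-1-base", "unet/config.json")
    with open(config_path) as f:
        config = json.load(f)  # UNet hyperparameters such as block_out_channels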
@@ -238,13 +219,8 @@ def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
     )
 
     # Download the weights and map them into the model
-
-
-        unet_weights = "unet/diffusion_pytorch_model.safetensors"
-    else:
-        unet_weights = _MODELS[key]["unet"]
-
-    weight_file = _get_model_path(key, unet_weights)
+    unet_weights = _MODELS[key]["unet"]
+    weight_file = hf_hub_download(key, unet_weights)
     _load_safetensor_weights(map_unet_weights, model, weight_file, float16)
 
     return model
@@ -262,13 +238,8 @@ def load_text_encoder(
     config_key = config_key or (model_key + "_config")
 
     # Download the config and create the model
-
-
-        text_encoder_config = f"{model_key}/config.json"
-    else:
-        text_encoder_config = _MODELS[key][config_key]
-
-    with open(_get_model_path(key, text_encoder_config)) as f:
+    text_encoder_config = _MODELS[key][config_key]
+    with open(hf_hub_download(key, text_encoder_config)) as f:
         config = json.load(f)
 
     with_projection = "WithProjection" in config["architectures"][0]
@@ -286,13 +257,8 @@
     )
 
     # Download the weights and map them into the model
-
-
-        text_encoder_weights = f"{model_key}/model.safetensors"
-    else:
-        text_encoder_weights = _MODELS[key][model_key]
-
-    weight_file = _get_model_path(key, text_encoder_weights)
+    text_encoder_weights = _MODELS[key][model_key]
+    weight_file = hf_hub_download(key, text_encoder_weights)
     _load_safetensor_weights(map_clip_text_encoder_weights, model, weight_file, float16)
 
     return model
@@ -303,13 +269,8 @@ def load_autoencoder(key: str = _DEFAULT_MODEL, float16: bool = False):
     _check_key(key, "load_autoencoder")
 
     # Download the config and create the model
-
-
-        vae_config = "vae/config.json"
-    else:
-        vae_config = _MODELS[key]["vae_config"]
-
-    with open(_get_model_path(key, vae_config)) as f:
+    vae_config = _MODELS[key]["vae_config"]
+    with open(hf_hub_download(key, vae_config)) as f:
         config = json.load(f)
 
     model = Autoencoder(
@@ -326,13 +287,8 @@
     )
 
     # Download the weights and map them into the model
-
-
-        vae_weights = "vae/diffusion_pytorch_model.safetensors"
-    else:
-        vae_weights = _MODELS[key]["vae"]
-
-    weight_file = _get_model_path(key, vae_weights)
+    vae_weights = _MODELS[key]["vae"]
+    weight_file = hf_hub_download(key, vae_weights)
     _load_safetensor_weights(map_vae_weights, model, weight_file, float16)
 
     return model
@@ -342,13 +298,8 @@ def load_diffusion_config(key: str = _DEFAULT_MODEL):
     """Load the stable diffusion config from Hugging Face Hub."""
     _check_key(key, "load_diffusion_config")
 
-
-
-        diffusion_config = "scheduler/scheduler_config.json"
-    else:
-        diffusion_config = _MODELS[key]["diffusion_config"]
-
-    with open(_get_model_path(key, diffusion_config)) as f:
+    diffusion_config = _MODELS[key]["diffusion_config"]
+    with open(hf_hub_download(key, diffusion_config)) as f:
        config = json.load(f)
 
     return DiffusionConfig(
@@ -366,17 +317,11 @@ def load_tokenizer(
 ):
     _check_key(key, "load_tokenizer")
 
-
-    # Local path - use SDXL Turbo structure
-        vocab_file = _get_model_path(key, f"tokenizer/{vocab_key.split('_')[1]}.json")
-        merges_file = _get_model_path(key, f"tokenizer/{merges_key.split('_')[1]}.txt")
-    else:
-        vocab_file = _get_model_path(key, _MODELS[key][vocab_key])
-        merges_file = _get_model_path(key, _MODELS[key][merges_key])
-
+    vocab_file = hf_hub_download(key, _MODELS[key][vocab_key])
     with open(vocab_file, encoding="utf-8") as f:
         vocab = json.load(f)
 
+    merges_file = hf_hub_download(key, _MODELS[key][merges_key])
     with open(merges_file, encoding="utf-8") as f:
         bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
     bpe_merges = [tuple(m.split()) for m in bpe_merges]
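Note: the merges slice mirrors OpenAI's CLIP tokenizer: line 0 of merges.txt is a version header, so the read starts at index 1 and keeps 49152 - 256 - 2 = 48894 merge rules.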
{nexaai-1.0.16rc8.dist-info → nexaai-1.0.16rc10.dist-info}/RECORD CHANGED

@@ -1,6 +1,6 @@
 nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
-nexaai/_stub.cpython-310-darwin.so,sha256=
-nexaai/_version.py,sha256=
+nexaai/_stub.cpython-310-darwin.so,sha256=bjv9gcR4eVwpi9riqlrWzsFcawoQgULf9ZcwFX0em3g,49832
+nexaai/_version.py,sha256=rM7mysA3I6mJ-jXt3DwjydINMplIqs78L_Oid1wMpI8,144
 nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
 nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
 nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422

@@ -18,7 +18,7 @@ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XE
 nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
 nexaai/binds/common_bind.cpython-310-darwin.so,sha256=KX_nfaQQPzxGv0GS0efcO-ByWai7y7RskMZvSAAaJWI,233960
 nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
-nexaai/binds/libnexa_bridge.dylib,sha256=
+nexaai/binds/libnexa_bridge.dylib,sha256=haJ62A2Xnv3ZQ76GIT-HTqXh9maiMpNTSaFCCTNIJko,250376
 nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=aKcT2kW1PL1xPFX7vsT6Gs79ZydcVfg8bKtnEthRpI4,183008
 nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
 nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528

@@ -40,7 +40,7 @@ nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCt
 nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
 nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
-nexaai/mlx_backend/ml.py,sha256=
+nexaai/mlx_backend/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
 nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
 nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
 nexaai/mlx_backend/asr/interface.py,sha256=pE5expr8sP7O9bEgWaUaJ4ITX0MsCxFovG9iVWqVDVU,4246

@@ -57,18 +57,6 @@ nexaai/mlx_backend/embedding/interface.py,sha256=M7AGiq_UVLNIi2Ie6H08ySnMxIjIhUl
 nexaai/mlx_backend/embedding/main.py,sha256=xKRebBcooKuf8DzWKwCicftes3MAcYAd1QvcT9_AAPQ,6003
 nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
-nexaai/mlx_backend/image_gen/__init__.py,sha256=8eFAF00-yWdEs0LJiszjHEsgPDAF8tSruBYvujCNgE0,42
-nexaai/mlx_backend/image_gen/generate_sd.py,sha256=8DuPsJlxVf1LlFARVThiW807G-0cWPReYONj-x8qKRo,8616
-nexaai/mlx_backend/image_gen/interface.py,sha256=7ElIiLm5gZXAfKPs497wLkn8qqLFLL5T-oV-hXVmEdw,3285
-nexaai/mlx_backend/image_gen/main.py,sha256=jKkGDfqmGIsQwhHJaL_j_CTtr04xplD6dnnGe3AcilU,8123
-nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py,sha256=wriLb0wA5vCBlCoQMtfKrVVWMJw8fhXCCk6R9_Nrb9c,9524
-nexaai/mlx_backend/image_gen/stable_diffusion/clip.py,sha256=feHQXi1NiGa01AMo7nK8M-sgBoZBHI95xAGMfAv64kE,3733
-nexaai/mlx_backend/image_gen/stable_diffusion/config.py,sha256=lq2sWXevpnCk8KXOywEVOzgFT6WBNnG4xr7NLWgidII,1773
-nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py,sha256=ei-pdrqFxjeHLL6Ov5NZaHT5lceqZbHDyNFfUFUh_04,13755
-nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py,sha256=nJxiSmF79E2aDGzLqygOat4zpgaTzfR7Kp1PD0bmhWA,3397
-nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py,sha256=WMcmyQmb5MmhCp-NNU72Bqjq9vQvDzgR8KAfGKA2Qso,2991
-nexaai/mlx_backend/image_gen/stable_diffusion/unet.py,sha256=rYj1-baAugL7CNyYYvLch891WUgpunhAJCVBNKxfZSA,14762
-nexaai/mlx_backend/image_gen/stable_diffusion/vae.py,sha256=sQ1943x5SKyTmeL2xe3t84FL9DGO72Eab2m88r-BSe8,8044
 nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/llm/generate.py,sha256=Phes0tzxbbEWA2hDylQvD0LjorMaPwvcfZq9RKCAOt0,4399
 nexaai/mlx_backend/llm/interface.py,sha256=SZFkuAUi2vxj_dSqj8RXf9vPTGMtpks_pZxxrF7iIe8,29330

@@ -236,7 +224,7 @@ nexaai/mlx_backend/sd/main.py,sha256=cHlZhG8KHDFXbYRH-diKA7B1Qacq1euuGw0pKeXJGAI
 nexaai/mlx_backend/sd/modeling/__init__.py,sha256=wriLb0wA5vCBlCoQMtfKrVVWMJw8fhXCCk6R9_Nrb9c,9524
 nexaai/mlx_backend/sd/modeling/clip.py,sha256=feHQXi1NiGa01AMo7nK8M-sgBoZBHI95xAGMfAv64kE,3733
 nexaai/mlx_backend/sd/modeling/config.py,sha256=lq2sWXevpnCk8KXOywEVOzgFT6WBNnG4xr7NLWgidII,1773
-nexaai/mlx_backend/sd/modeling/model_io.py,sha256=
+nexaai/mlx_backend/sd/modeling/model_io.py,sha256=0jpMHJdMoTk0Jo4Uz1ZGiXekAWzPuTTD5g7ms4m4idY,11668
 nexaai/mlx_backend/sd/modeling/sampler.py,sha256=nJxiSmF79E2aDGzLqygOat4zpgaTzfR7Kp1PD0bmhWA,3397
 nexaai/mlx_backend/sd/modeling/tokenizer.py,sha256=WMcmyQmb5MmhCp-NNU72Bqjq9vQvDzgR8KAfGKA2Qso,2991
 nexaai/mlx_backend/sd/modeling/unet.py,sha256=rYj1-baAugL7CNyYYvLch891WUgpunhAJCVBNKxfZSA,14762

@@ -387,7 +375,7 @@ nexaai/utils/quantization_utils.py,sha256=4gvp6UQfSO9G1FYBwnFtQspTzH9sDbi1PBXw2t
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
 nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
+nexaai-1.0.16rc10.dist-info/METADATA,sha256=_p5OXiFfM3Nb6BN_grqE9--aJ4A9bz_B84UPqWYxp9I,1202
+nexaai-1.0.16rc10.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+nexaai-1.0.16rc10.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.16rc10.dist-info/RECORD,,
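Note: RECORD is the wheel's installed-files manifest; each line is path,sha256=<digest>,size-in-bytes, and the final RECORD,, entry carries no hash because the manifest cannot include a digest of itself. The twelve entries dropped in the -57,18 hunk correspond one-to-one to the deleted image_gen files listed at the top of this diff.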
nexaai/mlx_backend/image_gen/__init__.py DELETED

@@ -1 +0,0 @@
-# Image generation module for MLX backend
nexaai/mlx_backend/image_gen/generate_sd.py DELETED

@@ -1,244 +0,0 @@
-from __future__ import annotations
-
-from typing import (
-    List,
-    Optional,
-)
-
-import mlx.core as mx
-import numpy as np
-from PIL import Image as PILImage
-import mlx.nn as nn
-import os
-
-from .stable_diffusion import StableDiffusion, StableDiffusionXL
-
-
-class Image:
-    def __init__(self, data: List[float], width: int, height: int, channels: int) -> None:
-        """Initialize an image with pixel data"""
-        self.data = data
-        self.width = width
-        self.height = height
-        self.channels = channels
-
-    @classmethod
-    def from_numpy(cls, array: np.ndarray) -> 'Image':
-        """Create Image from numpy array (H, W, C)"""
-        height, width, channels = array.shape
-        data = array.flatten().tolist()
-        return cls(data, width, height, channels)
-
-    @classmethod
-    def from_pil(cls, pil_image: PILImage.Image) -> 'Image':
-        """Create Image from PIL Image"""
-        array = np.array(pil_image).astype(np.float32) / 255.0
-        return cls.from_numpy(array)
-
-    def to_numpy(self) -> np.ndarray:
-        """Convert to numpy array (H, W, C)"""
-        return np.array(self.data).reshape(self.height, self.width, self.channels)
-
-    def to_pil(self) -> PILImage.Image:
-        """Convert to PIL Image"""
-        array = (self.to_numpy() * 255).astype(np.uint8)
-        return PILImage.fromarray(array)
-
-
-class ImageSamplerConfig:
-    def __init__(
-        self,
-        method: str = "ddim",
-        steps: int = 4,  # SDXL Turbo typically uses fewer steps
-        guidance_scale: float = 0.0,  # SDXL Turbo works well with no guidance
-        eta: float = 0.0,
-        seed: int = -1,
-    ) -> None:
-        """Initialize sampler configuration optimized for SDXL Turbo"""
-        self.method = method
-        self.steps = steps
-        self.guidance_scale = guidance_scale
-        self.eta = eta
-        self.seed = seed
-
-
-class ImageGenerationConfig:
-    def __init__(
-        self,
-        prompts: str | List[str],
-        negative_prompts: str | List[str] | None = None,
-        height: int = 512,
-        width: int = 512,
-        sampler_config: Optional[ImageSamplerConfig] = None,
-        lora_id: int = -1,  # Not used but kept for compatibility
-        init_image: Optional[Image] = None,
-        strength: float = 1.0,
-        n_images: int = 1,
-        n_rows: int = 1,
-        decoding_batch_size: int = 1,
-    ) -> None:
-        """Initialize image generation configuration"""
-        self.prompts = prompts
-        self.negative_prompts = negative_prompts or ""
-        self.height = height
-        self.width = width
-        self.sampler_config = sampler_config or ImageSamplerConfig()
-        self.lora_id = lora_id
-        self.init_image = init_image
-        self.strength = strength
-        self.n_images = n_images
-        self.n_rows = n_rows
-        self.decoding_batch_size = decoding_batch_size
-
-
-class ImageGen:
-    def __init__(
-        self,
-        model_path: str,
-        scheduler_config_path: Optional[str] = None,
-        device: Optional[str] = None,
-        float16: bool = True,
-        quantize: bool = False,
-    ) -> None:
-        """Initialize the image generation model for SDXL Turbo"""
-        self.model_path = model_path
-        self.scheduler_config_path = scheduler_config_path
-        self.float16 = float16
-        self.quantize = quantize
-        self.model = None
-
-    @staticmethod
-    def load_model(model_path: str, float16: bool = True, quantize: bool = False) -> StableDiffusion:
-        """Load a model from the given path - following txt2img.py pattern"""
-
-        # Check if it's a local path or HuggingFace repo
-        # If it contains path separators or exists as a file/directory, treat as local
-        is_local_path = (
-            '/' in model_path or '\\' in model_path or os.path.exists(model_path))
-
-        if is_local_path:
-            # For local paths, determine model type from the path or model files
-            if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                model = StableDiffusionXL(model_path, float16=float16)
-            else:
-                model = StableDiffusion(model_path, float16=float16)
-        else:
-            # For HuggingFace repo names, use the original logic
-            if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                model = StableDiffusionXL(model_path, float16=float16)
-            else:
-                model = StableDiffusion(model_path, float16=float16)
-
-        # Apply quantization if requested - same as txt2img.py
-        if quantize:
-            if "xl" in model_path.lower() or "turbo" in model_path.lower():
-                nn.quantize(
-                    model.text_encoder_1, class_predicate=lambda _, m: isinstance(
-                        m, nn.Linear)
-                )
-                nn.quantize(
-                    model.text_encoder_2, class_predicate=lambda _, m: isinstance(
-                        m, nn.Linear)
-                )
-            else:
-                nn.quantize(
-                    model.text_encoder, class_predicate=lambda _, m: isinstance(
-                        m, nn.Linear)
-                )
-            nn.quantize(model.unet, group_size=32, bits=8)
-        return model
-
-    def txt2img(self, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
-        """Generate an image from a text prompt - following txt2img.py pattern"""
-        if not self.model:
-            self.model = self.load_model(self.model_path)
-        if not self.model:
-            raise RuntimeError("Model not loaded")
-
-        sampler_config = config.sampler_config
-
-        negative_prompt = ""
-        if config.negative_prompts:
-            negative_prompt = config.negative_prompts if isinstance(
-                config.negative_prompts, str) else config.negative_prompts[0]
-
-        # Generate latents - following txt2img.py approach
-        latents_generator = self.model.generate_latents(
-            prompt,
-            n_images=1,
-            num_steps=sampler_config.steps,
-            cfg_weight=sampler_config.guidance_scale,
-            negative_text=negative_prompt,
-            seed=sampler_config.seed if sampler_config.seed >= 0 else None
-        )
-
-        # Get final latents - following txt2img.py pattern
-        final_latents = None
-        for latents in latents_generator:
-            final_latents = latents
-            mx.eval(final_latents)
-
-        if final_latents is None:
-            raise RuntimeError("No latents generated")
-
-        # Decode to image - following txt2img.py pattern
-        decoded_image = self.model.decode(final_latents)
-        mx.eval(decoded_image)
-
-        # Convert to numpy array
-        image_array = np.array(decoded_image.squeeze(0))
-
-        if clear_cache:
-            mx.clear_cache()
-
-        return Image.from_numpy(image_array)
-
-    def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
-        """Generate an image from an initial image and a text prompt using SDXL Turbo"""
-        if not self.model:
-            self.model = self.load_model(self.model_path)
-        if not self.model:
-            raise RuntimeError("Model not loaded")
-
-        sampler_config = config.sampler_config
-
-        negative_prompt = ""
-        if config.negative_prompts:
-            negative_prompt = config.negative_prompts if isinstance(
-                config.negative_prompts, str) else config.negative_prompts[0]
-
-        img_tensor = _prepare_image_for_sd(
-            init_image, config.width, config.height)
-
-        # Generate latents from image
-        latents_generator = self.model.generate_latents_from_image(
-            img_tensor,
-            prompt,
-            n_images=1,
-            strength=config.strength,
-            num_steps=sampler_config.steps,
-            cfg_weight=sampler_config.guidance_scale,
-            negative_text=negative_prompt,
-            seed=sampler_config.seed if sampler_config.seed >= 0 else None
-        )
-
-        # Get final latents
-        final_latents = None
-        for latents in latents_generator:
-            final_latents = latents
-            mx.eval(final_latents)
-
-        if final_latents is None:
-            raise RuntimeError("No latents generated")
-
-        # Decode to image
-        decoded_image = self.model.decode(final_latents)
-        mx.eval(decoded_image)
-
-        # Convert to numpy array
-        image_array = np.array(decoded_image.squeeze(0))
-
-        if clear_cache:
-            mx.clear_cache()
-
-        return Image.from_numpy(image_array)
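Note: the deleted image_gen package duplicated code that survives under nexaai/mlx_backend/sd; in the RECORD diff above, clip.py, config.py, sampler.py, tokenizer.py, unet.py, and vae.py carry identical sha256 values in both locations, so only model_io.py actually diverged. For reference, a sketch of the wrapper API this file defined, as it was invoked before removal (the model id and output file name are illustrative):

    gen = ImageGen("stabilityai/sdxl-turbo", float16=True)
    cfg = ImageGenerationConfig(prompts="a watercolor fox")
    img = gen.txt2img("a watercolor fox", cfg)  # lazily loads the model, returns the module's Image wrapper
    img.to_pil().save("fox.png")                # hypothetical output path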