nexaai 1.0.16rc10__cp310-cp310-macosx_14_0_universal2.whl → 1.0.16rc12__cp310-cp310-macosx_14_0_universal2.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of nexaai might be problematic; see the registry page for details.
- nexaai/__init__.py +7 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/py-lib/ml.py +60 -14
- nexaai/log.py +92 -0
- nexaai/mlx_backend/image_gen/__init__.py +1 -0
- nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
- nexaai/mlx_backend/image_gen/interface.py +82 -0
- nexaai/mlx_backend/image_gen/main.py +281 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
- nexaai/mlx_backend/ml.py +60 -14
- nexaai/mlx_backend/sd/modeling/model_io.py +72 -17
- nexaai/runtime.py +4 -0
- {nexaai-1.0.16rc10.dist-info → nexaai-1.0.16rc12.dist-info}/METADATA +1 -1
- {nexaai-1.0.16rc10.dist-info → nexaai-1.0.16rc12.dist-info}/RECORD +29 -16
- {nexaai-1.0.16rc10.dist-info → nexaai-1.0.16rc12.dist-info}/WHEEL +0 -0
- {nexaai-1.0.16rc10.dist-info → nexaai-1.0.16rc12.dist-info}/top_level.txt +0 -0
nexaai/mlx_backend/image_gen/stable_diffusion/vae.py
ADDED

@@ -0,0 +1,274 @@
+# Copyright © 2023 Apple Inc.
+
+import math
+from typing import List
+
+import mlx.core as mx
+import mlx.nn as nn
+
+from .config import AutoencoderConfig
+from .unet import ResnetBlock2D, upsample_nearest
+
+
+class Attention(nn.Module):
+    """A single head unmasked attention for use with the VAE."""
+
+    def __init__(self, dims: int, norm_groups: int = 32):
+        super().__init__()
+
+        self.group_norm = nn.GroupNorm(norm_groups, dims, pytorch_compatible=True)
+        self.query_proj = nn.Linear(dims, dims)
+        self.key_proj = nn.Linear(dims, dims)
+        self.value_proj = nn.Linear(dims, dims)
+        self.out_proj = nn.Linear(dims, dims)
+
+    def __call__(self, x):
+        B, H, W, C = x.shape
+
+        y = self.group_norm(x)
+
+        queries = self.query_proj(y).reshape(B, H * W, C)
+        keys = self.key_proj(y).reshape(B, H * W, C)
+        values = self.value_proj(y).reshape(B, H * W, C)
+
+        scale = 1 / math.sqrt(queries.shape[-1])
+        scores = (queries * scale) @ keys.transpose(0, 2, 1)
+        attn = mx.softmax(scores, axis=-1)
+        y = (attn @ values).reshape(B, H, W, C)
+
+        y = self.out_proj(y)
+        x = x + y
+
+        return x
+
+
+class EncoderDecoderBlock2D(nn.Module):
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        num_layers: int = 1,
+        resnet_groups: int = 32,
+        add_downsample=True,
+        add_upsample=True,
+    ):
+        super().__init__()
+
+        # Add the resnet blocks
+        self.resnets = [
+            ResnetBlock2D(
+                in_channels=in_channels if i == 0 else out_channels,
+                out_channels=out_channels,
+                groups=resnet_groups,
+            )
+            for i in range(num_layers)
+        ]
+
+        # Add an optional downsampling layer
+        if add_downsample:
+            self.downsample = nn.Conv2d(
+                out_channels, out_channels, kernel_size=3, stride=2, padding=0
+            )
+
+        # or upsampling layer
+        if add_upsample:
+            self.upsample = nn.Conv2d(
+                out_channels, out_channels, kernel_size=3, stride=1, padding=1
+            )
+
+    def __call__(self, x):
+        for resnet in self.resnets:
+            x = resnet(x)
+
+        if "downsample" in self:
+            x = mx.pad(x, [(0, 0), (0, 1), (0, 1), (0, 0)])
+            x = self.downsample(x)
+
+        if "upsample" in self:
+            x = self.upsample(upsample_nearest(x))
+
+        return x
+
+
+class Encoder(nn.Module):
+    """Implements the encoder side of the Autoencoder."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        block_out_channels: List[int] = [64],
+        layers_per_block: int = 2,
+        resnet_groups: int = 32,
+    ):
+        super().__init__()
+
+        self.conv_in = nn.Conv2d(
+            in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1
+        )
+
+        channels = [block_out_channels[0]] + list(block_out_channels)
+        self.down_blocks = [
+            EncoderDecoderBlock2D(
+                in_channels,
+                out_channels,
+                num_layers=layers_per_block,
+                resnet_groups=resnet_groups,
+                add_downsample=i < len(block_out_channels) - 1,
+                add_upsample=False,
+            )
+            for i, (in_channels, out_channels) in enumerate(zip(channels, channels[1:]))
+        ]
+
+        self.mid_blocks = [
+            ResnetBlock2D(
+                in_channels=block_out_channels[-1],
+                out_channels=block_out_channels[-1],
+                groups=resnet_groups,
+            ),
+            Attention(block_out_channels[-1], resnet_groups),
+            ResnetBlock2D(
+                in_channels=block_out_channels[-1],
+                out_channels=block_out_channels[-1],
+                groups=resnet_groups,
+            ),
+        ]
+
+        self.conv_norm_out = nn.GroupNorm(
+            resnet_groups, block_out_channels[-1], pytorch_compatible=True
+        )
+        self.conv_out = nn.Conv2d(block_out_channels[-1], out_channels, 3, padding=1)
+
+    def __call__(self, x):
+        x = self.conv_in(x)
+
+        for l in self.down_blocks:
+            x = l(x)
+
+        x = self.mid_blocks[0](x)
+        x = self.mid_blocks[1](x)
+        x = self.mid_blocks[2](x)
+
+        x = self.conv_norm_out(x)
+        x = nn.silu(x)
+        x = self.conv_out(x)
+
+        return x
+
+
+class Decoder(nn.Module):
+    """Implements the decoder side of the Autoencoder."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        block_out_channels: List[int] = [64],
+        layers_per_block: int = 2,
+        resnet_groups: int = 32,
+    ):
+        super().__init__()
+
+        self.conv_in = nn.Conv2d(
+            in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1
+        )
+
+        self.mid_blocks = [
+            ResnetBlock2D(
+                in_channels=block_out_channels[-1],
+                out_channels=block_out_channels[-1],
+                groups=resnet_groups,
+            ),
+            Attention(block_out_channels[-1], resnet_groups),
+            ResnetBlock2D(
+                in_channels=block_out_channels[-1],
+                out_channels=block_out_channels[-1],
+                groups=resnet_groups,
+            ),
+        ]
+
+        channels = list(reversed(block_out_channels))
+        channels = [channels[0]] + channels
+        self.up_blocks = [
+            EncoderDecoderBlock2D(
+                in_channels,
+                out_channels,
+                num_layers=layers_per_block,
+                resnet_groups=resnet_groups,
+                add_downsample=False,
+                add_upsample=i < len(block_out_channels) - 1,
+            )
+            for i, (in_channels, out_channels) in enumerate(zip(channels, channels[1:]))
+        ]
+
+        self.conv_norm_out = nn.GroupNorm(
+            resnet_groups, block_out_channels[0], pytorch_compatible=True
+        )
+        self.conv_out = nn.Conv2d(block_out_channels[0], out_channels, 3, padding=1)
+
+    def __call__(self, x):
+        x = self.conv_in(x)
+
+        x = self.mid_blocks[0](x)
+        x = self.mid_blocks[1](x)
+        x = self.mid_blocks[2](x)
+
+        for l in self.up_blocks:
+            x = l(x)
+
+        x = self.conv_norm_out(x)
+        x = nn.silu(x)
+        x = self.conv_out(x)
+
+        return x
+
+
+class Autoencoder(nn.Module):
+    """The autoencoder that allows us to perform diffusion in the latent space."""
+
+    def __init__(self, config: AutoencoderConfig):
+        super().__init__()
+
+        self.latent_channels = config.latent_channels_in
+        self.scaling_factor = config.scaling_factor
+        self.encoder = Encoder(
+            config.in_channels,
+            config.latent_channels_out,
+            config.block_out_channels,
+            config.layers_per_block,
+            resnet_groups=config.norm_num_groups,
+        )
+        self.decoder = Decoder(
+            config.latent_channels_in,
+            config.out_channels,
+            config.block_out_channels,
+            config.layers_per_block + 1,
+            resnet_groups=config.norm_num_groups,
+        )
+
+        self.quant_proj = nn.Linear(
+            config.latent_channels_out, config.latent_channels_out
+        )
+        self.post_quant_proj = nn.Linear(
+            config.latent_channels_in, config.latent_channels_in
+        )
+
+    def decode(self, z):
+        z = z / self.scaling_factor
+        return self.decoder(self.post_quant_proj(z))
+
+    def encode(self, x):
+        x = self.encoder(x)
+        x = self.quant_proj(x)
+        mean, logvar = x.split(2, axis=-1)
+        mean = mean * self.scaling_factor
+        logvar = logvar + 2 * math.log(self.scaling_factor)
+
+        return mean, logvar
+
+    def __call__(self, x, key=None):
+        mean, logvar = self.encode(x)
+        z = mx.random.normal(mean.shape, key=key) * mx.exp(0.5 * logvar) + mean
+        x_hat = self.decode(z)
+
+        return dict(x_hat=x_hat, z=z, mean=mean, logvar=logvar)
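
For orientation, here is a minimal usage sketch of the Autoencoder added above. It is not from the package: the import paths follow the file list, inputs are assumed NHWC (as `B, H, W, C = x.shape` implies), and AutoencoderConfig is assumed to carry Stable-Diffusion-style defaults.

    # Hypothetical sketch; config defaults and import paths are assumptions.
    import mlx.core as mx

    from nexaai.mlx_backend.image_gen.stable_diffusion.config import AutoencoderConfig
    from nexaai.mlx_backend.image_gen.stable_diffusion.vae import Autoencoder

    vae = Autoencoder(AutoencoderConfig())

    x = mx.zeros((1, 512, 512, 3))  # NHWC image batch
    mean, logvar = vae.encode(x)  # parameters of a diagonal Gaussian over latents
    z = mx.random.normal(mean.shape) * mx.exp(0.5 * logvar) + mean  # reparameterized sample
    x_hat = vae.decode(z)  # back to image space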
nexaai/mlx_backend/ml.py
CHANGED

@@ -1,6 +1,9 @@
 # This file defines the python interface that c-lib expects from a python backend
 
 from __future__ import annotations
+from typing import Optional
+from pathlib import Path
+from dataclasses import dataclass
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field

@@ -101,9 +104,12 @@ class ModelConfig:
     n_threads_batch: int = 0  # number of threads to use for batch processing
     n_batch: int = 0  # logical maximum batch size that can be submitted to llama_decode
     n_ubatch: int = 0  # physical maximum batch size
-
-
-
+    # max number of sequences (i.e. distinct states for recurrent models)
+    n_seq_max: int = 0
+    # path to chat template file, optional
+    chat_template_path: Optional[Path] = None
+    # content of chat template file, optional
+    chat_template_content: Optional[str] = None
 
 
 @dataclass
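
The two new chat-template fields are alternative ways to supply the same thing: a file path or the raw template text. A hypothetical sketch (it assumes the remaining ModelConfig fields keep their defaults; the path and template string are illustrative):

    from pathlib import Path

    # Either point at a template file on disk...
    cfg = ModelConfig(chat_template_path=Path("templates/chatml.jinja"))

    # ...or pass the template content inline.
    cfg = ModelConfig(chat_template_content="{% for m in messages %}...{% endfor %}")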
@@ -118,7 +124,8 @@ class SamplerConfig:
     frequency_penalty: float = 0.0
     seed: int = -1  # –1 for random
     grammar_path: Optional[Path] = None
-
+    # Optional grammar string (BNF-like format)
+    grammar_string: Optional[str] = None
 
 
 @dataclass

@@ -128,8 +135,10 @@ class GenerationConfig:
     stop: Sequence[str] = field(default_factory=tuple)
     n_past: int = 0
     sampler_config: Optional[SamplerConfig] = None
-
-
+    # Array of image paths for VLM (None if none)
+    image_paths: Optional[Sequence[Path]] = None
+    # Array of audio paths for VLM (None if none)
+    audio_paths: Optional[Sequence[Path]] = None
 
 
 @dataclass

@@ -170,6 +179,32 @@ class RerankConfig:
     normalize_method: str = "softmax"  # "softmax" | "min-max" | "none"
 
 
+# image-gen
+
+
+@dataclass
+class ImageGenTxt2ImgInput:
+    """Input structure for text-to-image generation."""
+    prompt: str
+    config: ImageGenerationConfig
+    output_path: Optional[Path] = None
+
+
+@dataclass
+class ImageGenImg2ImgInput:
+    """Input structure for image-to-image generation."""
+    init_image_path: Path
+    prompt: str
+    config: ImageGenerationConfig
+    output_path: Optional[Path] = None
+
+
+@dataclass
+class ImageGenOutput:
+    """Output structure for image generation."""
+    output_image_path: Path
+
+
 @dataclass
 class ImageSamplerConfig:
     """Configuration for image sampling."""
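
A sketch combining the new SamplerConfig and GenerationConfig fields for a grammar-constrained VLM request. The grammar and image path are illustrative, and it assumes the remaining fields of both dataclasses have defaults:

    from pathlib import Path

    gen = GenerationConfig(
        sampler_config=SamplerConfig(grammar_string='root ::= "yes" | "no"'),  # BNF-like grammar
        image_paths=[Path("examples/cat.png")],  # image input for a VLM
    )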
@@ -180,17 +215,27 @@ class ImageSamplerConfig:
     seed: int = -1  # –1 for random
 
 
+@dataclass
+class ImageGenCreateInput:
+    """Configuration for image generation."""
+    model_name: str
+    model_path: Path
+    config: ModelConfig
+    scheduler_config_path: Path
+    plugin_id: str
+    device_id: Optional[str] = None
+
+
 @dataclass
 class ImageGenerationConfig:
     """Configuration for image generation."""
-    prompts:
-
+    prompts: List[str]
+    sampler_config: ImageSamplerConfig
+    scheduler_config: SchedulerConfig
+    strength: float
+    negative_prompts: Optional[List[str]] = None
     height: int = 512
     width: int = 512
-    sampler_config: Optional[ImageSamplerConfig] = None
-    lora_id: int = -1  # –1 for none
-    init_image: Optional[Image] = None
-    strength: float = 1.0
 
 
 @dataclass

@@ -261,7 +306,7 @@ class TTSResult:
 class BoundingBox:
     """Generic bounding box structure."""
     x: float  # X coordinate (normalized or pixel, depends on model)
-    y: float  # Y coordinate (normalized or pixel, depends on model)
+    y: float  # Y coordinate (normalized or pixel, depends on model)
     width: float  # Width
     height: float  # Height

@@ -275,7 +320,8 @@ class CVResult:
     confidence: float = 0.0  # Confidence score [0.0-1.0]
     bbox: Optional[BoundingBox] = None  # Bounding box (example: YOLO)
     text: Optional[str] = None  # Text result (example: OCR)
-
+    # Feature embedding (example: CLIP embedding)
+    embedding: Optional[List[float]] = None
     embedding_dim: int = 0  # Embedding dimension
 
 
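
Putting the image-gen dataclasses together, a hypothetical text-to-image request. It assumes ImageSamplerConfig and SchedulerConfig are default-constructible; the prompt and output path are illustrative:

    from pathlib import Path

    cfg = ImageGenerationConfig(
        prompts=["an astronaut riding a horse"],
        sampler_config=ImageSamplerConfig(),
        scheduler_config=SchedulerConfig(),
        strength=1.0,  # no init image, so full denoising strength
    )
    req = ImageGenTxt2ImgInput(prompt=cfg.prompts[0], config=cfg, output_path=Path("out.png"))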
nexaai/mlx_backend/sd/modeling/model_io.py
CHANGED

@@ -1,6 +1,7 @@
 # Copyright © 2023-2024 Apple Inc.
 
 import json
+import os
 from typing import Optional
 
 import mlx.core as mx

@@ -176,19 +177,37 @@ def _load_safetensor_weights(mapper, model, weight_file, float16: bool = False):
 
 
 def _check_key(key: str, part: str):
+    # Check if it's a local path
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # For local paths, we'll use a default model structure
+        return
     if key not in _MODELS:
         raise ValueError(
             f"[{part}] '{key}' model not found, choose one of {{{','.join(_MODELS.keys())}}}"
         )
 
+def _get_model_path(key: str, file_path: str):
+    """Get the full path for a model file, supporting both local and HuggingFace paths"""
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path
+        return os.path.join(key, file_path)
+    else:
+        # HuggingFace path
+        return hf_hub_download(key, file_path)
+
 
 def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
     """Load the stable diffusion UNet from Hugging Face Hub."""
     _check_key(key, "load_unet")
 
-    #
-
-
+    # Get the config path
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        unet_config = "unet/config.json"
+    else:
+        unet_config = _MODELS[key]["unet_config"]
+
+    with open(_get_model_path(key, unet_config)) as f:
         config = json.load(f)
 
     n_blocks = len(config["block_out_channels"])

@@ -219,8 +238,13 @@ def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
     )
 
     # Download the weights and map them into the model
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        unet_weights = "unet/diffusion_pytorch_model.safetensors"
+    else:
+        unet_weights = _MODELS[key]["unet"]
+
+    weight_file = _get_model_path(key, unet_weights)
     _load_safetensor_weights(map_unet_weights, model, weight_file, float16)
 
     return model

@@ -238,8 +262,13 @@ def load_text_encoder(
     config_key = config_key or (model_key + "_config")
 
     # Download the config and create the model
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        text_encoder_config = f"{model_key}/config.json"
+    else:
+        text_encoder_config = _MODELS[key][config_key]
+
+    with open(_get_model_path(key, text_encoder_config)) as f:
         config = json.load(f)
 
     with_projection = "WithProjection" in config["architectures"][0]

@@ -257,8 +286,13 @@ def load_text_encoder(
     )
 
     # Download the weights and map them into the model
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        text_encoder_weights = f"{model_key}/model.safetensors"
+    else:
+        text_encoder_weights = _MODELS[key][model_key]
+
+    weight_file = _get_model_path(key, text_encoder_weights)
     _load_safetensor_weights(map_clip_text_encoder_weights, model, weight_file, float16)
 
     return model

@@ -269,8 +303,13 @@ def load_autoencoder(key: str = _DEFAULT_MODEL, float16: bool = False):
     _check_key(key, "load_autoencoder")
 
     # Download the config and create the model
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        vae_config = "vae/config.json"
+    else:
+        vae_config = _MODELS[key]["vae_config"]
+
+    with open(_get_model_path(key, vae_config)) as f:
         config = json.load(f)
 
     model = Autoencoder(

@@ -287,8 +326,13 @@ def load_autoencoder(key: str = _DEFAULT_MODEL, float16: bool = False):
     )
 
     # Download the weights and map them into the model
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        vae_weights = "vae/diffusion_pytorch_model.safetensors"
+    else:
+        vae_weights = _MODELS[key]["vae"]
+
+    weight_file = _get_model_path(key, vae_weights)
     _load_safetensor_weights(map_vae_weights, model, weight_file, float16)
 
     return model

@@ -298,8 +342,13 @@ def load_diffusion_config(key: str = _DEFAULT_MODEL):
     """Load the stable diffusion config from Hugging Face Hub."""
     _check_key(key, "load_diffusion_config")
 
-
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        diffusion_config = "scheduler/scheduler_config.json"
+    else:
+        diffusion_config = _MODELS[key]["diffusion_config"]
+
+    with open(_get_model_path(key, diffusion_config)) as f:
         config = json.load(f)
 
     return DiffusionConfig(

@@ -317,11 +366,17 @@ def load_tokenizer(
 ):
     _check_key(key, "load_tokenizer")
 
-
+    if os.path.exists(key) or '/' in key or '\\' in key:
+        # Local path - use SDXL Turbo structure
+        vocab_file = _get_model_path(key, f"tokenizer/{vocab_key.split('_')[1]}.json")
+        merges_file = _get_model_path(key, f"tokenizer/{merges_key.split('_')[1]}.txt")
+    else:
+        vocab_file = _get_model_path(key, _MODELS[key][vocab_key])
+        merges_file = _get_model_path(key, _MODELS[key][merges_key])
+
     with open(vocab_file, encoding="utf-8") as f:
         vocab = json.load(f)
 
-    merges_file = hf_hub_download(key, _MODELS[key][merges_key])
     with open(merges_file, encoding="utf-8") as f:
         bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
         bpe_merges = [tuple(m.split()) for m in bpe_merges]
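
Note the branch condition used throughout: a key that exists on disk or contains a path separator takes the local branch with the fixed SDXL-Turbo file layout, so only bare keys fall through to the `_MODELS` lookup and the Hub download. A sketch of local loading (the directory is illustrative, and load_tokenizer is assumed to keep its default vocab/merges keys):

    # Expects <dir>/unet/config.json, <dir>/vae/..., <dir>/tokenizer/... on disk.
    unet = load_unet("/models/sdxl-turbo", float16=True)
    vae = load_autoencoder("/models/sdxl-turbo", float16=True)
    tokenizer = load_tokenizer("/models/sdxl-turbo")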
nexaai/runtime.py
CHANGED

@@ -28,6 +28,10 @@ def _shutdown_runtime() -> None:
 # Public helper so advanced users can reclaim memory on demand
 shutdown = _shutdown_runtime
 
+def is_initialized() -> bool:
+    """Check if the runtime has been initialized."""
+    return _runtime_alive
+
 # ----------------------------------------------------------------------
 # Single public class
 # ----------------------------------------------------------------------
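
A sketch of the new helper in use (the import path is assumed from the file name):

    from nexaai import runtime

    if runtime.is_initialized():
        runtime.shutdown()  # reclaim memory on demand, per the comment above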