PyPI - nexaai - Versions diffs - 1.0.16rc5__cp310-cp310-macosx_14_0_universal2.whl → 1.0.16rc7__cp310-cp310-macosx_14_0_universal2.whl - Mend

nexaai 1.0.16rc5__cp310-cp310-macosx_14_0_universal2.whl → 1.0.16rc7__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic. Click here for more details.

Files changed (23) hide show

nexaai/_stub.cpython-310-darwin.so +0 -0
nexaai/_version.py +1 -1
nexaai/binds/libnexa_bridge.dylib +0 -0
nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
nexaai/binds/nexa_mlx/py-lib/ml.py +60 -14
nexaai/mlx_backend/image_gen/__init__.py +1 -0
nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
nexaai/mlx_backend/image_gen/interface.py +82 -0
nexaai/mlx_backend/image_gen/main.py +281 -0
nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
nexaai/mlx_backend/ml.py +60 -14
nexaai/mlx_backend/sd/modeling/model_io.py +72 -17
{nexaai-1.0.16rc5.dist-info → nexaai-1.0.16rc7.dist-info}/METADATA +1 -1
{nexaai-1.0.16rc5.dist-info → nexaai-1.0.16rc7.dist-info}/RECORD +23 -11
{nexaai-1.0.16rc5.dist-info → nexaai-1.0.16rc7.dist-info}/WHEEL +0 -0
{nexaai-1.0.16rc5.dist-info → nexaai-1.0.16rc7.dist-info}/top_level.txt +0 -0

nexaai/_stub.cpython-310-darwin.so CHANGED Viewed

Binary file

nexaai/_version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # This file is generated by CMake from _version.py.in
 # Do not modify this file manually - it will be overwritten
-__version__ = "1.0.16-rc5"
+__version__ = "1.0.16-rc7"

nexaai/binds/libnexa_bridge.dylib CHANGED Viewed

Binary file

nexaai/binds/nexa_mlx/libnexa_plugin.dylib CHANGED Viewed

Binary file

nexaai/binds/nexa_mlx/py-lib/ml.py CHANGED Viewed

@@ -1,6 +1,9 @@
 # This file defines the python interface that c-lib expects from a python backend
 from __future__ import annotations
+from typing import Optional
+from pathlib import Path
+from dataclasses import dataclass
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
@@ -101,9 +104,12 @@ class ModelConfig:
     n_threads_batch: int = 0  # number of threads to use for batch processing
     n_batch: int = 0  # logical maximum batch size that can be submitted to llama_decode
     n_ubatch: int = 0  # physical maximum batch size
-    n_seq_max: int = 0  # max number of sequences (i.e. distinct states for recurrent models)
-    chat_template_path: Optional[Path] = None  # path to chat template file, optional
-    chat_template_content: Optional[str] = None  # content of chat template file, optional
+    # max number of sequences (i.e. distinct states for recurrent models)
+    n_seq_max: int = 0
+    # path to chat template file, optional
+    chat_template_path: Optional[Path] = None
+    # content of chat template file, optional
+    chat_template_content: Optional[str] = None
 @dataclass
@@ -118,7 +124,8 @@ class SamplerConfig:
     frequency_penalty: float = 0.0
     seed: int = -1  # –1 for random
     grammar_path: Optional[Path] = None
-    grammar_string: Optional[str] = None  # Optional grammar string (BNF-like format)
+    # Optional grammar string (BNF-like format)
+    grammar_string: Optional[str] = None
 @dataclass
@@ -128,8 +135,10 @@ class GenerationConfig:
     stop: Sequence[str] = field(default_factory=tuple)
     n_past: int = 0
     sampler_config: Optional[SamplerConfig] = None
-    image_paths: Optional[Sequence[Path]] = None  # Array of image paths for VLM (None if none)
-    audio_paths: Optional[Sequence[Path]] = None  # Array of audio paths for VLM (None if none)
+    # Array of image paths for VLM (None if none)
+    image_paths: Optional[Sequence[Path]] = None
+    # Array of audio paths for VLM (None if none)
+    audio_paths: Optional[Sequence[Path]] = None
 @dataclass
@@ -170,6 +179,32 @@ class RerankConfig:
     normalize_method: str = "softmax"  # "softmax" | "min-max" | "none"
+# image-gen
+@dataclass
+class ImageGenTxt2ImgInput:
+    """Input structure for text-to-image generation."""
+    # Text prompt to generate the image from.
+    prompt: str
+    # Generation settings (ImageGenerationConfig).
+    config: ImageGenerationConfig
+    # Optional by type, defaulting to None.
+    # NOTE(review): presumably the file the generated image is written to - confirm against the backend.
+    output_path: Optional[Path] = None
+@dataclass
+class ImageGenImg2ImgInput:
+    """Input structure for image-to-image generation."""
+    # Path of the initial image (a path, not a decoded image object).
+    init_image_path: Path
+    # Text prompt guiding the generation.
+    prompt: str
+    # Generation settings (ImageGenerationConfig).
+    config: ImageGenerationConfig
+    # Optional by type, defaulting to None.
+    # NOTE(review): presumably the file the generated image is written to - confirm against the backend.
+    output_path: Optional[Path] = None
+@dataclass
+class ImageGenOutput:
+    """Output structure for image generation."""
+    # Path of the image file produced by the generation call.
+    output_image_path: Path
 @dataclass
 class ImageSamplerConfig:
     """Configuration for image sampling."""
@@ -180,17 +215,27 @@ class ImageSamplerConfig:
     seed: int = -1  # –1 for random
+@dataclass
+class ImageGenCreateInput:
+    """Input structure for creating an image generation model instance."""
+    # Name of the model.
+    model_name: str
+    # Location of the model.
+    model_path: Path
+    # General model configuration (ModelConfig).
+    config: ModelConfig
+    # Path to the scheduler configuration.
+    scheduler_config_path: Path
+    # Identifier of the plugin (backend) - presumably selects which backend serves the model; TODO confirm.
+    plugin_id: str
+    # Optional device identifier; None when not specified.
+    device_id: Optional[str] = None
 @dataclass
 class ImageGenerationConfig:
     """Configuration for image generation."""
-    prompts: str | List[str]
-    negative_prompts: str | List[str] | None = None
+    prompts: List[str]
+    sampler_config: ImageSamplerConfig
+    scheduler_config: SchedulerConfig
+    strength: float
+    negative_prompts: Optional[List[str]] = None
     height: int = 512
     width: int = 512
-    sampler_config: Optional[ImageSamplerConfig] = None
-    lora_id: int = -1  # –1 for none
-    init_image: Optional[Image] = None
-    strength: float = 1.0
 @dataclass
@@ -261,7 +306,7 @@ class TTSResult:
 class BoundingBox:
     """Generic bounding box structure."""
     x: float  # X coordinate (normalized or pixel, depends on model)
-    y: float  # Y coordinate (normalized or pixel, depends on model)
+    y: float  # Y coordinate (normalized or pixel, depends on model)
     width: float  # Width
     height: float  # Height
@@ -275,7 +320,8 @@ class CVResult:
     confidence: float = 0.0  # Confidence score [0.0-1.0]
     bbox: Optional[BoundingBox] = None  # Bounding box (example: YOLO)
     text: Optional[str] = None  # Text result (example: OCR)
-    embedding: Optional[List[float]] = None  # Feature embedding (example: CLIP embedding)
+    # Feature embedding (example: CLIP embedding)
+    embedding: Optional[List[float]] = None
     embedding_dim: int = 0  # Embedding dimension

nexaai/mlx_backend/image_gen/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Image generation module for MLX backend

nexaai/mlx_backend/image_gen/generate_sd.py ADDED Viewed

@@ -0,0 +1,244 @@
+from __future__ import annotations
+from typing import (
+    List,
+    Optional,
+)
+import mlx.core as mx
+import numpy as np
+from PIL import Image as PILImage
+import mlx.nn as nn
+import os
+from .stable_diffusion import StableDiffusion, StableDiffusionXL
+class Image:
+    """In-memory image held as a flat list of pixel values plus its dimensions.
+    Pixel values are floats; `from_pil` scales 8-bit data into [0, 1] and
+    `to_pil` scales back to 8-bit, so [0, 1] is the expected value range.
+    """
+    def __init__(self, data: List[float], width: int, height: int, channels: int) -> None:
+        """Initialize an image with pixel data.
+        Args:
+            data: Flattened pixel values in (height, width, channels) order.
+            width: Image width in pixels.
+            height: Image height in pixels.
+            channels: Number of channels per pixel.
+        """
+        self.data = data
+        self.width = width
+        self.height = height
+        self.channels = channels
+    @classmethod
+    def from_numpy(cls, array: np.ndarray) -> 'Image':
+        """Create Image from a numpy array of shape (H, W, C) or (H, W).
+        A 2-D array (e.g. a grayscale image) is treated as single-channel
+        instead of failing on the three-way shape unpack.
+        """
+        if array.ndim == 2:
+            array = array[..., np.newaxis]
+        height, width, channels = array.shape
+        data = array.flatten().tolist()
+        return cls(data, width, height, channels)
+    @classmethod
+    def from_pil(cls, pil_image: PILImage.Image) -> 'Image':
+        """Create Image from a PIL Image, scaling 8-bit values into [0, 1]."""
+        array = np.array(pil_image).astype(np.float32) / 255.0
+        return cls.from_numpy(array)
+    def to_numpy(self) -> np.ndarray:
+        """Convert to numpy array (H, W, C)"""
+        return np.array(self.data).reshape(self.height, self.width, self.channels)
+    def to_pil(self) -> PILImage.Image:
+        """Convert to an 8-bit PIL Image.
+        Values are clipped to [0, 1] before scaling: an unclipped cast to
+        uint8 wraps around for out-of-range values and corrupts pixels.
+        """
+        array = (np.clip(self.to_numpy(), 0.0, 1.0) * 255).astype(np.uint8)
+        if self.channels == 1:
+            # PIL cannot build an image from an (H, W, 1) array; drop the channel axis.
+            array = array[..., 0]
+        return PILImage.fromarray(array)
+class ImageSamplerConfig:
+    """Sampler settings for a generation run; defaults are tuned for SDXL Turbo."""
+    def __init__(
+        self,
+        method: str = "ddim",
+        steps: int = 4,  # SDXL Turbo typically uses fewer steps
+        guidance_scale: float = 0.0,  # SDXL Turbo works well with no guidance
+        eta: float = 0.0,
+        seed: int = -1,
+    ) -> None:
+        """Store the given sampler settings on the instance unchanged."""
+        self.method, self.steps = method, steps
+        self.guidance_scale, self.eta = guidance_scale, eta
+        self.seed = seed
+class ImageGenerationConfig:
+    """Settings bundle for one image generation request."""
+    def __init__(
+        self,
+        prompts: str | List[str],
+        negative_prompts: str | List[str] | None = None,
+        height: int = 512,
+        width: int = 512,
+        sampler_config: Optional[ImageSamplerConfig] = None,
+        lora_id: int = -1,  # Not used but kept for compatibility
+        init_image: Optional[Image] = None,
+        strength: float = 1.0,
+        n_images: int = 1,
+        n_rows: int = 1,
+        decoding_batch_size: int = 1,
+    ) -> None:
+        """Record the request settings, filling in defaults for omitted values.
+        A falsy `negative_prompts` becomes the empty string and a falsy
+        `sampler_config` becomes a default `ImageSamplerConfig()`.
+        """
+        self.prompts = prompts
+        # Normalise "no negative prompt" (None, "", []) to the empty string.
+        self.negative_prompts = negative_prompts if negative_prompts else ""
+        self.height, self.width = height, width
+        if sampler_config:
+            self.sampler_config = sampler_config
+        else:
+            self.sampler_config = ImageSamplerConfig()
+        self.lora_id = lora_id
+        self.init_image = init_image
+        self.strength = strength
+        self.n_images, self.n_rows = n_images, n_rows
+        self.decoding_batch_size = decoding_batch_size
+class ImageGen:
+    """Stable Diffusion / SDXL image generator that loads its model lazily.
+    The constructor only records settings; the model is loaded on the first
+    `txt2img` / `img2img` call and cached on `self.model`.
+    """
+    def __init__(
+        self,
+        model_path: str,
+        scheduler_config_path: Optional[str] = None,
+        device: Optional[str] = None,
+        float16: bool = True,
+        quantize: bool = False,
+    ) -> None:
+        """Record the model location and load options.
+        Args:
+            model_path: Local path or repo name of the model.
+            scheduler_config_path: Stored on the instance; not read elsewhere in this class.
+            device: Accepted for interface compatibility; not used by this class.
+            float16: Forwarded to `load_model` when the model is first loaded.
+            quantize: Forwarded to `load_model` when the model is first loaded.
+        """
+        self.model_path = model_path
+        self.scheduler_config_path = scheduler_config_path
+        self.float16 = float16
+        self.quantize = quantize
+        self.model = None
+    @staticmethod
+    def load_model(model_path: str, float16: bool = True, quantize: bool = False) -> StableDiffusion:
+        """Load a model from the given path - following txt2img.py pattern.
+        The model class is chosen from the name: paths containing "xl" or
+        "turbo" (case-insensitive) load `StableDiffusionXL`, everything else
+        loads `StableDiffusion`. Local paths and repo names are handled the same way.
+        Args:
+            model_path: Local path or repo name; path-like objects are converted to str.
+            float16: Passed through to the model constructor.
+            quantize: When true, quantize the text encoder(s) and the UNet after loading.
+        Returns:
+            The loaded (and optionally quantized) model.
+        """
+        model_path = str(model_path)
+        lowered = model_path.lower()
+        is_xl = "xl" in lowered or "turbo" in lowered
+        model_cls = StableDiffusionXL if is_xl else StableDiffusion
+        model = model_cls(model_path, float16=float16)
+        # Apply quantization if requested - same as txt2img.py
+        if quantize:
+            def is_linear(_, module):
+                return isinstance(module, nn.Linear)
+            # SDXL models carry two text encoders, the base model one.
+            encoders = (
+                (model.text_encoder_1, model.text_encoder_2)
+                if is_xl else (model.text_encoder,)
+            )
+            for encoder in encoders:
+                nn.quantize(encoder, class_predicate=is_linear)
+            nn.quantize(model.unet, group_size=32, bits=8)
+        return model
+    def _ensure_model(self) -> StableDiffusion:
+        """Load the model on first use, honouring the constructor's float16/quantize options."""
+        if self.model is None:
+            self.model = self.load_model(
+                self.model_path, float16=self.float16, quantize=self.quantize)
+        if self.model is None:
+            raise RuntimeError("Model not loaded")
+        return self.model
+    @staticmethod
+    def _negative_prompt(config: ImageGenerationConfig) -> str:
+        """Return the negative prompt as one string (first entry of a list, "" when absent)."""
+        negative = config.negative_prompts
+        if not negative:
+            return ""
+        return negative if isinstance(negative, str) else negative[0]
+    def _decode_final(self, latents_generator, clear_cache: bool) -> Image:
+        """Run the denoising generator to completion and decode its last latents into an Image."""
+        final_latents = None
+        for latents in latents_generator:
+            final_latents = latents
+            # Evaluate every step so the lazy graph does not accumulate across steps.
+            mx.eval(final_latents)
+        if final_latents is None:
+            raise RuntimeError("No latents generated")
+        decoded_image = self.model.decode(final_latents)
+        mx.eval(decoded_image)
+        # Callers request n_images=1, so drop the leading batch axis.
+        image_array = np.array(decoded_image.squeeze(0))
+        if clear_cache:
+            mx.clear_cache()
+        return Image.from_numpy(image_array)
+    def txt2img(self, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
+        """Generate an image from a text prompt - following txt2img.py pattern.
+        Args:
+            prompt: Text prompt.
+            config: Generation settings; only the sampler settings and negative prompt are read here.
+            clear_cache: Clear the MLX cache after decoding.
+        Returns:
+            The generated image.
+        Raises:
+            RuntimeError: If the model cannot be loaded or no latents are produced.
+        """
+        model = self._ensure_model()
+        sampler_config = config.sampler_config
+        # NOTE(review): config.height / config.width / config.n_images are not forwarded
+        # here, so the output size is whatever generate_latents defaults to - confirm intended.
+        latents_generator = model.generate_latents(
+            prompt,
+            n_images=1,
+            num_steps=sampler_config.steps,
+            cfg_weight=sampler_config.guidance_scale,
+            negative_text=self._negative_prompt(config),
+            # A negative seed means "random": pass None to the model.
+            seed=sampler_config.seed if sampler_config.seed >= 0 else None
+        )
+        return self._decode_final(latents_generator, clear_cache)
+    def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig, clear_cache: bool = True) -> Image:
+        """Generate an image from an initial image and a text prompt.
+        Args:
+            init_image: Starting image.
+            prompt: Text prompt.
+            config: Generation settings; width, height, strength, sampler settings and negative prompt are read.
+            clear_cache: Clear the MLX cache after decoding.
+        Returns:
+            The generated image.
+        Raises:
+            RuntimeError: If the model cannot be loaded or no latents are produced.
+        """
+        model = self._ensure_model()
+        sampler_config = config.sampler_config
+        # NOTE(review): _prepare_image_for_sd is not defined in the visible part of this
+        # module - confirm it exists, otherwise this call raises NameError.
+        img_tensor = _prepare_image_for_sd(
+            init_image, config.width, config.height)
+        latents_generator = model.generate_latents_from_image(
+            img_tensor,
+            prompt,
+            n_images=1,
+            strength=config.strength,
+            num_steps=sampler_config.steps,
+            cfg_weight=sampler_config.guidance_scale,
+            negative_text=self._negative_prompt(config),
+            # A negative seed means "random": pass None to the model.
+            seed=sampler_config.seed if sampler_config.seed >= 0 else None
+        )
+        return self._decode_final(latents_generator, clear_cache)

nexaai/mlx_backend/image_gen/interface.py ADDED Viewed

@@ -0,0 +1,82 @@
+from __future__ import annotations
+import os
+from typing import Optional
+from ml import ImageGenCreateInput, ImageGenerationConfig, ImageGenImg2ImgInput, ImageGenTxt2ImgInput, ImageGenOutput
+from profiling import ProfilingMixin, StopReason
+from .generate_sd import ImageGen as SDImageGen, Image, ImageGenerationConfig as SDImageGenerationConfig, ImageSamplerConfig
+class ImageGen(ProfilingMixin):
+    """Public image-generation interface backed by the Stable Diffusion generator.
+    Translates the `ml` input structures into the generator's own config
+    objects, runs the generation and writes the result to `output_path`.
+    """
+    sd_gen: Optional[SDImageGen] = None
+    def __init__(self, input: ImageGenCreateInput):
+        """Initialize the image generation model.
+        Only `input.model_path` is used; the model itself is loaded lazily by the generator.
+        """
+        # model_path is annotated as Path, but the generator does substring
+        # checks on it that need a str.
+        self.sd_gen = SDImageGen(model_path=str(input.model_path))
+    def destroy(self) -> None:
+        """Clean up resources"""
+        self.sd_gen = None
+    def _validate(self, input) -> None:
+        """Reject unusable requests before any expensive generation work starts.
+        Raises:
+            RuntimeError: If the instance was already destroyed.
+            ValueError: If height/width are not multiples of 16 or output_path is missing.
+        """
+        if self.sd_gen is None:
+            raise RuntimeError("ImageGen has been destroyed")
+        height = input.config.height
+        width = input.config.width
+        # Explicit raises instead of assert: asserts are stripped under `python -O`.
+        if height % 16 != 0:
+            raise ValueError(f"Height must be divisible by 16 ({height}/16={height/16})")
+        if width % 16 != 0:
+            raise ValueError(f"Width must be divisible by 16 ({width}/16={width/16})")
+        if input.output_path is None:
+            raise ValueError("output_path is required to save the generated image")
+    def _build_config(self, input, init_image: Optional[Image] = None) -> SDImageGenerationConfig:
+        """Translate the public request config into the generator's config object."""
+        sampler = input.config.sampler_config
+        return SDImageGenerationConfig(
+            prompts=input.prompt,
+            negative_prompts=input.config.negative_prompts,
+            height=input.config.height,
+            width=input.config.width,
+            sampler_config=ImageSamplerConfig(
+                steps=sampler.steps,
+                guidance_scale=sampler.guidance_scale,
+                seed=sampler.seed
+            ),
+            init_image=init_image,
+            strength=input.config.strength
+        )
+    def _save(self, result_image: Image, output_path) -> ImageGenOutput:
+        """Write the image to output_path, creating the parent directory if needed."""
+        parent_dir = os.path.dirname(output_path)
+        # A bare file name has no parent directory; os.makedirs("") would raise.
+        if parent_dir:
+            os.makedirs(parent_dir, exist_ok=True)
+        result_image.to_pil().save(output_path)
+        return ImageGenOutput(output_image_path=output_path)
+    def txt2img(self, input: ImageGenTxt2ImgInput) -> ImageGenOutput:
+        """Generate an image from a text prompt - public interface.
+        Returns:
+            ImageGenOutput pointing at the saved image file.
+        Raises:
+            ValueError: If height/width are not multiples of 16 or output_path is missing.
+        """
+        self._validate(input)
+        internal_config = self._build_config(input)
+        result_image = self.sd_gen.txt2img(input.prompt, internal_config)
+        return self._save(result_image, input.output_path)
+    def img2img(self, input: ImageGenImg2ImgInput) -> ImageGenOutput:
+        """Generate an image from an initial image and a text prompt - public interface.
+        Returns:
+            ImageGenOutput pointing at the saved image file.
+        Raises:
+            ValueError: If height/width are not multiples of 16 or output_path is missing.
+        """
+        self._validate(input)
+        # Image.from_pil expects a decoded PIL image, not a file path, so open the file first.
+        from PIL import Image as PILImage
+        with PILImage.open(input.init_image_path) as pil_image:
+            # Force three channels so grayscale/alpha inputs get a uniform layout.
+            # NOTE(review): assumes the model expects RGB input - confirm.
+            init_image = Image.from_pil(pil_image.convert("RGB"))
+        internal_config = self._build_config(input, init_image=init_image)
+        result_image = self.sd_gen.img2img(
+            init_image, input.prompt, internal_config)
+        return self._save(result_image, input.output_path)