PyPI - diffusion-prompt-embedder - Versions diffs - 0.1.0__py3-none-any.whl - Mend

diffusion-prompt-embedder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

diffusion_prompt_embedder/__init__.py +17 -0
diffusion_prompt_embedder/clip/__init__.py +13 -0
diffusion_prompt_embedder/clip/tokenization.py +123 -0
diffusion_prompt_embedder/core/__init__.py +23 -0
diffusion_prompt_embedder/core/embedding.py +309 -0
diffusion_prompt_embedder/core/parser.py +178 -0
diffusion_prompt_embedder/py.typed +0 -0
diffusion_prompt_embedder-0.1.0.dist-info/METADATA +152 -0
diffusion_prompt_embedder-0.1.0.dist-info/RECORD +10 -0
diffusion_prompt_embedder-0.1.0.dist-info/WHEEL +4 -0

diffusion_prompt_embedder/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+diffusion_prompt_embedder: A library for parsing and processing text prompts with attention weights.
+This package provides tools for parsing text prompts with attention weights syntax,
+tokenizing prompts, and generating embeddings for use with Stable Diffusion models.
+"""
+from __future__ import annotations
+from diffusion_prompt_embedder.core.embedding import get_embeddings_sd15, get_embeddings_sd_15_batch
+from diffusion_prompt_embedder.core.parser import parse_prompt_attention
+# Public API of the top-level package: the two embedding entry points and the prompt parser
+# imported just above.
+__all__ = [
+    "get_embeddings_sd15",
+    "get_embeddings_sd_15_batch",
+    "parse_prompt_attention",
+]

diffusion_prompt_embedder/clip/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+CLIP model functionality for embedding generation.
+"""
+from diffusion_prompt_embedder.clip.tokenization import (
+    get_prompts_tokens_with_weights,
+    group_tokens_and_weights,
+)
+# Public API of the clip subpackage: the tokenization helpers imported just above.
+__all__ = [
+    "get_prompts_tokens_with_weights",
+    "group_tokens_and_weights",
+]

diffusion_prompt_embedder/clip/tokenization.py ADDED Viewed

@@ -0,0 +1,123 @@
+from transformers import CLIPTokenizer
+from diffusion_prompt_embedder.core.parser import parse_prompt_attention
+def group_tokens_and_weights(
+    token_ids: list[int],
+    weights: list[float],
+    *,
+    pad_last_block: bool = True,
+) -> tuple[list[list[int]], list[list[float]]]:
+    """
+    Group token IDs and weights into chunks framed by BOS and EOS tokens.
+    Every complete run of 75 tokens becomes one 77-entry chunk (BOS + 75 tokens + EOS).
+    Any remaining tokens form a final chunk which, when pad_last_block is True, is
+    filled with EOS tokens (weight 1.0) up to the same 77-entry length. The BOS/EOS
+    framing entries always carry weight 1.0.
+    The input lists are read only; they are not modified.
+    Args:
+        token_ids (list): Token IDs without BOS/EOS markers
+        weights (list): One weight per entry of token_ids
+        pad_last_block (bool): Whether to pad the last chunk to 75 content tokens with EOS
+    Returns:
+        tuple: A tuple containing:
+            - list[list[int]]: Token ID chunks, each starting with BOS and ending with EOS
+            - list[list[float]]: Weight chunks aligned one-to-one with the token ID chunks
+        Both lists are empty when token_ids is empty.
+    Raises:
+        ValueError: If token_ids and weights differ in length
+    Example:
+        token_groups, weight_groups = group_tokens_and_weights(
+            token_ids=token_id_list,
+            weights=token_weight_list
+        )
+    """
+    # BOS/EOS ids are fixed here rather than read from a tokenizer.
+    bos, eos = 49406, 49407
+    # Number of content tokens per chunk (the BOS/EOS pair brings each chunk to 77).
+    chunk_size = 75
+    if len(token_ids) != len(weights):
+        msg = f"token_ids and weights must have the same length, got {len(token_ids)} and {len(weights)}"
+        raise ValueError(msg)
+    new_token_ids: list[list[int]] = []
+    new_weights: list[list[float]] = []
+    # Slice instead of popping from the front: this leaves the caller's lists intact
+    # and avoids the quadratic cost of repeated pop(0).
+    full_len = len(token_ids) - len(token_ids) % chunk_size
+    for start in range(0, full_len, chunk_size):
+        stop = start + chunk_size
+        new_token_ids.append([bos, *token_ids[start:stop], eos])
+        new_weights.append([1.0, *weights[start:stop], 1.0])
+    # Whatever is left (fewer than 75 tokens) becomes the final, optionally padded chunk.
+    tail_tokens = token_ids[full_len:]
+    if tail_tokens:
+        padding_len = chunk_size - len(tail_tokens) if pad_last_block else 0
+        new_token_ids.append([bos, *tail_tokens, *[eos] * padding_len, eos])
+        new_weights.append([1.0, *weights[full_len:], *[1.0] * padding_len, 1.0])
+    return new_token_ids, new_weights
+def get_prompts_tokens_with_weights(
+    clip_tokenizer: CLIPTokenizer,
+    prompt: str | None,
+) -> tuple[list[int], list[float]]:
+    """
+    Tokenize a prompt with attention weights into token IDs and their corresponding weights.
+    The prompt is split by parse_prompt_attention into (text, weight) pairs; each text
+    piece is tokenized without truncation, its BOS/EOS markers are dropped, and every
+    resulting token receives the weight of its piece.
+    A BREAK marker produced by the parser (text "BREAK" with weight -1) is not tokenized
+    as a word. Instead the tokens collected so far are padded with EOS tokens
+    (weight 1.0) up to the next multiple of 75, so the text after BREAK starts a new
+    75-token chunk.
+    Args:
+        clip_tokenizer (CLIPTokenizer): The CLIP tokenizer instance
+        prompt (str | None): A prompt string with optional weights in parentheses
+                            If None or empty, defaults to "empty"
+    Returns:
+        tuple: A tuple containing:
+            - list[int]: List of token IDs
+            - list[float]: List of weights corresponding to each token
+    Example:
+        token_id_list, token_weight_list = get_prompts_tokens_with_weights(
+            clip_tokenizer=clip_tokenizer,
+            prompt="a (red:1.5) cat"
+        )
+    """
+    # Use "empty" as default if prompt is None or empty
+    if not prompt:
+        prompt = "empty"
+    # Content tokens per chunk; must match the chunking applied when grouping tokens.
+    chunk_size = 75
+    text_tokens: list[int] = []
+    text_weights: list[float] = []
+    for word, weight in parse_prompt_attention(prompt):
+        if word == "BREAK" and weight == -1:
+            # Sentinel emitted by the parser, not prompt text: fill the current chunk
+            # so that following tokens begin at a chunk boundary.
+            padding_len = -len(text_tokens) % chunk_size
+            text_tokens.extend([clip_tokenizer.eos_token_id] * padding_len)
+            text_weights.extend([1.0] * padding_len)
+            continue
+        # Tokenize the text piece, removing BOS/EOS tokens (positions 0 and -1)
+        token = clip_tokenizer(
+            word,
+            truncation=False,  # Allow processing prompts of any length
+        ).input_ids[1:-1]
+        # Grow the lists in place rather than rebuilding them for every piece
+        text_tokens.extend(token)
+        text_weights.extend([weight] * len(token))
+    return text_tokens, text_weights

diffusion_prompt_embedder/core/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""
+Core prompt parsing and embedding functionality.
+"""
+from diffusion_prompt_embedder.core.embedding import (
+    get_embeddings_sd15,
+    get_embeddings_sd_15_batch,
+)
+from diffusion_prompt_embedder.core.parser import (
+    apply_multiplier_to_range,
+    merge_identical_weights,
+    parse_prompt_attention,
+    process_text_token,
+)
+# Public API of the core subpackage: the embedding entry points plus the parser and its helpers,
+# all imported just above.
+__all__ = [
+    "apply_multiplier_to_range",
+    "get_embeddings_sd15",
+    "get_embeddings_sd_15_batch",
+    "merge_identical_weights",
+    "parse_prompt_attention",
+    "process_text_token",
+]

diffusion_prompt_embedder/core/embedding.py ADDED Viewed

@@ -0,0 +1,309 @@
+import torch
+from transformers import CLIPTextModel, CLIPTokenizer
+from diffusion_prompt_embedder.clip.tokenization import get_prompts_tokens_with_weights, group_tokens_and_weights
+def _encode_tokens_with_weights(
+    text_encoder: CLIPTextModel,
+    token_groups: list[list[int]],
+    weight_groups: list[list[float]],
+    device: torch.device,
+    dtype: torch.dtype,
+) -> list[torch.Tensor]:
+    """
+    Internal helper function to encode token groups and apply weights.
+    Each token group is run through the text encoder as a batch of one, and the
+    embedding of every token is multiplied by that token's weight.
+    Args:
+        text_encoder: The CLIP text encoder model
+        token_groups: Grouped token IDs, one list per chunk
+        weight_groups: Grouped weights, one weight per token of the matching chunk
+        device: Device to create the token and weight tensors on
+        dtype: Data type for the weight tensors
+    Returns:
+        list[torch.Tensor]: One weighted embedding tensor per token group, each with a
+        leading batch dimension of 1
+    Raises:
+        ValueError: If token_groups and weight_groups differ in length
+    """
+    embeds = []
+    # strict=True turns a group-count mismatch into an error instead of dropping groups
+    for group_tokens, group_weights in zip(token_groups, weight_groups, strict=True):
+        token_tensor = torch.tensor(
+            [group_tokens],
+            dtype=torch.long,
+            device=device,
+        )
+        weight_tensor = torch.tensor(
+            group_weights,
+            dtype=dtype,
+            device=device,
+        )
+        # First output of the encoder, kept with its batch dimension
+        token_embedding = text_encoder(token_tensor)[0]
+        # Scale all tokens at once: the weights are reshaped to (1, tokens, 1) so they
+        # broadcast over the embedding axis. The result is cast back to the encoder's
+        # output dtype in case it differs from the requested weight dtype.
+        weighted = token_embedding * weight_tensor.view(1, -1, 1)
+        embeds.append(weighted.to(token_embedding.dtype))
+    return embeds
+def _setup_clip_for_embedding(
+    text_encoder: CLIPTextModel,
+    clip_skip: int = 0,
+) -> tuple[torch.device, torch.dtype, object | None, int]:
+    """
+    Read the encoder's device and dtype and, if requested, drop its last layers.
+    When clip_skip is positive and the encoder exposes a text_model attribute, the
+    encoder's layer list is replaced by a copy without its last clip_skip entries and
+    the previous list is handed back so the caller can restore it afterwards.
+    Args:
+        text_encoder: The CLIP text encoder model
+        clip_skip: Number of trailing encoder layers to drop
+    Returns:
+        tuple: (device, dtype, original_clip_layers, clip_skip), where
+        original_clip_layers is None if the layers were left untouched
+    """
+    device, dtype = text_encoder.device, text_encoder.dtype
+    # Nothing to truncate: report that no layers were swapped out
+    if not (clip_skip > 0 and hasattr(text_encoder, "text_model")):
+        return device, dtype, None, clip_skip
+    encoder = text_encoder.text_model.encoder
+    saved_layers = encoder.layers
+    encoder.layers = saved_layers[:-clip_skip]
+    return device, dtype, saved_layers, clip_skip
+def get_embeddings_sd15(  # noqa: PLR0913
+    tokenizer: CLIPTokenizer,
+    text_encoder: CLIPTextModel,
+    *,
+    prompt: str = "",
+    neg_prompt: str = "",
+    pad_last_block: bool = False,
+    clip_skip: int = 0,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Generate weighted text embeddings for Stable Diffusion 1.5 models.
+    Both prompts are tokenized with their attention weights, the shorter token list is
+    padded with EOS tokens (weight 1.0) to the length of the longer one, and each list
+    is split into chunks that are encoded one at a time and concatenated along the
+    sequence dimension. Prompts of any length are therefore supported.
+    If clip_skip is positive, the encoder's last layers are removed for the duration of
+    the call and restored before returning, including when an error is raised.
+    Args:
+        tokenizer (CLIPTokenizer): The CLIP tokenizer instance
+        text_encoder (CLIPTextModel): The CLIP text encoder model
+        prompt (str): The positive prompt with optional weights in parentheses
+        neg_prompt (str): The negative prompt with optional weights in parentheses
+        pad_last_block (bool): Whether to pad the last token block to full length
+        clip_skip (int): Number of layers to skip in CLIP model for style control
+    Returns:
+        tuple[torch.Tensor, torch.Tensor]: A tuple containing:
+            - prompt_embeds: Tensor of positive prompt embeddings
+            - neg_prompt_embeds: Tensor of negative prompt embeddings
+    Example:
+        import torch
+        from transformers import CLIPTokenizer, CLIPTextModel
+        tokenizer = CLIPTokenizer.from_pretrained(
+            "openai/clip-vit-large-patch14",
+        )
+        text_encoder = CLIPTextModel.from_pretrained(
+            "openai/clip-vit-large-patch14",
+            torch_dtype=torch.float16
+        ).to("cuda")
+        prompt_embeds, neg_prompt_embeds = get_embeddings_sd15(
+            tokenizer,
+            text_encoder,
+            prompt="a (white:1.2) cat",
+            neg_prompt="blur, bad quality",
+        )
+    """
+    # Setup CLIP model and get common parameters (may temporarily truncate the encoder)
+    device, dtype, original_clip_layers, _ = _setup_clip_for_embedding(
+        text_encoder,
+        clip_skip,
+    )
+    try:
+        # Get the eos token id from tokenizer
+        eos = tokenizer.eos_token_id
+        # Tokenize prompts with weights
+        prompt_tokens, prompt_weights = get_prompts_tokens_with_weights(
+            tokenizer,
+            prompt,
+        )
+        neg_prompt_tokens, neg_prompt_weights = get_prompts_tokens_with_weights(
+            tokenizer,
+            neg_prompt,
+        )
+        # Pad both prompts to the longer length so they yield the same number of chunks;
+        # the longer prompt simply receives zero padding.
+        target_len = max(len(prompt_tokens), len(neg_prompt_tokens))
+        prompt_padding = target_len - len(prompt_tokens)
+        neg_prompt_padding = target_len - len(neg_prompt_tokens)
+        prompt_tokens = prompt_tokens + [eos] * prompt_padding
+        prompt_weights = prompt_weights + [1.0] * prompt_padding
+        neg_prompt_tokens = neg_prompt_tokens + [eos] * neg_prompt_padding
+        neg_prompt_weights = neg_prompt_weights + [1.0] * neg_prompt_padding
+        # Group tokens into encoder-sized chunks. Copies are passed so the grouping
+        # helper cannot alter the lists held here.
+        prompt_token_groups, prompt_weight_groups = group_tokens_and_weights(
+            prompt_tokens.copy(),
+            prompt_weights.copy(),
+            pad_last_block=pad_last_block,
+        )
+        neg_prompt_token_groups, neg_prompt_weight_groups = group_tokens_and_weights(
+            neg_prompt_tokens.copy(),
+            neg_prompt_weights.copy(),
+            pad_last_block=pad_last_block,
+        )
+        # Process token groups through the shared encoder function
+        embeds = _encode_tokens_with_weights(
+            text_encoder,
+            prompt_token_groups,
+            prompt_weight_groups,
+            device,
+            dtype,
+        )
+        neg_embeds = _encode_tokens_with_weights(
+            text_encoder,
+            neg_prompt_token_groups,
+            neg_prompt_weight_groups,
+            device,
+            dtype,
+        )
+        # Concatenate all token group embeddings along the sequence dimension
+        return torch.cat(embeds, dim=1), torch.cat(neg_embeds, dim=1)
+    finally:
+        # Always put the original layers back, so a failure above cannot leave the
+        # caller's encoder permanently truncated.
+        if original_clip_layers is not None:
+            text_encoder.text_model.encoder.layers = original_clip_layers
+def get_embeddings_sd_15_batch(
+    tokenizer: CLIPTokenizer,
+    text_encoder: CLIPTextModel,
+    *,
+    prompts: list[str],
+    pad_last_block: bool = True,
+    clip_skip: int = 0,
+) -> torch.Tensor:
+    """
+    Generate weighted text embeddings for multiple prompts in a batch.
+    Every prompt is tokenized with its attention weights and padded with EOS tokens
+    (weight 1.0) to the length of the longest prompt. Each prompt is then split into
+    chunks, encoded chunk by chunk, concatenated along the sequence dimension, and the
+    per-prompt results are concatenated along the batch dimension.
+    If clip_skip is positive, the encoder's last layers are removed for the duration of
+    the call and restored before returning, including when an error is raised.
+    Args:
+        tokenizer (CLIPTokenizer): The CLIP tokenizer instance
+        text_encoder (CLIPTextModel): The CLIP text encoder model
+        prompts (list[str]): List of prompts, each with optional weights in parentheses
+        pad_last_block (bool): Whether to pad the last token block to full length
+        clip_skip (int): Number of layers to skip in CLIP model for style control
+    Returns:
+        torch.Tensor: Tensor of embeddings for all prompts, shape [batch_size, seq_len, hidden_size]
+    Example:
+        import torch
+        from transformers import CLIPTokenizer, CLIPTextModel
+        tokenizer = CLIPTokenizer.from_pretrained(
+            "openai/clip-vit-large-patch14",
+        )
+        text_encoder = CLIPTextModel.from_pretrained(
+            "openai/clip-vit-large-patch14",
+            torch_dtype=torch.float16
+        ).to("cuda")
+        prompt_embeds = get_embeddings_sd_15_batch(
+            tokenizer,
+            text_encoder,
+            prompts=["a (white:1.2) cat", "a (blue:1.4) dog", "a red bird"],
+        )
+    """
+    # Setup CLIP model and get common parameters (may temporarily truncate the encoder)
+    device, dtype, original_clip_layers, _ = _setup_clip_for_embedding(
+        text_encoder,
+        clip_skip,
+    )
+    try:
+        # Get the eos token id from tokenizer
+        eos = tokenizer.eos_token_id
+        # Tokenize all prompts with weights: one (tokens, weights) pair per prompt
+        tokenized = [get_prompts_tokens_with_weights(tokenizer, prompt) for prompt in prompts]
+        max_token_len = max((len(tokens) for tokens, _ in tokenized), default=0)
+        all_embeds = []
+        for tokens, weights in tokenized:
+            # Pad to the longest prompt so every prompt yields the same sequence length.
+            # The concatenations build fresh lists, so the grouping helper never sees
+            # the lists stored in `tokenized`.
+            padding_len = max_token_len - len(tokens)
+            prompt_token_groups, prompt_weight_groups = group_tokens_and_weights(
+                tokens + [eos] * padding_len,
+                weights + [1.0] * padding_len,
+                pad_last_block=pad_last_block,
+            )
+            # Process token groups through the shared encoder function
+            embeds = _encode_tokens_with_weights(
+                text_encoder,
+                prompt_token_groups,
+                prompt_weight_groups,
+                device,
+                dtype,
+            )
+            # Concatenate all token group embeddings for this prompt
+            all_embeds.append(torch.cat(embeds, dim=1))
+        # Join the per-prompt embeddings along the batch dimension
+        return torch.cat(all_embeds, dim=0)
+    finally:
+        # Always put the original layers back, so a failure above cannot leave the
+        # caller's encoder permanently truncated.
+        if original_clip_layers is not None:
+            text_encoder.text_model.encoder.layers = original_clip_layers

diffusion_prompt_embedder/core/parser.py ADDED Viewed

@@ -0,0 +1,178 @@
+import re
+# Regular expressions for prompt processing
+# Matches the "AND" keyword as a whole word.
+# NOTE(review): not referenced by any function in this listing.
+re_and = re.compile(r"\bAND\b")
+# Matches "text:1.5" anchored to the whole string: group 1 is the text, group 2 the
+# optional weight value.
+# NOTE(review): not referenced by any function in this listing.
+re_weight = re.compile(r"^((?:\s|.)*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$")
+# Matches the "BREAK" keyword as a whole word together with the whitespace around it,
+# so splitting on it also strips that whitespace.
+# re.DOTALL only changes how "." matches, and this pattern contains no ".".
+re_break = re.compile(r"\s*\bBREAK\b\s*", re.DOTALL)
+# Tokenizer for attention markers: each match is one escape sequence, one bracket,
+# one ":<number>)" weight suffix, one run of plain text, or a lone colon.
+# Group 1 is set only for the ":<number>)" alternative and holds the number text.
+# That number pattern ([.\d]+) is permissive: it also accepts strings such as "1.2.3"
+# or "." which float() rejects.
+re_attention = re.compile(
+    r"""
+    \\\(|      # Escaped left parenthesis \(
+    \\\)|      # Escaped right parenthesis \)
+    \\\[|      # Escaped left bracket \[
+    \\]|       # Escaped right bracket \]
+    \\\\|      # Escaped backslash \\
+    \\|        # Single backslash (escape character)
+    \(|        # Left parenthesis - starts an enhanced attention area
+    \[|        # Left bracket - starts a reduced attention area
+    :\s*([+-]?[.\d]+)\s*\)|  # Colon followed by number and right parenthesis - custom weight value
+    \)|        # Right parenthesis - ends enhanced attention area
+    ]|         # Right bracket - ends reduced attention area
+    [^\\()\[\]:]+|  # Regular text (any text not containing special characters)
+    :          # Single colon
+    """,
+    re.VERBOSE,  # Verbose mode: whitespace and "#" comments inside the pattern are ignored
+)
+def apply_multiplier_to_range(
+    tokens: list[list[str | float]],
+    start_position: int,
+    multiplier: float,
+) -> None:
+    """
+    Scale, in place, the weight of every token from start_position to the end of the list.
+    The parser calls this when a bracket closes: start_position is the index recorded
+    when the bracket opened, so exactly the tokens inside the bracket are rescaled.
+    Args:
+        tokens: List of [text, weight] pairs; the weights are modified in place
+        start_position: Index of the first pair to rescale
+        multiplier: Factor each affected weight is multiplied by
+    """
+    index = start_position
+    while index < len(tokens):
+        tokens[index][1] *= multiplier
+        index += 1
+def process_text_token(text: str) -> list[list[str | float]]:
+    """
+    Turn a run of plain text into [text, weight] pairs, splitting it at BREAK keywords.
+    The text is split on the BREAK pattern. Each piece becomes a pair with weight 1.0,
+    and a ["BREAK", -1] marker pair is placed between consecutive pieces. Pieces may be
+    empty strings, for example when the text starts or ends with BREAK.
+    Args:
+        text: Text to process
+    Returns:
+        List of [text, weight] pairs
+    """
+    pieces = re.split(re_break, text)
+    # The split always yields at least one piece; it opens the result without a marker
+    pairs = [[pieces[0], 1.0]]
+    for piece in pieces[1:]:
+        # Every further piece was preceded by a BREAK keyword in the text
+        pairs.append(["BREAK", -1])
+        pairs.append([piece, 1.0])
+    return pairs
+def merge_identical_weights(tokens: list[list[str | float]]) -> list[list[str | float]]:
+    """
+    Collapse runs of neighbouring tokens that share the same weight.
+    The texts of each run are concatenated into the first pair of the run. The work is
+    done in place: the list passed in is rewritten and returned.
+    Args:
+        tokens: List of [text, weight] pairs
+    Returns:
+        The same list object with the merged pairs, or a new [["", 1.0]] list if
+        tokens is empty
+    """
+    if len(tokens) == 0:
+        return [["", 1.0]]  # Placeholder so callers always receive at least one pair
+    merged = []
+    for pair in tokens:
+        if merged and merged[-1][1] == pair[1]:
+            # Same weight as the previous pair: fold this text into it
+            merged[-1][0] += pair[0]
+        else:
+            merged.append(pair)
+    # Write the result back into the caller's list so its identity is preserved
+    tokens[:] = merged
+    return tokens
+def parse_prompt_attention(text: str) -> list[list[str | float]]:
+    """
+    Parses a string with attention markers and returns a list of text and associated weight pairs.
+    This function is the core of prompt parsing, handling various attention control symbols
+    like parentheses and brackets used to adjust focus on different parts of the prompt during generation.
+    Multipliers of nested markers are combined by multiplication.
+    Supported markers:
+      (abc) - multiplies the weight of abc by 1.1
+      (abc:3.12) - multiplies the weight of abc by 3.12
+      [abc] - divides the weight of abc by 1.1
+      \\( - literal character '('
+      \\[ - literal character '['
+      \\) - literal character ')'
+      \\] - literal character ']'
+      \\ - literal character '\'
+      anything else - just text
+    Brackets that are never closed are treated as if they closed at the end of the
+    text. Closing brackets without a matching opener are kept as plain text. A weight
+    that is not a valid number, such as (abc:1.2.3), is kept as literal text and the
+    parenthesis is closed with the default 1.1 multiplier.
+    Args:
+        text: Prompt text to parse
+    Returns:
+        List of [text, weight] pairs representing the parsed prompt parts and their weights
+    """
+    res: list[list[str | float]] = []  # Result list storing [text, weight] pairs
+    round_brackets: list[int] = []  # Stack of result positions where each "(" was opened
+    square_brackets: list[int] = []  # Stack of result positions where each "[" was opened
+    # Define weight multiplier constants
+    round_bracket_multiplier = 1.1  # Default enhancement factor for parentheses
+    square_bracket_multiplier = 1 / 1.1  # Default reduction factor for brackets (reciprocal)
+    # Parse each token in the text using regex
+    for m in re_attention.finditer(text):
+        token_text = m.group(0)  # Current matched text
+        weight = m.group(1)  # Number text of a ":<number>)" token, otherwise None
+        if token_text.startswith("\\"):
+            # Handle escape characters - remove backslash, preserve original character
+            res.append([token_text[1:], 1.0])
+        elif token_text == "(":
+            # Left parenthesis - remember where the enhanced area starts
+            round_brackets.append(len(res))
+        elif token_text == "[":
+            # Left bracket - remember where the reduced area starts
+            square_brackets.append(len(res))
+        elif weight is not None and round_brackets:
+            # Right parenthesis with custom weight
+            try:
+                multiplier = float(weight)
+            except ValueError:
+                # The regex accepts number-like text that float() rejects (e.g. "1.2.3").
+                # Keep everything before the closing ")" as literal text inside the
+                # parenthesis and fall back to the default multiplier.
+                res.extend(process_text_token(token_text[:-1]))
+                multiplier = round_bracket_multiplier
+            apply_multiplier_to_range(res, round_brackets.pop(), multiplier)
+        elif token_text == ")" and round_brackets:
+            # Regular right parenthesis - enhance area with default multiplier
+            apply_multiplier_to_range(res, round_brackets.pop(), round_bracket_multiplier)
+        elif token_text == "]" and square_brackets:
+            # Right bracket - reduce area with default multiplier
+            apply_multiplier_to_range(res, square_brackets.pop(), square_bracket_multiplier)
+        else:
+            # Process regular text or unmatched brackets
+            res.extend(process_text_token(token_text))
+    # Brackets still open at the end of the text are closed implicitly
+    for pos in round_brackets:
+        # Apply default enhancement for unclosed parentheses
+        apply_multiplier_to_range(res, pos, round_bracket_multiplier)
+    for pos in square_brackets:
+        # Apply default reduction for unclosed brackets
+        apply_multiplier_to_range(res, pos, square_bracket_multiplier)
+    # Merge consecutive tokens with identical weights
+    res = merge_identical_weights(res)
+    # Normalize element types: text as str, weight as float (BREAK markers carry int -1)
+    return [[str(fragment), float(fragment_weight)] for fragment, fragment_weight in res]

diffusion_prompt_embedder/py.typed ADDED Viewed

File without changes

diffusion_prompt_embedder-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,152 @@
+Metadata-Version: 2.4
+Name: diffusion-prompt-embedder
+Version: 0.1.0
+Summary: A Python library for parsing and processing prompts with support for embedding and tokenization
+Project-URL: Homepage, https://github.com/jannchie/diffusion-prompt-embedder
+Project-URL: Bug Tracker, https://github.com/jannchie/diffusion-prompt-embedder/issues
+Project-URL: Documentation, https://github.com/jannchie/diffusion-prompt-embedder#readme
+Author-email: Jianqi Pan <jannchie@gmail.com>
+License: MIT
+Keywords: ai,embedding,nlp,prompt,tokenization
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.10
+Provides-Extra: all
+Requires-Dist: torch>=2.0.0; extra == 'all'
+Requires-Dist: transformers>=4.51.3; extra == 'all'
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
+Requires-Dist: pytest>=8.3.5; extra == 'dev'
+Requires-Dist: torch>=2.0.0; extra == 'dev'
+Requires-Dist: transformers>=4.51.3; extra == 'dev'
+Provides-Extra: torch
+Requires-Dist: torch>=2.0.0; extra == 'torch'
+Provides-Extra: transformers
+Requires-Dist: transformers>=4.51.3; extra == 'transformers'
+Description-Content-Type: text/markdown
+# Diffusion Prompt Embedder
+[![PyPI version](https://img.shields.io/pypi/v/diffusion-prompt-embedder.svg)](https://pypi.org/project/diffusion-prompt-embedder/)
+[![Python Version](https://img.shields.io/pypi/pyversions/diffusion-prompt-embedder.svg)](https://pypi.org/project/diffusion-prompt-embedder/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Code Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen.svg)](https://github.com/jannchie/diffusion-prompt-embedder)
+A Python library specialized for parsing and processing weighted prompt text, supporting embedding generation and tokenization to enhance text processing for AI models like Stable Diffusion. It's compatible with SD Web UI's weighted prompts but doesn't include scheduling.
+## Features
+- 💬 **Prompt Parsing**: Parse text prompts with weight markers (e.g., `a (cat:1.5) in the garden`)
+- 🔢 **Weight Management**: Support for increased-weight `(text)` and decreased-weight `[text]` syntax
+- 📚 **CLIP Integration**: Seamless integration with CLIP text models for embedding generation
+- 🔄 **Batch Processing**: Efficiently process batches of multiple prompts
+- 🪄 **Long Text Support**: Handle prompts that exceed standard CLIP context length
+## Installation
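+Install the base library using pip:
+```bash
+pip install diffusion-prompt-embedder
+```
+The embedding functions import `torch` and `transformers`, which are declared only as optional extras. To install them together with the library:
+```bash
+pip install "diffusion-prompt-embedder[all]"
+```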
+## Usage Examples
+### Parse Weighted Prompts
+```python
+from diffusion_prompt_embedder import parse_prompt_attention
+# Basic parsing
+result = parse_prompt_attention("a (cat:1.5) in the garden")
+print(result)  # [['a ', 1.0], ['cat', 1.5], [' in the garden', 1.0]]
+# Using brackets to lower weight
+result = parse_prompt_attention("a [cat] in the garden")
+print(result)  # [['a ', 1.0], ['cat', 0.9090909090909091], [' in the garden', 1.0]]
+# Complex prompt example
+result = parse_prompt_attention("a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).")
+print(result)
+```
+### Generate CLIP Embeddings
+```python
+import torch
+from transformers import CLIPTokenizer, CLIPTextModel
+from diffusion_prompt_embedder import get_embeddings_sd15
+# Initialize CLIP model
+tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+text_encoder = CLIPTextModel.from_pretrained(
+    "openai/clip-vit-large-patch14",
+    torch_dtype=torch.float16
+).to("cuda")
+# Generate embeddings
+prompt_embeds, neg_prompt_embeds = get_embeddings_sd15(
+    tokenizer=tokenizer,
+    text_encoder=text_encoder,
+    prompt="a (white:1.2) cat",
+    neg_prompt="blur, bad quality",
+    clip_skip=1  # Optional: skip layers in CLIP model
+)
+# Batch processing multiple prompts
+from diffusion_prompt_embedder import get_embeddings_sd_15_batch
+batch_embeds = get_embeddings_sd_15_batch(
+    tokenizer=tokenizer,
+    text_encoder=text_encoder,
+    prompts=["a (white:1.2) cat", "a (blue:1.4) dog", "a red bird"]
+)
+```
+## Prompt Syntax
+### Basic Weight Syntax
+- `(text)` - Increases the prompt weight by 1.1x
+- `(text:1.5)` - Multiplies the prompt weight by 1.5
+- `[text]` - Decreases the prompt weight to 1/1.1 of original
+- `\( \[ \) \]` - Use backslash to escape bracket characters
+### BREAK Syntax
+Use the `BREAK` keyword to create breakpoints in prompts:
+```python
+result = parse_prompt_attention("text1 BREAK text2")
+# Result: [["text1", 1.0], ["BREAK", -1.0], ["text2", 1.0]]
+```
+## Development
+Clone the repository and install development dependencies:
+```bash
+git clone https://github.com/jannchie/diffusion-prompt-embedder.git
+cd diffusion-prompt-embedder
+pip install -e ".[dev]"
+```
+Run tests:
+```bash
+pytest
+```
+## License
+[MIT](https://opensource.org/licenses/MIT)
+## Author
+- Jianqi Pan ([@jannchie](https://github.com/jannchie))

diffusion_prompt_embedder-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+diffusion_prompt_embedder/__init__.py,sha256=60W03g8iWpUgaMmZTJSkMRsjWwZ1mAGpNQbD_O0QZ70,583
+diffusion_prompt_embedder/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+diffusion_prompt_embedder/clip/__init__.py,sha256=yikfkQ1fqg70OWMzO3i4G3yEmaXudV0XJHB-NMStXcA,286
+diffusion_prompt_embedder/clip/tokenization.py,sha256=63wd3-Gib7ZvUwmvVNfi74n0ntF79zjHc3KHvqwzAGs,4658
+diffusion_prompt_embedder/core/__init__.py,sha256=KUtvybDsDid_NCBlo9AcpLu5bbFNy24NDn8mqPQPWpc,543
+diffusion_prompt_embedder/core/embedding.py,sha256=KfgncrrYshYevD_RfVahymw7qoNrAB2S3hbRC2CZEG0,11278
+diffusion_prompt_embedder/core/parser.py,sha256=kp4Xr5XNl0JPzfouHMSZoyv_y6a7P1NEzlqhg669ubo,7383
+diffusion_prompt_embedder-0.1.0.dist-info/METADATA,sha256=dj8TG5hqOtIsHtNLfSNpA2qbUujHxv8gbH4riTytbQY,5204
+diffusion_prompt_embedder-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+diffusion_prompt_embedder-0.1.0.dist-info/RECORD,,

diffusion_prompt_embedder-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.27.0
+Root-Is-Purelib: true
+Tag: py3-none-any