PyPI - glitchlings - Versions diffs - 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl - Mend

glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

glitchlings/__init__.py +36 -17
glitchlings/__main__.py +0 -1
glitchlings/_zoo_rust/__init__.py +12 -0
glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
glitchlings/assets/__init__.py +180 -0
glitchlings/assets/apostrofae_pairs.json +32 -0
glitchlings/assets/ekkokin_homophones.json +2014 -0
glitchlings/assets/hokey_assets.json +193 -0
glitchlings/assets/lexemes/academic.json +1049 -0
glitchlings/assets/lexemes/colors.json +1333 -0
glitchlings/assets/lexemes/corporate.json +716 -0
glitchlings/assets/lexemes/cyberpunk.json +22 -0
glitchlings/assets/lexemes/lovecraftian.json +23 -0
glitchlings/assets/lexemes/synonyms.json +3354 -0
glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
glitchlings/assets/pipeline_assets.json +29 -0
glitchlings/attack/__init__.py +53 -0
glitchlings/attack/compose.py +299 -0
glitchlings/attack/core.py +465 -0
glitchlings/attack/encode.py +114 -0
glitchlings/attack/metrics.py +104 -0
glitchlings/attack/metrics_dispatch.py +70 -0
glitchlings/attack/tokenization.py +157 -0
glitchlings/auggie.py +283 -0
glitchlings/compat/__init__.py +9 -0
glitchlings/compat/loaders.py +355 -0
glitchlings/compat/types.py +41 -0
glitchlings/conf/__init__.py +41 -0
glitchlings/conf/loaders.py +331 -0
glitchlings/conf/schema.py +156 -0
glitchlings/conf/types.py +72 -0
glitchlings/config.toml +2 -0
glitchlings/constants.py +59 -0
glitchlings/dev/__init__.py +3 -0
glitchlings/dev/docs.py +45 -0
glitchlings/dlc/__init__.py +17 -3
glitchlings/dlc/_shared.py +296 -0
glitchlings/dlc/gutenberg.py +400 -0
glitchlings/dlc/huggingface.py +37 -65
glitchlings/dlc/prime.py +55 -114
glitchlings/dlc/pytorch.py +98 -0
glitchlings/dlc/pytorch_lightning.py +173 -0
glitchlings/internal/__init__.py +16 -0
glitchlings/internal/rust.py +159 -0
glitchlings/internal/rust_ffi.py +432 -0
glitchlings/main.py +123 -32
glitchlings/runtime_config.py +24 -0
glitchlings/util/__init__.py +29 -176
glitchlings/util/adapters.py +65 -0
glitchlings/util/keyboards.py +311 -0
glitchlings/util/transcripts.py +108 -0
glitchlings/zoo/__init__.py +47 -24
glitchlings/zoo/assets/__init__.py +29 -0
glitchlings/zoo/core.py +301 -167
glitchlings/zoo/core_execution.py +98 -0
glitchlings/zoo/core_planning.py +451 -0
glitchlings/zoo/corrupt_dispatch.py +295 -0
glitchlings/zoo/ekkokin.py +118 -0
glitchlings/zoo/hokey.py +137 -0
glitchlings/zoo/jargoyle.py +179 -274
glitchlings/zoo/mim1c.py +106 -68
glitchlings/zoo/pedant/__init__.py +107 -0
glitchlings/zoo/pedant/core.py +105 -0
glitchlings/zoo/pedant/forms.py +74 -0
glitchlings/zoo/pedant/stones.py +74 -0
glitchlings/zoo/redactyl.py +44 -175
glitchlings/zoo/rng.py +259 -0
glitchlings/zoo/rushmore.py +359 -116
glitchlings/zoo/scannequin.py +18 -125
glitchlings/zoo/transforms.py +386 -0
glitchlings/zoo/typogre.py +76 -162
glitchlings/zoo/validation.py +477 -0
glitchlings/zoo/zeedub.py +33 -86
glitchlings-0.9.3.dist-info/METADATA +334 -0
glitchlings-0.9.3.dist-info/RECORD +80 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
glitchlings/zoo/_ocr_confusions.py +0 -34
glitchlings/zoo/_rate.py +0 -21
glitchlings/zoo/reduple.py +0 -169
glitchlings-0.2.5.dist-info/METADATA +0 -490
glitchlings-0.2.5.dist-info/RECORD +0 -27
/glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0

glitchlings/zoo/validation.py ADDED Viewed

@@ -0,0 +1,477 @@
+"""Boundary validation layer for glitchling parameters.
+This module centralizes all input validation, type coercion, and defensive checks
+for glitchling parameters. Functions here are called at module boundaries (CLI,
+public API entry points, configuration loaders) to ensure that invalid data is
+rejected early.
+**Design Philosophy:**
+All functions in this module are *pure* - they perform validation and coercion
+based solely on their inputs, without side effects. They are intended to be
+called once at the boundary where untrusted input enters the system. Core
+transformation functions that call these validation helpers can then trust
+their inputs without re-validating.
+See AGENTS.md "Functional Purity Architecture" for full details.
+"""
+from __future__ import annotations
+import math
+import re
+from collections.abc import Collection, Iterable, Mapping, Sequence
+from dataclasses import dataclass
+from typing import Literal, TypeVar, cast
+# ---------------------------------------------------------------------------
+# Rate Validation (universal)
+# ---------------------------------------------------------------------------
+def clamp_rate(value: float, *, allow_nan: bool = False) -> float:
+    """Floor a rate at zero while leaving the upper end unbounded.
+    Args:
+        value: Rate to sanitise.
+        allow_nan: When True a NaN input is handed back untouched; otherwise
+            (the default) NaN is replaced by 0.0.
+    Returns:
+        ``value`` when it is strictly positive, 0.0 for zero or negative
+        input, and the NaN handling described above.
+    """
+    if not math.isnan(value):
+        # Anything that is not strictly positive collapses to 0.0.
+        return value if value > 0.0 else 0.0
+    return value if allow_nan else 0.0
+def clamp_rate_unit(value: float, *, allow_nan: bool = False) -> float:
+    """Confine a rate to the closed unit interval [0.0, 1.0].
+    Args:
+        value: Rate to sanitise.
+        allow_nan: When True a NaN input is handed back untouched; otherwise
+            (the default) NaN is replaced by 0.0.
+    Returns:
+        1.0 for input at or above one, 0.0 for input at or below zero, and
+        ``value`` itself for anything strictly in between.
+    """
+    if math.isnan(value):
+        return value if allow_nan else 0.0
+    if value >= 1.0:
+        return 1.0
+    if value > 0.0:
+        return value
+    return 0.0
+def resolve_rate(
+    value: float | None,
+    default: float,
+    *,
+    clamp: bool = True,
+    unit_interval: bool = False,
+) -> float:
+    """Pick the effective rate and, unless disabled, clamp it.
+    Args:
+        value: Rate supplied by the caller; None selects ``default``.
+        default: Fallback rate used when ``value`` is None.
+        clamp: When False the chosen rate is returned exactly as given.
+        unit_interval: Selects the clamp applied when ``clamp`` is True:
+            ``clamp_rate_unit`` if True, ``clamp_rate`` otherwise.
+    Returns:
+        The chosen rate, clamped as requested.
+    """
+    chosen = value if value is not None else default
+    if clamp:
+        limiter = clamp_rate_unit if unit_interval else clamp_rate
+        return limiter(chosen)
+    return chosen
+# ---------------------------------------------------------------------------
+# Mim1c Validation
+# ---------------------------------------------------------------------------
+def normalise_mim1c_classes(
+    value: object,
+) -> tuple[str, ...] | Literal["all"] | None:
+    """Bring a Mim1c homoglyph class specification into canonical form.
+    Args:
+        value: None, the word "all" in any casing, one class name, or an
+            iterable of class names.
+    Returns:
+        None for None, the literal "all", or a tuple of class names (each
+        element of an iterable is passed through ``str``).
+    Raises:
+        TypeError: If value is neither None, a string, nor an iterable.
+    """
+    if value is None:
+        return None
+    if isinstance(value, str):
+        # Only the exact word "all" (case-insensitive) is the wildcard; any
+        # other string is kept verbatim as a single class name.
+        return "all" if value.lower() == "all" else (value,)
+    if not isinstance(value, Iterable):
+        raise TypeError("classes must be an iterable of strings or 'all'")
+    return tuple(map(str, value))
+def normalise_mim1c_banned(value: object) -> tuple[str, ...] | None:
+    """Bring a Mim1c banned-character specification into canonical form.
+    Args:
+        value: None, a string whose characters are each banned, or an
+            iterable of entries.
+    Returns:
+        None for None, otherwise a tuple of strings.
+    Raises:
+        TypeError: If value is neither None, a string, nor an iterable.
+    """
+    if value is None:
+        return None
+    if not isinstance(value, Iterable):
+        raise TypeError("banned_characters must be an iterable of strings")
+    if isinstance(value, str):
+        # A plain string is split into its individual characters.
+        return tuple(value)
+    return tuple(map(str, value))
+# ---------------------------------------------------------------------------
+# Ekkokin Validation
+# ---------------------------------------------------------------------------
+def normalise_homophone_group(group: Sequence[str]) -> tuple[str, ...]:
+    """Lowercase a homophone group and drop repeats, keeping first-seen order.
+    Args:
+        group: Words that sound alike; falsy entries are ignored.
+    Returns:
+        Tuple of distinct lowercase words in order of first appearance.
+    """
+    # A dict keeps insertion order, so it doubles as an ordered set here.
+    ordered: dict[str, None] = {}
+    for word in group:
+        if word:
+            ordered.setdefault(word.lower(), None)
+    return tuple(ordered)
+def build_homophone_lookup(
+    groups: Iterable[Sequence[str]],
+) -> Mapping[str, tuple[str, ...]]:
+    """Index homophone groups by each of their member words.
+    Args:
+        groups: Iterable of homophone word groups.
+    Returns:
+        Dictionary mapping every word to its normalised group. Groups that
+        normalise to fewer than two words are left out, and a word occurring
+        in several groups ends up mapped to the last one processed.
+    """
+    lookup: dict[str, tuple[str, ...]] = {}
+    for group in groups:
+        members = normalise_homophone_group(group)
+        if len(members) >= 2:
+            # Every member points at the same shared tuple.
+            lookup.update(dict.fromkeys(members, members))
+    return lookup
+# ---------------------------------------------------------------------------
+# Rushmore Validation
+# ---------------------------------------------------------------------------
+# Rushmore modes are validated here by their string names only. Per the
+# original note, the RushmoreMode enum is defined in rushmore.py; it is
+# deliberately NOT imported into this module so no import cycle is created.
+# Maps every accepted spelling (canonical names and aliases) to its canonical mode.
+_RUSHMORE_MODE_ALIASES: dict[str, str] = {
+    "delete": "delete",
+    "drop": "delete",
+    "rushmore": "delete",
+    "duplicate": "duplicate",
+    "reduplicate": "duplicate",
+    "repeat": "duplicate",
+    "swap": "swap",
+    "adjacent": "swap",
+}
+# Canonical modes, in the order returned for the "all" / "any" / "full" shorthand.
+_RUSHMORE_EXECUTION_ORDER: tuple[str, ...] = ("delete", "duplicate", "swap")
+def normalize_rushmore_mode_item(value: str) -> list[str]:
+    """Expand one Rushmore mode specification into canonical mode names.
+    Args:
+        value: A mode name, an alias, the shorthand "all"/"any"/"full", or a
+            compound joined by "+", "," or whitespace ("delete+duplicate").
+    Returns:
+        Canonical mode names in the order written (duplicates are kept);
+        an empty list when the input holds no tokens.
+    Raises:
+        ValueError: If any token is not a known mode or alias. The message
+            quotes the whole original ``value``.
+    """
+    text = str(value).strip().lower()
+    if text in {"all", "any", "full"}:
+        return list(_RUSHMORE_EXECUTION_ORDER)
+    resolved: list[str] = []
+    for token in re.split(r"[+,\s]+", text):
+        if not token:
+            # Splitting leaves empty strings around leading/trailing separators.
+            continue
+        canonical = _RUSHMORE_MODE_ALIASES.get(token)
+        if canonical is None:
+            raise ValueError(f"Unsupported Rushmore mode '{value}'")
+        resolved.append(canonical)
+    return resolved
+def normalize_rushmore_modes(
+    modes: str | Iterable[str] | None,
+    *,
+    default: str = "delete",
+) -> tuple[str, ...]:
+    """Reduce a Rushmore mode specification to a tuple of canonical names.
+    Args:
+        modes: None, one mode string, or an iterable of mode strings.
+        default: Mode specification used when ``modes`` is None or empty.
+    Returns:
+        Distinct canonical mode names in order of first appearance, or
+        ``(default,)`` as given when nothing resolves.
+    """
+    if isinstance(modes, str):
+        candidates: Sequence[str] = (modes,)
+    else:
+        # None and an empty iterable both fall back to the default mode.
+        candidates = (tuple(modes) if modes is not None else ()) or (default,)
+    # Dict keys give de-duplication while remembering first-seen order.
+    unique: dict[str, None] = {}
+    for candidate in candidates:
+        for mode in normalize_rushmore_mode_item(candidate):
+            unique.setdefault(mode, None)
+    return tuple(unique) or (default,)
+@dataclass(frozen=True)
+class RushmoreRateConfig:
+    """Resolved rate configuration for a single Rushmore mode.
+    Instances are immutable (``frozen=True``). Nothing else in this module
+    constructs or reads the class; it is only listed in ``__all__``.
+    """
+    # Mode the rate belongs to; presumably a canonical name such as
+    # "delete", "duplicate" or "swap" - TODO confirm against callers.
+    mode: str
+    # Rate associated with that mode.
+    rate: float
+    # NOTE(review): looks like this marks a rate that came from defaults
+    # rather than from the user - confirm against callers.
+    is_default: bool = False
+def resolve_rushmore_mode_rate(
+    *,
+    mode: str,
+    global_rate: float | None,
+    specific_rate: float | None,
+    default_rates: Mapping[str, float],
+    allow_default: bool,
+) -> float | None:
+    """Work out the rate that applies to one Rushmore mode.
+    Precedence is mode-specific rate, then global rate, then (only when
+    ``allow_default`` is set) the entry for ``mode`` in ``default_rates``.
+    Args:
+        mode: Canonical mode name ("delete", "duplicate", "swap").
+        global_rate: Caller-supplied rate shared by all modes, or None.
+        specific_rate: Caller-supplied rate for this mode, or None.
+        default_rates: Fallback rates keyed by mode name.
+        allow_default: Whether ``default_rates`` may be consulted.
+    Returns:
+        The rate as a float floored at 0.0 (and capped at 1.0 for "swap"),
+        or None when no source provides a rate.
+    """
+    baseline = global_rate if specific_rate is None else specific_rate
+    if baseline is None and allow_default:
+        baseline = default_rates.get(mode)
+    if baseline is None:
+        return None
+    rate = float(baseline)
+    if not rate > 0.0:
+        # Written as a negation so NaN also lands on 0.0.
+        rate = 0.0
+    elif mode == "swap" and rate > 1.0:
+        # Only the swap mode has an upper bound.
+        rate = 1.0
+    return rate
+# ---------------------------------------------------------------------------
+# Keyboard Layout Validation
+# ---------------------------------------------------------------------------
+# Generic type variable. No function or class in this module references it.
+T = TypeVar("T")
+def validate_keyboard_layout(
+    keyboard: str,
+    layouts: object,
+    *,
+    context: str = "keyboard layout",
+) -> Mapping[str, Sequence[str]]:
+    """Fetch a named keyboard layout, failing loudly when it is unknown.
+    Args:
+        keyboard: Name of the layout, read as an attribute of ``layouts``.
+        layouts: Object exposing layouts as attributes (e.g., KEYNEIGHBORS).
+        context: Noun phrase inserted into the error message.
+    Returns:
+        The attribute value, typed as a keyboard neighbor mapping.
+    Raises:
+        RuntimeError: If the attribute is missing or set to None.
+    """
+    mapping = getattr(layouts, keyboard, None)
+    if mapping is not None:
+        # cast() is a typing aid only; the value is returned unchanged.
+        return cast(Mapping[str, Sequence[str]], mapping)
+    raise RuntimeError(f"Unknown {context} '{keyboard}'")
+def get_keyboard_layout_or_default(
+    keyboard: str,
+    layouts: object,
+    *,
+    default: Mapping[str, Sequence[str]] | None = None,
+) -> Mapping[str, Sequence[str]] | None:
+    """Fetch a named keyboard layout, falling back quietly when it is unknown.
+    Args:
+        keyboard: Name of the layout, read as an attribute of ``layouts``.
+        layouts: Object exposing layouts as attributes.
+        default: Value handed back when the attribute is missing or None.
+    Returns:
+        The attribute value typed as a keyboard neighbor mapping, or
+        ``default`` (None unless overridden) when there is no such layout.
+    """
+    found = getattr(layouts, keyboard, None)
+    return default if found is None else cast(Mapping[str, Sequence[str]], found)
+# ---------------------------------------------------------------------------
+# Zeedub Validation
+# ---------------------------------------------------------------------------
+def normalize_zero_width_palette(
+    characters: Sequence[str] | None,
+    default: tuple[str, ...],
+) -> tuple[str, ...]:
+    """Choose the zero-width palette and strip empty entries from it.
+    Args:
+        characters: Caller-supplied characters; None selects ``default``.
+            An empty sequence is honoured and yields an empty palette.
+        default: Palette used when ``characters`` is None.
+    Returns:
+        Tuple holding the truthy entries of the chosen palette, in order.
+    """
+    source: Sequence[str] = default if characters is None else characters
+    # filter(None, ...) keeps exactly the truthy items.
+    return tuple(filter(None, source))
+# ---------------------------------------------------------------------------
+# Redactyl Validation
+# ---------------------------------------------------------------------------
+def normalize_replacement_char(
+    replacement_char: str | None,
+    default: str,
+) -> str:
+    """Choose the redaction replacement character.
+    Args:
+        replacement_char: Caller-supplied character; None selects ``default``.
+        default: Replacement used when no character is supplied.
+    Returns:
+        ``default`` unchanged, or ``replacement_char`` passed through ``str``.
+        An empty string is kept as-is rather than replaced by the default.
+    """
+    if replacement_char is None:
+        return default
+    return str(replacement_char)
+# ---------------------------------------------------------------------------
+# Boolean Flag Helpers
+# ---------------------------------------------------------------------------
+def resolve_bool_flag(
+    specific: bool | None,
+    global_default: bool,
+) -> bool:
+    """Resolve a flag, letting a specific override beat the global setting.
+    Args:
+        specific: Override value; None defers to ``global_default``.
+        global_default: Setting used when no override is given.
+    Returns:
+        The winning value coerced with ``bool``.
+    """
+    chosen = global_default if specific is None else specific
+    return bool(chosen)
+# ---------------------------------------------------------------------------
+# Collection Helpers
+# ---------------------------------------------------------------------------
+def normalize_string_collection(
+    value: str | Collection[str] | None,
+) -> tuple[str, ...] | None:
+    """Turn a string or a collection of strings into a tuple.
+    Args:
+        value: One string, a collection of strings, or None.
+    Returns:
+        None for None, a one-element tuple for a lone string (it is not
+        split into characters), otherwise the collection's items as a tuple.
+    """
+    if value is None:
+        return None
+    return (value,) if isinstance(value, str) else tuple(value)
+# Names exported by a star-import of this module, grouped by the glitchling
+# (or general concern) each helper serves.
+__all__ = [
+    # Rate validation
+    "clamp_rate",
+    "clamp_rate_unit",
+    "resolve_rate",
+    # Mim1c
+    "normalise_mim1c_classes",
+    "normalise_mim1c_banned",
+    # Ekkokin
+    "normalise_homophone_group",
+    "build_homophone_lookup",
+    # Rushmore
+    "normalize_rushmore_mode_item",
+    "normalize_rushmore_modes",
+    "resolve_rushmore_mode_rate",
+    "RushmoreRateConfig",
+    # Keyboard
+    "validate_keyboard_layout",
+    "get_keyboard_layout_or_default",
+    # Zeedub
+    "normalize_zero_width_palette",
+    # Redactyl
+    "normalize_replacement_char",
+    # Flags and helpers
+    "resolve_bool_flag",
+    "normalize_string_collection",
+]

glitchlings/zoo/zeedub.py CHANGED Viewed

@@ -1,71 +1,18 @@
 from __future__ import annotations
-import math
 import random
 from collections.abc import Sequence
+from typing import cast
-from .core import Glitchling, AttackWave, AttackOrder
-from ._rate import resolve_rate
-try:
-    from glitchlings._zoo_rust import inject_zero_widths as _inject_zero_widths_rust
-except ImportError:  # pragma: no cover - compiled extension not present
-    _inject_zero_widths_rust = None
-_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = (
-    "\u200b",  # ZERO WIDTH SPACE
-    "\u200c",  # ZERO WIDTH NON-JOINER
-    "\u200d",  # ZERO WIDTH JOINER
-    "\ufeff",  # ZERO WIDTH NO-BREAK SPACE
-    "\u2060",  # WORD JOINER
+from glitchlings.constants import DEFAULT_ZEEDUB_RATE, ZEEDUB_DEFAULT_ZERO_WIDTHS
+from glitchlings.internal.rust_ffi import (
+    inject_zero_widths_rust,
+    resolve_seed,
 )
+from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
-def _python_insert_zero_widths(
-    text: str,
-    *,
-    rate: float,
-    rng: random.Random,
-    characters: Sequence[str],
-) -> str:
-    if not text:
-        return text
-    palette = [char for char in characters if char]
-    if not palette:
-        return text
-    positions = [
-        index + 1
-        for index in range(len(text) - 1)
-        if not text[index].isspace() and not text[index + 1].isspace()
-    ]
-    if not positions:
-        return text
-    total = len(positions)
-    clamped_rate = max(0.0, rate)
-    if clamped_rate <= 0.0:
-        return text
-    target = clamped_rate * total
-    count = math.floor(target)
-    remainder = target - count
-    if remainder > 0.0 and rng.random() < remainder:
-        count += 1
-    count = min(total, count)
-    if count <= 0:
-        return text
-    chosen = rng.sample(positions, count)
-    chosen.sort()
-    chars = list(text)
-    for position in reversed(chosen):
-        chars.insert(position, rng.choice(palette))
-    return "".join(chars)
+_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = ZEEDUB_DEFAULT_ZERO_WIDTHS
 def insert_zero_widths(
@@ -77,19 +24,10 @@ def insert_zero_widths(
     characters: Sequence[str] | None = None,
 ) -> str:
     """Inject zero-width characters between non-space character pairs."""
-    effective_rate = resolve_rate(
-        rate=rate,
-        legacy_value=None,
-        default=0.02,
-        legacy_name="rate",
-    )
-    if rng is None:
-        rng = random.Random(seed)
+    effective_rate = DEFAULT_ZEEDUB_RATE if rate is None else rate
     palette: Sequence[str] = (
-        tuple(characters) if characters is not None else _DEFAULT_ZERO_WIDTH_CHARACTERS
+        tuple(characters) if characters is not None else ZEEDUB_DEFAULT_ZERO_WIDTHS
     )
     cleaned_palette = tuple(char for char in palette if char)
@@ -100,20 +38,15 @@ def insert_zero_widths(
     if clamped_rate == 0.0:
         return text
-    if _inject_zero_widths_rust is not None:
-        return _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
-    return _python_insert_zero_widths(
-        text,
-        rate=clamped_rate,
-        rng=rng,
-        characters=cleaned_palette,
-    )
+    seed_value = resolve_seed(seed, rng)
+    return inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), seed_value)
 class Zeedub(Glitchling):
     """Glitchling that plants zero-width glyphs inside words."""
+    flavor = "I'm invoking my right to remain silent."
     def __init__(
         self,
         *,
@@ -121,12 +54,7 @@ class Zeedub(Glitchling):
         seed: int | None = None,
         characters: Sequence[str] | None = None,
     ) -> None:
-        effective_rate = resolve_rate(
-            rate=rate,
-            legacy_value=None,
-            default=0.02,
-            legacy_name="rate",
-        )
+        effective_rate = DEFAULT_ZEEDUB_RATE if rate is None else rate
         super().__init__(
             name="Zeedub",
             corruption_function=insert_zero_widths,
@@ -137,6 +65,25 @@ class Zeedub(Glitchling):
             characters=tuple(characters) if characters is not None else None,
         )
+    def pipeline_operation(self) -> PipelineOperationPayload:
+        """Describe this glitchling's settings as a pipeline operation payload.
+        Returns:
+            A mapping with ``"type"`` fixed to ``"zwj"``, ``"rate"`` as a float
+            (``DEFAULT_ZEEDUB_RATE`` when no rate is stored in ``self.kwargs``)
+            and ``"characters"`` as a list of strings.
+        """
+        rate = float(self.kwargs.get("rate", DEFAULT_ZEEDUB_RATE))
+        configured = self.kwargs.get("characters")
+        if configured is None:
+            characters = list(ZEEDUB_DEFAULT_ZERO_WIDTHS)
+        else:
+            # Falsy entries are dropped; everything else is coerced to str.
+            characters = [str(char) for char in configured if char]
+        payload = {
+            "type": "zwj",
+            "rate": rate,
+            "characters": characters,
+        }
+        return cast(PipelineOperationPayload, payload)
 zeedub = Zeedub()