PyPI - glitchlings - Versions diffs - 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl - Mend

glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

glitchlings/__init__.py +36 -17
glitchlings/__main__.py +0 -1
glitchlings/_zoo_rust/__init__.py +12 -0
glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
glitchlings/assets/__init__.py +180 -0
glitchlings/assets/apostrofae_pairs.json +32 -0
glitchlings/assets/ekkokin_homophones.json +2014 -0
glitchlings/assets/hokey_assets.json +193 -0
glitchlings/assets/lexemes/academic.json +1049 -0
glitchlings/assets/lexemes/colors.json +1333 -0
glitchlings/assets/lexemes/corporate.json +716 -0
glitchlings/assets/lexemes/cyberpunk.json +22 -0
glitchlings/assets/lexemes/lovecraftian.json +23 -0
glitchlings/assets/lexemes/synonyms.json +3354 -0
glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
glitchlings/assets/pipeline_assets.json +29 -0
glitchlings/attack/__init__.py +53 -0
glitchlings/attack/compose.py +299 -0
glitchlings/attack/core.py +465 -0
glitchlings/attack/encode.py +114 -0
glitchlings/attack/metrics.py +104 -0
glitchlings/attack/metrics_dispatch.py +70 -0
glitchlings/attack/tokenization.py +157 -0
glitchlings/auggie.py +283 -0
glitchlings/compat/__init__.py +9 -0
glitchlings/compat/loaders.py +355 -0
glitchlings/compat/types.py +41 -0
glitchlings/conf/__init__.py +41 -0
glitchlings/conf/loaders.py +331 -0
glitchlings/conf/schema.py +156 -0
glitchlings/conf/types.py +72 -0
glitchlings/config.toml +2 -0
glitchlings/constants.py +59 -0
glitchlings/dev/__init__.py +3 -0
glitchlings/dev/docs.py +45 -0
glitchlings/dlc/__init__.py +17 -3
glitchlings/dlc/_shared.py +296 -0
glitchlings/dlc/gutenberg.py +400 -0
glitchlings/dlc/huggingface.py +37 -65
glitchlings/dlc/prime.py +55 -114
glitchlings/dlc/pytorch.py +98 -0
glitchlings/dlc/pytorch_lightning.py +173 -0
glitchlings/internal/__init__.py +16 -0
glitchlings/internal/rust.py +159 -0
glitchlings/internal/rust_ffi.py +432 -0
glitchlings/main.py +123 -32
glitchlings/runtime_config.py +24 -0
glitchlings/util/__init__.py +29 -176
glitchlings/util/adapters.py +65 -0
glitchlings/util/keyboards.py +311 -0
glitchlings/util/transcripts.py +108 -0
glitchlings/zoo/__init__.py +47 -24
glitchlings/zoo/assets/__init__.py +29 -0
glitchlings/zoo/core.py +301 -167
glitchlings/zoo/core_execution.py +98 -0
glitchlings/zoo/core_planning.py +451 -0
glitchlings/zoo/corrupt_dispatch.py +295 -0
glitchlings/zoo/ekkokin.py +118 -0
glitchlings/zoo/hokey.py +137 -0
glitchlings/zoo/jargoyle.py +179 -274
glitchlings/zoo/mim1c.py +106 -68
glitchlings/zoo/pedant/__init__.py +107 -0
glitchlings/zoo/pedant/core.py +105 -0
glitchlings/zoo/pedant/forms.py +74 -0
glitchlings/zoo/pedant/stones.py +74 -0
glitchlings/zoo/redactyl.py +44 -175
glitchlings/zoo/rng.py +259 -0
glitchlings/zoo/rushmore.py +359 -116
glitchlings/zoo/scannequin.py +18 -125
glitchlings/zoo/transforms.py +386 -0
glitchlings/zoo/typogre.py +76 -162
glitchlings/zoo/validation.py +477 -0
glitchlings/zoo/zeedub.py +33 -86
glitchlings-0.9.3.dist-info/METADATA +334 -0
glitchlings-0.9.3.dist-info/RECORD +80 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
glitchlings/zoo/_ocr_confusions.py +0 -34
glitchlings/zoo/_rate.py +0 -21
glitchlings/zoo/reduple.py +0 -169
glitchlings-0.2.5.dist-info/METADATA +0 -490
glitchlings-0.2.5.dist-info/RECORD +0 -27
glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0

glitchlings/zoo/redactyl.py CHANGED

@@ -1,198 +1,59 @@
-import re
 import random
-from typing import Any
+from typing import cast
-from .core import Glitchling, AttackWave
-from ._rate import resolve_rate
+from glitchlings.constants import DEFAULT_REDACTYL_CHAR, DEFAULT_REDACTYL_RATE
+from glitchlings.internal.rust_ffi import redact_words_rust, resolve_seed
-FULL_BLOCK = "█"
+from .core import AttackWave, Glitchling, PipelineOperationPayload
-try:
-    from glitchlings._zoo_rust import redact_words as _redact_words_rust
-except ImportError:  # pragma: no cover - compiled extension not present
-    _redact_words_rust = None
-def _weighted_sample_without_replacement(
-    population: list[int],
-    weights: list[float],
-    *,
-    k: int,
-    rng: random.Random,
-) -> list[int]:
-    """Select `k` unique indices according to the given weights."""
-    selections: list[int] = []
-    items = list(zip(population, weights))
-    if k <= 0 or not items:
-        return selections
-    if k > len(items):
-        raise ValueError("Sample larger than population or is negative")
-    for _ in range(k):
-        total_weight = sum(weight for _, weight in items)
-        if total_weight <= 0:
-            chosen_index = rng.randrange(len(items))
-        else:
-            threshold = rng.random() * total_weight
-            cumulative = 0.0
-            chosen_index = len(items) - 1
-            for idx, (_, weight) in enumerate(items):
-                cumulative += weight
-                if cumulative >= threshold:
-                    chosen_index = idx
-                    break
-        value, _ = items.pop(chosen_index)
-        selections.append(value)
-    return selections
-def _python_redact_words(
-    text: str,
-    *,
-    replacement_char: str,
-    rate: float,
-    merge_adjacent: bool,
-    rng: random.Random,
-    unweighted: bool = False,
-) -> str:
-    """Redact random words by replacing their characters.
-    Parameters
-    - text: Input text.
-    - replacement_char: The character to use for redaction (default FULL_BLOCK).
-    - rate: Max proportion of words to redact (default 0.05).
-    - merge_adjacent: If True, merges adjacent redactions across intervening non-word chars.
-    - rng: RNG used for sampling decisions.
-    - unweighted: When True, sample words uniformly instead of by length.
-    """
-    # Preserve exact spacing and punctuation by using regex
-    tokens = re.split(r"(\s+)", text)
-    word_indices = [i for i, token in enumerate(tokens) if i % 2 == 0 and token.strip()]
-    if not word_indices:
-        raise ValueError(
-            "Cannot redact words because the input text contains no redactable words."
-        )
-    weights: list[float] = []
-    for index in word_indices:
-        word = tokens[index]
-        match = re.match(r"^(\W*)(.*?)(\W*)$", word)
-        core = match.group(2) if match else word
-        core_length = len(core) if core else len(word)
-        if core_length <= 0:
-            core_length = len(word.strip()) or len(word)
-        if core_length <= 0:
-            core_length = 1
-        weights.append(1.0 if unweighted else float(core_length))
-    num_to_redact = max(1, int(len(word_indices) * rate))
-    if num_to_redact > len(word_indices):
-        raise ValueError("Sample larger than population or is negative")
-    indices_to_redact = _weighted_sample_without_replacement(
-        word_indices,
-        weights,
-        k=num_to_redact,
-        rng=rng,
-    )
-    indices_to_redact.sort()
-    for i in indices_to_redact:
-        if i >= len(tokens):
-            break
-        word = tokens[i]
-        if not word or word.isspace():  # Skip empty or whitespace
-            continue
-        # Check if word has trailing punctuation
-        match = re.match(r"^(\W*)(.*?)(\W*)$", word)
-        if match:
-            prefix, core, suffix = match.groups()
-            tokens[i] = f"{prefix}{replacement_char * len(core)}{suffix}"
-        else:
-            tokens[i] = f"{replacement_char * len(word)}"
-    text = "".join(tokens)
-    if merge_adjacent:
-        text = re.sub(
-            rf"{replacement_char}\W+{replacement_char}",
-            lambda m: replacement_char * (len(m.group(0)) - 1),
-            text,
-        )
-    return text
+# Backwards compatibility alias
+FULL_BLOCK = DEFAULT_REDACTYL_CHAR
 def redact_words(
     text: str,
-    replacement_char: str = FULL_BLOCK,
+    replacement_char: str | None = DEFAULT_REDACTYL_CHAR,
     rate: float | None = None,
-    merge_adjacent: bool = False,
+    merge_adjacent: bool | None = False,
     seed: int = 151,
     rng: random.Random | None = None,
     *,
-    redaction_rate: float | None = None,
     unweighted: bool = False,
 ) -> str:
     """Redact random words by replacing their characters."""
+    effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
-    effective_rate = resolve_rate(
-        rate=rate,
-        legacy_value=redaction_rate,
-        default=0.025,
-        legacy_name="redaction_rate",
-    )
-    if rng is None:
-        rng = random.Random(seed)
+    replacement = DEFAULT_REDACTYL_CHAR if replacement_char is None else str(replacement_char)
+    merge = False if merge_adjacent is None else bool(merge_adjacent)
-    clamped_rate = max(0.0, effective_rate)
+    clamped_rate = max(0.0, min(effective_rate, 1.0))
     unweighted_flag = bool(unweighted)
-    use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
-    if use_rust:
-        return _redact_words_rust(
-            text,
-            replacement_char,
-            clamped_rate,
-            merge_adjacent,
-            unweighted_flag,
-            rng,
-        )
-    return _python_redact_words(
+    return redact_words_rust(
         text,
-        replacement_char=replacement_char,
-        rate=clamped_rate,
-        merge_adjacent=merge_adjacent,
-        rng=rng,
-        unweighted=unweighted_flag,
+        replacement,
+        clamped_rate,
+        merge,
+        unweighted_flag,
+        resolve_seed(seed, rng),
     )
 class Redactyl(Glitchling):
     """Glitchling that redacts words with block characters."""
+    flavor = "Some things are better left ████████."
     def __init__(
         self,
         *,
-        replacement_char: str = FULL_BLOCK,
+        replacement_char: str = DEFAULT_REDACTYL_CHAR,
         rate: float | None = None,
-        redaction_rate: float | None = None,
         merge_adjacent: bool = False,
         seed: int = 151,
         unweighted: bool = False,
     ) -> None:
-        self._param_aliases = {"redaction_rate": "rate"}
-        effective_rate = resolve_rate(
-            rate=rate,
-            legacy_value=redaction_rate,
-            default=0.025,
-            legacy_name="redaction_rate",
-        )
+        effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
         super().__init__(
             name="Redactyl",
             corruption_function=redact_words,
@@ -204,23 +65,31 @@ class Redactyl(Glitchling):
             unweighted=unweighted,
         )
-    def pipeline_operation(self) -> dict[str, Any] | None:
-        replacement_char = self.kwargs.get("replacement_char")
-        rate = self.kwargs.get("rate")
-        merge_adjacent = self.kwargs.get("merge_adjacent")
-        if replacement_char is None or rate is None or merge_adjacent is None:
-            return None
+    def pipeline_operation(self) -> PipelineOperationPayload:
+        replacement_char_value = self.kwargs.get("replacement_char", DEFAULT_REDACTYL_CHAR)
+        rate_value = self.kwargs.get("rate", DEFAULT_REDACTYL_RATE)
+        merge_value = self.kwargs.get("merge_adjacent", False)
+        replacement_char = str(
+            DEFAULT_REDACTYL_CHAR if replacement_char_value is None else replacement_char_value
+        )
+        rate = float(DEFAULT_REDACTYL_RATE if rate_value is None else rate_value)
+        merge_adjacent = bool(merge_value)
         unweighted = bool(self.kwargs.get("unweighted", False))
-        return {
-            "type": "redact",
-            "replacement_char": str(replacement_char),
-            "redaction_rate": float(rate),
-            "merge_adjacent": bool(merge_adjacent),
-            "unweighted": unweighted,
-        }
+        return cast(
+            PipelineOperationPayload,
+            {
+                "type": "redact",
+                "replacement_char": replacement_char,
+                "rate": rate,
+                "merge_adjacent": merge_adjacent,
+                "unweighted": unweighted,
+            },
+        )
 redactyl = Redactyl()
-__all__ = ["Redactyl", "redactyl"]
+__all__ = ["Redactyl", "redactyl", "redact_words"]

glitchlings/zoo/rng.py ADDED

@@ -0,0 +1,259 @@
+"""RNG boundary layer for seed resolution.
+This module provides the interface between RNG state and concrete random values.
+All randomness in the glitchlings library flows through these functions.
+Design Philosophy
+-----------------
+RNG management is an *impure* operation - it involves stateful objects
+(random.Random) and non-deterministic behavior when no seed is provided.
+This module provides the boundary layer that converts RNG state into
+concrete values that can be passed to pure functions.
+The pattern is:
+    1. User provides `seed: int | None` and/or `rng: random.Random | None`
+    2. Boundary layer resolves to a concrete `int` via `resolve_seed()`
+    3. Pure/Rust functions receive the concrete seed value
+This separation means:
+    - Pure transformation code never touches RNG objects
+    - Tests can provide explicit seed values for reproducibility
+    - RNG state management is isolated to the boundary
+See AGENTS.md "Functional Purity Architecture" for full details.
+"""
+from __future__ import annotations
+import random
+from hashlib import blake2s
+from typing import Protocol, runtime_checkable
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+# Bit width for seed values (64-bit for compatibility with Rust u64)
+SEED_BIT_WIDTH = 64
+SEED_MASK = (1 << SEED_BIT_WIDTH) - 1  # 0xFFFFFFFFFFFFFFFF
+# ---------------------------------------------------------------------------
+# Protocols
+# ---------------------------------------------------------------------------
+@runtime_checkable
+class RandomBitsSource(Protocol):
+    """Protocol for objects that can provide random bits."""
+    def getrandbits(self, k: int) -> int:
+        """Return a non-negative integer with k random bits."""
+        ...
+# ---------------------------------------------------------------------------
+# Core Boundary Functions
+# ---------------------------------------------------------------------------
+def resolve_seed(
+    seed: int | None,
+    rng: random.Random | None,
+) -> int:
+    """Resolve a seed from optional explicit seed or RNG state.
+    This is the primary boundary function for RNG resolution. Call this
+    once at the boundary layer, then pass the resulting int to all
+    downstream pure/Rust functions.
+    Args:
+        seed: Explicit seed value. If provided, takes precedence over rng.
+        rng: Random generator to sample from if seed is None.
+    Returns:
+        A 64-bit unsigned integer suitable for Rust FFI.
+    Note:
+        If both seed and rng are None, uses module-level random state.
+        This is non-deterministic and should only happen at top-level CLI usage.
+    Examples:
+        >>> resolve_seed(42, None)  # explicit seed
+        42
+        >>> rng = random.Random(123)
+        >>> resolve_seed(None, rng)  # sample from RNG
+        14522756016584210807
+    """
+    if seed is not None:
+        return int(seed) & SEED_MASK
+    if rng is not None:
+        return rng.getrandbits(SEED_BIT_WIDTH)
+    return random.getrandbits(SEED_BIT_WIDTH)
+def resolve_seed_deterministic(
+    seed: int | None,
+    rng: random.Random | None,
+) -> int:
+    """Resolve a seed, requiring explicit seed or RNG.
+    Like resolve_seed(), but raises ValueError if both seed and rng are None.
+    Use this when non-deterministic behavior would be a bug.
+    Args:
+        seed: Explicit seed value.
+        rng: Random generator to sample from.
+    Returns:
+        A 64-bit unsigned integer.
+    Raises:
+        ValueError: If both seed and rng are None.
+    """
+    if seed is not None:
+        return int(seed) & SEED_MASK
+    if rng is not None:
+        return rng.getrandbits(SEED_BIT_WIDTH)
+    raise ValueError("Either seed or rng must be provided for deterministic behavior")
+# ---------------------------------------------------------------------------
+# Seed Derivation (Deterministic)
+# ---------------------------------------------------------------------------
+def derive_seed(base_seed: int, *components: int | str) -> int:
+    """Derive a new seed from a base seed and components.
+    This is a pure function for hierarchical seed derivation.
+    Used by Gaggle to give each glitchling a unique but reproducible seed.
+    Uses blake2s for stable hashing across interpreter runs (unlike Python's
+    built-in hash() which is salted per-process). This ensures identical
+    inputs always produce identical seeds regardless of PYTHONHASHSEED.
+    Args:
+        base_seed: The parent seed.
+        *components: Additional components to mix in (integers or strings).
+    Returns:
+        A derived 64-bit seed.
+    Examples:
+        >>> derive_seed(12345, 0)  # first child
+        13704458811836263874
+        >>> derive_seed(12345, 1)  # second child
+        7874335407589182396
+        >>> derive_seed(12345, "typogre")  # named child
+        561509252352425601
+    """
+    # Use blake2s for stable, deterministic hashing across runs
+    hasher = blake2s(digest_size=8)
+    # Helper to convert int to bytes (handles arbitrary size)
+    def _int_to_bytes(value: int) -> bytes:
+        if value == 0:
+            return b"\x00"
+        abs_value = abs(value)
+        length = (abs_value.bit_length() + 7) // 8
+        if value < 0:
+            while True:
+                try:
+                    return value.to_bytes(length, "big", signed=True)
+                except OverflowError:
+                    length += 1
+        return abs_value.to_bytes(length, "big", signed=False)
+    hasher.update(_int_to_bytes(base_seed))
+    for component in components:
+        hasher.update(b"\x00")  # separator
+        if isinstance(component, str):
+            hasher.update(component.encode("utf-8"))
+        else:
+            hasher.update(_int_to_bytes(component))
+    return int.from_bytes(hasher.digest(), "big")
+# ---------------------------------------------------------------------------
+# Random Value Generation (Impure)
+# ---------------------------------------------------------------------------
+def create_rng(seed: int) -> random.Random:
+    """Create a new Random instance from a seed.
+    Use this when you need to create child RNG states for parallel operations.
+    Prefer passing concrete seed values to functions when possible.
+    Args:
+        seed: The seed for the new RNG.
+    Returns:
+        A new random.Random instance.
+    """
+    return random.Random(seed)
+def sample_random_float(rng: random.Random) -> float:
+    """Sample a random float in [0.0, 1.0) from an RNG.
+    Args:
+        rng: The random generator.
+    Returns:
+        Float in range [0.0, 1.0).
+    """
+    return rng.random()
+def sample_random_int(rng: random.Random, *, low: int, high: int) -> int:
+    """Sample a random integer in [low, high] inclusive.
+    Args:
+        rng: The random generator.
+        low: Minimum value (inclusive).
+        high: Maximum value (inclusive).
+    Returns:
+        Random integer in range [low, high].
+    """
+    return rng.randint(low, high)
+def sample_random_index(rng: random.Random, length: int) -> int:
+    """Sample a random index for a sequence of given length.
+    Args:
+        rng: The random generator.
+        length: The sequence length.
+    Returns:
+        Random index in range [0, length).
+    Raises:
+        ValueError: If length <= 0.
+    """
+    if length <= 0:
+        raise ValueError("Cannot sample index from empty sequence")
+    return rng.randrange(length)
+__all__ = [
+    # Constants
+    "SEED_BIT_WIDTH",
+    "SEED_MASK",
+    # Protocols
+    "RandomBitsSource",
+    # Boundary functions
+    "resolve_seed",
+    "resolve_seed_deterministic",
+    # Derivation
+    "derive_seed",
+    # RNG operations (impure)
+    "create_rng",
+    "sample_random_float",
+    "sample_random_int",
+    "sample_random_index",
+]