PyPI - glitchlings - Versions diffs - 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl - Mend

glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

glitchlings/__init__.py +36 -17
glitchlings/__main__.py +0 -1
glitchlings/_zoo_rust/__init__.py +12 -0
glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
glitchlings/assets/__init__.py +180 -0
glitchlings/assets/apostrofae_pairs.json +32 -0
glitchlings/assets/ekkokin_homophones.json +2014 -0
glitchlings/assets/hokey_assets.json +193 -0
glitchlings/assets/lexemes/academic.json +1049 -0
glitchlings/assets/lexemes/colors.json +1333 -0
glitchlings/assets/lexemes/corporate.json +716 -0
glitchlings/assets/lexemes/cyberpunk.json +22 -0
glitchlings/assets/lexemes/lovecraftian.json +23 -0
glitchlings/assets/lexemes/synonyms.json +3354 -0
glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
glitchlings/assets/pipeline_assets.json +29 -0
glitchlings/attack/__init__.py +53 -0
glitchlings/attack/compose.py +299 -0
glitchlings/attack/core.py +465 -0
glitchlings/attack/encode.py +114 -0
glitchlings/attack/metrics.py +104 -0
glitchlings/attack/metrics_dispatch.py +70 -0
glitchlings/attack/tokenization.py +157 -0
glitchlings/auggie.py +283 -0
glitchlings/compat/__init__.py +9 -0
glitchlings/compat/loaders.py +355 -0
glitchlings/compat/types.py +41 -0
glitchlings/conf/__init__.py +41 -0
glitchlings/conf/loaders.py +331 -0
glitchlings/conf/schema.py +156 -0
glitchlings/conf/types.py +72 -0
glitchlings/config.toml +2 -0
glitchlings/constants.py +59 -0
glitchlings/dev/__init__.py +3 -0
glitchlings/dev/docs.py +45 -0
glitchlings/dlc/__init__.py +17 -3
glitchlings/dlc/_shared.py +296 -0
glitchlings/dlc/gutenberg.py +400 -0
glitchlings/dlc/huggingface.py +37 -65
glitchlings/dlc/prime.py +55 -114
glitchlings/dlc/pytorch.py +98 -0
glitchlings/dlc/pytorch_lightning.py +173 -0
glitchlings/internal/__init__.py +16 -0
glitchlings/internal/rust.py +159 -0
glitchlings/internal/rust_ffi.py +432 -0
glitchlings/main.py +123 -32
glitchlings/runtime_config.py +24 -0
glitchlings/util/__init__.py +29 -176
glitchlings/util/adapters.py +65 -0
glitchlings/util/keyboards.py +311 -0
glitchlings/util/transcripts.py +108 -0
glitchlings/zoo/__init__.py +47 -24
glitchlings/zoo/assets/__init__.py +29 -0
glitchlings/zoo/core.py +301 -167
glitchlings/zoo/core_execution.py +98 -0
glitchlings/zoo/core_planning.py +451 -0
glitchlings/zoo/corrupt_dispatch.py +295 -0
glitchlings/zoo/ekkokin.py +118 -0
glitchlings/zoo/hokey.py +137 -0
glitchlings/zoo/jargoyle.py +179 -274
glitchlings/zoo/mim1c.py +106 -68
glitchlings/zoo/pedant/__init__.py +107 -0
glitchlings/zoo/pedant/core.py +105 -0
glitchlings/zoo/pedant/forms.py +74 -0
glitchlings/zoo/pedant/stones.py +74 -0
glitchlings/zoo/redactyl.py +44 -175
glitchlings/zoo/rng.py +259 -0
glitchlings/zoo/rushmore.py +359 -116
glitchlings/zoo/scannequin.py +18 -125
glitchlings/zoo/transforms.py +386 -0
glitchlings/zoo/typogre.py +76 -162
glitchlings/zoo/validation.py +477 -0
glitchlings/zoo/zeedub.py +33 -86
glitchlings-0.9.3.dist-info/METADATA +334 -0
glitchlings-0.9.3.dist-info/RECORD +80 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
glitchlings/zoo/_ocr_confusions.py +0 -34
glitchlings/zoo/_rate.py +0 -21
glitchlings/zoo/reduple.py +0 -169
glitchlings-0.2.5.dist-info/METADATA +0 -490
glitchlings-0.2.5.dist-info/RECORD +0 -27
glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0

glitchlings/zoo/rushmore.py CHANGED

@@ -1,171 +1,414 @@
-import math
+from __future__ import annotations
 import random
 import re
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass
+from enum import Enum, unique
 from typing import Any
-from .core import Glitchling, AttackWave
-from ._rate import resolve_rate
-try:
-    from glitchlings._zoo_rust import delete_random_words as _delete_random_words_rust
-except ImportError:  # pragma: no cover - compiled extension not present
-    _delete_random_words_rust = None
-def _python_delete_random_words(
-    text: str,
+from glitchlings.constants import RUSHMORE_DEFAULT_RATES
+from glitchlings.internal.rust_ffi import (
+    delete_random_words_rust,
+    reduplicate_words_rust,
+    resolve_seed,
+    swap_adjacent_words_rust,
+)
+from .core import AttackWave, Glitchling
+from .transforms import WordToken
+@unique
+class RushmoreMode(Enum):
+    """Enumerates Rushmore's selectable attack behaviours."""
+    DELETE = "delete"
+    DUPLICATE = "duplicate"
+    SWAP = "swap"
+    @classmethod
+    def execution_order(cls) -> tuple["RushmoreMode", ...]:
+        """Return the deterministic application order for Rushmore modes."""
+        return (cls.DELETE, cls.DUPLICATE, cls.SWAP)
+_MODE_ALIASES: dict[str, RushmoreMode] = {
+    "delete": RushmoreMode.DELETE,
+    "drop": RushmoreMode.DELETE,
+    "rushmore": RushmoreMode.DELETE,
+    "duplicate": RushmoreMode.DUPLICATE,
+    "reduplicate": RushmoreMode.DUPLICATE,
+    "repeat": RushmoreMode.DUPLICATE,
+    "swap": RushmoreMode.SWAP,
+    "adjacent": RushmoreMode.SWAP,
+}
+@dataclass(frozen=True)
+class RushmoreRuntimeConfig:
+    """Resolved Rushmore configuration used by both Python and Rust paths."""
+    modes: tuple[RushmoreMode, ...]
+    rates: dict[RushmoreMode, float]
+    delete_unweighted: bool
+    duplicate_unweighted: bool
+    def has_mode(self, mode: RushmoreMode) -> bool:
+        return mode in self.rates
+    def to_pipeline_descriptor(self) -> dict[str, Any]:
+        if not self.modes:
+            raise RuntimeError("Rushmore configuration is missing attack modes")
+        if len(self.modes) == 1:
+            mode = self.modes[0]
+            rate = self.rates.get(mode)
+            if rate is None:
+                message = f"Rushmore mode {mode!r} is missing a configured rate"
+                raise RuntimeError(message)
+            if mode is RushmoreMode.DELETE:
+                return {
+                    "type": "delete",
+                    "rate": rate,
+                    "unweighted": self.delete_unweighted,
+                }
+            if mode is RushmoreMode.DUPLICATE:
+                return {
+                    "type": "reduplicate",
+                    "rate": rate,
+                    "unweighted": self.duplicate_unweighted,
+                }
+            if mode is RushmoreMode.SWAP:
+                return {
+                    "type": "swap_adjacent",
+                    "rate": rate,
+                }
+            message = f"Rushmore mode {mode!r} is not serialisable"
+            raise RuntimeError(message)
+        descriptor: dict[str, Any] = {
+            "type": "rushmore_combo",
+            "modes": [mode.value for mode in self.modes],
+        }
+        if self.has_mode(RushmoreMode.DELETE):
+            descriptor["delete"] = {
+                "rate": self.rates[RushmoreMode.DELETE],
+                "unweighted": self.delete_unweighted,
+            }
+        if self.has_mode(RushmoreMode.DUPLICATE):
+            descriptor["duplicate"] = {
+                "rate": self.rates[RushmoreMode.DUPLICATE],
+                "unweighted": self.duplicate_unweighted,
+            }
+        if self.has_mode(RushmoreMode.SWAP):
+            descriptor["swap"] = {"rate": self.rates[RushmoreMode.SWAP]}
+        return descriptor
+@dataclass(frozen=True)
+class _WeightedWordToken:
+    """Internal helper that bundles weighting metadata with a token."""
+    token: WordToken
+    weight: float
+def _normalize_mode_item(value: RushmoreMode | str) -> list[RushmoreMode]:
+    if isinstance(value, RushmoreMode):
+        return [value]
+    text = str(value).strip().lower()
+    if not text:
+        return []
+    if text in {"all", "any", "full"}:
+        return list(RushmoreMode.execution_order())
+    tokens = [token for token in re.split(r"[+,\s]+", text) if token]
+    if not tokens:
+        return []
+    modes: list[RushmoreMode] = []
+    for token in tokens:
+        mode = _MODE_ALIASES.get(token)
+        if mode is None:
+            raise ValueError(f"Unsupported Rushmore mode '{value}'")
+        modes.append(mode)
+    return modes
+def _normalize_modes(
+    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
+) -> tuple[RushmoreMode, ...]:
+    if modes is None:
+        candidates: Sequence[RushmoreMode | str] = (RushmoreMode.DELETE,)
+    elif isinstance(modes, (RushmoreMode, str)):
+        candidates = (modes,)
+    else:
+        collected = tuple(modes)
+        candidates = collected if collected else (RushmoreMode.DELETE,)
+    resolved: list[RushmoreMode] = []
+    seen: set[RushmoreMode] = set()
+    for candidate in candidates:
+        for mode in _normalize_mode_item(candidate):
+            if mode not in seen:
+                seen.add(mode)
+                resolved.append(mode)
+    if not resolved:
+        return (RushmoreMode.DELETE,)
+    return tuple(resolved)
+def _resolve_mode_rate(
     *,
-    rate: float,
-    rng: random.Random,
-    unweighted: bool = False,
-) -> str:
-    """Delete random words from the input text while preserving whitespace."""
+    mode: RushmoreMode,
+    global_rate: float | None,
+    specific_rate: float | None,
+    allow_default: bool,
+) -> float | None:
+    baseline = specific_rate if specific_rate is not None else global_rate
+    if baseline is None:
+        if not allow_default:
+            return None
+        baseline = RUSHMORE_DEFAULT_RATES[mode.value]
-    effective_rate = max(rate, 0.0)
-    if effective_rate <= 0.0:
-        return text
+    value = float(baseline)
+    value = max(0.0, value)
+    if mode is RushmoreMode.SWAP:
+        value = min(1.0, value)
+    return value
-    tokens = re.split(r"(\s+)", text)  # Split but keep separators for later rejoin
-    candidate_data: list[tuple[int, float]] = []
-    for i in range(2, len(tokens), 2):  # Every other token is a word, skip the first word
-        word = tokens[i]
-        if not word or word.isspace():
-            continue
+def _resolve_rushmore_config(
+    *,
+    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
+    rate: float | None,
+    delete_rate: float | None,
+    duplicate_rate: float | None,
+    swap_rate: float | None,
+    unweighted: bool,
+    delete_unweighted: bool | None,
+    duplicate_unweighted: bool | None,
+    allow_defaults: bool,
+) -> RushmoreRuntimeConfig | None:
+    normalized_modes = _normalize_modes(modes)
+    global_rate = float(rate) if rate is not None else None
+    mode_specific_rates: dict[RushmoreMode, float | None] = {
+        RushmoreMode.DELETE: delete_rate,
+        RushmoreMode.DUPLICATE: duplicate_rate,
+        RushmoreMode.SWAP: swap_rate,
+    }
+    rates: dict[RushmoreMode, float] = {}
+    for mode in normalized_modes:
+        resolved = _resolve_mode_rate(
+            mode=mode,
+            global_rate=global_rate,
+            specific_rate=mode_specific_rates[mode],
+            allow_default=allow_defaults,
+        )
+        if resolved is None:
+            return None
+        rates[mode] = resolved
-        match = re.match(r"^(\W*)(.*?)(\W*)$", word)
-        core = match.group(2) if match else word
-        core_length = len(core) if core else len(word)
-        if core_length <= 0:
-            core_length = len(word.strip()) or len(word)
-        if core_length <= 0:
-            core_length = 1
-        weight = 1.0 if unweighted else 1.0 / core_length
-        candidate_data.append((i, weight))
-    if not candidate_data:
-        return text
+    delete_flag = bool(delete_unweighted if delete_unweighted is not None else unweighted)
+    duplicate_flag = bool(duplicate_unweighted if duplicate_unweighted is not None else unweighted)
-    allowed_deletions = min(
-        len(candidate_data), math.floor(len(candidate_data) * effective_rate)
+    return RushmoreRuntimeConfig(
+        modes=normalized_modes,
+        rates=rates,
+        delete_unweighted=delete_flag,
+        duplicate_unweighted=duplicate_flag,
     )
-    if allowed_deletions <= 0:
-        return text
-    mean_weight = sum(weight for _, weight in candidate_data) / len(candidate_data)
-    deletions = 0
-    for index, weight in candidate_data:
-        if deletions >= allowed_deletions:
-            break
+def delete_random_words(
+    text: str,
+    rate: float | None = None,
+    seed: int | None = None,
+    rng: random.Random | None = None,
+    unweighted: bool = False,
+) -> str:
+    """Delete random words from the input text."""
+    effective_rate = RUSHMORE_DEFAULT_RATES["delete"] if rate is None else rate
-        if effective_rate >= 1.0:
-            probability = 1.0
-        else:
-            if mean_weight <= 0.0:
-                probability = effective_rate
-            else:
-                probability = min(1.0, effective_rate * (weight / mean_weight))
-        if rng.random() >= probability:
-            continue
+    clamped_rate = max(0.0, effective_rate)
+    unweighted_flag = bool(unweighted)
-        word = tokens[index]
-        match = re.match(r"^(\W*)(.*?)(\W*)$", word)
-        if match:
-            prefix, _, suffix = match.groups()
-            tokens[index] = f"{prefix.strip()}{suffix.strip()}"
-        else:
-            tokens[index] = ""
+    seed_value = resolve_seed(seed, rng)
+    return delete_random_words_rust(text, clamped_rate, unweighted_flag, seed_value)
-        deletions += 1
-    text = "".join(tokens)
-    text = re.sub(r"\s+([.,;:])", r"\1", text)
-    text = re.sub(r"\s{2,}", " ", text).strip()
+def reduplicate_words(
+    text: str,
+    rate: float | None = None,
+    seed: int | None = None,
+    rng: random.Random | None = None,
+    *,
+    unweighted: bool = False,
+) -> str:
+    """Randomly reduplicate words in the text."""
+    effective_rate = RUSHMORE_DEFAULT_RATES["duplicate"] if rate is None else rate
+    clamped_rate = max(0.0, effective_rate)
+    unweighted_flag = bool(unweighted)
-    return text
+    seed_value = resolve_seed(seed, rng)
+    return reduplicate_words_rust(text, clamped_rate, unweighted_flag, seed_value)
-def delete_random_words(
+def swap_adjacent_words(
     text: str,
     rate: float | None = None,
     seed: int | None = None,
     rng: random.Random | None = None,
-    *,
-    max_deletion_rate: float | None = None,
-    unweighted: bool = False,
 ) -> str:
-    """Delete random words from the input text.
+    """Swap adjacent word cores while preserving spacing and punctuation."""
+    effective_rate = RUSHMORE_DEFAULT_RATES["swap"] if rate is None else rate
+    clamped_rate = max(0.0, min(effective_rate, 1.0))
+    seed_value = resolve_seed(seed, rng)
+    return swap_adjacent_words_rust(text, clamped_rate, seed_value)
-    Uses the optional Rust implementation when available.
-    """
-    effective_rate = resolve_rate(
+def rushmore_attack(
+    text: str,
+    *,
+    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
+    rate: float | None = None,
+    delete_rate: float | None = None,
+    duplicate_rate: float | None = None,
+    swap_rate: float | None = None,
+    unweighted: bool = False,
+    delete_unweighted: bool | None = None,
+    duplicate_unweighted: bool | None = None,
+    seed: int | None = None,
+    rng: random.Random | None = None,
+) -> str:
+    """Apply the configured Rushmore attack modes to ``text``."""
+    config = _resolve_rushmore_config(
+        modes=modes,
         rate=rate,
-        legacy_value=max_deletion_rate,
-        default=0.01,
-        legacy_name="max_deletion_rate",
+        delete_rate=delete_rate,
+        duplicate_rate=duplicate_rate,
+        swap_rate=swap_rate,
+        unweighted=unweighted,
+        delete_unweighted=delete_unweighted,
+        duplicate_unweighted=duplicate_unweighted,
+        allow_defaults=True,
     )
+    if config is None:
+        return text
-    if rng is None:
-        rng = random.Random(seed)
+    mode_rng = rng
+    if mode_rng is None and seed is not None:
+        mode_rng = random.Random(resolve_seed(seed, None))
-    clamped_rate = max(0.0, effective_rate)
-    unweighted_flag = bool(unweighted)
+    result = text
+    for mode in config.modes:
+        if not config.has_mode(mode):
+            continue
-    if _delete_random_words_rust is not None:
-        return _delete_random_words_rust(text, clamped_rate, unweighted_flag, rng)
+        rate_value = config.rates[mode]
+        if rate_value <= 0.0:
+            continue
-    return _python_delete_random_words(
-        text,
-        rate=clamped_rate,
-        rng=rng,
-        unweighted=unweighted_flag,
+        if mode is RushmoreMode.DELETE:
+            result = delete_random_words(
+                result,
+                rate=rate_value,
+                rng=mode_rng,
+                unweighted=config.delete_unweighted,
+            )
+        elif mode is RushmoreMode.DUPLICATE:
+            result = reduplicate_words(
+                result,
+                rate=rate_value,
+                rng=mode_rng,
+                unweighted=config.duplicate_unweighted,
+            )
+        else:
+            result = swap_adjacent_words(
+                result,
+                rate=rate_value,
+                rng=mode_rng,
+            )
+    return result
+def _rushmore_pipeline_descriptor(glitchling: Glitchling) -> dict[str, Any] | None:
+    config = _resolve_rushmore_config(
+        modes=glitchling.kwargs.get("modes"),
+        rate=glitchling.kwargs.get("rate"),
+        delete_rate=glitchling.kwargs.get("delete_rate"),
+        duplicate_rate=glitchling.kwargs.get("duplicate_rate"),
+        swap_rate=glitchling.kwargs.get("swap_rate"),
+        unweighted=glitchling.kwargs.get("unweighted", False),
+        delete_unweighted=glitchling.kwargs.get("delete_unweighted"),
+        duplicate_unweighted=glitchling.kwargs.get("duplicate_unweighted"),
+        allow_defaults=True,
     )
+    if config is None:
+        return None
+    return config.to_pipeline_descriptor()
 class Rushmore(Glitchling):
-    """Glitchling that deletes words to simulate missing information."""
+    """Glitchling that bundles deletion, duplication, and swap attacks."""
+    flavor = (
+        "You shouldn't have waited for the last minute to write that paper, anon. "
+        "Sure hope everything is in the right place."
+    )
+    _param_aliases = {"mode": "modes"}
     def __init__(
         self,
         *,
+        name: str = "Rushmore",
+        modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
         rate: float | None = None,
-        max_deletion_rate: float | None = None,
+        delete_rate: float | None = None,
+        duplicate_rate: float | None = None,
+        swap_rate: float | None = None,
         seed: int | None = None,
         unweighted: bool = False,
+        delete_unweighted: bool | None = None,
+        duplicate_unweighted: bool | None = None,
     ) -> None:
-        self._param_aliases = {"max_deletion_rate": "rate"}
-        effective_rate = resolve_rate(
-            rate=rate,
-            legacy_value=max_deletion_rate,
-            default=0.01,
-            legacy_name="max_deletion_rate",
-        )
+        normalized_modes = _normalize_modes(modes)
         super().__init__(
-            name="Rushmore",
-            corruption_function=delete_random_words,
+            name=name,
+            corruption_function=rushmore_attack,
             scope=AttackWave.WORD,
             seed=seed,
-            rate=effective_rate,
+            pipeline_operation=_rushmore_pipeline_descriptor,
+            modes=normalized_modes,
+            rate=rate,
+            delete_rate=delete_rate,
+            duplicate_rate=duplicate_rate,
+            swap_rate=swap_rate,
             unweighted=unweighted,
+            delete_unweighted=delete_unweighted,
+            duplicate_unweighted=duplicate_unweighted,
         )
-    def pipeline_operation(self) -> dict[str, Any] | None:
-        rate = self.kwargs.get("rate")
-        if rate is None:
-            rate = self.kwargs.get("max_deletion_rate")
-        if rate is None:
-            return None
-        unweighted = bool(self.kwargs.get("unweighted", False))
-        return {
-            "type": "delete",
-            "max_deletion_rate": float(rate),
-            "unweighted": unweighted,
-        }
 rushmore = Rushmore()
-__all__ = ["Rushmore", "rushmore"]
+__all__ = [
+    "Rushmore",
+    "rushmore",
+    "RushmoreMode",
+    "rushmore_attack",
+    "delete_random_words",
+    "reduplicate_words",
+    "swap_adjacent_words",
+]