glitchlings-0.4.1-cp311-cp311-macosx_11_0_universal2.whl → glitchlings-0.4.2-cp311-cp311-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +26 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-311-darwin.so +0 -0
- glitchlings/compat.py +215 -0
- glitchlings/config.py +136 -19
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/lexicon/__init__.py +8 -19
- glitchlings/lexicon/_cache.py +0 -7
- glitchlings/lexicon/graph.py +4 -12
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +15 -34
- glitchlings/lexicon/wordnet.py +31 -32
- glitchlings/main.py +9 -13
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +21 -14
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +0 -2
- glitchlings/zoo/core.py +114 -75
- glitchlings/zoo/jargoyle.py +9 -14
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +5 -8
- glitchlings/zoo/reduple.py +3 -1
- glitchlings/zoo/rushmore.py +2 -8
- glitchlings/zoo/scannequin.py +5 -4
- glitchlings/zoo/typogre.py +3 -7
- glitchlings/zoo/zeedub.py +2 -2
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/METADATA +67 -3
- glitchlings-0.4.2.dist-info/RECORD +42 -0
- glitchlings-0.4.1.dist-info/RECORD +0 -39
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/top_level.txt +0 -0
glitchlings/lexicon/vector.py
CHANGED
@@ -6,17 +6,18 @@ import argparse
 import importlib
 import json
 import math
-from pathlib import Path
 import sys
+from pathlib import Path
 from typing import Any, Callable, Iterable, Iterator, Mapping, MutableMapping, Sequence
 
 from . import LexiconBackend
-from ._cache import CacheSnapshot
+from ._cache import CacheSnapshot
+from ._cache import load_cache as _load_cache_file
+from ._cache import write_cache as _write_cache_file
 
 
 def _cosine_similarity(vector_a: Sequence[float], vector_b: Sequence[float]) -> float:
     """Return the cosine similarity between two dense vectors."""
-
     dot_product = 0.0
     norm_a = 0.0
     norm_b = 0.0
@@ -144,7 +145,6 @@ class _SpaCyAdapter(_Adapter):
 
 def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
     """Load embeddings from a JSON mapping of token to vector list."""
-
     with path.open("r", encoding="utf8") as handle:
         payload = json.load(handle)
 
@@ -164,11 +164,8 @@ def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
 
 def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     """Load ``gensim`` vectors from ``path``."""
-
     if importlib.util.find_spec("gensim") is None:
-        raise RuntimeError(
-            "The gensim package is required to load keyed vector embeddings."
-        )
+        raise RuntimeError("The gensim package is required to load keyed vector embeddings.")
 
     keyed_vectors_module = importlib.import_module("gensim.models.keyedvectors")
     if binary is None:
@@ -177,14 +174,11 @@ def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     if path.suffix in {".kv", ".kv2"}:
         return keyed_vectors_module.KeyedVectors.load(str(path), mmap="r")
 
-    return keyed_vectors_module.KeyedVectors.load_word2vec_format(
-        str(path), binary=binary
-    )
+    return keyed_vectors_module.KeyedVectors.load_word2vec_format(str(path), binary=binary)
 
 
 def _load_spacy_language(model_name: str) -> Any:
     """Load a spaCy language pipeline by name."""
-
     if importlib.util.find_spec("spacy") is None:
         raise RuntimeError(
             "spaCy is required to use spaCy-backed vector lexicons; install the 'vectors' extra."
@@ -196,7 +190,6 @@ def _load_spacy_language(model_name: str) -> Any:
 
 def _resolve_source(source: Any | None) -> _Adapter | None:
     """Return an adapter instance for ``source`` if possible."""
-
     if source is None:
         return None
 
@@ -229,9 +222,7 @@ def _resolve_source(source: Any | None) -> _Adapter | None:
 
     if suffix in {".kv", ".kv2", ".bin", ".gz", ".txt", ".vec"}:
         binary_flag = False if suffix in {".txt", ".vec"} else None
-        return _GensimAdapter(
-            _load_gensim_vectors(resolved_path, binary=binary_flag)
-        )
+        return _GensimAdapter(_load_gensim_vectors(resolved_path, binary=binary_flag))
 
     if hasattr(source, "most_similar") and hasattr(source, "key_to_index"):
         return _GensimAdapter(source)
@@ -358,42 +349,33 @@ class VectorLexicon(LexiconBackend):
         self._cache_dirty = True
         return synonyms
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
         normalized = self._normalize_for_lookup(word)
         synonyms = self._ensure_cached(original=word, normalized=normalized)
         return self._deterministic_sample(synonyms, limit=n, word=word, pos=pos)
 
     def precompute(self, word: str, *, limit: int | None = None) -> list[str]:
         """Populate the cache for ``word`` and return the stored synonyms."""
-
         normalized = self._normalize_for_lookup(word)
-        return list(
-            self._ensure_cached(original=word, normalized=normalized, limit=limit)
-        )
+        return list(self._ensure_cached(original=word, normalized=normalized, limit=limit))
 
     def iter_vocabulary(self) -> Iterator[str]:
         """Yield vocabulary tokens from the underlying embedding source."""
-
        if self._adapter is None:
            return iter(())
        return self._adapter.iter_keys()
 
     def export_cache(self) -> dict[str, list[str]]:
         """Return a copy of the in-memory synonym cache."""
-
         return {key: list(values) for key, values in self._cache.items()}
 
     @classmethod
     def load_cache(cls, path: str | Path) -> CacheSnapshot:
         """Load and validate a cache file for reuse."""
-
         return _load_cache_file(Path(path))
 
     def save_cache(self, path: str | Path | None = None) -> Path:
         """Persist the current cache to disk, returning the path used."""
-
         if path is None:
             if self._cache_path is None:
                 raise RuntimeError("No cache path supplied to VectorLexicon.")
@@ -430,7 +412,6 @@ def build_vector_cache(
     normalizer: Callable[[str], str] | None = None,
 ) -> Path:
     """Generate a synonym cache for ``words`` using ``source`` embeddings."""
-
     lexicon = VectorLexicon(
         source=source,
         max_neighbors=max_neighbors,
@@ -448,7 +429,6 @@
 
 def load_vector_source(spec: str) -> Any:
     """Resolve ``spec`` strings for the cache-building CLI."""
-
     if spec.startswith("spacy:"):
         model_name = spec.split(":", 1)[1]
         return _load_spacy_language(model_name)
@@ -538,7 +518,6 @@ def _iter_tokens_from_file(path: Path) -> Iterator[str]:
 
 def main(argv: Sequence[str] | None = None) -> int:
     """Entry-point for ``python -m glitchlings.lexicon.vector``."""
-
     args = _parse_cli(argv)
 
     if args.output.exists() and not args.overwrite:
@@ -547,11 +526,13 @@
         )
 
     if args.normalizer == "lower":
-        normalizer: Callable[[str], str] | None = (
-            None if args.case_sensitive else str.lower
-        )
+        normalizer: Callable[[str], str] | None = None if args.case_sensitive else str.lower
     else:
-        normalizer = lambda value: value
+
+        def _identity(value: str) -> str:
+            return value
+
+        normalizer = _identity
 
     source = load_vector_source(args.source)
     if args.tokens is not None:
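Most of the vector.py changes are formatter-driven (continuation lines joined, the blank line after each docstring dropped), but two are functional: the module now imports load_cache/write_cache from ._cache as _load_cache_file/_write_cache_file to back the class methods, and the CLI swaps a lambda normalizer for a named _identity function. A brief round-trip sketch of the cache API touched above; the file names are illustrative placeholders, not paths from this release:

    from glitchlings.lexicon.vector import VectorLexicon

    lexicon = VectorLexicon(source="vectors.kv")       # resolved through _resolve_source
    lexicon.precompute("analysis", limit=8)            # fills the in-memory synonym cache
    saved = lexicon.save_cache("synonyms-cache.json")  # persists via _write_cache_file
    snapshot = VectorLexicon.load_cache(saved)         # validates via _load_cache_file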
glitchlings/lexicon/wordnet.py
CHANGED
@@ -2,41 +2,50 @@
 
 from __future__ import annotations
 
+from importlib import import_module
+from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
-try:
-    import nltk  # type: ignore[import]
-except ModuleNotFoundError as exc:
-    nltk = None  # type: ignore[assignment]
-    find = None  # type: ignore[assignment]
-    _NLTK_IMPORT_ERROR = exc
-else:  # pragma: no cover - executed when NLTK is present
-    from nltk.corpus.reader import WordNetCorpusReader as _WordNetCorpusReader  # type: ignore[import]
-    from nltk.data import find as _nltk_find  # type: ignore[import]
+from ..compat import nltk as _nltk_dependency
+from . import LexiconBackend
+from ._cache import CacheSnapshot
 
-
-
+nltk = _nltk_dependency.get()  # type: ignore[assignment]
+_NLTK_IMPORT_ERROR = _nltk_dependency.error
 
 if TYPE_CHECKING:  # pragma: no cover - typing aid only
     from nltk.corpus.reader import WordNetCorpusReader  # type: ignore[import]
 else:  # pragma: no cover - runtime fallback to avoid hard dependency
     WordNetCorpusReader = Any
 
+find: Any | None = None
+_WORDNET_MODULE: Any | None = None
+
 if nltk is not None:  # pragma: no cover - guarded by import success
     try:
-        from nltk.corpus import wordnet as _WORDNET_MODULE  # type: ignore[import]
+        corpus_reader_module = import_module("nltk.corpus.reader")
+        WordNetCorpusReader = corpus_reader_module.WordNetCorpusReader  # type: ignore[assignment]
+    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when corpus missing
+        if _NLTK_IMPORT_ERROR is None:
+            _NLTK_IMPORT_ERROR = exc  # type: ignore[assignment]
+    else:
+        try:
+            data_module = import_module("nltk.data")
+        except ModuleNotFoundError as exc:  # pragma: no cover - triggered when data missing
+            if _NLTK_IMPORT_ERROR is None:
+                _NLTK_IMPORT_ERROR = exc  # type: ignore[assignment]
+        else:
+            find = getattr(data_module, "find", None)
+
+    try:
+        _WORDNET_MODULE = import_module("nltk.corpus.wordnet")
     except ModuleNotFoundError:  # pragma: no cover - only hit on namespace packages
         _WORDNET_MODULE = None
-    else:
-        WordNetCorpusReader = _WordNetCorpusReader  # type: ignore[assignment]
 else:
+    nltk = None  # type: ignore[assignment]
+    find = None
     _WORDNET_MODULE = None
 
-from pathlib import Path
-
-from . import LexiconBackend
-from ._cache import CacheSnapshot
-
 _WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
 _wordnet_ready = False
 
@@ -45,26 +54,23 @@ _VALID_POS: tuple[str, ...] = ("n", "v", "a", "r")
 
 def _require_nltk() -> None:
     """Ensure the NLTK dependency is present before continuing."""
-
     if nltk is None or find is None:
         message = (
             "The NLTK package is required for WordNet-backed lexicons; install "
             "`nltk` and its WordNet corpus manually to enable this backend."
         )
-        if _NLTK_IMPORT_ERROR is not None:
+        if "_NLTK_IMPORT_ERROR" in globals() and _NLTK_IMPORT_ERROR is not None:
            raise RuntimeError(message) from _NLTK_IMPORT_ERROR
        raise RuntimeError(message)
 
 
 def dependencies_available() -> bool:
     """Return ``True`` when the runtime NLTK dependency is present."""
-
     return nltk is not None and find is not None
 
 
 def _load_wordnet_reader() -> WordNetCorpusReader:
     """Return a WordNet corpus reader from the downloaded corpus files."""
-
     _require_nltk()
 
     try:
@@ -83,7 +89,6 @@ def _load_wordnet_reader() -> WordNetCorpusReader:
 
 def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
     """Retrieve the active WordNet handle, rebuilding it on demand."""
-
     global _WORDNET_HANDLE
 
     if force_refresh:
@@ -98,7 +103,6 @@ def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
 
 def ensure_wordnet() -> None:
     """Ensure the WordNet corpus is available before use."""
-
     global _wordnet_ready
     if _wordnet_ready:
         return
@@ -115,16 +119,13 @@ def ensure_wordnet() -> None:
         resource = _wordnet(force_refresh=True)
         resource.ensure_loaded()
     except LookupError as exc:  # pragma: no cover - only triggered when download fails
-        raise RuntimeError(
-            "Unable to load NLTK WordNet corpus for synonym lookups."
-        ) from exc
+        raise RuntimeError("Unable to load NLTK WordNet corpus for synonym lookups.") from exc
 
     _wordnet_ready = True
 
 
 def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
     """Gather deterministic synonym candidates for the supplied word."""
-
     normalized_word = word.lower()
     wordnet = _wordnet()
     synonyms: set[str] = set()
@@ -157,9 +158,7 @@ def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
 class WordNetLexicon(LexiconBackend):
     """Lexicon that retrieves synonyms from the NLTK WordNet corpus."""
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
         ensure_wordnet()
 
         if pos is None:
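wordnet.py now routes its optional NLTK import through the new glitchlings/compat.py module (+215 lines, whose body is not shown in this diff). From the two call sites above, the compat object evidently exposes a get() method returning the module or None and an error attribute holding the original ModuleNotFoundError. A hypothetical minimal shape, inferred only from those call sites and not taken from the actual compat.py:

    from importlib import import_module
    from typing import Any

    class OptionalDependency:
        """Lazy optional import that preserves the failure for later re-raising."""

        def __init__(self, module_name: str) -> None:
            self.module_name = module_name
            self.error: ModuleNotFoundError | None = None

        def get(self) -> Any | None:
            try:
                return import_module(self.module_name)
            except ModuleNotFoundError as exc:
                self.error = exc
                return None

    nltk = OptionalDependency("nltk")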
glitchlings/main.py
CHANGED
@@ -4,16 +4,16 @@ from __future__ import annotations
 
 import argparse
 import difflib
-from pathlib import Path
 import sys
+from pathlib import Path
 
 from . import SAMPLE_TEXT
 from .config import DEFAULT_ATTACK_SEED, build_gaggle, load_attack_config
 from .zoo import (
-    Glitchling,
-    Gaggle,
     BUILTIN_GLITCHLINGS,
     DEFAULT_GLITCHLING_NAMES,
+    Gaggle,
+    Glitchling,
     parse_glitchling_spec,
     summon,
 )
@@ -26,8 +26,8 @@ def build_parser() -> argparse.ArgumentParser:
 
     Returns:
         argparse.ArgumentParser: The configured argument parser instance.
-    """
 
+    """
     parser = argparse.ArgumentParser(
         description=(
             "Summon glitchlings to corrupt text. Provide input text as an argument, "
@@ -157,7 +157,6 @@ def build_lexicon_parser() -> argparse.ArgumentParser:
 
 def list_glitchlings() -> None:
     """Print information about the available built-in glitchlings."""
-
     for key in DEFAULT_GLITCHLING_NAMES:
         glitchling = BUILTIN_GLITCHLINGS[key]
         display_name = glitchling.name
@@ -178,8 +177,8 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
 
     Raises:
         SystemExit: Raised indirectly via ``parser.error`` on failure.
-    """
 
+    """
     if args.file is not None:
         try:
             return args.file.read_text(encoding="utf-8")
@@ -198,7 +197,8 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
         return SAMPLE_TEXT
 
     parser.error(
-        "No input text provided. Supply text as an argument, use --file, pipe input, or pass --sample."
+        "No input text provided. Supply text as an argument, use --file, pipe input, or "
+        "pass --sample."
     )
     raise AssertionError("parser.error should exit")
 
@@ -211,7 +211,6 @@ def summon_glitchlings(
     config_path: Path | None = None,
 ) -> Gaggle:
     """Instantiate the requested glitchlings and bundle them in a ``Gaggle``."""
-
     if config_path is not None:
         if names:
             parser.error("Cannot combine --config with --glitchling.")
@@ -245,10 +244,8 @@
     raise AssertionError("parser.error should exit")
 
 
-
 def show_diff(original: str, corrupted: str) -> None:
     """Display a unified diff between the original and corrupted text."""
-
     diff_lines = list(
         difflib.unified_diff(
             original.splitlines(keepends=True),
@@ -274,8 +271,8 @@ def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
 
     Returns:
         int: Exit code for the process (``0`` on success).
-    """
 
+    """
     if args.list:
         list_glitchlings()
         return 0
@@ -300,7 +297,6 @@
 
 def run_build_lexicon(args: argparse.Namespace) -> int:
     """Delegate to the vector lexicon cache builder using CLI arguments."""
-
     from glitchlings.lexicon.vector import main as vector_main
 
     vector_args = [
@@ -337,8 +333,8 @@ def main(argv: list[str] | None = None) -> int:
 
     Returns:
         int: Exit code suitable for use with ``sys.exit``.
-    """
 
+    """
     if argv is None:
         raw_args = sys.argv[1:]
     else:
glitchlings/util/__init__.py
CHANGED
@@ -1,12 +1,27 @@
 import difflib
 from collections.abc import Iterable
 
-
+__all__ = [
+    "SAMPLE_TEXT",
+    "string_diffs",
+    "KeyNeighborMap",
+    "KeyboardLayouts",
+    "KeyNeighbors",
+    "KEYNEIGHBORS",
+]
+
+SAMPLE_TEXT = (
+    "One morning, when Gregor Samsa woke from troubled dreams, he found himself "
+    "transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
+    "if he lifted his head a little he could see his brown belly, slightly domed and "
+    "divided by arches into stiff sections. The bedding was hardly able to cover it and "
+    "seemed ready to slide off any moment. His many legs, pitifully thin compared with "
+    "the size of the rest of him, waved about helplessly as he looked."
+)
 
 
 def string_diffs(a: str, b: str) -> list[list[tuple[str, str, str]]]:
-    """
-    Compare two strings using SequenceMatcher and return
+    """Compare two strings using SequenceMatcher and return
     grouped adjacent opcodes (excluding 'equal' tags).
 
     Each element is a tuple: (tag, a_text, b_text).
@@ -39,7 +54,6 @@ KeyboardLayouts = dict[str, KeyNeighborMap]
 
 def _build_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
     """Derive 8-neighbour adjacency lists from keyboard layout rows."""
-
     grid: dict[tuple[int, int], str] = {}
     for y, row in enumerate(rows):
         for x, char in enumerate(row):
glitchlings/util/adapters.py
ADDED
@@ -0,0 +1,27 @@
+"""Adapter helpers shared across Python and DLC integrations."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from ..zoo import Gaggle, Glitchling, summon
+
+
+def coerce_gaggle(
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    *,
+    seed: int,
+) -> Gaggle:
+    """Return a :class:`Gaggle` built from any supported glitchling specifier."""
+    if isinstance(glitchlings, Gaggle):
+        return glitchlings
+
+    if isinstance(glitchlings, (Glitchling, str)):
+        resolved: Iterable[str | Glitchling] = [glitchlings]
+    else:
+        resolved = glitchlings
+
+    return summon(list(resolved), seed=seed)
+
+
+__all__ = ["coerce_gaggle"]
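The new coerce_gaggle helper normalizes the glitchlings argument accepted by the DLC integrations (compare the dlc/_shared.py and dlc/*.py entries in the file list). A usage sketch; the glitchling names are illustrative registry keys, and seed=151 mirrors summon's default:

    from glitchlings.util.adapters import coerce_gaggle

    gaggle = coerce_gaggle("typogre", seed=151)          # one name -> single-member Gaggle
    same = coerce_gaggle(gaggle, seed=151)               # an existing Gaggle passes through
    mixed = coerce_gaggle(["typogre", "mim1c"], seed=7)  # iterable of names or instances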
glitchlings/zoo/__init__.py
CHANGED
@@ -3,16 +3,25 @@ from __future__ import annotations
 import ast
 from typing import Any
 
-from .typogre import Typogre, typogre
-from .mim1c import Mim1c, mim1c
-from .jargoyle import Jargoyle, jargoyle, dependencies_available as _jargoyle_available
 from .adjax import Adjax, adjax
+from .core import (
+    Gaggle,
+    Glitchling,
+    is_rust_pipeline_enabled,
+    is_rust_pipeline_supported,
+    pipeline_feature_flag_enabled,
+    plan_glitchling_specs,
+    plan_glitchlings,
+)
+from .jargoyle import Jargoyle, jargoyle
+from .jargoyle import dependencies_available as _jargoyle_available
+from .mim1c import Mim1c, mim1c
+from .redactyl import Redactyl, redactyl
 from .reduple import Reduple, reduple
 from .rushmore import Rushmore, rushmore
-from .redactyl import Redactyl, redactyl
 from .scannequin import Scannequin, scannequin
+from .typogre import Typogre, typogre
 from .zeedub import Zeedub, zeedub
-from .core import Glitchling, Gaggle
 
 __all__ = [
     "Typogre",
@@ -35,6 +44,11 @@ __all__ = [
     "zeedub",
     "Glitchling",
     "Gaggle",
+    "plan_glitchlings",
+    "plan_glitchling_specs",
+    "is_rust_pipeline_enabled",
+    "is_rust_pipeline_supported",
+    "pipeline_feature_flag_enabled",
     "summon",
     "BUILTIN_GLITCHLINGS",
     "DEFAULT_GLITCHLING_NAMES",
@@ -71,7 +85,6 @@ DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
 
 def parse_glitchling_spec(specification: str) -> Glitchling:
     """Return a glitchling instance configured according to ``specification``."""
-
     text = specification.strip()
     if not text:
         raise ValueError("Glitchling specification cannot be empty.")
@@ -98,14 +111,10 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
     try:
         call_expr = ast.parse(f"_({arg_source})", mode="eval").body
     except SyntaxError as exc:
-        raise ValueError(
-            f"Invalid parameter syntax for glitchling '{name}': {exc.msg}"
-        ) from exc
+        raise ValueError(f"Invalid parameter syntax for glitchling '{name}': {exc.msg}") from exc
 
     if not isinstance(call_expr, ast.Call) or call_expr.args:
-        raise ValueError(
-            f"Glitchling '{name}' parameters must be provided as keyword arguments."
-        )
+        raise ValueError(f"Glitchling '{name}' parameters must be provided as keyword arguments.")
 
     kwargs: dict[str, Any] = {}
     for keyword in call_expr.keywords:
@@ -128,7 +137,6 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
 
 def get_glitchling_class(name: str) -> type[Glitchling]:
     """Look up the glitchling class registered under ``name``."""
-
     key = name.strip().lower()
     if not key:
         raise ValueError("Glitchling name cannot be empty.")
@@ -142,7 +150,6 @@
 
 def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
     """Summon glitchlings by name (using defaults) or instance (to change parameters)."""
-
     summoned: list[Glitchling] = []
     for entry in glitchlings:
         if isinstance(entry, Glitchling):
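parse_glitchling_spec parses the parameter list with ast and rejects positional arguments, so specs follow a name(key=value, ...) grammar. A sketch of both paths; the rate parameter is an assumed example, not taken from this diff:

    from glitchlings.zoo import parse_glitchling_spec

    glitchling = parse_glitchling_spec("typogre(rate=0.05)")  # keyword arguments accepted
    parse_glitchling_spec("typogre(0.05)")                    # ValueError: keywords only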
glitchlings/zoo/_ocr_confusions.py
CHANGED
@@ -26,9 +26,7 @@ def load_confusion_table() -> list[tuple[str, list[str]]]:
 
     # Sort longer patterns first to avoid overlapping matches, mirroring the
     # behaviour of the Rust `confusion_table` helper.
-    indexed_entries.sort(
-        key=lambda item: (-len(item[1][0]), item[0])
-    )
+    indexed_entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
     entries = [entry for _, entry in indexed_entries]
     _CONFUSION_TABLE = entries
     return entries
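The collapsed sort call keeps the same key: longest pattern first, then the original index as a stable tie-breaker. A toy demonstration with made-up confusion entries:

    entries = [(0, ("m", ["rn"])), (1, ("rn", ["m"])), (2, ("l", ["1"]))]
    entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
    # -> [(1, ("rn", ["m"])), (0, ("m", ["rn"])), (2, ("l", ["1"]))]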
glitchlings/zoo/_rate.py
CHANGED
@@ -9,11 +9,8 @@ def resolve_rate(
     legacy_name: str,
 ) -> float:
     """Return the effective rate while enforcing mutual exclusivity."""
-
     if rate is not None and legacy_value is not None:
-        raise ValueError(
-            f"Specify either 'rate' or '{legacy_name}', not both."
-        )
+        raise ValueError(f"Specify either 'rate' or '{legacy_name}', not both.")
     if rate is not None:
         return rate
     if legacy_value is not None:
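resolve_rate backs the rate/legacy-alias pattern the glitchlings share (for example rate versus swap_rate in adjax.py below). A behavior sketch based on the hunk above; the signature is partially cut off in this view, so the keyword names are taken from the adjax.py call site:

    from glitchlings.zoo._rate import resolve_rate

    resolve_rate(rate=0.2, legacy_value=None, legacy_name="swap_rate")  # -> 0.2
    resolve_rate(rate=None, legacy_value=0.3, legacy_name="swap_rate")  # -> 0.3
    resolve_rate(rate=0.2, legacy_value=0.3, legacy_name="swap_rate")   # raises ValueError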
glitchlings/zoo/_sampling.py
CHANGED
glitchlings/zoo/_text_utils.py
CHANGED
@@ -10,13 +10,11 @@ _TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
 
 def split_preserving_whitespace(text: str) -> list[str]:
     """Split text while keeping whitespace tokens for stable reconstruction."""
-
     return _WORD_SPLIT_PATTERN.split(text)
 
 
 def split_token_edges(token: str) -> tuple[str, str, str]:
     """Return leading, core, and trailing segments for a token."""
-
     match = _TOKEN_EDGES_PATTERN.match(token)
     if match is None:
         return "", token, ""
@@ -25,7 +23,6 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
 
 def token_core_length(token: str) -> int:
     """Return the length of the main word characters for weighting heuristics."""
-
     _, core, _ = split_token_edges(token)
     candidate = core if core else token
     length = len(candidate)
@@ -50,7 +47,6 @@ class WordToken:
     @property
     def has_core(self) -> bool:
         """Return ``True`` when the token contains at least one core character."""
-
         return bool(self.core)
 
 
@@ -65,8 +61,8 @@ def collect_word_tokens(
         tokens: Token sequence produced by :func:`split_preserving_whitespace`.
         skip_first_word: Exclude the first candidate token (used by Rushmore to
             preserve leading words).
-    """
 
+    """
     start = 2 if skip_first_word else 0
     collected: list[WordToken] = []
     for index in range(start, len(tokens), 2):
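The _TOKEN_EDGES_PATTERN regex shown at the top of this file splits a token into a non-word prefix, a core, and a non-word suffix, which is what the helpers above operate on. A quick demonstration of split_token_edges, whose body appears in full in the hunks:

    from glitchlings.zoo._text_utils import split_token_edges

    split_token_edges('"Hello,"')  # -> ('"', 'Hello', ',"')
    split_token_edges("---")       # no word core: -> ('---', '', '')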
glitchlings/zoo/adjax.py
CHANGED
@@ -20,7 +20,6 @@ def _python_swap_adjacent_words(
     rng: random.Random,
 ) -> str:
     """Swap the cores of adjacent words while keeping affixes and spacing intact."""
-
     tokens = split_preserving_whitespace(text)
     if len(tokens) < 2:
         return text
@@ -72,7 +71,6 @@ def swap_adjacent_words(
     swap_rate: float | None = None,
 ) -> str:
     """Swap adjacent word cores while preserving spacing and punctuation."""
-
     effective_rate = resolve_rate(
         rate=rate,
         legacy_value=swap_rate,