PyPI - glitchlings - Versions diffs - 0.4.1__cp312-cp312-win_amd64.whl → 0.4.3__cp312-cp312-win_amd64.whl - Mend

glitchlings 0.4.1__cp312-cp312-win_amd64.whl → 0.4.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of glitchlings might be problematic. Click here for more details.

Files changed (47)

glitchlings/__init__.py +30 -17
glitchlings/__main__.py +0 -1
glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
glitchlings/compat.py +284 -0
glitchlings/config.py +164 -34
glitchlings/config.toml +1 -1
glitchlings/dlc/__init__.py +3 -1
glitchlings/dlc/_shared.py +68 -0
glitchlings/dlc/huggingface.py +26 -41
glitchlings/dlc/prime.py +64 -101
glitchlings/dlc/pytorch.py +216 -0
glitchlings/dlc/pytorch_lightning.py +233 -0
glitchlings/lexicon/__init__.py +12 -33
glitchlings/lexicon/_cache.py +21 -22
glitchlings/lexicon/data/default_vector_cache.json +80 -14
glitchlings/lexicon/metrics.py +1 -8
glitchlings/lexicon/vector.py +109 -49
glitchlings/lexicon/wordnet.py +89 -49
glitchlings/main.py +30 -24
glitchlings/util/__init__.py +18 -4
glitchlings/util/adapters.py +27 -0
glitchlings/zoo/__init__.py +26 -15
glitchlings/zoo/_ocr_confusions.py +1 -3
glitchlings/zoo/_rate.py +1 -4
glitchlings/zoo/_sampling.py +0 -1
glitchlings/zoo/_text_utils.py +1 -5
glitchlings/zoo/adjax.py +2 -4
glitchlings/zoo/apostrofae.py +128 -0
glitchlings/zoo/assets/__init__.py +0 -0
glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
glitchlings/zoo/core.py +152 -87
glitchlings/zoo/jargoyle.py +50 -45
glitchlings/zoo/mim1c.py +11 -10
glitchlings/zoo/redactyl.py +16 -16
glitchlings/zoo/reduple.py +5 -3
glitchlings/zoo/rushmore.py +4 -10
glitchlings/zoo/scannequin.py +7 -6
glitchlings/zoo/typogre.py +8 -9
glitchlings/zoo/zeedub.py +6 -3
{glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
glitchlings-0.4.3.dist-info/RECORD +46 -0
glitchlings/lexicon/graph.py +0 -290
glitchlings-0.4.1.dist-info/RECORD +0 -39
{glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
{glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
{glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
{glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0

glitchlings/dlc/pytorch_lightning.py ADDED Viewed

@@ -0,0 +1,233 @@
+"""Integration helpers for PyTorch Lightning data modules."""
+from __future__ import annotations
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Any, cast
+from ..compat import get_pytorch_lightning_datamodule, require_pytorch_lightning
+from ..util.adapters import coerce_gaggle
+from ..zoo import Gaggle, Glitchling
+from ..zoo.core import _is_transcript
+def _normalise_columns(column: str | Sequence[str]) -> list[str]:
+    """Normalise a column specification to a list."""
+    if isinstance(column, str):
+        return [column]
+    normalised = list(column)
+    if not normalised:
+        raise ValueError("At least one column must be specified")
+    return normalised
+def _glitch_value(value: Any, gaggle: Gaggle) -> Any:
+    """Apply glitchlings to a value when it contains textual content."""
+    if isinstance(value, str) or _is_transcript(value, allow_empty=False, require_all_content=True):
+        return gaggle.corrupt(value)
+    if isinstance(value, Sequence) and value and all(isinstance(item, str) for item in value):
+        return [gaggle.corrupt(item) for item in value]
+    return value
+def _glitch_batch(batch: Any, columns: list[str], gaggle: Gaggle) -> Any:
+    """Apply glitchlings to the configured batch columns."""
+    if not isinstance(batch, Mapping):
+        return batch
+    if hasattr(batch, "copy"):
+        mutated = batch.copy()
+    else:
+        mutated = dict(batch)
+    missing = [column for column in columns if column not in mutated]
+    if missing:
+        missing_str = ", ".join(sorted(missing))
+        raise ValueError(f"Columns not found in batch: {missing_str}")
+    for column in columns:
+        mutated[column] = _glitch_value(mutated[column], gaggle)
+    return mutated
+def _wrap_dataloader(dataloader: Any, columns: list[str], gaggle: Gaggle) -> Any:
+    """Wrap a dataloader so yielded batches are corrupted lazily."""
+    if dataloader is None:
+        return None
+    if isinstance(dataloader, Mapping):
+        mapping_type = cast(type[Any], dataloader.__class__)
+        return mapping_type(
+            {
+                key: _wrap_dataloader(value, columns, gaggle)
+                for key, value in dataloader.items()
+            }
+        )
+    if isinstance(dataloader, list):
+        return [_wrap_dataloader(value, columns, gaggle) for value in dataloader]
+    if isinstance(dataloader, tuple):
+        return tuple(_wrap_dataloader(value, columns, gaggle) for value in dataloader)
+    if isinstance(dataloader, Sequence) and not isinstance(dataloader, (str, bytes, bytearray)):
+        sequence_type = cast(type[Any], dataloader.__class__)
+        return sequence_type(
+            _wrap_dataloader(value, columns, gaggle) for value in dataloader
+        )
+    return _GlitchedDataLoader(dataloader, columns, gaggle)
+class _GlitchedDataLoader:
+    """Proxy dataloader that glitches batches produced by the wrapped loader."""
+    def __init__(self, dataloader: Any, columns: list[str], gaggle: Gaggle) -> None:
+        self._dataloader = dataloader
+        self._columns = columns
+        self._gaggle = gaggle
+    def __iter__(self) -> Any:
+        for batch in self._dataloader:
+            yield _glitch_batch(batch, self._columns, self._gaggle)
+    def __len__(self) -> int:
+        return len(self._dataloader)
+    def __getattr__(self, attribute: str) -> Any:
+        return getattr(self._dataloader, attribute)
+def _glitch_datamodule(
+    datamodule: Any,
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    column: str | Sequence[str],
+    *,
+    seed: int = 151,
+) -> Any:
+    """Return a proxy that applies glitchlings to batches from the datamodule."""
+    columns = _normalise_columns(column)
+    gaggle = coerce_gaggle(glitchlings, seed=seed)
+    return _GlitchedLightningDataModule(datamodule, columns, gaggle)
+class _GlitchedLightningDataModule:
+    """Proxy wrapper around a LightningDataModule applying glitchlings to batches."""
+    def __init__(self, base: Any, columns: list[str], gaggle: Gaggle) -> None:
+        object.__setattr__(self, "_glitch_base", base)
+        object.__setattr__(self, "_glitch_columns", columns)
+        object.__setattr__(self, "_glitch_gaggle", gaggle)
+    def __getattr__(self, attribute: str) -> Any:
+        return getattr(self._glitch_base, attribute)
+    def __setattr__(self, attribute: str, value: Any) -> None:
+        if attribute.startswith("_glitch_"):
+            object.__setattr__(self, attribute, value)
+        else:
+            setattr(self._glitch_base, attribute, value)
+    def __delattr__(self, attribute: str) -> None:
+        if attribute.startswith("_glitch_"):
+            object.__delattr__(self, attribute)
+        else:
+            delattr(self._glitch_base, attribute)
+    def __dir__(self) -> list[str]:
+        return sorted(set(dir(self.__class__)) | set(dir(self._glitch_base)))
+    # LightningDataModule API -------------------------------------------------
+    def prepare_data(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.prepare_data(*args, **kwargs)
+    def setup(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.setup(*args, **kwargs)
+    def teardown(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.teardown(*args, **kwargs)
+    def state_dict(self) -> Mapping[str, Any]:
+        state = self._glitch_base.state_dict()
+        return cast(Mapping[str, Any], state)
+    def load_state_dict(self, state_dict: Mapping[str, Any]) -> None:
+        self._glitch_base.load_state_dict(state_dict)
+    def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.transfer_batch_to_device(batch, device, dataloader_idx)
+    def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.on_before_batch_transfer(batch, dataloader_idx)
+    def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.on_after_batch_transfer(batch, dataloader_idx)
+    def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.train_dataloader(*args, **kwargs)
+        return _wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.val_dataloader(*args, **kwargs)
+        return _wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.test_dataloader(*args, **kwargs)
+        return _wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.predict_dataloader(*args, **kwargs)
+        return _wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+def _ensure_datamodule_class() -> Any:
+    """Return the Lightning ``LightningDataModule`` patched with ``.glitch``."""
+    datamodule_cls = get_pytorch_lightning_datamodule()
+    if datamodule_cls is None:  # pragma: no cover - dependency is optional
+        module = require_pytorch_lightning("pytorch_lightning is not installed")
+        datamodule_cls = getattr(module, "LightningDataModule", None)
+        if datamodule_cls is None:
+            raise ModuleNotFoundError("pytorch_lightning is not installed")
+    if getattr(datamodule_cls, "glitch", None) is None:
+        def glitch(
+            self: Any,
+            glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+            *,
+            column: str | Sequence[str],
+            seed: int = 151,
+            **_: Any,
+        ) -> Any:
+            return _glitch_datamodule(self, glitchlings, column, seed=seed)
+        setattr(datamodule_cls, "glitch", glitch)
+    if not issubclass(_GlitchedLightningDataModule, datamodule_cls):
+        _GlitchedLightningDataModule.__bases__ = (datamodule_cls,)
+    return datamodule_cls
+def install() -> None:
+    """Monkeypatch ``LightningDataModule`` with ``.glitch``."""
+    _ensure_datamodule_class()
+LightningDataModule: type[Any] | None
+_LightningDataModuleAlias = get_pytorch_lightning_datamodule()
+if _LightningDataModuleAlias is not None:
+    LightningDataModule = _ensure_datamodule_class()
+else:  # pragma: no cover - optional dependency
+    LightningDataModule = None
+__all__ = ["LightningDataModule", "install"]

glitchlings/lexicon/__init__.py CHANGED Viewed

@@ -2,13 +2,14 @@
 from __future__ import annotations
+import random
 from abc import ABC, abstractmethod
 from hashlib import blake2s
 from pathlib import Path
-import random
 from typing import Callable, Iterable
 from glitchlings.config import get_config
 from ._cache import CacheEntries, CacheSnapshot
@@ -21,6 +22,7 @@ class Lexicon(ABC):
         Optional integer used to derive deterministic random number generators
         for synonym sampling. Identical seeds guarantee reproducible results for
         the same word/part-of-speech queries.
     """
     def __init__(self, *, seed: int | None = None) -> None:
@@ -29,17 +31,14 @@ class Lexicon(ABC):
     @property
     def seed(self) -> int | None:
         """Return the current base seed used for deterministic sampling."""
         return self._seed
     def reseed(self, seed: int | None) -> None:
         """Update the base seed driving deterministic synonym sampling."""
         self._seed = seed
     def _derive_rng(self, word: str, pos: str | None) -> random.Random:
         """Return an RNG derived from the base seed, word, and POS tag."""
         seed_material = blake2s(digest_size=8)
         seed_material.update(word.lower().encode("utf8"))
         if pos is not None:
@@ -53,7 +52,6 @@ class Lexicon(ABC):
         self, values: Iterable[str], *, limit: int, word: str, pos: str | None
     ) -> list[str]:
         """Return up to ``limit`` values sampled deterministically."""
         if limit <= 0:
             return []
@@ -67,14 +65,11 @@ class Lexicon(ABC):
         return [items[index] for index in indices]
     @abstractmethod
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
         """Return up to ``n`` synonyms for ``word`` constrained by ``pos``."""
     def supports_pos(self, pos: str | None) -> bool:
         """Return ``True`` when the backend can service ``pos`` queries."""
         return True
     def __repr__(self) -> str:  # pragma: no cover - trivial representation
@@ -96,42 +91,39 @@ class LexiconBackend(Lexicon):
         """Persist the backend cache to ``path`` and return the destination."""
-from .graph import GraphLexicon
-from .metrics import (
+from .metrics import (  # noqa: E402
     compare_lexicons,
     coverage_ratio,
     mean_cosine_similarity,
     synonym_diversity,
 )
-from .vector import VectorLexicon, build_vector_cache
+from .vector import VectorLexicon, build_vector_cache  # noqa: E402
+_WordNetLexicon: type[LexiconBackend] | None
 try:  # pragma: no cover - optional dependency
-    from .wordnet import WordNetLexicon
+    from .wordnet import WordNetLexicon as _WordNetLexicon
 except Exception:  # pragma: no cover - triggered when nltk unavailable
-    WordNetLexicon = None  # type: ignore[assignment]
+    _WordNetLexicon = None
+WordNetLexicon: type[LexiconBackend] | None = _WordNetLexicon
 _BACKEND_FACTORIES: dict[str, Callable[[int | None], Lexicon | None]] = {}
-def register_backend(
-    name: str, factory: Callable[[int | None], Lexicon | None]
-) -> None:
+def register_backend(name: str, factory: Callable[[int | None], Lexicon | None]) -> None:
     """Register ``factory`` for ``name`` so it can be selected via config."""
     normalized = name.lower()
     _BACKEND_FACTORIES[normalized] = factory
 def unregister_backend(name: str) -> None:
     """Remove a previously registered backend."""
     _BACKEND_FACTORIES.pop(name.lower(), None)
 def available_backends() -> list[str]:
     """Return the names of registered lexicon factories."""
     return sorted(_BACKEND_FACTORIES)
@@ -145,16 +137,6 @@ def _vector_backend(seed: int | None) -> Lexicon | None:
     return VectorLexicon(cache_path=cache_path, seed=seed)
-def _graph_backend(seed: int | None) -> Lexicon | None:
-    config = get_config()
-    cache_path = config.lexicon.graph_cache
-    if cache_path is None:
-        return None
-    if not cache_path.exists():
-        return None
-    return GraphLexicon(cache_path=cache_path, seed=seed)
 def _wordnet_backend(seed: int | None) -> Lexicon | None:  # pragma: no cover - optional
     if WordNetLexicon is None:
         return None
@@ -166,13 +148,11 @@ def _wordnet_backend(seed: int | None) -> Lexicon | None:  # pragma: no cover -
 register_backend("vector", _vector_backend)
-register_backend("graph", _graph_backend)
 register_backend("wordnet", _wordnet_backend)
 def get_default_lexicon(seed: int | None = None) -> Lexicon:
     """Return the first available lexicon according to configuration priority."""
     config = get_config()
     attempts: list[str] = []
     for name in config.lexicon.priority:
@@ -195,7 +175,6 @@ __all__ = [
     "Lexicon",
     "LexiconBackend",
     "VectorLexicon",
-    "GraphLexicon",
     "WordNetLexicon",
     "build_vector_cache",
     "compare_lexicons",

glitchlings/lexicon/_cache.py CHANGED Viewed

@@ -6,8 +6,7 @@ import json
 from dataclasses import dataclass
 from hashlib import blake2s
 from pathlib import Path
-from typing import Mapping, Sequence
+from typing import Mapping, Sequence, cast
 CacheEntries = dict[str, list[str]]
@@ -20,9 +19,8 @@ class CacheSnapshot:
     checksum: str | None = None
-def _normalise_entries(payload: Mapping[str, Sequence[str]]) -> CacheEntries:
+def _normalise_entries(payload: Mapping[str, object]) -> CacheEntries:
     """Convert raw cache payloads into canonical mapping form."""
     entries: CacheEntries = {}
     for key, values in payload.items():
         if not isinstance(key, str):
@@ -35,46 +33,47 @@ def _normalise_entries(payload: Mapping[str, Sequence[str]]) -> CacheEntries:
 def _canonical_json(entries: Mapping[str, Sequence[str]]) -> str:
     """Return a deterministic JSON serialisation for ``entries``."""
     serialisable = {key: list(values) for key, values in sorted(entries.items())}
     return json.dumps(serialisable, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
 def compute_checksum(entries: Mapping[str, Sequence[str]]) -> str:
     """Return a BLAKE2s checksum for ``entries``."""
     digest = blake2s(_canonical_json(entries).encode("utf8"), digest_size=16)
     return digest.hexdigest()
 def load_cache(path: Path) -> CacheSnapshot:
     """Load a cache from ``path`` and verify its checksum if present."""
     if not path.exists():
         return CacheSnapshot(entries={}, checksum=None)
     with path.open("r", encoding="utf8") as handle:
-        payload = json.load(handle)
+        payload_obj = json.load(handle)
     checksum: str | None = None
-    entries_payload: Mapping[str, Sequence[str]]
+    entries_payload: Mapping[str, object]
+    if not isinstance(payload_obj, Mapping):
+        raise RuntimeError("Synonym cache payload must be a mapping of strings to lists.")
+    payload = cast(Mapping[str, object], payload_obj)
-    if isinstance(payload, Mapping) and "__meta__" in payload and "entries" in payload:
-        meta = payload["__meta__"]
-        entries_payload = payload["entries"]  # type: ignore[assignment]
-        if not isinstance(entries_payload, Mapping):
+    if "__meta__" in payload and "entries" in payload:
+        meta_obj = payload["__meta__"]
+        entries_obj = payload["entries"]
+        if not isinstance(entries_obj, Mapping):
             raise RuntimeError("Synonym cache entries must be stored as a mapping.")
-        if isinstance(meta, Mapping):
-            raw_checksum = meta.get("checksum")
+        entries_payload = cast(Mapping[str, object], entries_obj)
+        if isinstance(meta_obj, Mapping):
+            raw_checksum = meta_obj.get("checksum")
             if raw_checksum is not None and not isinstance(raw_checksum, str):
                 raise RuntimeError("Synonym cache checksum must be a string when provided.")
-            checksum = raw_checksum
+            checksum = raw_checksum if isinstance(raw_checksum, str) else None
         else:
             raise RuntimeError("Synonym cache metadata must be a mapping.")
-    elif isinstance(payload, Mapping):
-        entries_payload = payload  # legacy format without metadata
     else:
-        raise RuntimeError("Synonym cache payload must be a mapping of strings to lists.")
+        entries_payload = payload  # legacy format without metadata
     entries = _normalise_entries(entries_payload)
     if checksum is not None:
@@ -89,8 +88,9 @@ def load_cache(path: Path) -> CacheSnapshot:
 def write_cache(path: Path, entries: Mapping[str, Sequence[str]]) -> CacheSnapshot:
     """Persist ``entries`` to ``path`` with checksum metadata."""
-    serialisable = {key: list(values) for key, values in sorted(entries.items())}
+    serialisable: CacheEntries = {
+        key: list(values) for key, values in sorted(entries.items())
+    }
     checksum = compute_checksum(serialisable)
     payload = {
         "__meta__": {
@@ -108,4 +108,3 @@ def write_cache(path: Path, entries: Mapping[str, Sequence[str]]) -> CacheSnapsh
 __all__ = ["CacheEntries", "CacheSnapshot", "compute_checksum", "load_cache", "write_cache"]

glitchlings/lexicon/data/default_vector_cache.json CHANGED Viewed

@@ -1,16 +1,82 @@
 {
-  "sing": ["croon", "warble", "chant", "serenade"],
-  "happy": ["cheerful", "joyful", "contented", "gleeful"],
-  "songs": ["tunes", "melodies", "ballads", "airs"],
-  "quickly": ["rapidly", "swiftly", "speedily", "promptly"],
-  "text": ["passage", "excerpt", "phrase", "content"],
-  "words": ["terms", "phrases", "lexemes", "expressions"],
-  "alpha": ["beta", "gamma", "delta"],
-  "beta": ["alpha", "gamma", "delta"],
-  "gamma": ["alpha", "beta", "delta"],
-  "delta": ["alpha", "beta", "gamma"],
-  "they": ["these people", "those folks", "those individuals"],
-  "quick": ["rapid", "swift", "brisk", "prompt"],
-  "fast": ["rapid", "swift", "quick", "speedy"],
-  "slow": ["sluggish", "lethargic", "unhurried", "deliberate"]
+  "alpha": [
+    "beta",
+    "gamma",
+    "delta"
+  ],
+  "beta": [
+    "alpha",
+    "gamma",
+    "delta"
+  ],
+  "delta": [
+    "alpha",
+    "beta",
+    "gamma"
+  ],
+  "fast": [
+    "rapid",
+    "swift",
+    "speedy",
+    "brisk"
+  ],
+  "gamma": [
+    "alpha",
+    "beta",
+    "delta"
+  ],
+  "happy": [
+    "glad",
+    "joyful",
+    "content",
+    "upbeat"
+  ],
+  "quick": [
+    "swift",
+    "rapid",
+    "speedy",
+    "nimble"
+  ],
+  "quickly": [
+    "swiftly",
+    "rapidly",
+    "promptly",
+    "speedily"
+  ],
+  "sing": [
+    "croon",
+    "serenade",
+    "vocalize",
+    "perform"
+  ],
+  "slow": [
+    "sluggish",
+    "leisurely",
+    "unhurried",
+    "gradual"
+  ],
+  "songs": [
+    "tracks",
+    "melodies",
+    "ballads",
+    "tunes"
+  ],
+  "text": [
+    "passage",
+    "copy",
+    "script",
+    "narrative"
+  ],
+  "they": [
+    "those people",
+    "those individuals",
+    "the group",
+    "those folks"
+  ],
+  "words": [
+    "terms",
+    "phrases",
+    "lexicon",
+    "vocabulary"
+  ]
 }

glitchlings/lexicon/metrics.py CHANGED Viewed

@@ -18,7 +18,6 @@ def _unique_synonyms(
     sample_size: int,
 ) -> list[str]:
     """Return unique synonym candidates excluding the original token."""
     collected: list[str] = []
     seen: set[str] = set()
     source = word.lower()
@@ -41,7 +40,6 @@ def synonym_diversity(
     sample_size: int = 5,
 ) -> float:
     """Return the mean unique-synonym count for ``words`` using ``lexicon``."""
     totals = []
     for word in words:
         synonyms = _unique_synonyms(lexicon, word, pos=pos, sample_size=sample_size)
@@ -60,7 +58,6 @@ def coverage_ratio(
     min_synonyms: int = 3,
 ) -> float:
     """Return the fraction of ``words`` with at least ``min_synonyms`` candidates."""
     total = 0
     hits = 0
     for word in words:
@@ -96,7 +93,6 @@ def mean_cosine_similarity(
     sample_size: int = 5,
 ) -> float:
     """Return the mean cosine similarity between each word and its candidates."""
     total = 0.0
     count = 0
     for word in words:
@@ -126,11 +122,8 @@ def compare_lexicons(
     embeddings: Mapping[str, Sequence[float]] | None = None,
 ) -> dict[str, float]:
     """Return comparative coverage and diversity statistics for two lexicons."""
     stats = {
-        "baseline_diversity": synonym_diversity(
-            baseline, words, pos=pos, sample_size=sample_size
-        ),
+        "baseline_diversity": synonym_diversity(baseline, words, pos=pos, sample_size=sample_size),
         "candidate_diversity": synonym_diversity(
             candidate, words, pos=pos, sample_size=sample_size
         ),