glitchlings-0.4.1-cp310-cp310-macosx_11_0_universal2.whl → glitchlings-0.4.3-cp310-cp310-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of glitchlings has been flagged as possibly problematic.

Files changed (47)
  1. glitchlings/__init__.py +30 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust.cpython-310-darwin.so +0 -0
  4. glitchlings/compat.py +284 -0
  5. glitchlings/config.py +164 -34
  6. glitchlings/config.toml +1 -1
  7. glitchlings/dlc/__init__.py +3 -1
  8. glitchlings/dlc/_shared.py +68 -0
  9. glitchlings/dlc/huggingface.py +26 -41
  10. glitchlings/dlc/prime.py +64 -101
  11. glitchlings/dlc/pytorch.py +216 -0
  12. glitchlings/dlc/pytorch_lightning.py +233 -0
  13. glitchlings/lexicon/__init__.py +12 -33
  14. glitchlings/lexicon/_cache.py +21 -22
  15. glitchlings/lexicon/data/default_vector_cache.json +80 -14
  16. glitchlings/lexicon/metrics.py +1 -8
  17. glitchlings/lexicon/vector.py +109 -49
  18. glitchlings/lexicon/wordnet.py +89 -49
  19. glitchlings/main.py +30 -24
  20. glitchlings/util/__init__.py +18 -4
  21. glitchlings/util/adapters.py +27 -0
  22. glitchlings/zoo/__init__.py +26 -15
  23. glitchlings/zoo/_ocr_confusions.py +1 -3
  24. glitchlings/zoo/_rate.py +1 -4
  25. glitchlings/zoo/_sampling.py +0 -1
  26. glitchlings/zoo/_text_utils.py +1 -5
  27. glitchlings/zoo/adjax.py +2 -4
  28. glitchlings/zoo/apostrofae.py +128 -0
  29. glitchlings/zoo/assets/__init__.py +0 -0
  30. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  31. glitchlings/zoo/core.py +152 -87
  32. glitchlings/zoo/jargoyle.py +50 -45
  33. glitchlings/zoo/mim1c.py +11 -10
  34. glitchlings/zoo/redactyl.py +16 -16
  35. glitchlings/zoo/reduple.py +5 -3
  36. glitchlings/zoo/rushmore.py +4 -10
  37. glitchlings/zoo/scannequin.py +7 -6
  38. glitchlings/zoo/typogre.py +8 -9
  39. glitchlings/zoo/zeedub.py +6 -3
  40. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
  41. glitchlings-0.4.3.dist-info/RECORD +46 -0
  42. glitchlings/lexicon/graph.py +0 -290
  43. glitchlings-0.4.1.dist-info/RECORD +0 -39
  44. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
  45. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
  46. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
  47. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0
glitchlings/lexicon/vector.py

@@ -4,19 +4,21 @@ from __future__ import annotations
 
 import argparse
 import importlib
+import importlib.util
 import json
 import math
-from pathlib import Path
 import sys
+from pathlib import Path
 from typing import Any, Callable, Iterable, Iterator, Mapping, MutableMapping, Sequence
 
 from . import LexiconBackend
-from ._cache import CacheSnapshot, load_cache as _load_cache_file, write_cache as _write_cache_file
+from ._cache import CacheSnapshot
+from ._cache import load_cache as _load_cache_file
+from ._cache import write_cache as _write_cache_file
 
 
 def _cosine_similarity(vector_a: Sequence[float], vector_b: Sequence[float]) -> float:
     """Return the cosine similarity between two dense vectors."""
-
     dot_product = 0.0
     norm_a = 0.0
     norm_b = 0.0
@@ -144,7 +146,6 @@ class _SpaCyAdapter(_Adapter):
 
 def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
     """Load embeddings from a JSON mapping of token to vector list."""
-
     with path.open("r", encoding="utf8") as handle:
         payload = json.load(handle)
 
@@ -164,11 +165,8 @@ def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
 
 def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     """Load ``gensim`` vectors from ``path``."""
-
     if importlib.util.find_spec("gensim") is None:
-        raise RuntimeError(
-            "The gensim package is required to load keyed vector embeddings."
-        )
+        raise RuntimeError("The gensim package is required to load keyed vector embeddings.")
 
     keyed_vectors_module = importlib.import_module("gensim.models.keyedvectors")
     if binary is None:
@@ -177,14 +175,11 @@ def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     if path.suffix in {".kv", ".kv2"}:
         return keyed_vectors_module.KeyedVectors.load(str(path), mmap="r")
 
-    return keyed_vectors_module.KeyedVectors.load_word2vec_format(
-        str(path), binary=binary
-    )
+    return keyed_vectors_module.KeyedVectors.load_word2vec_format(str(path), binary=binary)
 
 
 def _load_spacy_language(model_name: str) -> Any:
     """Load a spaCy language pipeline by name."""
-
     if importlib.util.find_spec("spacy") is None:
         raise RuntimeError(
             "spaCy is required to use spaCy-backed vector lexicons; install the 'vectors' extra."
@@ -194,9 +189,60 @@ def _load_spacy_language(model_name: str) -> Any:
     return spacy_module.load(model_name)
 
 
+def _load_sentence_transformer(model_name: str) -> Any:
+    """Return a ``SentenceTransformer`` instance for ``model_name``."""
+
+    if importlib.util.find_spec("sentence_transformers") is None:
+        raise RuntimeError(
+            "sentence-transformers is required for this source; install the 'st' extra."
+        )
+
+    module = importlib.import_module("sentence_transformers")
+    try:
+        model_cls = getattr(module, "SentenceTransformer")
+    except AttributeError as exc:  # pragma: no cover - defensive
+        raise RuntimeError("sentence-transformers does not expose SentenceTransformer") from exc
+
+    return model_cls(model_name)
+
+
+def _build_sentence_transformer_embeddings(
+    model_name: str, tokens: Sequence[str]
+) -> Mapping[str, Sequence[float]]:
+    """Return embeddings for ``tokens`` using ``model_name``."""
+
+    if not tokens:
+        return {}
+
+    model = _load_sentence_transformer(model_name)
+
+    unique_tokens: list[str] = []
+    seen: set[str] = set()
+    for token in tokens:
+        normalized = token.strip()
+        if not normalized or normalized in seen:
+            continue
+        unique_tokens.append(normalized)
+        seen.add(normalized)
+
+    if not unique_tokens:
+        return {}
+
+    embeddings = model.encode(
+        unique_tokens,
+        batch_size=64,
+        normalize_embeddings=True,
+        convert_to_numpy=True,
+    )
+
+    return {
+        token: [float(value) for value in vector]
+        for token, vector in zip(unique_tokens, embeddings, strict=True)
+    }
+
+
 def _resolve_source(source: Any | None) -> _Adapter | None:
     """Return an adapter instance for ``source`` if possible."""
-
     if source is None:
         return None
 
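The new helper pair turns a sentence-transformers checkpoint into the same token-to-vector mapping the rest of the module already consumes: tokens are stripped, de-duplicated in order, encoded once, and returned as plain lists of floats. A minimal sketch of that token flow, with a hypothetical fake_encode stub standing in for the real SentenceTransformer.encode call so it runs without the optional dependency:

from typing import Mapping, Sequence


def fake_encode(tokens: Sequence[str]) -> list[list[float]]:
    # Stand-in for model.encode: one tiny vector per token.
    return [[float(len(token)), 1.0] for token in tokens]


def build_embeddings(tokens: Sequence[str]) -> Mapping[str, Sequence[float]]:
    # Mirror the dedup-and-zip flow: strip, drop empties and repeats, keep order.
    unique: list[str] = []
    seen: set[str] = set()
    for token in tokens:
        normalized = token.strip()
        if normalized and normalized not in seen:
            unique.append(normalized)
            seen.add(normalized)
    vectors = fake_encode(unique)
    return {token: [float(v) for v in vector] for token, vector in zip(unique, vectors)}


print(build_embeddings(["quick", " quick ", "brown", ""]))
# {'quick': [5.0, 1.0], 'brown': [5.0, 1.0]}

The real helper returns exactly this shape (token → list[float]), which is why the result can be handed straight to build_vector_cache as a source.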
@@ -229,9 +275,7 @@ def _resolve_source(source: Any | None) -> _Adapter | None:
 
     if suffix in {".kv", ".kv2", ".bin", ".gz", ".txt", ".vec"}:
         binary_flag = False if suffix in {".txt", ".vec"} else None
-        return _GensimAdapter(
-            _load_gensim_vectors(resolved_path, binary=binary_flag)
-        )
+        return _GensimAdapter(_load_gensim_vectors(resolved_path, binary=binary_flag))
 
     if hasattr(source, "most_similar") and hasattr(source, "key_to_index"):
         return _GensimAdapter(source)
@@ -257,6 +301,7 @@ class VectorLexicon(LexiconBackend):
         case_sensitive: bool = False,
         seed: int | None = None,
     ) -> None:
+        """Initialise the lexicon with an embedding ``source`` and optional cache."""
         super().__init__(seed=seed)
         self._adapter = _resolve_source(source)
         self._max_neighbors = max(1, max_neighbors)
@@ -358,42 +403,34 @@ class VectorLexicon(LexiconBackend):
         self._cache_dirty = True
         return synonyms
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
+        """Return up to ``n`` deterministic synonyms drawn from the embedding cache."""
         normalized = self._normalize_for_lookup(word)
         synonyms = self._ensure_cached(original=word, normalized=normalized)
         return self._deterministic_sample(synonyms, limit=n, word=word, pos=pos)
 
     def precompute(self, word: str, *, limit: int | None = None) -> list[str]:
         """Populate the cache for ``word`` and return the stored synonyms."""
-
         normalized = self._normalize_for_lookup(word)
-        return list(
-            self._ensure_cached(original=word, normalized=normalized, limit=limit)
-        )
+        return list(self._ensure_cached(original=word, normalized=normalized, limit=limit))
 
     def iter_vocabulary(self) -> Iterator[str]:
         """Yield vocabulary tokens from the underlying embedding source."""
-
         if self._adapter is None:
             return iter(())
         return self._adapter.iter_keys()
 
     def export_cache(self) -> dict[str, list[str]]:
         """Return a copy of the in-memory synonym cache."""
-
         return {key: list(values) for key, values in self._cache.items()}
 
     @classmethod
     def load_cache(cls, path: str | Path) -> CacheSnapshot:
         """Load and validate a cache file for reuse."""
-
         return _load_cache_file(Path(path))
 
     def save_cache(self, path: str | Path | None = None) -> Path:
         """Persist the current cache to disk, returning the path used."""
-
         if path is None:
             if self._cache_path is None:
                 raise RuntimeError("No cache path supplied to VectorLexicon.")
@@ -408,6 +445,7 @@ class VectorLexicon(LexiconBackend):
         return target
 
     def supports_pos(self, pos: str | None) -> bool:
+        """Always return ``True`` because vector sources do not encode POS metadata."""
         return True
 
     def __repr__(self) -> str:  # pragma: no cover - debug helper
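These hunks leave VectorLexicon as a thin deterministic wrapper over an embedding source: lookups are normalised, cached, then sampled reproducibly by seed. A usage sketch, assuming glitchlings 0.4.3 is installed and that "vectors.kv" is a locally available gensim KeyedVectors file (the path and example word are illustrative):

from glitchlings.lexicon.vector import VectorLexicon

# A .kv path is routed through the gensim adapter by _resolve_source.
lexicon = VectorLexicon(source="vectors.kv", max_neighbors=10, seed=42)

# Same seed and word yield the same deterministic sample of cached neighbours.
print(lexicon.get_synonyms("quick", n=3))

# Always True: vector sources carry no part-of-speech metadata.
print(lexicon.supports_pos("n"))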
@@ -430,7 +468,6 @@ def build_vector_cache(
     normalizer: Callable[[str], str] | None = None,
 ) -> Path:
     """Generate a synonym cache for ``words`` using ``source`` embeddings."""
-
     lexicon = VectorLexicon(
         source=source,
         max_neighbors=max_neighbors,
@@ -448,7 +485,6 @@
 
 def load_vector_source(spec: str) -> Any:
     """Resolve ``spec`` strings for the cache-building CLI."""
-
     if spec.startswith("spacy:"):
         model_name = spec.split(":", 1)[1]
         return _load_spacy_language(model_name)
@@ -472,7 +508,8 @@ def _parse_cli(argv: Sequence[str] | None = None) -> argparse.Namespace:
         "--source",
         required=True,
         help=(
-            "Vector source specification. Use 'spacy:<model>' for spaCy pipelines "
+            "Vector source specification. Use 'spacy:<model>' for spaCy pipelines, "
+            "'sentence-transformers:<model>' for HuggingFace checkpoints (requires --tokens), "
            "or provide a path to a gensim KeyedVectors/word2vec file."
         ),
     )
@@ -538,7 +575,6 @@ def _iter_tokens_from_file(path: Path) -> Iterator[str]:
 
 def main(argv: Sequence[str] | None = None) -> int:
     """Entry-point for ``python -m glitchlings.lexicon.vector``."""
-
     args = _parse_cli(argv)
 
     if args.output.exists() and not args.overwrite:
@@ -547,28 +583,52 @@ def main(argv: Sequence[str] | None = None) -> int:
         )
 
     if args.normalizer == "lower":
-        normalizer: Callable[[str], str] | None = (
-            None if args.case_sensitive else str.lower
-        )
+        normalizer: Callable[[str], str] | None = None if args.case_sensitive else str.lower
     else:
-        normalizer = lambda value: value
 
-    source = load_vector_source(args.source)
+        def _identity(value: str) -> str:
+            return value
+
+        normalizer = _identity
+
+    tokens_from_file: list[str] | None = None
     if args.tokens is not None:
-        token_iter: Iterable[str] = _iter_tokens_from_file(args.tokens)
+        tokens_from_file = list(_iter_tokens_from_file(args.tokens))
+        if args.limit is not None:
+            tokens_from_file = tokens_from_file[: args.limit]
+
+    source_spec = args.source
+    token_iter: Iterable[str]
+    if source_spec.startswith("sentence-transformers:"):
+        model_name = source_spec.split(":", 1)[1].strip()
+        if not model_name:
+            model_name = "sentence-transformers/all-mpnet-base-v2"
+        if tokens_from_file is None:
+            raise SystemExit(
+                "Sentence-transformers sources require --tokens to supply a vocabulary."
+            )
+        source = _build_sentence_transformer_embeddings(model_name, tokens_from_file)
+        token_iter = tokens_from_file
     else:
-        lexicon = VectorLexicon(
-            source=source,
-            max_neighbors=args.max_neighbors,
-            min_similarity=args.min_similarity,
-            case_sensitive=args.case_sensitive,
-            normalizer=normalizer,
-            seed=args.seed,
-        )
-        token_iter = lexicon.iter_vocabulary()
-
-    if args.limit is not None:
-        token_iter = (token for index, token in enumerate(token_iter) if index < args.limit)
+        source = load_vector_source(source_spec)
+        if tokens_from_file is not None:
+            token_iter = tokens_from_file
+        else:
+            lexicon = VectorLexicon(
+                source=source,
+                max_neighbors=args.max_neighbors,
+                min_similarity=args.min_similarity,
+                case_sensitive=args.case_sensitive,
+                normalizer=normalizer,
+                seed=args.seed,
+            )
+            iterator = lexicon.iter_vocabulary()
+            if args.limit is not None:
+                token_iter = (
+                    token for index, token in enumerate(iterator) if index < args.limit
+                )
+            else:
+                token_iter = iterator
 
     build_vector_cache(
         source=source,
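The net effect on the CLI: a sentence-transformers:<model> source now requires an explicit vocabulary file, embeds it once, and feeds the resulting mapping into build_vector_cache, while other sources keep the previous behaviour. A hedged invocation sketch via the module's main(); the --tokens flag and the default model name appear in the diff, but the --output and --overwrite spellings are inferred from args.output and args.overwrite and should be treated as assumptions:

from glitchlings.lexicon.vector import main

# Build a synonym cache from a newline-delimited vocabulary file using a
# sentence-transformers checkpoint; --tokens is mandatory for this source.
exit_code = main([
    "--source", "sentence-transformers:sentence-transformers/all-mpnet-base-v2",
    "--tokens", "vocab.txt",          # hypothetical vocabulary file, one token per line
    "--output", "vector_cache.json",  # assumed flag name, mirrors args.output
    "--overwrite",                    # assumed flag name, mirrors args.overwrite
])
print(exit_code)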
glitchlings/lexicon/wordnet.py

@@ -2,42 +2,76 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
-
-try:  # pragma: no cover - exercised when NLTK is available
-    import nltk  # type: ignore[import]
-except ModuleNotFoundError as exc:  # pragma: no cover - triggered when NLTK missing
-    nltk = None  # type: ignore[assignment]
-    find = None  # type: ignore[assignment]
-    _NLTK_IMPORT_ERROR = exc
-else:  # pragma: no cover - executed when NLTK is present
-    from nltk.corpus.reader import WordNetCorpusReader as _WordNetCorpusReader  # type: ignore[import]
-    from nltk.data import find as _nltk_find  # type: ignore[import]
-
-    find = _nltk_find
-    _NLTK_IMPORT_ERROR = None
-
-if TYPE_CHECKING:  # pragma: no cover - typing aid only
-    from nltk.corpus.reader import WordNetCorpusReader  # type: ignore[import]
-else:  # pragma: no cover - runtime fallback to avoid hard dependency
-    WordNetCorpusReader = Any
+from importlib import import_module
+from pathlib import Path
+from types import ModuleType
+from typing import Any, Callable, Protocol, Sequence, cast
+
+from ..compat import nltk as _nltk_dependency
+from . import LexiconBackend
+from ._cache import CacheSnapshot
+
+
+class _LemmaProtocol(Protocol):
+    def name(self) -> str:
+        ...
+
+
+class _SynsetProtocol(Protocol):
+    def lemmas(self) -> Sequence[_LemmaProtocol]:
+        ...
+
+
+class _WordNetResource(Protocol):
+    def synsets(self, word: str, pos: str | None = None) -> Sequence[_SynsetProtocol]:
+        ...
+
+    def ensure_loaded(self) -> None:
+        ...
+
+
+WordNetCorpusReaderFactory = Callable[[Any, Any], _WordNetResource]
+
+nltk: ModuleType | None = _nltk_dependency.get()
+_NLTK_IMPORT_ERROR: ModuleNotFoundError | None = _nltk_dependency.error
+
+WordNetCorpusReader: WordNetCorpusReaderFactory | None = None
+find: Callable[[str], Any] | None = None
+_WORDNET_MODULE: _WordNetResource | None = None
 
 if nltk is not None:  # pragma: no cover - guarded by import success
     try:
-        from nltk.corpus import wordnet as _WORDNET_MODULE  # type: ignore[import]
+        corpus_reader_module = import_module("nltk.corpus.reader")
+    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when corpus missing
+        if _NLTK_IMPORT_ERROR is None:
+            _NLTK_IMPORT_ERROR = exc
+    else:
+        reader_candidate = getattr(corpus_reader_module, "WordNetCorpusReader", None)
+        if reader_candidate is not None:
+            WordNetCorpusReader = cast(WordNetCorpusReaderFactory, reader_candidate)
+
+    try:
+        data_module = import_module("nltk.data")
+    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when data missing
+        if _NLTK_IMPORT_ERROR is None:
+            _NLTK_IMPORT_ERROR = exc
+    else:
+        locator = getattr(data_module, "find", None)
+        if callable(locator):
+            find = cast(Callable[[str], Any], locator)
+
+    try:
+        module_candidate = import_module("nltk.corpus.wordnet")
     except ModuleNotFoundError:  # pragma: no cover - only hit on namespace packages
         _WORDNET_MODULE = None
     else:
-        WordNetCorpusReader = _WordNetCorpusReader  # type: ignore[assignment]
+        _WORDNET_MODULE = cast(_WordNetResource, module_candidate)
 else:
+    nltk = None
+    find = None
     _WORDNET_MODULE = None
 
-from pathlib import Path
-
-from . import LexiconBackend
-from ._cache import CacheSnapshot
-
-_WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
+_WORDNET_HANDLE: _WordNetResource | None = _WORDNET_MODULE
 _wordnet_ready = False
 
 _VALID_POS: tuple[str, ...] = ("n", "v", "a", "r")
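The rewrite replaces direct nltk imports with structural Protocol types plus import_module/getattr lookups routed through the new compat module, so the file no longer needs NLTK present just to be imported or type-checked. A self-contained sketch of the same pattern using only the standard library (the _JSONLike protocol and the json module stand in for the NLTK pieces):

from importlib import import_module
from typing import Any, Protocol, cast


class _JSONLike(Protocol):
    # Structural type: anything exposing dumps(obj) -> str qualifies.
    def dumps(self, obj: Any) -> str:
        ...


resource: _JSONLike | None = None

try:
    # In wordnet.py this is "nltk.corpus.wordnet"; the import is guarded,
    # and the failure is recorded rather than raised at import time.
    module = import_module("json")
except ModuleNotFoundError:
    resource = None
else:
    # cast() gives static checkers the protocol type; it is a no-op at runtime.
    resource = cast(_JSONLike, module)

# Callers check the optional handle before use, as _require_nltk() does.
if resource is not None:
    print(resource.dumps({"ok": True}))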
@@ -45,33 +79,37 @@ _VALID_POS: tuple[str, ...] = ("n", "v", "a", "r")
 
 def _require_nltk() -> None:
     """Ensure the NLTK dependency is present before continuing."""
-
     if nltk is None or find is None:
         message = (
             "The NLTK package is required for WordNet-backed lexicons; install "
             "`nltk` and its WordNet corpus manually to enable this backend."
         )
-        if '_NLTK_IMPORT_ERROR' in globals() and _NLTK_IMPORT_ERROR is not None:
+        if "_NLTK_IMPORT_ERROR" in globals() and _NLTK_IMPORT_ERROR is not None:
            raise RuntimeError(message) from _NLTK_IMPORT_ERROR
        raise RuntimeError(message)
 
 
 def dependencies_available() -> bool:
     """Return ``True`` when the runtime NLTK dependency is present."""
-
     return nltk is not None and find is not None
 
 
-def _load_wordnet_reader() -> WordNetCorpusReader:
+def _load_wordnet_reader() -> _WordNetResource:
     """Return a WordNet corpus reader from the downloaded corpus files."""
-
     _require_nltk()
 
+    if WordNetCorpusReader is None:
+        raise RuntimeError("The NLTK WordNet corpus reader is unavailable.")
+
+    locator = find
+    if locator is None:
+        raise RuntimeError("The NLTK data locator is unavailable.")
+
     try:
-        root = find("corpora/wordnet")
+        root = locator("corpora/wordnet")
     except LookupError:
         try:
-            zip_root = find("corpora/wordnet.zip")
+            zip_root = locator("corpora/wordnet.zip")
         except LookupError as exc:
             raise RuntimeError(
                 "The NLTK WordNet corpus is not installed; run `nltk.download('wordnet')`."
@@ -81,24 +119,24 @@ def _load_wordnet_reader() -> WordNetCorpusReader:
     return WordNetCorpusReader(root, None)
 
 
-def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
+def _wordnet(force_refresh: bool = False) -> _WordNetResource:
     """Retrieve the active WordNet handle, rebuilding it on demand."""
-
     global _WORDNET_HANDLE
 
     if force_refresh:
         _WORDNET_HANDLE = _WORDNET_MODULE
 
-    if _WORDNET_HANDLE is not None:
-        return _WORDNET_HANDLE
+    cached = _WORDNET_HANDLE
+    if cached is not None:
+        return cached
 
-    _WORDNET_HANDLE = _load_wordnet_reader()
-    return _WORDNET_HANDLE
+    resource = _load_wordnet_reader()
+    _WORDNET_HANDLE = resource
+    return resource
 
 
 def ensure_wordnet() -> None:
     """Ensure the WordNet corpus is available before use."""
-
     global _wordnet_ready
     if _wordnet_ready:
         return
@@ -106,25 +144,25 @@ def ensure_wordnet() -> None:
     _require_nltk()
 
     resource = _wordnet()
+    nltk_module = nltk
+    if nltk_module is None:
+        raise RuntimeError("The NLTK dependency is unexpectedly unavailable.")
 
     try:
         resource.ensure_loaded()
     except LookupError:
-        nltk.download("wordnet", quiet=True)
+        nltk_module.download("wordnet", quiet=True)
         try:
             resource = _wordnet(force_refresh=True)
             resource.ensure_loaded()
         except LookupError as exc:  # pragma: no cover - only triggered when download fails
-            raise RuntimeError(
-                "Unable to load NLTK WordNet corpus for synonym lookups."
-            ) from exc
+            raise RuntimeError("Unable to load NLTK WordNet corpus for synonym lookups.") from exc
 
     _wordnet_ready = True
 
 
 def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
     """Gather deterministic synonym candidates for the supplied word."""
-
     normalized_word = word.lower()
     wordnet = _wordnet()
     synonyms: set[str] = set()
@@ -157,9 +195,8 @@ def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
 class WordNetLexicon(LexiconBackend):
     """Lexicon that retrieves synonyms from the NLTK WordNet corpus."""
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
+        """Return up to ``n`` WordNet lemmas for ``word`` filtered by ``pos`` if provided."""
         ensure_wordnet()
 
         if pos is None:
@@ -174,15 +211,18 @@ class WordNetLexicon(LexiconBackend):
         return self._deterministic_sample(synonyms, limit=n, word=word, pos=pos)
 
     def supports_pos(self, pos: str | None) -> bool:
+        """Return ``True`` when ``pos`` is unset or recognised by the WordNet corpus."""
         if pos is None:
             return True
         return pos.lower() in _VALID_POS
 
     @classmethod
     def load_cache(cls, path: str | Path) -> CacheSnapshot:
+        """WordNet lexicons do not persist caches; raising keeps the contract explicit."""
         raise RuntimeError("WordNetLexicon does not persist or load caches.")
 
     def save_cache(self, path: str | Path | None = None) -> Path | None:
+        """WordNet lexicons do not persist caches; raising keeps the contract explicit."""
         raise RuntimeError("WordNetLexicon does not persist or load caches.")
 
     def __repr__(self) -> str:  # pragma: no cover - trivial representation
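For completeness, a usage sketch for the reworked backend, assuming nltk is installed (ensure_wordnet() attempts nltk.download("wordnet") on first use); the no-argument constructor shown here is an assumption, since WordNetLexicon's __init__ does not appear in this diff:

from glitchlings.lexicon.wordnet import WordNetLexicon, dependencies_available

if dependencies_available():  # True only when the guarded nltk import succeeded
    lexicon = WordNetLexicon()  # assumed default construction
    # pos is one of the WordNet tags accepted by supports_pos: n, v, a, r
    print(lexicon.supports_pos("a"))
    print(lexicon.get_synonyms("quick", pos="a", n=3))
else:
    print("Install nltk to enable the WordNet backend.")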