glitchlings 0.4.2__cp312-cp312-macosx_11_0_universal2.whl → 0.4.4__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (38)
  1. glitchlings/__init__.py +4 -0
  2. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  3. glitchlings/compat.py +80 -11
  4. glitchlings/config.py +32 -19
  5. glitchlings/config.toml +1 -1
  6. glitchlings/dlc/__init__.py +3 -1
  7. glitchlings/dlc/_shared.py +86 -1
  8. glitchlings/dlc/pytorch.py +166 -0
  9. glitchlings/dlc/pytorch_lightning.py +215 -0
  10. glitchlings/lexicon/__init__.py +10 -16
  11. glitchlings/lexicon/_cache.py +21 -15
  12. glitchlings/lexicon/data/default_vector_cache.json +80 -14
  13. glitchlings/lexicon/vector.py +94 -15
  14. glitchlings/lexicon/wordnet.py +66 -25
  15. glitchlings/main.py +21 -11
  16. glitchlings/zoo/__init__.py +5 -1
  17. glitchlings/zoo/_rate.py +114 -1
  18. glitchlings/zoo/_rust_extensions.py +143 -0
  19. glitchlings/zoo/adjax.py +5 -6
  20. glitchlings/zoo/apostrofae.py +127 -0
  21. glitchlings/zoo/assets/__init__.py +0 -0
  22. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  23. glitchlings/zoo/core.py +61 -23
  24. glitchlings/zoo/jargoyle.py +50 -36
  25. glitchlings/zoo/redactyl.py +15 -13
  26. glitchlings/zoo/reduple.py +5 -6
  27. glitchlings/zoo/rushmore.py +5 -6
  28. glitchlings/zoo/scannequin.py +5 -6
  29. glitchlings/zoo/typogre.py +8 -6
  30. glitchlings/zoo/zeedub.py +8 -6
  31. {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/METADATA +40 -4
  32. glitchlings-0.4.4.dist-info/RECORD +47 -0
  33. glitchlings/lexicon/graph.py +0 -282
  34. glitchlings-0.4.2.dist-info/RECORD +0 -42
  35. {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/WHEEL +0 -0
  36. {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/entry_points.txt +0 -0
  37. {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/licenses/LICENSE +0 -0
  38. {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -217,6 +217,7 @@ Classifier: Programming Language :: Python :: 3
217
217
  Classifier: Programming Language :: Python :: 3.10
218
218
  Classifier: Programming Language :: Python :: 3.11
219
219
  Classifier: Programming Language :: Python :: 3.12
220
+ Classifier: Programming Language :: Python :: 3.13
220
221
  Classifier: Programming Language :: Rust
221
222
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
222
223
  Classifier: Topic :: Software Development :: Testing
@@ -226,19 +227,43 @@ License-File: LICENSE
226
227
  Requires-Dist: confusable-homoglyphs>=3.3.1
227
228
  Requires-Dist: tomli>=2.0.1; python_version < "3.11"
228
229
  Requires-Dist: pyyaml>=6.0.0
230
+ Provides-Extra: all
231
+ Requires-Dist: black>=24.4.0; extra == "all"
232
+ Requires-Dist: hypothesis>=6.140.0; extra == "all"
233
+ Requires-Dist: interrogate>=1.5.0; extra == "all"
234
+ Requires-Dist: jellyfish>=1.2.0; extra == "all"
235
+ Requires-Dist: isort>=5.13.0; extra == "all"
236
+ Requires-Dist: mkdocs>=1.6.0; extra == "all"
237
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "all"
238
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "all"
239
+ Requires-Dist: mkdocstrings-python>=1.10.0; extra == "all"
240
+ Requires-Dist: mypy>=1.8.0; extra == "all"
241
+ Requires-Dist: numpy<3.0,>=1.24; extra == "all"
242
+ Requires-Dist: pre-commit>=3.8.0; extra == "all"
243
+ Requires-Dist: pytest>=8.0.0; extra == "all"
244
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
245
+ Requires-Dist: ruff>=0.6.0; extra == "all"
246
+ Requires-Dist: verifiers>=0.1.3.post0; extra == "all"
229
247
  Provides-Extra: hf
230
248
  Requires-Dist: datasets>=4.0.0; extra == "hf"
249
+ Provides-Extra: lightning
250
+ Requires-Dist: pytorch_lightning>=2.0.0; extra == "lightning"
231
251
  Provides-Extra: vectors
232
- Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
252
+ Requires-Dist: numpy<3.0,>=1.24; extra == "vectors"
233
253
  Requires-Dist: spacy>=3.7.2; extra == "vectors"
234
254
  Requires-Dist: gensim>=4.3.2; extra == "vectors"
255
+ Provides-Extra: st
256
+ Requires-Dist: sentence-transformers>=3.0.0; extra == "st"
235
257
  Provides-Extra: prime
236
258
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
237
259
  Requires-Dist: jellyfish>=1.2.0; extra == "prime"
260
+ Provides-Extra: torch
261
+ Requires-Dist: torch>=2.0.0; extra == "torch"
238
262
  Provides-Extra: dev
239
263
  Requires-Dist: pytest>=8.0.0; extra == "dev"
264
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
240
265
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
241
- Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
266
+ Requires-Dist: numpy<3.0,>=1.24; extra == "dev"
242
267
  Requires-Dist: mkdocs>=1.6.0; extra == "dev"
243
268
  Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
244
269
  Requires-Dist: mkdocs-material>=9.5.0; extra == "dev"
@@ -307,7 +332,7 @@ print(gaggle(SAMPLE_TEXT))
307
332
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
308
333
 
309
334
  Consult the [Glitchlings Usage Guide](docs/index.md)
310
- for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
335
+ for end-to-end instructions spanning the Python API, CLI, HuggingFace, PyTorch, and Prime Intellect
311
336
  integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
312
337
 
313
338
  ## Motivation
@@ -356,6 +381,7 @@ glitchlings --list
356
381
 
357
382
  ```text
358
383
  Typogre — scope: Character, order: early
384
+ Apostrofae — scope: Character, order: normal
359
385
  Mim1c — scope: Character, order: last
360
386
  Jargoyle — scope: Word, order: normal
361
387
  Adjax — scope: Word, order: normal
@@ -458,6 +484,16 @@ _What a nice word, would be a shame if something happened to it._
458
484
  > - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
459
485
  > - `seed (int)`: The random seed for reproducibility (default: 151).
460
486
 
487
+ ### Apostrofae
488
+
489
+ _It looks like you're trying to paste some text. Can I help?_
490
+
491
+ > _**Paperclip Manager.**_ Apostrofae scans for balanced runs of straight quotes, apostrophes, and backticks before replacing them with randomly sampled smart-quote pairs from a curated lookup table. The swap happens in-place so contractions and unpaired glyphs remain untouched.
492
+ >
493
+ > Args
494
+ >
495
+ > - `seed (int)`: Optional seed controlling the deterministic smart-quote sampling (default: 151).
496
+
461
497
  ### Mim1c
462
498
 
463
499
  _Wait, was that...?_
@@ -0,0 +1,47 @@
1
+ glitchlings/__init__.py,sha256=bkyRgzjC8ssidEO9UL9VpbYXQxTV1Hz3VAPOIqd9uMg,1182
2
+ glitchlings/__main__.py,sha256=f-P4jiVBd7ZpS6QxRpa_6SJgOG03UhZhcWasMDRWLs8,120
3
+ glitchlings/_zoo_rust.cpython-312-darwin.so,sha256=Dsh8k6oypyOVug9SqVuwsZvpxLfEqrVJfeL995FsMrI,2602496
4
+ glitchlings/compat.py,sha256=T_5Ia8yCzZvsMdicZ2TCcOgDO53_AjNGkSXWTR_qEnA,8908
5
+ glitchlings/config.py,sha256=ofxDMkoMg4j51CFube54aca1Ky9y_ZeVktXpeUEdWmA,12953
6
+ glitchlings/config.toml,sha256=04-Y_JCdQU68SRmwk2qZqrH_bbX4jEH9uh7URtxdIHA,99
7
+ glitchlings/main.py,sha256=uw8VbDgxov1m-wYHPDl2dP5ItpLB4ZHpb0ChJXzcL0o,10623
8
+ glitchlings/dlc/__init__.py,sha256=qlY4nuagy4AAWuPMwmuhwK2m36ktp-qkeiIxC7OXg34,305
9
+ glitchlings/dlc/_shared.py,sha256=OmEjJmSs1pQ7j1ggR_H8D8RDp5E1ZqOnzSIxyqRE1aE,4407
10
+ glitchlings/dlc/huggingface.py,sha256=9lW7TnTHA_bXyo4Is8pymZchrB9BIL1bMCP2p7LCMtg,2576
11
+ glitchlings/dlc/prime.py,sha256=qGFI1d4BiOEIgQZ5v9QnlbYx4J4q-vNlh5tWZng11xs,8607
12
+ glitchlings/dlc/pytorch.py,sha256=QaiIYyQ3koy2-enhUI9WY3SIMRX65gmsnjDvCsf8xbg,6233
13
+ glitchlings/dlc/pytorch_lightning.py,sha256=Ls7Xh5Mg643Tyk3KvCMq_MsB4vvekfUUZOhE0z4K22c,8074
14
+ glitchlings/lexicon/__init__.py,sha256=ooEPcAJhCI2Nw5z8OsQ0EtVpKBfiTrU0-AQJq8Zn2nQ,6007
15
+ glitchlings/lexicon/_cache.py,sha256=aWSUb5Ex162dr3HouO2Ic2O8ck3ViEFWs8-XMLKMeJ0,4086
16
+ glitchlings/lexicon/metrics.py,sha256=VBFfFpxjiEwZtK-jS55H8xP7MTC_0OjY8lQ5zSQ9aTY,4572
17
+ glitchlings/lexicon/vector.py,sha256=yWf-vlN2OEHnTCPu7tgDnJbhm47cmhdrTtjR0RZKkUM,22530
18
+ glitchlings/lexicon/wordnet.py,sha256=YcOliPHuesdlekmGspwAyR4fWDDxZWR_dIt_Nsq7ag0,7608
19
+ glitchlings/lexicon/data/default_vector_cache.json,sha256=3iVH0nX8EqMbqOkKWvORCGYtN0LKHn5G_Snlizsnm1g,997
20
+ glitchlings/util/__init__.py,sha256=vc3EAY8ehRjbOiryFdaqvvljXcyNGtZSPiEp9ok1vVw,4674
21
+ glitchlings/util/adapters.py,sha256=psxQFYSFmh1u7NuqtIrKwQP5FOhOrZoxZzc7X7DDi9U,693
22
+ glitchlings/zoo/__init__.py,sha256=1dWZPCTXuh5J7WdCxHX7ZX9bNd8bakzYndxQRhF43i8,5243
23
+ glitchlings/zoo/_ocr_confusions.py,sha256=Ju2_avXiwsr1p8zWFUTOzMxJ8vT5PpYobuGIn4L_sqI,1204
24
+ glitchlings/zoo/_rate.py,sha256=tkIlXHewE8s9w1jpCw8ZzkVN31690FAnvTM_R3dCIpY,3579
25
+ glitchlings/zoo/_rust_extensions.py,sha256=Bsd0kiPB1rUn5x3k7ykydFuk2YSvXS9CQGPRlE5XzXY,4211
26
+ glitchlings/zoo/_sampling.py,sha256=KrWyUSsYXghlvktS5hQBO0bPqywEEyA49A2qDWInB7Q,1586
27
+ glitchlings/zoo/_text_utils.py,sha256=fS5L_eq-foBbBdiv4ymI8-O0D0csc3yDekHpX8bqfV4,2754
28
+ glitchlings/zoo/adjax.py,sha256=XT5kKqPOUPgKSDOcR__HBnv4OXtBKee40GuNNmm1GYI,3518
29
+ glitchlings/zoo/apostrofae.py,sha256=qjpfnxdPWXMNzZnSD7UMfvHyzGKa7TLsvUhMsIvjwj8,3822
30
+ glitchlings/zoo/core.py,sha256=dRzUTmhOswDV0hWcaD-Sx7rZdPlrszn7C_1G2xd4ECk,20675
31
+ glitchlings/zoo/jargoyle.py,sha256=2TGU_z8gILwQ-lyZEqvmsrLupxqb8ydlDiwcp-O6WwY,11679
32
+ glitchlings/zoo/mim1c.py,sha256=-fgodKWZq--Xw8L2t1EqNbsh48bwX5jZxmiXdoaQShI,3437
33
+ glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
34
+ glitchlings/zoo/redactyl.py,sha256=eWn7JC81BXkp2bSinwrBfU3jXukcUGDVkaa6BcGvte4,5559
35
+ glitchlings/zoo/reduple.py,sha256=zSc1N_-tz9Kl7CDMrdZKgCuW3Bxp_-g6axadAa6AszM,4224
36
+ glitchlings/zoo/rushmore.py,sha256=k429trwNPcWJHEOIoeGsdKBzJNL4Fxz9KRqX3Ro9u_0,4286
37
+ glitchlings/zoo/scannequin.py,sha256=GfjLYWWp-jdnOBmdg7gt5wQnobY8jWQHScB5EMgo6HE,4870
38
+ glitchlings/zoo/typogre.py,sha256=BQotNL-gn4PXQI9j63d2w9mQ4X6ZJKSJ4de-GN-gmUI,6686
39
+ glitchlings/zoo/zeedub.py,sha256=aNnjZGeTmMqA2WjgtGh7Fgl9pUQo3AZ2B-tYs2ZFOQE,4840
40
+ glitchlings/zoo/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ glitchlings/zoo/assets/apostrofae_pairs.json,sha256=bfjSEaMTI_axGNJ93nI431KXU0IVp7ayO42gGcMgL6U,521
42
+ glitchlings-0.4.4.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
43
+ glitchlings-0.4.4.dist-info/METADATA,sha256=onpPJTtANv13MyyvYS930OWiJb_Ipxvje5sTrNmNPQw,32388
44
+ glitchlings-0.4.4.dist-info/WHEEL,sha256=o0zAoJUNILGJZxEeFPjb7OMHp_94eqIkZBeZ0gvgOpo,114
45
+ glitchlings-0.4.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
46
+ glitchlings-0.4.4.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
47
+ glitchlings-0.4.4.dist-info/RECORD,,
@@ -1,282 +0,0 @@
1
- """Graph-based lexicon backed by ConceptNet/Numberbatch embeddings."""
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- from pathlib import Path
7
- from typing import Iterable, Mapping, MutableMapping, Sequence
8
-
9
- from . import LexiconBackend
10
- from ._cache import CacheSnapshot
11
- from ._cache import load_cache as _load_cache_file
12
- from ._cache import write_cache as _write_cache_file
13
- from .vector import VectorLexicon
14
-
15
- _CONCEPT_RE = re.compile(r"^/c/(?P<lang>[a-z]{2})/(?P<term>[^/]+)")
16
- _PUNCTUATION_RE = re.compile(r"[^\w\s-]+", re.UNICODE)
17
-
18
-
19
- def _lemmatize_token(token: str) -> str:
20
- """Return a lightweight lemma for ``token`` using heuristic rules."""
21
- irregular = {
22
- "children": "child",
23
- "mice": "mouse",
24
- "geese": "goose",
25
- "feet": "foot",
26
- "teeth": "tooth",
27
- "men": "man",
28
- "women": "woman",
29
- "better": "good",
30
- "worse": "bad",
31
- }
32
- lowered = token.lower()
33
- if lowered in irregular:
34
- return irregular[lowered]
35
-
36
- if lowered.endswith("ies") and len(lowered) > 3:
37
- return lowered[:-3] + "y"
38
- if lowered.endswith("ves") and len(lowered) > 3:
39
- return lowered[:-3] + "f"
40
- if lowered.endswith("men") and len(lowered) > 3:
41
- return lowered[:-3] + "man"
42
- if lowered.endswith("ses") and len(lowered) > 3:
43
- return lowered[:-2]
44
- if lowered.endswith("es") and len(lowered) > 3:
45
- return lowered[:-2]
46
- if lowered.endswith("s") and len(lowered) > 2 and not lowered.endswith("ss"):
47
- return lowered[:-1]
48
- if lowered.endswith("ing") and len(lowered) > 4:
49
- stem = lowered[:-3]
50
- if len(stem) > 2 and stem[-1] == stem[-2]:
51
- stem = stem[:-1]
52
- return stem
53
- if lowered.endswith("ed") and len(lowered) > 3:
54
- stem = lowered[:-2]
55
- if len(stem) > 2 and stem[-1] == stem[-2]:
56
- stem = stem[:-1]
57
- return stem
58
- return lowered
59
-
60
-
61
- def _normalize_phrase(phrase: str) -> str:
62
- """Normalise ``phrase`` for ConceptNet lookups."""
63
- stripped = _PUNCTUATION_RE.sub(" ", phrase.lower())
64
- tokens = [token for token in stripped.split() if token]
65
- if not tokens:
66
- return ""
67
- lemmatised = [_lemmatize_token(token) for token in tokens]
68
- return " ".join(lemmatised)
69
-
70
-
71
- def _concept_terms(normalized: str) -> list[str]:
72
- """Return ConceptNet term variants for ``normalized``."""
73
- collapsed = normalized.replace(" ", "_")
74
- if not collapsed:
75
- return []
76
- variants = {collapsed}
77
- variants.add(collapsed.replace("_", "-"))
78
- variants.add(collapsed.replace("-", "_"))
79
- return list(variants)
80
-
81
-
82
- def _surface_from_concept(concept: str) -> str | None:
83
- """Return a human-readable surface form for ``concept``."""
84
- match = _CONCEPT_RE.match(concept)
85
- if match is None:
86
- return None
87
- term = match.group("term")
88
- surface = term.replace("_", " ")
89
- surface = surface.replace("-", " ")
90
- return " ".join(surface.split())
91
-
92
-
93
- def _language_from_concept(concept: str) -> str | None:
94
- match = _CONCEPT_RE.match(concept)
95
- if match is None:
96
- return None
97
- return match.group("lang")
98
-
99
-
100
- def _load_numberbatch(path: Path, *, languages: set[str]) -> Mapping[str, list[float]]:
101
- """Load ConceptNet Numberbatch embeddings from ``path``."""
102
- if not path.exists():
103
- return {}
104
-
105
- if path.suffix == ".gz":
106
- import gzip
107
-
108
- handle = gzip.open(path, "rt", encoding="utf8")
109
- else:
110
- handle = path.open("r", encoding="utf8")
111
-
112
- with handle as stream:
113
- header = stream.readline()
114
- try:
115
- parts = header.strip().split()
116
- if len(parts) >= 2:
117
- int(parts[0])
118
- int(parts[1])
119
- except ValueError:
120
- stream.seek(0)
121
-
122
- embeddings: dict[str, list[float]] = {}
123
- for line in stream:
124
- tokens = line.strip().split()
125
- if len(tokens) <= 2:
126
- continue
127
- concept = tokens[0]
128
- lang = _language_from_concept(concept)
129
- if lang is None or lang not in languages:
130
- continue
131
- try:
132
- vector = [float(value) for value in tokens[1:]]
133
- except ValueError:
134
- continue
135
- embeddings[concept] = vector
136
- return embeddings
137
-
138
-
139
- class GraphLexicon(LexiconBackend):
140
- """Lexicon backed by ConceptNet/Numberbatch embeddings."""
141
-
142
- def __init__(
143
- self,
144
- *,
145
- source: Mapping[str, Sequence[float]] | str | Path | None = None,
146
- cache: Mapping[str, Sequence[str]] | None = None,
147
- cache_path: str | Path | None = None,
148
- languages: Iterable[str] = ("en",),
149
- max_neighbors: int = 50,
150
- min_similarity: float = 0.0,
151
- seed: int | None = None,
152
- ) -> None:
153
- super().__init__(seed=seed)
154
- self._languages = {language.lower() for language in languages}
155
- if not self._languages:
156
- self._languages = {"en"}
157
- self._max_neighbors = max(1, max_neighbors)
158
- self._min_similarity = min_similarity
159
- self._cache: MutableMapping[str, list[str]] = {}
160
- self._cache_path: Path | None = Path(cache_path) if cache_path is not None else None
161
- self._cache_checksum: str | None = None
162
- if self._cache_path is not None:
163
- snapshot = _load_cache_file(self._cache_path)
164
- self._cache.update(snapshot.entries)
165
- self._cache_checksum = snapshot.checksum
166
- if cache is not None:
167
- for key, values in cache.items():
168
- self._cache[str(key)] = [str(value) for value in values]
169
- self._cache_dirty = False
170
-
171
- prepared_source = self._prepare_source(source)
172
- self._backend = VectorLexicon(
173
- source=prepared_source if prepared_source else None,
174
- max_neighbors=self._max_neighbors,
175
- min_similarity=self._min_similarity,
176
- case_sensitive=True,
177
- seed=seed,
178
- )
179
-
180
- def _prepare_source(
181
- self, source: Mapping[str, Sequence[float]] | str | Path | None
182
- ) -> Mapping[str, Sequence[float]]:
183
- if source is None:
184
- return {}
185
- if isinstance(source, Mapping):
186
- prepared: dict[str, list[float]] = {}
187
- for key, vector in source.items():
188
- lang = _language_from_concept(key)
189
- if lang is None or lang not in self._languages:
190
- continue
191
- prepared[key] = [float(value) for value in vector]
192
- return prepared
193
- path = Path(source)
194
- embeddings = _load_numberbatch(path, languages=self._languages)
195
- return embeddings
196
-
197
- def reseed(self, seed: int | None) -> None:
198
- super().reseed(seed)
199
- self._backend.reseed(seed)
200
-
201
- def _concept_candidates(self, normalized: str) -> list[str]:
202
- terms = _concept_terms(normalized)
203
- concepts = []
204
- for language in sorted(self._languages):
205
- for term in terms:
206
- concepts.append(f"/c/{language}/{term}")
207
- return concepts
208
-
209
- def _collect_synonyms(self, normalized: str) -> list[str]:
210
- candidates: list[str] = []
211
- seen: set[str] = set()
212
- for concept in self._concept_candidates(normalized):
213
- neighbors = self._backend.precompute(concept, limit=self._max_neighbors)
214
- for neighbor in neighbors:
215
- lang = _language_from_concept(neighbor)
216
- if lang is None or lang not in self._languages:
217
- continue
218
- surface = _surface_from_concept(neighbor)
219
- if surface is None:
220
- continue
221
- surface_norm = _normalize_phrase(surface)
222
- if not surface_norm or surface_norm == normalized:
223
- continue
224
- if surface_norm in seen:
225
- continue
226
- seen.add(surface_norm)
227
- candidates.append(surface)
228
- return candidates
229
-
230
- def _ensure_cached(self, normalized: str) -> list[str]:
231
- if normalized in self._cache:
232
- return self._cache[normalized]
233
- synonyms = self._collect_synonyms(normalized)
234
- self._cache[normalized] = synonyms
235
- if self._cache_path is not None:
236
- self._cache_dirty = True
237
- return synonyms
238
-
239
- def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
240
- normalized = _normalize_phrase(word)
241
- if not normalized:
242
- return []
243
- synonyms = self._ensure_cached(normalized)
244
- return self._deterministic_sample(synonyms, limit=n, word=word, pos=pos)
245
-
246
- def precompute(self, word: str) -> list[str]:
247
- normalized = _normalize_phrase(word)
248
- if not normalized:
249
- return []
250
- return list(self._ensure_cached(normalized))
251
-
252
- def export_cache(self) -> dict[str, list[str]]:
253
- return {key: list(values) for key, values in self._cache.items()}
254
-
255
- @classmethod
256
- def load_cache(cls, path: str | Path) -> CacheSnapshot:
257
- """Load and validate a persisted ConceptNet cache file."""
258
- return _load_cache_file(Path(path))
259
-
260
- def save_cache(self, path: str | Path | None = None) -> Path:
261
- if path is None:
262
- if self._cache_path is None:
263
- raise RuntimeError("No cache path supplied to GraphLexicon.")
264
- target = self._cache_path
265
- else:
266
- target = Path(path)
267
- self._cache_path = target
268
- snapshot = _write_cache_file(target, self._cache)
269
- self._cache_checksum = snapshot.checksum
270
- self._cache_dirty = False
271
- return target
272
-
273
- def supports_pos(self, pos: str | None) -> bool:
274
- return True
275
-
276
- def __repr__(self) -> str: # pragma: no cover - debug helper
277
- adapter = getattr(self._backend, "_adapter", None)
278
- state = "loaded" if adapter else "empty"
279
- return (
280
- f"GraphLexicon(languages={sorted(self._languages)!r}, "
281
- f"max_neighbors={self._max_neighbors}, seed={self.seed!r}, state={state})"
282
- )
@@ -1,42 +0,0 @@
1
- glitchlings/__init__.py,sha256=qAV0OXtnIGs4YnG_L9xUt9bhTcVhYKrDHfN6ZcBMMX4,1114
2
- glitchlings/__main__.py,sha256=f-P4jiVBd7ZpS6QxRpa_6SJgOG03UhZhcWasMDRWLs8,120
3
- glitchlings/_zoo_rust.cpython-312-darwin.so,sha256=Ti0hKooawrq220u7rD2WJUL0Z2ulfx4-vySM0I4-UZE,2488416
4
- glitchlings/compat.py,sha256=BdGFf4cKbbHbmLPirNT3U76AXhSn3vpZ59DfNdEQWPQ,6827
5
- glitchlings/config.py,sha256=TshOTvVlQOhokDTteTMTti-7S2qWnVUJR4LyBYDhQAQ,12638
6
- glitchlings/config.toml,sha256=MWwgbx1-KIRAY3JZmMrCVbZNxFjHgRJXbtNAVuUNcxY,108
7
- glitchlings/main.py,sha256=FIpDIqN42HCDCSpsU_JkSzyWC-ugszArwCLfmq_ZCYU,10090
8
- glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
9
- glitchlings/dlc/_shared.py,sha256=EFSnush3rjjaf4La5QfVaf_KEp0U_l_3-q4PKx0A6NQ,1972
10
- glitchlings/dlc/huggingface.py,sha256=9lW7TnTHA_bXyo4Is8pymZchrB9BIL1bMCP2p7LCMtg,2576
11
- glitchlings/dlc/prime.py,sha256=qGFI1d4BiOEIgQZ5v9QnlbYx4J4q-vNlh5tWZng11xs,8607
12
- glitchlings/lexicon/__init__.py,sha256=myW5MPFBvsurdisvolE6ECfUraO_mF8Dhyp_KhWxIGs,6244
13
- glitchlings/lexicon/_cache.py,sha256=KQBesSY-XkH2WwM7Xa_LAPbJEZgIARb2odgrZIHrme8,3948
14
- glitchlings/lexicon/graph.py,sha256=BbK1YfD9vgfQGDg-QTRegII10IDrervAsu6e20gTFPs,10057
15
- glitchlings/lexicon/metrics.py,sha256=VBFfFpxjiEwZtK-jS55H8xP7MTC_0OjY8lQ5zSQ9aTY,4572
16
- glitchlings/lexicon/vector.py,sha256=ILK727WItcHYIRqSVnaAAbGIlj9QTTqjbwHy43UXdb0,19671
17
- glitchlings/lexicon/wordnet.py,sha256=05ApyN9h0bSw0PQEfjZUeInmGWphxVIuGASc8Zoc5n0,6313
18
- glitchlings/lexicon/data/default_vector_cache.json,sha256=7obKHqmR3odbTfgJPWLSRFYFh4J_6uvv_CntCSe_EjI,725
19
- glitchlings/util/__init__.py,sha256=vc3EAY8ehRjbOiryFdaqvvljXcyNGtZSPiEp9ok1vVw,4674
20
- glitchlings/util/adapters.py,sha256=psxQFYSFmh1u7NuqtIrKwQP5FOhOrZoxZzc7X7DDi9U,693
21
- glitchlings/zoo/__init__.py,sha256=lu1wnD-lRDJy8uTJKVRwL4qL-nyb0Vyfz9GbiOletCI,5107
22
- glitchlings/zoo/_ocr_confusions.py,sha256=Ju2_avXiwsr1p8zWFUTOzMxJ8vT5PpYobuGIn4L_sqI,1204
23
- glitchlings/zoo/_rate.py,sha256=Vb1_5HAzrqr9eAh_zzngSV-d0zI264zcYspnT3VHPkE,504
24
- glitchlings/zoo/_sampling.py,sha256=KrWyUSsYXghlvktS5hQBO0bPqywEEyA49A2qDWInB7Q,1586
25
- glitchlings/zoo/_text_utils.py,sha256=fS5L_eq-foBbBdiv4ymI8-O0D0csc3yDekHpX8bqfV4,2754
26
- glitchlings/zoo/adjax.py,sha256=VJgUasyAk7K3E23B5PzoJ5HaqWtcPJG649TzQRAHraA,3528
27
- glitchlings/zoo/core.py,sha256=yuCgLXFWJtu2fLOJoCWLtHspbcTFRZhUKobv1AlqKqs,19385
28
- glitchlings/zoo/jargoyle.py,sha256=sUhCy_0sD0KOAHQKRmy9PTu9FwJitaSBDxdhuX9j7ME,11452
29
- glitchlings/zoo/mim1c.py,sha256=-fgodKWZq--Xw8L2t1EqNbsh48bwX5jZxmiXdoaQShI,3437
30
- glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
31
- glitchlings/zoo/redactyl.py,sha256=6WgHIVumzRaDIyuLbewTJW5TXBs7s1CsKDPVXP4gkJc,5436
32
- glitchlings/zoo/reduple.py,sha256=GC1Sq4Ch7WOxL6gQZ9Ogs5EgXU2HkktLeMRVObBYFe4,4241
33
- glitchlings/zoo/rushmore.py,sha256=zxwiwcHHXHGLv8JLvKS4d0cduZ0qnwO9mcKOwXlMA1M,4305
34
- glitchlings/zoo/scannequin.py,sha256=JBpiSAnuSgCaEgPwCgSItiHms32wdLNXLPMotaBpYAs,4883
35
- glitchlings/zoo/typogre.py,sha256=7CHGfBkP4W2Bh8MCxtketA-3nlCb5QRCTUthRnXEhnk,6660
36
- glitchlings/zoo/zeedub.py,sha256=N4MBwWDRgcspLRlOCSAZ0hdwdnIj4h6uZLaJDFExj6Y,4823
37
- glitchlings-0.4.2.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
38
- glitchlings-0.4.2.dist-info/METADATA,sha256=1jm8iwNC4bhfp3nSc-O8POL_U0Fr6fw2RvvADpyxuqg,30721
39
- glitchlings-0.4.2.dist-info/WHEEL,sha256=o0zAoJUNILGJZxEeFPjb7OMHp_94eqIkZBeZ0gvgOpo,114
40
- glitchlings-0.4.2.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
41
- glitchlings-0.4.2.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
42
- glitchlings-0.4.2.dist-info/RECORD,,