glitchlings 0.3.0__cp312-cp312-manylinux_2_28_x86_64.whl → 0.4.0__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +4 -0
- glitchlings/_zoo_rust.cpython-312-x86_64-linux-gnu.so +0 -0
- glitchlings/config.py +258 -0
- glitchlings/config.toml +3 -0
- glitchlings/lexicon/__init__.py +191 -0
- glitchlings/lexicon/data/default_vector_cache.json +16 -0
- glitchlings/lexicon/graph.py +303 -0
- glitchlings/lexicon/metrics.py +169 -0
- glitchlings/lexicon/vector.py +610 -0
- glitchlings/lexicon/wordnet.py +182 -0
- glitchlings/main.py +145 -5
- glitchlings/zoo/__init__.py +15 -0
- glitchlings/zoo/_sampling.py +55 -0
- glitchlings/zoo/_text_utils.py +62 -0
- glitchlings/zoo/jargoyle.py +190 -200
- glitchlings/zoo/redactyl.py +26 -54
- glitchlings/zoo/reduple.py +10 -21
- glitchlings/zoo/rushmore.py +15 -21
- glitchlings/zoo/typogre.py +22 -1
- glitchlings/zoo/zeedub.py +40 -1
- {glitchlings-0.3.0.dist-info → glitchlings-0.4.0.dist-info}/METADATA +30 -8
- glitchlings-0.4.0.dist-info/RECORD +38 -0
- glitchlings-0.3.0.dist-info/RECORD +0 -29
- {glitchlings-0.3.0.dist-info → glitchlings-0.4.0.dist-info}/WHEEL +0 -0
- {glitchlings-0.3.0.dist-info → glitchlings-0.4.0.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.3.0.dist-info → glitchlings-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.3.0.dist-info → glitchlings-0.4.0.dist-info}/top_level.txt +0 -0
glitchlings/__init__.py
CHANGED
|
@@ -21,6 +21,7 @@ from .zoo import (
|
|
|
21
21
|
Gaggle,
|
|
22
22
|
summon,
|
|
23
23
|
)
|
|
24
|
+
from .config import AttackConfig, build_gaggle, load_attack_config
|
|
24
25
|
from .util import SAMPLE_TEXT
|
|
25
26
|
|
|
26
27
|
|
|
@@ -47,4 +48,7 @@ __all__ = [
|
|
|
47
48
|
"Glitchling",
|
|
48
49
|
"Gaggle",
|
|
49
50
|
"SAMPLE_TEXT",
|
|
51
|
+
"AttackConfig",
|
|
52
|
+
"build_gaggle",
|
|
53
|
+
"load_attack_config",
|
|
50
54
|
]
|
|
Binary file
|
glitchlings/config.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Configuration utilities for runtime behaviour and declarative attack setups."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from io import TextIOBase
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Mapping, Sequence, TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
try: # Python 3.11+
|
|
12
|
+
import tomllib
|
|
13
|
+
except ModuleNotFoundError: # pragma: no cover - Python < 3.11
|
|
14
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
20
|
+
from .zoo import Glitchling
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Environment variable that overrides where config.toml is looked up.
CONFIG_ENV_VAR = "GLITCHLINGS_CONFIG"
# Bundled fallback configuration file shipped next to this module.
DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")
# Backend resolution order used when config.toml does not supply one.
DEFAULT_LEXICON_PRIORITY = ["vector", "graph", "wordnet"]
# Seed applied by build_gaggle() when neither the config nor the caller sets one.
DEFAULT_ATTACK_SEED = 151
|
28
|
+
|
|
29
|
+
@dataclass(slots=True)
class LexiconConfig:
    """Lexicon-specific configuration section."""

    # Ordered backend names tried by the lexicon package; the default is
    # copied so instances never share (or mutate) the module-level list.
    priority: list[str] = field(default_factory=lambda: list(DEFAULT_LEXICON_PRIORITY))
    # Optional on-disk cache locations; resolved relative to the config file
    # by the loader, so these are absolute paths (or None) once constructed.
    vector_cache: Path | None = None
    graph_cache: Path | None = None
|
|
38
|
+
@dataclass(slots=True)
class RuntimeConfig:
    """Top-level runtime configuration loaded from ``config.toml``."""

    # Parsed [lexicon] section of the configuration file.
    lexicon: LexiconConfig
    # Path the configuration was resolved from (useful for diagnostics and
    # for interpreting relative paths).
    path: Path
|
|
46
|
+
_CONFIG: RuntimeConfig | None = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def reset_config() -> None:
    """Drop the memoised runtime configuration so the next read reloads it."""

    global _CONFIG
    _CONFIG = None
|
|
55
|
+
|
|
56
|
+
def reload_config() -> RuntimeConfig:
    """Discard any cached configuration, then load a fresh copy from disk."""

    reset_config()
    return get_config()
|
62
|
+
|
|
63
|
+
def get_config() -> RuntimeConfig:
    """Return the process-wide runtime configuration, loading it on first use."""

    global _CONFIG
    cached = _CONFIG
    if cached is None:
        cached = _load_runtime_config()
        _CONFIG = cached
    return cached
|
|
71
|
+
|
|
72
|
+
def _load_runtime_config() -> RuntimeConfig:
    """Read config.toml from disk and materialise it as a ``RuntimeConfig``."""

    config_path = _resolve_config_path()
    raw = _read_toml(config_path)
    section = raw.get("lexicon", {})

    # A bare string would iterate as characters, so it is rejected alongside
    # any other non-sequence value.
    raw_priority = section.get("priority", DEFAULT_LEXICON_PRIORITY)
    if isinstance(raw_priority, (str, bytes)) or not isinstance(raw_priority, Sequence):
        raise ValueError("lexicon.priority must be a sequence of strings.")

    lexicon_config = LexiconConfig(
        priority=[str(entry) for entry in raw_priority],
        # Relative cache paths are interpreted against the config file's directory.
        vector_cache=_resolve_optional_path(
            section.get("vector_cache"), base=config_path.parent
        ),
        graph_cache=_resolve_optional_path(
            section.get("graph_cache"), base=config_path.parent
        ),
    )

    return RuntimeConfig(lexicon=lexicon_config, path=config_path)
|
+
|
|
100
|
+
def _resolve_config_path() -> Path:
    """Return the config path, honouring the ``GLITCHLINGS_CONFIG`` override."""

    env_value = os.environ.get(CONFIG_ENV_VAR)
    return Path(env_value) if env_value else DEFAULT_CONFIG_PATH
|
+
|
|
107
|
+
def _read_toml(path: Path) -> dict[str, Any]:
|
|
108
|
+
if not path.exists():
|
|
109
|
+
if path == DEFAULT_CONFIG_PATH:
|
|
110
|
+
return {}
|
|
111
|
+
raise FileNotFoundError(f"Configuration file '{path}' not found.")
|
|
112
|
+
with path.open("rb") as handle:
|
|
113
|
+
return tomllib.load(handle)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _resolve_optional_path(value: Any, *, base: Path) -> Path | None:
|
|
117
|
+
if value in (None, ""):
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
candidate = Path(str(value))
|
|
121
|
+
if not candidate.is_absolute():
|
|
122
|
+
candidate = (base / candidate).resolve()
|
|
123
|
+
return candidate
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass(slots=True)
class AttackConfig:
    """Structured representation of a glitchling roster loaded from YAML."""

    # Instantiated glitchlings, in roster order.
    glitchlings: list["Glitchling"]
    # Optional seed from the YAML document; build_gaggle() substitutes
    # DEFAULT_ATTACK_SEED when this is None.
    seed: int | None = None
+
|
|
133
|
+
|
|
134
|
+
def load_attack_config(
    source: str | Path | TextIOBase,
    *,
    encoding: str = "utf-8",
) -> AttackConfig:
    """Load and parse an attack configuration from YAML.

    Parameters
    ----------
    source:
        Path to a YAML file, or an already-open readable text stream.
    encoding:
        Encoding used when reading from a path; ignored for streams.

    Returns
    -------
    AttackConfig
        The validated roster described by the document.

    Raises
    ------
    ValueError
        If the file is missing or its content is invalid.
    TypeError
        If ``source`` is neither a path nor a readable text stream.
    """

    if isinstance(source, (str, Path)):
        path = Path(source)
        label = str(path)
        try:
            text = path.read_text(encoding=encoding)
        except FileNotFoundError as exc:
            # Surface a ValueError so callers only handle one exception type.
            raise ValueError(f"Attack configuration '{label}' was not found.") from exc
    elif hasattr(source, "read"):
        # Generalised from ``isinstance(source, TextIOBase)`` so duck-typed
        # file-likes (e.g. codecs stream wrappers) are accepted as well.
        label = getattr(source, "name", "<stream>")
        text = source.read()
    else:
        raise TypeError("Attack configuration source must be a path or text stream.")

    data = _load_yaml(text, label)
    return parse_attack_config(data, source=label)
+
|
|
157
|
+
|
|
158
|
+
def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
    """Convert arbitrary YAML data into a validated ``AttackConfig``.

    Parameters
    ----------
    data:
        Deserialised YAML content; must be a mapping with a ``glitchlings``
        sequence and an optional integer ``seed``.
    source:
        Human-readable label used in error messages.

    Raises
    ------
    ValueError
        If the structure does not describe a valid roster.
    """

    if data is None:
        raise ValueError(f"Attack configuration '{source}' is empty.")

    if not isinstance(data, Mapping):
        raise ValueError(f"Attack configuration '{source}' must be a mapping.")

    raw_glitchlings = data.get("glitchlings")
    if raw_glitchlings is None:
        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")

    # Strings/bytes iterate as characters, so they are rejected explicitly.
    if not isinstance(raw_glitchlings, Sequence) or isinstance(raw_glitchlings, (str, bytes)):
        raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")

    glitchlings: list["Glitchling"] = [
        _build_glitchling(entry, source, index)
        for index, entry in enumerate(raw_glitchlings, start=1)
    ]

    seed = data.get("seed")
    # bool is a subclass of int, so ``seed: true`` previously slipped through
    # as seed=1; reject booleans explicitly.
    if seed is not None and (isinstance(seed, bool) or not isinstance(seed, int)):
        raise ValueError(f"Seed in '{source}' must be an integer if provided.")

    return AttackConfig(glitchlings=glitchlings, seed=seed)
|
184
|
+
|
|
185
|
+
def build_gaggle(config: AttackConfig, *, seed_override: int | None = None):
    """Instantiate a ``Gaggle`` from ``config``, optionally overriding its seed.

    Precedence: explicit ``seed_override``, then ``config.seed``, then
    ``DEFAULT_ATTACK_SEED``.
    """

    from .zoo import Gaggle  # Imported lazily to avoid circular dependencies

    if seed_override is not None:
        seed = seed_override
    elif config.seed is not None:
        seed = config.seed
    else:
        seed = DEFAULT_ATTACK_SEED

    return Gaggle(config.glitchlings, seed=seed)
+
|
|
196
|
+
|
|
197
|
+
def _load_yaml(text: str, label: str) -> Any:
    """Parse ``text`` as YAML, wrapping parser failures in ``ValueError``."""

    try:
        parsed = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise ValueError(f"Failed to parse attack configuration '{label}': {exc}") from exc
    return parsed
+
|
|
203
|
+
|
|
204
|
+
def _build_glitchling(entry: Any, source: str, index: int):
    """Construct one glitchling from a roster entry (spec string or mapping).

    ``source`` and ``index`` (1-based) are only used to label error messages.
    """

    from .zoo import get_glitchling_class, parse_glitchling_spec

    # Compact form: a single spec string.
    if isinstance(entry, str):
        try:
            return parse_glitchling_spec(entry)
        except ValueError as exc:
            raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc

    if not isinstance(entry, Mapping):
        raise ValueError(f"{source}: glitchling #{index} must be a string or mapping.")

    # Mapping form: 'name' (or legacy 'type') plus constructor parameters.
    name_value = entry.get("name", entry.get("type"))
    if not isinstance(name_value, str) or not name_value.strip():
        raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")

    parameters = entry.get("parameters")
    if parameters is None:
        # Inline style: every key other than the reserved ones is a kwarg.
        kwargs = {
            key: value
            for key, value in entry.items()
            if key not in {"name", "type", "parameters"}
        }
    else:
        if not isinstance(parameters, Mapping):
            raise ValueError(f"{source}: glitchling '{name_value}' parameters must be a mapping.")
        kwargs = dict(parameters)

    try:
        glitchling_type = get_glitchling_class(name_value)
    except ValueError as exc:
        raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc

    try:
        return glitchling_type(**kwargs)
    except TypeError as exc:
        raise ValueError(
            f"{source}: glitchling #{index}: failed to instantiate '{name_value}': {exc}"
        ) from exc
+
|
|
244
|
+
|
|
245
|
+
__all__ = [
|
|
246
|
+
"AttackConfig",
|
|
247
|
+
"DEFAULT_ATTACK_SEED",
|
|
248
|
+
"DEFAULT_CONFIG_PATH",
|
|
249
|
+
"DEFAULT_LEXICON_PRIORITY",
|
|
250
|
+
"RuntimeConfig",
|
|
251
|
+
"LexiconConfig",
|
|
252
|
+
"build_gaggle",
|
|
253
|
+
"get_config",
|
|
254
|
+
"load_attack_config",
|
|
255
|
+
"parse_attack_config",
|
|
256
|
+
"reload_config",
|
|
257
|
+
"reset_config",
|
|
258
|
+
]
|
glitchlings/config.toml
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Lexicon abstractions and default backend resolution helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from hashlib import blake2s
|
|
7
|
+
import random
|
|
8
|
+
from typing import Callable, Iterable
|
|
9
|
+
|
|
10
|
+
from glitchlings.config import get_config
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Lexicon(ABC):
    """Abstract interface for synonym lookup backends.

    Parameters
    ----------
    seed:
        Optional integer from which per-query random number generators are
        derived. The same seed yields reproducible results for identical
        word/part-of-speech queries.
    """

    def __init__(self, *, seed: int | None = None) -> None:
        self._seed = seed

    @property
    def seed(self) -> int | None:
        """The base seed driving deterministic sampling (may be ``None``)."""

        return self._seed

    def reseed(self, seed: int | None) -> None:
        """Replace the base seed used for deterministic synonym sampling."""

        self._seed = seed

    def _derive_rng(self, word: str, pos: str | None) -> random.Random:
        """Build a ``random.Random`` keyed on (base seed, word, POS tag)."""

        # Hash word, then optional POS, then the seed repr; this update order
        # is part of the determinism contract and must not change.
        hasher = blake2s(digest_size=8)
        hasher.update(word.lower().encode("utf8"))
        if pos is not None:
            hasher.update(pos.lower().encode("utf8"))
        hasher.update(("None" if self._seed is None else str(self._seed)).encode("utf8"))
        return random.Random(int.from_bytes(hasher.digest(), "big", signed=False))

    def _deterministic_sample(
        self, values: Iterable[str], *, limit: int, word: str, pos: str | None
    ) -> list[str]:
        """Pick at most ``limit`` values, reproducibly for a given seed/word/POS."""

        if limit <= 0:
            return []

        pool = list(values)
        if len(pool) <= limit:
            return pool

        # Sample indices (not items) and emit them in original order.
        rng = self._derive_rng(word, pos)
        chosen = sorted(rng.sample(range(len(pool)), k=limit))
        return [pool[position] for position in chosen]

    @abstractmethod
    def get_synonyms(
        self, word: str, pos: str | None = None, n: int = 5
    ) -> list[str]:
        """Return up to ``n`` synonyms for ``word`` constrained by ``pos``."""

    def supports_pos(self, pos: str | None) -> bool:
        """Return ``True`` when the backend can service ``pos`` queries."""

        return True

    def __repr__(self) -> str:  # pragma: no cover - trivial representation
        return f"{self.__class__.__name__}(seed={self._seed!r})"
|
+
|
|
81
|
+
|
|
82
|
+
from .graph import GraphLexicon
|
|
83
|
+
from .metrics import (
|
|
84
|
+
compare_lexicons,
|
|
85
|
+
coverage_ratio,
|
|
86
|
+
mean_cosine_similarity,
|
|
87
|
+
synonym_diversity,
|
|
88
|
+
)
|
|
89
|
+
from .vector import VectorLexicon, build_vector_cache
|
|
90
|
+
|
|
91
|
+
try: # pragma: no cover - optional dependency
|
|
92
|
+
from .wordnet import WordNetLexicon
|
|
93
|
+
except Exception: # pragma: no cover - triggered when nltk unavailable
|
|
94
|
+
WordNetLexicon = None # type: ignore[assignment]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
_BACKEND_FACTORIES: dict[str, Callable[[int | None], Lexicon | None]] = {}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def register_backend(
    name: str, factory: Callable[[int | None], Lexicon | None]
) -> None:
    """Make ``factory`` selectable via configuration under ``name``.

    Names are case-insensitive; re-registering a name replaces the factory.
    """

    _BACKEND_FACTORIES[name.lower()] = factory
+
|
|
108
|
+
|
|
109
|
+
def unregister_backend(name: str) -> None:
    """Drop a previously registered backend; unknown names are ignored."""

    _BACKEND_FACTORIES.pop(name.lower(), None)
+
|
|
114
|
+
|
|
115
|
+
def available_backends() -> list[str]:
    """Return the registered lexicon factory names in sorted order."""

    return sorted(_BACKEND_FACTORIES.keys())
|
+
|
|
120
|
+
|
|
121
|
+
def _vector_backend(seed: int | None) -> Lexicon | None:
    """Factory for the vector backend; ``None`` when its cache is unconfigured or absent."""

    cache_path = get_config().lexicon.vector_cache
    if cache_path is None or not cache_path.exists():
        return None
    return VectorLexicon(cache_path=cache_path, seed=seed)
|
|
130
|
+
|
|
131
|
+
def _graph_backend(seed: int | None) -> Lexicon | None:
    """Factory for the graph backend; ``None`` when its cache is unconfigured or absent."""

    cache_path = get_config().lexicon.graph_cache
    if cache_path is None or not cache_path.exists():
        return None
    return GraphLexicon(cache_path=cache_path, seed=seed)
|
|
140
|
+
|
|
141
|
+
def _wordnet_backend(seed: int | None) -> Lexicon | None:  # pragma: no cover - optional
    """Factory for the optional WordNet backend; ``None`` when it cannot start."""

    if WordNetLexicon is None:
        # nltk (or the wordnet module) failed to import at package load time.
        return None
    try:
        return WordNetLexicon(seed=seed)
    except RuntimeError:
        return None
+
|
|
150
|
+
|
|
151
|
+
# Built-in backends; get_default_lexicon() tries them in the order given by
# the configured lexicon.priority list.
register_backend("vector", _vector_backend)
register_backend("graph", _graph_backend)
register_backend("wordnet", _wordnet_backend)
|
+
|
|
155
|
+
|
|
156
|
+
def get_default_lexicon(seed: int | None = None) -> Lexicon:
    """Return the first usable lexicon backend per the configured priority list.

    Parameters
    ----------
    seed:
        Optional base seed forwarded to the backend factory.

    Raises
    ------
    RuntimeError
        When no configured backend can be constructed; the message lists
        every attempted backend and why it was skipped.
    """

    failures: list[str] = []
    for backend_name in get_config().lexicon.priority:
        factory = _BACKEND_FACTORIES.get(backend_name.lower())
        if factory is None:
            failures.append(f"{backend_name} (unknown)")
            continue
        backend = factory(seed)
        if backend is not None:
            return backend
        failures.append(f"{backend_name} (unavailable)")

    attempted = ", ".join(failures) or "<none>"
    raise RuntimeError(
        "No lexicon backends available; configure lexicon.priority with at least one "
        f"working backend. Attempts: {attempted}."
    )
|
+
|
|
176
|
+
|
|
177
|
+
__all__ = [
|
|
178
|
+
"Lexicon",
|
|
179
|
+
"VectorLexicon",
|
|
180
|
+
"GraphLexicon",
|
|
181
|
+
"WordNetLexicon",
|
|
182
|
+
"build_vector_cache",
|
|
183
|
+
"compare_lexicons",
|
|
184
|
+
"coverage_ratio",
|
|
185
|
+
"mean_cosine_similarity",
|
|
186
|
+
"synonym_diversity",
|
|
187
|
+
"get_default_lexicon",
|
|
188
|
+
"register_backend",
|
|
189
|
+
"unregister_backend",
|
|
190
|
+
"available_backends",
|
|
191
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"sing": ["croon", "warble", "chant", "serenade"],
|
|
3
|
+
"happy": ["cheerful", "joyful", "contented", "gleeful"],
|
|
4
|
+
"songs": ["tunes", "melodies", "ballads", "airs"],
|
|
5
|
+
"quickly": ["rapidly", "swiftly", "speedily", "promptly"],
|
|
6
|
+
"text": ["passage", "excerpt", "phrase", "content"],
|
|
7
|
+
"words": ["terms", "phrases", "lexemes", "expressions"],
|
|
8
|
+
"alpha": ["beta", "gamma", "delta"],
|
|
9
|
+
"beta": ["alpha", "gamma", "delta"],
|
|
10
|
+
"gamma": ["alpha", "beta", "delta"],
|
|
11
|
+
"delta": ["alpha", "beta", "gamma"],
|
|
12
|
+
"they": ["these people", "those folks", "those individuals"],
|
|
13
|
+
"quick": ["rapid", "swift", "brisk", "prompt"],
|
|
14
|
+
"fast": ["rapid", "swift", "quick", "speedy"],
|
|
15
|
+
"slow": ["sluggish", "lethargic", "unhurried", "deliberate"]
|
|
16
|
+
}
|