glitchlings 0.3.0__cp310-cp310-macosx_11_0_universal2.whl → 0.4.0__cp310-cp310-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

glitchlings/__init__.py CHANGED
@@ -21,6 +21,7 @@ from .zoo import (
     Gaggle,
     summon,
 )
+from .config import AttackConfig, build_gaggle, load_attack_config
 from .util import SAMPLE_TEXT


@@ -47,4 +48,7 @@ __all__ = [
     "Glitchling",
     "Gaggle",
     "SAMPLE_TEXT",
+    "AttackConfig",
+    "build_gaggle",
+    "load_attack_config",
 ]
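The new configuration helpers are re-exported from the package root, so downstream code does not need to reach into the submodule. A minimal import sketch:

# The helpers added to __all__ above are importable from the package root
# as well as from glitchlings.config.
from glitchlings import AttackConfig, build_gaggle, load_attack_config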
Binary file
glitchlings/config.py ADDED
@@ -0,0 +1,258 @@
+"""Configuration utilities for runtime behaviour and declarative attack setups."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from io import TextIOBase
+from pathlib import Path
+from typing import Any, Mapping, Sequence, TYPE_CHECKING
+
+try:  # Python 3.11+
+    import tomllib
+except ModuleNotFoundError:  # pragma: no cover - Python < 3.11
+    import tomli as tomllib  # type: ignore[no-redef]
+
+import yaml
+
+
+if TYPE_CHECKING:  # pragma: no cover - typing only
+    from .zoo import Glitchling
+
+
+CONFIG_ENV_VAR = "GLITCHLINGS_CONFIG"
+DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")
+DEFAULT_LEXICON_PRIORITY = ["vector", "graph", "wordnet"]
+DEFAULT_ATTACK_SEED = 151
+
+
+@dataclass(slots=True)
+class LexiconConfig:
+    """Lexicon-specific configuration section."""
+
+    priority: list[str] = field(default_factory=lambda: list(DEFAULT_LEXICON_PRIORITY))
+    vector_cache: Path | None = None
+    graph_cache: Path | None = None
+
+
+@dataclass(slots=True)
+class RuntimeConfig:
+    """Top-level runtime configuration loaded from ``config.toml``."""
+
+    lexicon: LexiconConfig
+    path: Path
+
+
+_CONFIG: RuntimeConfig | None = None
+
+
+def reset_config() -> None:
+    """Forget any cached runtime configuration."""
+
+    global _CONFIG
+    _CONFIG = None
+
+
+def reload_config() -> RuntimeConfig:
+    """Reload the runtime configuration from disk."""
+
+    reset_config()
+    return get_config()
+
+
+def get_config() -> RuntimeConfig:
+    """Return the cached runtime configuration, loading it if necessary."""
+
+    global _CONFIG
+    if _CONFIG is None:
+        _CONFIG = _load_runtime_config()
+    return _CONFIG
+
+
+def _load_runtime_config() -> RuntimeConfig:
+    path = _resolve_config_path()
+    data = _read_toml(path)
+    lexicon_section = data.get("lexicon", {})
+
+    priority = lexicon_section.get("priority", DEFAULT_LEXICON_PRIORITY)
+    if not isinstance(priority, Sequence) or isinstance(priority, (str, bytes)):
+        raise ValueError("lexicon.priority must be a sequence of strings.")
+    normalized_priority = [str(item) for item in priority]
+
+    vector_cache = _resolve_optional_path(
+        lexicon_section.get("vector_cache"),
+        base=path.parent,
+    )
+    graph_cache = _resolve_optional_path(
+        lexicon_section.get("graph_cache"),
+        base=path.parent,
+    )
+
+    lexicon_config = LexiconConfig(
+        priority=normalized_priority,
+        vector_cache=vector_cache,
+        graph_cache=graph_cache,
+    )
+
+    return RuntimeConfig(lexicon=lexicon_config, path=path)
+
+
+def _resolve_config_path() -> Path:
+    override = os.environ.get(CONFIG_ENV_VAR)
+    if override:
+        return Path(override)
+    return DEFAULT_CONFIG_PATH
+
+
+def _read_toml(path: Path) -> dict[str, Any]:
+    if not path.exists():
+        if path == DEFAULT_CONFIG_PATH:
+            return {}
+        raise FileNotFoundError(f"Configuration file '{path}' not found.")
+    with path.open("rb") as handle:
+        return tomllib.load(handle)
+
+
+def _resolve_optional_path(value: Any, *, base: Path) -> Path | None:
+    if value in (None, ""):
+        return None
+
+    candidate = Path(str(value))
+    if not candidate.is_absolute():
+        candidate = (base / candidate).resolve()
+    return candidate
+
+
+@dataclass(slots=True)
+class AttackConfig:
+    """Structured representation of a glitchling roster loaded from YAML."""
+
+    glitchlings: list["Glitchling"]
+    seed: int | None = None
+
+
+def load_attack_config(
+    source: str | Path | TextIOBase,
+    *,
+    encoding: str = "utf-8",
+) -> AttackConfig:
+    """Load and parse an attack configuration from YAML."""
+
+    if isinstance(source, (str, Path)):
+        path = Path(source)
+        label = str(path)
+        try:
+            text = path.read_text(encoding=encoding)
+        except FileNotFoundError as exc:
+            raise ValueError(f"Attack configuration '{label}' was not found.") from exc
+    elif isinstance(source, TextIOBase):
+        label = getattr(source, "name", "<stream>")
+        text = source.read()
+    else:
+        raise TypeError("Attack configuration source must be a path or text stream.")
+
+    data = _load_yaml(text, label)
+    return parse_attack_config(data, source=label)
+
+
+def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
+    """Convert arbitrary YAML data into a validated ``AttackConfig``."""
+
+    if data is None:
+        raise ValueError(f"Attack configuration '{source}' is empty.")
+
+    if not isinstance(data, Mapping):
+        raise ValueError(f"Attack configuration '{source}' must be a mapping.")
+
+    raw_glitchlings = data.get("glitchlings")
+    if raw_glitchlings is None:
+        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")
+
+    if not isinstance(raw_glitchlings, Sequence) or isinstance(raw_glitchlings, (str, bytes)):
+        raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")
+
+    glitchlings: list["Glitchling"] = []
+    for index, entry in enumerate(raw_glitchlings, start=1):
+        glitchlings.append(_build_glitchling(entry, source, index))
+
+    seed = data.get("seed")
+    if seed is not None and not isinstance(seed, int):
+        raise ValueError(f"Seed in '{source}' must be an integer if provided.")
+
+    return AttackConfig(glitchlings=glitchlings, seed=seed)
+
+
+def build_gaggle(config: AttackConfig, *, seed_override: int | None = None):
+    """Instantiate a ``Gaggle`` according to ``config``."""
+
+    from .zoo import Gaggle  # Imported lazily to avoid circular dependencies
+
+    seed = seed_override if seed_override is not None else config.seed
+    if seed is None:
+        seed = DEFAULT_ATTACK_SEED
+
+    return Gaggle(config.glitchlings, seed=seed)
+
+
+def _load_yaml(text: str, label: str) -> Any:
+    try:
+        return yaml.safe_load(text)
+    except yaml.YAMLError as exc:
+        raise ValueError(f"Failed to parse attack configuration '{label}': {exc}") from exc
+
+
+def _build_glitchling(entry: Any, source: str, index: int):
+    from .zoo import get_glitchling_class, parse_glitchling_spec
+
+    if isinstance(entry, str):
+        try:
+            return parse_glitchling_spec(entry)
+        except ValueError as exc:
+            raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
+
+    if isinstance(entry, Mapping):
+        name_value = entry.get("name", entry.get("type"))
+        if not isinstance(name_value, str) or not name_value.strip():
+            raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
+
+        parameters = entry.get("parameters")
+        if parameters is not None:
+            if not isinstance(parameters, Mapping):
+                raise ValueError(f"{source}: glitchling '{name_value}' parameters must be a mapping.")
+            kwargs = dict(parameters)
+        else:
+            kwargs = {
+                key: value
+                for key, value in entry.items()
+                if key not in {"name", "type", "parameters"}
+            }
+
+        try:
+            glitchling_type = get_glitchling_class(name_value)
+        except ValueError as exc:
+            raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
+
+        try:
+            return glitchling_type(**kwargs)
+        except TypeError as exc:
+            raise ValueError(
+                f"{source}: glitchling #{index}: failed to instantiate '{name_value}': {exc}"
+            ) from exc
+
+    raise ValueError(f"{source}: glitchling #{index} must be a string or mapping.")
+
+
+__all__ = [
+    "AttackConfig",
+    "DEFAULT_ATTACK_SEED",
+    "DEFAULT_CONFIG_PATH",
+    "DEFAULT_LEXICON_PRIORITY",
+    "RuntimeConfig",
+    "LexiconConfig",
+    "build_gaggle",
+    "get_config",
+    "load_attack_config",
+    "parse_attack_config",
+    "reload_config",
+    "reset_config",
+]
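A hedged usage sketch for the new declarative attack loader: the expected YAML shape (a `glitchlings` list plus an optional `seed`) follows `parse_attack_config` above, but the glitchling name `Typo` and its `rate` parameter are hypothetical placeholders that must be replaced with a real entry from `glitchlings.zoo`.

# Sketch only: "Typo" and "rate" are made-up names; substitute a real
# glitchling, otherwise get_glitchling_class() will raise ValueError.
from io import StringIO

from glitchlings.config import build_gaggle, load_attack_config

attack_yaml = StringIO(
    "seed: 42\n"
    "glitchlings:\n"
    "  - name: Typo\n"          # hypothetical glitchling name
    "    parameters:\n"
    "      rate: 0.1\n"         # hypothetical parameter
)

config = load_attack_config(attack_yaml)  # also accepts a str or Path to a YAML file
gaggle = build_gaggle(config)             # falls back to DEFAULT_ATTACK_SEED (151) when no seed is set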
glitchlings/config.toml ADDED
@@ -0,0 +1,3 @@
+[lexicon]
+priority = ["vector", "graph", "wordnet"]
+vector_cache = "lexicon/data/default_vector_cache.json"
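The packaged defaults above can be replaced by pointing `GLITCHLINGS_CONFIG` at another TOML file, per `_resolve_config_path` in config.py. A small sketch; the path below is illustrative and the file must already exist, otherwise `_read_toml` raises `FileNotFoundError`:

# Sketch: overriding the bundled config.toml via the environment variable.
# "custom.toml" is an illustrative path and must exist on disk.
import os

from glitchlings.config import reload_config

os.environ["GLITCHLINGS_CONFIG"] = "custom.toml"
config = reload_config()            # clears the cache and re-reads from the override path
print(config.lexicon.priority)      # e.g. ["graph", "wordnet"] if custom.toml reorders backends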
@@ -0,0 +1,191 @@
+"""Lexicon abstractions and default backend resolution helpers."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from hashlib import blake2s
+import random
+from typing import Callable, Iterable
+
+from glitchlings.config import get_config
+
+
+class Lexicon(ABC):
+    """Abstract interface describing synonym lookup backends.
+
+    Parameters
+    ----------
+    seed:
+        Optional integer used to derive deterministic random number generators
+        for synonym sampling. Identical seeds guarantee reproducible results for
+        the same word/part-of-speech queries.
+    """
+
+    def __init__(self, *, seed: int | None = None) -> None:
+        self._seed = seed
+
+    @property
+    def seed(self) -> int | None:
+        """Return the current base seed used for deterministic sampling."""
+
+        return self._seed
+
+    def reseed(self, seed: int | None) -> None:
+        """Update the base seed driving deterministic synonym sampling."""
+
+        self._seed = seed
+
+    def _derive_rng(self, word: str, pos: str | None) -> random.Random:
+        """Return an RNG derived from the base seed, word, and POS tag."""
+
+        seed_material = blake2s(digest_size=8)
+        seed_material.update(word.lower().encode("utf8"))
+        if pos is not None:
+            seed_material.update(pos.lower().encode("utf8"))
+        seed_repr = "None" if self._seed is None else str(self._seed)
+        seed_material.update(seed_repr.encode("utf8"))
+        derived_seed = int.from_bytes(seed_material.digest(), "big", signed=False)
+        return random.Random(derived_seed)
+
+    def _deterministic_sample(
+        self, values: Iterable[str], *, limit: int, word: str, pos: str | None
+    ) -> list[str]:
+        """Return up to ``limit`` values sampled deterministically."""
+
+        if limit <= 0:
+            return []
+
+        items = list(values)
+        if len(items) <= limit:
+            return items
+
+        rng = self._derive_rng(word, pos)
+        indices = rng.sample(range(len(items)), k=limit)
+        indices.sort()
+        return [items[index] for index in indices]
+
+    @abstractmethod
+    def get_synonyms(
+        self, word: str, pos: str | None = None, n: int = 5
+    ) -> list[str]:
+        """Return up to ``n`` synonyms for ``word`` constrained by ``pos``."""
+
+    def supports_pos(self, pos: str | None) -> bool:
+        """Return ``True`` when the backend can service ``pos`` queries."""
+
+        return True
+
+    def __repr__(self) -> str:  # pragma: no cover - trivial representation
+        return f"{self.__class__.__name__}(seed={self._seed!r})"
+
+
+from .graph import GraphLexicon
+from .metrics import (
+    compare_lexicons,
+    coverage_ratio,
+    mean_cosine_similarity,
+    synonym_diversity,
+)
+from .vector import VectorLexicon, build_vector_cache
+
+try:  # pragma: no cover - optional dependency
+    from .wordnet import WordNetLexicon
+except Exception:  # pragma: no cover - triggered when nltk unavailable
+    WordNetLexicon = None  # type: ignore[assignment]
+
+
+_BACKEND_FACTORIES: dict[str, Callable[[int | None], Lexicon | None]] = {}
+
+
+def register_backend(
+    name: str, factory: Callable[[int | None], Lexicon | None]
+) -> None:
+    """Register ``factory`` for ``name`` so it can be selected via config."""
+
+    normalized = name.lower()
+    _BACKEND_FACTORIES[normalized] = factory
+
+
+def unregister_backend(name: str) -> None:
+    """Remove a previously registered backend."""
+
+    _BACKEND_FACTORIES.pop(name.lower(), None)
+
+
+def available_backends() -> list[str]:
+    """Return the names of registered lexicon factories."""
+
+    return sorted(_BACKEND_FACTORIES)
+
+
+def _vector_backend(seed: int | None) -> Lexicon | None:
+    config = get_config()
+    cache_path = config.lexicon.vector_cache
+    if cache_path is None:
+        return None
+    if not cache_path.exists():
+        return None
+    return VectorLexicon(cache_path=cache_path, seed=seed)
+
+
+def _graph_backend(seed: int | None) -> Lexicon | None:
+    config = get_config()
+    cache_path = config.lexicon.graph_cache
+    if cache_path is None:
+        return None
+    if not cache_path.exists():
+        return None
+    return GraphLexicon(cache_path=cache_path, seed=seed)
+
+
+def _wordnet_backend(seed: int | None) -> Lexicon | None:  # pragma: no cover - optional
+    if WordNetLexicon is None:
+        return None
+    try:
+        lexicon = WordNetLexicon(seed=seed)
+    except RuntimeError:
+        return None
+    return lexicon
+
+
+register_backend("vector", _vector_backend)
+register_backend("graph", _graph_backend)
+register_backend("wordnet", _wordnet_backend)
+
+
+def get_default_lexicon(seed: int | None = None) -> Lexicon:
+    """Return the first available lexicon according to configuration priority."""
+
+    config = get_config()
+    attempts: list[str] = []
+    for name in config.lexicon.priority:
+        factory = _BACKEND_FACTORIES.get(name.lower())
+        if factory is None:
+            attempts.append(f"{name} (unknown)")
+            continue
+        lexicon = factory(seed)
+        if lexicon is not None:
+            return lexicon
+        attempts.append(f"{name} (unavailable)")
+    attempted = ", ".join(attempts) or "<none>"
+    raise RuntimeError(
+        "No lexicon backends available; configure lexicon.priority with at least one "
+        f"working backend. Attempts: {attempted}."
+    )
+
+
+__all__ = [
+    "Lexicon",
+    "VectorLexicon",
+    "GraphLexicon",
+    "WordNetLexicon",
+    "build_vector_cache",
+    "compare_lexicons",
+    "coverage_ratio",
+    "mean_cosine_similarity",
+    "synonym_diversity",
+    "get_default_lexicon",
+    "register_backend",
+    "unregister_backend",
+    "available_backends",
+]
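Because backends are resolved through the registry above, projects can plug in their own lexicon. A minimal sketch, assuming this new module is importable as `glitchlings.lexicon`; the backend name `static` and the `StaticLexicon` class are made up for illustration:

# Sketch: a toy in-memory backend wired into the registry. "static" and
# StaticLexicon are illustrative names, not part of the package.
from glitchlings.lexicon import Lexicon, register_backend  # assumed module path

class StaticLexicon(Lexicon):
    _TABLE = {"fast": ["rapid", "swift", "quick", "speedy"]}

    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
        candidates = self._TABLE.get(word.lower(), [])
        # Reuse the base class sampler so results stay reproducible for a fixed seed.
        return self._deterministic_sample(candidates, limit=n, word=word, pos=pos)

register_backend("static", lambda seed: StaticLexicon(seed=seed))
# Listing "static" in lexicon.priority then makes get_default_lexicon() pick it up.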
@@ -0,0 +1,16 @@
+{
+  "sing": ["croon", "warble", "chant", "serenade"],
+  "happy": ["cheerful", "joyful", "contented", "gleeful"],
+  "songs": ["tunes", "melodies", "ballads", "airs"],
+  "quickly": ["rapidly", "swiftly", "speedily", "promptly"],
+  "text": ["passage", "excerpt", "phrase", "content"],
+  "words": ["terms", "phrases", "lexemes", "expressions"],
+  "alpha": ["beta", "gamma", "delta"],
+  "beta": ["alpha", "gamma", "delta"],
+  "gamma": ["alpha", "beta", "delta"],
+  "delta": ["alpha", "beta", "gamma"],
+  "they": ["these people", "those folks", "those individuals"],
+  "quick": ["rapid", "swift", "brisk", "prompt"],
+  "fast": ["rapid", "swift", "quick", "speedy"],
+  "slow": ["sluggish", "lethargic", "unhurried", "deliberate"]
+}
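Whichever backend the synonym table above ultimately feeds, callers normally go through `get_default_lexicon`. A sketch, again assuming the module is importable as `glitchlings.lexicon` and that at least one backend listed in `lexicon.priority` can be constructed:

# Sketch: resolve the first available backend per lexicon.priority and query it.
# Raises RuntimeError if no backend is available in this environment.
from glitchlings.lexicon import get_default_lexicon  # assumed module path

lexicon = get_default_lexicon(seed=151)
print(lexicon.get_synonyms("fast", n=3))  # deterministic 3-item sample for this seed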