glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. glitchlings/__init__.py +36 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/pipeline_assets.json +29 -0
  17. glitchlings/attack/__init__.py +53 -0
  18. glitchlings/attack/compose.py +299 -0
  19. glitchlings/attack/core.py +465 -0
  20. glitchlings/attack/encode.py +114 -0
  21. glitchlings/attack/metrics.py +104 -0
  22. glitchlings/attack/metrics_dispatch.py +70 -0
  23. glitchlings/attack/tokenization.py +157 -0
  24. glitchlings/auggie.py +283 -0
  25. glitchlings/compat/__init__.py +9 -0
  26. glitchlings/compat/loaders.py +355 -0
  27. glitchlings/compat/types.py +41 -0
  28. glitchlings/conf/__init__.py +41 -0
  29. glitchlings/conf/loaders.py +331 -0
  30. glitchlings/conf/schema.py +156 -0
  31. glitchlings/conf/types.py +72 -0
  32. glitchlings/config.toml +2 -0
  33. glitchlings/constants.py +59 -0
  34. glitchlings/dev/__init__.py +3 -0
  35. glitchlings/dev/docs.py +45 -0
  36. glitchlings/dlc/__init__.py +17 -3
  37. glitchlings/dlc/_shared.py +296 -0
  38. glitchlings/dlc/gutenberg.py +400 -0
  39. glitchlings/dlc/huggingface.py +37 -65
  40. glitchlings/dlc/prime.py +55 -114
  41. glitchlings/dlc/pytorch.py +98 -0
  42. glitchlings/dlc/pytorch_lightning.py +173 -0
  43. glitchlings/internal/__init__.py +16 -0
  44. glitchlings/internal/rust.py +159 -0
  45. glitchlings/internal/rust_ffi.py +432 -0
  46. glitchlings/main.py +123 -32
  47. glitchlings/runtime_config.py +24 -0
  48. glitchlings/util/__init__.py +29 -176
  49. glitchlings/util/adapters.py +65 -0
  50. glitchlings/util/keyboards.py +311 -0
  51. glitchlings/util/transcripts.py +108 -0
  52. glitchlings/zoo/__init__.py +47 -24
  53. glitchlings/zoo/assets/__init__.py +29 -0
  54. glitchlings/zoo/core.py +301 -167
  55. glitchlings/zoo/core_execution.py +98 -0
  56. glitchlings/zoo/core_planning.py +451 -0
  57. glitchlings/zoo/corrupt_dispatch.py +295 -0
  58. glitchlings/zoo/ekkokin.py +118 -0
  59. glitchlings/zoo/hokey.py +137 -0
  60. glitchlings/zoo/jargoyle.py +179 -274
  61. glitchlings/zoo/mim1c.py +106 -68
  62. glitchlings/zoo/pedant/__init__.py +107 -0
  63. glitchlings/zoo/pedant/core.py +105 -0
  64. glitchlings/zoo/pedant/forms.py +74 -0
  65. glitchlings/zoo/pedant/stones.py +74 -0
  66. glitchlings/zoo/redactyl.py +44 -175
  67. glitchlings/zoo/rng.py +259 -0
  68. glitchlings/zoo/rushmore.py +359 -116
  69. glitchlings/zoo/scannequin.py +18 -125
  70. glitchlings/zoo/transforms.py +386 -0
  71. glitchlings/zoo/typogre.py +76 -162
  72. glitchlings/zoo/validation.py +477 -0
  73. glitchlings/zoo/zeedub.py +33 -86
  74. glitchlings-0.9.3.dist-info/METADATA +334 -0
  75. glitchlings-0.9.3.dist-info/RECORD +80 -0
  76. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
  77. glitchlings/zoo/_ocr_confusions.py +0 -34
  78. glitchlings/zoo/_rate.py +0 -21
  79. glitchlings/zoo/reduple.py +0 -169
  80. glitchlings-0.2.5.dist-info/METADATA +0 -490
  81. glitchlings-0.2.5.dist-info/RECORD +0 -27
  82. /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
  83. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
  84. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
  85. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
@@ -1,198 +1,59 @@
1
- import re
2
1
  import random
3
- from typing import Any
2
+ from typing import cast
4
3
 
5
- from .core import Glitchling, AttackWave
6
- from ._rate import resolve_rate
4
+ from glitchlings.constants import DEFAULT_REDACTYL_CHAR, DEFAULT_REDACTYL_RATE
5
+ from glitchlings.internal.rust_ffi import redact_words_rust, resolve_seed
7
6
 
8
- FULL_BLOCK = "█"
7
+ from .core import AttackWave, Glitchling, PipelineOperationPayload
9
8
 
10
-
11
- try:
12
- from glitchlings._zoo_rust import redact_words as _redact_words_rust
13
- except ImportError: # pragma: no cover - compiled extension not present
14
- _redact_words_rust = None
15
-
16
-
17
- def _weighted_sample_without_replacement(
18
- population: list[int],
19
- weights: list[float],
20
- *,
21
- k: int,
22
- rng: random.Random,
23
- ) -> list[int]:
24
- """Select `k` unique indices according to the given weights."""
25
-
26
- selections: list[int] = []
27
- items = list(zip(population, weights))
28
- if k <= 0 or not items:
29
- return selections
30
- if k > len(items):
31
- raise ValueError("Sample larger than population or is negative")
32
-
33
- for _ in range(k):
34
- total_weight = sum(weight for _, weight in items)
35
- if total_weight <= 0:
36
- chosen_index = rng.randrange(len(items))
37
- else:
38
- threshold = rng.random() * total_weight
39
- cumulative = 0.0
40
- chosen_index = len(items) - 1
41
- for idx, (_, weight) in enumerate(items):
42
- cumulative += weight
43
- if cumulative >= threshold:
44
- chosen_index = idx
45
- break
46
- value, _ = items.pop(chosen_index)
47
- selections.append(value)
48
-
49
- return selections
50
-
51
-
52
- def _python_redact_words(
53
- text: str,
54
- *,
55
- replacement_char: str,
56
- rate: float,
57
- merge_adjacent: bool,
58
- rng: random.Random,
59
- unweighted: bool = False,
60
- ) -> str:
61
- """Redact random words by replacing their characters.
62
-
63
- Parameters
64
- - text: Input text.
65
- - replacement_char: The character to use for redaction (default FULL_BLOCK).
66
- - rate: Max proportion of words to redact (default 0.05).
67
- - merge_adjacent: If True, merges adjacent redactions across intervening non-word chars.
68
- - rng: RNG used for sampling decisions.
69
- - unweighted: When True, sample words uniformly instead of by length.
70
- """
71
- # Preserve exact spacing and punctuation by using regex
72
- tokens = re.split(r"(\s+)", text)
73
- word_indices = [i for i, token in enumerate(tokens) if i % 2 == 0 and token.strip()]
74
- if not word_indices:
75
- raise ValueError(
76
- "Cannot redact words because the input text contains no redactable words."
77
- )
78
- weights: list[float] = []
79
- for index in word_indices:
80
- word = tokens[index]
81
- match = re.match(r"^(\W*)(.*?)(\W*)$", word)
82
- core = match.group(2) if match else word
83
- core_length = len(core) if core else len(word)
84
- if core_length <= 0:
85
- core_length = len(word.strip()) or len(word)
86
- if core_length <= 0:
87
- core_length = 1
88
- weights.append(1.0 if unweighted else float(core_length))
89
- num_to_redact = max(1, int(len(word_indices) * rate))
90
- if num_to_redact > len(word_indices):
91
- raise ValueError("Sample larger than population or is negative")
92
- indices_to_redact = _weighted_sample_without_replacement(
93
- word_indices,
94
- weights,
95
- k=num_to_redact,
96
- rng=rng,
97
- )
98
- indices_to_redact.sort()
99
-
100
- for i in indices_to_redact:
101
- if i >= len(tokens):
102
- break
103
-
104
- word = tokens[i]
105
- if not word or word.isspace(): # Skip empty or whitespace
106
- continue
107
-
108
- # Check if word has trailing punctuation
109
- match = re.match(r"^(\W*)(.*?)(\W*)$", word)
110
- if match:
111
- prefix, core, suffix = match.groups()
112
- tokens[i] = f"{prefix}{replacement_char * len(core)}{suffix}"
113
- else:
114
- tokens[i] = f"{replacement_char * len(word)}"
115
-
116
- text = "".join(tokens)
117
-
118
- if merge_adjacent:
119
- text = re.sub(
120
- rf"{replacement_char}\W+{replacement_char}",
121
- lambda m: replacement_char * (len(m.group(0)) - 1),
122
- text,
123
- )
124
-
125
- return text
9
+ # Backwards compatibility alias
10
+ FULL_BLOCK = DEFAULT_REDACTYL_CHAR
126
11
 
127
12
 
128
13
  def redact_words(
129
14
  text: str,
130
- replacement_char: str = FULL_BLOCK,
15
+ replacement_char: str | None = DEFAULT_REDACTYL_CHAR,
131
16
  rate: float | None = None,
132
- merge_adjacent: bool = False,
17
+ merge_adjacent: bool | None = False,
133
18
  seed: int = 151,
134
19
  rng: random.Random | None = None,
135
20
  *,
136
- redaction_rate: float | None = None,
137
21
  unweighted: bool = False,
138
22
  ) -> str:
139
23
  """Redact random words by replacing their characters."""
24
+ effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
140
25
 
141
- effective_rate = resolve_rate(
142
- rate=rate,
143
- legacy_value=redaction_rate,
144
- default=0.025,
145
- legacy_name="redaction_rate",
146
- )
147
-
148
- if rng is None:
149
- rng = random.Random(seed)
26
+ replacement = DEFAULT_REDACTYL_CHAR if replacement_char is None else str(replacement_char)
27
+ merge = False if merge_adjacent is None else bool(merge_adjacent)
150
28
 
151
- clamped_rate = max(0.0, effective_rate)
29
+ clamped_rate = max(0.0, min(effective_rate, 1.0))
152
30
  unweighted_flag = bool(unweighted)
153
31
 
154
- use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
155
-
156
- if use_rust:
157
- return _redact_words_rust(
158
- text,
159
- replacement_char,
160
- clamped_rate,
161
- merge_adjacent,
162
- unweighted_flag,
163
- rng,
164
- )
165
-
166
- return _python_redact_words(
32
+ return redact_words_rust(
167
33
  text,
168
- replacement_char=replacement_char,
169
- rate=clamped_rate,
170
- merge_adjacent=merge_adjacent,
171
- rng=rng,
172
- unweighted=unweighted_flag,
34
+ replacement,
35
+ clamped_rate,
36
+ merge,
37
+ unweighted_flag,
38
+ resolve_seed(seed, rng),
173
39
  )
174
40
 
175
41
 
176
42
  class Redactyl(Glitchling):
177
43
  """Glitchling that redacts words with block characters."""
178
44
 
45
+ flavor = "Some things are better left ████████."
46
+
179
47
  def __init__(
180
48
  self,
181
49
  *,
182
- replacement_char: str = FULL_BLOCK,
50
+ replacement_char: str = DEFAULT_REDACTYL_CHAR,
183
51
  rate: float | None = None,
184
- redaction_rate: float | None = None,
185
52
  merge_adjacent: bool = False,
186
53
  seed: int = 151,
187
54
  unweighted: bool = False,
188
55
  ) -> None:
189
- self._param_aliases = {"redaction_rate": "rate"}
190
- effective_rate = resolve_rate(
191
- rate=rate,
192
- legacy_value=redaction_rate,
193
- default=0.025,
194
- legacy_name="redaction_rate",
195
- )
56
+ effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
196
57
  super().__init__(
197
58
  name="Redactyl",
198
59
  corruption_function=redact_words,
@@ -204,23 +65,31 @@ class Redactyl(Glitchling):
204
65
  unweighted=unweighted,
205
66
  )
206
67
 
207
- def pipeline_operation(self) -> dict[str, Any] | None:
208
- replacement_char = self.kwargs.get("replacement_char")
209
- rate = self.kwargs.get("rate")
210
- merge_adjacent = self.kwargs.get("merge_adjacent")
211
- if replacement_char is None or rate is None or merge_adjacent is None:
212
- return None
68
+ def pipeline_operation(self) -> PipelineOperationPayload:
69
+ replacement_char_value = self.kwargs.get("replacement_char", DEFAULT_REDACTYL_CHAR)
70
+ rate_value = self.kwargs.get("rate", DEFAULT_REDACTYL_RATE)
71
+ merge_value = self.kwargs.get("merge_adjacent", False)
72
+
73
+ replacement_char = str(
74
+ DEFAULT_REDACTYL_CHAR if replacement_char_value is None else replacement_char_value
75
+ )
76
+ rate = float(DEFAULT_REDACTYL_RATE if rate_value is None else rate_value)
77
+ merge_adjacent = bool(merge_value)
213
78
  unweighted = bool(self.kwargs.get("unweighted", False))
214
- return {
215
- "type": "redact",
216
- "replacement_char": str(replacement_char),
217
- "redaction_rate": float(rate),
218
- "merge_adjacent": bool(merge_adjacent),
219
- "unweighted": unweighted,
220
- }
79
+
80
+ return cast(
81
+ PipelineOperationPayload,
82
+ {
83
+ "type": "redact",
84
+ "replacement_char": replacement_char,
85
+ "rate": rate,
86
+ "merge_adjacent": merge_adjacent,
87
+ "unweighted": unweighted,
88
+ },
89
+ )
221
90
 
222
91
 
223
92
  redactyl = Redactyl()
224
93
 
225
94
 
226
- __all__ = ["Redactyl", "redactyl"]
95
+ __all__ = ["Redactyl", "redactyl", "redact_words"]
glitchlings/zoo/rng.py ADDED
@@ -0,0 +1,259 @@
1
+ """RNG boundary layer for seed resolution.
2
+
3
+ This module provides the interface between RNG state and concrete random values.
4
+ All randomness in the glitchlings library flows through these functions.
5
+
6
+ Design Philosophy
7
+ -----------------
8
+ RNG management is an *impure* operation - it involves stateful objects
9
+ (random.Random) and non-deterministic behavior when no seed is provided.
10
+ This module provides the boundary layer that converts RNG state into
11
+ concrete values that can be passed to pure functions.
12
+
13
+ The pattern is:
14
+ 1. User provides `seed: int | None` and/or `rng: random.Random | None`
15
+ 2. Boundary layer resolves to a concrete `int` via `resolve_seed()`
16
+ 3. Pure/Rust functions receive the concrete seed value
17
+
18
+ This separation means:
19
+ - Pure transformation code never touches RNG objects
20
+ - Tests can provide explicit seed values for reproducibility
21
+ - RNG state management is isolated to the boundary
22
+
23
+ See AGENTS.md "Functional Purity Architecture" for full details.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import random
29
+ from hashlib import blake2s
30
+ from typing import Protocol, runtime_checkable
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Constants
34
+ # ---------------------------------------------------------------------------
35
+
36
+ # Bit width for seed values (64-bit for compatibility with Rust u64)
37
+ SEED_BIT_WIDTH = 64
38
+ SEED_MASK = (1 << SEED_BIT_WIDTH) - 1 # 0xFFFFFFFFFFFFFFFF
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Protocols
43
+ # ---------------------------------------------------------------------------
44
+
45
+
46
+ @runtime_checkable
47
+ class RandomBitsSource(Protocol):
48
+ """Protocol for objects that can provide random bits."""
49
+
50
+ def getrandbits(self, k: int) -> int:
51
+ """Return a non-negative integer with k random bits."""
52
+ ...
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Core Boundary Functions
57
+ # ---------------------------------------------------------------------------
58
+
59
+
60
+ def resolve_seed(
61
+ seed: int | None,
62
+ rng: random.Random | None,
63
+ ) -> int:
64
+ """Resolve a seed from optional explicit seed or RNG state.
65
+
66
+ This is the primary boundary function for RNG resolution. Call this
67
+ once at the boundary layer, then pass the resulting int to all
68
+ downstream pure/Rust functions.
69
+
70
+ Args:
71
+ seed: Explicit seed value. If provided, takes precedence over rng.
72
+ rng: Random generator to sample from if seed is None.
73
+
74
+ Returns:
75
+ A 64-bit unsigned integer suitable for Rust FFI.
76
+
77
+ Note:
78
+ If both seed and rng are None, uses module-level random state.
79
+ This is non-deterministic and should only happen at top-level CLI usage.
80
+
81
+ Examples:
82
+ >>> resolve_seed(42, None) # explicit seed
83
+ 42
84
+ >>> rng = random.Random(123)
85
+ >>> resolve_seed(None, rng) # sample from RNG
86
+ 14522756016584210807
87
+ """
88
+ if seed is not None:
89
+ return int(seed) & SEED_MASK
90
+ if rng is not None:
91
+ return rng.getrandbits(SEED_BIT_WIDTH)
92
+ return random.getrandbits(SEED_BIT_WIDTH)
93
+
94
+
95
+ def resolve_seed_deterministic(
96
+ seed: int | None,
97
+ rng: random.Random | None,
98
+ ) -> int:
99
+ """Resolve a seed, requiring explicit seed or RNG.
100
+
101
+ Like resolve_seed(), but raises ValueError if both seed and rng are None.
102
+ Use this when non-deterministic behavior would be a bug.
103
+
104
+ Args:
105
+ seed: Explicit seed value.
106
+ rng: Random generator to sample from.
107
+
108
+ Returns:
109
+ A 64-bit unsigned integer.
110
+
111
+ Raises:
112
+ ValueError: If both seed and rng are None.
113
+ """
114
+ if seed is not None:
115
+ return int(seed) & SEED_MASK
116
+ if rng is not None:
117
+ return rng.getrandbits(SEED_BIT_WIDTH)
118
+ raise ValueError("Either seed or rng must be provided for deterministic behavior")
119
+
120
+
121
+ # ---------------------------------------------------------------------------
122
+ # Seed Derivation (Deterministic)
123
+ # ---------------------------------------------------------------------------
124
+
125
+
126
+ def derive_seed(base_seed: int, *components: int | str) -> int:
127
+ """Derive a new seed from a base seed and components.
128
+
129
+ This is a pure function for hierarchical seed derivation.
130
+ Used by Gaggle to give each glitchling a unique but reproducible seed.
131
+
132
+ Uses blake2s for stable hashing across interpreter runs (unlike Python's
133
+ built-in hash() which is salted per-process). This ensures identical
134
+ inputs always produce identical seeds regardless of PYTHONHASHSEED.
135
+
136
+ Args:
137
+ base_seed: The parent seed.
138
+ *components: Additional components to mix in (integers or strings).
139
+
140
+ Returns:
141
+ A derived 64-bit seed.
142
+
143
+ Examples:
144
+ >>> derive_seed(12345, 0) # first child
145
+ 13704458811836263874
146
+ >>> derive_seed(12345, 1) # second child
147
+ 7874335407589182396
148
+ >>> derive_seed(12345, "typogre") # named child
149
+ 561509252352425601
150
+ """
151
+ # Use blake2s for stable, deterministic hashing across runs
152
+ hasher = blake2s(digest_size=8)
153
+
154
+ # Helper to convert int to bytes (handles arbitrary size)
155
+ def _int_to_bytes(value: int) -> bytes:
156
+ if value == 0:
157
+ return b"\x00"
158
+ abs_value = abs(value)
159
+ length = (abs_value.bit_length() + 7) // 8
160
+ if value < 0:
161
+ while True:
162
+ try:
163
+ return value.to_bytes(length, "big", signed=True)
164
+ except OverflowError:
165
+ length += 1
166
+ return abs_value.to_bytes(length, "big", signed=False)
167
+
168
+ hasher.update(_int_to_bytes(base_seed))
169
+ for component in components:
170
+ hasher.update(b"\x00") # separator
171
+ if isinstance(component, str):
172
+ hasher.update(component.encode("utf-8"))
173
+ else:
174
+ hasher.update(_int_to_bytes(component))
175
+
176
+ return int.from_bytes(hasher.digest(), "big")
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # Random Value Generation (Impure)
181
+ # ---------------------------------------------------------------------------
182
+
183
+
184
+ def create_rng(seed: int) -> random.Random:
185
+ """Create a new Random instance from a seed.
186
+
187
+ Use this when you need to create child RNG states for parallel operations.
188
+ Prefer passing concrete seed values to functions when possible.
189
+
190
+ Args:
191
+ seed: The seed for the new RNG.
192
+
193
+ Returns:
194
+ A new random.Random instance.
195
+ """
196
+ return random.Random(seed)
197
+
198
+
199
+ def sample_random_float(rng: random.Random) -> float:
200
+ """Sample a random float in [0.0, 1.0) from an RNG.
201
+
202
+ Args:
203
+ rng: The random generator.
204
+
205
+ Returns:
206
+ Float in range [0.0, 1.0).
207
+ """
208
+ return rng.random()
209
+
210
+
211
+ def sample_random_int(rng: random.Random, *, low: int, high: int) -> int:
212
+ """Sample a random integer in [low, high] inclusive.
213
+
214
+ Args:
215
+ rng: The random generator.
216
+ low: Minimum value (inclusive).
217
+ high: Maximum value (inclusive).
218
+
219
+ Returns:
220
+ Random integer in range [low, high].
221
+ """
222
+ return rng.randint(low, high)
223
+
224
+
225
+ def sample_random_index(rng: random.Random, length: int) -> int:
226
+ """Sample a random index for a sequence of given length.
227
+
228
+ Args:
229
+ rng: The random generator.
230
+ length: The sequence length.
231
+
232
+ Returns:
233
+ Random index in range [0, length).
234
+
235
+ Raises:
236
+ ValueError: If length <= 0.
237
+ """
238
+ if length <= 0:
239
+ raise ValueError("Cannot sample index from empty sequence")
240
+ return rng.randrange(length)
241
+
242
+
243
+ __all__ = [
244
+ # Constants
245
+ "SEED_BIT_WIDTH",
246
+ "SEED_MASK",
247
+ # Protocols
248
+ "RandomBitsSource",
249
+ # Boundary functions
250
+ "resolve_seed",
251
+ "resolve_seed_deterministic",
252
+ # Derivation
253
+ "derive_seed",
254
+ # RNG operations (impure)
255
+ "create_rng",
256
+ "sample_random_float",
257
+ "sample_random_int",
258
+ "sample_random_index",
259
+ ]