glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (83):
  1. glitchlings/__init__.py +99 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +147 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +493 -0
  21. glitchlings/attack/core_execution.py +367 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +218 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +227 -0
  27. glitchlings/auggie.py +284 -0
  28. glitchlings/compat/__init__.py +9 -0
  29. glitchlings/compat/loaders.py +355 -0
  30. glitchlings/compat/types.py +41 -0
  31. glitchlings/conf/__init__.py +41 -0
  32. glitchlings/conf/loaders.py +331 -0
  33. glitchlings/conf/schema.py +156 -0
  34. glitchlings/conf/types.py +72 -0
  35. glitchlings/config.toml +2 -0
  36. glitchlings/constants.py +59 -0
  37. glitchlings/dev/__init__.py +3 -0
  38. glitchlings/dev/docs.py +45 -0
  39. glitchlings/dlc/__init__.py +19 -0
  40. glitchlings/dlc/_shared.py +296 -0
  41. glitchlings/dlc/gutenberg.py +400 -0
  42. glitchlings/dlc/huggingface.py +68 -0
  43. glitchlings/dlc/prime.py +215 -0
  44. glitchlings/dlc/pytorch.py +98 -0
  45. glitchlings/dlc/pytorch_lightning.py +173 -0
  46. glitchlings/internal/__init__.py +16 -0
  47. glitchlings/internal/rust.py +159 -0
  48. glitchlings/internal/rust_ffi.py +490 -0
  49. glitchlings/main.py +426 -0
  50. glitchlings/protocols.py +91 -0
  51. glitchlings/runtime_config.py +24 -0
  52. glitchlings/util/__init__.py +27 -0
  53. glitchlings/util/adapters.py +65 -0
  54. glitchlings/util/keyboards.py +356 -0
  55. glitchlings/util/transcripts.py +108 -0
  56. glitchlings/zoo/__init__.py +161 -0
  57. glitchlings/zoo/assets/__init__.py +29 -0
  58. glitchlings/zoo/core.py +678 -0
  59. glitchlings/zoo/core_execution.py +154 -0
  60. glitchlings/zoo/core_planning.py +451 -0
  61. glitchlings/zoo/corrupt_dispatch.py +295 -0
  62. glitchlings/zoo/hokey.py +139 -0
  63. glitchlings/zoo/jargoyle.py +243 -0
  64. glitchlings/zoo/mim1c.py +148 -0
  65. glitchlings/zoo/pedant/__init__.py +109 -0
  66. glitchlings/zoo/pedant/core.py +105 -0
  67. glitchlings/zoo/pedant/forms.py +74 -0
  68. glitchlings/zoo/pedant/stones.py +74 -0
  69. glitchlings/zoo/redactyl.py +97 -0
  70. glitchlings/zoo/rng.py +259 -0
  71. glitchlings/zoo/rushmore.py +416 -0
  72. glitchlings/zoo/scannequin.py +66 -0
  73. glitchlings/zoo/transforms.py +346 -0
  74. glitchlings/zoo/typogre.py +128 -0
  75. glitchlings/zoo/validation.py +477 -0
  76. glitchlings/zoo/wherewolf.py +120 -0
  77. glitchlings/zoo/zeedub.py +93 -0
  78. glitchlings-0.10.2.dist-info/METADATA +337 -0
  79. glitchlings-0.10.2.dist-info/RECORD +83 -0
  80. glitchlings-0.10.2.dist-info/WHEEL +5 -0
  81. glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
  82. glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
  83. glitchlings-0.10.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,367 @@
1
+ """Impure execution dispatch for Attack orchestration.
2
+
3
+ This module handles the actual execution of attack plans, including
4
+ tokenizer resolution, glitchling invocation, and metric computation.
5
+ It is the impure counterpart to core_planning.py.
6
+
7
+ **Design Philosophy:**
8
+
9
+ This module is explicitly *impure* - it resolves tokenizers, invokes
10
+ glitchling corruption functions, and calls Rust metrics. All impure
11
+ operations for Attack execution flow through this module.
12
+
13
+ The separation allows:
14
+ - Pure planning logic to be tested without dependencies
15
+ - Clear boundaries between plan construction and execution
16
+ - Mocking execution for integration tests
17
+
18
+ See AGENTS.md "Functional Purity Architecture" for full details.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from collections.abc import Mapping, Sequence
24
+ from typing import TYPE_CHECKING, Any, cast
25
+
26
+ from ..util.adapters import coerce_gaggle
27
+ from ..util.transcripts import Transcript, is_transcript
28
+ from .core_planning import (
29
+ AttackPlan,
30
+ BatchAdapter,
31
+ EncodedData,
32
+ ResultPlan,
33
+ assemble_empty_result_fields,
34
+ assemble_result_fields,
35
+ extract_transcript_contents,
36
+ )
37
+ from .encode import encode_batch
38
+ from .metrics import (
39
+ Metric,
40
+ entropy_delta,
41
+ jensen_shannon_divergence,
42
+ merge_split_index,
43
+ normalized_edit_distance,
44
+ subsequence_retention,
45
+ )
46
+ from .tokenization import Tokenizer
47
+
48
+ if TYPE_CHECKING:
49
+ from ..protocols import Corruptor
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Default Metrics
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
def get_default_metrics() -> dict[str, Metric]:
    """Build the default metric registry for Attack.

    A new dictionary is created on every call, so a caller may add or
    remove entries without affecting the result of later calls.

    Returns:
        Dictionary mapping each metric name to its metric function.
    """
    # Pairs are listed in the order the resulting dictionary iterates.
    named_metrics: tuple[tuple[str, Metric], ...] = (
        ("jensen_shannon_divergence", jensen_shannon_divergence),
        ("normalized_edit_distance", normalized_edit_distance),
        ("subsequence_retention", subsequence_retention),
        ("entropy_delta", entropy_delta),
        ("merge_split_index", merge_split_index),
    )
    return dict(named_metrics)
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Glitchling Resolution
74
+ # ---------------------------------------------------------------------------
75
+
76
+
77
def resolve_glitchlings(
    glitchlings: "Corruptor | str | Sequence[str | Corruptor]",
    *,
    seed: int | None,
    transcript_target: Any = None,
) -> "Corruptor":
    """Turn a glitchling specification into a seeded Gaggle.

    This function is impure: Corruptor instances in the specification
    (the value itself, or entries of a sequence) are cloned before being
    handed to ``coerce_gaggle`` so caller-owned objects are not mutated.
    Strings and any other values are passed through unchanged.

    Args:
        glitchlings: Glitchling specification.
        seed: Master seed for the gaggle. ``None`` selects
            ``DEFAULT_ATTACK_SEED``.
        transcript_target: Which transcript turns to corrupt; forwarded
            to ``coerce_gaggle`` as-is.

    Returns:
        The result of ``coerce_gaggle`` applied to the cloned specification.
    """
    from ..conf import DEFAULT_ATTACK_SEED
    from ..protocols import Corruptor as CorruptorProtocol

    prepared: Any
    if isinstance(glitchlings, CorruptorProtocol):
        # A lone corruptor: work on a copy.
        prepared = glitchlings.clone()
    elif not isinstance(glitchlings, str) and isinstance(glitchlings, Sequence):
        # A non-string sequence: copy corruptor entries, keep the rest.
        prepared = [
            entry.clone() if isinstance(entry, CorruptorProtocol) else entry
            for entry in glitchlings
        ]
    else:
        # A string (or any other value) is handed over untouched.
        prepared = glitchlings

    return coerce_gaggle(
        prepared,
        seed=DEFAULT_ATTACK_SEED if seed is None else seed,
        apply_seed_to_existing=True,
        transcript_target=transcript_target,
    )
124
+
125
+
126
+ # ---------------------------------------------------------------------------
127
+ # Corruption Execution
128
+ # ---------------------------------------------------------------------------
129
+
130
+
131
def execute_corruption(
    gaggle: "Corruptor",
    plan: AttackPlan,
    original_container: str | Transcript | Sequence[str],
) -> tuple[str | Transcript | Sequence[str], list[str]]:
    """Corrupt the input in the shape recorded by the attack plan.

    ``plan.input_type`` selects the branch: ``"batch"`` corrupts each item
    in turn, ``"transcript"`` corrupts the transcript as a whole, and any
    other value treats the container as a single string.

    Args:
        gaggle: The glitchling(s) to use for corruption.
        plan: The attack execution plan.
        original_container: The original input container.

    Returns:
        Tuple of (corrupted_container, corrupted_contents). For batch
        input both elements are the same list object.

    Raises:
        TypeError: If a batch item or a single string is not corrupted
            into a ``str``.
        ValueError: If a transcript input is not corrupted into a
            transcript.
    """

    def corrupt_text(text: str, context: str) -> str:
        # Corrupt one string and insist that a string comes back.
        result = gaggle.corrupt(text)
        if isinstance(result, str):
            return result
        raise TypeError(
            f"Attack expected str output for {context}, got {type(result).__name__}"
        )

    kind = plan.input_type

    if kind == "batch":
        # Snapshot the batch first, then corrupt and validate item by item.
        entries = list(cast(Sequence[str], original_container))
        mutated = [corrupt_text(entry, "batch items") for entry in entries]
        return mutated, mutated

    if kind == "transcript":
        result = gaggle.corrupt(cast(Transcript, original_container))
        if is_transcript(result):
            turns = cast(Sequence[Mapping[str, Any]], result)
            return result, extract_transcript_contents(turns)
        raise ValueError(
            "Attack expected transcript output for transcript input, "
            f"got {type(result).__name__}"
        )

    # Anything else is handled as one plain string.
    single = corrupt_text(cast(str, original_container), "string input")
    return single, [single]
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # Tokenization Execution
184
+ # ---------------------------------------------------------------------------
185
+
186
+
187
def execute_tokenization(
    tokenizer: Tokenizer,
    contents: list[str],
) -> EncodedData:
    """Tokenize a list of content strings in one batch.

    Args:
        tokenizer: Resolved tokenizer instance.
        contents: List of strings to tokenize.

    Returns:
        EncodedData with tokens and token IDs. An empty ``contents``
        yields empty lists without calling the tokenizer.
    """
    # Skip the encoder entirely when there is nothing to encode.
    tokens, token_ids = encode_batch(tokenizer, contents) if contents else ([], [])
    return EncodedData(tokens=tokens, token_ids=token_ids)
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Metric Execution
209
+ # ---------------------------------------------------------------------------
210
+
211
+
212
def execute_metrics(
    metrics: dict[str, Metric],
    input_tokens: list[list[str]],
    output_tokens: list[list[str]],
) -> dict[str, list[float]]:
    """Evaluate every metric on the batched token lists.

    Each metric function is called once with ``(input_tokens,
    output_tokens)``. A result that is already a list is stored as-is;
    any other result is wrapped in a one-element list.

    Args:
        metrics: Dictionary of metric functions.
        input_tokens: Original tokens (always batched 2D list).
        output_tokens: Corrupted tokens (always batched 2D list).

    Returns:
        Dictionary of computed metric values (always as lists), keyed
        and ordered like ``metrics``.
    """

    def as_list(value: Any) -> list[float]:
        # Normalise scalar results to the list shape callers expect.
        return value if isinstance(value, list) else [value]

    return {
        name: as_list(metric_fn(input_tokens, output_tokens))
        for name, metric_fn in metrics.items()
    }
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # Full Attack Execution
244
+ # ---------------------------------------------------------------------------
245
+
246
+
247
def execute_attack(
    gaggle: "Corruptor",
    tokenizer: Tokenizer,
    metrics: dict[str, Metric],
    plan: AttackPlan,
    result_plan: ResultPlan,
    original_container: str | Transcript | Sequence[str],
) -> dict[str, object]:
    """Run one attack end to end and return the result fields.

    For a non-empty plan the steps are, in order: build the batch adapter,
    corrupt the input, tokenize the original and corrupted contents,
    compute the metrics, and assemble the result fields. An empty plan
    skips all of that and returns the empty-result fields, using the
    original container as the corrupted value.

    Args:
        gaggle: Glitchling(s) for corruption.
        tokenizer: Resolved tokenizer.
        metrics: Metric functions.
        plan: Attack execution plan.
        result_plan: Result assembly plan.
        original_container: Original input container.

    Returns:
        Dictionary of fields for AttackResult construction.
    """
    if plan.is_empty:
        # Nothing to corrupt: the input is reported as its own corruption.
        return assemble_empty_result_fields(
            original=original_container,
            corrupted=original_container,
            tokenizer_info=result_plan.tokenizer_info,
            metric_names=result_plan.metric_names,
        )

    # The adapter is built from the plan and passed on to result assembly.
    adapter = BatchAdapter.from_plan(plan)

    corruption = execute_corruption(gaggle, plan, original_container)
    corrupted_container, corrupted_contents = corruption

    # Both sides are tokenized as batches.
    before = execute_tokenization(tokenizer, plan.original_contents)
    after = execute_tokenization(tokenizer, corrupted_contents)

    scores = execute_metrics(metrics, before.tokens, after.tokens)

    return assemble_result_fields(
        adapter=adapter,
        original=original_container,
        corrupted=corrupted_container,
        input_encoded=before,
        output_encoded=after,
        tokenizer_info=result_plan.tokenizer_info,
        metrics=scores,
    )
311
+
312
+
313
+ # ---------------------------------------------------------------------------
314
+ # Comparison Execution
315
+ # ---------------------------------------------------------------------------
316
+
317
+
318
def execute_comparison_entry(
    gaggle: "Corruptor",
    tokenizer: Tokenizer,
    tokenizer_info: str,
    metrics: dict[str, Metric],
    text: str | Transcript | Sequence[str],
) -> tuple[str, dict[str, object]]:
    """Plan and execute one attack for a single comparison entry.

    Args:
        gaggle: Glitchling(s) for corruption.
        tokenizer: Resolved tokenizer.
        tokenizer_info: Tokenizer description.
        metrics: Metric functions.
        text: Input text.

    Returns:
        Tuple of (tokenizer_info, result_fields).
    """
    from .core_planning import plan_attack, plan_result

    # Planning: describe the input, then describe how to assemble the result.
    attack_plan = plan_attack(text)
    metric_names = list(metrics)
    result_plan = plan_result(attack_plan, metric_names, tokenizer_info)

    # Execution: run the attack against the plans just built.
    result_fields = execute_attack(
        gaggle, tokenizer, metrics, attack_plan, result_plan, text
    )
    return tokenizer_info, result_fields
354
+
355
+
356
# Public API, in order: default metrics, glitchling resolution, then the
# execution entry points.
__all__ = [
    "get_default_metrics",
    "resolve_glitchlings",
    "execute_corruption",
    "execute_tokenization",
    "execute_metrics",
    "execute_attack",
    "execute_comparison_entry",
]