glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of glitchlings might be problematic.

Files changed (83)
  1. glitchlings/__init__.py +99 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +147 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +493 -0
  21. glitchlings/attack/core_execution.py +367 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +218 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +227 -0
  27. glitchlings/auggie.py +284 -0
  28. glitchlings/compat/__init__.py +9 -0
  29. glitchlings/compat/loaders.py +355 -0
  30. glitchlings/compat/types.py +41 -0
  31. glitchlings/conf/__init__.py +41 -0
  32. glitchlings/conf/loaders.py +331 -0
  33. glitchlings/conf/schema.py +156 -0
  34. glitchlings/conf/types.py +72 -0
  35. glitchlings/config.toml +2 -0
  36. glitchlings/constants.py +59 -0
  37. glitchlings/dev/__init__.py +3 -0
  38. glitchlings/dev/docs.py +45 -0
  39. glitchlings/dlc/__init__.py +19 -0
  40. glitchlings/dlc/_shared.py +296 -0
  41. glitchlings/dlc/gutenberg.py +400 -0
  42. glitchlings/dlc/huggingface.py +68 -0
  43. glitchlings/dlc/prime.py +215 -0
  44. glitchlings/dlc/pytorch.py +98 -0
  45. glitchlings/dlc/pytorch_lightning.py +173 -0
  46. glitchlings/internal/__init__.py +16 -0
  47. glitchlings/internal/rust.py +159 -0
  48. glitchlings/internal/rust_ffi.py +490 -0
  49. glitchlings/main.py +426 -0
  50. glitchlings/protocols.py +91 -0
  51. glitchlings/runtime_config.py +24 -0
  52. glitchlings/util/__init__.py +27 -0
  53. glitchlings/util/adapters.py +65 -0
  54. glitchlings/util/keyboards.py +356 -0
  55. glitchlings/util/transcripts.py +108 -0
  56. glitchlings/zoo/__init__.py +161 -0
  57. glitchlings/zoo/assets/__init__.py +29 -0
  58. glitchlings/zoo/core.py +678 -0
  59. glitchlings/zoo/core_execution.py +154 -0
  60. glitchlings/zoo/core_planning.py +451 -0
  61. glitchlings/zoo/corrupt_dispatch.py +295 -0
  62. glitchlings/zoo/hokey.py +139 -0
  63. glitchlings/zoo/jargoyle.py +243 -0
  64. glitchlings/zoo/mim1c.py +148 -0
  65. glitchlings/zoo/pedant/__init__.py +109 -0
  66. glitchlings/zoo/pedant/core.py +105 -0
  67. glitchlings/zoo/pedant/forms.py +74 -0
  68. glitchlings/zoo/pedant/stones.py +74 -0
  69. glitchlings/zoo/redactyl.py +97 -0
  70. glitchlings/zoo/rng.py +259 -0
  71. glitchlings/zoo/rushmore.py +416 -0
  72. glitchlings/zoo/scannequin.py +66 -0
  73. glitchlings/zoo/transforms.py +346 -0
  74. glitchlings/zoo/typogre.py +128 -0
  75. glitchlings/zoo/validation.py +477 -0
  76. glitchlings/zoo/wherewolf.py +120 -0
  77. glitchlings/zoo/zeedub.py +93 -0
  78. glitchlings-0.10.2.dist-info/METADATA +337 -0
  79. glitchlings-0.10.2.dist-info/RECORD +83 -0
  80. glitchlings-0.10.2.dist-info/WHEEL +5 -0
  81. glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
  82. glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
  83. glitchlings-0.10.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1321 @@
+ """Analysis tools for comparing tokenizers and exploring parameter spaces.
+
+ This module provides three analysis tools following the functional purity
+ architecture:
+
+ 1. **SeedSweep**: Run an attack across many seeds to collect aggregate metrics
+ 2. **GridSearch**: Search across parameter combinations to find optimal settings
+ 3. **TokenizerComparison**: Compare token streams and metrics across tokenizers
+
+ Module Structure
+ ----------------
+ **Pure Functions** (no side effects):
+ - ``compute_aggregate_stats()``: Statistical aggregation
+ - ``format_stats_summary()``: String formatting
+ - ``extract_scalar_metrics()``: Metric extraction
+ - ``generate_param_combinations()``: Grid generation
+ - ``rank_grid_points()``: Sorting by metric
+
+ **Pure Data Classes** (immutable results):
+ - ``SeedSweepResult``, ``GridSearchResult``, ``TokenizerComparisonResult``
+ - ``GridSearchPoint``, ``TokenizerComparisonEntry``
+
+ **Impure Orchestrators** (coordinate execution):
+ - ``SeedSweep``, ``GridSearch``, ``TokenizerComparison``
+
+ See AGENTS.md "Functional Purity Architecture" for full details.
+ """
+
+ from __future__ import annotations
+
+ import statistics
+ from collections.abc import Iterable, Mapping, Sequence
+ from dataclasses import dataclass, field
+ from itertools import product
+ from typing import TYPE_CHECKING, Any, Callable
+
+ from .core import Attack, AttackResult
+ from .core_execution import resolve_glitchlings
+ from .encode import describe_tokenizer
+ from .tokenization import Tokenizer, resolve_tokenizer
+
+ if TYPE_CHECKING:  # pragma: no cover - typing only
+     from ..protocols import Corruptor
+
+
+ # ---------------------------------------------------------------------------
+ # Pure Statistical Helpers
+ # ---------------------------------------------------------------------------
+
+
+ def compute_aggregate_stats(values: Sequence[float]) -> dict[str, float]:
+     """Compute aggregate statistics for a sequence of values (pure).
+
+     Args:
+         values: Sequence of float values to aggregate.
+
+     Returns:
+         Dictionary with mean, std, min, max, and median.
+     """
+     if not values:
+         return {"mean": 0.0, "std": 0.0, "min": 0.0, "max": 0.0, "median": 0.0}
+
+     values_list = list(values)
+     mean = statistics.mean(values_list)
+     std = statistics.stdev(values_list) if len(values_list) > 1 else 0.0
+     minimum = min(values_list)
+     maximum = max(values_list)
+     median = statistics.median(values_list)
+
+     return {
+         "mean": mean,
+         "std": std,
+         "min": minimum,
+         "max": maximum,
+         "median": median,
+     }
+
+
+ def format_stats_summary(stats: dict[str, float], precision: int = 4) -> str:
+     """Format aggregate statistics as a compact string (pure).
+
+     Args:
+         stats: Dictionary of statistic name to value.
+         precision: Decimal precision for formatting.
+
+     Returns:
+         Formatted string like "mean=0.1234 std=0.0123 min=0.0100 max=0.2000".
+     """
+     return " ".join(f"{key}={value:.{precision}f}" for key, value in stats.items())
+
+
+ def extract_scalar_metrics(
+     metrics: dict[str, float | list[float]],
+ ) -> dict[str, float]:
+     """Extract scalar metric values from potentially batched metrics (pure).
+
+     For list metrics, returns the first element. For scalar metrics,
+     returns the value unchanged.
+
+     Args:
+         metrics: Dictionary of metric names to values.
+
+     Returns:
+         Dictionary with all values as scalars.
+     """
+     return {
+         name: val if isinstance(val, float) else val[0] if val else 0.0
+         for name, val in metrics.items()
+     }
+
+
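The three helpers above are side-effect free, so they can be exercised directly at a REPL. A minimal check (the values and metric keys are illustrative; the import path follows this file's location in the wheel):

    from glitchlings.attack.analysis import (
        compute_aggregate_stats,
        extract_scalar_metrics,
        format_stats_summary,
    )

    stats = compute_aggregate_stats([0.10, 0.12, 0.20])
    print(format_stats_summary(stats))
    # mean=0.1400 std=0.0529 min=0.1000 max=0.2000 median=0.1200

    # Batched (list-valued) metrics collapse to their first element:
    extract_scalar_metrics({"ned": 0.5, "per_item": [0.1, 0.2]})
    # {'ned': 0.5, 'per_item': 0.1}
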
+ # ---------------------------------------------------------------------------
+ # Pure Grid Search Helpers
+ # ---------------------------------------------------------------------------
+
+
+ def generate_param_combinations(
+     param_grid: dict[str, list[Any]],
+ ) -> list[dict[str, Any]]:
+     """Generate all combinations of parameters from a grid (pure).
+
+     Args:
+         param_grid: Dictionary mapping parameter names to value lists.
+
+     Returns:
+         List of dictionaries, each representing one parameter combination.
+     """
+     if not param_grid:
+         return [{}]
+
+     param_names = list(param_grid.keys())
+     param_values = [param_grid[name] for name in param_names]
+
+     combinations: list[dict[str, Any]] = []
+     for values in product(*param_values):
+         combo = dict(zip(param_names, values))
+         combinations.append(combo)
+
+     return combinations
+
+
+ def rank_grid_points(
+     points: list["GridSearchPoint"],
+     *,
+     rank_by: str,
+     minimize: bool = True,
+ ) -> list["GridSearchPoint"]:
+     """Sort grid points by a metric (pure).
+
+     Args:
+         points: List of grid search points to sort.
+         rank_by: Metric name to rank by.
+         minimize: If True, lower values rank first.
+
+     Returns:
+         Sorted list of points.
+     """
+     return sorted(
+         points,
+         key=lambda p: p.metrics.get(rank_by, float("inf") if minimize else float("-inf")),
+         reverse=not minimize,
+     )
+
+
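Both grid helpers are deterministic dictionary manipulations, so their behavior is easy to confirm in isolation. A quick illustration (the parameter names here are arbitrary dict keys):

    from glitchlings.attack.analysis import generate_param_combinations

    generate_param_combinations({"rate": [0.01, 0.05], "mode": ["a", "b"]})
    # [{'rate': 0.01, 'mode': 'a'}, {'rate': 0.01, 'mode': 'b'},
    #  {'rate': 0.05, 'mode': 'a'}, {'rate': 0.05, 'mode': 'b'}]

    generate_param_combinations({})
    # [{}]  -- a single empty combination, so a degenerate grid still runs once
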
+ # ---------------------------------------------------------------------------
+ # SeedSweep: Result and Orchestrator
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class SeedSweepResult:
+     """Results from sweeping across multiple seeds (pure data class).
+
+     Attributes:
+         seeds: List of seeds that were tested.
+         text: The input text that was corrupted.
+         tokenizer_info: Description of the tokenizer used.
+         per_seed_results: Mapping from seed to AttackResult.
+         per_seed_metrics: Mapping from seed to scalar metrics dict.
+         aggregate_stats: Aggregated statistics per metric.
+     """
+
+     seeds: list[int]
+     text: str
+     tokenizer_info: str
+     per_seed_results: dict[int, AttackResult]
+     per_seed_metrics: dict[int, dict[str, float]]
+     aggregate_stats: dict[str, dict[str, float]]
+
+     def summary(self, *, show_seeds: int = 5) -> str:
+         """Generate a human-readable summary (pure formatting)."""
+         lines: list[str] = [
+             f"SeedSweep Results ({len(self.seeds)} seeds)",
+             f"Tokenizer: {self.tokenizer_info}",
+             f"Input text: {self.text[:50]}{'...' if len(self.text) > 50 else ''}",
+             "",
+             "Aggregate Statistics:",
+         ]
+
+         for metric_name, stats in self.aggregate_stats.items():
+             lines.append(f" {metric_name}:")
+             lines.append(f" {format_stats_summary(stats)}")
+
+         if show_seeds > 0:
+             lines.append("")
+             lines.append(f"Per-Seed Metrics (first {min(show_seeds, len(self.seeds))}):")
+             for seed in self.seeds[:show_seeds]:
+                 metrics = self.per_seed_metrics[seed]
+                 metric_strs = [f"{k}={v:.4f}" for k, v in metrics.items()]
+                 lines.append(f" seed={seed}: {', '.join(metric_strs)}")
+             if len(self.seeds) > show_seeds:
+                 lines.append(f" ... {len(self.seeds) - show_seeds} more seeds")
+
+         return "\n".join(lines)
+
+     def to_report(self) -> dict[str, object]:
+         """Convert to JSON-serializable dictionary (pure)."""
+         return {
+             "seeds": self.seeds,
+             "text": self.text,
+             "tokenizer": self.tokenizer_info,
+             "per_seed_metrics": self.per_seed_metrics,
+             "aggregate_stats": self.aggregate_stats,
+         }
+
+     def filter_by_metric(
+         self,
+         metric_name: str,
+         *,
+         min_value: float | None = None,
+         max_value: float | None = None,
+     ) -> dict[int, AttackResult]:
+         """Filter per-seed results by metric thresholds.
+
+         Args:
+             metric_name: Name of the metric to filter by.
+             min_value: Minimum metric value (inclusive).
+             max_value: Maximum metric value (inclusive).
+
+         Returns:
+             Dictionary mapping seeds to AttackResults that meet criteria.
+         """
+         results: dict[int, AttackResult] = {}
+         for seed in self.seeds:
+             metrics = self.per_seed_metrics.get(seed, {})
+             value = metrics.get(metric_name)
+             if value is None:
+                 continue
+             if min_value is not None and value < min_value:
+                 continue
+             if max_value is not None and value > max_value:
+                 continue
+             results[seed] = self.per_seed_results[seed]
+         return results
+
+     def export_csv(
+         self,
+         filepath: str,
+         *,
+         metrics: Sequence[str] | None = None,
+     ) -> None:
+         """Export per-seed metrics to CSV.
+
+         Args:
+             filepath: Path to write the CSV file.
+             metrics: Specific metrics to include (None = all).
+         """
+         import csv
+
+         if not self.per_seed_metrics:
+             return
+
+         # Determine metrics to export
+         first_metrics = next(iter(self.per_seed_metrics.values()))
+         if metrics is None:
+             metric_names = list(first_metrics.keys())
+         else:
+             metric_names = list(metrics)
+
+         with open(filepath, "w", newline="") as f:
+             writer = csv.writer(f)
+             writer.writerow(["seed"] + metric_names)
+             for seed in self.seeds:
+                 seed_metrics = self.per_seed_metrics.get(seed, {})
+                 row = [seed] + [seed_metrics.get(m, "") for m in metric_names]
+                 writer.writerow(row)
+
+     def to_dataframe(self) -> "Any":
+         """Convert to pandas DataFrame (requires pandas).
+
+         Returns:
+             DataFrame with seeds as index and metrics as columns.
+
+         Raises:
+             ImportError: If pandas is not installed.
+         """
+         try:
+             import pandas as pd
+         except ImportError as e:
+             raise ImportError(
+                 "pandas is required for to_dataframe(). Install with: pip install pandas"
+             ) from e
+
+         return pd.DataFrame(self.per_seed_metrics).T
+
+
+ class SeedSweep:
+     """Sweep across multiple seeds to collect aggregate metrics (impure).
+
+     This orchestrator runs attacks across many seeds and computes
+     aggregate statistics (mean, std, min, max, median) for each metric.
+
+     Example:
+         >>> from glitchlings import Typogre
+         >>> sweep = SeedSweep(Typogre(rate=0.05), tokenizer='cl100k_base')
+         >>> result = sweep.run("Hello world", seeds=range(100))
+         >>> print(result.summary())
+     """
+
+     def __init__(
+         self,
+         glitchlings: "Corruptor | str | Sequence[str | Corruptor]",
+         tokenizer: str | Tokenizer | None = None,
+         metrics: Mapping[str, Callable[..., float | list[float]]] | None = None,
+     ) -> None:
+         """Initialize a SeedSweep analyzer.
+
+         Args:
+             glitchlings: Glitchling specification (same as Attack).
+             tokenizer: Tokenizer name or instance.
+             metrics: Optional custom metrics (defaults to Attack defaults).
+         """
+         self._glitchlings_spec = glitchlings
+         self._tokenizer_spec = tokenizer
+         self._metrics = metrics
+         # Impure: resolve tokenizer once
+         self._resolved_tokenizer = resolve_tokenizer(tokenizer)
+         self._tokenizer_info = describe_tokenizer(self._resolved_tokenizer, tokenizer)
+
+     def run(
+         self,
+         text: str,
+         seeds: Iterable[int],
+         *,
+         progress_callback: Callable[[list[tuple[int, AttackResult]]], None] | None = None,
+         early_stop: Callable[[int, AttackResult], bool] | None = None,
+     ) -> SeedSweepResult:
+         """Run the sweep across specified seeds (impure execution).
+
+         Args:
+             text: Input text to corrupt.
+             seeds: Iterable of seed values to test.
+             progress_callback: Optional callback receiving list of (seed, result)
+                 pairs collected so far.
+             early_stop: Optional predicate receiving (seed, result). If it returns
+                 True, the sweep stops early.
+
+         Returns:
+             SeedSweepResult with per-seed and aggregate statistics.
+         """
+         seeds_list = list(seeds)
+         per_seed_results: dict[int, AttackResult] = {}
+         per_seed_metrics: dict[int, dict[str, float]] = {}
+         completed: list[tuple[int, AttackResult]] = []
+
+         # Impure: run attacks for each seed
+         for seed in seeds_list:
+             attack = Attack(
+                 self._glitchlings_spec,
+                 tokenizer=self._resolved_tokenizer,
+                 metrics=self._metrics,
+                 seed=seed,
+             )
+             result = attack.run(text)
+             per_seed_results[seed] = result
+             # Pure: extract scalar metrics
+             per_seed_metrics[seed] = extract_scalar_metrics(result.metrics)
+
+             # Track progress
+             completed.append((seed, result))
+             if progress_callback is not None:
+                 progress_callback(completed)
+
+             # Check early stopping
+             if early_stop is not None and early_stop(seed, result):
+                 break
+
+         # Pure: compute aggregate statistics
+         aggregate_stats: dict[str, dict[str, float]] = {}
+         completed_seeds = [seed for seed, _ in completed]
+         if per_seed_metrics:
+             metric_names = list(next(iter(per_seed_metrics.values())).keys())
+             for metric_name in metric_names:
+                 values = [per_seed_metrics[seed][metric_name] for seed in completed_seeds]
+                 aggregate_stats[metric_name] = compute_aggregate_stats(values)
+
+         return SeedSweepResult(
+             seeds=completed_seeds,
+             text=text,
+             tokenizer_info=self._tokenizer_info,
+             per_seed_results=per_seed_results,
+             per_seed_metrics=per_seed_metrics,
+             aggregate_stats=aggregate_stats,
+         )
+
+
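A sketch of the optional hooks on SeedSweep.run(), reusing the module's own extract_scalar_metrics so the early-stop predicate tolerates list-valued metrics. The glitchling, tokenizer name, and metric name are taken from the docstring examples in this file; the thresholds are illustrative:

    from glitchlings import Typogre
    from glitchlings.attack.analysis import SeedSweep, extract_scalar_metrics

    def report_progress(done):
        # `done` is the list of (seed, AttackResult) pairs completed so far.
        if len(done) % 10 == 0:
            print(f"{len(done)} seeds completed")

    def disruptive_enough(seed, res):
        scalars = extract_scalar_metrics(res.metrics)
        return scalars.get("normalized_edit_distance", 0.0) > 0.5

    sweep = SeedSweep(Typogre(rate=0.05), tokenizer="cl100k_base")
    result = sweep.run(
        "Hello world",
        seeds=range(100),
        progress_callback=report_progress,
        early_stop=disruptive_enough,  # stop at the first sufficiently disruptive seed
    )
    result.export_csv("sweep_metrics.csv")  # one row per completed seed
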
+ # ---------------------------------------------------------------------------
+ # GridSearch: Result and Orchestrator
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class GridSearchPoint:
+     """A single point in the parameter grid (pure data class).
+
+     Attributes:
+         params: Dictionary of parameter name to value for this point.
+         result: The AttackResult from running with these parameters.
+         metrics: Extracted scalar metrics for easy comparison.
+     """
+
+     params: dict[str, Any]
+     result: AttackResult
+     metrics: dict[str, float]
+
+
+ @dataclass
+ class GridSearchResult:
+     """Results from a grid search (pure data class).
+
+     Attributes:
+         text: The input text that was corrupted.
+         tokenizer_info: Description of the tokenizer used.
+         param_grid: The parameter grid that was searched.
+         points: All evaluated grid points with results.
+         best_point: The point with the best metric value (if ranked).
+         ranking_metric: Name of the metric used for ranking.
+         ranking_minimize: Whether ranking minimized (True) or maximized.
+     """
+
+     text: str
+     tokenizer_info: str
+     param_grid: dict[str, list[Any]]
+     points: list[GridSearchPoint]
+     best_point: GridSearchPoint | None
+     ranking_metric: str | None
+     ranking_minimize: bool
+
+     def summary(self, *, show_top: int = 10) -> str:
+         """Generate a human-readable summary (pure formatting)."""
+         lines: list[str] = [
+             f"GridSearch Results ({len(self.points)} combinations)",
+             f"Tokenizer: {self.tokenizer_info}",
+             f"Input text: {self.text[:50]}{'...' if len(self.text) > 50 else ''}",
+             "",
+             "Parameter Grid:",
+         ]
+
+         for param_name, values in self.param_grid.items():
+             values_str = ", ".join(str(v) for v in values[:5])
+             if len(values) > 5:
+                 values_str += f", ... ({len(values)} total)"
+             lines.append(f" {param_name}: [{values_str}]")
+
+         if self.best_point and self.ranking_metric:
+             direction = "minimizing" if self.ranking_minimize else "maximizing"
+             lines.append("")
+             lines.append(f"Best ({direction} {self.ranking_metric}):")
+             lines.append(f" params: {self.best_point.params}")
+             metric_val = self.best_point.metrics.get(self.ranking_metric, 0.0)
+             lines.append(f" {self.ranking_metric}: {metric_val:.4f}")
+
+         if show_top > 0 and self.ranking_metric:
+             lines.append("")
+             lines.append(f"Top {min(show_top, len(self.points))} Results:")
+             # Pure: use rank_grid_points helper
+             sorted_points = rank_grid_points(
+                 self.points,
+                 rank_by=self.ranking_metric,
+                 minimize=self.ranking_minimize,
+             )
+             for i, point in enumerate(sorted_points[:show_top], 1):
+                 metric_val = point.metrics.get(self.ranking_metric, 0.0)
+                 lines.append(f" {i}. {point.params} -> {self.ranking_metric}={metric_val:.4f}")
+
+         return "\n".join(lines)
+
+     def to_report(self) -> dict[str, object]:
+         """Convert to JSON-serializable dictionary (pure)."""
+         return {
+             "text": self.text,
+             "tokenizer": self.tokenizer_info,
+             "param_grid": self.param_grid,
+             "num_combinations": len(self.points),
+             "ranking_metric": self.ranking_metric,
+             "ranking_minimize": self.ranking_minimize,
+             "best_params": self.best_point.params if self.best_point else None,
+             "best_metrics": self.best_point.metrics if self.best_point else None,
+             "all_points": [{"params": p.params, "metrics": p.metrics} for p in self.points],
+         }
+
+     def filter_by_metric(
+         self,
+         metric_name: str,
+         *,
+         min_value: float | None = None,
+         max_value: float | None = None,
+     ) -> list[GridSearchPoint]:
+         """Filter grid points by metric thresholds.
+
+         Args:
+             metric_name: Name of the metric to filter by.
+             min_value: Minimum metric value (inclusive).
+             max_value: Maximum metric value (inclusive).
+
+         Returns:
+             List of GridSearchPoints that meet the criteria.
+         """
+         results: list[GridSearchPoint] = []
+         for point in self.points:
+             value = point.metrics.get(metric_name)
+             if value is None:
+                 continue
+             if min_value is not None and value < min_value:
+                 continue
+             if max_value is not None and value > max_value:
+                 continue
+             results.append(point)
+         return results
+
+     def filter_by_params(self, **param_filters: Any) -> list[GridSearchPoint]:
+         """Filter grid points by parameter values.
+
+         Args:
+             **param_filters: Parameter name=value pairs to match.
+
+         Returns:
+             List of GridSearchPoints matching all filters.
+
+         Example:
+             >>> result.filter_by_params(rate=0.05)
+         """
+         results: list[GridSearchPoint] = []
+         for point in self.points:
+             match = all(point.params.get(name) == value for name, value in param_filters.items())
+             if match:
+                 results.append(point)
+         return results
+
+     def export_csv(
+         self,
+         filepath: str,
+         *,
+         include_params: bool = True,
+         metrics: Sequence[str] | None = None,
+     ) -> None:
+         """Export all grid points to CSV.
+
+         Args:
+             filepath: Path to write the CSV file.
+             include_params: Whether to include parameter columns.
+             metrics: Specific metrics to include (None = all).
+         """
+         import csv
+
+         if not self.points:
+             return
+
+         # Determine columns
+         param_names = list(self.param_grid.keys()) if include_params else []
+         first_metrics = self.points[0].metrics
+         if metrics is None:
+             metric_names = list(first_metrics.keys())
+         else:
+             metric_names = list(metrics)
+
+         with open(filepath, "w", newline="") as f:
+             writer = csv.writer(f)
+             writer.writerow(param_names + metric_names)
+             for point in self.points:
+                 param_values = [point.params.get(p, "") for p in param_names]
+                 metric_values = [point.metrics.get(m, "") for m in metric_names]
+                 writer.writerow(param_values + metric_values)
+
+     def to_dataframe(self) -> "Any":
+         """Convert to pandas DataFrame (requires pandas).
+
+         Returns:
+             DataFrame with parameters and metrics as columns.
+
+         Raises:
+             ImportError: If pandas is not installed.
+         """
+         try:
+             import pandas as pd
+         except ImportError as e:
+             raise ImportError(
+                 "pandas is required for to_dataframe(). Install with: pip install pandas"
+             ) from e
+
+         rows = []
+         for point in self.points:
+             row = {**point.params, **point.metrics}
+             rows.append(row)
+         return pd.DataFrame(rows)
+
+
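Because every evaluated point is retained, a GridSearchResult supports cheap post-hoc slicing without re-running any attacks. A sketch, assuming a result produced by the GridSearch orchestrator defined just below (metric and parameter names follow the docstring examples):

    # result: GridSearchResult
    gentle = result.filter_by_metric("normalized_edit_distance", max_value=0.2)
    at_rate = result.filter_by_params(rate=0.05)   # exact-match on grid params
    result.export_csv("grid_points.csv")           # param columns + metric columns
    df = result.to_dataframe()                     # optional; needs pandas installed
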
+ class GridSearch:
+     """Search across parameter combinations (impure orchestrator).
+
+     This tool performs a grid search over parameter ranges, evaluating
+     the attack at each combination and ranking by a specified metric.
+
+     Example:
+         >>> from glitchlings import Typogre
+         >>> grid = GridSearch(
+         ...     Typogre,
+         ...     param_grid={"rate": [0.01, 0.05, 0.1, 0.2]},
+         ...     tokenizer='cl100k_base'
+         ... )
+         >>> result = grid.run("Hello world", rank_by="normalized_edit_distance")
+         >>> print(result.summary())
+     """
+
+     def __init__(
+         self,
+         glitchling_class: type["Corruptor"],
+         param_grid: dict[str, list[Any]],
+         *,
+         tokenizer: str | Tokenizer | None = None,
+         base_params: dict[str, Any] | None = None,
+         seed: int | None = None,
+         metrics: Mapping[str, Callable[..., float | list[float]]] | None = None,
+     ) -> None:
+         """Initialize a GridSearch analyzer.
+
+         Args:
+             glitchling_class: The Glitchling class to instantiate.
+             param_grid: Dictionary mapping param names to value lists.
+             tokenizer: Tokenizer name or instance.
+             base_params: Default parameters (grid params override).
+             seed: Seed for reproducibility.
+             metrics: Optional custom metrics.
+         """
+         self._glitchling_class = glitchling_class
+         self._param_grid = param_grid
+         self._base_params = base_params or {}
+         self._seed = seed
+         self._metrics = metrics
+         # Impure: resolve tokenizer once
+         self._resolved_tokenizer = resolve_tokenizer(tokenizer)
+         self._tokenizer_info = describe_tokenizer(self._resolved_tokenizer, tokenizer)
+
+     def run(
+         self,
+         text: str,
+         *,
+         rank_by: str | None = "normalized_edit_distance",
+         minimize: bool = True,
+         progress_callback: Callable[[list[GridSearchPoint]], None] | None = None,
+         early_stop: Callable[[GridSearchPoint], bool] | None = None,
+     ) -> GridSearchResult:
+         """Run grid search over all combinations (impure execution).
+
+         Args:
+             text: Input text to corrupt.
+             rank_by: Metric name to rank by (None for no ranking).
+             minimize: If True, lower metric values are better.
+             progress_callback: Optional callback receiving list of evaluated
+                 GridSearchPoints so far.
+             early_stop: Optional predicate receiving a GridSearchPoint. If it
+                 returns True, the search stops early.
+
+         Returns:
+             GridSearchResult with all points and best one.
+         """
+         # Pure: generate combinations
+         combinations = generate_param_combinations(self._param_grid)
+         points: list[GridSearchPoint] = []
+
+         # Impure: run attacks for each combination
+         for combo in combinations:
+             params = {**self._base_params, **combo}
+             glitchling = self._glitchling_class(**params)
+
+             attack = Attack(
+                 glitchling,
+                 tokenizer=self._resolved_tokenizer,
+                 metrics=self._metrics,
+                 seed=self._seed,
+             )
+             result = attack.run(text)
+
+             # Pure: extract scalar metrics
+             metrics_dict = extract_scalar_metrics(result.metrics)
+
+             point = GridSearchPoint(
+                 params=combo,
+                 result=result,
+                 metrics=metrics_dict,
+             )
+             points.append(point)
+
+             # Callback with progress
+             if progress_callback is not None:
+                 progress_callback(points)
+
+             # Check early stopping
+             if early_stop is not None and early_stop(point):
+                 break
+
+         # Pure: find best point
+         best_point: GridSearchPoint | None = None
+         if rank_by and points:
+             sorted_points = rank_grid_points(points, rank_by=rank_by, minimize=minimize)
+             best_point = sorted_points[0]
+
+         return GridSearchResult(
+             text=text,
+             tokenizer_info=self._tokenizer_info,
+             param_grid=self._param_grid,
+             points=points,
+             best_point=best_point,
+             ranking_metric=rank_by,
+             ranking_minimize=minimize,
+         )
+
+
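A usage sketch for GridSearch, again borrowing Typogre and cl100k_base from the docstring above; the early_stop threshold is illustrative. Note minimize=False here, which treats a larger normalized edit distance as a more disruptive, and therefore better, attack:

    from glitchlings import Typogre
    from glitchlings.attack.analysis import GridSearch

    grid = GridSearch(
        Typogre,
        param_grid={"rate": [0.01, 0.05, 0.1, 0.2]},
        tokenizer="cl100k_base",
        seed=42,  # one fixed seed, so points differ only by parameters
    )
    result = grid.run(
        "Hello world",
        rank_by="normalized_edit_distance",
        minimize=False,
        early_stop=lambda point: point.metrics.get("normalized_edit_distance", 0.0) > 0.5,
    )
    if result.best_point is not None:
        print(result.best_point.params, result.best_point.metrics)
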
+ # ---------------------------------------------------------------------------
+ # TokenizerComparison: Result and Orchestrator
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class TokenizerComparisonEntry:
+     """Results for a single tokenizer in a comparison (pure data class).
+
+     Attributes:
+         tokenizer_name: Identifier/description of the tokenizer.
+         result: Full AttackResult for this tokenizer.
+         tokens: Output token strings after corruption.
+         token_ids: Output token IDs after corruption.
+         metrics: Extracted scalar metrics.
+     """
+
+     tokenizer_name: str
+     result: AttackResult
+     tokens: list[str]
+     token_ids: list[int]
+     metrics: dict[str, float]
+
+
+ @dataclass
+ class TokenizerComparisonResult:
+     """Results from comparing multiple tokenizers (pure data class).
+
+     Attributes:
+         text: Original input text.
+         corrupted_text: Text after corruption (same for all tokenizers).
+         entries: Comparison entries for each tokenizer.
+         metric_comparison: Metrics side-by-side for all tokenizers.
+     """
+
+     text: str
+     corrupted_text: str
+     entries: list[TokenizerComparisonEntry]
+     metric_comparison: dict[str, dict[str, float]] = field(default_factory=dict)
+
+     def __post_init__(self) -> None:
+         """Build metric comparison table (pure computation)."""
+         if not self.metric_comparison and self.entries:
+             all_metric_names: set[str] = set()
+             for entry in self.entries:
+                 all_metric_names.update(entry.metrics.keys())
+
+             for metric_name in sorted(all_metric_names):
+                 self.metric_comparison[metric_name] = {
+                     entry.tokenizer_name: entry.metrics.get(metric_name, 0.0)
+                     for entry in self.entries
+                 }
+
+     def summary(self, *, show_tokens: int = 10) -> str:
+         """Generate a human-readable comparison summary (pure formatting)."""
+         lines: list[str] = [
+             f"TokenizerComparison Results ({len(self.entries)} tokenizers)",
+             f"Input: {self.text[:60]}{'...' if len(self.text) > 60 else ''}",
+             f"Output: {self.corrupted_text[:60]}{'...' if len(self.corrupted_text) > 60 else ''}",
+             "",
+             "Metrics Comparison:",
+         ]
+
+         # Build metric comparison table
+         tokenizer_names = [e.tokenizer_name for e in self.entries]
+         header = " " + " | ".join(f"{name[:15]:>15}" for name in ["metric"] + tokenizer_names)
+         lines.append(header)
+         lines.append(" " + "-" * len(header))
+
+         for metric_name, values in self.metric_comparison.items():
+             row_values = [f"{values.get(name, 0.0):>15.4f}" for name in tokenizer_names]
+             lines.append(f" {metric_name[:15]:>15} | " + " | ".join(row_values))
+
+         # Token counts
+         lines.append("")
+         lines.append("Token Counts:")
+         for entry in self.entries:
+             input_count = len(entry.result.input_tokens)
+             output_count = len(entry.tokens)
+             delta = output_count - input_count
+             lines.append(f" {entry.tokenizer_name}: {input_count} -> {output_count} ({delta:+d})")
+
+         # Token streams
+         if show_tokens > 0:
+             lines.append("")
+             lines.append("Output Token Streams:")
+             for entry in self.entries:
+                 lines.append(f" {entry.tokenizer_name}:")
+                 display_tokens = entry.tokens[:show_tokens]
+                 tokens_str = ", ".join(f"'{t}'" for t in display_tokens)
+                 if len(entry.tokens) > show_tokens:
+                     tokens_str += f", ... ({len(entry.tokens)} total)"
+                 lines.append(f" [{tokens_str}]")
+
+         return "\n".join(lines)
+
+     def to_report(self, *, include_token_ids: bool = True) -> dict[str, object]:
+         """Convert to JSON-serializable dictionary (pure)."""
+         entries_data = []
+         for entry in self.entries:
+             entry_data: dict[str, object] = {
+                 "tokenizer": entry.tokenizer_name,
+                 "tokens": entry.tokens,
+                 "metrics": entry.metrics,
+                 "input_token_count": len(entry.result.input_tokens),
+                 "output_token_count": len(entry.tokens),
+             }
+             if include_token_ids:
+                 entry_data["token_ids"] = entry.token_ids
+             entries_data.append(entry_data)
+
+         return {
+             "text": self.text,
+             "corrupted_text": self.corrupted_text,
+             "entries": entries_data,
+             "metric_comparison": self.metric_comparison,
+         }
+
+     def to_dataframe(self) -> "Any":
+         """Convert to pandas DataFrame (requires pandas).
+
+         Returns:
+             DataFrame with tokenizer names as index and metrics as columns.
+
+         Raises:
+             ImportError: If pandas is not installed.
+         """
+         try:
+             import pandas as pd
+         except ImportError as e:
+             raise ImportError(
+                 "pandas is required for to_dataframe(). Install with: pip install pandas"
+             ) from e
+
+         data = {entry.tokenizer_name: entry.metrics for entry in self.entries}
+         return pd.DataFrame(data).T
+
+     def export_csv(self, path: str) -> None:
+         """Export comparison results to CSV.
+
+         Args:
+             path: Output file path.
+         """
+         import csv
+
+         if not self.entries:
+             return
+
+         with open(path, "w", newline="", encoding="utf-8") as f:
+             writer = csv.writer(f)
+
+             # Header: tokenizer_name, metric1, metric2, ...
+             metric_names = list(self.entries[0].metrics.keys())
+             writer.writerow(["tokenizer"] + metric_names)
+
+             for entry in self.entries:
+                 row = [entry.tokenizer_name] + [entry.metrics.get(m, 0.0) for m in metric_names]
+                 writer.writerow(row)
+
+
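Since to_report() returns only plain dicts, lists, and scalars, persisting a comparison needs nothing beyond the standard library; a small sketch (the file name is illustrative):

    import json

    # result: TokenizerComparisonResult
    with open("tokenizer_comparison.json", "w", encoding="utf-8") as f:
        json.dump(result.to_report(include_token_ids=False), f, indent=2)
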
+ def _extract_output_tokens(
+     result: AttackResult,
+ ) -> tuple[list[str], list[int]]:
+     """Extract output tokens from an AttackResult (pure helper).
+
+     Args:
+         result: AttackResult to extract from.
+
+     Returns:
+         Tuple of (tokens, token_ids).
+     """
+     if isinstance(result.output_tokens, list) and result.output_tokens:
+         if isinstance(result.output_tokens[0], list):
+             # Batched - take first
+             return result.output_tokens[0], result.output_token_ids[0]  # type: ignore[return-value]
+         return result.output_tokens, result.output_token_ids  # type: ignore[return-value]
+     return [], []
+
+
+ class TokenizerComparison:
+     """Compare token streams and metrics across tokenizers (impure).
+
+     This tool runs the same attack with multiple tokenizers to compare
+     how different tokenization schemes affect token streams and metrics.
+
+     Example:
+         >>> from glitchlings import Typogre
+         >>> compare = TokenizerComparison(
+         ...     Typogre(rate=0.05),
+         ...     tokenizers=['cl100k_base', 'o200k_base', 'gpt2']
+         ... )
+         >>> result = compare.run("Hello world")
+         >>> print(result.summary())
+     """
+
+     def __init__(
+         self,
+         glitchlings: "Corruptor | str | Sequence[str | Corruptor]",
+         tokenizers: Sequence[str | Tokenizer],
+         *,
+         seed: int | None = None,
+         metrics: Mapping[str, Callable[..., float | list[float]]] | None = None,
+     ) -> None:
+         """Initialize a TokenizerComparison analyzer.
+
+         Args:
+             glitchlings: Glitchling specification (same as Attack).
+             tokenizers: List of tokenizer names/instances to compare.
+             seed: Seed for reproducibility (same for all tokenizers).
+             metrics: Optional custom metrics.
+
+         Raises:
+             ValueError: If no tokenizer is provided.
+         """
+         if not tokenizers:
+             raise ValueError("At least one tokenizer must be provided for comparison.")
+
+         self._glitchlings_spec = glitchlings
+         self._tokenizer_specs = list(tokenizers)
+         self._seed = seed
+         self._metrics = metrics
+
+         # Impure: pre-resolve tokenizers
+         self._resolved_tokenizers: list[tuple[str, Tokenizer]] = []
+         for spec in self._tokenizer_specs:
+             resolved = resolve_tokenizer(spec)
+             info = describe_tokenizer(resolved, spec)
+             self._resolved_tokenizers.append((info, resolved))
+
+     def run(self, text: str) -> TokenizerComparisonResult:
+         """Run comparison across all tokenizers (impure execution).
+
+         Args:
+             text: Input text to corrupt.
+
+         Returns:
+             TokenizerComparisonResult with entries for each tokenizer.
+         """
+         entries: list[TokenizerComparisonEntry] = []
+         corrupted_text: str = ""
+
+         # Impure: create gaggle for consistent corruption across tokenizers
+         gaggle = resolve_glitchlings(
+             self._glitchlings_spec,
+             seed=self._seed,
+             transcript_target=None,
+         )
+         corrupted_result = gaggle.corrupt(text)
+         if isinstance(corrupted_result, str):
+             corrupted_text = corrupted_result
+         else:
+             # For transcripts, join content for display
+             corrupted_text = " ".join(
+                 turn.get("content", "") for turn in corrupted_result if isinstance(turn, dict)
+             )
+
+         # Impure: run attack with each tokenizer
+         for tokenizer_name, tokenizer in self._resolved_tokenizers:
+             attack = Attack(
+                 gaggle.clone(),  # Clone to reset RNG state
+                 tokenizer=tokenizer,
+                 metrics=self._metrics,
+                 seed=self._seed,
+             )
+             result = attack.run(text)
+
+             # Pure: extract tokens and metrics
+             tokens, token_ids = _extract_output_tokens(result)
+             metrics_dict = extract_scalar_metrics(result.metrics)
+
+             entries.append(
+                 TokenizerComparisonEntry(
+                     tokenizer_name=tokenizer_name,
+                     result=result,
+                     tokens=tokens,
+                     token_ids=token_ids,
+                     metrics=metrics_dict,
+                 )
+             )
+
+         return TokenizerComparisonResult(
+             text=text,
+             corrupted_text=corrupted_text,
+             entries=entries,
+         )
+
+
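A sketch of the full comparison flow, using tokenizer names from the docstring above. The same seeded corruption is applied once and then measured under each tokenizer, so the per-metric rows in metric_comparison are directly comparable:

    from glitchlings import Typogre
    from glitchlings.attack.analysis import TokenizerComparison

    compare = TokenizerComparison(
        Typogre(rate=0.05),
        tokenizers=["cl100k_base", "o200k_base"],
        seed=7,
    )
    result = compare.run("Hello world")
    for metric, per_tokenizer in result.metric_comparison.items():
        print(metric, per_tokenizer)  # e.g. {'cl100k_base': ..., 'o200k_base': ...}
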
+ # ---------------------------------------------------------------------------
+ # GlitchlingComparison: Compare Multiple Glitchlings
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class GlitchlingComparisonEntry:
+     """Results for a single glitchling in a comparison (pure data class).
+
+     Attributes:
+         name: Identifier for the glitchling.
+         glitchling: The glitchling instance used.
+         result: Full AttackResult for this glitchling.
+         metrics: Extracted scalar metrics.
+     """
+
+     name: str
+     glitchling: "Corruptor"
+     result: AttackResult
+     metrics: dict[str, float]
+
+
+ @dataclass
+ class GlitchlingComparisonResult:
+     """Results from comparing multiple glitchlings (pure data class).
+
+     Attributes:
+         text: The input text that was corrupted.
+         tokenizer_info: Description of the tokenizer used.
+         entries: List of results per glitchling.
+     """
+
+     text: str
+     tokenizer_info: str
+     entries: list[GlitchlingComparisonEntry]
+
+     @property
+     def metric_comparison(self) -> dict[str, dict[str, float]]:
+         """Get metrics organized by metric name -> glitchling name -> value."""
+         if not self.entries:
+             return {}
+
+         metric_names = list(self.entries[0].metrics.keys())
+         comparison: dict[str, dict[str, float]] = {}
+         for metric_name in metric_names:
+             comparison[metric_name] = {
+                 entry.name: entry.metrics.get(metric_name, 0.0) for entry in self.entries
+             }
+         return comparison
+
+     def rank_by(
+         self,
+         metric_name: str,
+         *,
+         minimize: bool = True,
+     ) -> list[GlitchlingComparisonEntry]:
+         """Rank glitchlings by a specific metric.
+
+         Args:
+             metric_name: Metric to rank by.
+             minimize: If True, lower is better.
+
+         Returns:
+             Entries sorted by the metric.
+         """
+         return sorted(
+             self.entries,
+             key=lambda e: e.metrics.get(metric_name, float("inf")),
+             reverse=not minimize,
+         )
+
+     def summary(self, *, show_corrupted: bool = True) -> str:
+         """Generate a human-readable summary (pure formatting)."""
+         lines: list[str] = [
+             "╭─ Glitchling Comparison ─────────────────────────────────╮",
+             f"│ Tokenizer: {self.tokenizer_info:<45} │",
+             f"│ Input: {self.text[:47]:<47} │"
+             if len(self.text) <= 47
+             else f"│ Input: {self.text[:44]}... │",
+             "├──────────────────────────────────────────────────────────┤",
+         ]
+
+         # Metric comparison table
+         if self.entries:
+             metric_names = list(self.entries[0].metrics.keys())
+
+             # Header
+             header = "│ Glitchling"
+             for name in metric_names:
+                 short_name = name[:10] if len(name) > 10 else name
+                 header += f" │ {short_name:>10}"
+             header += " │"
+             lines.append(header)
+             lines.append("├" + "─" * 58 + "┤")
+
+             # Rows
+             for entry in self.entries:
+                 row = f"│ {entry.name:<10}"
+                 for metric_name in metric_names:
+                     val = entry.metrics.get(metric_name, 0.0)
+                     row += f" │ {val:>10.4f}"
+                 row += " │"
+                 lines.append(row)
+
+         if show_corrupted and self.entries:
+             lines.append("├──────────────────────────────────────────────────────────┤")
+             lines.append("│ Corrupted Outputs: │")
+             for entry in self.entries:
+                 corrupted = str(entry.result.corrupted)
+                 if len(corrupted) > 45:
+                     corrupted = corrupted[:42] + "..."
+                 lines.append(f"│ {entry.name}: {corrupted:<43} │")
+
+         lines.append("╰──────────────────────────────────────────────────────────╯")
+         return "\n".join(lines)
+
+     def to_report(self) -> dict[str, object]:
+         """Convert to JSON-serializable dictionary (pure)."""
+         return {
+             "text": self.text,
+             "tokenizer": self.tokenizer_info,
+             "entries": [
+                 {
+                     "name": e.name,
+                     "corrupted": e.result.corrupted,
+                     "metrics": e.metrics,
+                 }
+                 for e in self.entries
+             ],
+             "metric_comparison": self.metric_comparison,
+         }
+
+     def to_dataframe(self) -> "Any":
+         """Convert to pandas DataFrame (requires pandas).
+
+         Returns:
+             DataFrame with glitchling names as index and metrics as columns.
+
+         Raises:
+             ImportError: If pandas is not installed.
+         """
+         try:
+             import pandas as pd
+         except ImportError as e:
+             raise ImportError(
+                 "pandas is required for to_dataframe(). Install with: pip install pandas"
+             ) from e
+
+         data = {entry.name: entry.metrics for entry in self.entries}
+         return pd.DataFrame(data).T
+
+     def export_csv(self, path: str) -> None:
+         """Export comparison results to CSV.
+
+         Args:
+             path: Output file path.
+         """
+         import csv
+
+         if not self.entries:
+             return
+
+         with open(path, "w", newline="", encoding="utf-8") as f:
+             writer = csv.writer(f)
+
+             # Header: glitchling_name, metric1, metric2, ...
+             metric_names = list(self.entries[0].metrics.keys())
+             writer.writerow(["glitchling"] + metric_names)
+
+             for entry in self.entries:
+                 row = [entry.name] + [entry.metrics.get(m, 0.0) for m in metric_names]
+                 writer.writerow(row)
+
+
+ def compare_glitchlings(
+     text: str,
+     glitchlings: Sequence[tuple[str, "Corruptor"]],
+     *,
+     tokenizer: str | Tokenizer | None = None,
+     metrics: Mapping[str, Callable[..., float | list[float]]] | None = None,
+     seed: int | None = None,
+ ) -> GlitchlingComparisonResult:
+     """Compare multiple glitchlings on the same text with the same tokenizer.
+
+     Holds the tokenizer fixed and varies the glitchlings - useful for finding
+     which corruption strategy has the most impact for a specific tokenizer.
+
+     Example:
+         >>> from glitchlings import Typogre, Mim1c, Wherewolf
+         >>> result = compare_glitchlings(
+         ...     "Hello world",
+         ...     [
+         ...         ("typogre", Typogre(rate=0.05)),
+         ...         ("mim1c", Mim1c(rate=0.05)),
+         ...         ("wherewolf", Wherewolf(rate=0.05)),
+         ...     ],
+         ...     tokenizer="o200k_base",
+         ... )
+         >>> print(result.summary())
+         >>> best = result.rank_by("normalized_edit_distance", minimize=False)[0]
+         >>> print(f"Most disruptive: {best.name}")
+
+     Args:
+         text: Input text to corrupt.
+         glitchlings: List of (name, glitchling) pairs to compare.
+         tokenizer: Tokenizer to use (same for all glitchlings).
+         metrics: Custom metrics (defaults to Attack defaults).
+         seed: Seed for reproducibility.
+
+     Returns:
+         GlitchlingComparisonResult with all entries.
+     """
+     resolved_tokenizer = resolve_tokenizer(tokenizer)
+     tokenizer_info = describe_tokenizer(resolved_tokenizer, tokenizer)
+
+     entries: list[GlitchlingComparisonEntry] = []
+     for name, glitchling in glitchlings:
+         attack = Attack(
+             glitchling,
+             tokenizer=resolved_tokenizer,
+             metrics=metrics,
+             seed=seed,
+         )
+         result = attack.run(text)
+         metrics_dict = extract_scalar_metrics(result.metrics)
+
+         entries.append(
+             GlitchlingComparisonEntry(
+                 name=name,
+                 glitchling=glitchling,
+                 result=result,
+                 metrics=metrics_dict,
+             )
+         )
+
+     return GlitchlingComparisonResult(
+         text=text,
+         tokenizer_info=tokenizer_info,
+         entries=entries,
+     )
+
+
+ def compare_tokenizers(
+     text: str,
+     glitchling: "Corruptor | str | Sequence[str | Corruptor]",
+     tokenizers: Sequence[str | Tokenizer],
+     *,
+     metrics: Mapping[str, Callable[..., float | list[float]]] | None = None,
+     seed: int | None = None,
+ ) -> "TokenizerComparisonResult":
+     """Compare multiple tokenizers on the same corrupted text.
+
+     Holds the glitchling fixed and varies the tokenizers - useful for finding
+     which tokenizer is most affected by a specific corruption strategy.
+
+     Example:
+         >>> from glitchlings import Typogre
+         >>> result = compare_tokenizers(
+         ...     "Hello world",
+         ...     Typogre(rate=0.1),
+         ...     tokenizers=["o200k_base", "cl100k_base"],
+         ... )
+         >>> print(result.summary())
+
+     Args:
+         text: Input text to corrupt.
+         glitchling: Glitchling to apply (same corruption for all tokenizers).
+         tokenizers: List of tokenizer names/instances to compare.
+         metrics: Custom metrics (defaults to Attack defaults).
+         seed: Seed for reproducibility.
+
+     Returns:
+         TokenizerComparisonResult with all entries.
+     """
+     comparison = TokenizerComparison(
+         glitchling,
+         tokenizers=tokenizers,
+         metrics=metrics,
+         seed=seed,
+     )
+     return comparison.run(text)
+
+
+ __all__ = [
+     # Pure statistical helpers
+     "compute_aggregate_stats",
+     "format_stats_summary",
+     "extract_scalar_metrics",
+     # Pure grid helpers
+     "generate_param_combinations",
+     "rank_grid_points",
+     # SeedSweep
+     "SeedSweep",
+     "SeedSweepResult",
+     # GridSearch
+     "GridSearch",
+     "GridSearchResult",
+     "GridSearchPoint",
+     # TokenizerComparison
+     "TokenizerComparison",
+     "TokenizerComparisonResult",
+     "TokenizerComparisonEntry",
+     # Comparison functions
+     "compare_glitchlings",
+     "compare_tokenizers",
+     "GlitchlingComparisonResult",
+     "GlitchlingComparisonEntry",
+ ]