glitchlings-1.0.0-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +101 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_corruption_engine/__init__.py +12 -0
- glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +184 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +819 -0
- glitchlings/attack/core_execution.py +378 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +211 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +338 -0
- glitchlings/attack/tokenizer_metrics.py +373 -0
- glitchlings/auggie.py +285 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +39 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +139 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +21 -0
- glitchlings/dlc/_shared.py +300 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/langchain.py +147 -0
- glitchlings/dlc/nemo.py +283 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +599 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +41 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +508 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +852 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +291 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +301 -0
- glitchlings/zoo/mim1c.py +269 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +99 -0
- glitchlings/zoo/pedant/forms.py +50 -0
- glitchlings/zoo/pedant/stones.py +83 -0
- glitchlings/zoo/redactyl.py +94 -0
- glitchlings/zoo/rng.py +280 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +370 -0
- glitchlings/zoo/transforms.py +331 -0
- glitchlings/zoo/typogre.py +194 -0
- glitchlings/zoo/validation.py +643 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +165 -0
- glitchlings-1.0.0.dist-info/METADATA +404 -0
- glitchlings-1.0.0.dist-info/RECORD +86 -0
- glitchlings-1.0.0.dist-info/WHEEL +5 -0
- glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
- glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
- glitchlings-1.0.0.dist-info/top_level.txt +1 -0
glitchlings/attack/core.py
@@ -0,0 +1,819 @@
"""Attack orchestrator for measuring corruption impact.

This module provides the Attack class, a boundary layer that coordinates
glitchling corruption and metric computation. It follows the functional
purity architecture:

- **Pure planning**: Input analysis and result planning (core_planning.py)
- **Impure execution**: Corruption, tokenization, metrics (core_execution.py)
- **Boundary layer**: This module - validates inputs and delegates

See AGENTS.md "Functional Purity Architecture" for full details.
"""

from __future__ import annotations

import inspect
from collections.abc import Callable, Generator, Iterator, Mapping, Sequence
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
    pass  # For forward references in type hints

from ..conf import DEFAULT_ATTACK_SEED
from ..protocols import Corruptor
from ..util.transcripts import Transcript, TranscriptTarget
from .core_execution import (
    execute_attack,
    get_default_metrics,
    resolve_glitchlings,
)
from .core_planning import (
    plan_attack,
    plan_result,
)
from .encode import describe_tokenizer
from .metrics import Metric
from .tokenization import Tokenizer, resolve_tokenizer

# ---------------------------------------------------------------------------
# Streaming Token Iterator
# ---------------------------------------------------------------------------


@dataclass
class TokenWindow:
    """A window of tokens for streaming processing.

    Represents a chunk of tokens that can be processed without loading
    the entire token sequence into memory.

    Attributes:
        tokens: Token strings in this window.
        token_ids: Token IDs in this window.
        start_index: Starting index of this window in the full sequence.
        is_last: Whether this is the final window.
    """

    tokens: list[str]
    token_ids: list[int]
    start_index: int
    is_last: bool

    def __len__(self) -> int:
        return len(self.tokens)


class StreamingTokens:
    """Iterator for windowed access to token sequences.

    Provides fixed-size window iteration over token sequences, useful for
    processing large results in chunks without copying the entire sequence.

    Note: This class provides windowed *access* to an existing token list,
    not lazy loading. The full token list must already be in memory. For
    true memory savings during tokenization, process texts in smaller batches.

    Attributes:
        window_size: Number of tokens per window.
        total_tokens: Total number of tokens.
    """

    def __init__(
        self,
        tokens: list[str],
        token_ids: list[int],
        *,
        window_size: int = 10000,
    ):
        """Initialize windowed token access.

        Args:
            tokens: Full token list to provide windowed access to.
            token_ids: Full token ID list (must match tokens length).
            window_size: Number of tokens per window. Defaults to 10000.
        """
        self._tokens = tokens
        self._token_ids = token_ids
        self.window_size = window_size
        self.total_tokens = len(tokens)

    def __iter__(self) -> Iterator[TokenWindow]:
        """Iterate over token windows."""
        for start in range(0, self.total_tokens, self.window_size):
            end = min(start + self.window_size, self.total_tokens)
            yield TokenWindow(
                tokens=self._tokens[start:end],
                token_ids=self._token_ids[start:end],
                start_index=start,
                is_last=(end >= self.total_tokens),
            )

    def __len__(self) -> int:
        """Return total number of tokens."""
        return self.total_tokens

    @property
    def all_tokens(self) -> list[str]:
        """Get all tokens (materializes full list)."""
        return self._tokens

    @property
    def all_token_ids(self) -> list[int]:
        """Get all token IDs (materializes full list)."""
        return self._token_ids
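

# A minimal iteration sketch: three tokens with window_size=2 yield two
# windows, the second flagged as last.
#
#   >>> stream = StreamingTokens(["a", "b", "c"], [1, 2, 3], window_size=2)
#   >>> [(w.tokens, w.start_index, w.is_last) for w in stream]
#   [(['a', 'b'], 0, False), (['c'], 2, True)]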


# ---------------------------------------------------------------------------
# Result Data Classes
# ---------------------------------------------------------------------------


@dataclass
class AttackResult:
    """Result of an attack operation containing tokens and metrics.

    Attributes:
        original: Original input (string, transcript, or batch).
        corrupted: Corrupted output (same type as original).
        input_tokens: Tokenized original content.
        output_tokens: Tokenized corrupted content.
        input_token_ids: Token IDs for original.
        output_token_ids: Token IDs for corrupted.
        tokenizer_info: Description of the tokenizer used.
        metrics: Computed metric values.
    """

    original: str | Transcript | Sequence[str]
    corrupted: str | Transcript | Sequence[str]
    input_tokens: list[str] | list[list[str]]
    output_tokens: list[str] | list[list[str]]
    input_token_ids: list[int] | list[list[int]]
    output_token_ids: list[int] | list[list[int]]
    tokenizer_info: str
    metrics: dict[str, float | list[float]]

    def _tokens_are_batched(self) -> bool:
        """Check if tokens represent a batch."""
        tokens = self.input_tokens
        if tokens and isinstance(tokens[0], list):
            return True
        return isinstance(self.original, list) or isinstance(self.corrupted, list)

    def _token_batches(self) -> tuple[list[list[str]], list[list[str]]]:
        """Get tokens as batches (wrapping single sequences if needed)."""
        if self._tokens_are_batched():
            return (
                cast(list[list[str]], self.input_tokens),
                cast(list[list[str]], self.output_tokens),
            )
        return (
            [cast(list[str], self.input_tokens)],
            [cast(list[str], self.output_tokens)],
        )

    def _token_counts(self) -> tuple[list[int], list[int]]:
        """Compute token counts per batch item."""
        inputs, outputs = self._token_batches()
        return [len(tokens) for tokens in inputs], [len(tokens) for tokens in outputs]

    @staticmethod
    def _format_metric_value(value: float | list[float]) -> str:
        """Format a metric value for display."""
        if isinstance(value, list):
            if not value:
                return "[]"
            if len(value) <= 4:
                rendered = ", ".join(f"{entry:.3f}" for entry in value)
                return f"[{rendered}]"
            total = sum(value)
            minimum = min(value)
            maximum = max(value)
            mean = total / len(value)
            return f"avg={mean:.3f} min={minimum:.3f} max={maximum:.3f}"
        return f"{value:.3f}"

    @staticmethod
    def _format_token(token: str, *, max_length: int) -> str:
        """Format a token for display, truncating if needed."""
        clean = token.replace("\n", "\\n")
        if len(clean) > max_length:
            return clean[: max_length - 3] + "..."
        return clean

    def to_report(self) -> dict[str, object]:
        """Convert to a JSON-serializable dictionary."""
        input_counts, output_counts = self._token_counts()
        return {
            "tokenizer": self.tokenizer_info,
            "original": self.original,
            "corrupted": self.corrupted,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "input_token_ids": self.input_token_ids,
            "output_token_ids": self.output_token_ids,
            "token_counts": {
                "input": {"per_sample": input_counts, "total": sum(input_counts)},
                "output": {"per_sample": output_counts, "total": sum(output_counts)},
            },
            "metrics": self.metrics,
        }

    def summary(self, *, max_rows: int = 8, max_token_length: int = 24) -> str:
        """Generate a human-readable summary.

        Args:
            max_rows: Maximum rows to display in token drift.
            max_token_length: Maximum characters per token.

        Returns:
            Formatted multi-line summary string.
        """
        input_batches, output_batches = self._token_batches()
        input_counts, output_counts = self._token_counts()
        is_batch = self._tokens_are_batched()

        lines: list[str] = [f"Tokenizer: {self.tokenizer_info}"]
        if is_batch:
            lines.append(f"Samples: {len(input_batches)}")

        lines.append("Token counts:")
        for index, (input_count, output_count) in enumerate(
            zip(input_counts, output_counts), start=1
        ):
            prefix = f"#{index} " if is_batch else ""
            delta = output_count - input_count
            lines.append(f" {prefix}{input_count} -> {output_count} ({delta:+d})")
            if index >= max_rows and len(input_batches) > max_rows:
                remaining = len(input_batches) - max_rows
                lines.append(f" ... {remaining} more samples")
                break

        lines.append("Metrics:")
        for name, value in self.metrics.items():
            lines.append(f" {name}: {self._format_metric_value(value)}")

        if input_batches:
            focus_index = 0
            if is_batch and len(input_batches) > 1:
                lines.append("Token drift (first sample):")
            else:
                lines.append("Token drift:")
            input_tokens = input_batches[focus_index]
            output_tokens = output_batches[focus_index]
            rows = max(len(input_tokens), len(output_tokens))
            display_rows = min(rows, max_rows)
            for idx in range(display_rows):
                left = (
                    self._format_token(input_tokens[idx], max_length=max_token_length)
                    if idx < len(input_tokens)
                    else ""
                )
                right = (
                    self._format_token(output_tokens[idx], max_length=max_token_length)
                    if idx < len(output_tokens)
                    else ""
                )
                if idx >= len(input_tokens):
                    marker = "+"
                elif idx >= len(output_tokens):
                    marker = "-"
                elif input_tokens[idx] == output_tokens[idx]:
                    marker = "="
                else:
                    marker = "!"
                lines.append(f" {idx + 1:>3}{marker} {left} -> {right}")
            if rows > display_rows:
                lines.append(f" ... {rows - display_rows} more tokens")
        else:
            lines.append("Token drift: (empty input)")

        return "\n".join(lines)
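
    # Illustrative summary() output for a single string (values invented; the
    # real numbers depend on tokenizer, glitchlings, and seed):
    #
    #   Tokenizer: whitespace
    #   Token counts:
    #    3 -> 3 (+0)
    #   Metrics:
    #    normalized_edit_distance: 0.125
    #   Token drift:
    #      1= Hello -> Hello
    #      2! world -> wrold
    #      3= again -> again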

    # -------------------------------------------------------------------------
    # Token-Level Analysis
    # -------------------------------------------------------------------------

    def get_metric(self, name: str) -> float | list[float] | None:
        """Get a specific metric value by name.

        Args:
            name: Metric name (e.g., 'normalized_edit_distance').

        Returns:
            Metric value, or None if not found.
        """
        return self.metrics.get(name)

    def get_changed_tokens(self, batch_index: int = 0) -> list[tuple[str, str]]:
        """Get tokens that changed between original and corrupted.

        Args:
            batch_index: Which batch item to analyze (0 for single strings).

        Returns:
            List of (original_token, corrupted_token) pairs where they differ.
            Only includes positions where both tokens exist and are different.
        """
        input_batches, output_batches = self._token_batches()
        if batch_index >= len(input_batches):
            return []

        input_tokens = input_batches[batch_index]
        output_tokens = output_batches[batch_index]

        changes: list[tuple[str, str]] = []
        for i in range(min(len(input_tokens), len(output_tokens))):
            if input_tokens[i] != output_tokens[i]:
                changes.append((input_tokens[i], output_tokens[i]))
        return changes

    def get_mutation_positions(self, batch_index: int = 0) -> list[int]:
        """Get indices of tokens that were mutated.

        Args:
            batch_index: Which batch item to analyze (0 for single strings).

        Returns:
            List of token positions where original != corrupted.
            Only includes positions where both tokens exist.
        """
        input_batches, output_batches = self._token_batches()
        if batch_index >= len(input_batches):
            return []

        input_tokens = input_batches[batch_index]
        output_tokens = output_batches[batch_index]

        positions: list[int] = []
        for i in range(min(len(input_tokens), len(output_tokens))):
            if input_tokens[i] != output_tokens[i]:
                positions.append(i)
        return positions

    def get_token_alignment(self, batch_index: int = 0) -> list[dict[str, object]]:
        """Get detailed token-by-token comparison with alignment info.

        Args:
            batch_index: Which batch item to analyze (0 for single strings).

        Returns:
            List of alignment entries, each containing:
            - index: Token position
            - original: Original token (empty string if added)
            - corrupted: Corrupted token (empty string if removed)
            - changed: Whether the token changed
            - op: Operation type ('=' unchanged, '!' modified, '+' added, '-' removed)
        """
        input_batches, output_batches = self._token_batches()
        if batch_index >= len(input_batches):
            return []

        input_tokens = input_batches[batch_index]
        output_tokens = output_batches[batch_index]

        alignment: list[dict[str, object]] = []
        max_len = max(len(input_tokens), len(output_tokens))

        for i in range(max_len):
            orig = input_tokens[i] if i < len(input_tokens) else ""
            corr = output_tokens[i] if i < len(output_tokens) else ""

            if i >= len(input_tokens):
                op = "+"
                changed = True
            elif i >= len(output_tokens):
                op = "-"
                changed = True
            elif orig == corr:
                op = "="
                changed = False
            else:
                op = "!"
                changed = True

            alignment.append(
                {
                    "index": i,
                    "original": orig,
                    "corrupted": corr,
                    "changed": changed,
                    "op": op,
                }
            )

        return alignment
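

# Hand-built sketch of the token-level helpers (in normal use an AttackResult
# comes back from Attack.run rather than being constructed directly):
#
#   >>> r = AttackResult(
#   ...     original="ab c", corrupted="aB c",
#   ...     input_tokens=["ab", "c"], output_tokens=["aB", "c"],
#   ...     input_token_ids=[1, 2], output_token_ids=[3, 2],
#   ...     tokenizer_info="whitespace", metrics={},
#   ... )
#   >>> r.get_changed_tokens()
#   [('ab', 'aB')]
#   >>> r.get_mutation_positions()
#   [0]
#   >>> r.get_token_alignment()[1]["op"]
#   '='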


# ---------------------------------------------------------------------------
# Attack Orchestrator
# ---------------------------------------------------------------------------


class Attack:
    """Orchestrator for applying glitchling corruptions and measuring impact.

    Attack is a thin boundary layer that:
    1. Validates inputs at construction time
    2. Delegates planning to pure functions (core_planning.py)
    3. Delegates execution to impure functions (core_execution.py)

    Example:
        >>> attack = Attack(Typogre(rate=0.05), tokenizer='cl100k_base')
        >>> result = attack.run("Hello world")
        >>> print(result.summary())
    """

    def __init__(
        self,
        glitchlings: Corruptor | str | Sequence[str | Corruptor],
        tokenizer: str | Tokenizer | None = None,
        metrics: Mapping[str, Metric] | None = None,
        *,
        seed: int | None = None,
        transcript_target: TranscriptTarget | None = None,
    ) -> None:
        """Initialize an Attack.

        Args:
            glitchlings: Glitchling specification - a single Glitchling,
                string spec (e.g. 'Typogre(rate=0.05)'), or iterable of these.
            tokenizer: Tokenizer name (e.g. 'cl100k_base'), Tokenizer instance,
                or None (defaults to whitespace tokenizer).
            metrics: Dictionary of metric functions. If None, uses defaults
                (jensen_shannon_divergence, normalized_edit_distance,
                subsequence_retention).
            seed: Master seed for the Gaggle. If None, uses DEFAULT_ATTACK_SEED.
            transcript_target: Which transcript turns to corrupt. Accepts:
                - "last": corrupt only the last turn (default)
                - "all": corrupt all turns
                - "assistant"/"user": corrupt only those roles
                - int: corrupt a specific index
                - Sequence[int]: corrupt specific indices
        """
        # Boundary: resolve seed
        gaggle_seed = seed if seed is not None else DEFAULT_ATTACK_SEED

        # Impure: resolve glitchlings (clones to avoid mutation)
        self.glitchlings = resolve_glitchlings(
            glitchlings,
            seed=gaggle_seed,
            transcript_target=transcript_target,
        )

        # Impure: resolve tokenizer
        self.tokenizer = resolve_tokenizer(tokenizer)
        self.tokenizer_info = describe_tokenizer(self.tokenizer, tokenizer)

        # Setup metrics
        if metrics is None:
            self.metrics: dict[str, Metric] = get_default_metrics()
        else:
            self.metrics = dict(metrics)

        # Validate custom metrics have correct signature
        self._validate_metrics()

    def _validate_metrics(self) -> None:
        """Validate that metric functions have correct signatures.

        Uses signature inspection to avoid executing metrics (which may have
        side effects).

        Raises:
            ValueError: If a metric function has an invalid signature.
        """
        for name, func in self.metrics.items():
            if not callable(func):
                raise ValueError(f"Metric '{name}' is not callable")

            try:
                sig = inspect.signature(func)
                params = list(sig.parameters.values())

                # Count required positional parameters (no default, not *args/**kwargs)
                positional_params = [
                    p
                    for p in params
                    if p.kind
                    in (
                        inspect.Parameter.POSITIONAL_ONLY,
                        inspect.Parameter.POSITIONAL_OR_KEYWORD,
                    )
                    and p.default is inspect.Parameter.empty
                ]

                if len(positional_params) < 2:
                    raise ValueError(
                        f"Metric '{name}' must accept at least 2 positional arguments "
                        f"(original_tokens, corrupted_tokens), found {len(positional_params)}"
                    )
            except (ValueError, TypeError) as e:
                if "Metric" in str(e):
                    raise
                raise ValueError(f"Metric '{name}' has invalid signature: {e}") from e
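
    # A minimal sketch of a conforming custom metric (not shipped with the
    # package; any callable with two required positional parameters passes
    # validation):
    #
    #   def token_change_rate(original_tokens, corrupted_tokens):
    #       changed = sum(a != b for a, b in zip(original_tokens, corrupted_tokens))
    #       return changed / max(len(original_tokens), 1)
    #
    #   attack = Attack("Typogre(rate=0.05)", metrics={"change_rate": token_change_rate})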

    def run(
        self,
        text: str | Transcript | Sequence[str],
        *,
        include_tokens: bool = True,
    ) -> AttackResult:
        """Apply corruptions and calculate metrics.

        Supports single strings, batches of strings, and chat transcripts.
        For batched inputs, metrics are computed per entry and returned
        as lists.

        Args:
            text: Input text, transcript, or batch of strings to corrupt.
            include_tokens: Whether to include tokens in the result. Set to
                False for a lightweight result with only metrics. Tokens are
                still computed internally for metrics but not stored in the
                result. Defaults to True.

        Returns:
            AttackResult containing original, corrupted, tokens, and metrics.

        Raises:
            TypeError: If input type is not recognized.
        """
        # Pure: plan the attack
        attack_plan = plan_attack(text)
        result_plan = plan_result(
            attack_plan,
            list(self.metrics.keys()),
            self.tokenizer_info,
        )

        # Impure: execute the attack
        fields = execute_attack(
            self.glitchlings,
            self.tokenizer,
            self.metrics,
            attack_plan,
            result_plan,
            text,
            include_tokens=include_tokens,
        )

        return AttackResult(**fields)  # type: ignore[arg-type]
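
    # Sketch: metric-only sweeps stay lightweight with include_tokens=False
    # (assumes `attack` was constructed as in the class docstring):
    #
    #   result = attack.run(["first sample", "second sample"], include_tokens=False)
    #   result.metrics["normalized_edit_distance"]  # list with one value per entry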

    def run_batch(
        self,
        texts: Sequence[str | Transcript],
        *,
        include_tokens: bool = True,
        progress_callback: Callable[[list[AttackResult]], None] | None = None,
    ) -> list[AttackResult]:
        """Run attack on multiple texts, returning results in order.

        Args:
            texts: List of inputs to process.
            include_tokens: Whether to include tokens in results. Set to
                False for lightweight results with only metrics. Defaults to True.
            progress_callback: Optional callback called after each result,
                receiving the list of results so far.

        Returns:
            List of AttackResult objects in input order.
        """
        results: list[AttackResult] = []
        for text in texts:
            result = self.run(text, include_tokens=include_tokens)
            results.append(result)
            if progress_callback is not None:
                progress_callback(results)
        return results
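
    # Sketch of a progress callback; it receives the accumulated results
    # after each item, so the running count is just its length:
    #
    #   def report(done: list[AttackResult]) -> None:
    #       print(f"{len(done)} texts processed")
    #
    #   attack.run_batch(corpus, progress_callback=report)  # `corpus`: any Sequence[str]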

    def run_stream(
        self,
        texts: Iterator[str | Transcript] | Sequence[str | Transcript],
        *,
        include_tokens: bool = True,
    ) -> Generator[AttackResult, None, None]:
        """Stream attack results as they are computed.

        Unlike run_batch(), this method yields results immediately as each
        text is processed, allowing for memory-efficient processing of large
        datasets without holding all results in memory.

        Args:
            texts: Iterator or sequence of inputs to process.
            include_tokens: Whether to include tokens in results. Set to
                False for lightweight results with only metrics. Defaults to True.

        Yields:
            AttackResult objects as they are computed.

        Example:
            >>> attack = Attack(Typogre(rate=0.05))
            >>> for result in attack.run_stream(large_text_iterator):
            ...     process_result(result)  # Process each result immediately
        """
        for text in texts:
            yield self.run(text, include_tokens=include_tokens)

    def run_streaming_result(
        self,
        text: str | Transcript | Sequence[str],
        *,
        window_size: int = 10000,
    ) -> "StreamingAttackResult":
        """Run attack and return result with windowed token access.

        Returns a StreamingAttackResult that provides windowed iteration
        over tokens, useful for chunk-based processing of results.

        Note: This does not reduce memory usage during tokenization. For
        memory-efficient processing of many texts, use run_stream() instead.

        Args:
            text: Input text, transcript, or batch of strings to corrupt.
            window_size: Number of tokens per window during iteration.
                Defaults to 10000.

        Returns:
            StreamingAttackResult with windowed token iteration.
        """
        result = self.run(text)
        return StreamingAttackResult.from_attack_result(result, window_size=window_size)
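

# End-to-end sketch using a string spec (see Attack.__init__ above); Typogre
# is one of the glitchlings shipped in glitchlings.zoo:
#
#   attack = Attack("Typogre(rate=0.05)", tokenizer="cl100k_base", seed=42)
#   result = attack.run("Hello world")
#   print(result.summary())      # token counts, metrics, token drift
#   report = result.to_report()  # JSON-serializable dict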


# ---------------------------------------------------------------------------
# Streaming Attack Result
# ---------------------------------------------------------------------------


@dataclass
class StreamingAttackResult:
    """Attack result with windowed token access for chunk-based processing.

    Wraps an AttackResult and provides windowed iteration over tokens,
    useful for processing results in fixed-size chunks (e.g., for batched
    metric computation or memory-bounded downstream processing).

    Note: Tokens are still stored in memory. This class provides windowed
    *access*, not lazy loading. For true memory savings with very large
    texts, process inputs in smaller batches using Attack.run_stream().

    Attributes:
        original: Original input text/transcript/batch.
        corrupted: Corrupted output.
        tokenizer_info: Description of the tokenizer used.
        metrics: Computed metric values.
        window_size: Number of tokens per window iteration.
    """

    original: str | Transcript | Sequence[str]
    corrupted: str | Transcript | Sequence[str]
    tokenizer_info: str
    metrics: dict[str, float | list[float]]
    window_size: int = field(default=10000)
    _input_tokens: list[str] | list[list[str]] = field(default_factory=list, repr=False)
    _output_tokens: list[str] | list[list[str]] = field(default_factory=list, repr=False)
    _input_token_ids: list[int] | list[list[int]] = field(default_factory=list, repr=False)
    _output_token_ids: list[int] | list[list[int]] = field(default_factory=list, repr=False)

    @classmethod
    def from_attack_result(
        cls,
        result: AttackResult,
        *,
        window_size: int = 10000,
    ) -> "StreamingAttackResult":
        """Create a StreamingAttackResult from an AttackResult.

        Args:
            result: The AttackResult to wrap.
            window_size: Number of tokens per window.

        Returns:
            StreamingAttackResult with windowed token access.
        """
        return cls(
            original=result.original,
            corrupted=result.corrupted,
            tokenizer_info=result.tokenizer_info,
            metrics=result.metrics,
            window_size=window_size,
            _input_tokens=result.input_tokens,
            _output_tokens=result.output_tokens,
            _input_token_ids=result.input_token_ids,
            _output_token_ids=result.output_token_ids,
        )

    def _is_batched(self) -> bool:
        """Check if tokens represent a batch."""
        tokens = self._input_tokens
        if tokens and isinstance(tokens[0], list):
            return True
        return isinstance(self.original, list) or isinstance(self.corrupted, list)

    def stream_input_tokens(self, batch_index: int = 0) -> StreamingTokens:
        """Get streaming access to input tokens.

        Args:
            batch_index: Which batch item to stream (0 for single strings).

        Returns:
            StreamingTokens iterator for windowed access.
        """
        if self._is_batched():
            tokens = cast(list[list[str]], self._input_tokens)[batch_index]
            token_ids = cast(list[list[int]], self._input_token_ids)[batch_index]
        else:
            tokens = cast(list[str], self._input_tokens)
            token_ids = cast(list[int], self._input_token_ids)

        return StreamingTokens(tokens, token_ids, window_size=self.window_size)

    def stream_output_tokens(self, batch_index: int = 0) -> StreamingTokens:
        """Get streaming access to output tokens.

        Args:
            batch_index: Which batch item to stream (0 for single strings).

        Returns:
            StreamingTokens iterator for windowed access.
        """
        if self._is_batched():
            tokens = cast(list[list[str]], self._output_tokens)[batch_index]
            token_ids = cast(list[list[int]], self._output_token_ids)[batch_index]
        else:
            tokens = cast(list[str], self._output_tokens)
            token_ids = cast(list[int], self._output_token_ids)

        return StreamingTokens(tokens, token_ids, window_size=self.window_size)

    def stream_token_pairs(
        self,
        batch_index: int = 0,
    ) -> Generator[tuple[TokenWindow, TokenWindow], None, None]:
        """Stream paired windows of input and output tokens.

        Yields aligned (input_window, output_window) pairs for comparison.
        Windows are paired by index, so the first input window pairs with
        the first output window, etc.

        Note: If input and output have different token counts, iteration
        stops at the shorter sequence (like zip). Use stream_input_tokens()
        and stream_output_tokens() separately if you need all windows.

        Args:
            batch_index: Which batch item to stream (0 for single strings).

        Yields:
            Tuples of (input_window, output_window) aligned by window index.
        """
        input_stream = self.stream_input_tokens(batch_index)
        output_stream = self.stream_output_tokens(batch_index)

        for input_window, output_window in zip(input_stream, output_stream):
            yield input_window, output_window

    def get_token_count(self, batch_index: int = 0) -> tuple[int, int]:
        """Get token counts without materializing full lists.

        Args:
            batch_index: Which batch item to count (0 for single strings).

        Returns:
            Tuple of (input_token_count, output_token_count).
        """
        input_stream = self.stream_input_tokens(batch_index)
        output_stream = self.stream_output_tokens(batch_index)
        return len(input_stream), len(output_stream)

    def to_attack_result(self) -> AttackResult:
        """Convert back to a standard AttackResult.

        Warning: This materializes all tokens in memory.

        Returns:
            AttackResult with all tokens loaded.
        """
        return AttackResult(
            original=self.original,
            corrupted=self.corrupted,
            input_tokens=self._input_tokens,
            output_tokens=self._output_tokens,
            input_token_ids=self._input_token_ids,
            output_token_ids=self._output_token_ids,
            tokenizer_info=self.tokenizer_info,
            metrics=self.metrics,
        )

    def get_metric(self, name: str) -> float | list[float] | None:
        """Get a specific metric value by name."""
        return self.metrics.get(name)
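

# Windowed comparison sketch, continuing the end-to-end example above
# (`long_text` stands for any large input string):
#
#   streaming = attack.run_streaming_result(long_text, window_size=4096)
#   for in_window, out_window in streaming.stream_token_pairs():
#       drift = sum(a != b for a, b in zip(in_window.tokens, out_window.tokens))
#       print(in_window.start_index, drift)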


__all__ = [
    "Attack",
    "AttackResult",
    "StreamingAttackResult",
    "StreamingTokens",
    "TokenWindow",
]