glitchlings-0.10.2-cp312-cp312-macosx_11_0_universal2.whl
Potentially problematic release.
This version of glitchlings might be problematic.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
glitchlings/attack/core_planning.py
@@ -0,0 +1,612 @@
"""Pure planning functions for Attack orchestration.

This module contains deterministic, side-effect-free logic for planning
attack execution and assembling results. Functions here operate on
already-resolved inputs without performing IO or invoking FFI.

**Design Philosophy:**

All functions in this module are *pure* - they perform planning and
composition based solely on their inputs, without side effects. They do not:
- Import or invoke Rust FFI
- Resolve tokenizers or glitchlings
- Create RNG instances
- Perform I/O of any kind

The separation allows:
- Plan verification without Rust dependencies
- Unit testing of orchestration logic in isolation
- Clear boundaries between planning and execution

See AGENTS.md "Functional Purity Architecture" for full details.
"""

from __future__ import annotations

from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import Any, TypeGuard

from ..util.transcripts import Transcript

# ---------------------------------------------------------------------------
# Type Guards
# ---------------------------------------------------------------------------


def is_string_batch(value: Any) -> TypeGuard[Sequence[str]]:
    """Determine if value is a batch of strings (not a single string).

    Args:
        value: Value to check.

    Returns:
        True if value is a non-string sequence of strings.
    """
    if isinstance(value, (str, bytes)):
        return False
    if not isinstance(value, Sequence):
        return False
    return all(isinstance(item, str) for item in value)


def is_transcript_like(value: Any) -> bool:
    """Check if value resembles a transcript structure.

    A transcript is a sequence of mappings with 'role' and 'content' keys.

    Args:
        value: Value to check.

    Returns:
        True if value appears to be a transcript.
    """
    if not isinstance(value, Sequence):
        return False
    if isinstance(value, (str, bytes)):
        return False
    if not value:
        return True  # Empty sequence could be empty transcript
    first = value[0]
    return isinstance(first, Mapping) and "content" in first

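
# Illustrative behaviour sketch for the type guards above (annotation only,
# not a line of the packaged module; inputs are plain Python literals):
#
#     >>> is_string_batch(["alpha", "beta"])
#     True
#     >>> is_string_batch("alpha")
#     False
#     >>> is_transcript_like([{"role": "user", "content": "hi"}])
#     True
#     >>> is_transcript_like(["plain string"])
#     False
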
# ---------------------------------------------------------------------------
# Attack Planning
# ---------------------------------------------------------------------------


@dataclass(frozen=True, slots=True)
class AttackPlan:
    """Pure representation of what an Attack will do.

    Attributes:
        input_type: Type of input ("string", "batch", "transcript").
        original_contents: List of content strings to process.
        batch_size: Number of items in the batch.
    """

    input_type: str
    original_contents: list[str]
    batch_size: int

    @property
    def is_batch(self) -> bool:
        """Return True if this plan represents batched input."""
        return self.input_type in ("batch", "transcript")

    @property
    def is_empty(self) -> bool:
        """Return True if there are no contents to process."""
        return self.batch_size == 0


def plan_attack(text: str | Transcript | Sequence[str]) -> AttackPlan:
    """Create an execution plan for the given input.

    This pure function analyzes the input structure and creates a plan
    without actually executing anything.

    Args:
        text: Input text, transcript, or batch of strings.

    Returns:
        AttackPlan describing how to process the input.

    Raises:
        TypeError: If input type is not recognized.
    """
    if is_string_batch(text):
        contents = list(text)
        return AttackPlan(
            input_type="batch",
            original_contents=contents,
            batch_size=len(contents),
        )

    if is_transcript_like(text):
        contents = extract_transcript_contents(text)  # type: ignore[arg-type]
        return AttackPlan(
            input_type="transcript",
            original_contents=contents,
            batch_size=len(contents),
        )

    if isinstance(text, str):
        return AttackPlan(
            input_type="string",
            original_contents=[text],
            batch_size=1,
        )

    message = f"Attack expects string, transcript, or list of strings, got {type(text).__name__}"
    raise TypeError(message)


def extract_transcript_contents(transcript: Sequence[Mapping[str, Any]]) -> list[str]:
    """Extract content strings from a transcript (pure version).

    Args:
        transcript: Sequence of turn mappings with 'content' keys.

    Returns:
        List of content strings.

    Raises:
        TypeError: If transcript structure is invalid.
    """
    contents: list[str] = []
    for index, turn in enumerate(transcript):
        if not isinstance(turn, Mapping):
            raise TypeError(f"Transcript turn #{index + 1} must be a mapping.")
        content = turn.get("content")
        if not isinstance(content, str):
            raise TypeError(f"Transcript turn #{index + 1} is missing string content.")
        contents.append(content)
    return contents

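
# Illustrative planning sketch (annotation, not part of the packaged file):
# plan_attack only classifies the input and records its contents.
#
#     >>> plan_attack("hello world").input_type
#     'string'
#     >>> plan = plan_attack(["alpha", "beta"])
#     >>> (plan.input_type, plan.batch_size, plan.is_batch)
#     ('batch', 2, True)
#     >>> plan_attack([{"role": "user", "content": "hi"}]).original_contents
#     ['hi']
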
# ---------------------------------------------------------------------------
# Result Planning
# ---------------------------------------------------------------------------


@dataclass(frozen=True, slots=True)
class MetricPlan:
    """Plan for computing a single metric.

    Attributes:
        name: Metric name.
        use_batch: Whether to use batch computation.
    """

    name: str
    use_batch: bool


@dataclass(frozen=True, slots=True)
class ResultPlan:
    """Plan for assembling attack results.

    Attributes:
        is_batch: Whether results are batched.
        metric_names: Names of metrics to compute.
        tokenizer_info: Description of tokenizer being used.
    """

    is_batch: bool
    metric_names: tuple[str, ...]
    tokenizer_info: str

    def format_metrics(
        self,
        raw_metrics: dict[str, float | list[float]],
    ) -> dict[str, float | list[float]]:
        """Format metrics according to the result type.

        For single results, collapses list metrics to scalars.
        For batch results, ensures all metrics are lists.

        Args:
            raw_metrics: Raw metric values from computation.

        Returns:
            Formatted metrics appropriate for the result type.
        """
        if self.is_batch:
            return _format_metrics_for_batch(raw_metrics)  # type: ignore[return-value]
        return _format_metrics_for_single(raw_metrics)  # type: ignore[return-value]


def plan_result(
    attack_plan: AttackPlan,
    metric_names: Sequence[str],
    tokenizer_info: str,
) -> ResultPlan:
    """Create a plan for assembling results.

    Args:
        attack_plan: The attack execution plan.
        metric_names: Names of metrics being computed.
        tokenizer_info: Description of the tokenizer.

    Returns:
        ResultPlan for assembling the final result.
    """
    return ResultPlan(
        is_batch=attack_plan.is_batch,
        metric_names=tuple(metric_names),
        tokenizer_info=tokenizer_info,
    )


# ---------------------------------------------------------------------------
# Metric Formatting (Pure)
# ---------------------------------------------------------------------------


def _format_metrics_for_single(
    metrics: dict[str, float | list[float]],
) -> dict[str, float]:
    """Collapse batch metrics to single values.

    Args:
        metrics: Raw metrics that may be lists.

    Returns:
        Metrics as scalar floats.
    """
    result: dict[str, float] = {}
    for name, value in metrics.items():
        if isinstance(value, list):
            result[name] = value[0] if value else 0.0
        else:
            result[name] = value
    return result


def _format_metrics_for_batch(
    metrics: dict[str, float | list[float]],
) -> dict[str, list[float]]:
    """Ensure all metrics are lists.

    Args:
        metrics: Raw metrics that may be scalars.

    Returns:
        Metrics as lists of floats.
    """
    result: dict[str, list[float]] = {}
    for name, value in metrics.items():
        if isinstance(value, list):
            result[name] = list(value)
        else:
            result[name] = [value]
    return result

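
# Illustrative sketch of result planning plus metric formatting (annotation,
# not part of the packaged file; "wer" and "whitespace" are placeholder
# metric and tokenizer labels):
#
#     >>> batch_plan = plan_result(plan_attack(["a", "b"]), ["wer"], "whitespace")
#     >>> batch_plan.format_metrics({"wer": [0.5, 0.25]})
#     {'wer': [0.5, 0.25]}
#     >>> single_plan = plan_result(plan_attack("a"), ["wer"], "whitespace")
#     >>> single_plan.format_metrics({"wer": [0.5]})
#     {'wer': 0.5}
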
# ---------------------------------------------------------------------------
# Batch Adapter (Pure)
# ---------------------------------------------------------------------------


@dataclass(frozen=True, slots=True)
class BatchAdapter:
    """Adapter that normalizes all inputs to batch format internally.

    This adapter wraps single strings as batches of size 1, allowing
    uniform processing throughout the attack pipeline. It tracks whether
    to unwrap results back to single format at output time.

    Attributes:
        contents: List of content strings (always a list, even for single).
        unwrap_single: True if the original input was a single string.
        input_type: Original input type ("string", "batch", "transcript").
    """

    contents: list[str]
    unwrap_single: bool
    input_type: str

    @classmethod
    def from_plan(cls, plan: "AttackPlan") -> "BatchAdapter":
        """Create a BatchAdapter from an AttackPlan.

        Args:
            plan: The attack execution plan.

        Returns:
            BatchAdapter configured for the plan's input type.
        """
        return cls(
            contents=plan.original_contents,
            unwrap_single=plan.input_type == "string",
            input_type=plan.input_type,
        )

    def unwrap_tokens(
        self,
        tokens: list[list[str]],
    ) -> list[str] | list[list[str]]:
        """Unwrap batched tokens to match original input format.

        Args:
            tokens: Batched token lists (2D).

        Returns:
            1D list for single input, 2D list for batch input.
        """
        if self.unwrap_single and tokens:
            return tokens[0]
        return tokens

    def unwrap_token_ids(
        self,
        token_ids: list[list[int]],
    ) -> list[int] | list[list[int]]:
        """Unwrap batched token IDs to match original input format.

        Args:
            token_ids: Batched token ID lists (2D).

        Returns:
            1D list for single input, 2D list for batch input.
        """
        if self.unwrap_single and token_ids:
            return token_ids[0]
        return token_ids

    def unwrap_metrics(
        self,
        metrics: dict[str, list[float]],
    ) -> dict[str, float | list[float]]:
        """Unwrap batched metrics to match original input format.

        Args:
            metrics: Batched metrics (values are lists).

        Returns:
            Scalar metrics for single input, list metrics for batch.
        """
        if self.unwrap_single:
            return {name: values[0] if values else 0.0 for name, values in metrics.items()}
        # Explicitly construct new dict to satisfy type checker (dict invariance)
        result: dict[str, float | list[float]] = {name: values for name, values in metrics.items()}
        return result

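
# Illustrative unwrap sketch (annotation, not part of the packaged file):
# a single-string plan yields an adapter that strips the batch dimension.
#
#     >>> adapter = BatchAdapter.from_plan(plan_attack("hi"))
#     >>> adapter.unwrap_tokens([["h", "i"]])
#     ['h', 'i']
#     >>> adapter.unwrap_metrics({"wer": [0.0]})
#     {'wer': 0.0}
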
# ---------------------------------------------------------------------------
# Result Assembly (Pure)
# ---------------------------------------------------------------------------


@dataclass(frozen=True, slots=True)
class EncodedData:
    """Encoded token data for result assembly.

    Tokens and IDs are always stored in batch format (2D lists)
    internally. Use BatchAdapter.unwrap_* methods to convert to
    the appropriate output format.

    Attributes:
        tokens: Token strings as batched 2D list.
        token_ids: Token IDs as batched 2D list.
    """

    tokens: list[list[str]]
    token_ids: list[list[int]]


def assemble_single_result_fields(
    *,
    original: str,
    corrupted: str,
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, float],
) -> dict[str, object]:
    """Assemble field dictionary for single-string AttackResult.

    Args:
        original: Original input string.
        corrupted: Corrupted output string.
        input_encoded: Encoded original tokens.
        output_encoded: Encoded corrupted tokens.
        tokenizer_info: Tokenizer description.
        metrics: Computed metrics (scalar).

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    # For single strings, tokens are batched internally as [[...]]
    # so we unwrap the first (and only) element
    input_tokens = input_encoded.tokens[0] if input_encoded.tokens else []
    output_tokens = output_encoded.tokens[0] if output_encoded.tokens else []
    input_ids = input_encoded.token_ids[0] if input_encoded.token_ids else []
    output_ids = output_encoded.token_ids[0] if output_encoded.token_ids else []

    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "input_token_ids": input_ids,
        "output_token_ids": output_ids,
        "tokenizer_info": tokenizer_info,
        "metrics": metrics,
    }


def assemble_batch_result_fields(
    *,
    original: Transcript | Sequence[str],
    corrupted: Transcript | Sequence[str],
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, float | list[float]],
) -> dict[str, object]:
    """Assemble field dictionary for batched AttackResult.

    Args:
        original: Original transcript or string batch.
        corrupted: Corrupted transcript or string batch.
        input_encoded: Encoded original tokens (batched).
        output_encoded: Encoded corrupted tokens (batched).
        tokenizer_info: Tokenizer description.
        metrics: Computed metrics (list per batch item).

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": input_encoded.tokens,
        "output_tokens": output_encoded.tokens,
        "input_token_ids": input_encoded.token_ids,
        "output_token_ids": output_encoded.token_ids,
        "tokenizer_info": tokenizer_info,
        "metrics": metrics,
    }


def assemble_empty_result_fields(
    *,
    original: Transcript | Sequence[str],
    corrupted: Transcript | Sequence[str],
    tokenizer_info: str,
    metric_names: Sequence[str],
) -> dict[str, object]:
    """Assemble field dictionary for empty input.

    Args:
        original: Original empty transcript or list.
        corrupted: Corrupted empty transcript or list.
        tokenizer_info: Tokenizer description.
        metric_names: Names of metrics to include as empty.

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": [],
        "output_tokens": [],
        "input_token_ids": [],
        "output_token_ids": [],
        "tokenizer_info": tokenizer_info,
        "metrics": {name: [] for name in metric_names},
    }


def assemble_result_fields(
    *,
    adapter: BatchAdapter,
    original: str | Transcript | Sequence[str],
    corrupted: str | Transcript | Sequence[str],
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, list[float]],
) -> dict[str, object]:
    """Assemble AttackResult fields using batch adapter for uniform handling.

    This function uses the BatchAdapter to handle both single and batch
    inputs uniformly. Internally, all data is processed as batches, then
    unwrapped appropriately based on the original input type.

    Args:
        adapter: BatchAdapter tracking input type.
        original: Original input container.
        corrupted: Corrupted output container.
        input_encoded: Encoded original tokens (always batched internally).
        output_encoded: Encoded corrupted tokens (always batched internally).
        tokenizer_info: Tokenizer description.
        metrics: Computed metrics (always batched as lists internally).

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": adapter.unwrap_tokens(input_encoded.tokens),
        "output_tokens": adapter.unwrap_tokens(output_encoded.tokens),
        "input_token_ids": adapter.unwrap_token_ids(input_encoded.token_ids),
        "output_token_ids": adapter.unwrap_token_ids(output_encoded.token_ids),
        "tokenizer_info": tokenizer_info,
        "metrics": adapter.unwrap_metrics(metrics),
    }

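
# Illustrative assembly sketch (annotation, not part of the packaged file;
# the token strings, IDs, and "wer"/"whitespace" labels are made-up
# placeholder values):
#
#     >>> adapter = BatchAdapter.from_plan(plan_attack("hi"))
#     >>> fields = assemble_result_fields(
#     ...     adapter=adapter,
#     ...     original="hi",
#     ...     corrupted="h1",
#     ...     input_encoded=EncodedData(tokens=[["hi"]], token_ids=[[42]]),
#     ...     output_encoded=EncodedData(tokens=[["h1"]], token_ids=[[7]]),
#     ...     tokenizer_info="whitespace",
#     ...     metrics={"wer": [1.0]},
#     ... )
#     >>> (fields["output_tokens"], fields["metrics"])
#     (['h1'], {'wer': 1.0})
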
# ---------------------------------------------------------------------------
# Token Count Helpers (Pure)
# ---------------------------------------------------------------------------


def compute_token_counts(
    input_tokens: list[str] | list[list[str]],
    output_tokens: list[str] | list[list[str]],
) -> tuple[list[int], list[int]]:
    """Compute token counts for inputs and outputs.

    Handles both single sequences and batches.

    Args:
        input_tokens: Input token sequence(s).
        output_tokens: Output token sequence(s).

    Returns:
        Tuple of (input_counts, output_counts) as lists.
    """
    # Check if batched
    if input_tokens and isinstance(input_tokens[0], list):
        input_counts = [len(batch) for batch in input_tokens]
        output_counts = [len(batch) for batch in output_tokens]
    else:
        input_counts = [len(input_tokens)]
        output_counts = [len(output_tokens)]
    return input_counts, output_counts


def format_token_count_delta(input_count: int, output_count: int) -> str:
    """Format a token count change as a string.

    Args:
        input_count: Number of input tokens.
        output_count: Number of output tokens.

    Returns:
        Formatted string like "10 -> 12 (+2)".
    """
    delta = output_count - input_count
    return f"{input_count} -> {output_count} ({delta:+d})"

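
# Illustrative token-count sketch (annotation, not part of the packaged file):
#
#     >>> compute_token_counts(["a", "b", "c"], ["a", "b"])
#     ([3], [2])
#     >>> compute_token_counts([["a", "b"], ["c"]], [["a"], ["c", "d"]])
#     ([2, 1], [1, 2])
#     >>> format_token_count_delta(10, 12)
#     '10 -> 12 (+2)'
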
__all__ = [
    # Type guards
    "is_string_batch",
    "is_transcript_like",
    # Attack planning
    "AttackPlan",
    "plan_attack",
    "extract_transcript_contents",
    # Result planning
    "MetricPlan",
    "ResultPlan",
    "plan_result",
    # Batch adapter
    "BatchAdapter",
    # Result assembly
    "EncodedData",
    "assemble_result_fields",
    "assemble_single_result_fields",
    "assemble_batch_result_fields",
    "assemble_empty_result_fields",
    # Token counts
    "compute_token_counts",
    "format_token_count_delta",
]