glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,612 @@
1
+ """Pure planning functions for Attack orchestration.
2
+
3
+ This module contains deterministic, side-effect-free logic for planning
4
+ attack execution and assembling results. Functions here operate on
5
+ already-resolved inputs without performing IO or invoking FFI.
6
+
7
+ **Design Philosophy:**
8
+
9
+ All functions in this module are *pure* - they perform planning and
10
+ composition based solely on their inputs, without side effects. They do not:
11
+ - Import or invoke Rust FFI
12
+ - Resolve tokenizers or glitchlings
13
+ - Create RNG instances
14
+ - Perform I/O of any kind
15
+
16
+ The separation allows:
17
+ - Plan verification without Rust dependencies
18
+ - Unit testing of orchestration logic in isolation
19
+ - Clear boundaries between planning and execution
20
+
21
+ See AGENTS.md "Functional Purity Architecture" for full details.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from collections.abc import Mapping, Sequence
27
+ from dataclasses import dataclass
28
+ from typing import Any, TypeGuard
29
+
30
+ from ..util.transcripts import Transcript
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Type Guards
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
+ def is_string_batch(value: Any) -> TypeGuard[Sequence[str]]:
38
+ """Determine if value is a batch of strings (not a single string).
39
+
40
+ Args:
41
+ value: Value to check.
42
+
43
+ Returns:
44
+ True if value is a non-string sequence of strings.
45
+ """
46
+ if isinstance(value, (str, bytes)):
47
+ return False
48
+ if not isinstance(value, Sequence):
49
+ return False
50
+ return all(isinstance(item, str) for item in value)
51
+
52
+
53
def is_transcript_like(value: Any) -> bool:
    """Heuristically decide whether ``value`` looks like a transcript.

    Only the first element is inspected: it must be a mapping that has a
    'content' key. Later elements are not validated here.

    Args:
        value: Arbitrary object to classify.

    Returns:
        True for an empty non-string sequence, or for a non-string sequence
        whose first element is a mapping containing 'content'.
    """
    if isinstance(value, (str, bytes)) or not isinstance(value, Sequence):
        return False
    if value:
        head = value[0]
        return isinstance(head, Mapping) and "content" in head
    # An empty sequence is accepted as a possible empty transcript.
    return True
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Attack Planning
76
+ # ---------------------------------------------------------------------------
77
+
78
+
79
+ @dataclass(frozen=True, slots=True)
80
+ class AttackPlan:
81
+ """Pure representation of what an Attack will do.
82
+
83
+ Attributes:
84
+ input_type: Type of input ("string", "batch", "transcript").
85
+ original_contents: List of content strings to process.
86
+ batch_size: Number of items in the batch.
87
+ """
88
+
89
+ input_type: str
90
+ original_contents: list[str]
91
+ batch_size: int
92
+
93
+ @property
94
+ def is_batch(self) -> bool:
95
+ """Return True if this plan represents batched input."""
96
+ return self.input_type in ("batch", "transcript")
97
+
98
+ @property
99
+ def is_empty(self) -> bool:
100
+ """Return True if there are no contents to process."""
101
+ return self.batch_size == 0
102
+
103
+
104
def plan_attack(text: str | Transcript | Sequence[str]) -> AttackPlan:
    """Classify the input and build the matching execution plan.

    Nothing is executed; the input is only inspected. Classification order
    is: batch of strings, then transcript, then single string.

    Args:
        text: A single string, a transcript, or a batch of strings.

    Returns:
        AttackPlan recording the input kind, its content strings, and
        how many there are.

    Raises:
        TypeError: If the input matches none of the supported kinds, or a
            transcript turn is malformed.
    """
    if is_string_batch(text):
        kind = "batch"
        items = list(text)
    elif is_transcript_like(text):
        kind = "transcript"
        items = extract_transcript_contents(text)  # type: ignore[arg-type]
    elif isinstance(text, str):
        kind = "string"
        items = [text]
    else:
        raise TypeError(
            f"Attack expects string, transcript, or list of strings, got {type(text).__name__}"
        )

    return AttackPlan(
        input_type=kind,
        original_contents=items,
        batch_size=len(items),
    )
144
+
145
+
146
def extract_transcript_contents(transcript: Sequence[Mapping[str, Any]]) -> list[str]:
    """Collect the 'content' string of every turn in a transcript.

    Args:
        transcript: Sequence of turn mappings, each holding a 'content' key.

    Returns:
        The content strings, in turn order.

    Raises:
        TypeError: If a turn is not a mapping or its 'content' is not a
            string. Turns are numbered from 1 in the message.
    """

    def _content_of(position: int, turn: Any) -> str:
        # Validate one turn and hand back its content string.
        if not isinstance(turn, Mapping):
            raise TypeError(f"Transcript turn #{position} must be a mapping.")
        text = turn.get("content")
        if not isinstance(text, str):
            raise TypeError(f"Transcript turn #{position} is missing string content.")
        return text

    return [_content_of(position, turn) for position, turn in enumerate(transcript, start=1)]
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # Result Planning
171
+ # ---------------------------------------------------------------------------
172
+
173
+
174
+ @dataclass(frozen=True, slots=True)
175
+ class MetricPlan:
176
+ """Plan for computing a single metric.
177
+
178
+ Attributes:
179
+ name: Metric name.
180
+ use_batch: Whether to use batch computation.
181
+ """
182
+
183
+ name: str
184
+ use_batch: bool
185
+
186
+
187
+ @dataclass(frozen=True, slots=True)
188
+ class ResultPlan:
189
+ """Plan for assembling attack results.
190
+
191
+ Attributes:
192
+ is_batch: Whether results are batched.
193
+ metric_names: Names of metrics to compute.
194
+ tokenizer_info: Description of tokenizer being used.
195
+ """
196
+
197
+ is_batch: bool
198
+ metric_names: tuple[str, ...]
199
+ tokenizer_info: str
200
+
201
+ def format_metrics(
202
+ self,
203
+ raw_metrics: dict[str, float | list[float]],
204
+ ) -> dict[str, float | list[float]]:
205
+ """Format metrics according to the result type.
206
+
207
+ For single results, collapses list metrics to scalars.
208
+ For batch results, ensures all metrics are lists.
209
+
210
+ Args:
211
+ raw_metrics: Raw metric values from computation.
212
+
213
+ Returns:
214
+ Formatted metrics appropriate for the result type.
215
+ """
216
+ if self.is_batch:
217
+ return _format_metrics_for_batch(raw_metrics) # type: ignore[return-value]
218
+ return _format_metrics_for_single(raw_metrics) # type: ignore[return-value]
219
+
220
+
221
def plan_result(
    attack_plan: AttackPlan,
    metric_names: Sequence[str],
    tokenizer_info: str,
) -> ResultPlan:
    """Derive the result-assembly plan from an attack plan.

    Args:
        attack_plan: Plan whose ``is_batch`` flag is carried over.
        metric_names: Names of the metrics being computed; stored as a tuple.
        tokenizer_info: Description of the tokenizer.

    Returns:
        ResultPlan combining the batch flag, metric names, and tokenizer info.
    """
    batched = attack_plan.is_batch
    frozen_names = tuple(metric_names)
    return ResultPlan(
        is_batch=batched,
        metric_names=frozen_names,
        tokenizer_info=tokenizer_info,
    )
241
+
242
+
243
+ # ---------------------------------------------------------------------------
244
+ # Metric Formatting (Pure)
245
+ # ---------------------------------------------------------------------------
246
+
247
+
248
+ def _format_metrics_for_single(
249
+ metrics: dict[str, float | list[float]],
250
+ ) -> dict[str, float]:
251
+ """Collapse batch metrics to single values.
252
+
253
+ Args:
254
+ metrics: Raw metrics that may be lists.
255
+
256
+ Returns:
257
+ Metrics as scalar floats.
258
+ """
259
+ result: dict[str, float] = {}
260
+ for name, value in metrics.items():
261
+ if isinstance(value, list):
262
+ result[name] = value[0] if value else 0.0
263
+ else:
264
+ result[name] = value
265
+ return result
266
+
267
+
268
+ def _format_metrics_for_batch(
269
+ metrics: dict[str, float | list[float]],
270
+ ) -> dict[str, list[float]]:
271
+ """Ensure all metrics are lists.
272
+
273
+ Args:
274
+ metrics: Raw metrics that may be scalars.
275
+
276
+ Returns:
277
+ Metrics as lists of floats.
278
+ """
279
+ result: dict[str, list[float]] = {}
280
+ for name, value in metrics.items():
281
+ if isinstance(value, list):
282
+ result[name] = list(value)
283
+ else:
284
+ result[name] = [value]
285
+ return result
286
+
287
+
288
+ # ---------------------------------------------------------------------------
289
+ # Batch Adapter (Pure)
290
+ # ---------------------------------------------------------------------------
291
+
292
+
293
+ @dataclass(frozen=True, slots=True)
294
+ class BatchAdapter:
295
+ """Adapter that normalizes all inputs to batch format internally.
296
+
297
+ This adapter wraps single strings as batches of size 1, allowing
298
+ uniform processing throughout the attack pipeline. It tracks whether
299
+ to unwrap results back to single format at output time.
300
+
301
+ Attributes:
302
+ contents: List of content strings (always a list, even for single).
303
+ unwrap_single: True if the original input was a single string.
304
+ input_type: Original input type ("string", "batch", "transcript").
305
+ """
306
+
307
+ contents: list[str]
308
+ unwrap_single: bool
309
+ input_type: str
310
+
311
+ @classmethod
312
+ def from_plan(cls, plan: "AttackPlan") -> "BatchAdapter":
313
+ """Create a BatchAdapter from an AttackPlan.
314
+
315
+ Args:
316
+ plan: The attack execution plan.
317
+
318
+ Returns:
319
+ BatchAdapter configured for the plan's input type.
320
+ """
321
+ return cls(
322
+ contents=plan.original_contents,
323
+ unwrap_single=plan.input_type == "string",
324
+ input_type=plan.input_type,
325
+ )
326
+
327
+ def unwrap_tokens(
328
+ self,
329
+ tokens: list[list[str]],
330
+ ) -> list[str] | list[list[str]]:
331
+ """Unwrap batched tokens to match original input format.
332
+
333
+ Args:
334
+ tokens: Batched token lists (2D).
335
+
336
+ Returns:
337
+ 1D list for single input, 2D list for batch input.
338
+ """
339
+ if self.unwrap_single and tokens:
340
+ return tokens[0]
341
+ return tokens
342
+
343
+ def unwrap_token_ids(
344
+ self,
345
+ token_ids: list[list[int]],
346
+ ) -> list[int] | list[list[int]]:
347
+ """Unwrap batched token IDs to match original input format.
348
+
349
+ Args:
350
+ token_ids: Batched token ID lists (2D).
351
+
352
+ Returns:
353
+ 1D list for single input, 2D list for batch input.
354
+ """
355
+ if self.unwrap_single and token_ids:
356
+ return token_ids[0]
357
+ return token_ids
358
+
359
+ def unwrap_metrics(
360
+ self,
361
+ metrics: dict[str, list[float]],
362
+ ) -> dict[str, float | list[float]]:
363
+ """Unwrap batched metrics to match original input format.
364
+
365
+ Args:
366
+ metrics: Batched metrics (values are lists).
367
+
368
+ Returns:
369
+ Scalar metrics for single input, list metrics for batch.
370
+ """
371
+ if self.unwrap_single:
372
+ return {name: values[0] if values else 0.0 for name, values in metrics.items()}
373
+ # Explicitly construct new dict to satisfy type checker (dict invariance)
374
+ result: dict[str, float | list[float]] = {name: values for name, values in metrics.items()}
375
+ return result
376
+
377
+
378
+ # ---------------------------------------------------------------------------
379
+ # Result Assembly (Pure)
380
+ # ---------------------------------------------------------------------------
381
+
382
+
383
+ @dataclass(frozen=True, slots=True)
384
+ class EncodedData:
385
+ """Encoded token data for result assembly.
386
+
387
+ Tokens and IDs are always stored in batch format (2D lists)
388
+ internally. Use BatchAdapter.unwrap_* methods to convert to
389
+ the appropriate output format.
390
+
391
+ Attributes:
392
+ tokens: Token strings as batched 2D list.
393
+ token_ids: Token IDs as batched 2D list.
394
+ """
395
+
396
+ tokens: list[list[str]]
397
+ token_ids: list[list[int]]
398
+
399
+
400
def assemble_single_result_fields(
    *,
    original: str,
    corrupted: str,
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, float],
) -> dict[str, object]:
    """Build the AttackResult field dict for a single-string attack.

    Args:
        original: Original input string.
        corrupted: Corrupted output string.
        input_encoded: Encoded tokens of the original.
        output_encoded: Encoded tokens of the corrupted text.
        tokenizer_info: Tokenizer description.
        metrics: Computed scalar metrics.

    Returns:
        Dictionary suitable for AttackResult construction.
    """

    def _first_row(rows):
        # Encoded data is batched as [[...]]; a single string uses row 0,
        # falling back to an empty list when there are no rows.
        return rows[0] if rows else []

    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": _first_row(input_encoded.tokens),
        "output_tokens": _first_row(output_encoded.tokens),
        "input_token_ids": _first_row(input_encoded.token_ids),
        "output_token_ids": _first_row(output_encoded.token_ids),
        "tokenizer_info": tokenizer_info,
        "metrics": metrics,
    }
439
+
440
+
441
def assemble_batch_result_fields(
    *,
    original: Transcript | Sequence[str],
    corrupted: Transcript | Sequence[str],
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, float | list[float]],
) -> dict[str, object]:
    """Build the AttackResult field dict for a batched attack.

    Encoded tokens and IDs are passed through in their batched form.

    Args:
        original: Original transcript or batch of strings.
        corrupted: Corrupted transcript or batch of strings.
        input_encoded: Encoded tokens of the originals (batched).
        output_encoded: Encoded tokens of the corrupted items (batched).
        tokenizer_info: Tokenizer description.
        metrics: Computed metrics (a list per batch item).

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    fields: dict[str, object] = {}
    fields["original"] = original
    fields["corrupted"] = corrupted
    fields["input_tokens"] = input_encoded.tokens
    fields["output_tokens"] = output_encoded.tokens
    fields["input_token_ids"] = input_encoded.token_ids
    fields["output_token_ids"] = output_encoded.token_ids
    fields["tokenizer_info"] = tokenizer_info
    fields["metrics"] = metrics
    return fields
473
+
474
+
475
def assemble_empty_result_fields(
    *,
    original: Transcript | Sequence[str],
    corrupted: Transcript | Sequence[str],
    tokenizer_info: str,
    metric_names: Sequence[str],
) -> dict[str, object]:
    """Build the AttackResult field dict for an empty input.

    All token and token-ID fields are empty lists, and every requested
    metric maps to an empty list.

    Args:
        original: Original empty transcript or list.
        corrupted: Corrupted empty transcript or list.
        tokenizer_info: Tokenizer description.
        metric_names: Names of the metrics to include as empty.

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    fields: dict[str, object] = {"original": original, "corrupted": corrupted}
    # Each field gets its own fresh list so they never alias one another.
    for key in ("input_tokens", "output_tokens", "input_token_ids", "output_token_ids"):
        fields[key] = []
    fields["tokenizer_info"] = tokenizer_info
    fields["metrics"] = {name: [] for name in metric_names}
    return fields
503
+
504
+
505
def assemble_result_fields(
    *,
    adapter: BatchAdapter,
    original: str | Transcript | Sequence[str],
    corrupted: str | Transcript | Sequence[str],
    input_encoded: EncodedData,
    output_encoded: EncodedData,
    tokenizer_info: str,
    metrics: dict[str, list[float]],
) -> dict[str, object]:
    """Build the AttackResult field dict for any input kind via the adapter.

    Tokens, token IDs, and metrics arrive in batched form and are each
    passed through the adapter's ``unwrap_*`` methods, which decide the
    output shape.

    Args:
        adapter: BatchAdapter tracking the input type.
        original: Original input container.
        corrupted: Corrupted output container.
        input_encoded: Encoded original tokens (batched).
        output_encoded: Encoded corrupted tokens (batched).
        tokenizer_info: Tokenizer description.
        metrics: Computed metrics (batched as lists).

    Returns:
        Dictionary suitable for AttackResult construction.
    """
    tokens_in = adapter.unwrap_tokens(input_encoded.tokens)
    tokens_out = adapter.unwrap_tokens(output_encoded.tokens)
    ids_in = adapter.unwrap_token_ids(input_encoded.token_ids)
    ids_out = adapter.unwrap_token_ids(output_encoded.token_ids)
    shaped_metrics = adapter.unwrap_metrics(metrics)

    return {
        "original": original,
        "corrupted": corrupted,
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
        "input_token_ids": ids_in,
        "output_token_ids": ids_out,
        "tokenizer_info": tokenizer_info,
        "metrics": shaped_metrics,
    }
543
+
544
+
545
+ # ---------------------------------------------------------------------------
546
+ # Token Count Helpers (Pure)
547
+ # ---------------------------------------------------------------------------
548
+
549
+
550
+ def compute_token_counts(
551
+ input_tokens: list[str] | list[list[str]],
552
+ output_tokens: list[str] | list[list[str]],
553
+ ) -> tuple[list[int], list[int]]:
554
+ """Compute token counts for inputs and outputs.
555
+
556
+ Handles both single sequences and batches.
557
+
558
+ Args:
559
+ input_tokens: Input token sequence(s).
560
+ output_tokens: Output token sequence(s).
561
+
562
+ Returns:
563
+ Tuple of (input_counts, output_counts) as lists.
564
+ """
565
+ # Check if batched
566
+ if input_tokens and isinstance(input_tokens[0], list):
567
+ input_counts = [len(batch) for batch in input_tokens]
568
+ output_counts = [len(batch) for batch in output_tokens]
569
+ else:
570
+ input_counts = [len(input_tokens)]
571
+ output_counts = [len(output_tokens)]
572
+ return input_counts, output_counts
573
+
574
+
575
def format_token_count_delta(input_count: int, output_count: int) -> str:
    """Render a before/after token count with its signed difference.

    Args:
        input_count: Number of input tokens.
        output_count: Number of output tokens.

    Returns:
        Text of the form "10 -> 12 (+2)"; the delta always carries a sign.
    """
    delta: int = output_count - input_count
    return f"{input_count} -> {output_count} ({delta:+d})"
587
+
588
+
589
# Public API of this module, grouped by the section each name is defined in.
__all__ = [
    # -- type guards --
    "is_string_batch",
    "is_transcript_like",
    # -- attack planning --
    "AttackPlan",
    "plan_attack",
    "extract_transcript_contents",
    # -- result planning --
    "MetricPlan",
    "ResultPlan",
    "plan_result",
    # -- batch adapter --
    "BatchAdapter",
    # -- result assembly --
    "EncodedData",
    "assemble_result_fields",
    "assemble_single_result_fields",
    "assemble_batch_result_fields",
    "assemble_empty_result_fields",
    # -- token count helpers --
    "compute_token_counts",
    "format_token_count_delta",
]