glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. glitchlings/__init__.py +36 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/pipeline_assets.json +29 -0
  17. glitchlings/attack/__init__.py +53 -0
  18. glitchlings/attack/compose.py +299 -0
  19. glitchlings/attack/core.py +465 -0
  20. glitchlings/attack/encode.py +114 -0
  21. glitchlings/attack/metrics.py +104 -0
  22. glitchlings/attack/metrics_dispatch.py +70 -0
  23. glitchlings/attack/tokenization.py +157 -0
  24. glitchlings/auggie.py +283 -0
  25. glitchlings/compat/__init__.py +9 -0
  26. glitchlings/compat/loaders.py +355 -0
  27. glitchlings/compat/types.py +41 -0
  28. glitchlings/conf/__init__.py +41 -0
  29. glitchlings/conf/loaders.py +331 -0
  30. glitchlings/conf/schema.py +156 -0
  31. glitchlings/conf/types.py +72 -0
  32. glitchlings/config.toml +2 -0
  33. glitchlings/constants.py +59 -0
  34. glitchlings/dev/__init__.py +3 -0
  35. glitchlings/dev/docs.py +45 -0
  36. glitchlings/dlc/__init__.py +17 -3
  37. glitchlings/dlc/_shared.py +296 -0
  38. glitchlings/dlc/gutenberg.py +400 -0
  39. glitchlings/dlc/huggingface.py +37 -65
  40. glitchlings/dlc/prime.py +55 -114
  41. glitchlings/dlc/pytorch.py +98 -0
  42. glitchlings/dlc/pytorch_lightning.py +173 -0
  43. glitchlings/internal/__init__.py +16 -0
  44. glitchlings/internal/rust.py +159 -0
  45. glitchlings/internal/rust_ffi.py +432 -0
  46. glitchlings/main.py +123 -32
  47. glitchlings/runtime_config.py +24 -0
  48. glitchlings/util/__init__.py +29 -176
  49. glitchlings/util/adapters.py +65 -0
  50. glitchlings/util/keyboards.py +311 -0
  51. glitchlings/util/transcripts.py +108 -0
  52. glitchlings/zoo/__init__.py +47 -24
  53. glitchlings/zoo/assets/__init__.py +29 -0
  54. glitchlings/zoo/core.py +301 -167
  55. glitchlings/zoo/core_execution.py +98 -0
  56. glitchlings/zoo/core_planning.py +451 -0
  57. glitchlings/zoo/corrupt_dispatch.py +295 -0
  58. glitchlings/zoo/ekkokin.py +118 -0
  59. glitchlings/zoo/hokey.py +137 -0
  60. glitchlings/zoo/jargoyle.py +179 -274
  61. glitchlings/zoo/mim1c.py +106 -68
  62. glitchlings/zoo/pedant/__init__.py +107 -0
  63. glitchlings/zoo/pedant/core.py +105 -0
  64. glitchlings/zoo/pedant/forms.py +74 -0
  65. glitchlings/zoo/pedant/stones.py +74 -0
  66. glitchlings/zoo/redactyl.py +44 -175
  67. glitchlings/zoo/rng.py +259 -0
  68. glitchlings/zoo/rushmore.py +359 -116
  69. glitchlings/zoo/scannequin.py +18 -125
  70. glitchlings/zoo/transforms.py +386 -0
  71. glitchlings/zoo/typogre.py +76 -162
  72. glitchlings/zoo/validation.py +477 -0
  73. glitchlings/zoo/zeedub.py +33 -86
  74. glitchlings-0.9.3.dist-info/METADATA +334 -0
  75. glitchlings-0.9.3.dist-info/RECORD +80 -0
  76. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
  77. glitchlings/zoo/_ocr_confusions.py +0 -34
  78. glitchlings/zoo/_rate.py +0 -21
  79. glitchlings/zoo/reduple.py +0 -169
  80. glitchlings-0.2.5.dist-info/METADATA +0 -490
  81. glitchlings-0.2.5.dist-info/RECORD +0 -27
  82. /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
  83. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
  84. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
  85. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/core.py CHANGED
@@ -1,69 +1,68 @@
1
1
  """Core data structures used to model glitchlings and their interactions."""
2
2
 
3
3
  import inspect
4
- import logging
5
- import os
6
4
  import random
5
+ from collections.abc import Mapping, Sequence
7
6
  from enum import IntEnum, auto
8
7
  from hashlib import blake2s
9
- from typing import TYPE_CHECKING, Any, Callable, Protocol
10
-
11
- _datasets_error: ModuleNotFoundError | None = None
12
- try: # pragma: no cover - optional dependency
13
- from datasets import Dataset as _DatasetsDataset
14
- except ModuleNotFoundError as error: # pragma: no cover - optional dependency
15
- _DatasetsDataset = None # type: ignore[assignment]
16
- _datasets_error = error
17
- else:
18
- _datasets_error = None
19
-
20
- try: # pragma: no cover - optional dependency
21
- from glitchlings._zoo_rust import compose_glitchlings as _compose_glitchlings_rust
22
- except ImportError: # pragma: no cover - compiled extension not present
23
- _compose_glitchlings_rust = None
24
-
25
-
26
- log = logging.getLogger(__name__)
27
-
8
+ from typing import TYPE_CHECKING, Any, Callable, Protocol, cast
9
+
10
+ from glitchlings.internal.rust_ffi import plan_glitchlings_rust
11
+
12
+ from ..compat.loaders import get_datasets_dataset, require_datasets
13
+ from ..compat.types import Dataset as DatasetProtocol
14
+ from ..util.transcripts import (
15
+ Transcript,
16
+ TranscriptTarget,
17
+ is_transcript,
18
+ )
19
+ from .core_execution import execute_plan
20
+ from .core_planning import (
21
+ PipelineDescriptor,
22
+ PipelineOperationPayload,
23
+ build_execution_plan,
24
+ build_pipeline_descriptor,
25
+ normalize_plan_entries,
26
+ )
27
+ from .core_planning import (
28
+ PlanEntry as _PlanEntry,
29
+ )
30
+ from .corrupt_dispatch import (
31
+ StringCorruptionTarget,
32
+ assemble_corruption_result,
33
+ resolve_corruption_target,
34
+ )
35
+
36
+ _DatasetsDataset = get_datasets_dataset()
37
+
38
+ _is_transcript = is_transcript
39
+
40
+
41
+ def plan_glitchlings(
42
+ entries: Sequence[_PlanEntry],
43
+ master_seed: int | None,
44
+ ) -> list[tuple[int, int]]:
45
+ """Normalize glitchling instances or specs and compute an orchestration plan.
46
+
47
+ Notes
48
+ -----
49
+ The Rust extension is required for orchestration.
50
+ """
51
+ if master_seed is None:
52
+ message = "Gaggle orchestration requires a master seed"
53
+ raise ValueError(message)
54
+
55
+ normalized_specs = [spec.as_mapping() for spec in normalize_plan_entries(entries)]
56
+ master_seed_int = int(master_seed)
57
+ return plan_glitchlings_rust(list(normalized_specs), master_seed_int)
28
58
 
29
- _PIPELINE_FEATURE_FLAG_ENV = "GLITCHLINGS_RUST_PIPELINE"
30
-
31
-
32
- def _pipeline_feature_flag_enabled() -> bool:
33
- """Return ``True`` when the environment explicitly opts into the Rust pipeline."""
34
-
35
- value = os.environ.get(_PIPELINE_FEATURE_FLAG_ENV)
36
- if value is None:
37
- return False
38
-
39
- normalized = value.strip().lower()
40
- return normalized in {"1", "true", "yes", "on"}
41
59
 
42
60
  if TYPE_CHECKING: # pragma: no cover - typing only
43
- from datasets import Dataset # type: ignore
61
+ from datasets import Dataset
44
62
  elif _DatasetsDataset is not None:
45
63
  Dataset = _DatasetsDataset
46
64
  else:
47
-
48
- class Dataset(Protocol): # type: ignore[no-redef]
49
- """Typed stub mirroring the Hugging Face dataset interface used here."""
50
-
51
- def with_transform(self, function: Any) -> "Dataset": ...
52
-
53
-
54
- def _is_transcript(value: Any) -> bool:
55
- """Return True when the value resembles a chat transcript."""
56
-
57
- if not isinstance(value, list):
58
- return False
59
-
60
- if not value:
61
- return True
62
-
63
- if not all(isinstance(turn, dict) for turn in value):
64
- return False
65
-
66
- return "content" in value[-1]
65
+ Dataset = DatasetProtocol
67
66
 
68
67
 
69
68
  class CorruptionCallable(Protocol):
@@ -107,7 +106,8 @@ class Glitchling:
107
106
  scope: AttackWave,
108
107
  order: AttackOrder = AttackOrder.NORMAL,
109
108
  seed: int | None = None,
110
- pipeline_operation: Callable[["Glitchling"], dict[str, Any] | None] | None = None,
109
+ pipeline_operation: Callable[["Glitchling"], Mapping[str, Any] | None] | None = None,
110
+ transcript_target: TranscriptTarget = "last",
111
111
  **kwargs: Any,
112
112
  ) -> None:
113
113
  """Initialize a glitchling.
@@ -118,9 +118,17 @@ class Glitchling:
118
118
  scope: Text granularity on which the glitchling operates.
119
119
  order: Relative ordering within the same scope.
120
120
  seed: Optional seed for deterministic random behaviour.
121
+ pipeline_operation: Optional factory for Rust pipeline descriptors.
122
+ transcript_target: Which transcript turns to corrupt. Accepts:
123
+ - ``"last"`` (default): corrupt only the last turn
124
+ - ``"all"``: corrupt all turns
125
+ - ``"assistant"``: corrupt only assistant turns
126
+ - ``"user"``: corrupt only user turns
127
+ - ``int``: corrupt a specific index (negative indexing supported)
128
+ - ``Sequence[int]``: corrupt specific indices
121
129
  **kwargs: Additional parameters forwarded to the corruption callable.
122
- """
123
130
 
131
+ """
124
132
  # Each Glitchling maintains its own RNG for deterministic yet isolated behavior.
125
133
  # If no seed is supplied, we fall back to Python's default entropy.
126
134
  self.seed = seed
@@ -130,6 +138,7 @@ class Glitchling:
130
138
  self.level: AttackWave = scope
131
139
  self.order: AttackOrder = order
132
140
  self._pipeline_descriptor_factory = pipeline_operation
141
+ self.transcript_target: TranscriptTarget = transcript_target
133
142
  self.kwargs: dict[str, Any] = {}
134
143
  self._cached_rng_callable: CorruptionCallable | None = None
135
144
  self._cached_rng_expectation: bool | None = None
@@ -138,7 +147,6 @@ class Glitchling:
138
147
 
139
148
  def set_param(self, key: str, value: Any) -> None:
140
149
  """Persist a parameter for use by the corruption callable."""
141
-
142
150
  aliases = getattr(self, "_param_aliases", {})
143
151
  canonical = aliases.get(key, key)
144
152
 
@@ -158,26 +166,42 @@ class Glitchling:
158
166
  if target == canonical:
159
167
  setattr(self, alias, value)
160
168
 
161
- def pipeline_operation(self) -> dict[str, Any] | None:
162
- """Return the Rust pipeline operation descriptor for this glitchling."""
169
+ def pipeline_operation(self) -> PipelineOperationPayload | None:
170
+ """Return the Rust pipeline descriptor or ``None`` when unavailable.
171
+
172
+ Glitchlings that cannot provide a compiled pipeline (for example the
173
+ lightweight helpers used in tests) should override this hook or supply
174
+ a ``pipeline_operation`` factory that returns ``None`` to indicate that
175
+ Python orchestration must be used instead. When a descriptor mapping is
176
+ returned it is validated and forwarded to the Rust pipeline.
177
+ """
163
178
 
164
179
  factory = self._pipeline_descriptor_factory
165
180
  if factory is None:
166
181
  return None
167
182
 
168
- return factory(self)
183
+ descriptor = factory(self)
184
+ if descriptor is None:
185
+ return None
186
+
187
+ if not isinstance(descriptor, Mapping): # pragma: no cover - defensive
188
+ raise TypeError("Pipeline descriptor factories must return a mapping or None")
189
+
190
+ payload = dict(descriptor)
191
+ payload_type = payload.get("type")
192
+ if not isinstance(payload_type, str):
193
+ message = f"Pipeline descriptor for {self.name} is missing a string 'type' field"
194
+ raise RuntimeError(message)
195
+
196
+ return cast(PipelineOperationPayload, payload)
169
197
 
170
198
  def _corruption_expects_rng(self) -> bool:
171
199
  """Return `True` when the corruption function accepts an rng keyword."""
172
-
173
200
  cached_callable = self._cached_rng_callable
174
201
  cached_expectation = self._cached_rng_expectation
175
202
  corruption_function = self.corruption_function
176
203
 
177
- if (
178
- cached_callable is corruption_function
179
- and cached_expectation is not None
180
- ):
204
+ if cached_callable is corruption_function and cached_expectation is not None:
181
205
  return cached_expectation
182
206
 
183
207
  expects_rng = False
@@ -195,7 +219,6 @@ class Glitchling:
195
219
 
196
220
  def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
197
221
  """Execute the corruption callable, injecting the RNG when required."""
198
-
199
222
  # Pass rng to underlying corruption function if it expects it.
200
223
  expects_rng = self._corruption_expects_rng()
201
224
 
@@ -205,41 +228,66 @@ class Glitchling:
205
228
  corrupted = self.corruption_function(text, *args, **kwargs)
206
229
  return corrupted
207
230
 
208
- def corrupt(self, text: str | list[dict[str, Any]]) -> str | list[dict[str, Any]]:
209
- """Apply the corruption function to text or conversational transcripts."""
231
+ def _execute_corruption(self, text: str) -> str:
232
+ """Execute the actual corruption on a single text string.
210
233
 
211
- if _is_transcript(text):
212
- transcript = [dict(turn) for turn in text]
213
- if transcript:
214
- transcript[-1]["content"] = self.__corrupt(
215
- transcript[-1]["content"], **self.kwargs
216
- )
217
- return transcript
234
+ This is the impure execution point that invokes the corruption callable.
235
+ All corruption for this glitchling flows through this single method.
218
236
 
237
+ Args:
238
+ text: The text to corrupt.
239
+
240
+ Returns:
241
+ The corrupted text.
242
+ """
219
243
  return self.__corrupt(text, **self.kwargs)
220
244
 
221
- def corrupt_dataset(self, dataset: Dataset, columns: list[str]) -> Dataset:
222
- """Apply corruption lazily across dataset columns."""
245
+ def corrupt(self, text: str | Transcript) -> str | Transcript:
246
+ """Apply the corruption function to text or conversational transcripts.
223
247
 
224
- if _DatasetsDataset is None:
225
- message = "datasets is not installed"
226
- raise ModuleNotFoundError(message) from _datasets_error
248
+ This method uses a pure dispatch pattern:
249
+ 1. Resolve the corruption target (pure - what to corrupt)
250
+ 2. Execute corruption (impure - single isolated point)
251
+ 3. Assemble the result (pure - combine results)
227
252
 
228
- def _is_transcript(value: Any) -> bool:
229
- """Return ``True`` when the value resembles a chat transcript."""
253
+ When the input is a transcript, the ``transcript_target`` setting
254
+ controls which turns are corrupted:
230
255
 
231
- if not isinstance(value, list) or not value:
232
- return False
256
+ - ``"last"``: corrupt only the last turn (default)
257
+ - ``"all"``: corrupt all turns
258
+ - ``"assistant"``: corrupt only turns with ``role="assistant"``
259
+ - ``"user"``: corrupt only turns with ``role="user"``
260
+ - ``int``: corrupt a specific turn by index
261
+ - ``Sequence[int]``: corrupt specific turns by index
262
+ """
263
+ # Step 1: Pure dispatch - determine what to corrupt
264
+ target = resolve_corruption_target(text, self.transcript_target)
233
265
 
234
- return all(
235
- isinstance(turn, dict) and "content" in turn for turn in value
236
- )
266
+ # Step 2: Impure execution - apply corruption via isolated method
267
+ if isinstance(target, StringCorruptionTarget):
268
+ corrupted: str | dict[int, str] = self._execute_corruption(target.text)
269
+ else:
270
+ # TranscriptCorruptionTarget
271
+ corrupted = {
272
+ turn.index: self._execute_corruption(turn.content) for turn in target.turns
273
+ }
274
+
275
+ # Step 3: Pure assembly - combine results
276
+ return assemble_corruption_result(target, corrupted)
277
+
278
+ def corrupt_dataset(self, dataset: Dataset, columns: list[str]) -> Dataset:
279
+ """Apply corruption lazily across dataset columns."""
280
+ require_datasets("datasets is not installed")
237
281
 
238
282
  def __corrupt_row(row: dict[str, Any]) -> dict[str, Any]:
239
283
  row = dict(row)
240
284
  for column in columns:
241
285
  value = row[column]
242
- if _is_transcript(value):
286
+ if _is_transcript(
287
+ value,
288
+ allow_empty=False,
289
+ require_all_content=True,
290
+ ):
243
291
  row[column] = self.corrupt(value)
244
292
  elif isinstance(value, list):
245
293
  row[column] = [self.corrupt(item) for item in value]
@@ -249,14 +297,12 @@ class Glitchling:
249
297
 
250
298
  return dataset.with_transform(__corrupt_row)
251
299
 
252
- def __call__(self, text: str, *args: Any, **kwds: Any) -> str | list[dict[str, Any]]:
300
+ def __call__(self, text: str, *args: Any, **kwds: Any) -> str | Transcript:
253
301
  """Allow a glitchling to be invoked directly like a callable."""
254
-
255
302
  return self.corrupt(text, *args, **kwds)
256
303
 
257
304
  def reset_rng(self, seed: int | None = None) -> None:
258
305
  """Reset the glitchling's RNG to its initial seed."""
259
-
260
306
  if seed is not None:
261
307
  self.seed = seed
262
308
  if self.seed is not None:
@@ -264,57 +310,98 @@ class Glitchling:
264
310
 
265
311
  def clone(self, seed: int | None = None) -> "Glitchling":
266
312
  """Create a copy of this glitchling, optionally with a new seed."""
267
-
268
313
  cls = self.__class__
269
314
  filtered_kwargs = {k: v for k, v in self.kwargs.items() if k != "seed"}
270
315
  clone_seed = seed if seed is not None else self.seed
271
- if clone_seed is not None:
272
- filtered_kwargs["seed"] = clone_seed
273
316
 
274
317
  if cls is Glitchling:
318
+ if clone_seed is not None:
319
+ filtered_kwargs["seed"] = clone_seed
275
320
  return Glitchling(
276
321
  self.name,
277
322
  self.corruption_function,
278
323
  self.level,
279
324
  self.order,
280
325
  pipeline_operation=self._pipeline_descriptor_factory,
326
+ transcript_target=self.transcript_target,
281
327
  **filtered_kwargs,
282
328
  )
283
329
 
284
- return cls(**filtered_kwargs)
330
+ # Check which kwargs subclass accepts via **kwargs or explicit params
331
+ try:
332
+ signature = inspect.signature(cls.__init__)
333
+ params = signature.parameters
334
+ has_var_keyword = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
335
+ except (TypeError, ValueError):
336
+ # If we can't introspect, play it safe and pass nothing extra
337
+ return cls(**filtered_kwargs)
285
338
 
339
+ # Only include seed if subclass accepts it
340
+ if clone_seed is not None:
341
+ if has_var_keyword or "seed" in params:
342
+ filtered_kwargs["seed"] = clone_seed
286
343
 
344
+ # Only include transcript_target if subclass accepts it
345
+ if "transcript_target" not in filtered_kwargs:
346
+ if has_var_keyword or "transcript_target" in params:
347
+ filtered_kwargs["transcript_target"] = self.transcript_target
287
348
 
349
+ return cls(**filtered_kwargs)
288
350
 
289
351
 
290
352
  class Gaggle(Glitchling):
291
353
  """A collection of glitchlings executed in a deterministic order."""
292
354
 
293
- def __init__(self, glitchlings: list[Glitchling], seed: int = 151):
355
+ def __init__(
356
+ self,
357
+ glitchlings: list[Glitchling],
358
+ seed: int = 151,
359
+ transcript_target: TranscriptTarget = "last",
360
+ ):
294
361
  """Initialize the gaggle and derive per-glitchling RNG seeds.
295
362
 
296
363
  Args:
297
364
  glitchlings: Glitchlings to orchestrate.
298
365
  seed: Master seed used to derive per-glitchling seeds.
299
- """
366
+ transcript_target: Which transcript turns to corrupt. Accepts:
367
+ - ``"last"`` (default): corrupt only the last turn
368
+ - ``"all"``: corrupt all turns
369
+ - ``"assistant"``: corrupt only assistant turns
370
+ - ``"user"``: corrupt only user turns
371
+ - ``int``: corrupt a specific index (negative indexing supported)
372
+ - ``Sequence[int]``: corrupt specific indices
300
373
 
301
- super().__init__("Gaggle", self.corrupt, AttackWave.DOCUMENT, seed=seed)
302
- self.glitchlings: dict[AttackWave, list[Glitchling]] = {
303
- level: [] for level in AttackWave
304
- }
374
+ """
375
+ super().__init__(
376
+ "Gaggle",
377
+ self._corrupt_text,
378
+ AttackWave.DOCUMENT,
379
+ seed=seed,
380
+ transcript_target=transcript_target,
381
+ )
382
+ self._clones_by_index: list[Glitchling] = []
383
+ for idx, glitchling in enumerate(glitchlings):
384
+ clone = glitchling.clone()
385
+ setattr(clone, "_gaggle_index", idx)
386
+ self._clones_by_index.append(clone)
387
+
388
+ self.glitchlings: dict[AttackWave, list[Glitchling]] = {level: [] for level in AttackWave}
305
389
  self.apply_order: list[Glitchling] = []
306
- # Derive deterministic per-glitchling seeds from master seed if provided
307
- for idx, g in enumerate(glitchlings):
308
- _g = g.clone()
309
- derived_seed = Gaggle.derive_seed(seed, _g.name, idx)
310
- _g.reset_rng(derived_seed)
311
- setattr(_g, "_gaggle_index", idx)
312
- self.glitchlings[g.level].append(_g)
390
+ self._plan: list[tuple[int, int]] = []
313
391
  self.sort_glitchlings()
314
392
 
393
+ def clone(self, seed: int | None = None) -> "Gaggle":
394
+ """Create a copy of this gaggle, cloning member glitchlings."""
395
+ clone_seed = seed if seed is not None else self.seed
396
+ if clone_seed is None:
397
+ clone_seed = 151 # Default seed for Gaggle
398
+ cloned_members = [glitchling.clone() for glitchling in self._clones_by_index]
399
+ return Gaggle(cloned_members, seed=clone_seed, transcript_target=self.transcript_target)
400
+
315
401
  @staticmethod
316
402
  def derive_seed(master_seed: int, glitchling_name: str, index: int) -> int:
317
403
  """Derive a deterministic seed for a glitchling based on the master seed."""
404
+
318
405
  def _int_to_bytes(value: int) -> bytes:
319
406
  if value == 0:
320
407
  return b"\x00"
@@ -341,65 +428,112 @@ class Gaggle(Glitchling):
341
428
 
342
429
  def sort_glitchlings(self) -> None:
343
430
  """Sort glitchlings by wave then order to produce application order."""
344
-
345
- self.apply_order = [
346
- g
347
- for _, glitchlings in sorted(self.glitchlings.items())
348
- for g in sorted(glitchlings, key=lambda x: (x.order, x.name))
349
- ]
350
-
351
- @staticmethod
352
- def rust_pipeline_supported() -> bool:
353
- """Return ``True`` when the compiled Rust pipeline is importable."""
354
-
355
- return _compose_glitchlings_rust is not None
356
-
357
- @staticmethod
358
- def rust_pipeline_enabled() -> bool:
359
- """Return ``True`` when the Rust pipeline is available and opted in."""
360
-
361
- return Gaggle.rust_pipeline_supported() and _pipeline_feature_flag_enabled()
362
-
363
- def _pipeline_descriptors(self) -> list[dict[str, Any]] | None:
364
- if not self.rust_pipeline_enabled():
365
- return None
366
-
367
- descriptors: list[dict[str, Any]] = []
431
+ plan = plan_glitchlings(self._clones_by_index, self.seed)
432
+ self._plan = plan
433
+
434
+ self.glitchlings = {level: [] for level in AttackWave}
435
+ for clone in self._clones_by_index:
436
+ self.glitchlings[clone.level].append(clone)
437
+
438
+ missing = set(range(len(self._clones_by_index)))
439
+ apply_order: list[Glitchling] = []
440
+ for index, derived_seed in plan:
441
+ clone = self._clones_by_index[index]
442
+ clone.reset_rng(int(derived_seed))
443
+ apply_order.append(clone)
444
+ missing.discard(index)
445
+
446
+ if missing:
447
+ missing_indices = ", ".join(str(idx) for idx in sorted(missing))
448
+ message = f"Orchestration plan missing glitchlings at indices: {missing_indices}"
449
+ raise RuntimeError(message)
450
+
451
+ self.apply_order = apply_order
452
+
453
+ def _pipeline_descriptors(self) -> tuple[list[PipelineDescriptor], list[Glitchling]]:
454
+ """Collect pipeline descriptors and track glitchlings missing them."""
455
+ descriptors: list[PipelineDescriptor] = []
456
+ missing: list[Glitchling] = []
457
+ master_seed = self.seed
368
458
  for glitchling in self.apply_order:
369
- operation = glitchling.pipeline_operation()
370
- if operation is None:
371
- return None
372
-
373
- seed = glitchling.seed
374
- if seed is None:
375
- index = getattr(glitchling, "_gaggle_index", None)
376
- master_seed = self.seed
377
- if index is None or master_seed is None:
378
- return None
379
- seed = Gaggle.derive_seed(master_seed, glitchling.name, index)
380
-
381
- descriptors.append(
382
- {
383
- "name": glitchling.name,
384
- "operation": operation,
385
- "seed": int(seed),
386
- }
459
+ descriptor = build_pipeline_descriptor(
460
+ glitchling,
461
+ master_seed=master_seed,
462
+ derive_seed_fn=Gaggle.derive_seed,
387
463
  )
388
-
389
- return descriptors
390
-
391
- def corrupt(self, text: str) -> str:
392
- """Apply each glitchling to the provided text sequentially."""
393
-
464
+ if descriptor is None:
465
+ missing.append(glitchling)
466
+ continue
467
+ descriptors.append(descriptor.as_mapping())
468
+
469
+ return descriptors, missing
470
+
471
+ def _corrupt_text(self, text: str) -> str:
472
+ """Apply each glitchling to string input sequentially.
473
+
474
+ This method uses a batched execution strategy to minimize tokenization
475
+ overhead. Consecutive glitchlings with pipeline support are grouped and
476
+ executed together via the Rust pipeline, while glitchlings without
477
+ pipeline support are executed individually. This hybrid approach ensures
478
+ the text is tokenized fewer times compared to executing every glitchling
479
+ individually.
480
+ """
394
481
  master_seed = self.seed
395
- descriptors = self._pipeline_descriptors()
396
- if master_seed is not None and descriptors is not None:
397
- try:
398
- return _compose_glitchlings_rust(text, descriptors, master_seed)
399
- except Exception: # pragma: no cover - fall back to Python execution
400
- log.debug("Rust pipeline failed; falling back", exc_info=True)
401
-
402
- corrupted = text
403
- for glitchling in self.apply_order:
404
- corrupted = glitchling(corrupted)
405
- return corrupted
482
+ if master_seed is None:
483
+ message = "Gaggle orchestration requires a master seed"
484
+ raise RuntimeError(message)
485
+
486
+ # Build the pure execution plan
487
+ plan = build_execution_plan(
488
+ self.apply_order,
489
+ master_seed=master_seed,
490
+ derive_seed_fn=Gaggle.derive_seed,
491
+ )
492
+
493
+ # Execute via the impure dispatch layer
494
+ return execute_plan(text, plan, master_seed)
495
+
496
+ def corrupt(self, text: str | Transcript) -> str | Transcript:
497
+ """Apply each glitchling to the provided text sequentially.
498
+
499
+ This method uses a pure dispatch pattern:
500
+ 1. Resolve the corruption target (pure - what to corrupt)
501
+ 2. Execute corruption (impure - single isolated point)
502
+ 3. Assemble the result (pure - combine results)
503
+
504
+ When the input is a transcript, the ``transcript_target`` setting
505
+ controls which turns are corrupted:
506
+
507
+ - ``"last"``: corrupt only the last turn (default)
508
+ - ``"all"``: corrupt all turns
509
+ - ``"assistant"``: corrupt only turns with ``role="assistant"``
510
+ - ``"user"``: corrupt only turns with ``role="user"``
511
+ - ``int``: corrupt a specific turn by index
512
+ - ``Sequence[int]``: corrupt specific turns by index
513
+ """
514
+ # Step 1: Pure dispatch - determine what to corrupt
515
+ target = resolve_corruption_target(text, self.transcript_target)
516
+
517
+ # Step 2: Impure execution - apply corruption via isolated method
518
+ if isinstance(target, StringCorruptionTarget):
519
+ corrupted: str | dict[int, str] = self._corrupt_text(target.text)
520
+ else:
521
+ # TranscriptCorruptionTarget
522
+ corrupted = {turn.index: self._corrupt_text(turn.content) for turn in target.turns}
523
+
524
+ # Step 3: Pure assembly - combine results
525
+ return assemble_corruption_result(target, corrupted)
526
+
527
+
528
+ __all__ = [
529
+ # Enums
530
+ "AttackWave",
531
+ "AttackOrder",
532
+ # Core classes
533
+ "Glitchling",
534
+ "Gaggle",
535
+ # Planning functions
536
+ "plan_glitchlings",
537
+ "PipelineOperationPayload",
538
+ "PipelineDescriptor",
539
+ ]