glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,291 @@
1
+ """Pure dispatch logic for Glitchling corruption operations.
2
+
3
+ This module contains the deterministic, side-effect-free logic for building
4
+ corruption plans. It separates the "what to corrupt" decision from the
5
+ "how to corrupt" execution.
6
+
7
+ **Design Philosophy:**
8
+
9
+ All functions in this module are *pure* - they perform dispatch analysis
10
+ based solely on their inputs, without side effects. They do not:
11
+ - Invoke corruption functions
12
+ - Modify state
13
+ - Perform I/O
14
+
15
+ The separation allows:
16
+ - Corruption dispatch to be tested without actual corruption
17
+ - Clear boundaries between planning and execution
18
+ - Reasoning about what will be corrupted before execution
19
+
20
+ See AGENTS.md "Functional Purity Architecture" for full details.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from dataclasses import dataclass
26
+ from typing import Any, Literal
27
+
28
+ from ..util.transcripts import (
29
+ Transcript,
30
+ TranscriptTarget,
31
+ TranscriptTurn,
32
+ is_transcript,
33
+ resolve_transcript_indices,
34
+ )
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Type Definitions
38
+ # ---------------------------------------------------------------------------
39
+
40
+
41
+ @dataclass(slots=True, frozen=True)
42
+ class StringCorruptionTarget:
43
+ """Target specification for corrupting a plain string.
44
+
45
+ Attributes:
46
+ text: The string to corrupt.
47
+ """
48
+
49
+ text: str
50
+ kind: Literal["string"] = "string"
51
+
52
+
53
+ @dataclass(slots=True, frozen=True)
54
+ class TranscriptTurnTarget:
55
+ """Target specification for a single turn within a transcript.
56
+
57
+ Attributes:
58
+ index: Position of the turn in the transcript.
59
+ content: The text content to corrupt.
60
+ """
61
+
62
+ index: int
63
+ content: str
64
+
65
+
66
+ @dataclass(slots=True, frozen=True)
67
+ class TranscriptCorruptionTarget:
68
+ """Target specification for corrupting transcript turns.
69
+
70
+ Attributes:
71
+ turns: List of turn targets with their indices and content.
72
+ original_transcript: The original transcript for result assembly.
73
+ """
74
+
75
+ turns: tuple[TranscriptTurnTarget, ...]
76
+ original_transcript: Transcript
77
+ kind: Literal["transcript"] = "transcript"
78
+
79
+
80
+ # Union type for corruption targets
81
+ CorruptionTarget = StringCorruptionTarget | TranscriptCorruptionTarget
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Dispatch Functions
86
+ # ---------------------------------------------------------------------------
87
+
88
+
89
def resolve_corruption_target(
    text: str | Transcript,
    transcript_target: TranscriptTarget,
) -> CorruptionTarget:
    """Classify the input and describe which text segments to corrupt.

    Nothing is corrupted here; the function only builds a target
    specification from its arguments.

    Args:
        text: Plain string, transcript, or any other value to analyse.
        transcript_target: Turn selector forwarded to
            ``resolve_transcript_indices`` when ``text`` is a transcript.

    Returns:
        A ``StringCorruptionTarget`` for strings and for anything that
        ``is_transcript`` rejects, otherwise a
        ``TranscriptCorruptionTarget`` listing the selected turns.

    Note:
        Values that are neither ``str`` nor a transcript (for example a list
        of strings produced by a batched dataset column) are converted with
        ``str()`` and handled as one string. Selected turns whose
        ``"content"`` is not a ``str`` are left out of the target.
    """
    # Plain strings need no further analysis.
    if isinstance(text, str):
        return StringCorruptionTarget(text=text)

    # Fallback: anything that is not a transcript is stringified as-is.
    if not is_transcript(text):
        return StringCorruptionTarget(text=str(text))

    selected = resolve_transcript_indices(text, transcript_target)
    # Pair every selected position with whatever sits under "content".
    candidates = ((position, text[position].get("content")) for position in selected)

    return TranscriptCorruptionTarget(
        turns=tuple(
            TranscriptTurnTarget(index=position, content=body)
            for position, body in candidates
            if isinstance(body, str)  # only textual content can be corrupted
        ),
        original_transcript=text,
    )
134
+
135
+
136
def count_corruption_targets(target: CorruptionTarget) -> int:
    """Report how many separate text segments a target contains.

    Args:
        target: The corruption target specification.

    Returns:
        ``1`` for a string target, otherwise the number of turns recorded
        on the transcript target.
    """
    return 1 if isinstance(target, StringCorruptionTarget) else len(target.turns)
148
+
149
+
150
def extract_texts_to_corrupt(target: CorruptionTarget) -> list[str]:
    """Collect the raw strings a target asks to have corrupted.

    Handy for batch processing or for inspecting a plan before running it.

    Args:
        target: The corruption target specification.

    Returns:
        A one-element list holding the text of a string target, or the
        content of every turn of a transcript target, in turn order.
    """
    if not isinstance(target, StringCorruptionTarget):
        return [turn.content for turn in target.turns]
    return [target.text]
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Result Assembly Functions
168
+ # ---------------------------------------------------------------------------
169
+
170
+
171
def assemble_string_result(
    _target: StringCorruptionTarget,
    corrupted: str,
) -> str:
    """Produce the final value for a string corruption.

    A string target has no surrounding structure to rebuild, so the
    corrupted text is handed back unchanged.

    Args:
        _target: The original target. Ignored; accepted only so the
            signature mirrors ``assemble_transcript_result``.
        corrupted: The corrupted text.

    Returns:
        ``corrupted`` itself.
    """
    return corrupted
185
+
186
+
187
def assemble_transcript_result(
    target: TranscriptCorruptionTarget,
    corrupted_contents: dict[int, str],
) -> Transcript:
    """Rebuild a transcript with the corrupted turn contents spliced in.

    Args:
        target: The original target specification; only its
            ``original_transcript`` is read.
        corrupted_contents: Mapping of turn index to the corrupted text that
            should replace that turn's ``"content"``.

    Returns:
        A new list of new turn dicts. Each turn is copied with ``dict()``
        (one level deep), so the original list and its turn dicts are not
        modified. Indices outside ``0 <= index < len(transcript)`` are
        silently skipped.
    """
    # One fresh dict per turn; enough isolation because only the top-level
    # "content" key is reassigned below.
    rebuilt: list[TranscriptTurn] = [dict(turn) for turn in target.original_transcript]
    turn_count = len(rebuilt)

    for position, replacement in corrupted_contents.items():
        if not 0 <= position < turn_count:
            continue  # out-of-range index: nothing to update
        rebuilt[position]["content"] = replacement

    return rebuilt
211
+
212
+
213
def assemble_corruption_result(
    target: CorruptionTarget,
    corrupted: str | dict[int, str],
) -> str | Transcript:
    """Combine a target with its corrupted content into the final result.

    Dispatches on the target's class and delegates to
    ``assemble_string_result`` or ``assemble_transcript_result``.

    Args:
        target: The original corruption target.
        corrupted: A single corrupted string for a ``StringCorruptionTarget``,
            or a mapping of turn index to corrupted content for a
            ``TranscriptCorruptionTarget``.

    Returns:
        A string for string targets, a transcript for transcript targets.

    Raises:
        TypeError: If ``corrupted`` has the wrong shape for the target, or
            the target is of neither known class.
    """
    if isinstance(target, StringCorruptionTarget):
        if isinstance(corrupted, str):
            return assemble_string_result(target, corrupted)
        raise TypeError("String target requires corrupted string result")

    if isinstance(target, TranscriptCorruptionTarget):
        if isinstance(corrupted, dict):
            return assemble_transcript_result(target, corrupted)
        raise TypeError("Transcript target requires corrupted content mapping")

    # The type hints rule this out, but fail loudly rather than return None.
    raise TypeError(f"Unknown target type: {type(target).__name__}")
248
+
249
+
250
+ # ---------------------------------------------------------------------------
251
+ # Validation Helpers
252
+ # ---------------------------------------------------------------------------
253
+
254
+
255
def validate_text_input(text: Any) -> str | Transcript:
    """Check that a value is a string or a transcript and pass it through.

    Args:
        text: Input to validate.

    Returns:
        ``text`` unchanged when it is a ``str`` or is accepted by
        ``is_transcript``.

    Raises:
        TypeError: If the input is neither a string nor a transcript.
    """
    # ``or`` short-circuits, so strings never reach ``is_transcript``.
    if isinstance(text, str) or is_transcript(text):
        return text
    raise TypeError(f"Expected string or transcript, got {type(text).__name__}")
273
+
274
+
275
+ __all__ = [
276
+ # Target types
277
+ "StringCorruptionTarget",
278
+ "TranscriptTurnTarget",
279
+ "TranscriptCorruptionTarget",
280
+ "CorruptionTarget",
281
+ # Dispatch functions
282
+ "resolve_corruption_target",
283
+ "count_corruption_targets",
284
+ "extract_texts_to_corrupt",
285
+ # Result assembly
286
+ "assemble_string_result",
287
+ "assemble_transcript_result",
288
+ "assemble_corruption_result",
289
+ # Validation
290
+ "validate_text_input",
291
+ ]
@@ -0,0 +1,139 @@
1
+ """Hokey glitchling that performs expressive lengthening."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from glitchlings.internal.rust_ffi import resolve_seed, stretch_word_rust
9
+
10
+ from .core import AttackOrder, AttackWave, Gaggle, PipelineOperationPayload
11
+ from .core import Glitchling as GlitchlingBase
12
+
13
+
14
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    base_p: float | None = None,
) -> str:
    """Stretch expressive segments of words to add emphasis.

    Empty input is returned untouched; everything else is delegated to the
    ``stretch_word_rust`` binding.

    Parameters
    ----------
    text : str
        Input text to transform.
    rate : float, optional
        Global selection rate for candidate words.
    extension_min : int, optional
        Minimum number of extra repetitions for the stretch unit.
    extension_max : int, optional
        Maximum number of extra repetitions for the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; longer words are de-emphasised
        but not excluded.
    seed : int, optional
        Deterministic seed when ``rng`` is not supplied.
    rng : random.Random, optional
        Random number generator to drive sampling.
    base_p : float, optional
        Base probability for the negative-binomial sampler (heavier tails for
        smaller values). ``None`` is replaced by ``0.45``.

    Returns
    -------
    str
        The transformed text, or ``text`` itself when it is empty.
    """
    if not text:
        return text

    # ``seed`` and ``rng`` are collapsed into the single value the binding takes.
    resolved_seed = resolve_seed(seed, rng)
    probability = 0.45 if base_p is None else base_p

    return stretch_word_rust(
        text,
        rate,
        extension_min,
        extension_max,
        word_length_threshold,
        probability,
        resolved_seed,
    )
62
+
63
+
64
class Hokey(GlitchlingBase):
    """Glitchling that lengthens words for expressive emphasis.

    The corruption itself is performed by ``extend_vowels``; this class wires
    that function and its parameters into the base glitchling.
    """

    flavor = "Sooooo excited to meet you! We reeeeeally missed you last week."

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling and register it with the base class.

        Args:
            rate: Global selection rate for candidate words.
            extension_min: Minimum number of extra repetitions.
            extension_max: Maximum number of extra repetitions.
            word_length_threshold: Preferred maximum alphabetic word length.
            base_p: Base probability handed to ``extend_vowels``.
            seed: Optional seed; also remembered as the master seed.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        self._master_seed: int | None = seed

        # Thin indirection so the base class receives a local callable that
        # forwards every keyword argument to ``extend_vowels``.
        def _corruption_wrapper(text: str, **kwargs: Any) -> str:
            return extend_vowels(text, **kwargs)

        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_p,
            **kwargs,
        )

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Describe this glitchling as a pipeline operation payload.

        Returns:
            A mapping with ``"type": "hokey"`` plus the numeric parameters
            read from ``self.kwargs``. A parameter that is missing or
            ``None`` falls back to its default; otherwise it is coerced
            with ``float`` or ``int``.
        """
        settings = self.kwargs

        def _coerced(key: str, fallback: Any, convert: Any) -> Any:
            # Use the fallback only for a missing/None entry; coerce the rest.
            raw = settings.get(key)
            return fallback if raw is None else convert(raw)

        payload = {
            "type": "hokey",
            "rate": _coerced("rate", 0.3, float),
            "extension_min": _coerced("extension_min", 2, int),
            "extension_max": _coerced("extension_max", 5, int),
            "word_length_threshold": _coerced("word_length_threshold", 6, int),
            "base_p": _coerced("base_p", 0.45, float),
        }
        return cast(PipelineOperationPayload, payload)

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the random state, deriving a Hokey-specific seed if possible.

        Args:
            seed: New master seed. With ``None`` the call is simply forwarded
                to the base class.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            # The base reset left no seed in place, so nothing to derive.
            return

        # Replace the seed with one derived from (seed, name, 0) and keep the
        # rng and the stored kwargs in step with it.
        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
134
+
135
+
136
+ hokey = Hokey()
137
+
138
+
139
+ __all__ = ["Hokey", "hokey", "extend_vowels"]