glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
glitchlings/main.py
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""Command line interface for summoning and running glitchlings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import difflib
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
from collections.abc import Sequence
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, cast
|
|
13
|
+
|
|
14
|
+
import yaml
|
|
15
|
+
|
|
16
|
+
from . import SAMPLE_TEXT
|
|
17
|
+
from .attack import Attack
|
|
18
|
+
from .conf import DEFAULT_ATTACK_SEED, build_gaggle, load_attack_config
|
|
19
|
+
from .zoo import (
|
|
20
|
+
BUILTIN_GLITCHLINGS,
|
|
21
|
+
DEFAULT_GLITCHLING_NAMES,
|
|
22
|
+
Gaggle,
|
|
23
|
+
Glitchling,
|
|
24
|
+
parse_glitchling_spec,
|
|
25
|
+
summon,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Length of the longest built-in glitchling name; used as the right-alignment
# width when names are printed. ``default=0`` keeps the module importable even
# if the built-in registry is empty (``max()`` raises on an empty iterable).
MAX_NAME_WIDTH = max(
    (len(glitchling.name) for glitchling in BUILTIN_GLITCHLINGS.values()),
    default=0,
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_parser(
    *,
    exit_on_error: bool = True,
    include_text: bool = True,
) -> argparse.ArgumentParser:
    """Build the argument parser for the ``glitchlings`` command line.

    Args:
        exit_on_error: Forwarded unchanged to ``argparse.ArgumentParser``.
        include_text: When true, register the positional ``text`` argument.

    Returns:
        argparse.ArgumentParser: Parser with every CLI option registered.

    """
    cli = argparse.ArgumentParser(
        description=(
            "Summon glitchlings to corrupt text. Provide input text as an argument, "
            "via --input-file, or pipe it on stdin."
        ),
        exit_on_error=exit_on_error,
    )
    register = cli.add_argument

    if include_text:
        register(
            "text",
            nargs="*",
            help="Text to corrupt. If omitted, stdin is used or --sample provides fallback text.",
        )

    # Registration order is preserved because it determines the order in --help.
    register(
        "-g",
        "--glitchling",
        dest="glitchlings",
        action="append",
        metavar="SPEC",
        help=(
            "Glitchling to apply, optionally with parameters like "
            "Typogre(rate=0.05). Repeat for multiples; defaults to all built-ins."
        ),
    )
    register(
        "-s",
        "--seed",
        type=int,
        default=None,
        help="Seed controlling deterministic corruption order (default: 151).",
    )
    register(
        "-i",
        "--input-file",
        dest="input_file",
        type=Path,
        help="Read input text from a file instead of the command line argument.",
    )
    register(
        "-o",
        "--output-file",
        dest="output_file",
        type=Path,
        help="Write output to a file instead of stdout.",
    )

    # Plain boolean switches that share the same registration shape.
    for switch, description in (
        ("--sample", "Use the included SAMPLE_TEXT when no other input is provided."),
        ("--diff", "Show a unified diff between the original and corrupted text."),
        ("--list", "List available glitchlings and exit."),
    ):
        register(switch, action="store_true", help=description)

    register(
        "-c",
        "--config",
        type=Path,
        help="Load glitchlings from a YAML configuration file.",
    )

    for switch, description in (
        (
            "--attack",
            "Output an Attack summary. Includes metrics and counts without full token lists.",
        ),
        (
            "--report",
            "Output a full Attack report. Includes tokens, token IDs, metrics, and counts.",
        ),
    ):
        register(switch, action="store_true", help=description)

    register(
        "-f",
        "--format",
        dest="output_format",
        choices=["json", "yaml", "yml"],
        default="json",
        help="Output format for --attack or --report (default: json).",
    )
    register(
        "-t",
        "--tokenizer",
        dest="tokenizer",
        help=(
            "Tokenizer to use for --attack or --report. "
            "Checks tiktoken first, then HuggingFace tokenizers library. "
            "Examples: cl100k_base, gpt-4, bert-base-uncased."
        ),
    )

    return cli
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def list_glitchlings() -> None:
    """Print one right-aligned summary line per default built-in glitchling."""
    for name in DEFAULT_GLITCHLING_NAMES:
        entry = BUILTIN_GLITCHLINGS[name]
        # Pad every name to the same width so the em-dash column lines up.
        label = f"{entry.name:>{MAX_NAME_WIDTH}}"
        scope = entry.level.name.title()
        order = entry.order.name.lower()
        print(f"{label} — scope: {scope}, order: {order}")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
|
|
151
|
+
"""Resolve the input text based on CLI arguments.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
args: Parsed arguments from the CLI.
|
|
155
|
+
parser: The argument parser used for emitting user-facing errors.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
str: The text to corrupt.
|
|
159
|
+
|
|
160
|
+
Raises:
|
|
161
|
+
SystemExit: Raised indirectly via ``parser.error`` on failure.
|
|
162
|
+
|
|
163
|
+
"""
|
|
164
|
+
file_path = cast(Path | None, getattr(args, "input_file", None))
|
|
165
|
+
if file_path is not None:
|
|
166
|
+
try:
|
|
167
|
+
return file_path.read_text(encoding="utf-8")
|
|
168
|
+
except OSError as exc:
|
|
169
|
+
filename = getattr(exc, "filename", None) or file_path
|
|
170
|
+
reason = exc.strerror or str(exc)
|
|
171
|
+
parser.error(f"Failed to read file {filename}: {reason}")
|
|
172
|
+
|
|
173
|
+
text_argument = cast(str | list[str] | None, getattr(args, "text", None))
|
|
174
|
+
if isinstance(text_argument, list):
|
|
175
|
+
if text_argument:
|
|
176
|
+
return " ".join(text_argument)
|
|
177
|
+
text_argument = None
|
|
178
|
+
if isinstance(text_argument, str) and text_argument:
|
|
179
|
+
return text_argument
|
|
180
|
+
|
|
181
|
+
if not sys.stdin.isatty():
|
|
182
|
+
return sys.stdin.read()
|
|
183
|
+
|
|
184
|
+
if bool(getattr(args, "sample", False)):
|
|
185
|
+
return SAMPLE_TEXT
|
|
186
|
+
|
|
187
|
+
parser.error(
|
|
188
|
+
"No input text provided. Supply text as an argument, use --input-file, pipe input, or "
|
|
189
|
+
"pass --sample."
|
|
190
|
+
)
|
|
191
|
+
raise AssertionError("parser.error should exit")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def summon_glitchlings(
    names: list[str] | None,
    parser: argparse.ArgumentParser,
    seed: int | None,
    *,
    config_path: Path | None = None,
) -> Gaggle:
    """Build the ``Gaggle`` requested on the command line.

    Args:
        names: Glitchling specifications from ``--glitchling``, if any.
        parser: Parser used to report user-facing errors.
        seed: Seed from ``--seed``; ``None`` selects the default or config seed.
        config_path: Optional configuration file; cannot be combined with ``names``.

    Returns:
        Gaggle: The assembled glitchlings.

    """
    if config_path is None:
        members: list[str | Glitchling]
        if names:
            members = []
            for spec in names:
                try:
                    members.append(parse_glitchling_spec(spec))
                except ValueError as exc:
                    parser.error(str(exc))
                    raise AssertionError("parser.error should exit")
        else:
            # Nothing requested explicitly: use every default built-in.
            members = list(DEFAULT_GLITCHLING_NAMES)

        chosen_seed = DEFAULT_ATTACK_SEED if seed is None else seed
        try:
            return summon(list(members), seed=chosen_seed)
        except ValueError as exc:
            parser.error(str(exc))
            raise AssertionError("parser.error should exit")

    # Configuration-file path: explicit specs are rejected.
    if names:
        parser.error("Cannot combine --config with --glitchling.")
        raise AssertionError("parser.error should exit")

    try:
        config = load_attack_config(config_path)
    except (TypeError, ValueError) as exc:
        parser.error(str(exc))
        raise AssertionError("parser.error should exit")

    return build_gaggle(config, seed_override=seed)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def show_diff(original: str, corrupted: str) -> None:
    """Print a unified diff between the original and corrupted text.

    Args:
        original: Text before corruption.
        corrupted: Text after corruption.

    Prints ``No changes detected.`` when the two texts produce no diff lines.
    """
    diff_lines = list(
        difflib.unified_diff(
            original.splitlines(keepends=True),
            corrupted.splitlines(keepends=True),
            fromfile="original",
            tofile="corrupted",
            lineterm="",
        )
    )
    if not diff_lines:
        print("No changes detected.")
        return

    for line in diff_lines:
        # Content lines keep their own newline (keepends=True) while the header
        # and hunk lines have none (lineterm=""). Only add a newline when one is
        # missing, otherwise multi-line input is printed double-spaced.
        print(line, end="" if line.endswith("\n") else "\n")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _format_report_json(payload: dict[str, Any]) -> str:
|
|
256
|
+
"""Format a report payload as JSON with compact token arrays.
|
|
257
|
+
|
|
258
|
+
Token lists are formatted on a single line for readability, while
|
|
259
|
+
other structures retain standard indented formatting.
|
|
260
|
+
"""
|
|
261
|
+
# Keys whose values should be formatted compactly (single line)
|
|
262
|
+
compact_keys = {
|
|
263
|
+
"input_tokens",
|
|
264
|
+
"output_tokens",
|
|
265
|
+
"input_token_ids",
|
|
266
|
+
"output_token_ids",
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
# First, serialize with standard formatting
|
|
270
|
+
raw = json.dumps(payload, indent=2)
|
|
271
|
+
|
|
272
|
+
# Then compact token arrays: find multi-line arrays for compact_keys
|
|
273
|
+
for key in compact_keys:
|
|
274
|
+
# Pattern matches: "key": [\n items...\n ]
|
|
275
|
+
# and replaces with: "key": [items...]
|
|
276
|
+
pattern = rf'("{key}":\s*)\[\s*\n((?:\s+.*?\n)*?)\s*\]'
|
|
277
|
+
|
|
278
|
+
def compact_array(match: re.Match[str]) -> str:
|
|
279
|
+
prefix = match.group(1)
|
|
280
|
+
content = match.group(2)
|
|
281
|
+
# Extract items from the multi-line content
|
|
282
|
+
items = re.findall(r"(?:^\s+)(.+?)(?:,?\s*$)", content, re.MULTILINE)
|
|
283
|
+
return f"{prefix}[{', '.join(items)}]"
|
|
284
|
+
|
|
285
|
+
raw = re.sub(pattern, compact_array, raw)
|
|
286
|
+
|
|
287
|
+
return raw
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _write_output(content: str, output_file: Path | None) -> None:
|
|
291
|
+
"""Write content to output file or stdout."""
|
|
292
|
+
if output_file is not None:
|
|
293
|
+
output_file.write_text(content, encoding="utf-8")
|
|
294
|
+
else:
|
|
295
|
+
print(content, end="" if content.endswith("\n") else "\n")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
    """Run the CLI workflow for already-parsed arguments.

    Validates option combinations, resolves the input text, builds the gaggle,
    and then either emits an attack summary/report or the corrupted text
    (optionally as a diff).

    Args:
        args: Parsed CLI arguments.
        parser: Argument parser used for error reporting.

    Returns:
        int: Exit code for the process (``0`` on success).

    """
    if args.list:
        list_glitchlings()
        return 0

    def reject(message: str) -> None:
        """Report a usage error through the parser; never returns normally."""
        parser.error(message)
        raise AssertionError("parser.error should exit")

    attack_requested = bool(getattr(args, "attack", False))
    report_requested = bool(getattr(args, "report", False))
    if attack_requested and report_requested:
        reject("Cannot combine --attack with --report. Use one or the other.")

    metrics_requested = attack_requested or report_requested
    if metrics_requested and args.diff:
        reject("--diff cannot be combined with --report/--attack output.")

    # A diff is only ever printed, so it cannot be redirected to a file.
    destination = cast(Path | None, getattr(args, "output_file", None))
    if args.diff and destination:
        reject("--diff cannot be combined with --output-file.")

    # Any non-default --format value only makes sense for metrics output.
    requested_format = cast(str, args.output_format)
    if requested_format != "json" and not metrics_requested:
        reject("--format requires --attack or --report.")
    render_as = "yaml" if requested_format == "yml" else requested_format

    tokenizer_spec = cast(str | None, getattr(args, "tokenizer", None))
    if tokenizer_spec and not metrics_requested:
        reject("--tokenizer requires --attack or --report.")

    text = read_text(args, parser)
    gaggle = summon_glitchlings(
        args.glitchlings,
        parser,
        args.seed,
        config_path=args.config,
    )

    if not metrics_requested:
        # Plain corruption path: print the result or its diff.
        corrupted = gaggle.corrupt(text)
        if not isinstance(corrupted, str):
            raise TypeError("Gaggle returned non-string output for string input")
        if args.diff:
            show_diff(text, corrupted)
        else:
            _write_output(corrupted, destination)
        return 0

    # Metrics path: an explicit --seed wins over the gaggle's own seed.
    attack_seed = getattr(gaggle, "seed", None) if args.seed is None else args.seed
    result = Attack(gaggle, tokenizer=tokenizer_spec, seed=attack_seed).run(text)

    payload = result.to_report()
    if attack_requested:
        # --attack: summary only, so drop the token lists from the report.
        token_fields = {
            "input_tokens",
            "output_tokens",
            "input_token_ids",
            "output_token_ids",
        }
        payload = {
            field: value for field, value in payload.items() if field not in token_fields
        }

    if render_as != "json":
        rendered = yaml.safe_dump(payload, sort_keys=False)
    elif attack_requested:
        # The summary has no token arrays, so standard indentation suffices.
        rendered = json.dumps(payload, indent=2)
    else:
        # Full report: keep token arrays on a single line.
        rendered = _format_report_json(payload)

    _write_output(rendered, destination)
    return 0
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``glitchlings`` command line interface.

    Args:
        argv: Command line arguments without the program name. When ``None``,
            ``sys.argv[1:]`` is used.

    Returns:
        int: Exit code suitable for use with ``sys.exit``.

    """
    # Copy caller-supplied arguments so the caller's list is never shared.
    cli_args = sys.argv[1:] if argv is None else list(argv)
    parser = build_parser()
    return run_cli(parser.parse_args(cli_args), parser)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
glitchlings/protocols.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Protocols for dependency inversion across submodules.
|
|
2
|
+
|
|
3
|
+
This module defines protocol classes that allow submodules to depend on
|
|
4
|
+
abstract interfaces rather than concrete implementations. This eliminates
|
|
5
|
+
circular imports and improves testability.
|
|
6
|
+
|
|
7
|
+
Design Philosophy
|
|
8
|
+
-----------------
|
|
9
|
+
The attack submodule needs to work with glitchlings but shouldn't depend
|
|
10
|
+
on the concrete zoo.core.Glitchling class. Instead, it depends on the
|
|
11
|
+
Corruptor protocol which defines the minimal interface needed.
|
|
12
|
+
|
|
13
|
+
This follows the Dependency Inversion Principle (the D in SOLID):
|
|
14
|
+
- High-level modules (attack) should not depend on low-level modules (zoo)
|
|
15
|
+
- Both should depend on abstractions (protocols)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from .util.transcripts import Transcript, TranscriptTarget
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# NOTE: ``runtime_checkable`` allows ``isinstance(obj, Corruptor)``, but such a
# check only verifies that the listed members exist; it does not validate
# their signatures or types.
@runtime_checkable
class Corruptor(Protocol):
    """Protocol for objects that can corrupt text.

    This protocol defines the minimal interface that the attack submodule
    needs from glitchlings. Any object implementing these methods can be
    used with Attack, SeedSweep, GridSearch, and TokenizerComparison.

    The zoo.core.Glitchling and zoo.core.Gaggle classes satisfy this protocol.

    Attributes:
        seed: The RNG seed for deterministic corruption.
        transcript_target: Which transcript turns to target for corruption.

    Example:
        >>> class MockCorruptor:
        ...     seed = 42
        ...     transcript_target = "last"
        ...     def corrupt(self, text): return text.upper()
        ...     def clone(self, seed=None): return MockCorruptor()
        >>> attack = Attack(MockCorruptor())  # Works with protocol
    """

    # RNG seed for deterministic corruption; the annotation also permits None.
    seed: int | None
    # Selects which transcript turns are corrupted. The type name is quoted
    # because TranscriptTarget is imported only under TYPE_CHECKING.
    transcript_target: "TranscriptTarget"

    def corrupt(
        self,
        text: "str | Transcript",
    ) -> "str | Transcript":
        """Apply corruption to text or transcript.

        Args:
            text: Input text string or chat transcript.

        Returns:
            Corrupted text or transcript (same type as input).
        """
        ...

    def clone(self, seed: int | None = None) -> "Corruptor":
        """Create a copy of this corruptor, optionally with a new seed.

        Args:
            seed: Optional new seed for the clone.

        Returns:
            A new Corruptor instance with the same configuration.
        """
        ...
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Runtime-checkable so ``isinstance(obj, Clonable)`` can be used; the check
# only confirms that a ``clone`` member exists, not its signature.
@runtime_checkable
class Clonable(Protocol):
    """Protocol for objects that support cloning.

    This minimal protocol is used when we only need to clone objects
    without caring about their other capabilities.
    """

    def clone(self, seed: int | None = None) -> "Clonable":
        """Create a copy of this object.

        Args:
            seed: Optional seed for the copy; defaults to ``None``.

        Returns:
            A new object satisfying ``Clonable``.
        """
        ...
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
__all__ = ["Clonable", "Corruptor"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Compatibility wrapper for runtime configuration helpers.
|
|
2
|
+
|
|
3
|
+
Prefer ``glitchlings.conf`` for imports.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from .conf import (
|
|
9
|
+
CONFIG_ENV_VAR,
|
|
10
|
+
DEFAULT_CONFIG_PATH,
|
|
11
|
+
RuntimeConfig,
|
|
12
|
+
get_config,
|
|
13
|
+
reload_config,
|
|
14
|
+
reset_config,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"CONFIG_ENV_VAR",
|
|
19
|
+
"DEFAULT_CONFIG_PATH",
|
|
20
|
+
"RuntimeConfig",
|
|
21
|
+
"get_config",
|
|
22
|
+
"reload_config",
|
|
23
|
+
"reset_config",
|
|
24
|
+
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from .keyboards import (
|
|
2
|
+
KEYNEIGHBORS,
|
|
3
|
+
SHIFT_MAPS,
|
|
4
|
+
KeyboardLayouts,
|
|
5
|
+
KeyNeighbors,
|
|
6
|
+
ShiftMap,
|
|
7
|
+
ShiftMaps,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"SAMPLE_TEXT",
|
|
12
|
+
"KeyboardLayouts",
|
|
13
|
+
"ShiftMap",
|
|
14
|
+
"ShiftMaps",
|
|
15
|
+
"KeyNeighbors",
|
|
16
|
+
"KEYNEIGHBORS",
|
|
17
|
+
"SHIFT_MAPS",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
SAMPLE_TEXT = (
|
|
21
|
+
"One morning, when Gregor Samsa woke from troubled dreams, he found himself "
|
|
22
|
+
"transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
|
|
23
|
+
"if he lifted his head a little he could see his brown belly, slightly domed and "
|
|
24
|
+
"divided by arches into stiff sections. The bedding was hardly able to cover it and "
|
|
25
|
+
"seemed ready to slide off any moment. His many legs, pitifully thin compared with "
|
|
26
|
+
"the size of the rest of him, waved about helplessly as he looked."
|
|
27
|
+
)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Adapter helpers shared across Python and DLC integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ..zoo import Gaggle, Glitchling, summon
|
|
9
|
+
from .transcripts import TranscriptTarget
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def coerce_gaggle(
    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
    *,
    seed: int,
    apply_seed_to_existing: bool = False,
    transcript_target: TranscriptTarget | None = None,
) -> Gaggle:
    """Turn any supported glitchling specifier into a :class:`Gaggle`.

    An existing Gaggle is returned as the same object (updated in place when
    requested); anything else is validated and passed to ``summon``.

    Args:
        glitchlings: A single Glitchling, Gaggle, string specification, or iterable
            of glitchlings/specs.
        seed: Seed used when a new Gaggle is constructed from the input.
        apply_seed_to_existing: When True, the seed is also assigned to an
            existing Gaggle and its glitchlings are re-sorted. When False
            (default), an existing Gaggle keeps its current seed.
        transcript_target: Which transcript turns to corrupt. When None (default),
            the Gaggle's own setting is left untouched. Accepts:
            - "last": corrupt only the last turn (default)
            - "all": corrupt all turns
            - "assistant": corrupt only assistant turns
            - "user": corrupt only user turns
            - int: corrupt a specific index (negative indexing supported)
            - Sequence[int]: corrupt specific indices

    Raises:
        TypeError: If an entry is neither a string nor a Glitchling.
    """
    if isinstance(glitchlings, Gaggle):
        existing = glitchlings
        if apply_seed_to_existing:
            existing.seed = seed
            existing.sort_glitchlings()
        if transcript_target is not None:
            existing.transcript_target = transcript_target
        return existing

    # Wrap a lone spec so the validation below always sees a list.
    entries: list[Any]
    if isinstance(glitchlings, (Glitchling, str)):
        entries = [glitchlings]
    else:
        entries = list(glitchlings)

    # Validate entries before passing to summon to give better error messages
    for position, candidate in enumerate(entries):
        if isinstance(candidate, (str, Glitchling)):
            continue
        raise TypeError(
            f"glitchlings sequence entries must be Glitchling instances "
            f"or string specifications (index {position})"
        )

    built = summon(entries, seed=seed)
    if transcript_target is not None:
        built.transcript_target = transcript_target
    return built
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
__all__ = ["coerce_gaggle"]
|