glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Source Replacements (space-separated)
|
|
2
|
+
li h
|
|
3
|
+
h li
|
|
4
|
+
rn m
|
|
5
|
+
m rn
|
|
6
|
+
cl d
|
|
7
|
+
d cl
|
|
8
|
+
I l
|
|
9
|
+
l I 1
|
|
10
|
+
1 l I
|
|
11
|
+
0 O
|
|
12
|
+
O 0
|
|
13
|
+
B 8
|
|
14
|
+
8 B
|
|
15
|
+
S 5
|
|
16
|
+
5 S
|
|
17
|
+
Z 2
|
|
18
|
+
2 Z
|
|
19
|
+
G 6
|
|
20
|
+
6 G
|
|
21
|
+
“ "
|
|
22
|
+
” "
|
|
23
|
+
‘ '
|
|
24
|
+
’ '
|
|
25
|
+
— -
|
|
26
|
+
– -
|
|
27
|
+
vv w
|
|
28
|
+
w vv
|
|
29
|
+
ri n
|
|
30
|
+
n ri
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"pipeline_assets": [
|
|
3
|
+
{
|
|
4
|
+
"name": "apostrofae_pairs.json",
|
|
5
|
+
"kind": "copy"
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
"name": "ekkokin_homophones.json",
|
|
9
|
+
"kind": "copy"
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"name": "hokey_assets.json",
|
|
13
|
+
"kind": "copy"
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"name": "lexemes",
|
|
17
|
+
"kind": "copy"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"name": "ocr_confusions.tsv",
|
|
21
|
+
"kind": "copy"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "mim1c_homoglyphs.json.gz.b64",
|
|
25
|
+
"kind": "compressed",
|
|
26
|
+
"output": "mim1c_homoglyphs.json"
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Attack submodule for comparing text before and after corruption.
|
|
2
|
+
|
|
3
|
+
This module follows the functional purity architecture:
|
|
4
|
+
|
|
5
|
+
**Pure Planning** (core_planning.py):
|
|
6
|
+
- Input analysis and type guards
|
|
7
|
+
- Attack plan construction
|
|
8
|
+
- Result assembly helpers
|
|
9
|
+
|
|
10
|
+
**Impure Execution** (core_execution.py):
|
|
11
|
+
- Glitchling resolution
|
|
12
|
+
- Tokenization execution
|
|
13
|
+
- Metric computation
|
|
14
|
+
|
|
15
|
+
**Boundary Layer** (core.py):
|
|
16
|
+
- Input validation
|
|
17
|
+
- Orchestration via Attack class
|
|
18
|
+
|
|
19
|
+
**Analysis Tools** (analysis.py):
|
|
20
|
+
- SeedSweep, GridSearch, TokenizerComparison
|
|
21
|
+
|
|
22
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .analysis import (
|
|
26
|
+
GlitchlingComparisonEntry,
|
|
27
|
+
GlitchlingComparisonResult,
|
|
28
|
+
GridSearch,
|
|
29
|
+
GridSearchPoint,
|
|
30
|
+
GridSearchResult,
|
|
31
|
+
SeedSweep,
|
|
32
|
+
SeedSweepResult,
|
|
33
|
+
TokenizerComparison,
|
|
34
|
+
TokenizerComparisonEntry,
|
|
35
|
+
TokenizerComparisonResult,
|
|
36
|
+
compare_glitchlings,
|
|
37
|
+
compare_tokenizers,
|
|
38
|
+
compute_aggregate_stats,
|
|
39
|
+
extract_scalar_metrics,
|
|
40
|
+
format_stats_summary,
|
|
41
|
+
generate_param_combinations,
|
|
42
|
+
rank_grid_points,
|
|
43
|
+
)
|
|
44
|
+
from .core import Attack, AttackResult
|
|
45
|
+
from .core_execution import (
|
|
46
|
+
execute_attack,
|
|
47
|
+
execute_corruption,
|
|
48
|
+
execute_metrics,
|
|
49
|
+
execute_tokenization,
|
|
50
|
+
get_default_metrics,
|
|
51
|
+
resolve_glitchlings,
|
|
52
|
+
)
|
|
53
|
+
from .core_planning import (
|
|
54
|
+
AttackPlan,
|
|
55
|
+
BatchAdapter,
|
|
56
|
+
EncodedData,
|
|
57
|
+
ResultPlan,
|
|
58
|
+
assemble_batch_result_fields,
|
|
59
|
+
assemble_empty_result_fields,
|
|
60
|
+
assemble_result_fields,
|
|
61
|
+
assemble_single_result_fields,
|
|
62
|
+
compute_token_counts,
|
|
63
|
+
extract_transcript_contents,
|
|
64
|
+
format_token_count_delta,
|
|
65
|
+
is_string_batch,
|
|
66
|
+
is_transcript_like,
|
|
67
|
+
plan_attack,
|
|
68
|
+
plan_result,
|
|
69
|
+
)
|
|
70
|
+
from .encode import describe_tokenizer, encode_batch, encode_single
|
|
71
|
+
from .metrics import (
|
|
72
|
+
MetricName,
|
|
73
|
+
entropy_delta,
|
|
74
|
+
jensen_shannon_divergence,
|
|
75
|
+
merge_split_index,
|
|
76
|
+
normalized_edit_distance,
|
|
77
|
+
subsequence_retention,
|
|
78
|
+
)
|
|
79
|
+
from .metrics_dispatch import TokenBatch, TokenSequence, is_batch, validate_batch_consistency
|
|
80
|
+
from .tokenization import Tokenizer, list_available_tokenizers
|
|
81
|
+
|
|
82
|
+
__all__ = [
|
|
83
|
+
# Core orchestration
|
|
84
|
+
"Attack",
|
|
85
|
+
"AttackResult",
|
|
86
|
+
"Tokenizer",
|
|
87
|
+
"list_available_tokenizers",
|
|
88
|
+
# Metrics
|
|
89
|
+
"MetricName",
|
|
90
|
+
"jensen_shannon_divergence",
|
|
91
|
+
"normalized_edit_distance",
|
|
92
|
+
"subsequence_retention",
|
|
93
|
+
"entropy_delta",
|
|
94
|
+
"merge_split_index",
|
|
95
|
+
# Analysis tools (impure orchestrators)
|
|
96
|
+
"SeedSweep",
|
|
97
|
+
"SeedSweepResult",
|
|
98
|
+
"GridSearch",
|
|
99
|
+
"GridSearchResult",
|
|
100
|
+
"GridSearchPoint",
|
|
101
|
+
"TokenizerComparison",
|
|
102
|
+
"TokenizerComparisonResult",
|
|
103
|
+
"TokenizerComparisonEntry",
|
|
104
|
+
# Comparison functions
|
|
105
|
+
"compare_glitchlings",
|
|
106
|
+
"compare_tokenizers",
|
|
107
|
+
"GlitchlingComparisonEntry",
|
|
108
|
+
"GlitchlingComparisonResult",
|
|
109
|
+
# Analysis pure helpers
|
|
110
|
+
"compute_aggregate_stats",
|
|
111
|
+
"format_stats_summary",
|
|
112
|
+
"extract_scalar_metrics",
|
|
113
|
+
"generate_param_combinations",
|
|
114
|
+
"rank_grid_points",
|
|
115
|
+
# Core planning (pure)
|
|
116
|
+
"AttackPlan",
|
|
117
|
+
"BatchAdapter",
|
|
118
|
+
"ResultPlan",
|
|
119
|
+
"EncodedData",
|
|
120
|
+
"plan_attack",
|
|
121
|
+
"plan_result",
|
|
122
|
+
"is_string_batch",
|
|
123
|
+
"is_transcript_like",
|
|
124
|
+
"assemble_result_fields",
|
|
125
|
+
"assemble_single_result_fields",
|
|
126
|
+
"assemble_batch_result_fields",
|
|
127
|
+
"assemble_empty_result_fields",
|
|
128
|
+
"compute_token_counts",
|
|
129
|
+
"extract_transcript_contents",
|
|
130
|
+
"format_token_count_delta",
|
|
131
|
+
# Core execution (impure)
|
|
132
|
+
"get_default_metrics",
|
|
133
|
+
"resolve_glitchlings",
|
|
134
|
+
"execute_corruption",
|
|
135
|
+
"execute_tokenization",
|
|
136
|
+
"execute_metrics",
|
|
137
|
+
"execute_attack",
|
|
138
|
+
# Encode (pure)
|
|
139
|
+
"describe_tokenizer",
|
|
140
|
+
"encode_batch",
|
|
141
|
+
"encode_single",
|
|
142
|
+
# Metrics dispatch (pure)
|
|
143
|
+
"TokenBatch",
|
|
144
|
+
"TokenSequence",
|
|
145
|
+
"is_batch",
|
|
146
|
+
"validate_batch_consistency",
|
|
147
|
+
]
|