glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,101 @@
1
+ from .attack import (
2
+ Attack,
3
+ AttackResult,
4
+ GlitchlingComparisonEntry,
5
+ GlitchlingComparisonResult,
6
+ GridSearch,
7
+ GridSearchResult,
8
+ MetricName,
9
+ SeedSweep,
10
+ SeedSweepResult,
11
+ TokenizerComparison,
12
+ TokenizerComparisonResult,
13
+ compare_glitchlings,
14
+ compare_tokenizers,
15
+ )
16
+ from .auggie import Auggie
17
+ from .conf import AttackConfig, build_gaggle, load_attack_config
18
+ from .util import SAMPLE_TEXT
19
+ from .util.transcripts import TranscriptTarget
20
+ from .zoo import (
21
+ Gaggle,
22
+ Glitchling,
23
+ Hokey,
24
+ Jargoyle,
25
+ Mim1c,
26
+ Pedant,
27
+ Redactyl,
28
+ Rushmore,
29
+ RushmoreMode,
30
+ Scannequin,
31
+ Typogre,
32
+ Wherewolf,
33
+ Zeedub,
34
+ hokey,
35
+ jargoyle,
36
+ mim1c,
37
+ pedant,
38
+ plan_operations,
39
+ redactyl,
40
+ rushmore,
41
+ scannequin,
42
+ summon,
43
+ typogre,
44
+ wherewolf,
45
+ zeedub,
46
+ )
47
+
48
+ __version__ = "1.0.0"
49
+
50
+ __all__ = [
51
+ # Attack analysis
52
+ "Attack",
53
+ "AttackResult",
54
+ "SeedSweep",
55
+ "SeedSweepResult",
56
+ "GridSearch",
57
+ "GridSearchResult",
58
+ "TokenizerComparison",
59
+ "TokenizerComparisonResult",
60
+ # Comparison functions
61
+ "compare_glitchlings",
62
+ "compare_tokenizers",
63
+ "GlitchlingComparisonEntry",
64
+ "GlitchlingComparisonResult",
65
+ # Metrics
66
+ "MetricName",
67
+ # Builder
68
+ "Auggie",
69
+ # Glitchlings
70
+ "Typogre",
71
+ "typogre",
72
+ "Mim1c",
73
+ "mim1c",
74
+ "Jargoyle",
75
+ "jargoyle",
76
+ "Wherewolf",
77
+ "wherewolf",
78
+ "Hokey",
79
+ "hokey",
80
+ "Pedant",
81
+ "pedant",
82
+ "Redactyl",
83
+ "redactyl",
84
+ "Rushmore",
85
+ "rushmore",
86
+ "RushmoreMode",
87
+ "Scannequin",
88
+ "scannequin",
89
+ "Zeedub",
90
+ "zeedub",
91
+ "summon",
92
+ "Glitchling",
93
+ "Gaggle",
94
+ "plan_operations",
95
+ # Utilities
96
+ "SAMPLE_TEXT",
97
+ "AttackConfig",
98
+ "build_gaggle",
99
+ "load_attack_config",
100
+ "TranscriptTarget",
101
+ ]
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+
5
+ from .main import main
6
+
7
if __name__ == "__main__":
    # Run the CLI entry point and hand its return value to the interpreter
    # as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,12 @@
1
+ """Compatibility wrapper for the compiled Rust extension."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ from glitchlings.internal.rust import load_rust_module
8
+
9
# Resolve the compiled extension through the shared loader.
_module = load_rust_module()

# Register the extension under its bare name, but never override an entry
# that is already present in ``sys.modules``.
if "_corruption_engine" not in sys.modules:
    sys.modules["_corruption_engine"] = _module

# Replace this wrapper package with the extension itself so that importing
# this package hands callers the loaded module directly.
sys.modules[__name__] = _module
@@ -0,0 +1,180 @@
1
+ """Shared asset helpers for Python and Rust consumers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass
7
+ from functools import cache
8
+ from hashlib import blake2b
9
+ from importlib import resources
10
+ from pathlib import Path
11
+ from typing import Any, BinaryIO, Iterable, Iterator, Literal, TextIO, cast
12
+
13
+ try:
14
+ from importlib.resources.abc import Traversable # Python 3.11+
15
+ except ImportError: # pragma: no cover - Python <3.11
16
+ from importlib_resources.abc import Traversable
17
+
18
+ AssetKind = Literal["copy", "compressed"]
19
+
20
+ _DEFAULT_DIGEST_SIZE = 32
21
+ _PIPELINE_MANIFEST_NAME = "pipeline_assets.json"
22
+
23
+
24
@dataclass(frozen=True)
class PipelineAsset:
    """Immutable description of one asset that is staged into the Rust build.

    ``name`` identifies the asset, ``kind`` is its asset kind (``"copy"``
    unless stated otherwise) and ``output`` optionally overrides the name the
    asset is staged under.
    """

    name: str
    kind: AssetKind = "copy"
    output: str | None = None

    @property
    def staged_name(self) -> str:
        """Return ``output`` when it is set, otherwise fall back to ``name``."""
        if self.output is None:
            return self.name
        return self.output
35
+
36
+
37
def _iter_asset_roots() -> Iterable[Traversable]:
    """Yield candidate locations for the shared glitchling asset bundle.

    Candidates are produced in priority order:

    1. this package's own directory, as resolved by ``importlib.resources``;
    2. an ``assets`` directory located via ``parents[3]`` of this file's
       resolved path (presumably the repository checkout layout -- confirm
       against the repo).

    Only locations that exist as directories are yielded, so the result may
    be empty.
    """

    package_root: Traversable | None
    try:
        package_root = resources.files(__name__)
    except ModuleNotFoundError:  # pragma: no cover - defensive guard for install issues
        package_root = None
    if package_root is not None and package_root.is_dir():
        yield package_root

    # ``parents[3]`` raises IndexError when this file sits fewer than four
    # directories below the filesystem root (e.g. the package is importable
    # straight from ``/`` or a drive root).  That simply means there is no
    # repository checkout to fall back to, so stop instead of crashing.
    try:
        repo_root = Path(__file__).resolve().parents[3] / "assets"
    except IndexError:
        return
    if repo_root.is_dir():
        yield cast(Traversable, repo_root)
51
+
52
+
53
def _asset(name: str) -> Traversable:
    """Locate the bundled asset ``name`` in the first root that contains it.

    Every candidate root is probed in order; the first one holding a file or
    directory called ``name`` wins.

    Raises:
        FileNotFoundError: if no root contains ``name``. The message lists
            every path that was tried, or ``<unavailable>`` when there were
            no roots at all.
    """

    roots = list(_iter_asset_roots())
    for base in roots:
        resolved = base.joinpath(name)
        if resolved.is_file() or resolved.is_dir():
            return resolved

    tried = [str(base.joinpath(name)) for base in roots]
    searched = ", ".join(tried) or "<unavailable>"
    raise FileNotFoundError(f"Asset '{name}' not found in: {searched}")
62
+
63
+
64
def read_text(name: str, *, encoding: str = "utf-8") -> str:
    """Read the bundled asset ``name`` and return its text decoded with ``encoding``."""

    asset = _asset(name)
    contents = asset.read_text(encoding=encoding)
    return cast(str, contents)
68
+
69
+
70
def open_text(name: str, *, encoding: str = "utf-8") -> TextIO:
    """Return a text-mode read handle for the bundled asset ``name``.

    The caller owns the returned handle and is responsible for closing it.
    """

    asset = _asset(name)
    stream = asset.open("r", encoding=encoding)
    return cast(TextIO, stream)
74
+
75
+
76
def open_binary(name: str) -> BinaryIO:
    """Return a binary-mode read handle for the bundled asset ``name``.

    The caller owns the returned handle and is responsible for closing it.
    """

    asset = _asset(name)
    stream = asset.open("rb")
    return cast(BinaryIO, stream)
80
+
81
+
82
def load_json(name: str, *, encoding: str = "utf-8") -> Any:
    """Parse the bundled asset ``name`` as JSON and return the decoded value.

    The asset is opened through ``open_text`` with the given ``encoding`` and
    closed again before the parsed value is returned.
    """

    with open_text(name, encoding=encoding) as stream:
        payload = json.load(stream)
    return payload
87
+
88
+
89
def _iter_asset_files(root: Traversable, prefix: str = "") -> Iterator[tuple[str, Traversable]]:
    """Walk ``root`` recursively, yielding ``(relative_path, entry)`` per file.

    Children are visited sorted by name so the sequence is deterministic.
    Relative paths are ``prefix`` followed by the entry names joined with
    ``/``; directories themselves are never yielded, only descended into.
    """

    children = list(root.iterdir())
    children.sort(key=lambda child: child.name)
    for child in children:
        path = f"{prefix}{child.name}"
        if not child.is_dir():
            yield path, child
            continue
        yield from _iter_asset_files(child, prefix=f"{path}/")
99
+
100
+
101
def hash_asset(name: str) -> str:
    """Compute the hexadecimal BLAKE2b digest of the bundled asset ``name``.

    A file asset is hashed by its content alone. A directory asset is hashed
    by feeding, for each contained file in deterministic order, the UTF-8
    encoded relative path followed by that file's content.
    """

    hasher = blake2b(digest_size=_DEFAULT_DIGEST_SIZE)
    target = _asset(name)

    def _feed(entry: Traversable) -> None:
        # Stream the file in 8 KiB reads so it is never held in memory whole.
        with entry.open("rb") as stream:
            while True:
                piece = stream.read(8192)
                if piece == b"":
                    break
                hasher.update(piece)

    if not target.is_dir():
        _feed(target)
        return hasher.hexdigest()

    for relative, entry in _iter_asset_files(target):
        # The relative path is mixed in ahead of the content.
        hasher.update(relative.encode("utf-8"))
        _feed(entry)
    return hasher.hexdigest()
119
+
120
+
121
@cache
def load_homophone_groups(name: str = "ekkokin_homophones.json") -> tuple[tuple[str, ...], ...]:
    """Load the homophone groups bundled for the Wherewolf glitchling.

    The JSON asset ``name`` is decoded and each of its groups is converted to
    a tuple, giving an immutable tuple of tuples. ``functools.cache``
    memoises the result per ``name``.
    """

    groups: list[list[str]] = load_json(name)
    return tuple(map(tuple, groups))
127
+
128
+
129
def _parse_pipeline_manifest(raw: Any) -> tuple[PipelineAsset, ...]:
    """Validate a decoded pipeline manifest and convert it to asset specs.

    ``raw`` must be a dict with a ``"pipeline_assets"`` list. Each list entry
    must be a dict with a non-empty string ``"name"``, an optional ``"kind"``
    of ``"copy"`` (the default) or ``"compressed"``, and an optional string
    ``"output"``.

    Returns:
        One ``PipelineAsset`` per entry, in manifest order.

    Raises:
        ValueError: on the first structural or type violation encountered.
    """

    if not (isinstance(raw, dict) and "pipeline_assets" in raw):
        raise ValueError("pipeline_assets manifest must be a mapping with a 'pipeline_assets' list")

    items = raw["pipeline_assets"]
    if not isinstance(items, list):
        raise ValueError("pipeline_assets manifest must contain a list of entries")

    allowed_kinds = ("copy", "compressed")

    def _convert(item: Any) -> PipelineAsset:
        # Validate one manifest entry and build its spec.
        if not isinstance(item, dict):
            raise ValueError("pipeline_assets entries must be objects with a name field")

        name = item.get("name")
        if not (isinstance(name, str) and name):
            raise ValueError("pipeline_assets entries must supply a non-empty name")

        kind = cast(AssetKind, item.get("kind", "copy"))
        if kind not in allowed_kinds:
            raise ValueError(f"unsupported asset kind '{kind}' in pipeline manifest")

        output = item.get("output")
        if not (output is None or isinstance(output, str)):
            raise ValueError("pipeline_assets output names must be strings when provided")

        return PipelineAsset(name=name, kind=kind, output=output)

    # The generator is consumed eagerly, so the first bad entry still raises
    # before anything is returned.
    return tuple(_convert(item) for item in items)
157
+
158
+
159
@cache
def _load_pipeline_asset_specs() -> tuple[PipelineAsset, ...]:
    """Load and parse the bundled pipeline manifest, caching the parsed specs."""

    return _parse_pipeline_manifest(load_json(_PIPELINE_MANIFEST_NAME))
163
+
164
+
165
# Parsed at import time: the manifest entries, in manifest order.
PIPELINE_ASSET_SPECS = _load_pipeline_asset_specs()
# The asset names from those entries, as an immutable set for membership tests.
PIPELINE_ASSETS = frozenset([spec.name for spec in PIPELINE_ASSET_SPECS])
167
+
168
+
169
+ __all__ = [
170
+ "AssetKind",
171
+ "PipelineAsset",
172
+ "PIPELINE_ASSETS",
173
+ "PIPELINE_ASSET_SPECS",
174
+ "read_text",
175
+ "open_text",
176
+ "open_binary",
177
+ "load_json",
178
+ "hash_asset",
179
+ "load_homophone_groups",
180
+ ]
@@ -0,0 +1,32 @@
1
+ {
2
+ "\"": [
3
+ ["“", "”"],
4
+ ["„", "“"],
5
+ ["«", "»"],
6
+ ["‹", "›"],
7
+ ["『", "』"],
8
+ ["「", "」"],
9
+ ["﹁", "﹂"],
10
+ ["﹃", "﹄"],
11
+ ["〝", "〞"],
12
+ ["❝", "❞"]
13
+ ],
14
+ "'": [
15
+ ["‘", "’"],
16
+ ["‚", "‘"],
17
+ ["‹", "›"],
18
+ ["❮", "❯"],
19
+ ["❛", "❜"],
20
+ ["﹇", "﹈"]
21
+ ],
22
+ "`": [
23
+ ["‵", "′"],
24
+ ["﹁", "﹂"],
25
+ ["﹃", "﹄"],
26
+ ["⌈", "⌉"],
27
+ ["⌊", "⌋"],
28
+ ["⎡", "⎤"],
29
+ ["⎣", "⎦"],
30
+ ["〝", "〞"]
31
+ ]
32
+ }