glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,173 @@
1
+ """Integration helpers for PyTorch Lightning data modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterable, Mapping, Sequence
6
+ from typing import Any, cast
7
+
8
+ from ..compat.loaders import get_pytorch_lightning_datamodule
9
+ from ..util.adapters import coerce_gaggle
10
+ from ..zoo import Gaggle, Glitchling
11
+ from ._shared import normalize_column_spec, wrap_dataloader
12
+
13
+
14
def _glitch_datamodule(
    datamodule: Any,
    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
    column: str | Sequence[str],
    *,
    seed: int = 151,
) -> Any:
    """Build the glitching proxy for ``datamodule``.

    Args:
        datamodule: Object to wrap; it is stored on the proxy as-is.
        glitchlings: Specification handed to ``coerce_gaggle`` together with
            ``seed`` to obtain the gaggle.
        column: Column name or names, normalised via ``normalize_column_spec``.
        seed: Seed forwarded to ``coerce_gaggle`` (default: 151).

    Returns:
        A ``_GlitchedLightningDataModule`` wrapping ``datamodule``.

    Raises:
        ValueError: If ``normalize_column_spec`` returns ``None``.
    """
    normalized = normalize_column_spec(column)
    if normalized is None:  # pragma: no cover - defensive
        raise ValueError("At least one column must be specified")

    # Lightning datamodules only support string column names (mapping keys),
    # so the normalised spec is narrowed to ``list[str]`` for the type checker.
    string_columns = cast(list[str], normalized)
    resolved_gaggle = coerce_gaggle(glitchlings, seed=seed)
    return _GlitchedLightningDataModule(datamodule, string_columns, resolved_gaggle)
31
+
32
+
33
def GlitchedLightningDataModule(
    datamodule: Any,
    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
    *,
    column: str | Sequence[str],
    seed: int = 151,
) -> Any:
    """Wrap a PyTorch Lightning ``LightningDataModule`` so its batches are glitched.

    Public entry point: the arguments are passed straight through to
    ``_glitch_datamodule``, which builds the proxy whose dataloaders apply
    the glitchlings to the requested columns.

    Args:
        datamodule: The LightningDataModule to wrap.
        glitchlings: A glitchling, a gaggle, or a specification of glitchlings
            to apply.
        column: Name of the column to corrupt (string), or several names
            (sequence of strings). Keyword-only.
        seed: RNG seed for deterministic corruption (default: 151).

    Returns:
        The wrapped datamodule, whose dataloaders yield corrupted batches.

    Example:
        >>> from pytorch_lightning import LightningDataModule
        >>> from glitchlings.dlc.pytorch_lightning import GlitchedLightningDataModule
        >>> class MyDataModule(LightningDataModule):
        ...     def train_dataloader(self):
        ...         return [{"text": "hello", "label": 0}]
        >>> dm = MyDataModule()
        >>> glitched = GlitchedLightningDataModule(dm, "typogre", column="text")
        >>> batches = list(glitched.train_dataloader())
    """
    proxy = _glitch_datamodule(datamodule, glitchlings, column, seed=seed)
    return proxy
65
+
66
+
67
+ class _GlitchedLightningDataModule:
68
+ """Proxy wrapper around a LightningDataModule applying glitchlings to batches."""
69
+
70
+ def __init__(self, base: Any, columns: list[str], gaggle: Gaggle) -> None:
71
+ object.__setattr__(self, "_glitch_base", base)
72
+ object.__setattr__(self, "_glitch_columns", columns)
73
+ object.__setattr__(self, "_glitch_gaggle", gaggle)
74
+
75
+ def __getattr__(self, attribute: str) -> Any:
76
+ return getattr(self._glitch_base, attribute)
77
+
78
+ def __setattr__(self, attribute: str, value: Any) -> None:
79
+ if attribute.startswith("_glitch_"):
80
+ object.__setattr__(self, attribute, value)
81
+ else:
82
+ setattr(self._glitch_base, attribute, value)
83
+
84
+ def __delattr__(self, attribute: str) -> None:
85
+ if attribute.startswith("_glitch_"):
86
+ object.__delattr__(self, attribute)
87
+ else:
88
+ delattr(self._glitch_base, attribute)
89
+
90
+ def __dir__(self) -> list[str]:
91
+ return sorted(set(dir(self.__class__)) | set(dir(self._glitch_base)))
92
+
93
+ # LightningDataModule API -------------------------------------------------
94
+ def prepare_data(self, *args: Any, **kwargs: Any) -> Any:
95
+ return self._glitch_base.prepare_data(*args, **kwargs)
96
+
97
+ def setup(self, *args: Any, **kwargs: Any) -> Any:
98
+ return self._glitch_base.setup(*args, **kwargs)
99
+
100
+ def teardown(self, *args: Any, **kwargs: Any) -> Any:
101
+ return self._glitch_base.teardown(*args, **kwargs)
102
+
103
+ def state_dict(self) -> Mapping[str, Any]:
104
+ state = self._glitch_base.state_dict()
105
+ return cast(Mapping[str, Any], state)
106
+
107
+ def load_state_dict(self, state_dict: Mapping[str, Any]) -> None:
108
+ self._glitch_base.load_state_dict(state_dict)
109
+
110
+ def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
111
+ return self._glitch_base.transfer_batch_to_device(batch, device, dataloader_idx)
112
+
113
+ def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
114
+ return self._glitch_base.on_before_batch_transfer(batch, dataloader_idx)
115
+
116
+ def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
117
+ return self._glitch_base.on_after_batch_transfer(batch, dataloader_idx)
118
+
119
+ def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
120
+ loader = self._glitch_base.train_dataloader(*args, **kwargs)
121
+ return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
122
+
123
+ def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
124
+ loader = self._glitch_base.val_dataloader(*args, **kwargs)
125
+ return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
126
+
127
+ def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
128
+ loader = self._glitch_base.test_dataloader(*args, **kwargs)
129
+ return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
130
+
131
+ def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
132
+ loader = self._glitch_base.predict_dataloader(*args, **kwargs)
133
+ return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
134
+
135
+
136
+ # Module initialization: set up inheritance from LightningDataModule if available
137
def _setup_inheritance() -> None:
    """Make ``_GlitchedLightningDataModule`` a subclass of LightningDataModule.

    Called once at module import time. The Lightning base class is obtained
    from ``get_pytorch_lightning_datamodule()``; when that returns ``None`` the
    proxy is left as a plain class. Otherwise the proxy's ``__bases__`` is
    reassigned in place, and if Python rejects that with ``TypeError`` an
    equivalent class deriving from the Lightning base is created and bound to
    the module-level name ``_GlitchedLightningDataModule``.

    The intent is for ``isinstance(glitched, LightningDataModule)`` checks to
    succeed and for the wrapper to interoperate with Lightning APIs that
    require that type.
    """
    lightning_base = get_pytorch_lightning_datamodule()
    if lightning_base is None:
        # LightningDataModule is not available: keep the plain-object proxy.
        return

    proxy_cls = _GlitchedLightningDataModule
    try:
        # Preferred path: re-parent the existing class object in place.
        proxy_cls.__bases__ = (lightning_base,)
    except TypeError:
        # __bases__ could not be modified (e.g., due to __slots__), so rebuild
        # the class. The per-class ``__dict__``/``__weakref__`` descriptors are
        # left out of the copied namespace.
        skipped = {"__dict__", "__weakref__"}
        members = {}
        for key, member in vars(proxy_cls).items():
            if key not in skipped:
                members[key] = member
        rebuilt = cast(
            type[Any],
            type("_GlitchedLightningDataModule", (lightning_base,), members),
        )
        # Rebind the module global so later lookups see the new class.
        globals()["_GlitchedLightningDataModule"] = rebuilt
167
+
168
+
169
+ # Set up inheritance at module import time
170
+ _setup_inheritance()
171
+
172
+
173
+ __all__ = ["GlitchedLightningDataModule"]
@@ -0,0 +1,16 @@
1
+ """Internal utilities shared across the glitchlings package.
2
+
3
+ This subpackage contains impure modules that handle side effects:
4
+
5
+ - ``rust.py``: Low-level Rust extension loader and FFI primitives
6
+ - ``rust_ffi.py``: High-level Rust operation wrappers (preferred entry point)
7
+
8
+ Pure modules should NOT import from this package. Use the operations in
9
+ ``rust_ffi.py`` at boundary layers only.
10
+
11
+ See AGENTS.md "Functional Purity Architecture" for details.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ __all__ = []
@@ -0,0 +1,159 @@
1
+ """Shared helpers for loading the compiled Rust extension."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ import sys
7
+ from importlib import machinery, util
8
+ from pathlib import Path
9
+ from types import ModuleType
10
+ from typing import Any, Callable, Mapping, MutableMapping, cast
11
+
12
+ _EXTENSION_STEM = "_corruption_engine"
13
+
14
+
15
class RustExtensionImportError(RuntimeError):
    """Signal that the compiled Rust extension could not be imported.

    Raised by the loader in this module when none of the candidate extension
    files yields a loadable module.
    """
17
+
18
+
19
def _iter_extension_candidates() -> tuple[Path, ...]:
    """Collect the compiled-extension files that exist inside the package.

    Two directories are probed, in this order: ``<package>/_corruption_engine``
    and ``<package>`` itself, where ``<package>`` is the parent of this
    module's directory. In each, the extension stem is combined with every
    suffix in ``importlib.machinery.EXTENSION_SUFFIXES``.

    Returns
    -------
    tuple[Path, ...]
        Existing candidate paths, ordered by directory and then by suffix.
        Empty when nothing is found.
    """

    package_root = Path(__file__).resolve().parents[1]
    stems = [
        directory / _EXTENSION_STEM
        for directory in (package_root / _EXTENSION_STEM, package_root)
    ]
    probes = (
        stem.with_suffix(suffix)
        for stem in stems
        for suffix in machinery.EXTENSION_SUFFIXES
    )
    return tuple(path for path in probes if path.exists())
33
+
34
+
35
+ def _existing_compiled_module() -> ModuleType | None:
36
+ """Return a previously loaded compiled module if one is present."""
37
+
38
+ for name in ("glitchlings._corruption_engine", "_corruption_engine"):
39
+ module = sys.modules.get(name)
40
+ if module is None:
41
+ continue
42
+ module_file = getattr(module, "__file__", "")
43
+ if module_file and not str(module_file).endswith("__init__.py"):
44
+ return module
45
+ return None
46
+
47
+
48
def _load_extension_from_disk() -> ModuleType:
    """Load the compiled extension from disk or raise if unavailable.

    The paths from ``_iter_extension_candidates()`` are tried in order. The
    first one for which importlib produces a spec with a loader is executed
    and returned; candidates without a usable spec are skipped.

    Returns
    -------
    ModuleType
        The freshly executed extension module.

    Raises
    ------
    RustExtensionImportError
        If no candidate yielded a loadable spec. The message lists the
        candidate paths that were considered, or ``<unavailable>`` when no
        candidate file exists. Exceptions raised while executing a candidate
        propagate unchanged.
    """

    candidates = _iter_extension_candidates()
    for candidate in candidates:
        spec = util.spec_from_file_location(_EXTENSION_STEM, candidate)
        if spec is None or spec.loader is None:
            continue
        module = util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    searched = ", ".join(str(path) for path in candidates) or "<unavailable>"
    # Adjacent literals are concatenated verbatim, so each fragment must carry
    # its own trailing space (the first one previously did not, producing
    # "with`pip install .`").
    message = (
        "glitchlings._corruption_engine failed to import. Rebuild the project with "
        "`pip install .` or `maturin develop` so the compiled extension is available "
        f"(searched: {searched})."
    )
    raise RustExtensionImportError(message)
67
+
68
+
69
def load_rust_module() -> ModuleType:
    """Return the compiled Rust module, loading it from disk if necessary.

    A module already present in ``sys.modules`` (as reported by
    ``_existing_compiled_module()``) is returned untouched. Otherwise the
    extension is loaded via ``_load_extension_from_disk()`` and registered
    under both ``glitchlings._corruption_engine`` and ``_corruption_engine``,
    without overwriting entries that already exist for those names.
    """

    resolved = _existing_compiled_module()
    if resolved is None:
        resolved = _load_extension_from_disk()
        for alias in ("glitchlings._corruption_engine", "_corruption_engine"):
            # setdefault keeps whatever is already registered under the alias.
            sys.modules.setdefault(alias, resolved)
    return resolved
80
+
81
+
82
# Module-level memo of the compiled extension; populated by _get_rust_module().
_RUST_MODULE: ModuleType | None = None
# Maps operation name -> callable resolved from the compiled extension.
_OPERATION_CACHE: MutableMapping[str, Callable[..., Any]] = {}


def _get_rust_module() -> ModuleType:
    """Return the compiled Rust module, importing it on the first call only.

    The result of ``load_rust_module()`` is stored in the module-level
    ``_RUST_MODULE`` and reused by every later call.
    """

    global _RUST_MODULE

    if _RUST_MODULE is not None:
        return _RUST_MODULE

    _RUST_MODULE = load_rust_module()
    return _RUST_MODULE
95
+
96
+
97
+ def _build_missing_operation_error(name: str) -> RuntimeError:
98
+ message = (
99
+ "Rust operation '{name}' is not exported by glitchlings._corruption_engine."
100
+ "Rebuild the project to refresh the compiled extension."
101
+ )
102
+ return RuntimeError(message.format(name=name))
103
+
104
+
105
def resolve_seed(seed: int | None, rng: random.Random | None) -> int:
    """Produce an unsigned 64-bit seed from an explicit value or an RNG.

    Precedence: an explicit ``seed`` wins and is converted with ``int()`` and
    masked to its low 64 bits; otherwise 64 bits are drawn from ``rng``; with
    neither given, 64 bits are drawn from the ``random`` module's global
    generator.
    """

    if seed is None:
        source = random if rng is None else rng
        return source.getrandbits(64)
    return int(seed) & 0xFFFFFFFFFFFFFFFF
113
+
114
+
115
def get_rust_operation(operation_name: str) -> Callable[..., Any]:
    """Look up a callable exported by :mod:`glitchlings._corruption_engine`.

    Resolved callables are memoised in ``_OPERATION_CACHE``, so the compiled
    module is consulted at most once per successfully resolved name.

    Parameters
    ----------
    operation_name : str
        Name of the function to retrieve from the compiled extension.

    Returns
    -------
    Callable[..., Any]
        The attribute of that name on the compiled module.

    Raises
    ------
    RuntimeError
        If the operation cannot be located or is not callable.
    """

    cached = _OPERATION_CACHE.get(operation_name)
    if cached is not None:
        return cached

    rust_module = _get_rust_module()
    try:
        exported = getattr(rust_module, operation_name)
    except AttributeError as missing:
        raise _build_missing_operation_error(operation_name) from missing

    if callable(exported):
        resolved = cast(Callable[..., Any], exported)
        _OPERATION_CACHE[operation_name] = resolved
        return resolved

    # The attribute exists but is not a function: report it like a missing one.
    raise _build_missing_operation_error(operation_name)
145
+
146
+
147
def preload_operations(*operation_names: str) -> Mapping[str, Callable[..., Any]]:
    """Resolve several Rust operations up front.

    Each name is passed to ``get_rust_operation`` in the order given; the
    result maps every requested name to its callable. Any error raised while
    resolving a name propagates to the caller.
    """

    resolved: dict[str, Callable[..., Any]] = {}
    for requested in operation_names:
        resolved[requested] = get_rust_operation(requested)
    return resolved
151
+
152
+
153
+ __all__ = [
154
+ "RustExtensionImportError",
155
+ "get_rust_operation",
156
+ "load_rust_module",
157
+ "preload_operations",
158
+ "resolve_seed",
159
+ ]