glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +36 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +53 -0
- glitchlings/attack/compose.py +299 -0
- glitchlings/attack/core.py +465 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +104 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +157 -0
- glitchlings/auggie.py +283 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +17 -3
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +37 -65
- glitchlings/dlc/prime.py +55 -114
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +432 -0
- glitchlings/main.py +123 -32
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +29 -176
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +311 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +47 -24
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +301 -167
- glitchlings/zoo/core_execution.py +98 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/ekkokin.py +118 -0
- glitchlings/zoo/hokey.py +137 -0
- glitchlings/zoo/jargoyle.py +179 -274
- glitchlings/zoo/mim1c.py +106 -68
- glitchlings/zoo/pedant/__init__.py +107 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +44 -175
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +359 -116
- glitchlings/zoo/scannequin.py +18 -125
- glitchlings/zoo/transforms.py +386 -0
- glitchlings/zoo/typogre.py +76 -162
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/zeedub.py +33 -86
- glitchlings-0.9.3.dist-info/METADATA +334 -0
- glitchlings-0.9.3.dist-info/RECORD +80 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
- glitchlings/zoo/_ocr_confusions.py +0 -34
- glitchlings/zoo/_rate.py +0 -21
- glitchlings/zoo/reduple.py +0 -169
- glitchlings-0.2.5.dist-info/METADATA +0 -490
- glitchlings-0.2.5.dist-info/RECORD +0 -27
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Internal utilities shared across the glitchlings package.
|
|
2
|
+
|
|
3
|
+
This subpackage contains impure modules that handle side effects:
|
|
4
|
+
|
|
5
|
+
- ``rust.py``: Low-level Rust extension loader and FFI primitives
|
|
6
|
+
- ``rust_ffi.py``: High-level Rust operation wrappers (preferred entry point)
|
|
7
|
+
|
|
8
|
+
Pure modules should NOT import from this package. Use the operations in
|
|
9
|
+
``rust_ffi.py`` at boundary layers only.
|
|
10
|
+
|
|
11
|
+
See AGENTS.md "Functional Purity Architecture" for details.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
# No names are exported from this package namespace via star-import.
__all__: list[str] = []
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Shared helpers for loading the compiled Rust extension."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import sys
|
|
7
|
+
from importlib import machinery, util
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from types import ModuleType
|
|
10
|
+
from typing import Any, Callable, Mapping, MutableMapping, cast
|
|
11
|
+
|
|
12
|
+
# Base name of the compiled extension: used as the file stem when probing for
# the binary, as the name of the subdirectory that is searched, and as the
# module name handed to the import machinery when loading from disk.
_EXTENSION_STEM = "_zoo_rust"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RustExtensionImportError(RuntimeError):
    """Signal that the compiled Rust extension could not be loaded.

    Subclasses ``RuntimeError`` so that generic runtime-error handlers also
    catch it.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _iter_extension_candidates() -> tuple[Path, ...]:
    """Collect the on-disk locations where the compiled extension exists.

    Two directories are probed, in this order: the ``_zoo_rust`` subdirectory
    of the package root, then the package root itself (the parent of the
    directory containing this file). In each directory every suffix listed in
    ``importlib.machinery.EXTENSION_SUFFIXES`` is tried.

    Returns:
        The candidate paths that exist, ordered by directory and then by
        suffix. Empty when nothing was found.
    """
    pkg_dir = Path(__file__).resolve().parents[1]
    probe_dirs = (pkg_dir / _EXTENSION_STEM, pkg_dir)

    # Build every possible path first, then keep only the ones present on disk.
    possible = [
        (directory / _EXTENSION_STEM).with_suffix(ext_suffix)
        for directory in probe_dirs
        for ext_suffix in machinery.EXTENSION_SUFFIXES
    ]
    return tuple(path for path in possible if path.exists())
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _existing_compiled_module() -> ModuleType | None:
    """Return a previously loaded compiled module if one is present.

    ``sys.modules`` is checked under ``glitchlings._zoo_rust`` first and then
    under ``_zoo_rust``. An entry is accepted only when it has a non-empty
    ``__file__`` that does not point at a package initialiser.

    Returns:
        The first matching module, or ``None`` when neither name maps to a
        module that qualifies.
    """
    # A pure-Python package of the same name is not the compiled extension.
    # Its ``__file__`` ends in ``__init__.py`` normally, or ``__init__.pyc``
    # when the package was imported from sourceless bytecode.
    package_markers = ("__init__.py", "__init__.pyc")

    for name in ("glitchlings._zoo_rust", "_zoo_rust"):
        module = sys.modules.get(name)
        if module is None:
            continue
        module_file = getattr(module, "__file__", "")
        if module_file and not str(module_file).endswith(package_markers):
            return module
    return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _load_extension_from_disk() -> ModuleType:
    """Import the compiled extension from the first usable on-disk candidate.

    Each path from ``_iter_extension_candidates()`` is turned into an import
    spec; the first one that yields a spec with a loader is executed and
    returned.

    Returns:
        The freshly loaded extension module.

    Raises:
        RustExtensionImportError: If no candidate produced a usable spec.
    """
    found = _iter_extension_candidates()
    for path in found:
        spec = util.spec_from_file_location(_EXTENSION_STEM, path)
        if spec is not None and spec.loader is not None:
            loaded = util.module_from_spec(spec)
            spec.loader.exec_module(loaded)
            return loaded

    # Nothing could be loaded: report which files were considered.
    if found:
        searched = ", ".join(map(str, found))
    else:
        searched = "<unavailable>"
    raise RustExtensionImportError(
        "glitchlings._zoo_rust failed to import. Rebuild the project with "
        "`pip install .` or `maturin develop` so the compiled extension is available "
        f"(searched: {searched})."
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_rust_module() -> ModuleType:
    """Return the compiled Rust module, loading it from disk when needed.

    A module already registered in ``sys.modules`` is reused. Otherwise the
    extension is loaded from disk and registered under both
    ``glitchlings._zoo_rust`` and ``_zoo_rust``, without overwriting entries
    that already exist under those names.

    Returns:
        The compiled extension module.
    """
    compiled = _existing_compiled_module()
    if compiled is None:
        compiled = _load_extension_from_disk()
        for alias in ("glitchlings._zoo_rust", "_zoo_rust"):
            sys.modules.setdefault(alias, compiled)
    return compiled
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Module-level handle to the compiled extension; stays None until
# _get_rust_module() loads it on first use.
_RUST_MODULE: ModuleType | None = None
# Callables already resolved by get_rust_operation(), keyed by operation name.
_OPERATION_CACHE: MutableMapping[str, Callable[..., Any]] = {}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _get_rust_module() -> ModuleType:
    """Return the module-level extension handle, loading it on the first call.

    The result of ``load_rust_module()`` is stored in ``_RUST_MODULE`` so
    later calls return the stored module directly.
    """
    global _RUST_MODULE

    if _RUST_MODULE is not None:
        return _RUST_MODULE

    _RUST_MODULE = load_rust_module()
    return _RUST_MODULE
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _build_missing_operation_error(name: str) -> RuntimeError:
    """Create (without raising) the error for an unavailable Rust operation.

    Args:
        name: Name of the operation that was requested.

    Returns:
        A ``RuntimeError`` whose message names the operation and advises
        rebuilding the project.
    """
    return RuntimeError(
        f"Rust operation '{name}' is not exported by glitchlings._zoo_rust. "
        "Rebuild the project to refresh the compiled extension."
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def resolve_seed(seed: int | None, rng: random.Random | None) -> int:
    """Produce a seed value, preferring an explicit seed over random draws.

    Args:
        seed: Explicit seed. When given, it is converted with ``int`` and
            masked to its low 64 bits.
        rng: Generator to draw 64 random bits from when ``seed`` is ``None``.
            When this is also ``None``, the ``random`` module's shared
            generator is used.

    Returns:
        The masked explicit seed, or 64 freshly drawn random bits.
    """
    if seed is None:
        # The ``random`` module exposes the same ``getrandbits`` call as an
        # instance, so either can act as the bit source.
        bit_source = random if rng is None else rng
        return bit_source.getrandbits(64)
    return int(seed) & 0xFFFFFFFFFFFFFFFF
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_rust_operation(operation_name: str) -> Callable[..., Any]:
    """Look up a callable exported by :mod:`glitchlings._zoo_rust`.

    Successful lookups are stored in ``_OPERATION_CACHE`` so repeated requests
    for the same name skip the attribute lookup.

    Parameters
    ----------
    operation_name : str
        Name of the function to retrieve from the compiled extension.

    Returns
    -------
    Callable[..., Any]
        The attribute found on the extension module under ``operation_name``.

    Raises
    ------
    RuntimeError
        If the extension has no such attribute, or the attribute is not
        callable.
    """
    cached = _OPERATION_CACHE.get(operation_name)
    if cached is not None:
        return cached

    rust_module = _get_rust_module()
    try:
        attribute = getattr(rust_module, operation_name)
    except AttributeError as exc:
        raise _build_missing_operation_error(operation_name) from exc

    # A non-callable attribute is reported the same way as a missing one.
    if not callable(attribute):
        raise _build_missing_operation_error(operation_name)

    resolved = cast(Callable[..., Any], attribute)
    _OPERATION_CACHE[operation_name] = resolved
    return resolved
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def preload_operations(*operation_names: str) -> Mapping[str, Callable[..., Any]]:
    """Resolve several Rust operations in one call.

    Args:
        *operation_names: Names to resolve through ``get_rust_operation``,
            processed in the order given.

    Returns:
        A mapping from each requested name to its resolved callable.
    """
    resolved: dict[str, Callable[..., Any]] = {}
    for op_name in operation_names:
        resolved[op_name] = get_rust_operation(op_name)
    return resolved
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Public names of this module; underscore-prefixed helpers are left out.
__all__ = [
    "RustExtensionImportError",
    "get_rust_operation",
    "load_rust_module",
    "preload_operations",
    "resolve_seed",
]
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
"""Centralized Rust FFI operations module.
|
|
2
|
+
|
|
3
|
+
This module is the **single entry point** for all Rust FFI calls in the codebase.
|
|
4
|
+
All glitchling transformations that delegate to Rust must go through this module.
|
|
5
|
+
|
|
6
|
+
**Design Philosophy:**
|
|
7
|
+
|
|
8
|
+
This module is explicitly *impure* - it loads and invokes compiled Rust functions
|
|
9
|
+
which are stateful operations. By centralizing all FFI here:
|
|
10
|
+
|
|
11
|
+
1. Pure modules (validation.py, transforms.py, rng.py) never import Rust
|
|
12
|
+
2. The Rust dependency is explicit and traceable
|
|
13
|
+
3. Testing can mock this module to verify Python-only paths
|
|
14
|
+
4. Side effects from FFI are isolated to one location
|
|
15
|
+
|
|
16
|
+
**Usage Pattern:**
|
|
17
|
+
|
|
18
|
+
# In a glitchling module (e.g., typogre.py)
|
|
19
|
+
from glitchlings.internal.rust_ffi import fatfinger_rust
|
|
20
|
+
|
|
21
|
+
def fatfinger(text: str, rate: float, ...) -> str:
|
|
22
|
+
# ... validation and setup ...
|
|
23
|
+
return fatfinger_rust(text, rate, layout, seed)
|
|
24
|
+
|
|
25
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from typing import Any, Literal, Mapping, Sequence, cast
|
|
31
|
+
|
|
32
|
+
from .rust import get_rust_operation, resolve_seed
|
|
33
|
+
|
|
34
|
+
# Public API of this module. ``resolve_seed`` is imported from rust.py and
# re-exported here for backward compatibility.
__all__ = [
    # Seed resolution (re-exported from rust.py)
    "resolve_seed",
    # Orchestration operations
    "plan_glitchlings_rust",
    "compose_glitchlings_rust",
    # Character-level operations
    "fatfinger_rust",
    "slip_modifier_rust",
    "mim1c_rust",
    "ocr_artifacts_rust",
    "inject_zero_widths_rust",
    "hokey_rust",
    # Word-level operations
    "delete_random_words_rust",
    "reduplicate_words_rust",
    "swap_adjacent_words_rust",
    "redact_words_rust",
    "jargoyle_drift_rust",
    "list_lexeme_dictionaries_rust",
    "ekkokin_homophones_rust",
    # Grammar operations
    "pedant_rust",
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# Type Aliases
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
# Orchestration types
# Return shape of plan_glitchlings_rust: a list of (index, derived_seed) pairs.
PlanResult = list[tuple[int, int]]
# One pipeline step description, as accepted by compose_glitchlings_rust.
PipelineDescriptor = Mapping[str, Any]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# Orchestration Operations
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def plan_glitchlings_rust(
    specs: Sequence[Mapping[str, Any]],
    master_seed: int,
) -> PlanResult:
    """Ask the Rust orchestration planner for an execution plan.

    Args:
        specs: Sequence of glitchling specifications with name/scope/order.
        master_seed: Master seed for deterministic ordering; converted with
            ``int`` before it is passed to Rust.

    Returns:
        List of (index, derived_seed) tuples defining execution order, with
        both members converted to ``int``.
    """
    planner = get_rust_operation("plan_glitchlings")
    raw_plan = planner(specs, int(master_seed))

    normalised: PlanResult = []
    for position, derived_seed in raw_plan:
        normalised.append((int(position), int(derived_seed)))
    return normalised
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compose_glitchlings_rust(
    text: str,
    descriptors: Sequence[PipelineDescriptor],
    master_seed: int,
) -> str:
    """Run a sequence of glitchlings over ``text`` through the Rust pipeline.

    Args:
        text: Input text to transform.
        descriptors: Pipeline descriptors, one per glitchling.
        master_seed: Master seed for determinism; converted with ``int``
            before it is passed to Rust.

    Returns:
        Transformed text.
    """
    composer = get_rust_operation("compose_glitchlings")
    transformed = composer(text, descriptors, int(master_seed))
    return cast(str, transformed)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Character-Level Operations
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def fatfinger_rust(
    text: str,
    rate: float,
    layout: Mapping[str, Sequence[str]],
    seed: int,
    *,
    shift_slip_rate: float | None = None,
    shift_slip_exit_rate: float | None = None,
    shift_map: Mapping[str, str] | None = None,
) -> str:
    """Simulate keyboard typos by delegating to the Rust ``fatfinger`` operation.

    All arguments, including the keyword-only ones, are passed to Rust
    positionally in declaration order.

    Args:
        text: Input text.
        rate: Probability of corrupting each character.
        layout: Keyboard neighbor mapping.
        seed: Deterministic seed.
        shift_slip_rate: Probability of entering a shifted burst before
            fat-fingering.
        shift_slip_exit_rate: Probability of releasing shift during a burst.
        shift_map: Mapping of unshifted -> shifted keys for the active layout.

    Returns:
        Text with simulated typing errors.
    """
    fatfinger = get_rust_operation("fatfinger")
    corrupted = fatfinger(
        text,
        rate,
        layout,
        seed,
        shift_slip_rate,
        shift_slip_exit_rate,
        shift_map,
    )
    return cast(str, corrupted)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def slip_modifier_rust(
    text: str,
    enter_rate: float,
    exit_rate: float,
    shift_map: Mapping[str, str],
    seed: int | None,
) -> str:
    """Apply modifier slippage by delegating to the Rust ``slip_modifier`` operation.

    Args:
        text: Input text.
        enter_rate: Probability of starting a shift burst.
        exit_rate: Probability of ending a burst once started.
        shift_map: Mapping of unshifted -> shifted characters.
        seed: Deterministic seed; passed to Rust unchanged, including ``None``.

    Returns:
        Text with modifier slippage applied.
    """
    slip_modifier = get_rust_operation("slip_modifier")
    slipped = slip_modifier(text, enter_rate, exit_rate, shift_map, seed)
    return cast(str, slipped)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def mim1c_rust(
    text: str,
    rate: float,
    classes: list[str] | Literal["all"] | None,
    banned: list[str] | None,
    seed: int,
) -> str:
    """Substitute homoglyphs by delegating to the Rust ``mim1c`` operation.

    Args:
        text: Input text.
        rate: Probability of swapping each character.
        classes: Homoglyph classes to use, or "all".
        banned: Characters that must never be used as replacements.
        seed: Deterministic seed.

    Returns:
        Text with homoglyph substitutions.
    """
    mim1c = get_rust_operation("mim1c")
    substituted = mim1c(text, rate, classes, banned, seed)
    return cast(str, substituted)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def ocr_artifacts_rust(
    text: str,
    rate: float,
    seed: int,
) -> str:
    """Add OCR-like artifacts by delegating to the Rust ``ocr_artifacts`` operation.

    Args:
        text: Input text.
        rate: Probability of introducing artifacts.
        seed: Deterministic seed.

    Returns:
        Text with simulated OCR errors.
    """
    ocr_artifacts = get_rust_operation("ocr_artifacts")
    degraded = ocr_artifacts(text, rate, seed)
    return cast(str, degraded)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def inject_zero_widths_rust(
    text: str,
    rate: float,
    characters: list[str],
    seed: int | None,
) -> str:
    """Insert zero-width characters via the Rust ``inject_zero_widths`` operation.

    Args:
        text: Input text.
        rate: Probability of injection between characters.
        characters: Palette of zero-width characters to use.
        seed: Deterministic seed; passed to Rust unchanged, including ``None``.

    Returns:
        Text with injected zero-width characters.
    """
    inject = get_rust_operation("inject_zero_widths")
    injected = inject(text, rate, characters, seed)
    return cast(str, injected)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def hokey_rust(
    text: str,
    rate: float,
    extension_min: int,
    extension_max: int,
    word_length_threshold: int,
    base_p: float,
    seed: int | None,
) -> str:
    """Stretch expressive segments by delegating to the Rust ``hokey`` operation.

    Args:
        text: Input text.
        rate: Selection rate for candidate words.
        extension_min: Minimum extra repetitions.
        extension_max: Maximum extra repetitions.
        word_length_threshold: Preferred max word length.
        base_p: Base probability for sampler.
        seed: Deterministic seed; passed to Rust unchanged, including ``None``.

    Returns:
        Text with extended expressive segments.
    """
    hokey = get_rust_operation("hokey")
    stretched = hokey(
        text,
        rate,
        extension_min,
        extension_max,
        word_length_threshold,
        base_p,
        seed,
    )
    return cast(str, stretched)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# ---------------------------------------------------------------------------
|
|
265
|
+
# Word-Level Operations
|
|
266
|
+
# ---------------------------------------------------------------------------
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def delete_random_words_rust(
    text: str,
    rate: float,
    unweighted: bool,
    seed: int,
) -> str:
    """Drop random words by delegating to the Rust ``delete_random_words`` operation.

    Args:
        text: Input text.
        rate: Probability of deleting each word.
        unweighted: If True, use uniform selection; else weight by length.
        seed: Deterministic seed.

    Returns:
        Text with words deleted.
    """
    delete_words = get_rust_operation("delete_random_words")
    remaining = delete_words(text, rate, unweighted, seed)
    return cast(str, remaining)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def reduplicate_words_rust(
    text: str,
    rate: float,
    unweighted: bool,
    seed: int,
) -> str:
    """Repeat random words by delegating to the Rust ``reduplicate_words`` operation.

    Args:
        text: Input text.
        rate: Probability of duplicating each word.
        unweighted: If True, use uniform selection; else weight by length.
        seed: Deterministic seed.

    Returns:
        Text with words duplicated.
    """
    reduplicate = get_rust_operation("reduplicate_words")
    doubled = reduplicate(text, rate, unweighted, seed)
    return cast(str, doubled)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def swap_adjacent_words_rust(
    text: str,
    rate: float,
    seed: int,
) -> str:
    """Swap neighbouring words via the Rust ``swap_adjacent_words`` operation.

    Args:
        text: Input text.
        rate: Probability of swapping adjacent word pairs.
        seed: Deterministic seed.

    Returns:
        Text with adjacent words swapped.
    """
    swap_words = get_rust_operation("swap_adjacent_words")
    swapped = swap_words(text, rate, seed)
    return cast(str, swapped)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def redact_words_rust(
    text: str,
    replacement: str,
    rate: float,
    merge: bool,
    unweighted: bool,
    seed: int,
) -> str:
    """Black out random words by delegating to the Rust ``redact_words`` operation.

    Args:
        text: Input text.
        replacement: Character to replace word characters with.
        rate: Probability of redacting each word.
        merge: If True, merge adjacent redactions.
        unweighted: If True, use uniform selection; else weight by length.
        seed: Deterministic seed.

    Returns:
        Text with words redacted.
    """
    redact = get_rust_operation("redact_words")
    redacted = redact(text, replacement, rate, merge, unweighted, seed)
    return cast(str, redacted)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def jargoyle_drift_rust(
    text: str,
    lexemes: str,
    mode: str,
    rate: float,
    seed: int | None,
) -> str:
    """Run Jargoyle's dictionary-based word drift via the Rust ``jargoyle_drift`` operation.

    Args:
        text: Input text.
        lexemes: Name of the dictionary to use (colors, synonyms, corporate,
            academic, cyberpunk, lovecraftian, or any custom dictionary
            discovered in the lexemes directory).
        mode: Drift mode ("literal" or "drift").
        rate: Probability of transforming each matching word.
        seed: Deterministic seed (only used for "drift" mode).

    Returns:
        Text with word substitutions applied.
    """
    drift = get_rust_operation("jargoyle_drift")
    drifted = drift(text, lexemes, mode, rate, seed)
    return cast(str, drifted)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def list_lexeme_dictionaries_rust() -> list[str]:
    """Query the Rust extension for the available lexeme dictionaries.

    Returns:
        List of dictionary names available for Jargoyle.
    """
    list_dictionaries = get_rust_operation("list_lexeme_dictionaries")
    dictionary_names = list_dictionaries()
    return cast(list[str], dictionary_names)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def ekkokin_homophones_rust(
    text: str,
    rate: float,
    weighting: str,
    seed: int | None,
) -> str:
    """Swap words for homophones via the Rust ``ekkokin_homophones`` operation.

    Args:
        text: Input text.
        rate: Probability of substituting each word.
        weighting: Weighting mode for selection.
        seed: Deterministic seed; passed to Rust unchanged, including ``None``.

    Returns:
        Text with homophone substitutions.
    """
    homophones = get_rust_operation("ekkokin_homophones")
    substituted = homophones(text, rate, weighting, seed)
    return cast(str, substituted)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# ---------------------------------------------------------------------------
|
|
411
|
+
# Grammar Operations
|
|
412
|
+
# ---------------------------------------------------------------------------
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def pedant_rust(
    text: str,
    *,
    stone: str,
    seed: int,
) -> str:
    """Apply a pedant grammar transformation via the Rust ``pedant`` operation.

    Unlike the other wrappers in this module, ``stone`` and ``seed`` are
    passed to the Rust callable by keyword.

    Args:
        text: Input text.
        stone: Pedant stone label defining transformation type.
        seed: Deterministic seed.

    Returns:
        Text with grammar transformation applied.
    """
    pedant = get_rust_operation("pedant")
    corrected = pedant(text, stone=stone, seed=seed)
    return cast(str, corrected)
|