glitchlings 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. glitchlings-0.2.2/MANIFEST.in +4 -0
  2. {glitchlings-0.2.1 → glitchlings-0.2.2}/PKG-INFO +12 -18
  3. {glitchlings-0.2.1 → glitchlings-0.2.2}/README.md +8 -17
  4. {glitchlings-0.2.1 → glitchlings-0.2.2}/pyproject.toml +8 -7
  5. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/Cargo.lock +0 -7
  6. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/Cargo.toml +0 -1
  7. glitchlings-0.2.2/rust/zoo/assets/ocr_confusions.tsv +30 -0
  8. glitchlings-0.2.2/rust/zoo/build.rs +134 -0
  9. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/glitch_ops.rs +1 -1
  10. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/lib.rs +2 -1
  11. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/resources.rs +24 -34
  12. glitchlings-0.2.1/rust/typogre/src/lib.rs → glitchlings-0.2.2/rust/zoo/src/typogre.rs +3 -9
  13. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/main.py +17 -39
  14. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/util/__init__.py +30 -0
  15. glitchlings-0.2.2/src/glitchlings/zoo/__init__.py +134 -0
  16. glitchlings-0.2.2/src/glitchlings/zoo/_ocr_confusions.py +34 -0
  17. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/jargoyle.py +53 -11
  18. glitchlings-0.2.2/src/glitchlings/zoo/ocr_confusions.tsv +30 -0
  19. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/redactyl.py +3 -1
  20. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/scannequin.py +4 -29
  21. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/typogre.py +12 -4
  22. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/PKG-INFO +12 -18
  23. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/SOURCES.txt +4 -2
  24. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/requires.txt +3 -0
  25. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_cli.py +29 -1
  26. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_gaggle.py +19 -6
  27. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_glitchlings_determinism.py +0 -11
  28. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_jargoyle.py +1 -11
  29. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_keyboard_layouts.py +18 -1
  30. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_parameter_effects.py +6 -1
  31. glitchlings-0.2.2/tests/test_prime_echo_chamber.py +205 -0
  32. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_property_based.py +1 -1
  33. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_rust_backed_glitchlings.py +41 -1
  34. glitchlings-0.2.1/MANIFEST.in +0 -4
  35. glitchlings-0.2.1/rust/typogre/Cargo.toml +0 -14
  36. glitchlings-0.2.1/rust/zoo/build.rs +0 -60
  37. glitchlings-0.2.1/src/glitchlings/zoo/__init__.py +0 -57
  38. glitchlings-0.2.1/tests/test_prime_echo_chamber.py +0 -99
  39. {glitchlings-0.2.1 → glitchlings-0.2.2}/LICENSE +0 -0
  40. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/Cargo.toml +0 -0
  41. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/pipeline.rs +0 -0
  42. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/rng.rs +0 -0
  43. {glitchlings-0.2.1 → glitchlings-0.2.2}/rust/zoo/src/text_buffer.rs +0 -0
  44. {glitchlings-0.2.1 → glitchlings-0.2.2}/setup.cfg +0 -0
  45. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/__init__.py +0 -0
  46. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/__main__.py +0 -0
  47. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/dlc/__init__.py +0 -0
  48. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/dlc/huggingface.py +0 -0
  49. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/dlc/prime.py +0 -0
  50. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/core.py +0 -0
  51. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/mim1c.py +0 -0
  52. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/reduple.py +0 -0
  53. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings/zoo/rushmore.py +0 -0
  54. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/dependency_links.txt +0 -0
  55. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/entry_points.txt +0 -0
  56. {glitchlings-0.2.1 → glitchlings-0.2.2}/src/glitchlings.egg-info/top_level.txt +0 -0
  57. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_dataset_corruption.py +0 -0
  58. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_glitchling_core.py +0 -0
  59. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_huggingface_dlc.py +0 -0
  60. {glitchlings-0.2.1 → glitchlings-0.2.2}/tests/test_util.py +0 -0
@@ -0,0 +1,4 @@
1
+ recursive-include rust *.rs *.toml *.lock *.tsv
2
+ recursive-include src/glitchlings/zoo *.tsv
3
+ prune rust/target
4
+ prune rust/zoo/target
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -232,11 +232,14 @@ Provides-Extra: hf
232
232
  Requires-Dist: datasets>=4.0.0; extra == "hf"
233
233
  Provides-Extra: wordnet
234
234
  Requires-Dist: nltk>=3.9.1; extra == "wordnet"
235
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
235
236
  Provides-Extra: prime
236
237
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
237
238
  Provides-Extra: dev
238
239
  Requires-Dist: pytest>=8.0.0; extra == "dev"
239
240
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
241
+ Requires-Dist: nltk>=3.9.1; extra == "dev"
242
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
240
243
  Dynamic: license-file
241
244
 
242
245
  #
@@ -294,22 +297,10 @@ print(gaggle(SAMPLE_TEXT))
294
297
 
295
298
  ## Usage
296
299
 
297
- Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
298
-
299
- - **Direct invocation** Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
300
- - **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
301
-
302
- ### Rust pipeline acceleration (opt-in)
303
-
304
- The refactored Rust pipeline can execute multiple glitchlings without
305
- bouncing back through Python, but it is gated behind a feature flag so
306
- teams can roll it out gradually. After compiling the Rust extension
307
- (`python -m cibuildwheel --output-dir dist`) set
308
- `GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
309
- `glitchlings`. When the flag is set and the extension is available,
310
- `Gaggle` automatically batches compatible glitchlings into the Rust
311
- pipeline; otherwise it transparently falls back to the legacy Python
312
- loop.
300
+ Need detailed usage patterns, dataset workflows, or tips for enabling the
301
+ Rust accelerator? Consult the [Glitchlings Usage Guide](docs/index.md)
302
+ for end-to-end instructions spanning the Python API, CLI, Hugging Face
303
+ integrations, and the feature-flagged Rust pipeline.
313
304
 
314
305
  ### Prime Intellect environments
315
306
 
@@ -384,11 +375,14 @@ glitchlings --list
384
375
  # Run Typogre against the contents of a file and inspect the diff.
385
376
  glitchlings -g typogre --file documents/report.txt --diff
386
377
 
378
+ # Configure glitchlings inline by passing keyword arguments.
379
+ glitchlings -g "Typogre(max_change_rate=0.05)" "Ghouls just wanna have fun"
380
+
387
381
  # Pipe text straight into the CLI for an on-the-fly corruption.
388
382
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
389
383
  ```
390
384
 
391
- Use `--help` for a complete breakdown of available options.
385
+ Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
392
386
 
393
387
  ## Development
394
388
 
@@ -53,22 +53,10 @@ print(gaggle(SAMPLE_TEXT))
53
53
 
54
54
  ## Usage
55
55
 
56
- Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
57
-
58
- - **Direct invocation** Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
59
- - **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
60
-
61
- ### Rust pipeline acceleration (opt-in)
62
-
63
- The refactored Rust pipeline can execute multiple glitchlings without
64
- bouncing back through Python, but it is gated behind a feature flag so
65
- teams can roll it out gradually. After compiling the Rust extension
66
- (`python -m cibuildwheel --output-dir dist`) set
67
- `GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
68
- `glitchlings`. When the flag is set and the extension is available,
69
- `Gaggle` automatically batches compatible glitchlings into the Rust
70
- pipeline; otherwise it transparently falls back to the legacy Python
71
- loop.
56
+ Need detailed usage patterns, dataset workflows, or tips for enabling the
57
+ Rust accelerator? Consult the [Glitchlings Usage Guide](docs/index.md)
58
+ for end-to-end instructions spanning the Python API, CLI, Hugging Face
59
+ integrations, and the feature-flagged Rust pipeline.
72
60
 
73
61
  ### Prime Intellect environments
74
62
 
@@ -143,11 +131,14 @@ glitchlings --list
143
131
  # Run Typogre against the contents of a file and inspect the diff.
144
132
  glitchlings -g typogre --file documents/report.txt --diff
145
133
 
134
+ # Configure glitchlings inline by passing keyword arguments.
135
+ glitchlings -g "Typogre(max_change_rate=0.05)" "Ghouls just wanna have fun"
136
+
146
137
  # Pipe text straight into the CLI for an on-the-fly corruption.
147
138
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
148
139
  ```
149
140
 
150
- Use `--help` for a complete breakdown of available options.
141
+ Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
151
142
 
152
143
  ## Development
153
144
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -46,11 +46,13 @@ glitchlings = "glitchlings.main:main"
46
46
 
47
47
  [project.optional-dependencies]
48
48
  hf = ["datasets>=4.0.0"]
49
- wordnet = ["nltk>=3.9.1"]
49
+ wordnet = ["nltk>=3.9.1", "numpy>=1.24,<=2.0"]
50
50
  prime = ["verifiers>=0.1.3.post0"]
51
51
  dev = [
52
52
  "pytest>=8.0.0",
53
53
  "hypothesis>=6.140.0",
54
+ "nltk>=3.9.1",
55
+ "numpy>=1.24,<=2.0",
54
56
  ]
55
57
 
56
58
  [build-system]
@@ -59,6 +61,10 @@ build-backend = "setuptools.build_meta"
59
61
 
60
62
  [tool.setuptools]
61
63
  package-dir = {"" = "src"}
64
+ include-package-data = true
65
+
66
+ [tool.setuptools.package-data]
67
+ "glitchlings.zoo" = ["ocr_confusions.tsv"]
62
68
 
63
69
  [tool.setuptools.packages.find]
64
70
  where = ["src"]
@@ -69,11 +75,6 @@ path = "rust/zoo/Cargo.toml"
69
75
  binding = "PyO3"
70
76
  debug = false
71
77
 
72
- [[tool.setuptools-rust.ext-modules]]
73
- target = "glitchlings._typogre_rust"
74
- path = "rust/typogre/Cargo.toml"
75
- binding = "PyO3"
76
- debug = false
77
78
 
78
79
  [tool.pytest.ini_options]
79
80
  pythonpath = [
@@ -316,13 +316,6 @@ version = "1.19.0"
316
316
  source = "registry+https://github.com/rust-lang/crates.io-index"
317
317
  checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
318
318
 
319
- [[package]]
320
- name = "typogre_rust"
321
- version = "0.1.0"
322
- dependencies = [
323
- "pyo3",
324
- ]
325
-
326
319
  [[package]]
327
320
  name = "unicode-ident"
328
321
  version = "1.0.19"
@@ -1,6 +1,5 @@
1
1
  [workspace]
2
2
  members = [
3
- "typogre",
4
3
  "zoo",
5
4
  ]
6
5
  resolver = "2"
@@ -0,0 +1,30 @@
1
+ # Source Replacements (space-separated)
2
+ li h
3
+ h li
4
+ rn m
5
+ m rn
6
+ cl d
7
+ d cl
8
+ I l
9
+ l I 1
10
+ 1 l I
11
+ 0 O
12
+ O 0
13
+ B 8
14
+ 8 B
15
+ S 5
16
+ 5 S
17
+ Z 2
18
+ 2 Z
19
+ G 6
20
+ 6 G
21
+ “ "
22
+ ” "
23
+ ‘ '
24
+ ’ '
25
+ — -
26
+ – -
27
+ vv w
28
+ w vv
29
+ ri n
30
+ n ri
@@ -0,0 +1,134 @@
1
+ use std::env;
2
+ use std::ffi::{OsStr, OsString};
3
+ use std::fs;
4
+ use std::io::{self, ErrorKind};
5
+ use std::path::PathBuf;
6
+ use std::process::Command;
7
+
8
+ fn main() {
9
+ prepare_confusion_table().expect("failed to stage OCR confusion table for compilation");
10
+ pyo3_build_config::add_extension_module_link_args();
11
+
12
+ if let Some(python) = configured_python() {
13
+ link_python(&python);
14
+ } else if let Some(python) = detect_python() {
15
+ link_python(&python);
16
+ }
17
+ }
18
+
19
+ fn configured_python() -> Option<OsString> {
20
+ std::env::var_os("PYO3_PYTHON")
21
+ .or_else(|| std::env::var_os("PYTHON"))
22
+ .filter(|path| !path.is_empty())
23
+ }
24
+
25
+ fn detect_python() -> Option<OsString> {
26
+ const CANDIDATES: &[&str] = &["python3.12", "python3", "python"];
27
+
28
+ for candidate in CANDIDATES {
29
+ let status = Command::new(candidate)
30
+ .arg("-c")
31
+ .arg("import sys")
32
+ .output();
33
+
34
+ if let Ok(output) = status {
35
+ if output.status.success() {
36
+ return Some(OsString::from(candidate));
37
+ }
38
+ }
39
+ }
40
+
41
+ None
42
+ }
43
+
44
+ fn link_python(python: &OsStr) {
45
+ if let Some(path) = query_python(
46
+ python,
47
+ "import sysconfig; print(sysconfig.get_config_var('LIBDIR') or '')",
48
+ ) {
49
+ let trimmed = path.trim();
50
+ if !trimmed.is_empty() {
51
+ println!("cargo:rustc-link-search=native={trimmed}");
52
+ }
53
+ }
54
+
55
+ if let Some(path) = query_python(
56
+ python,
57
+ "import sysconfig; print(sysconfig.get_config_var('LIBPL') or '')",
58
+ ) {
59
+ let trimmed = path.trim();
60
+ if !trimmed.is_empty() {
61
+ println!("cargo:rustc-link-search=native={trimmed}");
62
+ }
63
+ }
64
+
65
+ if let Some(library) = query_python(
66
+ python,
67
+ "import sysconfig; print(sysconfig.get_config_var('LDLIBRARY') or '')",
68
+ ) {
69
+ let name = library.trim();
70
+ if let Some(stripped) = name.strip_prefix("lib") {
71
+ let stem = stripped
72
+ .strip_suffix(".so")
73
+ .or_else(|| stripped.strip_suffix(".a"))
74
+ .unwrap_or(stripped);
75
+ if !stem.is_empty() {
76
+ println!("cargo:rustc-link-lib={stem}");
77
+ }
78
+ }
79
+ }
80
+ }
81
+
82
+ fn query_python(python: &OsStr, command: &str) -> Option<String> {
83
+ let output = Command::new(python).arg("-c").arg(command).output().ok()?;
84
+ if !output.status.success() {
85
+ return None;
86
+ }
87
+ let value = String::from_utf8(output.stdout).ok()?;
88
+ Some(value)
89
+ }
90
+
91
+ fn prepare_confusion_table() -> io::Result<()> {
92
+ let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("missing manifest dir"));
93
+ let out_dir = PathBuf::from(env::var("OUT_DIR").expect("missing OUT_DIR"));
94
+
95
+ let repo_path = manifest_dir.join("../../src/glitchlings/zoo/ocr_confusions.tsv");
96
+ let packaged_path = manifest_dir.join("assets/ocr_confusions.tsv");
97
+ println!("cargo:rerun-if-changed={}", packaged_path.display());
98
+
99
+ let source_path = if repo_path.exists() {
100
+ println!("cargo:rerun-if-changed={}", repo_path.display());
101
+ if packaged_path.exists() {
102
+ let repo_bytes = fs::read(&repo_path)?;
103
+ let packaged_bytes = fs::read(&packaged_path)?;
104
+ if repo_bytes != packaged_bytes {
105
+ return Err(io::Error::new(
106
+ ErrorKind::Other,
107
+ format!(
108
+ "OCR confusion table at {} is out of sync with {}",
109
+ packaged_path.display(),
110
+ repo_path.display()
111
+ ),
112
+ ));
113
+ }
114
+ }
115
+ repo_path
116
+ } else {
117
+ if !packaged_path.exists() {
118
+ return Err(io::Error::new(
119
+ ErrorKind::NotFound,
120
+ format!(
121
+ "missing OCR confusion table; looked for {} and {}",
122
+ repo_path.display(),
123
+ packaged_path.display()
124
+ ),
125
+ ));
126
+ }
127
+ packaged_path
128
+ };
129
+
130
+ fs::create_dir_all(&out_dir)?;
131
+ fs::copy(&source_path, out_dir.join("ocr_confusions.tsv"))?;
132
+ Ok(())
133
+ }
134
+
@@ -500,6 +500,6 @@ mod tests {
500
500
  let mut rng = PyRng::new(1);
501
501
  let op = OcrArtifactsOp { error_rate: 1.0 };
502
502
  op.apply(&mut buffer, &mut rng).expect("ocr succeeds");
503
- assert_eq!(buffer.to_string(), "Tlie rn m");
503
+ assert_eq!(buffer.to_string(), "Tlie rn rri");
504
504
  }
505
505
  }
@@ -3,6 +3,7 @@ mod pipeline;
3
3
  mod resources;
4
4
  mod rng;
5
5
  mod text_buffer;
6
+ mod typogre;
6
7
 
7
8
  use glitch_ops::{GlitchOp, GlitchRng};
8
9
  use pyo3::prelude::*;
@@ -17,7 +18,6 @@ pub use glitch_ops::{
17
18
  pub use pipeline::{derive_seed, GlitchDescriptor, Pipeline, PipelineError};
18
19
  pub use rng::{PyRng, PyRngError};
19
20
  pub use text_buffer::{SegmentKind, TextBuffer, TextBufferError, TextSegment, TextSpan};
20
-
21
21
  struct PythonRngAdapter<'py> {
22
22
  rng: Bound<'py, PyAny>,
23
23
  }
@@ -279,5 +279,6 @@ fn _zoo_rust(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
279
279
  m.add_function(wrap_pyfunction!(ocr_artifacts, m)?)?;
280
280
  m.add_function(wrap_pyfunction!(redact_words, m)?)?;
281
281
  m.add_function(wrap_pyfunction!(compose_glitchlings, m)?)?;
282
+ m.add_function(wrap_pyfunction!(typogre::fatfinger, m)?)?;
282
283
  Ok(())
283
284
  }
@@ -1,6 +1,8 @@
1
1
  use once_cell::sync::Lazy;
2
2
  use regex::Regex;
3
3
 
4
+ const RAW_OCR_CONFUSIONS: &str = include_str!(concat!(env!("OUT_DIR"), "/ocr_confusions.tsv"));
5
+
4
6
  /// Precompiled regex removing spaces before punctuation characters.
5
7
  pub static SPACE_BEFORE_PUNCTUATION: Lazy<Regex> =
6
8
  Lazy::new(|| Regex::new(r"\s+([.,;:])").expect("valid punctuation regex"));
@@ -9,43 +11,30 @@ pub static SPACE_BEFORE_PUNCTUATION: Lazy<Regex> =
9
11
  pub static MULTIPLE_WHITESPACE: Lazy<Regex> =
10
12
  Lazy::new(|| Regex::new(r"\s{2,}").expect("valid multi-whitespace regex"));
11
13
 
12
- static BASE_CONFUSION_TABLE: &[(&str, &[&str])] = &[
13
- ("li", &["h"]),
14
- ("h", &["li"]),
15
- ("rn", &["m"]),
16
- ("m", &["rn"]),
17
- ("cl", &["d"]),
18
- ("d", &["cl"]),
19
- ("I", &["l"]),
20
- ("l", &["I", "1"]),
21
- ("1", &["l", "I"]),
22
- ("0", &["O"]),
23
- ("O", &["0"]),
24
- ("B", &["8"]),
25
- ("8", &["B"]),
26
- ("S", &["5"]),
27
- ("5", &["S"]),
28
- ("Z", &["2"]),
29
- ("2", &["Z"]),
30
- ("G", &["6"]),
31
- ("6", &["G"]),
32
- ("“", &["\""]),
33
- ("”", &["\""]),
34
- ("‘", &["'"]),
35
- ("’", &["'"]),
36
- ("—", &["-"]),
37
- ("–", &["-"]),
38
- ];
39
-
40
14
  /// Sorted confusion pairs reused by glitchling implementations.
41
15
  pub static OCR_CONFUSION_TABLE: Lazy<Vec<(&'static str, &'static [&'static str])>> =
42
16
  Lazy::new(|| {
43
- let mut entries: Vec<(usize, (&'static str, &'static [&'static str]))> =
44
- BASE_CONFUSION_TABLE
45
- .iter()
46
- .copied()
47
- .enumerate()
48
- .collect();
17
+ let mut entries: Vec<(usize, (&'static str, &'static [&'static str]))> = Vec::new();
18
+
19
+ for (line_number, line) in RAW_OCR_CONFUSIONS.lines().enumerate() {
20
+ let trimmed = line.trim();
21
+ if trimmed.is_empty() || trimmed.starts_with('#') {
22
+ continue;
23
+ }
24
+
25
+ let mut parts = trimmed.split_whitespace();
26
+ let Some(source) = parts.next() else {
27
+ continue;
28
+ };
29
+ let replacements: Vec<&'static str> = parts.collect();
30
+ if replacements.is_empty() {
31
+ continue;
32
+ }
33
+
34
+ let leaked: &'static [&'static str] = Box::leak(replacements.into_boxed_slice());
35
+ entries.push((line_number, (source, leaked)));
36
+ }
37
+
49
38
  entries.sort_by(|a, b| {
50
39
  let a_len = a.1 .0.len();
51
40
  let b_len = b.1 .0.len();
@@ -53,6 +42,7 @@ pub static OCR_CONFUSION_TABLE: Lazy<Vec<(&'static str, &'static [&'static str])
53
42
  .cmp(&a_len)
54
43
  .then_with(|| a.0.cmp(&b.0))
55
44
  });
45
+
56
46
  entries.into_iter().map(|(_, pair)| pair).collect()
57
47
  });
58
48
 
@@ -1,5 +1,5 @@
1
1
  use pyo3::prelude::*;
2
- use pyo3::types::{PyAny, PyDict, PyList, PyModule};
2
+ use pyo3::types::{PyAny, PyDict, PyList};
3
3
  use pyo3::Bound;
4
4
  use std::collections::HashMap;
5
5
 
@@ -205,7 +205,7 @@ fn global_action(rng: &Bound<'_, PyAny>, action: &str, chars: &mut Vec<char>) ->
205
205
  }
206
206
 
207
207
  #[pyfunction]
208
- fn fatfinger(
208
+ pub(crate) fn fatfinger(
209
209
  text: &str,
210
210
  max_change_rate: f64,
211
211
  layout: &Bound<'_, PyDict>,
@@ -224,7 +224,7 @@ fn fatfinger(
224
224
  }
225
225
 
226
226
  let length = chars.len();
227
- let mut max_changes = (length as f64 * max_change_rate).floor() as usize;
227
+ let mut max_changes = (length as f64 * max_change_rate).ceil() as usize;
228
228
  if max_changes < 1 {
229
229
  max_changes = 1;
230
230
  }
@@ -252,9 +252,3 @@ fn fatfinger(
252
252
 
253
253
  Ok(chars.into_iter().collect())
254
254
  }
255
-
256
- #[pymodule]
257
- fn _typogre_rust(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
258
- m.add_function(wrap_pyfunction!(fatfinger, m)?)?;
259
- Ok(())
260
- }
@@ -11,31 +11,12 @@ from . import SAMPLE_TEXT
11
11
  from .zoo import (
12
12
  Glitchling,
13
13
  Gaggle,
14
- jargoyle,
15
- mim1c,
16
- typogre,
17
- reduple,
18
- rushmore,
19
- redactyl,
20
- scannequin,
14
+ BUILTIN_GLITCHLINGS,
15
+ DEFAULT_GLITCHLING_NAMES,
16
+ parse_glitchling_spec,
21
17
  summon,
22
18
  )
23
19
 
24
-
25
- BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
26
- g.name.lower(): g
27
- for g in [
28
- typogre,
29
- mim1c,
30
- jargoyle,
31
- reduple,
32
- rushmore,
33
- redactyl,
34
- scannequin,
35
- ]
36
- }
37
-
38
- DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
39
20
  MAX_NAME_WIDTH = max(len(glitchling.name) for glitchling in BUILTIN_GLITCHLINGS.values())
40
21
 
41
22
 
@@ -62,8 +43,11 @@ def build_parser() -> argparse.ArgumentParser:
62
43
  "--glitchling",
63
44
  dest="glitchlings",
64
45
  action="append",
65
- metavar="NAME",
66
- help="Glitchling to apply (repeat for multiples). Defaults to all built-ins.",
46
+ metavar="SPEC",
47
+ help=(
48
+ "Glitchling to apply, optionally with parameters like "
49
+ "Typogre(max_change_rate=0.05). Repeat for multiples; defaults to all built-ins."
50
+ ),
67
51
  )
68
52
  parser.add_argument(
69
53
  "-s",
@@ -147,23 +131,16 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
147
131
  def summon_glitchlings(
148
132
  names: list[str] | None, parser: argparse.ArgumentParser, seed: int
149
133
  ) -> Gaggle:
150
- """Instantiate the requested glitchlings and bundle them in a ``Gaggle``.
151
-
152
- Args:
153
- names: Optional list of glitchling names provided by the user.
154
- parser: The argument parser used for emitting user-facing errors.
155
- seed: Master seed controlling deterministic corruption order.
156
-
157
- Returns:
158
- Gaggle: A ready-to-use collection of glitchlings.
159
-
160
- Raises:
161
- SystemExit: Raised indirectly via ``parser.error`` when a provided glitchling
162
- name is invalid.
163
- """
134
+ """Instantiate the requested glitchlings and bundle them in a ``Gaggle``."""
164
135
 
165
136
  if names:
166
- normalized = [name.lower() for name in names]
137
+ normalized: list[str | Glitchling] = []
138
+ for specification in names:
139
+ try:
140
+ normalized.append(parse_glitchling_spec(specification))
141
+ except ValueError as exc:
142
+ parser.error(str(exc))
143
+ raise AssertionError("parser.error should exit")
167
144
  else:
168
145
  normalized = DEFAULT_GLITCHLING_NAMES
169
146
 
@@ -174,6 +151,7 @@ def summon_glitchlings(
174
151
  raise AssertionError("parser.error should exit")
175
152
 
176
153
 
154
+
177
155
  def show_diff(original: str, corrupted: str) -> None:
178
156
  """Display a unified diff between the original and corrupted text."""
179
157
 
@@ -141,6 +141,36 @@ _register_layout(
141
141
  ),
142
142
  )
143
143
 
144
+ _register_layout(
145
+ "QWERTZ",
146
+ (
147
+ "^1234567890ß´",
148
+ " qwertzuiopü+",
149
+ " asdfghjklöä#",
150
+ " yxcvbnm,.-",
151
+ ),
152
+ )
153
+
154
+ _register_layout(
155
+ "SPANISH_QWERTY",
156
+ (
157
+ "º1234567890'¡",
158
+ " qwertyuiop´+",
159
+ " asdfghjklñ´",
160
+ " <zxcvbnm,.-",
161
+ ),
162
+ )
163
+
164
+ _register_layout(
165
+ "SWEDISH_QWERTY",
166
+ (
167
+ "§1234567890+´",
168
+ " qwertyuiopå¨",
169
+ " asdfghjklöä'",
170
+ " <zxcvbnm,.-",
171
+ ),
172
+ )
173
+
144
174
 
145
175
  class KeyNeighbors:
146
176
  def __init__(self) -> None: