glitchlings 0.4.4__cp310-cp310-win_amd64.whl → 0.4.5__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

@@ -0,0 +1,173 @@
1
+ """Hokey glitchling that performs expressive lengthening."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from ..util.hokey_generator import HokeyConfig, HokeyGenerator, StretchEvent
9
+ from ..util.stretchability import StretchabilityAnalyzer
10
+ from ._rust_extensions import get_rust_operation
11
+ from .core import AttackOrder, AttackWave, Gaggle
12
+ from .core import Glitchling as GlitchlingBase
13
+
14
+ _hokey_rust = get_rust_operation("hokey")
15
+ _ANALYZER = StretchabilityAnalyzer()
16
+ _GENERATOR = HokeyGenerator(analyzer=_ANALYZER)
17
+
18
+
19
def _python_extend_vowels(
    text: str,
    *,
    rate: float,
    extension_min: int,
    extension_max: int,
    word_length_threshold: int,
    base_p: float,
    rng: random.Random,
    return_trace: bool = False,
) -> str | tuple[str, list[StretchEvent]]:
    """Run the pure-Python stretching path on ``text``.

    The tuning keywords are bundled into a ``HokeyConfig`` and passed, together
    with ``rng``, to the module-level ``HokeyGenerator``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The generated text on its own, or ``(text, events)`` when
        ``return_trace`` is true.
    """
    settings = HokeyConfig(
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=base_p,
    )
    stretched, trace = _GENERATOR.generate(text, rng=rng, config=settings)
    if return_trace:
        return stretched, trace
    return stretched
39
+
40
+
41
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    return_trace: bool = False,
    base_p: float | None = None,
) -> str | tuple[str, list[StretchEvent]]:
    """Stretch expressive parts of words to add emphasis.

    Parameters
    ----------
    text : str
        Text to transform. Falsy input is returned unchanged.
    rate : float, optional
        Global selection rate for candidate words.
    extension_min : int, optional
        Minimum number of extra repetitions for the stretch unit.
    extension_max : int, optional
        Maximum number of extra repetitions for the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; longer words are de-emphasised but
        not excluded.
    seed : int, optional
        Seed for a fresh ``random.Random``; only consulted when ``rng`` is not
        supplied.
    rng : random.Random, optional
        Random number generator to drive sampling.
    return_trace : bool, optional
        When ``True`` the stretch events are returned alongside the text.
    base_p : float, optional
        Base probability for the negative-binomial sampler (heavier tails for
        smaller values). ``None`` is replaced by ``0.45``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The transformed text, or ``(text, events)`` when ``return_trace`` is
        true. A trace is only produced by the Python implementation, so the
        compiled operation is bypassed whenever a trace is requested.
    """
    if not text:
        # Nothing to stretch: hand the input back, with an empty trace on request.
        if return_trace:
            no_events: list[StretchEvent] = []
            return text, no_events
        return text

    if rng is None:
        rng = random.Random(seed)
    probability = 0.45 if base_p is None else base_p

    # Fast path: the compiled operation, usable only when no trace is wanted.
    if _hokey_rust is not None and not return_trace:
        accelerated = _hokey_rust(
            text,
            rate,
            extension_min,
            extension_max,
            word_length_threshold,
            probability,
            rng,
        )
        return cast(str, accelerated)

    return _python_extend_vowels(
        text,
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=probability,
        rng=rng,
        return_trace=return_trace,
    )
110
+
111
+
112
class Hokey(GlitchlingBase):
    """Glitchling that stretches words using linguistic heuristics."""

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
    ) -> None:
        """Register ``extend_vowels`` as this glitchling's corruption function.

        All tuning arguments, plus ``seed``, are forwarded unchanged to the base
        class initialiser; the seed is also remembered on ``_master_seed``.
        """
        self._master_seed: int | None = seed

        def _corruption_wrapper(text: str, **kwargs: Any) -> str:
            # extend_vowels may return (text, events); only the text is wanted here.
            outcome = extend_vowels(text, **kwargs)
            if isinstance(outcome, str):
                return outcome
            return outcome[0]

        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_p,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a ``"hokey"`` operation mapping.

        Each setting is read from ``self.kwargs``, falling back to the same
        default the constructor uses when the key is absent.
        """
        fallbacks: tuple[tuple[str, Any], ...] = (
            ("rate", 0.3),
            ("extension_min", 2),
            ("extension_max", 5),
            ("word_length_threshold", 6),
            ("base_p", 0.45),
        )
        descriptor: dict[str, Any] = {"type": "hokey"}
        for key, fallback in fallbacks:
            descriptor[key] = self.kwargs.get(key, fallback)
        return descriptor

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the random state, deriving a Hokey-specific seed when one is given.

        With ``seed=None`` the call is delegated to the base class untouched.
        Otherwise the seed is stored as the master seed, the base class is reset
        with it, and - provided ``self.seed`` is set afterwards - the seed, RNG
        and ``kwargs["seed"]`` are replaced with a value from
        ``Gaggle.derive_seed``.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        derived_seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.seed = derived_seed
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
168
+
169
+
170
+ hokey = Hokey()
171
+
172
+
173
+ __all__ = ["Hokey", "hokey", "extend_vowels"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -294,6 +294,12 @@ Dynamic: license-file
294
294
  Every language game breeds monsters.
295
295
  ```
296
296
 
297
+ [![PyPI version](https://img.shields.io/pypi/v/glitchlings.svg)](https://pypi.org/project/glitchlings/)
298
+ [![PyPI Status](https://github.com/osoleve/glitchlings/actions/workflows/publish.yml/badge.svg)](https://github.com/osoleve/glitchlings/actions/workflows/publish.yml)
299
+ [![Lint and Type](https://github.com/osoleve/glitchlings/actions/workflows/ci.yml/badge.svg)](https://github.com/osoleve/glitchlings/actions/workflows/ci.yml)
300
+ [![Website status](https://img.shields.io/website-up-down-green-red/https/osoleve.github.io/glitchlings)](https://osoleve.github.io/glitchlings/)
301
+ [![License](https://img.shields.io/github/license/osoleve/glitchlings.svg)](https://github.com/osoleve/glitchlings/blob/main/LICENSE)
302
+
297
303
  `Glitchlings` are **utilities for corrupting the text inputs to your language models in deterministic, _linguistically principled_** ways.
298
304
  Each embodies a different way that documents can be compromised in the wild.
299
305
 
@@ -382,6 +388,7 @@ glitchlings --list
382
388
  ```text
383
389
  Typogre — scope: Character, order: early
384
390
  Apostrofae — scope: Character, order: normal
391
+ Hokey — scope: Character, order: first
385
392
  Mim1c — scope: Character, order: last
386
393
  Jargoyle — scope: Word, order: normal
387
394
  Adjax — scope: Word, order: normal
@@ -429,10 +436,6 @@ options:
429
436
  ```
430
437
  <!-- END: CLI_USAGE -->
431
438
 
432
- Run `python docs/build_cli_reference.py` whenever you tweak the CLI so the README stays in sync with the actual output. The script executes the commands above and replaces the block between the markers automatically.
433
-
434
- Prefer inline tweaks? You can still configure glitchlings directly in the shell:
435
-
436
439
  ```bash
437
440
  # Run Typogre against the contents of a file and inspect the diff.
438
441
  glitchlings -g typogre --file documents/report.txt --diff
@@ -507,6 +510,22 @@ _Wait, was that...?_
507
510
  > - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
508
511
  > - `seed (int)`: The random seed for reproducibility (default: 151).
509
512
 
513
+ ### Hokey
514
+
515
+ _She's soooooo coooool!_
516
+
517
+ > _**Passionista.**_ Hokey sometimes gets a little excited and elongates words for emphasis.
518
+ >
519
+ > Args
520
+ >
521
+ > - `rate (float)`: Share of high-scoring tokens to stretch (default: 0.3).
522
+ > - `extension_min` / `extension_max (int)`: Bounds for extra repetitions (defaults: 2 / 5).
523
+ > - `word_length_threshold (int)`: Preferred maximum alphabetic length; longer words are damped instead of excluded (default: 6).
524
+ > - `base_p (float)`: Base probability for the heavy-tailed sampler (default: 0.45).
525
+ > - `seed (int)`: The random seed for reproducibility (default: 151).
526
+
527
+ _Apocryphal Glitchling contributed by Chloé Nunes_
528
+
510
529
  ### Scannequin
511
530
 
512
531
  _How can a computer need reading glasses?_
@@ -539,7 +558,9 @@ _Uh oh. The worst person you know just bought a thesaurus._
539
558
  > Args
540
559
  >
541
560
  > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
561
+ >
542
562
  > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
563
+ >
543
564
  > - `seed (int)`: The random seed for reproducibility (default: 151).
544
565
 
545
566
  ### Reduple
@@ -1,25 +1,30 @@
1
- glitchlings/__init__.py,sha256=jyfvslkaFlYVefDFq0CQKkc52F8p8I1UrbFbcyihFCU,1249
1
+ glitchlings/__init__.py,sha256=pNL6Vx6ggpRq-vW0CXJjnWOe_LktP6Zz9xNhqDHj3Dw,1301
2
2
  glitchlings/__main__.py,sha256=nB7btO_T4wBFOcyawfWpjEindVrUfTqqV5hdeeS1HT8,128
3
- glitchlings/_zoo_rust.cp310-win_amd64.pyd,sha256=o5tlRkqbmzbnWqe6r0k5Ltk5lfo-WZExCJTz_s1IjnE,2215936
4
- glitchlings/compat.py,sha256=xM5fT5RELgIdQTmgKQFZaPZJJpOg0noC-gLDcO390Ro,9192
5
- glitchlings/config.py,sha256=pKpM5onr9UG8sHKVIZKj2Ti2gD5bZ6N1wyxOhzgrviQ,13341
3
+ glitchlings/_zoo_rust.cp310-win_amd64.pyd,sha256=HXlrBd0WtqjwzuxEYFpvVxEHnylaGfOetxxZDIlncU0,2350080
4
+ glitchlings/compat.py,sha256=j4lkWNtyox5sen5j7u0SHfnk8QUn-yicaqvuLlZp1-s,9174
5
+ glitchlings/config.py,sha256=rBBHRmkIyoWRPcUFrsxmSm08qxZ07wDAuO5AAF6sjAk,13323
6
6
  glitchlings/config.toml,sha256=OywXmEpuOPtyJRbcRt4cwQkHiZ__5axEHoCaX9ye-uA,102
7
7
  glitchlings/main.py,sha256=eCUEFsu8-NLDz1xyKNDIucVm975HNQZJYm6YCv8RIyg,10987
8
+ glitchlings/data/__init__.py,sha256=kIT4a2EDNo_R-iL8kVisJ8PxR_BrygNYegfG_Ua6DcE,69
9
+ glitchlings/data/hokey_assets.json,sha256=1GaSEzXwtT1nvf0B9mFyLzHOcqzKbPreibsC6iBWAHA,3083
8
10
  glitchlings/dlc/__init__.py,sha256=iFDTwkaWl2C0_QUYykIXfmOUzy__oURX_BiJhexf-8o,312
9
- glitchlings/dlc/_shared.py,sha256=L6dc4Xi0q2K4ZwkyytXIU8Zu3MPugIDXhseZNuJNqyY,4560
10
- glitchlings/dlc/huggingface.py,sha256=Ym8dTArb-43AnCyukOO1m66iAbs8al9YkIWB3rGdhTk,2657
11
- glitchlings/dlc/prime.py,sha256=KY3so8WOwksbsKhZXfWorsZSYvIdPiUtc8e9apHabkM,8861
12
- glitchlings/dlc/pytorch.py,sha256=FZZbWHVX94FA9Ab77twZPUwQNno4IKTAjuHPXNhdY6M,6399
13
- glitchlings/dlc/pytorch_lightning.py,sha256=cKLFn3cFcOhFSN3QFlcvRPP5sLQts0M6DQ1J1cQuqRM,8289
11
+ glitchlings/dlc/_shared.py,sha256=q1xMclEsbR0KIEn9chwZXRubMnIvU-uIc_0PxCFpmqE,4560
12
+ glitchlings/dlc/huggingface.py,sha256=6wD4Vu2jp1d8GYSUiAvOAXqCO9w4HBxCOSfbJM8Ylzw,2657
13
+ glitchlings/dlc/prime.py,sha256=8-Ix8bjKyDKDMyXHDESE-gFDwC6blAP3mPXm1YiP3_U,8861
14
+ glitchlings/dlc/pytorch.py,sha256=xR-uoeo8f6g-aM2CmQ1cfHKfqn12uwRA9eCebEO5wXA,6399
15
+ glitchlings/dlc/pytorch_lightning.py,sha256=EQq7SMlvNoyBLYVW2Vi19H5k8VUqoAiNx8IqkDWqy5I,8214
14
16
  glitchlings/lexicon/__init__.py,sha256=oSLI1bOMfTpqiI7bMp9beeQ7Vp81G5coXxwdmCIQfV0,6199
15
- glitchlings/lexicon/_cache.py,sha256=KWqJ__WrM2ccIlplaaqoVT0ns65uU5WHewlJd4BvnJE,4196
17
+ glitchlings/lexicon/_cache.py,sha256=MRvomTi2Rx0l9FzHm91VrfRkrHqACjS5LG4a4OOCvLY,4180
16
18
  glitchlings/lexicon/metrics.py,sha256=TZAafSKgHpUS4h6vCuhTKGsvu_fru9kMqsXqLID6BTM,4734
17
- glitchlings/lexicon/vector.py,sha256=x9iT1O8Osolwt08g41V_70WHZt_b4OGzHBU72YHkmwg,23181
18
- glitchlings/lexicon/wordnet.py,sha256=fJi5SNa-sLpQiTIoXorkYzc2ZArejIms6zhoe8TPIOg,7840
19
+ glitchlings/lexicon/vector.py,sha256=1YivmCdb9IFqVNQTJiTztxRoFQi7Fmmg-8qKO7FUKzM,23252
20
+ glitchlings/lexicon/wordnet.py,sha256=Kdj0k1m9GJAdH41zVw0MkEAoLy0-KG0ZyS6hGFq_raY,7804
19
21
  glitchlings/lexicon/data/default_vector_cache.json,sha256=bnMV4tHIVOQtK7FDH81yqSLRkeViEzclGKXrrS8fEJ8,1079
20
22
  glitchlings/util/__init__.py,sha256=Q5lkncOaM6f2eJK3HAtZyxpCjGnekCpwPloqasS3JDo,4869
21
23
  glitchlings/util/adapters.py,sha256=mFhPlE8JaFuO_C-3_aqhgwkqa6isV8Y2ifqVh3Iv9JM,720
22
- glitchlings/zoo/__init__.py,sha256=j21naQtFunJYgdgYsyTNYUSa7sl88yNQWaGP8sWyw5U,5411
24
+ glitchlings/util/hokey_generator.py,sha256=hNWVbscVeKvcqwFtJ1oaWYf2Z0qHSxy0-iMLjht6zuM,4627
25
+ glitchlings/util/stretch_locator.py,sha256=tM4XsQ_asNXQq2Yee8STybFMCC2HbSKCu9I49G0yJ3c,4334
26
+ glitchlings/util/stretchability.py,sha256=ja1PJKVAKySUP5G2T0Q_THwIvydJn49xYFaQn58cQYw,13194
27
+ glitchlings/zoo/__init__.py,sha256=Ab69SD_RUXfiWUEzqU3wteGWobpm2kkbmwndYLEbv_0,5511
23
28
  glitchlings/zoo/_ocr_confusions.py,sha256=pPlvJOoan3ouwwGt8hATcO-9luIrGJl0vwUqssUMXD8,1236
24
29
  glitchlings/zoo/_rate.py,sha256=KxFDFJEGWsv76v1JcoHXIETj9kGdbbAiUqCPwAcWcDw,3710
25
30
  glitchlings/zoo/_rust_extensions.py,sha256=SdU06m-qjf-rRHnqM5OMophx1IacoI4KbyRu2HXrTUc,4354
@@ -28,6 +33,7 @@ glitchlings/zoo/_text_utils.py,sha256=LqCa33E-Qxbk6N5AVfxEmAz6C2u7_mCF0xPT9-404A
28
33
  glitchlings/zoo/adjax.py,sha256=8AqTfcJe50vPVxER5wREuyr4qqsmyKqi-klDW77HVYM,3646
29
34
  glitchlings/zoo/apostrofae.py,sha256=WB1zvCc1_YvTD9XdTNtFrK8H9FleaF-UNMCai_E7lqE,3949
30
35
  glitchlings/zoo/core.py,sha256=tkJFqGwpVa7qQxkUr9lsHOB8zS3lDCo987R6Nvz375U,21257
36
+ glitchlings/zoo/hokey.py,sha256=h-yjCLqYAZFksIYw4fMniuTWUywFw9GU9yUFs_uECHo,5471
31
37
  glitchlings/zoo/jargoyle.py,sha256=kgKw_hsaJaKQjrxzt_CMlgtZApedM6I2-U3d3aeipXs,12014
32
38
  glitchlings/zoo/mim1c.py,sha256=GqUMErVAVcqMAZjx4hhJ0Af25CxA0Aorv3U_fTqLZek,3546
33
39
  glitchlings/zoo/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
@@ -39,9 +45,9 @@ glitchlings/zoo/typogre.py,sha256=X6jcnCYifNtoSwe2ig7YS3QrODyrACirMZ9pjN5LLBA,69
39
45
  glitchlings/zoo/zeedub.py,sha256=KApSCSiTr3b412vsA8UHWFhYQDxe_jg0308wWK-GNtM,5025
40
46
  glitchlings/zoo/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
47
  glitchlings/zoo/assets/apostrofae_pairs.json,sha256=lPLFLndzn_f7_5wZizxsLMnwBY4O63zsCvDjyJ56MLA,553
42
- glitchlings-0.4.4.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
43
- glitchlings-0.4.4.dist-info/METADATA,sha256=G5yU2jAph6LYY0Hvr5WTa3tnSdmDI890nTiecmP7P68,33015
44
- glitchlings-0.4.4.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
45
- glitchlings-0.4.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
46
- glitchlings-0.4.4.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
47
- glitchlings-0.4.4.dist-info/RECORD,,
48
+ glitchlings-0.4.5.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
49
+ glitchlings-0.4.5.dist-info/METADATA,sha256=sULt2kvkK1lfhdqBbSvawNoRNyevwLsFrITBPMBrE9M,34146
50
+ glitchlings-0.4.5.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
51
+ glitchlings-0.4.5.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
52
+ glitchlings-0.4.5.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
53
+ glitchlings-0.4.5.dist-info/RECORD,,