glitchlings 0.4.4__cp312-cp312-macosx_11_0_universal2.whl → 0.4.5__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ """Hokey glitchling that performs expressive lengthening."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from ..util.hokey_generator import HokeyConfig, HokeyGenerator, StretchEvent
9
+ from ..util.stretchability import StretchabilityAnalyzer
10
+ from ._rust_extensions import get_rust_operation
11
+ from .core import AttackOrder, AttackWave, Gaggle
12
+ from .core import Glitchling as GlitchlingBase
13
+
14
+ _hokey_rust = get_rust_operation("hokey")
15
+ _ANALYZER = StretchabilityAnalyzer()
16
+ _GENERATOR = HokeyGenerator(analyzer=_ANALYZER)
17
+
18
+
19
def _python_extend_vowels(
    text: str,
    *,
    rate: float,
    extension_min: int,
    extension_max: int,
    word_length_threshold: int,
    base_p: float,
    rng: random.Random,
    return_trace: bool = False,
) -> str | tuple[str, list[StretchEvent]]:
    """Run the Python Hokey generator over ``text``.

    The keyword settings are bundled into a ``HokeyConfig`` and passed, together
    with ``rng``, to the module-level ``_GENERATOR``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The generated text, or a ``(text, events)`` pair when ``return_trace``
        is true.
    """
    settings = HokeyConfig(
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=base_p,
    )
    stretched, trace = _GENERATOR.generate(text, rng=rng, config=settings)
    if return_trace:
        return stretched, trace
    return stretched
39
+
40
+
41
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    return_trace: bool = False,
    base_p: float | None = None,
) -> str | tuple[str, list[StretchEvent]]:
    """Lengthen expressive parts of words in ``text`` for emphasis.

    Parameters
    ----------
    text : str
        Text to transform. Empty input is returned unchanged.
    rate : float, optional
        Global selection rate applied to candidate words.
    extension_min : int, optional
        Lower bound on the extra repetitions of the stretch unit.
    extension_max : int, optional
        Upper bound on the extra repetitions of the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; words beyond it are de-emphasised
        rather than excluded.
    seed : int, optional
        Seed for a fresh ``random.Random`` when ``rng`` is not given.
    rng : random.Random, optional
        Random number generator that drives sampling; takes precedence over
        ``seed``.
    return_trace : bool, optional
        When ``True``, also return the stretch events for introspection.
    base_p : float, optional
        Base probability for the negative-binomial sampler (smaller values
        give heavier tails). ``None`` means ``0.45``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The transformed text, or a ``(text, events)`` pair when
        ``return_trace`` is ``True``.
    """
    # Nothing to stretch: hand the input straight back in the requested shape.
    if not text:
        if return_trace:
            no_events: list[StretchEvent] = []
            return text, no_events
        return text

    sampler = random.Random(seed) if rng is None else rng
    tail_probability = 0.45 if base_p is None else base_p

    # Trace requests always go through the Python implementation; the compiled
    # operation is used only when it is available and no trace is wanted.
    if _hokey_rust is not None and not return_trace:
        compiled_output = _hokey_rust(
            text,
            rate,
            extension_min,
            extension_max,
            word_length_threshold,
            tail_probability,
            sampler,
        )
        return cast(str, compiled_output)

    return _python_extend_vowels(
        text,
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=tail_probability,
        rng=sampler,
        return_trace=return_trace,
    )
110
+
111
+
112
class Hokey(GlitchlingBase):
    """Glitchling that stretches words using linguistic heuristics."""

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
    ) -> None:
        """Register the Hokey corruption and forward its settings to the base class."""
        self._master_seed: int | None = seed

        def _corruption_wrapper(text: str, **kwargs: Any) -> str:
            # extend_vowels may return (text, events); only the text is passed on.
            outcome = extend_vowels(text, **kwargs)
            if isinstance(outcome, str):
                return outcome
            return outcome[0]

        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_p,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the ``"hokey"`` pipeline descriptor built from ``self.kwargs``.

        Settings missing from ``self.kwargs`` fall back to the constructor
        defaults.
        """
        fallbacks: dict[str, Any] = {
            "rate": 0.3,
            "extension_min": 2,
            "extension_max": 5,
            "word_length_threshold": 6,
            "base_p": 0.45,
        }
        descriptor: dict[str, Any] = {"type": "hokey"}
        for setting, fallback in fallbacks.items():
            descriptor[setting] = self.kwargs.get(setting, fallback)
        return descriptor

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the RNG, re-deriving this glitchling's seed when one is given.

        Without a seed the call is delegated to the base class unchanged. With
        a seed, the base reset runs first and the seed is then replaced by the
        value ``Gaggle.derive_seed`` produces for this glitchling's name.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
168
+
169
+
170
+ hokey = Hokey()
171
+
172
+
173
+ __all__ = ["Hokey", "hokey", "extend_vowels"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -294,6 +294,12 @@ Dynamic: license-file
294
294
  Every language game breeds monsters.
295
295
  ```
296
296
 
297
+ [![PyPI version](https://img.shields.io/pypi/v/glitchlings.svg)](https://pypi.org/project/glitchlings/)
298
+ [![PyPI Status](https://github.com/osoleve/glitchlings/actions/workflows/publish.yml/badge.svg)](https://github.com/osoleve/glitchlings/actions/workflows/publish.yml)
299
+ [![Lint and Type](https://github.com/osoleve/glitchlings/actions/workflows/ci.yml/badge.svg)](https://github.com/osoleve/glitchlings/actions/workflows/ci.yml)
300
+ [![Website status](https://img.shields.io/website-up-down-green-red/https/osoleve.github.io/glitchlings)](https://osoleve.github.io/glitchlings/)
301
+ [![License](https://img.shields.io/github/license/osoleve/glitchlings.svg)](https://github.com/osoleve/glitchlings/blob/main/LICENSE)
302
+
297
303
  `Glitchlings` are **utilities for corrupting the text inputs to your language models in deterministic, _linguistically principled_** ways.
298
304
  Each embodies a different way that documents can be compromised in the wild.
299
305
 
@@ -382,6 +388,7 @@ glitchlings --list
382
388
  ```text
383
389
  Typogre — scope: Character, order: early
384
390
  Apostrofae — scope: Character, order: normal
391
+ Hokey — scope: Character, order: first
385
392
  Mim1c — scope: Character, order: last
386
393
  Jargoyle — scope: Word, order: normal
387
394
  Adjax — scope: Word, order: normal
@@ -429,10 +436,6 @@ options:
429
436
  ```
430
437
  <!-- END: CLI_USAGE -->
431
438
 
432
- Run `python docs/build_cli_reference.py` whenever you tweak the CLI so the README stays in sync with the actual output. The script executes the commands above and replaces the block between the markers automatically.
433
-
434
- Prefer inline tweaks? You can still configure glitchlings directly in the shell:
435
-
436
439
  ```bash
437
440
  # Run Typogre against the contents of a file and inspect the diff.
438
441
  glitchlings -g typogre --file documents/report.txt --diff
@@ -507,6 +510,22 @@ _Wait, was that...?_
507
510
  > - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
508
511
  > - `seed (int)`: The random seed for reproducibility (default: 151).
509
512
 
513
+ ### Hokey
514
+
515
+ _She's soooooo coooool!_
516
+
517
+ > _**Passionista.**_ Hokey sometimes gets a little excited and elongates words for emphasis.
518
+ >
519
+ > Args
520
+ >
521
+ > - `rate (float)`: Share of high-scoring tokens to stretch (default: 0.3).
522
+ > - `extension_min` / `extension_max (int)`: Bounds for extra repetitions (defaults: 2 / 5).
523
+ > - `word_length_threshold (int)`: Preferred maximum alphabetic length; longer words are damped instead of excluded (default: 6).
524
+ > - `base_p (float)`: Base probability for the heavy-tailed sampler (default: 0.45).
525
+ > - `seed (int)`: The random seed for reproducibility (default: 151).
526
+
527
+ _Apocryphal Glitchling contributed by Chloé Nunes_
528
+
510
529
  ### Scannequin
511
530
 
512
531
  _How can a computer need reading glasses?_
@@ -539,7 +558,9 @@ _Uh oh. The worst person you know just bought a thesaurus._
539
558
  > Args
540
559
  >
541
560
  > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
561
+ >
542
562
  > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
563
+ >
543
564
  > - `seed (int)`: The random seed for reproducibility (default: 151).
544
565
 
545
566
  ### Reduple
@@ -1,25 +1,30 @@
1
- glitchlings/__init__.py,sha256=bkyRgzjC8ssidEO9UL9VpbYXQxTV1Hz3VAPOIqd9uMg,1182
1
+ glitchlings/__init__.py,sha256=A6m-sj1Gnq5DXdaD7IRsPnMvXW9UjZh3fQGcvps22uw,1230
2
2
  glitchlings/__main__.py,sha256=f-P4jiVBd7ZpS6QxRpa_6SJgOG03UhZhcWasMDRWLs8,120
3
- glitchlings/_zoo_rust.cpython-312-darwin.so,sha256=Dsh8k6oypyOVug9SqVuwsZvpxLfEqrVJfeL995FsMrI,2602496
4
- glitchlings/compat.py,sha256=T_5Ia8yCzZvsMdicZ2TCcOgDO53_AjNGkSXWTR_qEnA,8908
5
- glitchlings/config.py,sha256=ofxDMkoMg4j51CFube54aca1Ky9y_ZeVktXpeUEdWmA,12953
3
+ glitchlings/_zoo_rust.cpython-312-darwin.so,sha256=ze2DcbvNHkfw2GjIOHE8IIjwecXkEQIo8IP4T23J1EM,2697056
4
+ glitchlings/compat.py,sha256=lswR7J3kXER5hPXeyfkO-7USJvUhdtTi1ovJgEpspKc,8892
5
+ glitchlings/config.py,sha256=J_Bk901-WLeqDhrmvTrjUZLxgIgSqR0qYU2oDaNPjt8,12937
6
6
  glitchlings/config.toml,sha256=04-Y_JCdQU68SRmwk2qZqrH_bbX4jEH9uh7URtxdIHA,99
7
7
  glitchlings/main.py,sha256=uw8VbDgxov1m-wYHPDl2dP5ItpLB4ZHpb0ChJXzcL0o,10623
8
+ glitchlings/data/__init__.py,sha256=JZwsJhnZHnr2onnukNduNjfNSurzGn3v7r1flq_3yl4,68
9
+ glitchlings/data/hokey_assets.json,sha256=9drpOv_PHHxs7jZOcgMr9G-Nswx_UuMzC4yQ0O8mIZ0,2890
8
10
  glitchlings/dlc/__init__.py,sha256=qlY4nuagy4AAWuPMwmuhwK2m36ktp-qkeiIxC7OXg34,305
9
- glitchlings/dlc/_shared.py,sha256=OmEjJmSs1pQ7j1ggR_H8D8RDp5E1ZqOnzSIxyqRE1aE,4407
10
- glitchlings/dlc/huggingface.py,sha256=9lW7TnTHA_bXyo4Is8pymZchrB9BIL1bMCP2p7LCMtg,2576
11
- glitchlings/dlc/prime.py,sha256=qGFI1d4BiOEIgQZ5v9QnlbYx4J4q-vNlh5tWZng11xs,8607
12
- glitchlings/dlc/pytorch.py,sha256=QaiIYyQ3koy2-enhUI9WY3SIMRX65gmsnjDvCsf8xbg,6233
13
- glitchlings/dlc/pytorch_lightning.py,sha256=Ls7Xh5Mg643Tyk3KvCMq_MsB4vvekfUUZOhE0z4K22c,8074
11
+ glitchlings/dlc/_shared.py,sha256=moQwnJdHMo-dx5uK0zM8XdPy5cs-OlAzYKVWfUP0RSQ,4407
12
+ glitchlings/dlc/huggingface.py,sha256=BWceVUm28Yd8b7Tf_lmnPGgSVN1hZEmseRjf17nAPJw,2576
13
+ glitchlings/dlc/prime.py,sha256=zhm0oTVKNDa1ByxZTP42rmMVaJDyx77w2g6NHy4jndc,8607
14
+ glitchlings/dlc/pytorch.py,sha256=Laqz5o0UZXE1X9P-Qxb6KE0D5lcBKAoBbKsn-fnd6c0,6233
15
+ glitchlings/dlc/pytorch_lightning.py,sha256=rhDwRgOGVzaEet27QG8GigKh6hK5ZdTBOxGE2G0MPxw,8005
14
16
  glitchlings/lexicon/__init__.py,sha256=ooEPcAJhCI2Nw5z8OsQ0EtVpKBfiTrU0-AQJq8Zn2nQ,6007
15
- glitchlings/lexicon/_cache.py,sha256=aWSUb5Ex162dr3HouO2Ic2O8ck3ViEFWs8-XMLKMeJ0,4086
17
+ glitchlings/lexicon/_cache.py,sha256=oWdQtiU3csUAs-fYJRHuS_314j8JJ-T8AL73-GxVXzA,4072
16
18
  glitchlings/lexicon/metrics.py,sha256=VBFfFpxjiEwZtK-jS55H8xP7MTC_0OjY8lQ5zSQ9aTY,4572
17
- glitchlings/lexicon/vector.py,sha256=yWf-vlN2OEHnTCPu7tgDnJbhm47cmhdrTtjR0RZKkUM,22530
18
- glitchlings/lexicon/wordnet.py,sha256=YcOliPHuesdlekmGspwAyR4fWDDxZWR_dIt_Nsq7ag0,7608
19
+ glitchlings/lexicon/vector.py,sha256=hUtMewkdLdMscQWjFgWcspSg0C_C0JdhFSMAs0HA-i0,22600
20
+ glitchlings/lexicon/wordnet.py,sha256=8wEN3XHI8Cf01h4h9cP4F25FLlGIoUrvk5l2nsgkfx4,7576
19
21
  glitchlings/lexicon/data/default_vector_cache.json,sha256=3iVH0nX8EqMbqOkKWvORCGYtN0LKHn5G_Snlizsnm1g,997
20
22
  glitchlings/util/__init__.py,sha256=vc3EAY8ehRjbOiryFdaqvvljXcyNGtZSPiEp9ok1vVw,4674
21
23
  glitchlings/util/adapters.py,sha256=psxQFYSFmh1u7NuqtIrKwQP5FOhOrZoxZzc7X7DDi9U,693
22
- glitchlings/zoo/__init__.py,sha256=1dWZPCTXuh5J7WdCxHX7ZX9bNd8bakzYndxQRhF43i8,5243
24
+ glitchlings/util/hokey_generator.py,sha256=NCbOGw55SG720VYnuwEdFAfdOvYlmjsZ0hAr5Y0Ja0Y,4483
25
+ glitchlings/util/stretch_locator.py,sha256=INTMz7PXe-0HoDaMnQIQxJ266nABMXBYD67oJM8ur8g,4194
26
+ glitchlings/util/stretchability.py,sha256=W_FC1-6x1LICLjEyv5rqpsnKnBcj6Lhc5u5A_5cWWIM,12819
27
+ glitchlings/zoo/__init__.py,sha256=xCVVAHLJ4lxl1JQUzff9fr84pJbB04fyXBjuX1m3YSw,5339
23
28
  glitchlings/zoo/_ocr_confusions.py,sha256=Ju2_avXiwsr1p8zWFUTOzMxJ8vT5PpYobuGIn4L_sqI,1204
24
29
  glitchlings/zoo/_rate.py,sha256=tkIlXHewE8s9w1jpCw8ZzkVN31690FAnvTM_R3dCIpY,3579
25
30
  glitchlings/zoo/_rust_extensions.py,sha256=Bsd0kiPB1rUn5x3k7ykydFuk2YSvXS9CQGPRlE5XzXY,4211
@@ -28,6 +33,7 @@ glitchlings/zoo/_text_utils.py,sha256=fS5L_eq-foBbBdiv4ymI8-O0D0csc3yDekHpX8bqfV
28
33
  glitchlings/zoo/adjax.py,sha256=XT5kKqPOUPgKSDOcR__HBnv4OXtBKee40GuNNmm1GYI,3518
29
34
  glitchlings/zoo/apostrofae.py,sha256=qjpfnxdPWXMNzZnSD7UMfvHyzGKa7TLsvUhMsIvjwj8,3822
30
35
  glitchlings/zoo/core.py,sha256=dRzUTmhOswDV0hWcaD-Sx7rZdPlrszn7C_1G2xd4ECk,20675
36
+ glitchlings/zoo/hokey.py,sha256=71z1JGzKGb_N8Wo7LVuS7qAqH2T0Y0h2LemIw66eprs,5298
31
37
  glitchlings/zoo/jargoyle.py,sha256=2TGU_z8gILwQ-lyZEqvmsrLupxqb8ydlDiwcp-O6WwY,11679
32
38
  glitchlings/zoo/mim1c.py,sha256=-fgodKWZq--Xw8L2t1EqNbsh48bwX5jZxmiXdoaQShI,3437
33
39
  glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
@@ -39,9 +45,9 @@ glitchlings/zoo/typogre.py,sha256=BQotNL-gn4PXQI9j63d2w9mQ4X6ZJKSJ4de-GN-gmUI,66
39
45
  glitchlings/zoo/zeedub.py,sha256=aNnjZGeTmMqA2WjgtGh7Fgl9pUQo3AZ2B-tYs2ZFOQE,4840
40
46
  glitchlings/zoo/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
47
  glitchlings/zoo/assets/apostrofae_pairs.json,sha256=bfjSEaMTI_axGNJ93nI431KXU0IVp7ayO42gGcMgL6U,521
42
- glitchlings-0.4.4.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
43
- glitchlings-0.4.4.dist-info/METADATA,sha256=onpPJTtANv13MyyvYS930OWiJb_Ipxvje5sTrNmNPQw,32388
44
- glitchlings-0.4.4.dist-info/WHEEL,sha256=o0zAoJUNILGJZxEeFPjb7OMHp_94eqIkZBeZ0gvgOpo,114
45
- glitchlings-0.4.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
46
- glitchlings-0.4.4.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
47
- glitchlings-0.4.4.dist-info/RECORD,,
48
+ glitchlings-0.4.5.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
49
+ glitchlings-0.4.5.dist-info/METADATA,sha256=3h0Eb8nUL3NXb67OQBYEnuL-aNs0oJ8-dLVAQDeFfD8,33498
50
+ glitchlings-0.4.5.dist-info/WHEEL,sha256=o0zAoJUNILGJZxEeFPjb7OMHp_94eqIkZBeZ0gvgOpo,114
51
+ glitchlings-0.4.5.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
52
+ glitchlings-0.4.5.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
53
+ glitchlings-0.4.5.dist-info/RECORD,,