glitchlings 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/dlc/prime.py +85 -24
- {glitchlings-0.1.3.dist-info → glitchlings-0.1.4.dist-info}/METADATA +26 -1
- {glitchlings-0.1.3.dist-info → glitchlings-0.1.4.dist-info}/RECORD +6 -6
- {glitchlings-0.1.3.dist-info → glitchlings-0.1.4.dist-info}/WHEEL +0 -0
- {glitchlings-0.1.3.dist-info → glitchlings-0.1.4.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.1.3.dist-info → glitchlings-0.1.4.dist-info}/licenses/LICENSE +0 -0
glitchlings/dlc/prime.py
CHANGED
@@ -1,10 +1,59 @@
|
|
1
|
+
"""Integration helpers for the optional verifiers prime DLC."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from collections.abc import Iterable, Sequence
|
1
6
|
from enum import Enum
|
2
|
-
import functools as ft
|
3
7
|
|
4
8
|
import verifiers as vf
|
5
|
-
from datasets import Dataset
|
6
9
|
|
7
|
-
|
10
|
+
try:
|
11
|
+
from datasets import Dataset
|
12
|
+
except ModuleNotFoundError: # pragma: no cover - optional dependency
|
13
|
+
Dataset = object # type: ignore[assignment]
|
14
|
+
|
15
|
+
from ..zoo import Gaggle, Glitchling, Mim1c, Typogre, summon
|
16
|
+
|
17
|
+
|
18
|
+
def _resolve_environment(env: str | vf.Environment) -> vf.Environment:
|
19
|
+
"""Return a fully-instantiated verifier environment."""
|
20
|
+
|
21
|
+
if isinstance(env, str):
|
22
|
+
env = vf.load_environment(env)
|
23
|
+
|
24
|
+
if not isinstance(env, vf.Environment):
|
25
|
+
raise TypeError("Invalid environment type")
|
26
|
+
|
27
|
+
return env
|
28
|
+
|
29
|
+
|
30
|
+
def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[str]:
|
31
|
+
"""Identify which dataset columns should be corrupted."""
|
32
|
+
|
33
|
+
available = set(dataset.column_names)
|
34
|
+
|
35
|
+
if columns is not None:
|
36
|
+
missing = sorted(set(columns) - available)
|
37
|
+
if missing:
|
38
|
+
missing_str = ", ".join(missing)
|
39
|
+
raise ValueError(f"Columns not found in dataset: {missing_str}")
|
40
|
+
return list(columns)
|
41
|
+
|
42
|
+
for candidate in ("prompt", "question"):
|
43
|
+
if candidate in available:
|
44
|
+
return [candidate]
|
45
|
+
|
46
|
+
sample = dataset[0] if len(dataset) else {}
|
47
|
+
inferred = [
|
48
|
+
name
|
49
|
+
for name in dataset.column_names
|
50
|
+
if isinstance(sample.get(name), str)
|
51
|
+
]
|
52
|
+
|
53
|
+
if inferred:
|
54
|
+
return inferred
|
55
|
+
|
56
|
+
raise ValueError("Unable to determine which dataset columns to corrupt.")
|
8
57
|
|
9
58
|
|
10
59
|
class Difficulty(Enum):
|
@@ -18,35 +67,47 @@ class Difficulty(Enum):
|
|
18
67
|
|
19
68
|
|
20
69
|
def tutorial_level(
|
21
|
-
env: vf.Environment | str,
|
70
|
+
env: vf.Environment | str,
|
71
|
+
seed: int = 151,
|
72
|
+
difficulty: Difficulty = Difficulty.Normal,
|
22
73
|
) -> vf.Environment:
|
23
|
-
"""Create a low-corruption environment."""
|
74
|
+
"""Create a low-corruption environment using tuned defaults."""
|
24
75
|
|
25
76
|
tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
|
26
77
|
tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
|
27
78
|
|
28
|
-
|
79
|
+
return load_environment(
|
80
|
+
env,
|
81
|
+
glitchlings=[tuned_mim1c, tuned_typogre],
|
82
|
+
seed=seed,
|
83
|
+
)
|
29
84
|
|
30
|
-
if isinstance(env, str):
|
31
|
-
env = vf.load_environment(env)
|
32
85
|
|
33
|
-
|
86
|
+
def load_environment(
|
87
|
+
env: str | vf.Environment,
|
88
|
+
glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle | None = None,
|
89
|
+
*,
|
90
|
+
seed: int = 151,
|
91
|
+
columns: Sequence[str] | None = None,
|
92
|
+
) -> vf.Environment:
|
93
|
+
"""Load an environment and optionally corrupt it with glitchlings."""
|
94
|
+
|
95
|
+
environment = _resolve_environment(env)
|
34
96
|
|
35
|
-
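if "prompt" in env.dataset.column_names:
|
36
|
-
env.dataset = glitchlings.corrupt_dataset(env.dataset, ["prompt"])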
37
|
-
elif "question" in env.dataset.column_names:
|
38
|
-
env.dataset = glitchlings.corrupt_dataset(env.dataset, ["question"])
|
39
|
-
else:
|
40
|
-
raise ValueError("Can't find prompt or question column")
|
97
|
+
if glitchlings is None:
|
98
|
+
return environment
|
41
99
|
|
42
|
-
|
100
|
+
if isinstance(glitchlings, Gaggle):
|
101
|
+
gaggle = glitchlings
|
102
|
+
else:
|
103
|
+
if isinstance(glitchlings, (Glitchling, str)):
|
104
|
+
resolved = [glitchlings]
|
105
|
+
else:
|
106
|
+
resolved = list(glitchlings)
|
43
107
|
|
108
|
+
gaggle = summon(resolved, seed=seed)
|
44
109
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
loader=tutorial_level,
|
50
|
-
) -> vf.Environment:
|
51
|
-
"""Load an environment by name."""
|
52
|
-
return loader(env, seed=seed, difficulty=difficulty)
|
110
|
+
dataset = environment.dataset
|
111
|
+
corrupt_columns = _resolve_columns(dataset, columns)
|
112
|
+
environment.dataset = gaggle.corrupt_dataset(dataset, corrupt_columns)
|
113
|
+
return environment
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.1.3
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Project-URL: Homepage, https://github.com/osoleve/glitchlings
|
6
6
|
Project-URL: Repository, https://github.com/osoleve/glitchlings.git
|
@@ -225,6 +225,7 @@ Requires-Dist: datasets>=4.0.0
|
|
225
225
|
Requires-Dist: jellyfish>=1.2.0
|
226
226
|
Requires-Dist: nltk>=3.9.1
|
227
227
|
Provides-Extra: dev
|
228
|
+
Requires-Dist: hypothesis>=6.100.0; extra == 'dev'
|
228
229
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
229
230
|
Provides-Extra: prime
|
230
231
|
Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
|
@@ -283,6 +284,30 @@ print(gaggle(SAMPLE_TEXT))
|
|
283
284
|
|
284
285
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
285
286
|
|
287
|
+
## Usage
|
288
|
+
|
289
|
+
Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
|
290
|
+
|
291
|
+
- **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
|
292
|
+
- **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
|
293
|
+
|
294
|
+
### Prime Intellect environments
|
295
|
+
|
296
|
+
After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
|
297
|
+
|
298
|
+
```python
|
299
|
+
from glitchlings import Mim1c, Typogre
|
300
|
+
from glitchlings.dlc.prime import load_environment
|
301
|
+
|
302
|
+
env = load_environment(
|
303
|
+
"osoleve/syllabify-en",
|
304
|
+
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
305
|
+
seed=404,
|
306
|
+
)
|
307
|
+
```
|
308
|
+
|
309
|
+
Skip the `glitchlings` argument to receive an untouched verifier dataset.
|
310
|
+
|
286
311
|
## Motivation
|
287
312
|
|
288
313
|
If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
|
@@ -2,7 +2,7 @@ glitchlings/__init__.py,sha256=yD0BaldUpcc_QlHVca1z1iwpOp8ne1H9YVQHc85d1So,580
|
|
2
2
|
glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
|
3
3
|
glitchlings/main.py,sha256=1pdVqytcrkh_GxOb0UPnZ0NzYKMoUnXmAWQB4cY5SEg,6199
|
4
4
|
glitchlings/dlc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
glitchlings/dlc/prime.py,sha256=
|
5
|
+
glitchlings/dlc/prime.py,sha256=3ugrF7SQTh64JTiH3F3Xii2m1eybRaVRX5sqd1WxrME,3078
|
6
6
|
glitchlings/util/__init__.py,sha256=OCpWFtloU-sATBv2XpBGlkR7UFR6RemUtuCheuRA4yw,4018
|
7
7
|
glitchlings/zoo/__init__.py,sha256=hXQci2tysMoRHXiR6NDkWtGkKgcO0xxsMB91eiM_Llc,1344
|
8
8
|
glitchlings/zoo/core.py,sha256=5f9pWBZZSDADiUSs-xUahIqCEb9EUq-YcR_N5HzBAw0,8021
|
@@ -13,8 +13,8 @@ glitchlings/zoo/reduple.py,sha256=ML4TLQNfOkSaF7G9Sjy_i9ILB4FIl1I101CIppNGmOw,27
|
|
13
13
|
glitchlings/zoo/rushmore.py,sha256=FH-pHnj1XKFzLRRQIHOojTkbkCpipNKnxSfxP9UGYZI,2528
|
14
14
|
glitchlings/zoo/scannequin.py,sha256=4QP_dpReUxno0mk5Hnn2uCfd3B6eDa7ZGePuW1dyqBU,4630
|
15
15
|
glitchlings/zoo/typogre.py,sha256=8aYULO4nvdyFDsknAfrlQYKeWz_Tgh5uXAkF3omHe0o,5358
|
16
|
-
glitchlings-0.1.
|
17
|
-
glitchlings-0.1.
|
18
|
-
glitchlings-0.1.
|
19
|
-
glitchlings-0.1.
|
20
|
-
glitchlings-0.1.
|
16
|
+
glitchlings-0.1.4.dist-info/METADATA,sha256=tbagFiEgfGaqU2DrARiOsUJKlwGWaUFXVIk7Flcgd7M,26059
|
17
|
+
glitchlings-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
glitchlings-0.1.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
19
|
+
glitchlings-0.1.4.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
20
|
+
glitchlings-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|