glitchlings 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
glitchlings/dlc/prime.py CHANGED
@@ -1,10 +1,59 @@
1
+ """Integration helpers for the optional verifiers prime DLC."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterable, Sequence
1
6
  from enum import Enum
2
- import functools as ft
3
7
 
4
8
  import verifiers as vf
5
- from datasets import Dataset
6
9
 
7
- from ..zoo import Glitchling, Gaggle, Mim1c, Typogre, summon
10
+ try:
11
+ from datasets import Dataset
12
+ except ModuleNotFoundError: # pragma: no cover - optional dependency
13
+ Dataset = object # type: ignore[assignment]
14
+
15
+ from ..zoo import Gaggle, Glitchling, Mim1c, Typogre, summon
16
+
17
+
18
+ def _resolve_environment(env: str | vf.Environment) -> vf.Environment:
19
+ """Return a fully-instantiated verifier environment."""
20
+
21
+ if isinstance(env, str):
22
+ env = vf.load_environment(env)
23
+
24
+ if not isinstance(env, vf.Environment):
25
+ raise TypeError("Invalid environment type")
26
+
27
+ return env
28
+
29
+
30
+ def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[str]:
31
+ """Identify which dataset columns should be corrupted."""
32
+
33
+ available = set(dataset.column_names)
34
+
35
+ if columns is not None:
36
+ missing = sorted(set(columns) - available)
37
+ if missing:
38
+ missing_str = ", ".join(missing)
39
+ raise ValueError(f"Columns not found in dataset: {missing_str}")
40
+ return list(columns)
41
+
42
+ for candidate in ("prompt", "question"):
43
+ if candidate in available:
44
+ return [candidate]
45
+
46
+ sample = dataset[0] if len(dataset) else {}
47
+ inferred = [
48
+ name
49
+ for name in dataset.column_names
50
+ if isinstance(sample.get(name), str)
51
+ ]
52
+
53
+ if inferred:
54
+ return inferred
55
+
56
+ raise ValueError("Unable to determine which dataset columns to corrupt.")
8
57
 
9
58
 
10
59
  class Difficulty(Enum):
@@ -18,35 +67,47 @@ class Difficulty(Enum):
18
67
 
19
68
 
20
69
  def tutorial_level(
21
- env: vf.Environment | str, seed=151, difficulty: Difficulty = Difficulty.Normal
70
+ env: vf.Environment | str,
71
+ seed: int = 151,
72
+ difficulty: Difficulty = Difficulty.Normal,
22
73
  ) -> vf.Environment:
23
- """Create a low-corruption environment."""
74
+ """Create a low-corruption environment using tuned defaults."""
24
75
 
25
76
  tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
26
77
  tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
27
78
 
28
- glitchlings: Gaggle = summon([tuned_mim1c, tuned_typogre], seed=seed)
79
+ return load_environment(
80
+ env,
81
+ glitchlings=[tuned_mim1c, tuned_typogre],
82
+ seed=seed,
83
+ )
29
84
 
30
- if isinstance(env, str):
31
- env = vf.load_environment(env)
32
85
 
33
- assert isinstance(env, vf.Environment), "Invalid environment type"
86
+ def load_environment(
87
+ env: str | vf.Environment,
88
+ glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle | None = None,
89
+ *,
90
+ seed: int = 151,
91
+ columns: Sequence[str] | None = None,
92
+ ) -> vf.Environment:
93
+ """Load an environment and optionally corrupt it with glitchlings."""
94
+
95
+ environment = _resolve_environment(env)
34
96
 
35
- if "prompt" in env.dataset.column_names:
36
- env.dataset = glitchlings.corrupt_dataset(env.dataset, ["prompt"])
37
- elif "question" in env.dataset.column_names:
38
- env.dataset = glitchlings.corrupt_dataset(env.dataset, ["question"])
39
- else:
40
- raise ValueError("Can't find prompt or question column")
97
+ if glitchlings is None:
98
+ return environment
41
99
 
42
- return env
100
+ if isinstance(glitchlings, Gaggle):
101
+ gaggle = glitchlings
102
+ else:
103
+ if isinstance(glitchlings, (Glitchling, str)):
104
+ resolved = [glitchlings]
105
+ else:
106
+ resolved = list(glitchlings)
43
107
 
108
+ gaggle = summon(resolved, seed=seed)
44
109
 
45
- def load_environment(
46
- env: str | vf.Environment,
47
- seed=151,
48
- difficulty: Difficulty = Difficulty.Normal,
49
- loader=tutorial_level,
50
- ) -> vf.Environment:
51
- """Load an environment by name."""
52
- return loader(env, seed=seed, difficulty=difficulty)
110
+ dataset = environment.dataset
111
+ corrupt_columns = _resolve_columns(dataset, columns)
112
+ environment.dataset = gaggle.corrupt_dataset(dataset, corrupt_columns)
113
+ return environment
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Monsters for your language games.
5
5
  Project-URL: Homepage, https://github.com/osoleve/glitchlings
6
6
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
@@ -225,6 +225,7 @@ Requires-Dist: datasets>=4.0.0
225
225
  Requires-Dist: jellyfish>=1.2.0
226
226
  Requires-Dist: nltk>=3.9.1
227
227
  Provides-Extra: dev
228
+ Requires-Dist: hypothesis>=6.100.0; extra == 'dev'
228
229
  Requires-Dist: pytest>=8.0.0; extra == 'dev'
229
230
  Provides-Extra: prime
230
231
  Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
@@ -283,6 +284,30 @@ print(gaggle(SAMPLE_TEXT))
283
284
 
284
285
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
285
286
 
287
+ ## Usage
288
+
289
+ Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
290
+
291
+ - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
292
+ - **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
293
+
294
+ ### Prime Intellect environments
295
+
296
+ After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
297
+
298
+ ```python
299
+ from glitchlings import Mim1c, Typogre
300
+ from glitchlings.dlc.prime import load_environment
301
+
302
+ env = load_environment(
303
+ "osoleve/syllabify-en",
304
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
305
+ seed=404,
306
+ )
307
+ ```
308
+
309
+ Skip the `glitchlings` argument to receive the verifier environment with its dataset left untouched.
310
+
286
311
  ## Motivation
287
312
 
288
313
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -2,7 +2,7 @@ glitchlings/__init__.py,sha256=yD0BaldUpcc_QlHVca1z1iwpOp8ne1H9YVQHc85d1So,580
2
2
  glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
3
3
  glitchlings/main.py,sha256=1pdVqytcrkh_GxOb0UPnZ0NzYKMoUnXmAWQB4cY5SEg,6199
4
4
  glitchlings/dlc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- glitchlings/dlc/prime.py,sha256=WnLIon2WbdPGx_PK4vF6nOwJICXudZ6zKGR1hVES4Oc,1452
5
+ glitchlings/dlc/prime.py,sha256=3ugrF7SQTh64JTiH3F3Xii2m1eybRaVRX5sqd1WxrME,3078
6
6
  glitchlings/util/__init__.py,sha256=OCpWFtloU-sATBv2XpBGlkR7UFR6RemUtuCheuRA4yw,4018
7
7
  glitchlings/zoo/__init__.py,sha256=hXQci2tysMoRHXiR6NDkWtGkKgcO0xxsMB91eiM_Llc,1344
8
8
  glitchlings/zoo/core.py,sha256=5f9pWBZZSDADiUSs-xUahIqCEb9EUq-YcR_N5HzBAw0,8021
@@ -13,8 +13,8 @@ glitchlings/zoo/reduple.py,sha256=ML4TLQNfOkSaF7G9Sjy_i9ILB4FIl1I101CIppNGmOw,27
13
13
  glitchlings/zoo/rushmore.py,sha256=FH-pHnj1XKFzLRRQIHOojTkbkCpipNKnxSfxP9UGYZI,2528
14
14
  glitchlings/zoo/scannequin.py,sha256=4QP_dpReUxno0mk5Hnn2uCfd3B6eDa7ZGePuW1dyqBU,4630
15
15
  glitchlings/zoo/typogre.py,sha256=8aYULO4nvdyFDsknAfrlQYKeWz_Tgh5uXAkF3omHe0o,5358
16
- glitchlings-0.1.3.dist-info/METADATA,sha256=fwqJfu1FrQwJfAnc5UQIaaN3L7er_FWek0cMzRFSVuw,24978
17
- glitchlings-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- glitchlings-0.1.3.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
19
- glitchlings-0.1.3.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
20
- glitchlings-0.1.3.dist-info/RECORD,,
16
+ glitchlings-0.1.4.dist-info/METADATA,sha256=tbagFiEgfGaqU2DrARiOsUJKlwGWaUFXVIk7Flcgd7M,26059
17
+ glitchlings-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ glitchlings-0.1.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
19
+ glitchlings-0.1.4.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
20
+ glitchlings-0.1.4.dist-info/RECORD,,