glitchlings 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {glitchlings-0.2.5 → glitchlings-0.2.6}/PKG-INFO +2 -2
- {glitchlings-0.2.5 → glitchlings-0.2.6}/README.md +1 -1
- {glitchlings-0.2.5 → glitchlings-0.2.6}/pyproject.toml +1 -1
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/prime.py +18 -1
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/core.py +12 -4
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/redactyl.py +4 -1
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/PKG-INFO +2 -2
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/SOURCES.txt +1 -0
- glitchlings-0.2.6/tests/test_benchmarks.py +60 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_prime_echo_chamber.py +24 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_rust_backed_glitchlings.py +24 -3
- {glitchlings-0.2.5 → glitchlings-0.2.6}/LICENSE +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/MANIFEST.in +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/Cargo.lock +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/Cargo.toml +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/Cargo.toml +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/assets/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/build.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/glitch_ops.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/lib.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/pipeline.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/resources.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/rng.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/text_buffer.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/typogre.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/zeedub.rs +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/setup.cfg +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/__init__.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/__main__.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/__init__.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/huggingface.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/main.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/util/__init__.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/__init__.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/_ocr_confusions.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/_rate.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/jargoyle.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/mim1c.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/reduple.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/rushmore.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/scannequin.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/typogre.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/zeedub.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/dependency_links.txt +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/entry_points.txt +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/requires.txt +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/top_level.txt +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_cli.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_dataset_corruption.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_gaggle.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_glitchling_core.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_glitchlings_determinism.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_huggingface_dlc.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_jargoyle.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_keyboard_layouts.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_parameter_effects.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_property_based.py +0 -0
- {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_util.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.2.5
|
3
|
+
Version: 0.2.6
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Author: osoleve
|
6
6
|
License: Apache License
|
@@ -296,7 +296,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
296
296
|
|
297
297
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
298
298
|
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
299
|
-
integrations, and the
|
299
|
+
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
300
300
|
|
301
301
|
## Motivation
|
302
302
|
|
@@ -55,7 +55,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
55
55
|
|
56
56
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
57
57
|
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
58
|
-
integrations, and the
|
58
|
+
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
59
59
|
|
60
60
|
## Motivation
|
61
61
|
|
@@ -49,7 +49,24 @@ def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[st
|
|
49
49
|
if candidate in available:
|
50
50
|
return [candidate]
|
51
51
|
|
52
|
-
|
52
|
+
try:
|
53
|
+
dataset_length = len(dataset) # type: ignore[arg-type]
|
54
|
+
except TypeError:
|
55
|
+
preview_rows: list[dict[str, Any]]
|
56
|
+
take_fn = getattr(dataset, "take", None)
|
57
|
+
if callable(take_fn):
|
58
|
+
preview_rows = list(take_fn(1))
|
59
|
+
else:
|
60
|
+
iterator = iter(dataset)
|
61
|
+
try:
|
62
|
+
first_row = next(iterator)
|
63
|
+
except StopIteration:
|
64
|
+
preview_rows = []
|
65
|
+
else:
|
66
|
+
preview_rows = [first_row]
|
67
|
+
sample = dict(preview_rows[0]) if preview_rows else {}
|
68
|
+
else:
|
69
|
+
sample = dataset[0] if dataset_length else {}
|
53
70
|
inferred = [
|
54
71
|
name
|
55
72
|
for name in dataset.column_names
|
@@ -27,17 +27,25 @@ log = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
|
29
29
|
_PIPELINE_FEATURE_FLAG_ENV = "GLITCHLINGS_RUST_PIPELINE"
|
30
|
+
_PIPELINE_ENABLE_VALUES = {"1", "true", "yes", "on"}
|
31
|
+
_PIPELINE_DISABLE_VALUES = {"0", "false", "no", "off"}
|
30
32
|
|
31
33
|
|
32
34
|
def _pipeline_feature_flag_enabled() -> bool:
|
33
|
-
"""Return ``True`` when the environment explicitly
|
35
|
+
"""Return ``True`` when the environment does not explicitly disable the Rust pipeline."""
|
34
36
|
|
35
37
|
value = os.environ.get(_PIPELINE_FEATURE_FLAG_ENV)
|
36
38
|
if value is None:
|
37
|
-
return False
|
39
|
+
return True
|
38
40
|
|
39
41
|
normalized = value.strip().lower()
|
40
|
-
|
42
|
+
if normalized in _PIPELINE_DISABLE_VALUES:
|
43
|
+
return False
|
44
|
+
|
45
|
+
if normalized in _PIPELINE_ENABLE_VALUES:
|
46
|
+
return True
|
47
|
+
|
48
|
+
return True
|
41
49
|
|
42
50
|
if TYPE_CHECKING: # pragma: no cover - typing only
|
43
51
|
from datasets import Dataset # type: ignore
|
@@ -356,7 +364,7 @@ class Gaggle(Glitchling):
|
|
356
364
|
|
357
365
|
@staticmethod
|
358
366
|
def rust_pipeline_enabled() -> bool:
|
359
|
-
"""Return ``True`` when the Rust pipeline is available and
|
367
|
+
"""Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
|
360
368
|
|
361
369
|
return Gaggle.rust_pipeline_supported() and _pipeline_feature_flag_enabled()
|
362
370
|
|
@@ -86,7 +86,10 @@ def _python_redact_words(
|
|
86
86
|
if core_length <= 0:
|
87
87
|
core_length = 1
|
88
88
|
weights.append(1.0 if unweighted else float(core_length))
|
89
|
-
|
89
|
+
raw_quota = len(word_indices) * rate
|
90
|
+
num_to_redact = int(raw_quota)
|
91
|
+
if rate > 0:
|
92
|
+
num_to_redact = max(1, num_to_redact)
|
90
93
|
if num_to_redact > len(word_indices):
|
91
94
|
raise ValueError("Sample larger than population or is negative")
|
92
95
|
indices_to_redact = _weighted_sample_without_replacement(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.2.5
|
3
|
+
Version: 0.2.6
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Author: osoleve
|
6
6
|
License: Apache License
|
@@ -296,7 +296,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
296
296
|
|
297
297
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
298
298
|
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
299
|
-
integrations, and the
|
299
|
+
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
300
300
|
|
301
301
|
## Motivation
|
302
302
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
"""Regression checks for the benchmarking utilities."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from collections.abc import Mapping
|
6
|
+
from pathlib import Path
|
7
|
+
import sys
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
|
11
|
+
ROOT = Path(__file__).resolve().parents[1]
|
12
|
+
if str(ROOT) not in sys.path:
|
13
|
+
sys.path.insert(0, str(ROOT))
|
14
|
+
|
15
|
+
from benchmarks.pipeline_benchmark import (
|
16
|
+
BenchmarkResult,
|
17
|
+
collect_benchmark_results,
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
@pytest.fixture(scope="module")
|
22
|
+
def benchmark_results() -> Mapping[str, BenchmarkResult]:
|
23
|
+
"""Collect a small sample of benchmark data once per test run."""
|
24
|
+
|
25
|
+
results = collect_benchmark_results(iterations=5)
|
26
|
+
return {result.label: result for result in results}
|
27
|
+
|
28
|
+
|
29
|
+
def test_collect_benchmark_results_structure(
|
30
|
+
benchmark_results: Mapping[str, BenchmarkResult],
|
31
|
+
) -> None:
|
32
|
+
"""Top-level sanity check that the benchmark harness returns populated results."""
|
33
|
+
|
34
|
+
assert benchmark_results
|
35
|
+
assert {"short", "medium", "long"}.issubset(benchmark_results.keys())
|
36
|
+
for result in benchmark_results.values():
|
37
|
+
assert result.char_count > 0
|
38
|
+
assert result.python.mean_seconds >= 0
|
39
|
+
assert result.python.stdev_seconds >= 0
|
40
|
+
|
41
|
+
|
42
|
+
@pytest.mark.parametrize(
|
43
|
+
("label", "threshold"),
|
44
|
+
[
|
45
|
+
("short", 0.01),
|
46
|
+
("medium", 0.03),
|
47
|
+
("long", 0.1),
|
48
|
+
],
|
49
|
+
)
|
50
|
+
def test_python_pipeline_regression_guard(
|
51
|
+
benchmark_results: Mapping[str, BenchmarkResult],
|
52
|
+
label: str,
|
53
|
+
threshold: float,
|
54
|
+
) -> None:
|
55
|
+
"""Fail fast if the Python pipeline slows down dramatically on canonical samples."""
|
56
|
+
|
57
|
+
mean_seconds = benchmark_results[label].python.mean_seconds
|
58
|
+
assert mean_seconds <= threshold, (
|
59
|
+
f"Python pipeline mean for '{label}' text exceeded {threshold:.3f}s: {mean_seconds:.3f}s"
|
60
|
+
)
|
@@ -179,6 +179,30 @@ class _RecordingGaggle:
|
|
179
179
|
return dataset
|
180
180
|
|
181
181
|
|
182
|
+
|
183
|
+
def test_prime_resolve_columns_handles_streaming_dataset():
|
184
|
+
row = {"context": "alpha", "score": 1, "response": "beta"}
|
185
|
+
|
186
|
+
class StreamingDataset:
|
187
|
+
def __init__(self):
|
188
|
+
self.column_names = ["context", "score", "response"]
|
189
|
+
|
190
|
+
def __len__(self):
|
191
|
+
raise TypeError("Streaming dataset does not define __len__.")
|
192
|
+
|
193
|
+
def __getitem__(self, index):
|
194
|
+
raise TypeError("Streaming dataset does not support indexing.")
|
195
|
+
|
196
|
+
def take(self, n):
|
197
|
+
return [row][:n]
|
198
|
+
|
199
|
+
def __iter__(self):
|
200
|
+
return iter([row])
|
201
|
+
|
202
|
+
inferred = prime._resolve_columns(StreamingDataset(), None)
|
203
|
+
|
204
|
+
assert inferred == ["context", "response"]
|
205
|
+
|
182
206
|
def test_load_environment_respects_explicit_columns(monkeypatch):
|
183
207
|
dataset = Dataset.from_dict({"prompt": ["alpha"], "extra": ["beta"]})
|
184
208
|
stub = _RecordingGaggle()
|
@@ -249,6 +249,23 @@ def test_redactyl_merge_adjacent_blocks():
|
|
249
249
|
assert result == expected == "█████████████████"
|
250
250
|
|
251
251
|
|
252
|
+
|
253
|
+
def test_redactyl_zero_rate_is_noop(monkeypatch):
|
254
|
+
text = "alpha beta gamma"
|
255
|
+
monkeypatch.setattr(redactyl_module, "_redact_words_rust", None, raising=False)
|
256
|
+
|
257
|
+
result = redactyl_module.redact_words(text, rate=0.0, seed=42)
|
258
|
+
assert result == text
|
259
|
+
|
260
|
+
python_result = redactyl_module._python_redact_words(
|
261
|
+
text,
|
262
|
+
replacement_char=redactyl_module.FULL_BLOCK,
|
263
|
+
rate=0.0,
|
264
|
+
merge_adjacent=False,
|
265
|
+
rng=random.Random(42),
|
266
|
+
)
|
267
|
+
assert python_result == text
|
268
|
+
|
252
269
|
def test_redactyl_empty_text_raises_value_error():
|
253
270
|
message = "contains no redactable words"
|
254
271
|
with pytest.raises(ValueError, match=message):
|
@@ -425,9 +442,9 @@ def test_gaggle_python_fallback_when_pipeline_disabled(monkeypatch):
|
|
425
442
|
pytest.importorskip("glitchlings._zoo_rust")
|
426
443
|
|
427
444
|
def _fail(*_args: object, **_kwargs: object) -> str:
|
428
|
-
raise AssertionError("Rust pipeline should not run when
|
445
|
+
raise AssertionError("Rust pipeline should not run when explicitly disabled")
|
429
446
|
|
430
|
-
monkeypatch.
|
447
|
+
monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "0")
|
431
448
|
monkeypatch.setattr(core_module, "_compose_glitchlings_rust", _fail, raising=False)
|
432
449
|
|
433
450
|
gaggle = core_module.Gaggle(
|
@@ -500,10 +517,14 @@ def test_pipeline_falls_back_for_incomplete_operation(monkeypatch):
|
|
500
517
|
|
501
518
|
def test_rust_pipeline_feature_flag_introspection(monkeypatch):
|
502
519
|
monkeypatch.delenv("GLITCHLINGS_RUST_PIPELINE", raising=False)
|
503
|
-
assert
|
520
|
+
assert core_module._pipeline_feature_flag_enabled()
|
504
521
|
assert core_module.Gaggle.rust_pipeline_supported() is (
|
505
522
|
core_module._compose_glitchlings_rust is not None
|
506
523
|
)
|
524
|
+
assert core_module.Gaggle.rust_pipeline_enabled() is core_module.Gaggle.rust_pipeline_supported()
|
525
|
+
|
526
|
+
monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "0")
|
527
|
+
assert not core_module._pipeline_feature_flag_enabled()
|
507
528
|
assert not core_module.Gaggle.rust_pipeline_enabled()
|
508
529
|
|
509
530
|
monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "1")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|