glitchlings 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {glitchlings-0.2.5 → glitchlings-0.2.6}/PKG-INFO +2 -2
  2. {glitchlings-0.2.5 → glitchlings-0.2.6}/README.md +1 -1
  3. {glitchlings-0.2.5 → glitchlings-0.2.6}/pyproject.toml +1 -1
  4. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/prime.py +18 -1
  5. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/core.py +12 -4
  6. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/redactyl.py +4 -1
  7. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/PKG-INFO +2 -2
  8. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/SOURCES.txt +1 -0
  9. glitchlings-0.2.6/tests/test_benchmarks.py +60 -0
  10. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_prime_echo_chamber.py +24 -0
  11. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_rust_backed_glitchlings.py +24 -3
  12. {glitchlings-0.2.5 → glitchlings-0.2.6}/LICENSE +0 -0
  13. {glitchlings-0.2.5 → glitchlings-0.2.6}/MANIFEST.in +0 -0
  14. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/Cargo.lock +0 -0
  15. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/Cargo.toml +0 -0
  16. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/Cargo.toml +0 -0
  17. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/assets/ocr_confusions.tsv +0 -0
  18. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/build.rs +0 -0
  19. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/glitch_ops.rs +0 -0
  20. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/lib.rs +0 -0
  21. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/pipeline.rs +0 -0
  22. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/resources.rs +0 -0
  23. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/rng.rs +0 -0
  24. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/text_buffer.rs +0 -0
  25. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/typogre.rs +0 -0
  26. {glitchlings-0.2.5 → glitchlings-0.2.6}/rust/zoo/src/zeedub.rs +0 -0
  27. {glitchlings-0.2.5 → glitchlings-0.2.6}/setup.cfg +0 -0
  28. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/__init__.py +0 -0
  29. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/__main__.py +0 -0
  30. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/__init__.py +0 -0
  31. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/dlc/huggingface.py +0 -0
  32. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/main.py +0 -0
  33. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/util/__init__.py +0 -0
  34. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/__init__.py +0 -0
  35. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/_ocr_confusions.py +0 -0
  36. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/_rate.py +0 -0
  37. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/jargoyle.py +0 -0
  38. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/mim1c.py +0 -0
  39. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/ocr_confusions.tsv +0 -0
  40. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/reduple.py +0 -0
  41. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/rushmore.py +0 -0
  42. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/scannequin.py +0 -0
  43. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/typogre.py +0 -0
  44. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings/zoo/zeedub.py +0 -0
  45. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/dependency_links.txt +0 -0
  46. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/entry_points.txt +0 -0
  47. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/requires.txt +0 -0
  48. {glitchlings-0.2.5 → glitchlings-0.2.6}/src/glitchlings.egg-info/top_level.txt +0 -0
  49. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_cli.py +0 -0
  50. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_dataset_corruption.py +0 -0
  51. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_gaggle.py +0 -0
  52. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_glitchling_core.py +0 -0
  53. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_glitchlings_determinism.py +0 -0
  54. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_huggingface_dlc.py +0 -0
  55. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_jargoyle.py +0 -0
  56. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_keyboard_layouts.py +0 -0
  57. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_parameter_effects.py +0 -0
  58. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_property_based.py +0 -0
  59. {glitchlings-0.2.5 → glitchlings-0.2.6}/tests/test_util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -296,7 +296,7 @@ print(gaggle(SAMPLE_TEXT))
296
296
 
297
297
  Consult the [Glitchlings Usage Guide](docs/index.md)
298
298
  for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
299
- integrations, and the feature-flagged Rust pipeline.
299
+ integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
300
300
 
301
301
  ## Motivation
302
302
 
@@ -55,7 +55,7 @@ print(gaggle(SAMPLE_TEXT))
55
55
 
56
56
  Consult the [Glitchlings Usage Guide](docs/index.md)
57
57
  for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
58
- integrations, and the feature-flagged Rust pipeline.
58
+ integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
59
59
 
60
60
  ## Motivation
61
61
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.2.5"
3
+ version = "0.2.6"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -49,7 +49,24 @@ def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[st
49
49
  if candidate in available:
50
50
  return [candidate]
51
51
 
52
- sample = dataset[0] if len(dataset) else {}
52
+ try:
53
+ dataset_length = len(dataset) # type: ignore[arg-type]
54
+ except TypeError:
55
+ preview_rows: list[dict[str, Any]]
56
+ take_fn = getattr(dataset, "take", None)
57
+ if callable(take_fn):
58
+ preview_rows = list(take_fn(1))
59
+ else:
60
+ iterator = iter(dataset)
61
+ try:
62
+ first_row = next(iterator)
63
+ except StopIteration:
64
+ preview_rows = []
65
+ else:
66
+ preview_rows = [first_row]
67
+ sample = dict(preview_rows[0]) if preview_rows else {}
68
+ else:
69
+ sample = dataset[0] if dataset_length else {}
53
70
  inferred = [
54
71
  name
55
72
  for name in dataset.column_names
@@ -27,17 +27,25 @@ log = logging.getLogger(__name__)
27
27
 
28
28
 
29
29
  _PIPELINE_FEATURE_FLAG_ENV = "GLITCHLINGS_RUST_PIPELINE"
30
+ _PIPELINE_ENABLE_VALUES = {"1", "true", "yes", "on"}
31
+ _PIPELINE_DISABLE_VALUES = {"0", "false", "no", "off"}
30
32
 
31
33
 
32
34
  def _pipeline_feature_flag_enabled() -> bool:
33
- """Return ``True`` when the environment explicitly opts into the Rust pipeline."""
35
+ """Return ``True`` when the environment does not explicitly disable the Rust pipeline."""
34
36
 
35
37
  value = os.environ.get(_PIPELINE_FEATURE_FLAG_ENV)
36
38
  if value is None:
37
- return False
39
+ return True
38
40
 
39
41
  normalized = value.strip().lower()
40
- return normalized in {"1", "true", "yes", "on"}
42
+ if normalized in _PIPELINE_DISABLE_VALUES:
43
+ return False
44
+
45
+ if normalized in _PIPELINE_ENABLE_VALUES:
46
+ return True
47
+
48
+ return True
41
49
 
42
50
  if TYPE_CHECKING: # pragma: no cover - typing only
43
51
  from datasets import Dataset # type: ignore
@@ -356,7 +364,7 @@ class Gaggle(Glitchling):
356
364
 
357
365
  @staticmethod
358
366
  def rust_pipeline_enabled() -> bool:
359
- """Return ``True`` when the Rust pipeline is available and opted in."""
367
+ """Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
360
368
 
361
369
  return Gaggle.rust_pipeline_supported() and _pipeline_feature_flag_enabled()
362
370
 
@@ -86,7 +86,10 @@ def _python_redact_words(
86
86
  if core_length <= 0:
87
87
  core_length = 1
88
88
  weights.append(1.0 if unweighted else float(core_length))
89
- num_to_redact = max(1, int(len(word_indices) * rate))
89
+ raw_quota = len(word_indices) * rate
90
+ num_to_redact = int(raw_quota)
91
+ if rate > 0:
92
+ num_to_redact = max(1, num_to_redact)
90
93
  if num_to_redact > len(word_indices):
91
94
  raise ValueError("Sample larger than population or is negative")
92
95
  indices_to_redact = _weighted_sample_without_replacement(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -296,7 +296,7 @@ print(gaggle(SAMPLE_TEXT))
296
296
 
297
297
  Consult the [Glitchlings Usage Guide](docs/index.md)
298
298
  for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
299
- integrations, and the feature-flagged Rust pipeline.
299
+ integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
300
300
 
301
301
  ## Motivation
302
302
 
@@ -41,6 +41,7 @@ src/glitchlings/zoo/rushmore.py
41
41
  src/glitchlings/zoo/scannequin.py
42
42
  src/glitchlings/zoo/typogre.py
43
43
  src/glitchlings/zoo/zeedub.py
44
+ tests/test_benchmarks.py
44
45
  tests/test_cli.py
45
46
  tests/test_dataset_corruption.py
46
47
  tests/test_gaggle.py
@@ -0,0 +1,60 @@
1
+ """Regression checks for the benchmarking utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from pathlib import Path
7
+ import sys
8
+
9
+ import pytest
10
+
11
+ ROOT = Path(__file__).resolve().parents[1]
12
+ if str(ROOT) not in sys.path:
13
+ sys.path.insert(0, str(ROOT))
14
+
15
+ from benchmarks.pipeline_benchmark import (
16
+ BenchmarkResult,
17
+ collect_benchmark_results,
18
+ )
19
+
20
+
21
+ @pytest.fixture(scope="module")
22
+ def benchmark_results() -> Mapping[str, BenchmarkResult]:
23
+ """Collect a small sample of benchmark data once per test run."""
24
+
25
+ results = collect_benchmark_results(iterations=5)
26
+ return {result.label: result for result in results}
27
+
28
+
29
+ def test_collect_benchmark_results_structure(
30
+ benchmark_results: Mapping[str, BenchmarkResult],
31
+ ) -> None:
32
+ """Top-level sanity check that the benchmark harness returns populated results."""
33
+
34
+ assert benchmark_results
35
+ assert {"short", "medium", "long"}.issubset(benchmark_results.keys())
36
+ for result in benchmark_results.values():
37
+ assert result.char_count > 0
38
+ assert result.python.mean_seconds >= 0
39
+ assert result.python.stdev_seconds >= 0
40
+
41
+
42
+ @pytest.mark.parametrize(
43
+ ("label", "threshold"),
44
+ [
45
+ ("short", 0.01),
46
+ ("medium", 0.03),
47
+ ("long", 0.1),
48
+ ],
49
+ )
50
+ def test_python_pipeline_regression_guard(
51
+ benchmark_results: Mapping[str, BenchmarkResult],
52
+ label: str,
53
+ threshold: float,
54
+ ) -> None:
55
+ """Fail fast if the Python pipeline slows down dramatically on canonical samples."""
56
+
57
+ mean_seconds = benchmark_results[label].python.mean_seconds
58
+ assert mean_seconds <= threshold, (
59
+ f"Python pipeline mean for '{label}' text exceeded {threshold:.3f}s: {mean_seconds:.3f}s"
60
+ )
@@ -179,6 +179,30 @@ class _RecordingGaggle:
179
179
  return dataset
180
180
 
181
181
 
182
+
183
+ def test_prime_resolve_columns_handles_streaming_dataset():
184
+ row = {"context": "alpha", "score": 1, "response": "beta"}
185
+
186
+ class StreamingDataset:
187
+ def __init__(self):
188
+ self.column_names = ["context", "score", "response"]
189
+
190
+ def __len__(self):
191
+ raise TypeError("Streaming dataset does not define __len__.")
192
+
193
+ def __getitem__(self, index):
194
+ raise TypeError("Streaming dataset does not support indexing.")
195
+
196
+ def take(self, n):
197
+ return [row][:n]
198
+
199
+ def __iter__(self):
200
+ return iter([row])
201
+
202
+ inferred = prime._resolve_columns(StreamingDataset(), None)
203
+
204
+ assert inferred == ["context", "response"]
205
+
182
206
  def test_load_environment_respects_explicit_columns(monkeypatch):
183
207
  dataset = Dataset.from_dict({"prompt": ["alpha"], "extra": ["beta"]})
184
208
  stub = _RecordingGaggle()
@@ -249,6 +249,23 @@ def test_redactyl_merge_adjacent_blocks():
249
249
  assert result == expected == "█████████████████"
250
250
 
251
251
 
252
+
253
+ def test_redactyl_zero_rate_is_noop(monkeypatch):
254
+ text = "alpha beta gamma"
255
+ monkeypatch.setattr(redactyl_module, "_redact_words_rust", None, raising=False)
256
+
257
+ result = redactyl_module.redact_words(text, rate=0.0, seed=42)
258
+ assert result == text
259
+
260
+ python_result = redactyl_module._python_redact_words(
261
+ text,
262
+ replacement_char=redactyl_module.FULL_BLOCK,
263
+ rate=0.0,
264
+ merge_adjacent=False,
265
+ rng=random.Random(42),
266
+ )
267
+ assert python_result == text
268
+
252
269
  def test_redactyl_empty_text_raises_value_error():
253
270
  message = "contains no redactable words"
254
271
  with pytest.raises(ValueError, match=message):
@@ -425,9 +442,9 @@ def test_gaggle_python_fallback_when_pipeline_disabled(monkeypatch):
425
442
  pytest.importorskip("glitchlings._zoo_rust")
426
443
 
427
444
  def _fail(*_args: object, **_kwargs: object) -> str:
428
- raise AssertionError("Rust pipeline should not run when feature flag is disabled")
445
+ raise AssertionError("Rust pipeline should not run when explicitly disabled")
429
446
 
430
- monkeypatch.delenv("GLITCHLINGS_RUST_PIPELINE", raising=False)
447
+ monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "0")
431
448
  monkeypatch.setattr(core_module, "_compose_glitchlings_rust", _fail, raising=False)
432
449
 
433
450
  gaggle = core_module.Gaggle(
@@ -500,10 +517,14 @@ def test_pipeline_falls_back_for_incomplete_operation(monkeypatch):
500
517
 
501
518
  def test_rust_pipeline_feature_flag_introspection(monkeypatch):
502
519
  monkeypatch.delenv("GLITCHLINGS_RUST_PIPELINE", raising=False)
503
- assert not core_module._pipeline_feature_flag_enabled()
520
+ assert core_module._pipeline_feature_flag_enabled()
504
521
  assert core_module.Gaggle.rust_pipeline_supported() is (
505
522
  core_module._compose_glitchlings_rust is not None
506
523
  )
524
+ assert core_module.Gaggle.rust_pipeline_enabled() is core_module.Gaggle.rust_pipeline_supported()
525
+
526
+ monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "0")
527
+ assert not core_module._pipeline_feature_flag_enabled()
507
528
  assert not core_module.Gaggle.rust_pipeline_enabled()
508
529
 
509
530
  monkeypatch.setenv("GLITCHLINGS_RUST_PIPELINE", "1")
File without changes
File without changes
File without changes
File without changes
File without changes