PyPI - glitchlings - Versions diffs - 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl - Mend

glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

glitchlings/__init__.py +36 -17
glitchlings/__main__.py +0 -1
glitchlings/_zoo_rust/__init__.py +12 -0
glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
glitchlings/assets/__init__.py +180 -0
glitchlings/assets/apostrofae_pairs.json +32 -0
glitchlings/assets/ekkokin_homophones.json +2014 -0
glitchlings/assets/hokey_assets.json +193 -0
glitchlings/assets/lexemes/academic.json +1049 -0
glitchlings/assets/lexemes/colors.json +1333 -0
glitchlings/assets/lexemes/corporate.json +716 -0
glitchlings/assets/lexemes/cyberpunk.json +22 -0
glitchlings/assets/lexemes/lovecraftian.json +23 -0
glitchlings/assets/lexemes/synonyms.json +3354 -0
glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
glitchlings/assets/pipeline_assets.json +29 -0
glitchlings/attack/__init__.py +53 -0
glitchlings/attack/compose.py +299 -0
glitchlings/attack/core.py +465 -0
glitchlings/attack/encode.py +114 -0
glitchlings/attack/metrics.py +104 -0
glitchlings/attack/metrics_dispatch.py +70 -0
glitchlings/attack/tokenization.py +157 -0
glitchlings/auggie.py +283 -0
glitchlings/compat/__init__.py +9 -0
glitchlings/compat/loaders.py +355 -0
glitchlings/compat/types.py +41 -0
glitchlings/conf/__init__.py +41 -0
glitchlings/conf/loaders.py +331 -0
glitchlings/conf/schema.py +156 -0
glitchlings/conf/types.py +72 -0
glitchlings/config.toml +2 -0
glitchlings/constants.py +59 -0
glitchlings/dev/__init__.py +3 -0
glitchlings/dev/docs.py +45 -0
glitchlings/dlc/__init__.py +17 -3
glitchlings/dlc/_shared.py +296 -0
glitchlings/dlc/gutenberg.py +400 -0
glitchlings/dlc/huggingface.py +37 -65
glitchlings/dlc/prime.py +55 -114
glitchlings/dlc/pytorch.py +98 -0
glitchlings/dlc/pytorch_lightning.py +173 -0
glitchlings/internal/__init__.py +16 -0
glitchlings/internal/rust.py +159 -0
glitchlings/internal/rust_ffi.py +432 -0
glitchlings/main.py +123 -32
glitchlings/runtime_config.py +24 -0
glitchlings/util/__init__.py +29 -176
glitchlings/util/adapters.py +65 -0
glitchlings/util/keyboards.py +311 -0
glitchlings/util/transcripts.py +108 -0
glitchlings/zoo/__init__.py +47 -24
glitchlings/zoo/assets/__init__.py +29 -0
glitchlings/zoo/core.py +301 -167
glitchlings/zoo/core_execution.py +98 -0
glitchlings/zoo/core_planning.py +451 -0
glitchlings/zoo/corrupt_dispatch.py +295 -0
glitchlings/zoo/ekkokin.py +118 -0
glitchlings/zoo/hokey.py +137 -0
glitchlings/zoo/jargoyle.py +179 -274
glitchlings/zoo/mim1c.py +106 -68
glitchlings/zoo/pedant/__init__.py +107 -0
glitchlings/zoo/pedant/core.py +105 -0
glitchlings/zoo/pedant/forms.py +74 -0
glitchlings/zoo/pedant/stones.py +74 -0
glitchlings/zoo/redactyl.py +44 -175
glitchlings/zoo/rng.py +259 -0
glitchlings/zoo/rushmore.py +359 -116
glitchlings/zoo/scannequin.py +18 -125
glitchlings/zoo/transforms.py +386 -0
glitchlings/zoo/typogre.py +76 -162
glitchlings/zoo/validation.py +477 -0
glitchlings/zoo/zeedub.py +33 -86
glitchlings-0.9.3.dist-info/METADATA +334 -0
glitchlings-0.9.3.dist-info/RECORD +80 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
glitchlings/zoo/_ocr_confusions.py +0 -34
glitchlings/zoo/_rate.py +0 -21
glitchlings/zoo/reduple.py +0 -169
glitchlings-0.2.5.dist-info/METADATA +0 -490
glitchlings-0.2.5.dist-info/RECORD +0 -27
glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
{glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0

glitchlings/dlc/huggingface.py CHANGED

@@ -5,40 +5,19 @@ from __future__ import annotations
 from collections.abc import Iterable, Sequence
 from typing import Any
-try:  # pragma: no cover - optional dependency is required at runtime
-    from datasets import Dataset as _DatasetsDataset
-except ModuleNotFoundError as _datasets_error:  # pragma: no cover - optional dependency
-    _DatasetsDataset = None  # type: ignore[assignment]
-else:
-    _datasets_error = None
+from ..util.adapters import coerce_gaggle
+from ..zoo import Gaggle, Glitchling
-from ..zoo import Gaggle, Glitchling, summon
-def _normalise_columns(column: str | Sequence[str]) -> list[str]:
-    """Normalise a column specification to a list."""
+def _normalize_columns(column: str | Sequence[str]) -> list[str]:
+    """Normalize a column specification to a list."""
     if isinstance(column, str):
         return [column]
-    normalised = list(column)
-    if not normalised:
+    normalized = list(column)
+    if not normalized:
         raise ValueError("At least one column must be specified")
-    return normalised
-def _as_gaggle(glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling], seed: int) -> Gaggle:
-    """Coerce any supported glitchling specification into a :class:`Gaggle`."""
-    if isinstance(glitchlings, Gaggle):
-        return glitchlings
-    if isinstance(glitchlings, (Glitchling, str)):
-        resolved: Iterable[str | Glitchling] = [glitchlings]
-    else:
-        resolved = glitchlings
-    return summon(list(resolved), seed=seed)
+    return normalized
 def _glitch_dataset(
@@ -48,49 +27,42 @@ def _glitch_dataset(
     *,
     seed: int = 151,
 ) -> Any:
-    """Internal helper implementing :meth:`Dataset.glitch`."""
-    columns = _normalise_columns(column)
-    gaggle = _as_gaggle(glitchlings, seed=seed)
+    """Apply glitchlings to the provided dataset columns."""
+    columns = _normalize_columns(column)
+    gaggle = coerce_gaggle(glitchlings, seed=seed)
     return gaggle.corrupt_dataset(dataset, columns)
-def _ensure_dataset_class() -> Any:
-    """Return the Hugging Face :class:`~datasets.Dataset` patched with ``.glitch``."""
-    if _DatasetsDataset is None:  # pragma: no cover - datasets is an install-time dependency
-        message = "datasets is not installed"
-        raise ModuleNotFoundError(message) from _datasets_error
-    if getattr(_DatasetsDataset, "glitch", None) is None:
-        def glitch(  # type: ignore[override]
-            self: Any,
-            glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
-            *,
-            column: str | Sequence[str],
-            seed: int = 151,
-            **_: Any,
-        ) -> Any:
-            """Return a lazily corrupted copy of the dataset."""
-            return _glitch_dataset(self, glitchlings, column, seed=seed)
-        setattr(_DatasetsDataset, "glitch", glitch)
-    return _DatasetsDataset
+def GlitchedDataset(
+    dataset: Any,
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    *,
+    column: str | Sequence[str],
+    seed: int = 151,
+) -> Any:
+    """Return a lazily corrupted copy of a Hugging Face dataset.
-def install() -> None:
-    """Monkeypatch the Hugging Face :class:`~datasets.Dataset` with ``.glitch``."""
+    This function applies glitchlings to the specified columns of a dataset,
+    returning a new dataset that lazily corrupts data as it's accessed.
-    _ensure_dataset_class()
+    Args:
+        dataset: The Hugging Face Dataset to corrupt.
+        glitchlings: A glitchling, gaggle, or specification of glitchlings to apply.
+        column: The column name (string) or names (sequence of strings) to corrupt.
+        seed: RNG seed for deterministic corruption (default: 151).
+    Returns:
+        A new dataset with the specified columns corrupted by the glitchlings.
-if _DatasetsDataset is not None:
-    Dataset = _ensure_dataset_class()
-else:  # pragma: no cover - datasets is an install-time dependency
-    Dataset = None  # type: ignore[assignment]
+    Example:
+        >>> from datasets import Dataset
+        >>> from glitchlings.dlc.huggingface import GlitchedDataset
+        >>> dataset = Dataset.from_dict({"text": ["hello", "world"]})
+        >>> corrupted = GlitchedDataset(dataset, "typogre", column="text")
+        >>> list(corrupted)
+        [{'text': 'helo'}, {'text': 'wrold'}]
+    """
+    return _glitch_dataset(dataset, glitchlings, column, seed=seed)
-__all__ = ["Dataset", "install"]
+__all__ = ["GlitchedDataset"]

glitchlings/dlc/prime.py CHANGED

@@ -3,115 +3,62 @@
 from __future__ import annotations
 from collections.abc import Iterable, Sequence
-from enum import Enum
-from typing import Any, Callable
+from typing import Any, Callable, Protocol, cast
-import verifiers as vf
+from ..compat.loaders import require_datasets, require_jellyfish, require_verifiers
+from ..util.adapters import coerce_gaggle
+from ..zoo import Gaggle, Glitchling, Mim1c, Typogre  # noqa: F401
+from ._shared import resolve_columns as _resolve_columns_shared
-from jellyfish import damerau_levenshtein_distance
-try:
-    from .huggingface import Dataset
-except ModuleNotFoundError:  # pragma: no cover - optional dependency
-    Dataset = object  # type: ignore[assignment]
-else:
-    if Dataset is None:  # pragma: no cover - optional dependency
-        Dataset = object  # type: ignore[assignment]
+class VerifierEnvironment(Protocol):
+    """Minimal interface for verifiers environments."""
-from ..zoo import Gaggle, Glitchling, Mim1c, Typogre, summon
+    dataset: Any
-def _resolve_environment(env: str | vf.Environment) -> vf.Environment:
-    """Return a fully-instantiated verifier environment."""
-    if isinstance(env, str):
-        env = vf.load_environment(env)
-    if not isinstance(env, vf.Environment):
-        raise TypeError("Invalid environment type")
-    return env
-def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[str]:
-    """Identify which dataset columns should be corrupted."""
+class VerifierSingleTurnEnv(Protocol):
+    """Minimal interface for single-turn verifier environments."""
-    available = set(dataset.column_names)
+    dataset: Any
+    rubric: Any
-    if columns is not None:
-        missing = sorted(set(columns) - available)
-        if missing:
-            missing_str = ", ".join(missing)
-            raise ValueError(f"Columns not found in dataset: {missing_str}")
-        return list(columns)
-    for candidate in ("prompt", "question"):
-        if candidate in available:
-            return [candidate]
+vf = require_verifiers("verifiers is not installed; install glitchlings[prime]")
+_jellyfish = require_jellyfish("jellyfish is not installed; install glitchlings[prime]")
+levenshtein_distance = _jellyfish.levenshtein_distance
-    sample = dataset[0] if len(dataset) else {}
-    inferred = [
-        name
-        for name in dataset.column_names
-        if isinstance(sample.get(name), str)
-    ]
-    if inferred:
-        return inferred
-    raise ValueError("Unable to determine which dataset columns to corrupt.")
-class Difficulty(Enum):
-    """Difficulty levels for tutorial environments."""
-    Easy = 0.25
-    Normal = 1.0
-    Hard = 1.75
-    Extreme = 3
-    Impossible = 9
+def _resolve_environment(env: str | VerifierEnvironment) -> VerifierEnvironment:
+    """Return a fully-instantiated verifier environment."""
+    if isinstance(env, str):
+        env = vf.load_environment(env)
+    if not isinstance(env, cast(type[Any], vf.Environment)):
+        raise TypeError("Invalid environment type")
-def tutorial_level(
-    env: vf.Environment | str,
-    seed: int = 151,
-    difficulty: Difficulty = Difficulty.Normal,
-) -> vf.Environment:
-    """Create a low-corruption environment using tuned defaults."""
+    return cast(VerifierEnvironment, env)
-    tuned_mim1c = Mim1c(rate=0.01 * difficulty.value)
-    tuned_typogre = Typogre(rate=0.025 * difficulty.value)
-    return load_environment(
-        env,
-        glitchlings=[tuned_mim1c, tuned_typogre],
-        seed=seed,
-    )
+def _resolve_columns(dataset: Any, columns: Sequence[str] | None) -> list[str]:
+    """Identify which dataset columns should be corrupted."""
+    return _resolve_columns_shared(dataset, columns)
 def load_environment(
-    env: str | vf.Environment,
+    env: str | VerifierEnvironment,
     glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle | None = None,
     *,
     seed: int = 151,
     columns: Sequence[str] | None = None,
-) -> vf.Environment:
+) -> VerifierEnvironment:
     """Load an environment and optionally corrupt it with glitchlings."""
     environment = _resolve_environment(env)
     if glitchlings is None:
         return environment
-    if isinstance(glitchlings, Gaggle):
-        gaggle = glitchlings
-    else:
-        if isinstance(glitchlings, (Glitchling, str)):
-            resolved = [glitchlings]
-        else:
-            resolved = list(glitchlings)
-        gaggle = summon(resolved, seed=seed)
+    gaggle = coerce_gaggle(glitchlings, seed=seed)
     dataset = environment.dataset
     corrupt_columns = _resolve_columns(dataset, columns)
@@ -125,21 +72,11 @@ def _as_gaggle(
     seed: int,
 ) -> Gaggle:
     """Coerce any supported glitchling specification into a :class:`Gaggle`."""
-    if isinstance(glitchlings, Gaggle):
-        return glitchlings
-    if isinstance(glitchlings, (Glitchling, str)):
-        resolved: Iterable[str | Glitchling] = [glitchlings]
-    else:
-        resolved = glitchlings
-    return summon(list(resolved), seed=seed)
+    return coerce_gaggle(glitchlings, seed=seed)
 def _extract_completion_text(completion: Any) -> str:
-    """Normalise a completion payload into a plain string."""
+    """Normalize a completion payload into a plain string."""
     if isinstance(completion, str):
         return completion
@@ -152,21 +89,22 @@ def _extract_completion_text(completion: Any) -> str:
     return str(completion)
-def symmetric_damerau_levenshtein_similarity(
+def normalized_edit_distance(
     _: Any,
     completion: Any,
     answer: str,
 ) -> float:
-    """Return ``1 - (distance / max_len)`` using Damerau-Levenshtein distance."""
+    """Return ``1 - (distance / max_len)`` using Levenshtein distance."""
     completion_text = _extract_completion_text(completion)
     target = answer or ""
     denominator = max(len(completion_text), len(target), 1)
-    distance = damerau_levenshtein_distance(completion_text, target)
+    distance = cast(int, levenshtein_distance(completion_text, target))
     score = 1.0 - (distance / denominator)
     return max(0.0, min(1.0, score))
+symmetric_levenshtein_similarity = normalized_edit_distance
 DEFAULT_CLEANUP_INSTRUCTIONS = (
     "You are a meticulous copy editor. Restore the provided text to its original form."
 )
@@ -182,32 +120,34 @@ def echo_chamber(
     reward_function: Callable[..., float] | None = None,
     split: str | None = None,
     **load_dataset_kwargs: Any,
-) -> vf.Environment:
+) -> VerifierSingleTurnEnv:
     """Create an Echo Chamber Prime environment from a Hugging Face dataset column.
     Args:
         dataset_id: Identifier of the Hugging Face dataset to load.
         column: Name of the column whose text should be glitched.
         glitchlings: Glitchling specifiers that will corrupt the prompts.
-        seed: RNG seed forwarded to :func:`summon`.
+        seed: RNG seed forwarded to :func:`glitchlings.util.adapters.coerce_gaggle`.
         instructions: System instructions supplied to the environment prompts.
         reward_function: Optional callable used to score completions. Defaults to
-            :func:`symmetric_damerau_levenshtein_similarity` when omitted.
+            :func:`symmetric_levenshtein_similarity` when omitted.
         split: Optional dataset split to load.
         **load_dataset_kwargs: Extra keyword arguments forwarded to
             :func:`datasets.load_dataset`.
-    """
-    try:
-        from datasets import Dataset as HFDataset, DatasetDict, load_dataset
-    except ModuleNotFoundError as exc:  # pragma: no cover - optional dependency
+    """
+    datasets_module = require_datasets("datasets is required to build an echo chamber")
+    load_dataset = getattr(datasets_module, "load_dataset", None)
+    if load_dataset is None:  # pragma: no cover - defensive
         message = "datasets is required to build an echo chamber"
-        raise ModuleNotFoundError(message) from exc
+        raise ModuleNotFoundError(message)
-    hf_dataset: HFDataset | DatasetDict
+    dataset_dict_cls = getattr(datasets_module, "DatasetDict", dict)
+    hf_dataset: Any
     if split is None:
         hf_dataset = load_dataset(dataset_id, **load_dataset_kwargs)
-        if isinstance(hf_dataset, DatasetDict):
+        if isinstance(hf_dataset, dataset_dict_cls):
             try:
                 hf_dataset = next(iter(hf_dataset.values()))
             except StopIteration as exc:  # pragma: no cover - defensive
@@ -215,10 +155,8 @@ def echo_chamber(
     else:
         hf_dataset = load_dataset(dataset_id, split=split, **load_dataset_kwargs)
-    if isinstance(hf_dataset, DatasetDict):
-        raise ValueError(
-            "Specify which split to use when the dataset loads as a DatasetDict."
-        )
+    if isinstance(hf_dataset, dataset_dict_cls):
+        raise ValueError("Specify which split to use when the dataset loads as a DatasetDict.")
     filtered_dataset = hf_dataset.filter(
         lambda row: row.get(column) is not None,
@@ -242,7 +180,7 @@ def echo_chamber(
     )
     try:
-        dataset_length = len(base_dataset)  # type: ignore[arg-type]
+        dataset_length = len(base_dataset)
     except TypeError:
         preview_rows: list[dict[str, Any]]
         take_fn = getattr(base_dataset, "take", None)
@@ -269,6 +207,9 @@ def echo_chamber(
     gaggle = _as_gaggle(glitchlings, seed=seed)
     glitched_dataset = gaggle.corrupt_dataset(base_dataset, ["prompt"])
-    rubric_func = reward_function or symmetric_damerau_levenshtein_similarity
+    rubric_func = reward_function or normalized_edit_distance
     rubric = vf.Rubric(funcs=[rubric_func], weights=[1.0])
-    return vf.SingleTurnEnv(dataset=glitched_dataset, rubric=rubric)
+    return cast(
+        VerifierSingleTurnEnv,
+        vf.SingleTurnEnv(dataset=glitched_dataset, rubric=rubric),
+    )

glitchlings/dlc/pytorch.py ADDED

@@ -0,0 +1,98 @@
+"""Integration helpers for PyTorch data loaders."""
+from __future__ import annotations
+from collections.abc import Iterable, Iterator, Sequence
+from typing import Any, cast
+from ..util.adapters import coerce_gaggle
+from ..zoo import Gaggle, Glitchling
+from ._shared import corrupt_batch, infer_batch_targets, normalize_column_spec
+class _GlitchedDataLoader(Iterable[Any]):
+    """Wrapper that applies glitchlings lazily to each batch from a data loader."""
+    def __init__(
+        self,
+        dataloader: Any,
+        gaggle: Gaggle,
+        *,
+        columns: list[str | int] | None,
+    ) -> None:
+        self._dataloader = dataloader
+        self._gaggle = gaggle
+        self._explicit_columns = columns
+        self._inferred_columns: list[str | int] | None | _Sentinel = _UNINITIALISED
+    def __iter__(self) -> Iterator[Any]:
+        # Reset all glitchling RNGs before each fresh pass for determinism.
+        self._gaggle.sort_glitchlings()
+        for batch in self._dataloader:
+            targets = self._resolve_columns(batch)
+            yield corrupt_batch(batch, targets, self._gaggle)
+    def __len__(self) -> int:
+        return len(self._dataloader)
+    def __getattr__(self, attribute: str) -> Any:
+        return getattr(self._dataloader, attribute)
+    def _resolve_columns(self, batch: Any) -> list[str | int] | None:
+        if self._explicit_columns is not None:
+            return self._explicit_columns
+        if self._inferred_columns is _UNINITIALISED:
+            self._inferred_columns = infer_batch_targets(batch)
+        return cast(list[str | int] | None, self._inferred_columns)
+class _Sentinel:
+    """Sentinel type for deferred column inference."""
+_UNINITIALISED = _Sentinel()
+def GlitchedDataLoader(
+    dataloader: Any,
+    glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle,
+    *,
+    columns: str | int | Sequence[str | int] | None = None,
+    seed: int = 151,
+) -> _GlitchedDataLoader:
+    """Return a lazily glitched view of a PyTorch DataLoader's batches.
+    This function wraps a PyTorch DataLoader to apply glitchlings to specified
+    columns (or auto-inferred text columns) in each batch as it's yielded.
+    Args:
+        dataloader: The PyTorch DataLoader to wrap.
+        glitchlings: A glitchling, gaggle, or specification of glitchlings to apply.
+        columns: Column name(s) or index/indices to corrupt. Can be:
+                 - A single string column name (for dict-like batches)
+                 - A single integer index (for sequence-like batches)
+                 - A sequence of column names or indices
+                 - None to auto-infer text columns (default)
+        seed: RNG seed for deterministic corruption (default: 151).
+    Returns:
+        A wrapped dataloader that yields corrupted batches.
+    Example:
+        >>> from torch.utils.data import DataLoader
+        >>> from glitchlings.dlc.pytorch import GlitchedDataLoader
+        >>> dataset = [{"text": "hello", "label": 0}]
+        >>> loader = DataLoader(dataset)
+        >>> glitched = GlitchedDataLoader(loader, "typogre", columns="text")
+        >>> for batch in glitched:
+        ...     print(batch)
+        {'text': 'helo', 'label': 0}
+    """
+    gaggle = coerce_gaggle(glitchlings, seed=seed)
+    normalized = normalize_column_spec(columns)
+    return _GlitchedDataLoader(dataloader, gaggle, columns=normalized)
+__all__ = ["GlitchedDataLoader"]

glitchlings/dlc/pytorch_lightning.py ADDED

@@ -0,0 +1,173 @@
+"""Integration helpers for PyTorch Lightning data modules."""
+from __future__ import annotations
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Any, cast
+from ..compat.loaders import get_pytorch_lightning_datamodule
+from ..util.adapters import coerce_gaggle
+from ..zoo import Gaggle, Glitchling
+from ._shared import normalize_column_spec, wrap_dataloader
+def _glitch_datamodule(
+    datamodule: Any,
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    column: str | Sequence[str],
+    *,
+    seed: int = 151,
+) -> Any:
+    """Return a proxy that applies glitchlings to batches from the datamodule."""
+    columns = normalize_column_spec(column)
+    if columns is None:  # pragma: no cover - defensive
+        raise ValueError("At least one column must be specified")
+    # Lightning datamodules only support string column names (mapping keys)
+    columns_str = cast(list[str], columns)
+    gaggle = coerce_gaggle(glitchlings, seed=seed)
+    return _GlitchedLightningDataModule(datamodule, columns_str, gaggle)
+def GlitchedLightningDataModule(
+    datamodule: Any,
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    *,
+    column: str | Sequence[str],
+    seed: int = 151,
+) -> Any:
+    """Return a glitched wrapper around a PyTorch Lightning LightningDataModule.
+    This function wraps a LightningDataModule to apply glitchlings to specified
+    columns in batches yielded by the module's dataloaders.
+    Args:
+        datamodule: The LightningDataModule to wrap.
+        glitchlings: A glitchling, gaggle, or specification of glitchlings to apply.
+        column: The column name (string) or names (sequence of strings) to corrupt.
+        seed: RNG seed for deterministic corruption (default: 151).
+    Returns:
+        A wrapped datamodule that yields corrupted batches from its dataloaders.
+    Example:
+        >>> from pytorch_lightning import LightningDataModule
+        >>> from glitchlings.dlc.pytorch_lightning import GlitchedLightningDataModule
+        >>> class MyDataModule(LightningDataModule):
+        ...     def train_dataloader(self):
+        ...         return [{"text": "hello", "label": 0}]
+        >>> dm = MyDataModule()
+        >>> glitched = GlitchedLightningDataModule(dm, "typogre", column="text")
+        >>> batches = list(glitched.train_dataloader())
+    """
+    return _glitch_datamodule(datamodule, glitchlings, column, seed=seed)
+class _GlitchedLightningDataModule:
+    """Proxy wrapper around a LightningDataModule applying glitchlings to batches."""
+    def __init__(self, base: Any, columns: list[str], gaggle: Gaggle) -> None:
+        object.__setattr__(self, "_glitch_base", base)
+        object.__setattr__(self, "_glitch_columns", columns)
+        object.__setattr__(self, "_glitch_gaggle", gaggle)
+    def __getattr__(self, attribute: str) -> Any:
+        return getattr(self._glitch_base, attribute)
+    def __setattr__(self, attribute: str, value: Any) -> None:
+        if attribute.startswith("_glitch_"):
+            object.__setattr__(self, attribute, value)
+        else:
+            setattr(self._glitch_base, attribute, value)
+    def __delattr__(self, attribute: str) -> None:
+        if attribute.startswith("_glitch_"):
+            object.__delattr__(self, attribute)
+        else:
+            delattr(self._glitch_base, attribute)
+    def __dir__(self) -> list[str]:
+        return sorted(set(dir(self.__class__)) | set(dir(self._glitch_base)))
+    # LightningDataModule API -------------------------------------------------
+    def prepare_data(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.prepare_data(*args, **kwargs)
+    def setup(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.setup(*args, **kwargs)
+    def teardown(self, *args: Any, **kwargs: Any) -> Any:
+        return self._glitch_base.teardown(*args, **kwargs)
+    def state_dict(self) -> Mapping[str, Any]:
+        state = self._glitch_base.state_dict()
+        return cast(Mapping[str, Any], state)
+    def load_state_dict(self, state_dict: Mapping[str, Any]) -> None:
+        self._glitch_base.load_state_dict(state_dict)
+    def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.transfer_batch_to_device(batch, device, dataloader_idx)
+    def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.on_before_batch_transfer(batch, dataloader_idx)
+    def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
+        return self._glitch_base.on_after_batch_transfer(batch, dataloader_idx)
+    def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.train_dataloader(*args, **kwargs)
+        return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.val_dataloader(*args, **kwargs)
+        return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.test_dataloader(*args, **kwargs)
+        return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+    def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
+        loader = self._glitch_base.predict_dataloader(*args, **kwargs)
+        return wrap_dataloader(loader, self._glitch_columns, self._glitch_gaggle)
+# Module initialization: set up inheritance from LightningDataModule if available
+def _setup_inheritance() -> None:
+    """Set up _GlitchedLightningDataModule to inherit from LightningDataModule.
+    This function is called once at module import time to dynamically set the base
+    class of _GlitchedLightningDataModule to inherit from
+    pytorch_lightning.LightningDataModule when available. This ensures that
+    isinstance(glitched, LightningDataModule) checks work correctly and that the
+    wrapper interoperates with Lightning APIs that require that type.
+    """
+    datamodule_cls = get_pytorch_lightning_datamodule()
+    if datamodule_cls is None:
+        # If LightningDataModule is not available, keep as plain object
+        return
+    # Try to dynamically set __bases__ to inherit from LightningDataModule
+    try:
+        _GlitchedLightningDataModule.__bases__ = (datamodule_cls,)
+    except TypeError:
+        # If we can't modify __bases__ (e.g., due to __slots__), create a new class
+        namespace = {
+            name: value
+            for name, value in vars(_GlitchedLightningDataModule).items()
+            if name not in {"__dict__", "__weakref__"}
+        }
+        replacement = cast(
+            type[Any],
+            type("_GlitchedLightningDataModule", (datamodule_cls,), namespace),
+        )
+        # Update the module's global namespace
+        globals()["_GlitchedLightningDataModule"] = replacement
+# Set up inheritance at module import time
+_setup_inheritance()
+__all__ = ["GlitchedLightningDataModule"]