PyPI - frameright - Versions diffs - 0.3.0__py3-none-any.whl - Mend

frameright 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

frameright/__init__.py +31 -0
frameright/backends/__init__.py +11 -0
frameright/backends/base.py +176 -0
frameright/backends/narwhals_backend.py +254 -0
frameright/backends/pandas_backend.py +309 -0
frameright/backends/polars_eager_backend.py +314 -0
frameright/backends/polars_lazy_backend.py +321 -0
frameright/backends/registry.py +77 -0
frameright/core.py +487 -0
frameright/exceptions.py +37 -0
frameright/narwhals/__init__.py +35 -0
frameright/narwhals/eager.py +89 -0
frameright/narwhals/lazy.py +89 -0
frameright/pandas/__init__.py +95 -0
frameright/polars/__init__.py +35 -0
frameright/polars/eager.py +88 -0
frameright/polars/lazy.py +88 -0
frameright/polars_eager/__init__.py +55 -0
frameright/py.typed +0 -0
frameright/typing/__init__.py +50 -0
frameright/typing/narwhals.py +43 -0
frameright/typing/pandas.py +19 -0
frameright/typing/polars.py +14 -0
frameright/typing/polars_eager.py +43 -0
frameright/typing/polars_lazy.py +47 -0
frameright-0.3.0.dist-info/METADATA +664 -0
frameright-0.3.0.dist-info/RECORD +30 -0
frameright-0.3.0.dist-info/WHEEL +5 -0
frameright-0.3.0.dist-info/licenses/LICENSE.txt +21 -0
frameright-0.3.0.dist-info/top_level.txt +1 -0

frameright/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+FrameRight: A lightweight Object-DataFrame Mapper (ODM).
+Provides type-safe DataFrame wrappers with runtime validation (via Pandera),
+IDE-friendly autocomplete, and Pydantic-style field constraints.
+Supports Pandas, Polars, and other DataFrame backends.
+"""
+from .core import Field, FieldInfo
+from .exceptions import (
+    ConstraintViolationError,
+    MissingColumnError,
+    SchemaError,
+    StructFrameError,
+    TypeMismatchError,
+    ValidationError,
+)
+from .typing import Col
+# Package version string.
+__version__ = "0.3.0"
+# Names re-exported at package level: the field helpers from ``.core``,
+# the ``Col`` annotation from ``.typing``, and the classes imported from
+# ``.exceptions`` above.
+__all__ = [
+    "Field",
+    "FieldInfo",
+    "Col",
+    "StructFrameError",
+    "SchemaError",
+    "ValidationError",
+    "TypeMismatchError",
+    "ConstraintViolationError",
+    "MissingColumnError",
+]

frameright/backends/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Backend adapters for Schema.
+Each backend provides a consistent interface for DataFrame operations,
+allowing Schema to work with Pandas, Polars, and other libraries.
+"""
+from .base import BackendAdapter
+# Only the abstract adapter interface is exported from this package module.
+__all__ = [
+    "BackendAdapter",
+]

frameright/backends/base.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Abstract base class for backend adapters."""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Type
+class BackendAdapter(ABC):
+    """Abstract interface that every DataFrame backend must implement.
+    Schema delegates all backend-specific operations (column access,
+    dtype inspection, validation, coercion, etc.) to a concrete adapter.
+    Every member below is abstract, so a concrete adapter has to override
+    all of them before it can be instantiated.
+    """
+    # ------------------------------------------------------------------
+    # Identity
+    # ------------------------------------------------------------------
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Short name for the backend, e.g. 'pandas' or 'polars'."""
+        ...
+    # ------------------------------------------------------------------
+    # DataFrame operations
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def copy(self, df: Any) -> Any:
+        """Return a deep copy of the DataFrame."""
+        ...
+    @abstractmethod
+    def get_column(self, df: Any, col: str) -> Any:
+        """Return a column (Series) from the DataFrame."""
+        ...
+    @abstractmethod
+    def get_column_ref(self, df: Any, col: str) -> Any:
+        """Return a *lazy* column reference for use in property getters.
+        For eager backends (Pandas) this is the same as ``get_column``
+        and returns the materialised ``pd.Series``.
+        For lazy-capable backends (Polars) this returns ``pl.col(col)``
+        — a lazy expression that preserves the query optimizer.
+        """
+        ...
+    @abstractmethod
+    def set_column(self, df: Any, col: str, value: Any) -> Any:
+        """Set/replace a column. Returns the (possibly new) DataFrame.
+        For mutable backends (Pandas) the original may be mutated and returned.
+        For immutable backends (Polars) a new DataFrame is returned.
+        Callers should therefore always use the returned frame.
+        """
+        ...
+    @abstractmethod
+    def has_column(self, df: Any, col: str) -> bool:
+        """Check whether *col* exists as a column name."""
+        ...
+    @abstractmethod
+    def column_names(self, df: Any) -> List[str]:
+        """Return the list of column names."""
+        ...
+    @abstractmethod
+    def num_rows(self, df: Any) -> int:
+        """Return the number of rows."""
+        ...
+    @abstractmethod
+    def num_cols(self, df: Any) -> int:
+        """Return the number of columns."""
+        ...
+    # ------------------------------------------------------------------
+    # Iteration / conversion
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def head(self, df: Any, n: int = 5) -> Any:
+        """Return the first *n* rows."""
+        ...
+    @abstractmethod
+    def equals(self, df1: Any, df2: Any) -> bool:
+        """Check data equality between two DataFrames."""
+        ...
+    # ------------------------------------------------------------------
+    # Pandera validation
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def build_pandera_schema(
+        self,
+        fr_schema: Dict[str, dict],
+        df: Optional[Any] = None,
+        check_types: bool = True,
+        strict: bool = False,
+    ) -> Any:
+        """Build a Pandera DataFrameSchema from the parsed Schema schema.
+        Args:
+            fr_schema: The ``_fr_schema`` dict from a Schema subclass.
+            df: Optional native dataframe (used by narwhals to detect backend type).
+            check_types: Whether to include dtype checks.
+            strict: If True, reject DataFrames with columns not in the schema.
+        Returns:
+            A ``pandera.DataFrameSchema`` instance for this backend.
+        """
+        ...
+    @abstractmethod
+    def validate_with_pandera(
+        self,
+        df: Any,
+        pandera_schema: Any,
+        lazy: bool = True,
+    ) -> None:
+        """Run Pandera validation and translate errors into Schema exceptions.
+        Args:
+            df: The DataFrame to validate.
+            pandera_schema: The Pandera schema to validate against.
+            lazy: If True, collect all errors before raising.
+        Raises:
+            MissingColumnError, TypeMismatchError, ConstraintViolationError
+        """
+        ...
+    # ------------------------------------------------------------------
+    # Type coercion
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def coerce_column(
+        self,
+        df: Any,
+        col: str,
+        inner_type: Type,
+        errors: str = "raise",
+        nullable: bool = True,
+    ) -> Any:
+        """Coerce a single column to match *inner_type*. Returns the (possibly new) DataFrame.
+        Args:
+            df: The DataFrame holding the column.
+            col: Name of the column to coerce.
+            inner_type: The Python type the column should match afterwards.
+            errors: Error-handling mode; defaults to ``"raise"``. The accepted
+                values are defined by the concrete adapters and are not
+                visible in this interface (TODO confirm).
+            nullable: Presumably whether the coerced column may contain
+                nulls; verify against the concrete adapters.
+        """
+        ...
+    # ------------------------------------------------------------------
+    # Schema introspection
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def schema_info_to_dataframe(self, rows: List[dict]) -> Any:
+        """Convert a list of schema-info dicts into a backend-native DataFrame."""
+        ...
+    # ------------------------------------------------------------------
+    # Materialisation
+    # ------------------------------------------------------------------
+    @abstractmethod
+    def collect(self, df: Any) -> Any:
+        """Materialise a lazy frame into an eager one.
+        For eager backends (Pandas, Polars DataFrame) this should return
+        *df* unchanged.  For lazy backends (Polars LazyFrame) this should
+        call ``df.collect()``.
+        """
+        ...

frameright/backends/narwhals_backend.py ADDED Viewed

@@ -0,0 +1,254 @@
+"""Narwhals backend adapter for Schema.
+This backend handles narwhals DataFrames (nw.DataFrame) for users who want
+backend-agnostic code. For native pandas or polars functionality, use the
+PandasBackend or PolarsBackend respectively.
+Users would use this by wrapping their DataFrame:
+    import narwhals as nw
+    import pandas as pd
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    nw_df = nw.from_native(df)
+    class MyFrame(Schema):
+        a: Col[int]
+    frame = MyFrame(nw_df)  # Uses NarwhalsBackend
+    frame.a  # Returns nw.Series
+"""
+from __future__ import annotations
+from datetime import date, datetime
+from typing import Any, Dict, List
+import narwhals as nw
+from ..exceptions import ConstraintViolationError, MissingColumnError, TypeMismatchError
+from .base import BackendAdapter
+class NarwhalsBackend(BackendAdapter):
+    """Backend adapter for narwhals DataFrames (backend-agnostic operations)."""
+    @property
+    def name(self) -> str:
+        """Short identifier of this backend: ``"narwhals"``."""
+        return "narwhals"
+    # DataFrame operations
+    def copy(self, df: nw.DataFrame) -> nw.DataFrame:
+        return df.clone() if hasattr(df, "clone") else df
+    def get_column(self, df: nw.DataFrame, col: str) -> nw.Series:
+        """Return column *col* of *df* via item access (``df[col]``)."""
+        return df[col]
+    def get_column_ref(self, df: nw.DataFrame, col: str) -> nw.Series:
+        """Return the column reference used by property getters.
+        This adapter performs the same ``df[col]`` lookup as ``get_column``;
+        it does not build a lazy expression.
+        """
+        return df[col]
+    def set_column(self, df: nw.DataFrame, col: str, value: Any) -> nw.DataFrame:
+        if isinstance(value, nw.Series):
+            return df.with_columns(value.alias(col))
+        # Let narwhals handle scalars/arrays
+        return df.with_columns(**{col: value})
+    def has_column(self, df: nw.DataFrame, col: str) -> bool:
+        # For LazyFrames, use collect_schema().names() to avoid performance warning
+        if hasattr(df, "collect_schema"):
+            return col in df.collect_schema().names()
+        return col in df.columns
+    def column_names(self, df: nw.DataFrame) -> List[str]:
+        return list(df.columns)
+    def num_rows(self, df: nw.DataFrame) -> int:
+        """Return the row count, taken as ``len(df)``."""
+        return len(df)
+    def num_cols(self, df: nw.DataFrame) -> int:
+        return len(df.columns)
+    # Iteration / conversion
+    def head(self, df: nw.DataFrame, n: int = 5) -> nw.DataFrame:
+        """Return the first *n* rows (default 5) by delegating to ``df.head``."""
+        return df.head(n)
+    def equals(self, df1: nw.DataFrame, df2: nw.DataFrame) -> bool:
+        # Compare via native
+        return df1.to_native().equals(df2.to_native())
+    # Pandera validation (narwhals wraps native, so validate the native)
+    def build_pandera_schema(
+        self,
+        fr_schema: Dict[str, dict],
+        df: nw.DataFrame,
+        check_types: bool = True,
+        strict: bool = False,
+    ) -> Any:
+        """Build pandera schema based on the underlying native DataFrame."""
+        native = df.to_native()
+        is_polars = (
+            hasattr(native, "__class__") and "polars" in native.__class__.__module__
+        )
+        if is_polars:
+            import pandera.polars as pa
+            import polars as pl
+            dtype_map: Dict[type, Any] = {
+                int: pl.Int64,
+                float: pl.Float64,
+                str: pl.String,
+                bool: pl.Boolean,
+                datetime: pl.Datetime,
+                date: pl.Date,
+            }
+        else:
+            import pandera.pandas as pa
+            dtype_map: Dict[type, Any] = {
+                int: int,
+                float: float,
+                str: str,
+                bool: bool,
+                datetime: "datetime64[ns]",
+                date: "datetime64[ns]",
+            }
+        columns: Dict[str, pa.Column] = {}
+        for attr_name, meta in fr_schema.items():
+            df_col: str = meta["df_col"]
+            inner_type = meta["inner_type"]
+            fi = meta["field_info"]
+            is_optional: bool = meta["is_optional"]
+            checks: List[Any] = []
+            if fi.ge is not None:
+                checks.append(pa.Check.ge(fi.ge))
+            if fi.gt is not None:
+                checks.append(pa.Check.gt(fi.gt))
+            if fi.le is not None:
+                checks.append(pa.Check.le(fi.le))
+            if fi.lt is not None:
+                checks.append(pa.Check.lt(fi.lt))
+            if fi.isin is not None:
+                checks.append(pa.Check.isin(fi.isin))
+            if fi.regex is not None:
+                checks.append(pa.Check.str_matches(fi.regex))
+            if fi.min_length is not None or fi.max_length is not None:
+                checks.append(
+                    pa.Check.str_length(
+                        min_value=fi.min_length,
+                        max_value=fi.max_length,
+                    )
+                )
+            pa_dtype: Any = None
+            if check_types and inner_type is not None:
+                pa_dtype = dtype_map.get(inner_type)
+                if not is_polars and inner_type is bool and fi.nullable:
+                    pa_dtype = "boolean"
+            columns[df_col] = pa.Column(
+                dtype=pa_dtype,
+                checks=checks or None,
+                nullable=fi.nullable,
+                unique=fi.unique,
+                required=not is_optional,
+                coerce=False,
+            )
+        return pa.DataFrameSchema(columns=columns, strict=strict)
+    def validate_with_pandera(
+        self,
+        df: nw.DataFrame,
+        pandera_schema: Any,
+        lazy: bool = True,
+    ) -> None:
+        from pandera import errors
+        native = df.to_native()
+        try:
+            pandera_schema.validate(native, lazy=lazy)
+        except errors.SchemaErrors as exc:
+            self._translate_pandera_errors(exc)
+        except errors.SchemaError as exc:
+            self._translate_single_pandera_error(exc)
+    def _translate_pandera_errors(self, exc: Any) -> None:
+        fc = exc.failure_cases
+        if hasattr(fc, "to_pandas"):
+            fc = fc.to_pandas()
+        missing_mask = fc["check"] == "column_in_dataframe"
+        if missing_mask.any():
+            missing_cols = sorted(
+                fc.loc[missing_mask, "failure_case"].unique().tolist()
+            )
+            raise MissingColumnError(
+                f"Missing required columns: {missing_cols}"
+            ) from exc
+        dtype_mask = fc["check"].str.startswith("dtype(", na=False)
+        if dtype_mask.any():
+            row = fc.loc[dtype_mask].iloc[0]
+            raise TypeMismatchError(
+                f"Column '{row['column']}' dtype mismatch: {row['check']}"
+            ) from exc
+        if len(fc) > 0:
+            row = fc.iloc[0]
+            col = row.get("column", "?")
+            check = row.get("check", "?")
+            raise ConstraintViolationError(
+                f"Column '{col}' failed check: {check}"
+            ) from exc
+    def _translate_single_pandera_error(self, exc: Any) -> None:
+        msg = str(exc)
+        if "not in dataframe" in msg or "column_in_dataframe" in msg:
+            raise MissingColumnError(msg) from exc
+        elif "dtype" in msg.lower():
+            raise TypeMismatchError(msg) from exc
+        else:
+            raise ConstraintViolationError(msg) from exc
+    # Type coercion
+    def coerce_column(
+        self,
+        df: nw.DataFrame,
+        col: str,
+        target_type: type,
+    ) -> nw.DataFrame:
+        """Coerce a column to target type using narwhals."""
+        # Narwhals dtype mapping
+        dtype_map = {
+            int: nw.Int64,
+            float: nw.Float64,
+            str: nw.String,
+            bool: nw.Boolean,
+        }
+        target_dtype = dtype_map.get(target_type)
+        if target_dtype is None:
+            return df
+        return df.with_columns(df[col].cast(target_dtype).alias(col))
+    # Lazy evaluation
+    def is_lazy(self, df: Any) -> bool:
+        """Report whether *df* is lazy; this adapter always answers ``False``."""
+        # Narwhals DataFrames are eager
+        return False
+    def collect(self, df: nw.DataFrame) -> nw.DataFrame:
+        """Return *df* unchanged; no materialisation step is performed here."""
+        # Narwhals DataFrames are already collected
+        return df
+    def schema_info_to_dataframe(self, rows: List[dict]) -> nw.DataFrame:
+        """Convert schema info rows to narwhals DataFrame."""
+        import pandas as pd
+        pd_df = pd.DataFrame(rows)
+        return nw.from_native(pd_df)