frameright 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
frameright/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ """
2
+ FrameRight: A lightweight Object-DataFrame Mapper (ODM).
3
+
4
+ Provides type-safe DataFrame wrappers with runtime validation (via Pandera),
5
+ IDE-friendly autocomplete, and Pydantic-style field constraints.
6
+ Supports Pandas, Polars, and other DataFrame backends.
7
+ """
8
+
9
+ from .core import Field, FieldInfo
10
+ from .exceptions import (
11
+ ConstraintViolationError,
12
+ MissingColumnError,
13
+ SchemaError,
14
+ StructFrameError,
15
+ TypeMismatchError,
16
+ ValidationError,
17
+ )
18
+ from .typing import Col
19
+
20
+ __version__ = "0.3.0"
21
+ __all__ = [
22
+ "Field",
23
+ "FieldInfo",
24
+ "Col",
25
+ "StructFrameError",
26
+ "SchemaError",
27
+ "ValidationError",
28
+ "TypeMismatchError",
29
+ "ConstraintViolationError",
30
+ "MissingColumnError",
31
+ ]
@@ -0,0 +1,11 @@
1
+ """Backend adapters for Schema.
2
+
3
+ Each backend provides a consistent interface for DataFrame operations,
4
+ allowing Schema to work with Pandas, Polars, and other libraries.
5
+ """
6
+
7
+ from .base import BackendAdapter
8
+
9
+ __all__ = [
10
+ "BackendAdapter",
11
+ ]
@@ -0,0 +1,176 @@
1
+ """Abstract base class for backend adapters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Dict, List, Optional, Type
7
+
8
+
9
class BackendAdapter(ABC):
    """Contract that each DataFrame backend adapter has to fulfil.

    Schema hands every backend-specific task (reading and writing columns,
    inspecting dtypes, validating, coercing, and so on) to a concrete
    subclass of this adapter. Every member declared here is abstract.
    """

    # ---- Identity ----------------------------------------------------

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier of the backend, such as 'pandas' or 'polars'."""

    # ---- DataFrame operations ----------------------------------------

    @abstractmethod
    def copy(self, df: Any) -> Any:
        """Produce a deep copy of *df*."""

    @abstractmethod
    def get_column(self, df: Any, col: str) -> Any:
        """Fetch the column (Series) called *col* from *df*."""

    @abstractmethod
    def get_column_ref(self, df: Any, col: str) -> Any:
        """Give back a *lazy* reference to a column, for property getters.

        Eager backends (Pandas) behave exactly like ``get_column`` and hand
        back the materialised ``pd.Series``.

        Lazy-capable backends (Polars) hand back ``pl.col(col)``, a lazy
        expression that keeps the query optimizer in play.
        """

    @abstractmethod
    def set_column(self, df: Any, col: str, value: Any) -> Any:
        """Assign or overwrite column *col* and return the resulting frame.

        Mutable backends (Pandas) may modify *df* in place and return it.
        Immutable backends (Polars) return a new DataFrame instead.
        """

    @abstractmethod
    def has_column(self, df: Any, col: str) -> bool:
        """Tell whether *df* contains a column named *col*."""

    @abstractmethod
    def column_names(self, df: Any) -> List[str]:
        """List the column names of *df*."""

    @abstractmethod
    def num_rows(self, df: Any) -> int:
        """Count the rows of *df*."""

    @abstractmethod
    def num_cols(self, df: Any) -> int:
        """Count the columns of *df*."""

    # ---- Iteration / conversion --------------------------------------

    @abstractmethod
    def head(self, df: Any, n: int = 5) -> Any:
        """Take the leading *n* rows of *df*."""

    @abstractmethod
    def equals(self, df1: Any, df2: Any) -> bool:
        """Tell whether *df1* and *df2* hold equal data."""

    # ---- Pandera validation ------------------------------------------

    @abstractmethod
    def build_pandera_schema(
        self,
        fr_schema: Dict[str, dict],
        df: Optional[Any] = None,
        check_types: bool = True,
        strict: bool = False,
    ) -> Any:
        """Create the Pandera DataFrameSchema for a parsed Schema definition.

        Args:
            fr_schema: The ``_fr_schema`` dict of a Schema subclass.
            df: Optional native dataframe (narwhals uses it to detect the
                backend type).
            check_types: Whether dtype checks are part of the schema.
            strict: When True, DataFrames carrying columns that are not in
                the schema are rejected.

        Returns:
            A ``pandera.DataFrameSchema`` instance for this backend.
        """

    @abstractmethod
    def validate_with_pandera(
        self,
        df: Any,
        pandera_schema: Any,
        lazy: bool = True,
    ) -> None:
        """Validate *df* with Pandera, re-raising failures as Schema exceptions.

        Args:
            df: The DataFrame under validation.
            pandera_schema: The Pandera schema *df* is checked against.
            lazy: When True, all errors are gathered before raising.

        Raises:
            MissingColumnError, TypeMismatchError, ConstraintViolationError
        """

    # ---- Type coercion -----------------------------------------------

    @abstractmethod
    def coerce_column(
        self,
        df: Any,
        col: str,
        inner_type: Type,
        errors: str = "raise",
        nullable: bool = True,
    ) -> Any:
        """Convert column *col* to *inner_type* and return the (possibly new) DataFrame."""

    # ---- Schema introspection ----------------------------------------

    @abstractmethod
    def schema_info_to_dataframe(self, rows: List[dict]) -> Any:
        """Turn a list of schema-info dicts into a backend-native DataFrame."""

    # ---- Materialisation ---------------------------------------------

    @abstractmethod
    def collect(self, df: Any) -> Any:
        """Turn a lazy frame into an eager one.

        Eager backends (Pandas, Polars DataFrame) should hand *df* back
        untouched, whereas lazy backends (Polars LazyFrame) should call
        ``df.collect()``.
        """
@@ -0,0 +1,254 @@
1
+ """Narwhals backend adapter for Schema.
2
+
3
+ This backend handles narwhals DataFrames (nw.DataFrame) for users who want
4
+ backend-agnostic code. For native pandas or polars functionality, use the
5
+ PandasBackend or PolarsBackend respectively.
6
+
7
+ Users would use this by wrapping their DataFrame:
8
+ import narwhals as nw
9
+ import pandas as pd
10
+
11
+ df = pd.DataFrame({"a": [1, 2, 3]})
12
+ nw_df = nw.from_native(df)
13
+
14
+ class MyFrame(Schema):
15
+ a: Col[int]
16
+
17
+ frame = MyFrame(nw_df) # Uses NarwhalsBackend
18
+ frame.a # Returns nw.Series
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from datetime import date, datetime
24
+ from typing import Any, Dict, List
25
+
26
+ import narwhals as nw
27
+
28
+ from ..exceptions import ConstraintViolationError, MissingColumnError, TypeMismatchError
29
+ from .base import BackendAdapter
30
+
31
+
32
class NarwhalsBackend(BackendAdapter):
    """Backend adapter for narwhals DataFrames (backend-agnostic operations).

    Column access and mutation go through the narwhals API. Pandera
    validation is run against the native DataFrame that narwhals wraps.
    """

    @property
    def name(self) -> str:
        """Return the backend identifier, ``"narwhals"``."""
        return "narwhals"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_native(df: Any) -> Any:
        """Unwrap *df* when it exposes ``to_native``; otherwise return it as is."""
        return df.to_native() if hasattr(df, "to_native") else df

    @staticmethod
    def _names(df: Any) -> List[str]:
        """Return the column names of *df* as a list.

        ``collect_schema().names()`` is preferred when available because
        reading ``.columns`` on a LazyFrame triggers a performance warning.
        """
        if hasattr(df, "collect_schema"):
            return list(df.collect_schema().names())
        return list(df.columns)

    @staticmethod
    def _field_checks(pa: Any, fi: Any) -> List[Any]:
        """Translate the constraints on field info *fi* into Pandera checks.

        Args:
            pa: The pandera flavour module (``pandera.pandas`` or
                ``pandera.polars``) whose ``Check`` class is used.
            fi: Field info object exposing ``ge``, ``gt``, ``le``, ``lt``,
                ``isin``, ``regex``, ``min_length`` and ``max_length``.

        Returns:
            The list of checks, possibly empty.
        """
        checks: List[Any] = []
        # These constraint names match the ``pa.Check`` factory names 1:1.
        for constraint in ("ge", "gt", "le", "lt", "isin"):
            bound = getattr(fi, constraint)
            if bound is not None:
                checks.append(getattr(pa.Check, constraint)(bound))
        if fi.regex is not None:
            checks.append(pa.Check.str_matches(fi.regex))
        if fi.min_length is not None or fi.max_length is not None:
            checks.append(
                pa.Check.str_length(
                    min_value=fi.min_length,
                    max_value=fi.max_length,
                )
            )
        return checks

    # ------------------------------------------------------------------
    # DataFrame operations
    # ------------------------------------------------------------------

    def copy(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return ``df.clone()`` when available, otherwise *df* itself."""
        return df.clone() if hasattr(df, "clone") else df

    def get_column(self, df: nw.DataFrame, col: str) -> nw.Series:
        """Return column *col* of *df* via ``df[col]``."""
        return df[col]

    def get_column_ref(self, df: nw.DataFrame, col: str) -> nw.Series:
        """Return column *col* of *df*; identical to ``get_column`` here."""
        return df[col]

    def set_column(self, df: nw.DataFrame, col: str, value: Any) -> nw.DataFrame:
        """Return *df* with column *col* set to *value*.

        A narwhals Series is aliased to *col* first; any other value is
        passed to ``with_columns`` as a named argument.
        """
        if isinstance(value, nw.Series):
            return df.with_columns(value.alias(col))
        # Let narwhals handle scalars/arrays
        return df.with_columns(**{col: value})

    def has_column(self, df: nw.DataFrame, col: str) -> bool:
        """Return True when *col* is one of the column names of *df*."""
        return col in self._names(df)

    def column_names(self, df: nw.DataFrame) -> List[str]:
        """Return the column names of *df* as a list."""
        return self._names(df)

    def num_rows(self, df: nw.DataFrame) -> int:
        """Return ``len(df)``."""
        return len(df)

    def num_cols(self, df: nw.DataFrame) -> int:
        """Return the number of columns of *df*."""
        return len(self._names(df))

    # ------------------------------------------------------------------
    # Iteration / conversion
    # ------------------------------------------------------------------

    def head(self, df: nw.DataFrame, n: int = 5) -> nw.DataFrame:
        """Return the first *n* rows of *df*."""
        return df.head(n)

    def equals(self, df1: nw.DataFrame, df2: nw.DataFrame) -> bool:
        """Compare two frames through the ``equals`` method of their native objects."""
        return df1.to_native().equals(df2.to_native())

    # ------------------------------------------------------------------
    # Pandera validation (narwhals wraps native, so validate the native)
    # ------------------------------------------------------------------

    def build_pandera_schema(
        self,
        fr_schema: Dict[str, dict],
        df: Any = None,
        check_types: bool = True,
        strict: bool = False,
    ) -> Any:
        """Build a Pandera schema matching the underlying native DataFrame.

        Args:
            fr_schema: Mapping whose values are dicts with the keys
                ``"df_col"``, ``"inner_type"``, ``"field_info"`` and
                ``"is_optional"``.
            df: The frame used to detect the native backend. Either a
                narwhals frame or an already-native frame is accepted.
            check_types: Whether to include dtype checks.
            strict: Forwarded to ``DataFrameSchema(strict=...)``.

        Returns:
            A ``DataFrameSchema`` from ``pandera.polars`` when the native
            frame's class lives in a module whose name contains "polars",
            otherwise one from ``pandera.pandas``.

        Raises:
            ValueError: If *df* is None; the backend cannot be detected
                without a frame.
        """
        if df is None:
            raise ValueError(
                "NarwhalsBackend.build_pandera_schema requires `df` to detect "
                "the underlying native backend"
            )

        native = self._to_native(df)
        is_polars = "polars" in type(native).__module__

        dtype_map: Dict[type, Any]
        if is_polars:
            import pandera.polars as pa
            import polars as pl

            dtype_map = {
                int: pl.Int64,
                float: pl.Float64,
                str: pl.String,
                bool: pl.Boolean,
                datetime: pl.Datetime,
                date: pl.Date,
            }
        else:
            import pandera.pandas as pa

            dtype_map = {
                int: int,
                float: float,
                str: str,
                bool: bool,
                datetime: "datetime64[ns]",
                date: "datetime64[ns]",
            }

        columns: Dict[str, Any] = {}
        for meta in fr_schema.values():
            fi = meta["field_info"]
            inner_type = meta["inner_type"]

            pa_dtype: Any = None
            if check_types and inner_type is not None:
                pa_dtype = dtype_map.get(inner_type)
                # Plain ``bool`` cannot hold missing values in pandas, so
                # nullable bool columns use the "boolean" extension dtype.
                if not is_polars and inner_type is bool and fi.nullable:
                    pa_dtype = "boolean"

            columns[meta["df_col"]] = pa.Column(
                dtype=pa_dtype,
                checks=self._field_checks(pa, fi) or None,
                nullable=fi.nullable,
                unique=fi.unique,
                required=not meta["is_optional"],
                coerce=False,
            )

        return pa.DataFrameSchema(columns=columns, strict=strict)

    def validate_with_pandera(
        self,
        df: nw.DataFrame,
        pandera_schema: Any,
        lazy: bool = True,
    ) -> None:
        """Validate the native frame behind *df* against *pandera_schema*.

        Args:
            df: The frame to validate; it is unwrapped to its native form.
            pandera_schema: Schema object whose ``validate`` is called.
            lazy: Forwarded to ``pandera_schema.validate``.

        Raises:
            MissingColumnError, TypeMismatchError, ConstraintViolationError:
                Translated from Pandera's ``SchemaErrors`` / ``SchemaError``.
        """
        from pandera import errors

        native = self._to_native(df)

        try:
            pandera_schema.validate(native, lazy=lazy)
        except errors.SchemaErrors as exc:
            self._translate_pandera_errors(exc)
        except errors.SchemaError as exc:
            self._translate_single_pandera_error(exc)

    def _translate_pandera_errors(self, exc: Any) -> None:
        """Re-raise an aggregated Pandera failure as a Schema exception.

        Precedence is missing columns, then dtype mismatches, then the first
        remaining failure case. This method always raises, so a validation
        failure can never be swallowed by the caller.
        """
        fc = exc.failure_cases
        if hasattr(fc, "to_pandas"):
            # Non-pandas failure tables are converted so the pandas-style
            # indexing below applies uniformly.
            fc = fc.to_pandas()

        missing_mask = fc["check"] == "column_in_dataframe"
        if missing_mask.any():
            missing_cols = sorted(
                fc.loc[missing_mask, "failure_case"].unique().tolist()
            )
            raise MissingColumnError(
                f"Missing required columns: {missing_cols}"
            ) from exc

        dtype_mask = fc["check"].str.startswith("dtype(", na=False)
        if dtype_mask.any():
            row = fc.loc[dtype_mask].iloc[0]
            raise TypeMismatchError(
                f"Column '{row['column']}' dtype mismatch: {row['check']}"
            ) from exc

        if len(fc) > 0:
            row = fc.iloc[0]
            col = row.get("column", "?")
            check = row.get("check", "?")
            raise ConstraintViolationError(
                f"Column '{col}' failed check: {check}"
            ) from exc

        # No failure rows to describe, but validation still failed: surface
        # the original message instead of returning silently.
        raise ConstraintViolationError(str(exc)) from exc

    def _translate_single_pandera_error(self, exc: Any) -> None:
        """Re-raise a single Pandera error, classified by its message text."""
        msg = str(exc)
        if "not in dataframe" in msg or "column_in_dataframe" in msg:
            raise MissingColumnError(msg) from exc
        if "dtype" in msg.lower():
            raise TypeMismatchError(msg) from exc
        raise ConstraintViolationError(msg) from exc

    # ------------------------------------------------------------------
    # Type coercion
    # ------------------------------------------------------------------

    def coerce_column(
        self,
        df: nw.DataFrame,
        col: str,
        target_type: type,
        errors: str = "raise",
        nullable: bool = True,
    ) -> nw.DataFrame:
        """Cast column *col* to the narwhals dtype matching *target_type*.

        Args:
            df: The frame holding the column.
            col: Name of the column to cast.
            target_type: One of ``int``, ``float``, ``str`` or ``bool``.
                Any other type leaves *df* unchanged.
            errors: Accepted so the signature matches
                ``BackendAdapter.coerce_column``; not acted on here, so a
                failing cast propagates the underlying exception.
            nullable: Accepted for the same reason; not acted on here.

        Returns:
            A frame with the column cast, or *df* itself when *target_type*
            has no mapping.
        """
        dtype_map = {
            int: nw.Int64,
            float: nw.Float64,
            str: nw.String,
            bool: nw.Boolean,
        }

        target_dtype = dtype_map.get(target_type)
        if target_dtype is None:
            return df

        return df.with_columns(df[col].cast(target_dtype).alias(col))

    # ------------------------------------------------------------------
    # Lazy evaluation
    # ------------------------------------------------------------------

    def is_lazy(self, df: Any) -> bool:
        """Return False: this adapter treats narwhals DataFrames as eager."""
        return False

    def collect(self, df: nw.DataFrame) -> nw.DataFrame:
        """Return *df* unchanged; it is treated as already collected."""
        return df

    # ------------------------------------------------------------------
    # Schema introspection
    # ------------------------------------------------------------------

    def schema_info_to_dataframe(self, rows: List[dict]) -> nw.DataFrame:
        """Build a pandas DataFrame from *rows* and wrap it with narwhals."""
        import pandas as pd

        pd_df = pd.DataFrame(rows)
        return nw.from_native(pd_df)