frameright 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. frameright-0.3.0/LICENSE.txt +21 -0
  2. frameright-0.3.0/PKG-INFO +664 -0
  3. frameright-0.3.0/README.md +617 -0
  4. frameright-0.3.0/pyproject.toml +127 -0
  5. frameright-0.3.0/setup.cfg +4 -0
  6. frameright-0.3.0/src/frameright/__init__.py +31 -0
  7. frameright-0.3.0/src/frameright/backends/__init__.py +11 -0
  8. frameright-0.3.0/src/frameright/backends/base.py +176 -0
  9. frameright-0.3.0/src/frameright/backends/narwhals_backend.py +254 -0
  10. frameright-0.3.0/src/frameright/backends/pandas_backend.py +309 -0
  11. frameright-0.3.0/src/frameright/backends/polars_eager_backend.py +314 -0
  12. frameright-0.3.0/src/frameright/backends/polars_lazy_backend.py +321 -0
  13. frameright-0.3.0/src/frameright/backends/registry.py +77 -0
  14. frameright-0.3.0/src/frameright/core.py +487 -0
  15. frameright-0.3.0/src/frameright/exceptions.py +37 -0
  16. frameright-0.3.0/src/frameright/narwhals/__init__.py +35 -0
  17. frameright-0.3.0/src/frameright/narwhals/eager.py +89 -0
  18. frameright-0.3.0/src/frameright/narwhals/lazy.py +89 -0
  19. frameright-0.3.0/src/frameright/pandas/__init__.py +95 -0
  20. frameright-0.3.0/src/frameright/polars/__init__.py +35 -0
  21. frameright-0.3.0/src/frameright/polars/eager.py +88 -0
  22. frameright-0.3.0/src/frameright/polars/lazy.py +88 -0
  23. frameright-0.3.0/src/frameright/polars_eager/__init__.py +55 -0
  24. frameright-0.3.0/src/frameright/py.typed +0 -0
  25. frameright-0.3.0/src/frameright/typing/__init__.py +50 -0
  26. frameright-0.3.0/src/frameright/typing/narwhals.py +43 -0
  27. frameright-0.3.0/src/frameright/typing/pandas.py +19 -0
  28. frameright-0.3.0/src/frameright/typing/polars.py +14 -0
  29. frameright-0.3.0/src/frameright/typing/polars_eager.py +43 -0
  30. frameright-0.3.0/src/frameright/typing/polars_lazy.py +47 -0
  31. frameright-0.3.0/src/frameright.egg-info/PKG-INFO +664 -0
  32. frameright-0.3.0/src/frameright.egg-info/SOURCES.txt +40 -0
  33. frameright-0.3.0/src/frameright.egg-info/dependency_links.txt +1 -0
  34. frameright-0.3.0/src/frameright.egg-info/requires.txt +24 -0
  35. frameright-0.3.0/src/frameright.egg-info/top_level.txt +1 -0
  36. frameright-0.3.0/tests/test_coverage_additions.py +474 -0
  37. frameright-0.3.0/tests/test_future_annotations.py +187 -0
  38. frameright-0.3.0/tests/test_performance.py +425 -0
  39. frameright-0.3.0/tests/test_polars_backend.py +623 -0
  40. frameright-0.3.0/tests/test_static_typing_mypy.py +71 -0
  41. frameright-0.3.0/tests/test_static_typing_pyright.py +47 -0
  42. frameright-0.3.0/tests/test_structframe.py +1042 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 James Norman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,664 @@
1
+ Metadata-Version: 2.4
2
+ Name: frameright
3
+ Version: 0.3.0
4
+ Summary: A lightweight Object-DataFrame Mapper (ODM) with runtime validation, multi-backend support (Pandas, Polars), and static type checking.
5
+ Author-email: Your Name <your.email@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/frameright
8
+ Project-URL: Documentation, https://frameright.readthedocs.io
9
+ Project-URL: Repository, https://github.com/yourusername/frameright
10
+ Project-URL: Bug Tracker, https://github.com/yourusername/frameright/issues
11
+ Keywords: pandas,polars,dataframe,typing,validation,schema,odm,pandera
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Intended Audience :: Developers
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE.txt
26
+ Requires-Dist: pandas>=1.5.0
27
+ Requires-Dist: pandera>=0.20.0
28
+ Requires-Dist: typing-extensions>=4.0.0
29
+ Provides-Extra: polars
30
+ Requires-Dist: polars>=0.20.0; extra == "polars"
31
+ Provides-Extra: narwhals
32
+ Requires-Dist: narwhals>=1.0.0; extra == "narwhals"
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
36
+ Requires-Dist: mypy>=1.0; extra == "dev"
37
+ Requires-Dist: pyright>=1.1.0; extra == "dev"
38
+ Requires-Dist: pandas-stubs>=2.0.0; extra == "dev"
39
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
40
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
41
+ Requires-Dist: genbadge[coverage]>=1.1.0; extra == "dev"
42
+ Provides-Extra: docs
43
+ Requires-Dist: sphinx>=6.0; extra == "docs"
44
+ Requires-Dist: sphinx-rtd-theme>=1.2; extra == "docs"
45
+ Requires-Dist: sphinx-autodoc-typehints>=1.23; extra == "docs"
46
+ Dynamic: license-file
47
+
48
+ # FrameRight
49
+
50
+ <p align="center">
51
+ <img src="logo-banner.svg" alt="FrameRight - Type-Safe, Multi-Backend DataFrames" width="100%">
52
+ </p>
53
+
54
+ <p align="center">
55
+ <a href="https://github.com/yourusername/FrameRight/actions"><img src="https://github.com/yourusername/FrameRight/workflows/Tests/badge.svg" alt="Tests"></a>
56
+ <a href="./coverage-badge.svg"><img src="./coverage-badge.svg" alt="Coverage"></a>
57
+ <a href="https://badge.fury.io/py/FrameRight"><img src="https://badge.fury.io/py/FrameRight.svg" alt="PyPI version"></a>
58
+ <a href="https://pypi.org/project/FrameRight/"><img src="https://img.shields.io/pypi/pyversions/FrameRight.svg" alt="Python Versions"></a>
59
+ <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
60
+ </p>
61
+
62
+ **A lightweight Object-DataFrame Mapper (ODM) for Pandas, Polars, and Narwhals.** Define your DataFrame schema as a Python class with typed attributes. Get full IDE autocomplete, catch column typos and type errors at edit-time with Pylance, Pyright, or mypy, validate data at runtime with production-grade Pandera validation, and write self-documenting data contracts — all while keeping native backend speed and APIs.
63
+
64
+ **Native Backend Support**: Works with **Pandas**, **Polars**, and **Narwhals** DataFrames. Column properties return native Series types (`pd.Series`, `pl.Series`, or `nw.Series`), so you get the full native API for your backend. FrameRight is an Object DataFrame Mapper — it provides typed attribute access to columns, not DataFrame abstraction.
65
+
66
+ **Multi-Backend or Backend-Agnostic**: Choose your style:
67
+
68
+ - Use **Pandas** → get `pd.Series` with pandas methods
69
+ - Use **Polars** → get `pl.Series` with polars methods
70
+ - Use **Narwhals** → get `nw.Series` for backend-agnostic code
71
+
72
+ **Runtime Validation Powered by Pandera**: Runtime validation uses [Pandera](https://pandera.readthedocs.io/) under the hood, giving you production-tested constraint checking with helpful error messages. FrameRight wraps Pandera's validation in a clean API while adding IDE-first typed column access.
73
+
74
+ ---
75
+
76
+ ## The Problem
77
+
78
+ Pandas DataFrames use string-based column lookups. This is fine for exploratory analysis, but in production code it means:
79
+
80
+ - **No autocomplete** — you have to remember every column name
81
+ - **Invisible to static type checkers** — Pylance, Pyright, and mypy cannot see inside `df["col"]` lookups. Typos like `df["reveneu"]` pass every check and only crash at runtime
82
+ - **No type information** — the IDE can't tell you what type a column contains
83
+ - **No documentation** — what does `"ltv_v2"` represent? Only the original author knows
84
+ - **No safe refactoring** — renaming a column means find-and-replace across the entire codebase
85
+
86
+ This creates enormous friction when DataFrames are passed between functions. Consider:
87
+
88
+ ```python
89
+ def calculate_revenue(df: pd.DataFrame) -> pd.DataFrame:
90
+ # What columns does df have? What types? What constraints?
91
+ # We have no idea without reading the caller.
92
+ df['total_revenue'] = df['item_price'] * df['quantity_sold'] # Typo? Who knows.
93
+ return df
94
+ ```
95
+
96
+ ---
97
+
98
+ ## The Solution
99
+
100
+ FrameRight lets you define your DataFrame schema as a standard Python class. Columns become real attributes with type hints and docstrings. Your IDE and static type checkers (Pylance, Pyright, mypy) treat them like any other Python class — meaning you get full autocomplete, hover documentation, and **edit-time error checking for column names and types**.
101
+
102
+ ```python
103
+ from frameright import Schema, Field
104
+ from frameright.typing import Col
105
+ from typing import Optional
106
+
107
+ class Orders(Schema):
108
+ item_price: Col[float]
109
+ """The price per unit of the item."""
110
+ quantity_sold: Col[int] = Field(ge=0)
111
+ """Number of units sold."""
112
+ revenue: Optional[Col[float]]
113
+ """Computed revenue per line item."""
114
+
115
+ orders = Orders(df)
116
+
117
+ # Full IDE autocomplete — type "orders." and see all columns
118
+ # Hover to see type and docstring
119
+ orders.revenue = orders.item_price * orders.quantity_sold
120
+ total = orders.revenue.sum()
121
+ ```
122
+
123
+ The real power is using these types deep in your codebase as strict contracts for function inputs and outputs:
124
+
125
+ ```python
126
+ def calculate_revenue(orders: Orders) -> RevenueResult:
127
+ # Self-documenting. The signature tells you exactly what goes in and comes out.
128
+ ...
129
+ return RevenueResult(...)
130
+ ```
131
+
132
+ ---
133
+
134
+ ## What Static Type Checkers Catch — Without Running Your Code
135
+
136
+ Because Schema columns are real class attributes, **static type checkers catch errors at edit-time**, just as they would for any Python class. This works out of the box with:
137
+
138
+ - **Pylance / Pyright** (VS Code)
139
+ - **mypy** (CI / command line)
140
+ - **PyCharm's built-in checker**
141
+
142
+ No plugins, no extensions, no configuration. If your type checker can analyse a Python class, it can analyse a Schema:
143
+
144
+ ```python
145
+ orders = Orders(df)
146
+
147
+ # Typo in column name
148
+ orders.reveneu
149
+ # ^^^^^^^ Error: Cannot access attribute "reveneu" for class "Orders"
150
+
151
+ # Wrong schema type passed to function
152
+ def process(risk: RiskProfile) -> None: ...
153
+ process(orders)
154
+ # ^^^^^^ Error: Argument of type "Orders" cannot be assigned to parameter "risk" of type "RiskProfile"
155
+
156
+ # Rename a column? Right-click → Rename Symbol. Done. Every reference updated.
157
+ ```
158
+
159
+ Compare this to standard Pandas, where **none of these errors are caught**:
160
+
161
+ ```python
162
+ df["reveneu"] # No error. Silent KeyError at runtime.
163
+ process(wrong_df) # No error. It's just a DataFrame.
164
+ # Rename a column? Good luck with find-and-replace across 50 files.
165
+ ```
166
+
167
+ | Capability | Standard Pandas | FrameRight |
168
+ | ------------------------------------ | ------------------- | ---------------------------------------------- |
169
+ | Autocomplete on column names | No | Yes |
170
+ | Errors on typos (before running) | No | Yes — via Pylance/mypy |
171
+ | Hover to see column type + docstring | No | Yes |
172
+ | Rename-symbol refactoring | No | Yes |
173
+ | Find all references to a column | No | Yes |
174
+ | Type-safe function signatures | No (`pd.DataFrame`) | Yes (`Orders` vs `Revenue` are distinct types) |
175
+
176
+ **This is FrameRight's core value.** It turns the entire Python static analysis ecosystem — Pylance, Pyright, mypy, PyCharm — into your DataFrame tooling layer. No plugins required. If your type checker can analyse a Python class, it can analyse a Schema.
177
+
178
+ ---
179
+
180
+ ## Installation
181
+
182
+ ```bash
183
+ # For Pandas backend (default, always installed)
184
+ pip install frameright
185
+
186
+ # For Polars backend (optional)
187
+ pip install frameright[polars]
188
+
189
+ # For Narwhals backend-agnostic support (optional)
190
+ pip install frameright[narwhals]
191
+ ```
192
+
193
+ **Backend Selection** — Choose your approach:
194
+
195
+ ```python
196
+ from frameright.pandas import Schema
197
+
198
+ class Sales(Schema): # Always uses pandas
199
+ revenue: Col[float]
200
+
201
+ from frameright.polars.eager import Schema
202
+ class Sales(Schema): # Always uses polars eager (pl.DataFrame)
203
+ revenue: Col[float]
204
+
205
+ from frameright.polars.lazy import Schema
206
+ class Sales(Schema): # Always uses polars lazy (pl.LazyFrame)
207
+ revenue: Col[float]
208
+ ```
209
+
210
+ **What you get**:
211
+
212
+ - `pd.DataFrame` → columns return `pd.Series`
213
+ - `pl.DataFrame` → columns return `pl.Series`
214
+ - `pl.LazyFrame` → columns return `pl.Expr` (lazy expressions)
215
+ - `nw.DataFrame` → columns return `nw.Series` (backend-agnostic)
216
+
217
+ ---
218
+
219
+ ## Quick Start
220
+
221
+ **With Pandas:**
222
+
223
+ ```python
224
+ import pandas as pd
225
+ from frameright import Schema, Field
226
+ from frameright.typing import Col
227
+ from typing import Optional
228
+
229
+ # 1. Define your schema
230
+ class OrderData(Schema):
231
+ order_id: Col[int] = Field(unique=True)
232
+ """Unique order identifier."""
233
+ customer_id: Col[int]
234
+ """Customer who placed the order."""
235
+ item_price: Col[float] = Field(ge=0)
236
+ """Price per unit (must be non-negative)."""
237
+ quantity_sold: Col[int] = Field(ge=1)
238
+ """Number of units sold (at least 1)."""
239
+ revenue: Optional[Col[float]]
240
+ """Computed revenue (optional — may not exist yet)."""
241
+
242
+ # 2. Wrap your data (validates with Pandera on construction)
243
+ raw_df = pd.DataFrame({
244
+ 'order_id': [1, 2, 3],
245
+ 'customer_id': [101, 102, 101],
246
+ 'item_price': [15.50, 42.00, 9.99],
247
+ 'quantity_sold': [2, 1, 5]
248
+ })
249
+ orders = OrderData(raw_df)
250
+
251
+ # 3. Work with typed, documented columns
252
+ orders.revenue = orders.item_price * orders.quantity_sold
253
+ total = orders.revenue.sum()
254
+ ```
255
+
256
+ **With Polars (eager - recommended for interactive use):**
257
+
258
+ ```python
259
+ import polars as pl
260
+ from frameright.polars.eager import Schema, Col, Field
261
+ from typing import Optional
262
+
263
+ # Same schema definition (for brevity, showing abbreviated version)
264
+ class OrderData(Schema): # Explicitly uses Polars eager backend
265
+ order_id: Col[int] = Field(unique=True)
266
+ item_price: Col[float] = Field(ge=0)
267
+ quantity_sold: Col[int] = Field(ge=1)
268
+ revenue: Optional[Col[float]]
269
+
270
+ raw_df = pl.DataFrame({
271
+ 'order_id': [1, 2, 3],
272
+ 'customer_id': [101, 102, 101],
273
+ 'item_price': [15.50, 42.00, 9.99],
274
+ 'quantity_sold': [2, 1, 5]
275
+ })
276
+
277
+ # Uses Polars eager backend (set by importing from frameright.polars.eager)
278
+ orders = OrderData(raw_df)
279
+ orders.revenue = orders.item_price * orders.quantity_sold # pl.Series operations
280
+ total = orders.revenue.sum() # Polars .sum() method
281
+ ```
282
+
283
+ **With Narwhals (backend-agnostic code):**
284
+
285
+ ```python
286
+ import narwhals as nw
287
+ import pandas as pd # or polars, duckdb, etc.
288
+ from frameright.narwhals.eager import Schema, Col, Field
289
+ from typing import Optional
290
+
291
+ # Schema with narwhals types
292
+ class OrderData(Schema): # Uses Narwhals backend
293
+ order_id: Col[int] = Field(unique=True)
294
+ item_price: Col[float] = Field(ge=0)
295
+ quantity_sold: Col[int] = Field(ge=1)
296
+ revenue: Optional[Col[float]]
297
+
298
+ # Wrap any DataFrame with narwhals for backend-agnostic operations
299
+ raw_df = pd.DataFrame({'order_id': [1, 2, 3], ...})
300
+ nw_df = nw.from_native(raw_df)
301
+
302
+ # Uses Narwhals backend
303
+ orders = OrderData(nw_df)
304
+ orders.revenue = orders.item_price * orders.quantity_sold # nw.Series operations
305
+ total = orders.revenue.sum() # Backend-agnostic .sum()
306
+ ```
307
+
308
+ **Type Hints Matter**: Use the appropriate `Col` import for your backend to get IDE autocomplete:
309
+
310
+ - `from frameright.typing import Col` → pandas methods (default)
311
+ - `from frameright.typing.polars_eager import Col` → polars eager (Series) methods
312
+ - `from frameright.typing.polars_lazy import Col` → polars lazy (Expr) methods
313
+ - `from frameright.typing.narwhals import Col` → narwhals methods
314
+
315
+ **Typing note (important):** Pandas has mature type stubs, so type checkers can treat attribute accessors like `orders.amount` as `pd.Series[float]` in many cases.
316
+ Polars and Narwhals do not currently expose fully generic `Series[T]` / `Expr[T]` types upstream, so type checkers typically see `pl.Series` / `pl.Expr` / `nw.Series` (inner type is best-effort today). FrameRight still gives you safe column _names_, schema-level `Col[T]` annotations, and IDE autocomplete.
317
+
318
+ **Type Safety:** Explicit imports from backend modules like `frameright.pandas`, `frameright.polars.eager`, or `frameright.polars.lazy` give you the strongest type guarantees. Each backend module provides its own `Schema` class optimized for that backend.
319
+
320
+ **Using Native DataFrame Operations:**
321
+
322
+ Schema is an **Object DataFrame Mapper** — it provides typed attribute access to columns, not DataFrame abstraction. Use native DataFrame methods for operations:
323
+
324
+ ```python
325
+ # Filtering - use native methods
326
+ filtered_orders = OrderData(orders.fr_data[orders.revenue > 100], validate=False) # Pandas
327
+ filtered_orders = OrderData(orders.fr_data.filter(orders.revenue > 100), validate=False) # Polars
328
+
329
+ # Exporting - use native methods
330
+ orders.fr_data.to_csv('output.csv') # Pandas
331
+ orders.fr_data.write_csv('output.csv') # Polars
332
+
333
+ # Grouping - use native methods
334
+ orders.fr_data.groupby(orders.customer_id).sum() # Pandas
335
+ orders.fr_data.group_by(orders.customer_id).sum() # Polars
336
+
337
+ # LazyFrame collection - use native methods
338
+ lazy_orders = OrderData(pl.scan_csv('orders.csv')) # Still lazy
339
+ result = lazy_orders.fr_data.collect() # Executes query plan
340
+ ```
341
+
342
+ The `fr_data` property gives you direct access to the underlying DataFrame.
343
+
344
+ ---
345
+
346
+ ## Performance
347
+
348
+ FrameRight is designed to add type safety with minimal overhead:
349
+
350
+ - **Memory**: 48 bytes per Schema instance — same whether wrapping 1,000 or 1,000,000 rows
351
+ - **Column access**: Adds ~0.2 microseconds per property access (negligible)
352
+ - **Construction**: Sub-millisecond without validation; 13-51 ms for 100k rows with validation
353
+ - **Operations**: Run at native backend speed — no overhead once you have the Series
354
+
355
+ **Detailed benchmarks** (100,000 rows):
356
+
357
+ | Metric | Raw DataFrame | FrameRight Wrapper | Overhead |
358
+ | ------------------------------ | ------------- | ------------------ | ---------------- |
359
+ | Memory (100k rows) | 3.05 MB | 3.05 MB | 48 bytes (0.00%) |
360
+ | Single column access | 9.36 μs | 9.59 μs | +0.23 μs |
361
+ | Column operation (`sum()`) | 55.3 μs | 55.3 μs | ~0 μs |
362
+ | Construction (no validation) | — | 0.3 μs | — |
363
+ | Construction (with validation) | — | 13-51 ms | — |
364
+
365
+ **Polars backend** (100,000 rows):
366
+
367
+ | Metric | Polars | FrameRight Wrapper | Overhead |
368
+ | ------------------------------ | ------- | ------------------ | -------- |
369
+ | Construction (with validation) | — | 5.3 ms | — |
370
+ | Column access | 0.43 μs | 0.64 μs | +0.21 μs |
371
+
372
+ Polars is ~5x faster than Pandas for validation and ~20x faster for column access.
373
+
374
+ **Best practice**: Validate once at the entry point (`validate=True`), then use `validate=False` for internal operations. This gives you type safety and validation guarantees without paying the validation cost repeatedly.
375
+
376
+ **Note**: These measurements are per-instance overhead (the cost of each Schema wrapper). There is also a one-time cost to import FrameRight and its dependencies (Pandera, etc.), which is amortized across all Schema instances.
377
+
378
+ See [tests/test_performance.py](tests/test_performance.py) for the complete benchmark suite.
379
+
380
+ ---
381
+
382
+ ## Key Features
383
+
384
+ ### IDE-First Design
385
+
386
+ - **Autocomplete** — type `orders.` and see every column with its type and docstring
387
+ - **Static error checking** — Pylance/mypy catch typos and type mismatches before runtime
388
+ - **Hover documentation** — see column descriptions inline, no need to look up the schema
389
+ - **Rename symbol** — refactor column names across your entire codebase safely
390
+ - **Find all references** — see everywhere a column is used
391
+
392
+ ### Runtime Validation (Powered by Pandera)
393
+
394
+ - **Production-grade validation** — uses [Pandera](https://pandera.readthedocs.io/), the industry-standard DataFrame validation library
395
+ - **Type checking** — column dtypes are validated against annotations on construction
396
+ - **Field constraints** — Pydantic-style rules via `Field()`: `ge`, `gt`, `le`, `lt`, `isin`, `regex`, `min_length`, `max_length`, `nullable`, `unique`
397
+ - **Helpful error messages** — clear, actionable validation failures with row/column context
398
+ - **Specific exceptions** — `MissingColumnError`, `TypeMismatchError`, `ConstraintViolationError`
399
+ - **Multi-backend** — works with `pandera.pandas` for Pandas and `pandera.polars` for Polars
400
+
401
+ ### Data Handling
402
+
403
+ - **Native backend support** — works with Pandas, Polars, and Narwhals DataFrames transparently
404
+ - **Native Series types** — column properties return native types: `pd.Series`, `pl.Series`, or `nw.Series`
405
+ - **Full backend API** — use native methods on columns (e.g., `orders.revenue.mean()` uses pandas/polars/narwhals methods)
406
+ - **Object DataFrame Mapper** — Schema maps typed attributes to columns, it doesn't abstract the DataFrame
407
+ - **Column aliasing** — map clean attribute names to messy column names with `Field(alias="UGLY_COL_NAME")`
408
+ - **Optional columns** — `Optional[Col[T]]` for columns that may not exist; returns `None` safely
409
+ - **Type coercion** — `coerce=True` in constructor auto-converts dtypes to match the schema
410
+ - **Schema introspection** — `fr_schema_info()` returns a list of dicts describing the schema
411
+ - **Native backend speed** — column access maps directly to vectorized operations (pandas, polars, or narwhals)
412
+
413
+ ---
414
+
415
+ ## How It Compares
416
+
417
+ | | **Schema** | **Pandera** | **Pydantic v2** |
418
+ | ------------------------------- | ----------------------------------------------------------------- | ------------------------------------------------------------------------ | -------------------------------- |
419
+ | **Purpose** | Typed DataFrame access (ODM) + validation | DataFrame validation | Row-oriented data validation |
420
+ | **IDE autocomplete on columns** | Yes | No — validator only, not an accessor | N/A — no column concept |
421
+ | **Static error checking** | Yes — Pylance/mypy catch typos | No — column names are still strings internally | N/A |
422
+ | **Column access** | `orders.revenue` → native `pd.Series` / `pl.Series` / `nw.Series` | Not designed for column access | Not designed for columnar data |
423
+ | **Runtime validation** | Yes (uses Pandera internally) | Yes (more extensive: lazy validation, custom checks, hypothesis testing) | Yes (row-by-row) |
424
+ | **Performance at scale** | Native backend (vectorized) | Native backend (vectorized) | Slow — O(n) model instantiations |
425
+ | **Backend** | Pandas, Polars, Narwhals | Pandas, Polars, PySpark, Modin, Dask | Backend-agnostic (row-oriented) |
426
+ | **Developer experience** | Pydantic-like API + strong typing | Schema-centric, validation-focused | Models over DataFrames |
427
+
428
+ **Schema uses Pandera for validation.** Think of FrameRight as a developer-friendly wrapper around Pandera that adds IDE-first typed column access and a Pydantic-like API. You get the best of both worlds: production-tested validation from Pandera plus the ergonomics of typed Python classes.
429
+
430
+ For advanced Pandera features (custom checks, hypothesis testing, lazy error collection), use Pandera directly alongside FrameRight (for example by validating `obj.fr_data` or by applying additional Pandera checks in your pipeline).
431
+
432
+ ---
433
+
434
+ ## Why Polars?
435
+
436
+ Polars is a modern DataFrame library written in Rust that offers significant performance advantages over Pandas:
437
+
438
+ - **10-100x faster** for many operations, especially on larger datasets (1M+ rows)
439
+ - **Lazy evaluation** — build complex query plans that are optimized before execution
440
+ - **Parallel execution** — automatically uses all CPU cores
441
+ - **Memory efficiency** — better memory layout and columnar processing
442
+ - **Growing ecosystem** — rapidly becoming the standard for high-performance data processing in Python
443
+
444
+ FrameRight makes it trivial to switch between Pandas and Polars. Define your schema once, and you can use whichever backend fits your needs:
445
+
446
+ ```python
447
+ # Development: use Pandas for its familiar API and extensive ecosystem
448
+ from frameright.pandas import Schema, Col
449
+ ...
450
+ df_pandas = pd.read_csv("data.csv")
451
+ orders = OrderData(df_pandas)
452
+
453
+ # Production: use Polars for better performance on large datasets
454
+ from frameright.polars import Schema, Col
455
+ ...
456
+ df_polars = pl.read_csv("data.csv")
457
+ orders = OrderData(df_polars) # Same schema, different backend
458
+ ```
459
+
460
+ No code changes required. The schema class is backend-agnostic.
461
+
462
+ ## Why Narwhals?
463
+
464
+ [Narwhals](https://narwhals-dev.github.io/narwhals/) is a lightweight compatibility layer that lets you write backend-agnostic DataFrame code. It provides a unified API that works across Pandas, Polars, DuckDB, and more.
465
+
466
+ **When to use Narwhals with FrameRight:**
467
+
468
+ - You're building a **library** that needs to work with any DataFrame type
469
+ - You want to write **portable data pipelines** that work unchanged on Pandas, Polars, or DuckDB
470
+ - You need to **switch backends** without rewriting code
471
+
472
+ **FrameRight's Approach to Narwhals:**
473
+
474
+ FrameRight is an **Object DataFrame Mapper**, not a DataFrame abstraction layer. This means:
475
+
476
+ - **Native backends preferred**: If you're using Pandas, use the PandasBackend and get `pd.Series` with pandas methods
477
+ - **Native backends preferred**: If you're using Polars, use the PolarsBackend and get `pl.Series` with polars methods
478
+ - **Narwhals for portability**: If you need backend-agnostic code, use the NarwhalsBackend and get `nw.Series`
479
+
480
+ ```python
481
+ # Pandas users: get pd.Series with pandas API
482
+ import pandas as pd
483
+ from frameright.pandas import Col # defaults to pd.Series[T]
484
+
485
+ # Polars users: get pl.Series with polars eager API
486
+ import polars as pl
487
+ from frameright.polars.eager import Col # pl.Series (with polars autocomplete)
488
+
489
+ # Backend-agnostic users: get nw.Series with narwhals API
490
+ import narwhals as nw
491
+ from frameright.narwhals.eager import Col # nw.Series (with narwhals autocomplete)
492
+ ```
493
+
494
+ **The value of FrameRight is typed attribute access**, not DataFrame abstraction. Choose the backend that fits your needs, and FrameRight gives you typed column access with full IDE support.
495
+
496
+ ## Why Pandera?
497
+
498
+ [Pandera](https://pandera.readthedocs.io/) is the industry-standard DataFrame validation library with:
499
+
500
+ - **Battle-tested** — used by thousands of data teams in production
501
+ - **Clear error messages** — get actionable feedback with row/column context when validation fails
502
+ - **Extensive validation** — supports complex constraints, custom checks, and hypothesis testing
503
+ - **Multi-backend** — works with Pandas, Polars, PySpark, Modin, and Dask
504
+ - **Active development** — regular releases and strong community support
505
+
506
+ FrameRight leverages Pandera for all runtime validation, giving you production-grade constraint checking without the boilerplate. You get:
507
+
508
+ ```python
509
+ class Orders(Schema):
510
+ order_id: Col[int] = Field(unique=True) # Pandera checks uniqueness
511
+ item_price: Col[float] = Field(ge=0) # Pandera checks >= 0
512
+ currency: Col[str] = Field(isin=["USD", "EUR"]) # Pandera checks enum
513
+
514
+ orders = Orders(df) # Automatic Pandera validation
515
+ ```
516
+
517
+ Behind the scenes, FrameRight builds a Pandera schema from your class definition and runs validation on construction. You get all of Pandera's power with a cleaner, more Pydantic-like API.
518
+
519
+ ---
520
+
521
+ ## Advanced Usage
522
+
523
+ ### Data Validation & Constraints
524
+
525
+ ```python
526
+ class RiskProfile(Schema):
527
+ limit: Col[float] = Field(ge=0)
528
+ """Policy limit."""
529
+ premium: Col[float] = Field(gt=0)
530
+ """Annual premium."""
531
+ currency: Col[str] = Field(isin=["USD", "EUR", "GBP"])
532
+ country: Optional[Col[str]]
533
+
534
+ risk = RiskProfile(df) # Validates immediately
535
+ ```
536
+
537
+ ### Column Aliasing
538
+
539
+ ```python
540
+ class LegacyData(Schema):
541
+ user_id: Col[int] = Field(alias="USER_ID_V2")
542
+ signup_date: Col[str] = Field(alias="dt_signup_YYYYMMDD")
543
+
544
+ data = LegacyData(df)
545
+ print(data.user_id) # Accesses "USER_ID_V2" column
546
+ ```
547
+
548
+ ### Type Coercion
549
+
550
+ ```python
551
+ # CSV data where everything is a string
552
+ messy_df = pd.read_csv("data.csv")
553
+ orders = OrderData(messy_df, coerce=True) # Auto-converts dtypes
554
+ ```
555
+
556
+ ### Schema Introspection
557
+
558
+ ```python
559
+ for col in OrderData.fr_schema_info():
560
+ print(col["attribute"], col["type"], col["required"])
561
+ # order_id int True
562
+ # customer_id int True
563
+ # ...
564
+ ```
565
+
566
+ Returns a list of dicts with keys: `attribute`, `column`, `type`, `required`, `nullable`, `unique`, `constraints`.
567
+
568
+ ### The Escape Hatch
569
+
570
+ For complex operations like `.groupby()`, `.merge()`, or `.melt()`, you need access to a DataFrame object.
571
+
572
+ FrameRight provides an **escape hatch**:
573
+
574
+ **`fr_data` — Get the underlying dataframe**
575
+
576
+ ```python
577
+ import narwhals as nw
578
+
579
+ orders = OrderData(df)
580
+
581
+ # fr_data returns a narwhals DataFrame — same API for any backend
582
+ # Full IDE autocomplete via type stubs, zero-copy wrapper
583
+ nw_df = orders.fr_data
584
+ print(nw_df.columns) # ['order_id', 'customer_id', 'revenue']
585
+ print(nw_df.schema) # Column names → narwhals dtypes
586
+ filtered = nw_df.filter(nw.col('revenue') > 100)
587
+ grouped = nw_df.group_by('customer_id').agg(nw.col('revenue').sum())
588
+ ```
589
+
590
+ ### Type-Safe Group Keys
591
+
592
+ When working with native DataFrames, you can often pass the **column object itself** (recommended), instead of spelling a column name:
593
+
594
+ ```python
595
+ # No string literals, no `.name` needed
596
+ customer_totals = orders.fr_data.groupby(orders.customer_id).sum() # Pandas
597
+ customer_totals = orders.fr_data.group_by(orders.customer_id).sum() # Polars
598
+ ```
599
+
600
+ This also respects aliases automatically because the grouping key is derived from the real column:
601
+
602
+ ```python
603
+ class LegacyData(Schema):
604
+ user_id: Col[int] = Field(alias="USER_ID_V2")
605
+ total_spent: Col[float] = Field(alias="TOT_SPEND_USD")
606
+
607
+ data = LegacyData(df)
608
+
609
+ by_user = data.fr_data.groupby(data.user_id).sum() # Pandas
610
+ ```
611
+
612
+ **Benefits:**
613
+
614
+ - IDE autocomplete on `orders.customer_id`
615
+ - Static type checkers catch typos
616
+ - Respects aliases automatically
617
+ - No string literals anywhere in your code
618
+
619
+ ---
620
+
621
+ ## Exceptions
622
+
623
+ FrameRight raises specific exceptions for different failure modes:
624
+
625
+ | Exception | When It's Raised |
626
+ | -------------------------- | ----------------------------------------------------------------- |
627
+ | `MissingColumnError` | A required column is not in the DataFrame |
628
+ | `TypeMismatchError` | A column's dtype doesn't match the type annotation |
629
+ | `ConstraintViolationError` | A `Field()` constraint is violated (e.g., `ge`, `isin`, `unique`) |
630
+
631
+ All inherit from `ValidationError` → `StructFrameError` → `Exception`.
632
+
633
+ ---
634
+
635
+ ## Contributing
636
+
637
+ Pull requests are welcome! If you find a bug or have a feature request, please open an issue.
638
+
639
+ ```bash
640
+ # Development setup
641
+ git clone https://github.com/yourusername/frameright.git
642
+ cd frameright
643
+ pip install -e ".[dev,polars]"
644
+
645
+ # Run tests
646
+ make test
647
+
648
+ # Run tests with coverage report
649
+ make coverage
650
+
651
+ # Generate coverage badge (update before committing)
652
+ make badge
653
+
654
+ # Run type checking
655
+ make lint
656
+ ```
657
+
658
+ The coverage badge is automatically generated from test results. After making changes that affect coverage, run `make badge` to update [coverage-badge.svg](coverage-badge.svg) before committing.
659
+
660
+ ---
661
+
662
+ ## License
663
+
664
+ MIT License — see [LICENSE.txt](LICENSE.txt) for details.