planframe-polars 0.1.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
+ # Python bytecode / caches
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # Virtual environments
+ .venv/
+ venv/
+ ENV/
+ env/
+ .env/
+
+ # Packaging / build artifacts
+ build/
+ dist/
+ *.egg-info/
+ *.egg
+ pip-wheel-metadata/
+
+ # Test / coverage artifacts
+ .pytest_cache/
+ .coverage
+ .coverage.*
+ coverage.xml
+ htmlcov/
+
+ # Type check / lint caches
+ .mypy_cache/
+ .pyright/
+ .ruff_cache/
+
+ # Notebook checkpoints
+ .ipynb_checkpoints/
+
+ # IDE/editor settings
+ .vscode/
+ .idea/
+
+ # OS files
+ .DS_Store
+ Thumbs.db
@@ -0,0 +1,22 @@
+ MIT License
+
+ Copyright (c) 2026 PlanFrame Contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
@@ -0,0 +1,63 @@
+ Metadata-Version: 2.4
+ Name: planframe-polars
+ Version: 0.1.0
+ Summary: Polars backend adapter for PlanFrame.
+ Project-URL: Repository, https://github.com/eddiethedean/planframe
+ Project-URL: Documentation, https://github.com/eddiethedean/planframe/blob/main/README.md
+ Project-URL: Issues, https://github.com/eddiethedean/planframe/issues
+ Author: PlanFrame Contributors
+ License: MIT
+ License-File: LICENSE
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.10
+ Requires-Dist: planframe<0.2.0,>=0.1.0
+ Requires-Dist: polars>=0.20
+ Description-Content-Type: text/markdown
+
+ ## planframe-polars
+
+ Polars adapter package for PlanFrame. Import as `planframe_polars`.
+
+ ### Usage
+
+ ```python
+ from planframe_polars import PolarsFrame
+
+
+ class User(PolarsFrame):
+     id: int
+     age: int
+
+
+ # Construct from Python data (dict-of-lists or list-of-dicts):
+ pf = User({"id": [1], "age": [2]})
+ df = pf.select("id").collect()
+
+ # To wrap an existing polars (Lazy)Frame, use the scan_*/read_* classmethods
+ # or `Frame.source(...)`; the constructor accepts only Python data.
+ ```
+
+ ### Execution model
+
+ PlanFrame is always lazy:
+ - Chaining methods (like `.select(...)`) does **not** run Polars operations.
+ - `collect()` evaluates the full plan. If the source is a `polars.LazyFrame`, this naturally compiles into a single lazy query before collecting.
+
+ ### Notes (Polars-specific)
+
+ - **Pivot**: `LazyFrame.pivot(...)` requires `on_columns` to be provided up-front (Polars must know the output schema prior to `collect()`). PlanFrame enforces this at execution time.
+ - **concat_vertical**: implemented via `polars.concat(..., how="vertical")`.
+ - **Join**: implemented via `LazyFrame.join(...)` / `DataFrame.join(...)` using the provided `on`, `how`, and `suffix`.
+
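A minimal sketch of the execution model described above, using only the constructor and methods from the Usage example (the data values are illustrative):

```python
from planframe_polars import PolarsFrame


class User(PolarsFrame):
    id: int
    age: int


pf = User({"id": [1, 2], "age": [30, 40]})
plan = pf.select("id")  # only extends the plan; no Polars work runs here
df = plan.collect()     # evaluates the whole plan in a single pass
print(df)
```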
@@ -0,0 +1,43 @@
+ ## planframe-polars
+
+ Polars adapter package for PlanFrame. Import as `planframe_polars`.
+
+ ### Usage
+
+ ```python
+ from planframe_polars import PolarsFrame
+
+
+ class User(PolarsFrame):
+     id: int
+     age: int
+
+
+ # Construct from Python data (dict-of-lists or list-of-dicts):
+ pf = User({"id": [1], "age": [2]})
+ df = pf.select("id").collect()
+
+ # To wrap an existing polars (Lazy)Frame, use the scan_*/read_* classmethods
+ # or `Frame.source(...)`; the constructor accepts only Python data.
+ ```
+
+ ### Execution model
+
+ PlanFrame is always lazy:
+ - Chaining methods (like `.select(...)`) does **not** run Polars operations.
+ - `collect()` evaluates the full plan. If the source is a `polars.LazyFrame`, this naturally compiles into a single lazy query before collecting.
+
+ ### Notes (Polars-specific)
+
+ - **Pivot**: `LazyFrame.pivot(...)` requires `on_columns` to be provided up-front (Polars must know the output schema prior to `collect()`). PlanFrame enforces this at execution time.
+ - **concat_vertical**: implemented via `polars.concat(..., how="vertical")`.
+ - **Join**: implemented via `LazyFrame.join(...)` / `DataFrame.join(...)` using the provided `on`, `how`, and `suffix`.
+
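To make the join and concat notes concrete, here is a hedged sketch that drives the adapter directly (it assumes `planframe` itself is installed, since `PolarsAdapter` derives from its `BaseAdapter`):

```python
import polars as pl

from planframe_polars import PolarsAdapter

adapter = PolarsAdapter()
left = pl.LazyFrame({"id": [1, 2], "x": [10, 20]})
right = pl.LazyFrame({"id": [1], "y": [9]})

# join() coerces any eager inputs to LazyFrame, so the result stays lazy.
joined = adapter.join(left, right, on=("id",), how="left", suffix="_right")
print(joined.collect())

# concat_vertical() is a thin wrapper over polars.concat(..., how="vertical").
stacked = adapter.concat_vertical(left, left)
print(stacked.collect())
```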
@@ -0,0 +1,7 @@
+ from planframe_polars.adapter import PolarsAdapter
+ from planframe_polars.frame import PolarsFrame
+
+ __all__ = [
+     "PolarsAdapter",
+     "PolarsFrame",
+ ]
@@ -0,0 +1,442 @@
+ from __future__ import annotations
+
+ from typing import Any, Literal, cast
+
+ import polars as pl
+
+ from planframe.backend.adapter import BaseAdapter
+ from planframe.expr.api import Expr
+ from planframe_polars.compile_expr import compile_expr
+
+ PolarsBackendFrame = pl.DataFrame | pl.LazyFrame
+
+
+ class PolarsAdapter(BaseAdapter[PolarsBackendFrame, pl.Expr]):
+     name = "polars"
+
+     def _collect_df(self, df: PolarsBackendFrame) -> pl.DataFrame:
+         out = df.collect() if isinstance(df, pl.LazyFrame) else df
+         # Polars' type stubs may model `collect()` as returning an intermediate query.
+         if not isinstance(out, pl.DataFrame):
+             raise TypeError("Expected Polars collect() to return a DataFrame")
+         return out
+
+     def select(self, df: PolarsBackendFrame, columns: tuple[str, ...]) -> PolarsBackendFrame:
+         return df.select(list(columns))
+
+     def drop(self, df: PolarsBackendFrame, columns: tuple[str, ...]) -> PolarsBackendFrame:
+         return df.drop(list(columns))
+
+     def rename(self, df: PolarsBackendFrame, mapping: dict[str, str]) -> PolarsBackendFrame:
+         return df.rename(mapping)
+
+     def with_column(self, df: PolarsBackendFrame, name: str, expr: pl.Expr) -> PolarsBackendFrame:
+         return df.with_columns(expr.alias(name))
+
+     def cast(self, df: PolarsBackendFrame, name: str, dtype: Any) -> PolarsBackendFrame:
+         return df.with_columns(pl.col(name).cast(dtype))
+
+     def filter(self, df: PolarsBackendFrame, predicate: pl.Expr) -> PolarsBackendFrame:
+         return df.filter(predicate)
+
+     def sort(
+         self,
+         df: PolarsBackendFrame,
+         columns: tuple[str, ...],
+         *,
+         descending: bool = False,
+         nulls_last: bool = False,
+     ) -> PolarsBackendFrame:
+         if not columns:
+             return df
+         return df.sort(list(columns), descending=descending, nulls_last=nulls_last)
+
+     def unique(
+         self,
+         df: PolarsBackendFrame,
+         subset: tuple[str, ...] | None,
+         *,
+         keep: str = "first",
+         maintain_order: bool = False,
+     ) -> PolarsBackendFrame:
+         kwargs: dict[str, Any] = {"keep": keep, "maintain_order": maintain_order}
+         if subset is not None:
+             kwargs["subset"] = list(subset)
+         return df.unique(**kwargs)  # type: ignore[arg-type]
+
+     def duplicated(
+         self,
+         df: PolarsBackendFrame,
+         subset: tuple[str, ...] | None,
+         *,
+         keep: str | bool = "first",
+         out_name: str = "duplicated",
+     ) -> PolarsBackendFrame:
+         if keep is False:
+             raise NotImplementedError(
+                 "duplicated(..., keep=False) is not supported in this adapter yet"
+             )
+         if keep not in {"first", "last"}:
+             raise ValueError("keep must be 'first', 'last', or False")
+
+         cols = list(subset) if subset is not None else None
+         expr = pl.struct(cols) if cols is not None else pl.struct(pl.all())
+         # Honor `keep`: mark every occurrence except the first (or last) one.
+         # (`is_duplicated()` alone marks *all* occurrences, i.e. keep=False semantics.)
+         if keep == "first":
+             mask_expr = ~expr.is_first_distinct()
+         else:
+             mask_expr = ~expr.is_last_distinct()
+
+         if isinstance(df, pl.LazyFrame):
+             return df.select(mask_expr.alias(out_name))
+         mask = df.select(mask_expr.alias(out_name))[out_name]
+         return pl.DataFrame({out_name: mask})
+
+     def compile_expr(self, expr: Any) -> pl.Expr:
+         if not isinstance(expr, Expr):
+             raise TypeError(f"Expected PlanFrame Expr, got {type(expr)!r}")
+         return compile_expr(expr)
+
+     def group_by_agg(
+         self,
+         df: PolarsBackendFrame,
+         *,
+         keys: tuple[str, ...],
+         named_aggs: dict[str, tuple[str, str]],
+     ) -> PolarsBackendFrame:
+         if not keys:
+             raise ValueError("keys must be non-empty")
+         agg_exprs: list[pl.Expr] = []
+         for out_name, (op, col) in named_aggs.items():
+             e = pl.col(col)
+             if op == "count":
+                 ex = e.count()
+             elif op == "sum":
+                 ex = e.sum()
+             elif op == "mean":
+                 ex = e.mean()
+             elif op == "min":
+                 ex = e.min()
+             elif op == "max":
+                 ex = e.max()
+             elif op == "n_unique":
+                 ex = e.n_unique()
+             else:
+                 raise ValueError(f"Unsupported agg op: {op!r}")
+             agg_exprs.append(ex.alias(out_name))
+         return df.group_by(list(keys)).agg(agg_exprs)
+
+     def drop_nulls(
+         self, df: PolarsBackendFrame, subset: tuple[str, ...] | None
+     ) -> PolarsBackendFrame:
+         if subset is None:
+             return df.drop_nulls()
+         return df.drop_nulls(list(subset))
+
+     def fill_null(
+         self, df: PolarsBackendFrame, value: Any, subset: tuple[str, ...] | None
+     ) -> PolarsBackendFrame:
+         if subset is None:
+             return df.fill_null(value)
+         exprs = [pl.col(c).fill_null(value) for c in subset]
+         return df.with_columns(exprs)
+
+     def melt(
+         self,
+         df: PolarsBackendFrame,
+         *,
+         id_vars: tuple[str, ...],
+         value_vars: tuple[str, ...],
+         variable_name: str,
+         value_name: str,
+     ) -> PolarsBackendFrame:
+         # Prefer unpivot (polars deprecates melt on LazyFrame).
+         return df.unpivot(
+             index=list(id_vars),
+             on=list(value_vars),
+             variable_name=variable_name,
+             value_name=value_name,
+         )
+
+     def join(
+         self,
+         left: PolarsBackendFrame,
+         right: PolarsBackendFrame,
+         *,
+         on: tuple[str, ...],
+         how: str = "inner",
+         suffix: str = "_right",
+     ) -> PolarsBackendFrame:
+         if not on:
+             raise ValueError("on must be non-empty")
+         allowed_how = {
+             "inner",
+             "left",
+             "right",
+             "full",
+             "semi",
+             "anti",
+             "cross",
+         }
+         if how not in allowed_how:
+             raise ValueError(f"Unsupported join how={how!r}")
+
+         # Keep plans always-lazy: coerce eager frames to LazyFrame.
+         left_lf = left.lazy() if isinstance(left, pl.DataFrame) else left
+         right_lf = right.lazy() if isinstance(right, pl.DataFrame) else right
+
+         how_lit = cast(
+             Literal["inner", "left", "right", "full", "semi", "anti", "cross"],
+             how,
+         )
+         return left_lf.join(right_lf, on=list(on), how=how_lit, suffix=suffix)
+
+     def slice(
+         self, df: PolarsBackendFrame, *, offset: int, length: int | None
+     ) -> PolarsBackendFrame:
+         return df.slice(offset, length)
+
+     def head(self, df: PolarsBackendFrame, n: int) -> PolarsBackendFrame:
+         return df.head(n)
+
+     def tail(self, df: PolarsBackendFrame, n: int) -> PolarsBackendFrame:
+         return df.tail(n)
+
+     def concat_vertical(
+         self, left: PolarsBackendFrame, right: PolarsBackendFrame
+     ) -> PolarsBackendFrame:
+         return pl.concat([left, right], how="vertical")
+
+     def concat_horizontal(
+         self, left: PolarsBackendFrame, right: PolarsBackendFrame
+     ) -> PolarsBackendFrame:
+         return pl.concat([left, right], how="horizontal")
+
+     def pivot(
+         self,
+         df: PolarsBackendFrame,
+         *,
+         index: tuple[str, ...],
+         on: str,
+         values: str,
+         agg: str = "first",
+         on_columns: tuple[str, ...] | None = None,
+         separator: str = "_",
+     ) -> PolarsBackendFrame:
+         allowed_agg = {
+             "min",
+             "max",
+             "first",
+             "last",
+             "sum",
+             "mean",
+             "median",
+             "len",  # polars name for "count" in pivot
+             "count",
+             "n_unique",
+         }
+         if agg not in allowed_agg:
+             raise ValueError(f"Unsupported pivot agg={agg!r}")
+         if agg == "count":
+             agg_arg: Literal[
+                 "min",
+                 "max",
+                 "first",
+                 "last",
+                 "sum",
+                 "mean",
+                 "median",
+                 "len",
+             ] | pl.Expr = "len"
+         elif agg == "n_unique":
+             agg_arg = pl.col(values).n_unique()
+         else:
+             agg_arg = cast(
+                 Literal[
+                     "min",
+                     "max",
+                     "first",
+                     "last",
+                     "sum",
+                     "mean",
+                     "median",
+                     "len",
+                 ],
+                 agg,
+             )
+
+         if isinstance(df, pl.LazyFrame):
+             if on_columns is None:
+                 raise ValueError("Lazy pivot requires on_columns to be provided")
+             return df.pivot(
+                 index=list(index),
+                 on=on,
+                 values=values,
+                 aggregate_function=agg_arg,
+                 on_columns=list(on_columns),
+                 separator=separator,
+             )
+
+         return df.pivot(
+             index=list(index),
+             on=on,
+             values=values,
+             aggregate_function=agg_arg,
+             on_columns=list(on_columns) if on_columns is not None else None,
+             separator=separator,
+         )
+
+     def write_parquet(
+         self,
+         df: PolarsBackendFrame,
+         path: str,
+         *,
+         compression: str = "zstd",
+         row_group_size: int | None = None,
+         partition_by: tuple[str, ...] | None = None,
+         storage_options: dict[str, Any] | None = None,
+     ) -> None:
+         out = self._collect_df(df)
+         if compression not in {"uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4"}:
+             raise ValueError(f"Unsupported parquet compression={compression!r}")
+         comp_lit = cast(
+             Literal["uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4"],
+             compression,
+         )
+         out.write_parquet(
+             path,
+             compression=comp_lit,
+             row_group_size=row_group_size,
+             partition_by=list(partition_by) if partition_by is not None else None,
+             storage_options=storage_options,
+         )
+
+     def write_csv(
+         self,
+         df: PolarsBackendFrame,
+         path: str,
+         *,
+         separator: str = ",",
+         include_header: bool = True,
+         storage_options: dict[str, Any] | None = None,
+     ) -> None:
+         out = self._collect_df(df)
+         out.write_csv(
+             path,
+             separator=separator,
+             include_header=include_header,
+             storage_options=storage_options,
+         )
+
+     def write_ndjson(
+         self, df: PolarsBackendFrame, path: str, *, storage_options: dict[str, Any] | None = None
+     ) -> None:
+         out = self._collect_df(df)
+         # Polars write_ndjson does not currently accept storage_options.
+         # The path may still be a cloud URI if the Polars build supports it implicitly.
+         out.write_ndjson(path)
+
+     def write_ipc(
+         self,
+         df: PolarsBackendFrame,
+         path: str,
+         *,
+         compression: str = "uncompressed",
+         storage_options: dict[str, Any] | None = None,
+     ) -> None:
+         out = self._collect_df(df)
+         if compression not in {"uncompressed", "lz4", "zstd"}:
+             raise ValueError(f"Unsupported ipc compression={compression!r}")
+         comp_lit = cast(Literal["uncompressed", "lz4", "zstd"], compression)
+         # Polars write_ipc does not currently accept storage_options.
+         out.write_ipc(path, compression=comp_lit)
+
+     def write_database(
+         self,
+         df: PolarsBackendFrame,
+         *,
+         table_name: str,
+         connection: Any,
+         if_table_exists: str = "fail",
+         engine: str | None = None,
+     ) -> None:
+         out = self._collect_df(df)
+         kwargs: dict[str, Any] = {"if_table_exists": if_table_exists}
+         if engine is not None:
+             kwargs["engine"] = engine
+         out.write_database(table_name=table_name, connection=connection, **kwargs)
+
+     def write_excel(self, df: PolarsBackendFrame, path: str, *, worksheet: str = "Sheet1") -> None:
+         out = self._collect_df(df)
+         out.write_excel(workbook=path, worksheet=worksheet)
+
+     def write_delta(
+         self,
+         df: PolarsBackendFrame,
+         target: str,
+         *,
+         mode: str = "error",
+         storage_options: dict[str, Any] | None = None,
+     ) -> None:
+         out = self._collect_df(df)
+         if mode not in {"error", "append", "overwrite", "ignore"}:
+             raise ValueError(f"Unsupported delta mode={mode!r}")
+         mode_lit = cast(Literal["error", "append", "overwrite", "ignore"], mode)
+         out.write_delta(target, mode=mode_lit, storage_options=storage_options)
+
+     def write_avro(
+         self,
+         df: PolarsBackendFrame,
+         path: str,
+         *,
+         compression: str = "uncompressed",
+         name: str = "",
+     ) -> None:
+         out = self._collect_df(df)
+         if compression not in {"uncompressed", "snappy", "deflate"}:
+             raise ValueError(f"Unsupported avro compression={compression!r}")
+         comp_lit = cast(Literal["uncompressed", "snappy", "deflate"], compression)
+         out.write_avro(path, compression=comp_lit, name=name)
+
+     def explode(self, df: PolarsBackendFrame, column: str) -> PolarsBackendFrame:
+         return df.explode(column)
+
+     def unnest(self, df: PolarsBackendFrame, column: str) -> PolarsBackendFrame:
+         return df.unnest(column)
+
+     def drop_nulls_all(
+         self, df: PolarsBackendFrame, subset: tuple[str, ...] | None
+     ) -> PolarsBackendFrame:
+         if subset is None:
+             # Drop rows where *all* columns are null.
+             return df.filter(~pl.all_horizontal(pl.all().is_null()))
+         cols = list(subset)
+         if not cols:
+             return df
+         mask = pl.all_horizontal([pl.col(c).is_null() for c in cols])
+         return df.filter(~mask)
+
+     def sample(
+         self,
+         df: PolarsBackendFrame,
+         *,
+         n: int | None = None,
+         frac: float | None = None,
+         with_replacement: bool = False,
+         shuffle: bool = False,
+         seed: int | None = None,
+     ) -> PolarsBackendFrame:
+         df2 = self._collect_df(df)
+         kwargs: dict[str, Any] = {
+             "with_replacement": with_replacement,
+             "shuffle": shuffle,
+             "seed": seed,
+         }
+         if n is not None:
+             return df2.sample(n=n, **kwargs)
+         return df2.sample(fraction=frac, **kwargs)
+
+     def collect(self, df: PolarsBackendFrame) -> PolarsBackendFrame:
+         return self._collect_df(df) if isinstance(df, pl.LazyFrame) else df
+
+     def to_dicts(self, df: PolarsBackendFrame) -> list[dict[str, object]]:
+         return self._collect_df(df).to_dicts()
+
+     def to_dict(self, df: PolarsBackendFrame) -> dict[str, list[object]]:
+         return self._collect_df(df).to_dict(as_series=False)  # type: ignore[return-value]
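A short sketch of the `group_by_agg` contract above: `named_aggs` maps each output column to an `(op, input column)` pair. The frame contents are illustrative, and `planframe` must be installed for the import to resolve:

```python
import polars as pl

from planframe_polars import PolarsAdapter

adapter = PolarsAdapter()
lf = pl.LazyFrame({"g": ["a", "a", "b"], "v": [1, 2, 3]})

# "total" sums column v; "n" counts non-null values of v per group.
out = adapter.group_by_agg(
    lf, keys=("g",), named_aggs={"total": ("sum", "v"), "n": ("count", "v")}
)
print(adapter.to_dicts(adapter.sort(out, ("g",))))
# Expected shape: [{"g": "a", "total": 3, "n": 2}, {"g": "b", "total": 3, "n": 1}]
```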
@@ -0,0 +1,179 @@
+ from __future__ import annotations
+
+ from typing import Any, Literal, cast
+
+ import polars as pl
+
+ from planframe.backend.errors import PlanFrameExpressionError
+ from planframe.expr.api import (
+     Abs,
+     Add,
+     And,
+     Between,
+     Ceil,
+     Clip,
+     Coalesce,
+     Col,
+     DtDay,
+     DtMonth,
+     DtYear,
+     Eq,
+     Exp,
+     Expr,
+     Floor,
+     Ge,
+     Gt,
+     IfElse,
+     IsFinite,
+     IsIn,
+     IsNotNull,
+     IsNull,
+     Le,
+     Lit,
+     Log,
+     Lt,
+     Mul,
+     Ne,
+     Not,
+     Or,
+     Over,
+     Pow,
+     Round,
+     Sqrt,
+     StrContains,
+     StrEndsWith,
+     StrLen,
+     StrLower,
+     StrReplace,
+     StrSplit,
+     StrStartsWith,
+     StrStrip,
+     StrUpper,
+     Sub,
+     TrueDiv,
+     Xor,
+ )
+
+
+ def compile_expr(expr: Expr[Any]) -> pl.Expr:
+     if isinstance(expr, Col):
+         return pl.col(expr.name)
+     if isinstance(expr, Lit):
+         return pl.lit(expr.value)
+     if isinstance(expr, Add):
+         return compile_expr(expr.left) + compile_expr(expr.right)
+     if isinstance(expr, Sub):
+         return compile_expr(expr.left) - compile_expr(expr.right)
+     if isinstance(expr, Mul):
+         return compile_expr(expr.left) * compile_expr(expr.right)
+     if isinstance(expr, TrueDiv):
+         return compile_expr(expr.left) / compile_expr(expr.right)
+     if isinstance(expr, Eq):
+         return compile_expr(expr.left) == compile_expr(expr.right)
+     if isinstance(expr, Ne):
+         return compile_expr(expr.left) != compile_expr(expr.right)
+     if isinstance(expr, Lt):
+         return compile_expr(expr.left) < compile_expr(expr.right)
+     if isinstance(expr, Le):
+         return compile_expr(expr.left) <= compile_expr(expr.right)
+     if isinstance(expr, Gt):
+         return compile_expr(expr.left) > compile_expr(expr.right)
+     if isinstance(expr, Ge):
+         return compile_expr(expr.left) >= compile_expr(expr.right)
+     if isinstance(expr, IsNull):
+         return compile_expr(expr.value).is_null()
+     if isinstance(expr, IsNotNull):
+         return compile_expr(expr.value).is_not_null()
+     if isinstance(expr, IsIn):
+         return compile_expr(expr.value).is_in(list(expr.options))
+     if isinstance(expr, And):
+         return compile_expr(expr.left) & compile_expr(expr.right)
+     if isinstance(expr, Or):
+         return compile_expr(expr.left) | compile_expr(expr.right)
+     if isinstance(expr, Not):
+         return ~compile_expr(expr.value)
+     if isinstance(expr, Xor):
+         return compile_expr(expr.left) ^ compile_expr(expr.right)
+     if isinstance(expr, Abs):
+         return compile_expr(expr.value).abs()
+     if isinstance(expr, Round):
+         e = compile_expr(expr.value)
+         return e.round(expr.ndigits) if expr.ndigits is not None else e.round()
+     if isinstance(expr, Floor):
+         return compile_expr(expr.value).floor()
+     if isinstance(expr, Ceil):
+         return compile_expr(expr.value).ceil()
+     if isinstance(expr, Coalesce):
+         return pl.coalesce([compile_expr(v) for v in expr.values])
+     if isinstance(expr, IfElse):
+         return (
+             pl.when(compile_expr(expr.cond))
+             .then(compile_expr(expr.then_value))
+             .otherwise(compile_expr(expr.else_value))
+         )
+     if isinstance(expr, Over):
+         e = compile_expr(expr.value)
+         return e.over(
+             partition_by=list(expr.partition_by),
+             order_by=(list(expr.order_by) if expr.order_by is not None else None),
+         )
+     if isinstance(expr, Between):
+         allowed_closed = {"left", "right", "both", "none"}
+         if expr.closed not in allowed_closed:
+             raise ValueError(f"Unsupported closed interval: {expr.closed!r}")
+         closed_lit = cast(Literal["left", "right", "both", "none"], expr.closed)
+         return compile_expr(expr.value).is_between(
+             compile_expr(expr.low),
+             compile_expr(expr.high),
+             closed=closed_lit,
+         )
+     if isinstance(expr, Clip):
+         e = compile_expr(expr.value)
+         lower = compile_expr(expr.lower) if expr.lower is not None else None
+         upper = compile_expr(expr.upper) if expr.upper is not None else None
+         return e.clip(lower_bound=lower, upper_bound=upper)
+     if isinstance(expr, Pow):
+         return compile_expr(expr.base) ** compile_expr(expr.exponent)
+     if isinstance(expr, Exp):
+         return compile_expr(expr.value).exp()
+     if isinstance(expr, Log):
+         return compile_expr(expr.value).log()
+     if isinstance(expr, StrContains):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.contains(expr.pattern, literal=expr.literal)
+     if isinstance(expr, StrStartsWith):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.starts_with(expr.prefix)
+     if isinstance(expr, StrEndsWith):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.ends_with(expr.suffix)
+     if isinstance(expr, StrLower):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.to_lowercase()
+     if isinstance(expr, StrUpper):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.to_uppercase()
+     if isinstance(expr, StrLen):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.len_chars()
+     if isinstance(expr, StrReplace):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.replace_all(expr.pattern, expr.replacement, literal=expr.literal)
+     if isinstance(expr, StrStrip):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.strip_chars()
+     if isinstance(expr, StrSplit):
+         e = compile_expr(expr.value).cast(pl.Utf8)
+         return e.str.split(expr.by)
+     if isinstance(expr, DtYear):
+         return compile_expr(expr.value).dt.year()
+     if isinstance(expr, DtMonth):
+         return compile_expr(expr.value).dt.month()
+     if isinstance(expr, DtDay):
+         return compile_expr(expr.value).dt.day()
+     if isinstance(expr, Sqrt):
+         return compile_expr(expr.value).sqrt()
+     if isinstance(expr, IsFinite):
+         return compile_expr(expr.value).is_finite()
+
+     raise PlanFrameExpressionError(f"Unsupported expr node: {type(expr)!r}")
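For orientation, a minimal sketch of the node-to-Polars mapping. It assumes the `Expr` nodes are plain positional dataclasses (`Col(name)`, `Lit(value)`, `Gt(left, right)`), which matches the attribute access in `compile_expr` above but is not confirmed by this package alone:

```python
import polars as pl

from planframe.expr.api import Col, Gt, Lit
from planframe_polars.compile_expr import compile_expr

predicate = compile_expr(Gt(Col("age"), Lit(18)))  # -> pl.col("age") > pl.lit(18)
print(pl.DataFrame({"age": [10, 30]}).filter(predicate))  # keeps only age == 30
```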
@@ -0,0 +1,229 @@
+ from __future__ import annotations
+
+ from collections.abc import Mapping, Sequence
+ from typing import Any, ClassVar, Generic, Literal, TypeVar, cast
+
+ import polars as pl
+
+ from planframe.frame import Frame
+ from planframe_polars.adapter import PolarsAdapter, PolarsBackendFrame
+
+ SchemaT = TypeVar("SchemaT")
+
+ PolarsData = Mapping[str, Sequence[object]] | Sequence[Mapping[str, object]]
+
+
+ def _schema_defaults(schema: type[Any]) -> dict[str, object]:
+     ann = dict(getattr(schema, "__dict__", {}).get("__annotations__", {}))
+     out: dict[str, object] = {}
+     for name in ann.keys():
+         if name in getattr(schema, "__dict__", {}):
+             out[name] = getattr(schema, name)
+     return out
+
+
+ def _fill_missing_from_defaults(data: PolarsData, *, defaults: dict[str, object]) -> PolarsData:
+     if not defaults:
+         return data
+
+     if isinstance(data, Mapping):
+         data_map = cast(Mapping[str, Sequence[object]], data)
+         if not data_map:
+             return dict(data_map)
+         # Infer row count from the first column.
+         first = next(iter(data_map.values()))
+         n = len(first)
+         out: dict[str, list[object]] = {k: list(v) for k, v in data_map.items()}
+         for k, dv in defaults.items():
+             if k not in out:
+                 out[k] = [dv] * n
+         return out
+
+     # list-of-dicts
+     out_rows: list[dict[str, object]] = []
+     for row in data:
+         r = dict(row)
+         for k, dv in defaults.items():
+             if k not in r:
+                 r[k] = dv
+         out_rows.append(r)
+     return out_rows
+
+
+ def _to_polars_backend_frame(
+     data: PolarsData, *, schema: type[Any], lazy: bool
+ ) -> PolarsBackendFrame:
+     defaults = _schema_defaults(schema)
+     data2 = _fill_missing_from_defaults(data, defaults=defaults)
+     df = pl.DataFrame(data2)  # type: ignore[arg-type]
+     return df.lazy() if lazy else df
+
+
+ class _PolarsFrameMeta(type):
+     def __call__(cls, *args: Any, **kwargs: Any) -> Any:
+         # Allow normal dataclass construction when `Frame.source(...)` calls `cls(_data=..., ...)`.
+         if "_data" in kwargs and "_adapter" in kwargs and "_plan" in kwargs and "_schema" in kwargs:
+             return super().__call__(*args, **kwargs)
+
+         data = args[0] if args else kwargs.pop("data")
+         if isinstance(data, (pl.DataFrame, pl.LazyFrame)):
+             raise TypeError(
+                 "PolarsFrame constructors accept only Python data (dict-of-lists or list-of-dicts). "
+                 "Use `Frame.source(...)` for advanced usage."
+             )
+         lazy = kwargs.pop("lazy", True)
+         if kwargs:
+             raise TypeError(f"Unexpected constructor kwargs: {sorted(kwargs)}")
+         df = _to_polars_backend_frame(data, schema=cls, lazy=lazy)
+         return PolarsFrame.source(
+             df,
+             adapter=PolarsFrame._adapter_singleton,
+             schema=cast(type[SchemaT], cls),
+         )
+
+
+ class PolarsFrame(
+     Frame[SchemaT, PolarsBackendFrame, pl.Expr], Generic[SchemaT], metaclass=_PolarsFrameMeta
+ ):
+     """A PlanFrame `Frame` bound to the Polars backend."""
+
+     _adapter_singleton: ClassVar[PolarsAdapter] = PolarsAdapter()
+     __planframe_model__ = True
+
+     @classmethod
+     def scan_parquet(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         hive_partitioning: bool | None = None,
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {"storage_options": storage_options}
+         if hive_partitioning is not None:
+             kwargs["hive_partitioning"] = hive_partitioning
+         lf = pl.scan_parquet(path, **kwargs)
+         return cls.source(lf, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def scan_parquet_dataset(
+         cls,
+         path_or_glob: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         return cls.scan_parquet(
+             path_or_glob,
+             schema=schema,
+             hive_partitioning=True,
+             storage_options=storage_options,
+         )
+
+     @classmethod
+     def scan_csv(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         lf = pl.scan_csv(path, storage_options=storage_options)
+         return cls.source(lf, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def scan_ndjson(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         lf = pl.scan_ndjson(path, storage_options=storage_options)
+         return cls.source(lf, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def scan_ipc(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         hive_partitioning: bool | None = None,
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {"storage_options": storage_options}
+         if hive_partitioning is not None:
+             kwargs["hive_partitioning"] = hive_partitioning
+         lf = pl.scan_ipc(path, **kwargs)
+         return cls.source(lf, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def scan_delta(
+         cls,
+         source: str,
+         *,
+         schema: type[SchemaT],
+         version: int | str | None = None,
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {"storage_options": storage_options}
+         if version is not None:
+             kwargs["version"] = version
+         lf = pl.scan_delta(source, **kwargs)
+         return cls.source(lf, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def read_delta(
+         cls,
+         source: str,
+         *,
+         schema: type[SchemaT],
+         version: int | str | None = None,
+         storage_options: dict[str, Any] | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {"storage_options": storage_options}
+         if version is not None:
+             kwargs["version"] = version
+         df = pl.read_delta(source, **kwargs)
+         return cls.source(df, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def read_excel(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         sheet_name: str | None = None,
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {}
+         if sheet_name is not None:
+             kwargs["sheet_name"] = sheet_name
+         df = pl.read_excel(path, **kwargs)
+         return cls.source(df, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def read_avro(cls, path: str, *, schema: type[SchemaT]) -> PolarsFrame[SchemaT]:
+         df = pl.read_avro(path)
+         return cls.source(df, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def read_database(
+         cls, query: str, *, connection: Any, schema: type[SchemaT]
+     ) -> PolarsFrame[SchemaT]:
+         df = pl.read_database(query=query, connection=connection)
+         return cls.source(df, adapter=cls._adapter_singleton, schema=schema)
+
+     @classmethod
+     def read_database_uri(
+         cls,
+         query: str,
+         *,
+         uri: str,
+         engine: Literal["connectorx", "adbc"] | None = None,
+         schema: type[SchemaT],
+     ) -> PolarsFrame[SchemaT]:
+         kwargs: dict[str, Any] = {}
+         if engine is not None:
+             kwargs["engine"] = engine
+         df = pl.read_database_uri(query=query, uri=uri, **kwargs)
+         return cls.source(df, adapter=cls._adapter_singleton, schema=schema)
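A small sketch of the defaults behavior implemented by `_schema_defaults` and `_fill_missing_from_defaults` above: a class-level value on the schema fills in a column that the constructor data omits. The `Event` schema and its default are hypothetical, and it assumes `collect()` materializes the source as in the Usage example:

```python
from planframe_polars import PolarsFrame


class Event(PolarsFrame):
    id: int
    source: str = "unknown"  # hypothetical class-level default


pf = Event({"id": [1, 2]})  # no "source" column supplied
print(pf.collect())         # both rows get source == "unknown"
```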
@@ -0,0 +1,103 @@
+ from __future__ import annotations
+
+ from collections.abc import Mapping, Sequence
+ from typing import Any, Literal, TypeVar
+
+ import polars as pl
+
+ from planframe.frame import Frame
+
+ SchemaT = TypeVar("SchemaT")
+
+ PolarsBackendFrame = pl.DataFrame | pl.LazyFrame
+
+ class PolarsFrame(Frame[Any, PolarsBackendFrame, pl.Expr]):
+     def __new__(
+         cls,
+         data: Mapping[str, Sequence[object]] | Sequence[Mapping[str, object]],
+         *,
+         lazy: bool = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_parquet(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         hive_partitioning: bool | None = ...,
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_parquet_dataset(
+         cls,
+         path_or_glob: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_csv(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_ndjson(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_ipc(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         hive_partitioning: bool | None = ...,
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def scan_delta(
+         cls,
+         source: str,
+         *,
+         schema: type[SchemaT],
+         version: int | str | None = ...,
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def read_delta(
+         cls,
+         source: str,
+         *,
+         schema: type[SchemaT],
+         version: int | str | None = ...,
+         storage_options: dict[str, Any] | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def read_excel(
+         cls,
+         path: str,
+         *,
+         schema: type[SchemaT],
+         sheet_name: str | None = ...,
+     ) -> PolarsFrame: ...
+     @classmethod
+     def read_avro(cls, path: str, *, schema: type[SchemaT]) -> PolarsFrame: ...
+     @classmethod
+     def read_database(
+         cls, query: str, *, connection: Any, schema: type[SchemaT]
+     ) -> PolarsFrame: ...
+     @classmethod
+     def read_database_uri(
+         cls,
+         query: str,
+         *,
+         uri: str,
+         engine: Literal["connectorx", "adbc"] | None = ...,
+         schema: type[SchemaT],
+     ) -> PolarsFrame: ...
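Per the stub's `__new__` overload above, the constructor accepts exactly two data shapes; a quick sketch (hypothetical `User` schema):

```python
from planframe_polars import PolarsFrame


class User(PolarsFrame):
    id: int


rows = User([{"id": 1}, {"id": 2}])      # list-of-dicts
cols = User({"id": [1, 2]}, lazy=False)  # dict-of-lists, eager source
```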
@@ -0,0 +1,32 @@
+ [build-system]
+ requires = ["hatchling>=1.25"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "planframe-polars"
+ version = "0.1.0"
+ description = "Polars backend adapter for PlanFrame."
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = { text = "MIT" }
+ authors = [{ name = "PlanFrame Contributors" }]
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3 :: Only",
+     "Programming Language :: Python :: 3.10",
+     "License :: OSI Approved :: MIT License",
+     "Typing :: Typed",
+ ]
+ dependencies = [
+     "planframe>=0.1.0,<0.2.0",
+     "polars>=0.20",
+ ]
+
+ [project.urls]
+ Repository = "https://github.com/eddiethedean/planframe"
+ Documentation = "https://github.com/eddiethedean/planframe/blob/main/README.md"
+ Issues = "https://github.com/eddiethedean/planframe/issues"
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["planframe_polars"]
+