robin-sparkless 0.1.0__cp38-abi3-win_amd64.whl → 0.1.1__cp38-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,358 @@
1
+ """
2
+ Type stubs for robin_sparkless (PySpark-like DataFrame API on Polars).
3
+ Use for static type checking (mypy, pyright, etc.).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ # --- Classes ---
11
+
12
+ class SparkSession:
13
+ @classmethod
14
+ def builder(cls) -> SparkSessionBuilder: ...
15
+ def is_case_sensitive(self) -> bool: ...
16
+ def create_dataframe(
17
+ self,
18
+ data: Any,
19
+ column_names: list[str],
20
+ ) -> DataFrame: ...
21
+ def createDataFrame(
22
+ self,
23
+ data: Any,
24
+ column_names: list[str],
25
+ ) -> DataFrame: ...
26
+ def create_dataframe_from_rows(
27
+ self,
28
+ data: Any,
29
+ schema: list[tuple[str, str]],
30
+ ) -> DataFrame: ...
31
+ def read_csv(self, path: str) -> DataFrame: ...
32
+ def read_parquet(self, path: str) -> DataFrame: ...
33
+ def read_json(self, path: str) -> DataFrame: ...
34
+ def read_delta(self, path: str) -> DataFrame: ...
35
+ def read_delta_version(
36
+ self, path: str, version: int | None = None
37
+ ) -> DataFrame: ...
38
+ def create_or_replace_temp_view(self, name: str, df: DataFrame) -> None: ...
39
+ def table(self, name: str) -> DataFrame: ...
40
+ def sql(self, query: str) -> DataFrame: ...
41
+
42
+ class SparkSessionBuilder:
43
+ def app_name(self, name: str) -> SparkSessionBuilder: ...
44
+ def master(self, master: str) -> SparkSessionBuilder: ...
45
+ def config(self, key: str, value: str) -> SparkSessionBuilder: ...
46
+ def get_or_create(self) -> SparkSession: ...
47
+
48
+ class DataFrame:
49
+ def count(self) -> int: ...
50
+ def show(self, n: int | None = None) -> None: ...
51
+ def collect(self) -> list[dict[str, Any]]: ...
52
+ def filter(self, condition: Column) -> DataFrame: ...
53
+ def select(self, cols: list[str]) -> DataFrame: ...
54
+ def select_expr(self, exprs: list[str]) -> DataFrame: ...
55
+ def with_column(self, column_name: str, expr: Column) -> DataFrame: ...
56
+ def with_columns(
57
+ self, mapping: dict[str, Column] | list[tuple[str, Column]]
58
+ ) -> DataFrame: ...
59
+ def with_columns_renamed(
60
+ self,
61
+ mapping: dict[str, str] | list[tuple[str, str]],
62
+ ) -> DataFrame: ...
63
+ def with_column_renamed(self, old: str, new: str) -> DataFrame: ...
64
+ def order_by(
65
+ self,
66
+ cols: list[str],
67
+ ascending: list[bool] | None = None,
68
+ ) -> DataFrame: ...
69
+ def order_by_exprs(self, sort_orders: list[SortOrder]) -> DataFrame: ...
70
+ def group_by(self, cols: list[str]) -> GroupedData: ...
71
+ def limit(self, n: int) -> DataFrame: ...
72
+ def distinct(self, subset: list[str] | None = None) -> DataFrame: ...
73
+ def union(self, other: DataFrame) -> DataFrame: ...
74
+ def union_by_name(self, other: DataFrame) -> DataFrame: ...
75
+ def join(
76
+ self,
77
+ other: DataFrame,
78
+ on: str | list[str],
79
+ how: str = "inner",
80
+ ) -> DataFrame: ...
81
+ def drop(self, *cols: str) -> DataFrame: ...
82
+ def dropna(self, subset: list[str] | None = None) -> DataFrame: ...
83
+ def col_regex(self, pattern: str) -> DataFrame: ...
84
+ def stat(self) -> DataFrameStat: ...
85
+ def na(self) -> DataFrameNa: ...
86
+ def to_pandas(self) -> list[dict[str, Any]]: ...
87
+ def corr(self) -> DataFrame: ...
88
+ def write(self) -> DataFrameWriter: ...
89
+
90
+ class DataFrameStat:
91
+ def cov(self, col1: str, col2: str) -> float: ...
92
+ def corr(self, col1: str, col2: str) -> float: ...
93
+ def corr_matrix(self) -> DataFrame: ...
94
+
95
+ class DataFrameNa:
96
+ def fill(self, value: Column) -> DataFrame: ...
97
+ def drop(self, subset: list[str] | None = None) -> DataFrame: ...
98
+
99
+ class Column:
100
+ def alias(self, name: str) -> Column: ...
101
+ def gt(self, other: Column) -> Column: ...
102
+ def ge(self, other: Column) -> Column: ...
103
+ def lt(self, other: Column) -> Column: ...
104
+ def le(self, other: Column) -> Column: ...
105
+ def eq(self, other: Column) -> Column: ...
106
+ def neq(self, other: Column) -> Column: ...
107
+ def __eq__(self, other: object) -> Column: ... # type: ignore[override]
108
+ def __ne__(self, other: object) -> Column: ... # type: ignore[override]
109
+ def __lt__(self, other: Column) -> Column: ...
110
+ def __le__(self, other: Column) -> Column: ...
111
+ def __gt__(self, other: Column) -> Column: ...
112
+ def __ge__(self, other: Column) -> Column: ...
113
+ def __add__(self, other: Column) -> Column: ...
114
+ def __sub__(self, other: Column) -> Column: ...
115
+ def __mul__(self, other: Column) -> Column: ...
116
+ def __truediv__(self, other: Column) -> Column: ...
117
+ def __mod__(self, other: Column) -> Column: ...
118
+ def __and__(self, other: Column) -> Column: ...
119
+ def __or__(self, other: Column) -> Column: ...
120
+ def map_concat(self, other: Column) -> Column: ...
121
+
122
+ class SortOrder: ...
123
+
124
+ class WhenBuilder:
125
+ def when(self, condition: Column) -> ThenBuilder: ...
126
+ def otherwise(self, value: Column) -> Column: ...
127
+
128
+ class ThenBuilder:
129
+ def then(self, value: Column) -> WhenBuilder: ...
130
+ def otherwise(self, value: Column) -> Column: ...
131
+
132
+ class GroupedData:
133
+ def count(self) -> DataFrame: ...
134
+ def sum(self, column: str) -> DataFrame: ...
135
+ def avg(self, column: str) -> DataFrame: ...
136
+ def min(self, column: str) -> DataFrame: ...
137
+ def max(self, column: str) -> DataFrame: ...
138
+ def agg(self, exprs: list[Column]) -> DataFrame: ...
139
+ def collect_list(self, column: str) -> DataFrame: ...
140
+ def collect_set(self, column: str) -> DataFrame: ...
141
+ def count_if(self, column: str) -> DataFrame: ...
142
+ def any_value(self, column: str) -> DataFrame: ...
143
+ def bool_and(self, column: str) -> DataFrame: ...
144
+ def bool_or(self, column: str) -> DataFrame: ...
145
+ def product(self, column: str) -> DataFrame: ...
146
+ def kurtosis(self, column: str) -> DataFrame: ...
147
+ def skewness(self, column: str) -> DataFrame: ...
148
+
149
+ class CubeRollupData:
150
+ def agg(self, exprs: list[Column]) -> DataFrame: ...
151
+
152
+ class DataFrameWriter:
153
+ def mode(self, mode: str) -> DataFrameWriter: ...
154
+ def format(self, format: str) -> DataFrameWriter: ...
155
+ def save(self, path: str) -> None: ...
156
+
157
+ # --- Module-level functions (expression builders return Column) ---
158
+
159
+ def col(name: str) -> Column: ...
160
+ def lit(value: None | int | float | bool | str) -> Column: ...
161
+ def when(condition: Column) -> ThenBuilder: ...
162
+ def coalesce(*cols: Column) -> Column: ...
163
+ def sum(column: Column) -> Column: ...
164
+ def avg(column: Column) -> Column: ...
165
+ def min(column: Column) -> Column: ...
166
+ def max(column: Column) -> Column: ...
167
+ def count(column: Column) -> Column: ...
168
+ def execute_plan(
169
+ data: Any,
170
+ schema: list[tuple[str, str]],
171
+ plan_json: str,
172
+ ) -> DataFrame: ...
173
+
174
+ # Single-column functions (Column -> Column; cast/try_cast also take a dtype string, and the asc/desc helpers return SortOrder)
175
+ def ascii(column: Column) -> Column: ...
176
+ def base64(column: Column) -> Column: ...
177
+ def unbase64(column: Column) -> Column: ...
178
+ def cast(column: Column, dtype: str) -> Column: ...
179
+ def try_cast(column: Column, dtype: str) -> Column: ...
180
+ def isnull(column: Column) -> Column: ...
181
+ def isnotnull(column: Column) -> Column: ...
182
+ def isnan(column: Column) -> Column: ...
183
+ def asc(column: Column) -> SortOrder: ...
184
+ def asc_nulls_first(column: Column) -> SortOrder: ...
185
+ def asc_nulls_last(column: Column) -> SortOrder: ...
186
+ def desc(column: Column) -> SortOrder: ...
187
+ def desc_nulls_first(column: Column) -> SortOrder: ...
188
+ def desc_nulls_last(column: Column) -> SortOrder: ...
189
+
190
+ # Binary / variadic (common patterns; others follow same style)
191
+ def greatest(*columns: Column) -> Column: ...
192
+ def least(*columns: Column) -> Column: ...
193
+ def add_months(column: Column, n: int) -> Column: ...
194
+ def substring(column: Column, pos: int, len: int) -> Column: ...
195
+ def overlay(src: Column, replace: Column, pos: int, len: int = -1) -> Column: ...
196
+ def format_number(column: Column, d: int) -> Column: ...
197
+ def format_string(fmt: str, *columns: Column) -> Column: ...
198
+ def concat(*columns: Column) -> Column: ...
199
+ def concat_ws(sep: str, *columns: Column) -> Column: ...
200
+ def array(*columns: Column) -> Column: ...
201
+ def struct(*columns: Column) -> Column: ...
202
+ def named_struct(*name_column_pairs: Any) -> Column: ...
203
+ def create_map(*key_value_columns: Column) -> Column: ...
204
+ def map_concat(a: Column, b: Column) -> Column: ...
205
+ def equal_null(a: Column, b: Column) -> Column: ...
206
+ def get(column: Column, key: Any) -> Column: ...
207
+ def isin(column: Column, values: Any) -> Column: ...
208
+ def rand(seed: int | None = None) -> Column: ...
209
+ def randn(seed: int | None = None) -> Column: ...
210
+
211
+ # Placeholder for remaining 200+ expression functions (typical pattern: Column in, Column out; some take extra scalar arguments, take no arguments, or operate on a DataFrame)
212
+ def acos(column: Column) -> Column: ...
213
+ def acosh(column: Column) -> Column: ...
214
+ def asin(column: Column) -> Column: ...
215
+ def asinh(column: Column) -> Column: ...
216
+ def atan(column: Column) -> Column: ...
217
+ def atan2(y: Column, x: Column) -> Column: ...
218
+ def atanh(column: Column) -> Column: ...
219
+ def sin(column: Column) -> Column: ...
220
+ def cos(column: Column) -> Column: ...
221
+ def tan(column: Column) -> Column: ...
222
+ def sinh(column: Column) -> Column: ...
223
+ def cosh(column: Column) -> Column: ...
224
+ def tanh(column: Column) -> Column: ...
225
+ def degrees(column: Column) -> Column: ...
226
+ def radians(column: Column) -> Column: ...
227
+ def cbrt(column: Column) -> Column: ...
228
+ def ceiling(column: Column) -> Column: ...
229
+ def floor(column: Column) -> Column: ...
230
+ def exp(column: Column) -> Column: ...
231
+ def expm1(column: Column) -> Column: ...
232
+ def ln(column: Column) -> Column: ...
233
+ def log10(column: Column) -> Column: ...
234
+ def log1p(column: Column) -> Column: ...
235
+ def log2(column: Column) -> Column: ...
236
+ def power(base: Column, exp: Column) -> Column: ...
237
+ def rint(column: Column) -> Column: ...
238
+ def round(column: Column, scale: int = 0) -> Column: ...
239
+ def signum(column: Column) -> Column: ...
240
+ def sqrt(column: Column) -> Column: ...
241
+ def left(column: Column, n: int) -> Column: ...
242
+ def right(column: Column, n: int) -> Column: ...
243
+ def length(column: Column) -> Column: ...
244
+ def lower(column: Column) -> Column: ...
245
+ def upper(column: Column) -> Column: ...
246
+ def ltrim(column: Column) -> Column: ...
247
+ def rtrim(column: Column) -> Column: ...
248
+ def trim(column: Column) -> Column: ...
249
+ def reverse(column: Column) -> Column: ...
250
+ def repeat(column: Column, n: int) -> Column: ...
251
+ def replace(src: Column, search: str, replacement: str) -> Column: ...
252
+ def contains(column: Column, literal: str) -> Column: ...
253
+ def startswith(column: Column, literal: str) -> Column: ...
254
+ def endswith(column: Column, literal: str) -> Column: ...
255
+ def like(column: Column, pattern: str) -> Column: ...
256
+ def ilike(column: Column, pattern: str) -> Column: ...
257
+ def rlike(column: Column, pattern: str) -> Column: ...
258
+ def regexp_extract(column: Column, pattern: str, idx: int = 0) -> Column: ...
259
+ def regexp_replace(column: Column, pattern: str, replacement: str) -> Column: ...
260
+ def split(column: Column, pattern: str) -> Column: ...
261
+ def substring_index(column: Column, delim: str, count: int) -> Column: ...
262
+ def chr(column: Column) -> Column: ...
263
+ def char(column: Column) -> Column: ...
264
+ def md5(column: Column) -> Column: ...
265
+ def sha1(column: Column) -> Column: ...
266
+ def sha2(column: Column, numBits: int) -> Column: ...
267
+ def dayofmonth(column: Column) -> Column: ...
268
+ def dayofweek(column: Column) -> Column: ...
269
+ def dayofyear(column: Column) -> Column: ...
270
+ def hour(column: Column) -> Column: ...
271
+ def minute(column: Column) -> Column: ...
272
+ def month(column: Column) -> Column: ...
273
+ def quarter(column: Column) -> Column: ...
274
+ def second(column: Column) -> Column: ...
275
+ def weekofyear(column: Column) -> Column: ...
276
+ def year(column: Column) -> Column: ...
277
+ def date_add(column: Column, days: int) -> Column: ...
278
+ def date_sub(column: Column, days: int) -> Column: ...
279
+ def datediff(end: Column, start: Column) -> Column: ...
280
+ def months_between(end: Column, start: Column) -> Column: ...
281
+ def to_date(column: Column) -> Column: ...
282
+ def to_timestamp(column: Column) -> Column: ...
283
+ def unix_timestamp(column: Column, fmt: str | None = None) -> Column: ...
284
+ def from_unixtime(column: Column, fmt: str = "yyyy-MM-dd HH:mm:ss") -> Column: ...
285
+ def current_timestamp() -> Column: ...
286
+ def current_date() -> Column: ...
287
+ def date_format(column: Column, fmt: str) -> Column: ...
288
+ def nvl(column: Column, replacement: Column) -> Column: ...
289
+ def nvl2(column: Column, if_not_null: Column, if_null: Column) -> Column: ...
290
+ def ifnull(column: Column, replacement: Column) -> Column: ...
291
+ def array_contains(column: Column, value: Any) -> Column: ...
292
+ def array_distinct(column: Column) -> Column: ...
293
+ def array_except(a: Column, b: Column) -> Column: ...
294
+ def array_intersect(a: Column, b: Column) -> Column: ...
295
+ def array_join(
296
+ column: Column, delimiter: str, null_replacement: str | None = None
297
+ ) -> Column: ...
298
+ def array_max(column: Column) -> Column: ...
299
+ def array_min(column: Column) -> Column: ...
300
+ def array_position(column: Column, value: Any) -> Column: ...
301
+ def array_remove(column: Column, value: Any) -> Column: ...
302
+ def array_sort(column: Column) -> Column: ...
303
+ def array_union(a: Column, b: Column) -> Column: ...
304
+ def arrays_zip(*columns: Column) -> Column: ...
305
+ def explode(column: Column) -> Column: ...
306
+ def explode_outer(column: Column) -> Column: ...
307
+ def posexplode(column: Column) -> Column: ...
308
+ def size(column: Column) -> Column: ...
309
+ def slice(column: Column, start: int, length: int) -> Column: ...
310
+ def sort_array(column: Column, asc: bool = True) -> Column: ...
311
+ def element_at(column: Column, idx: int) -> Column: ...
312
+ def map_keys(column: Column) -> Column: ...
313
+ def map_values(column: Column) -> Column: ...
314
+ def map_contains_key(column: Column, key: Any) -> Column: ...
315
+ def row_number() -> Column: ...
316
+ def rank() -> Column: ...
317
+ def dense_rank() -> Column: ...
318
+ def ntile(n: int) -> Column: ...
319
+ def broadcast(df: DataFrame) -> DataFrame: ...
320
+ def input_file_name() -> Column: ...
321
+ def monotonically_increasing_id() -> Column: ...
322
+ def spark_partition_id() -> Column: ...
323
+ def version() -> Column: ...
324
+ def current_user() -> Column: ...
325
+ def user() -> Column: ...
326
+ def hash(*columns: Column) -> Column: ...
327
+ def crc32(column: Column) -> Column: ...
328
+ def xxhash64(*columns: Column) -> Column: ...
329
+ def assert_true(column: Column) -> Column: ...
330
+ def raise_error(column: Column) -> Column: ...
331
+ def bitwiseNOT(column: Column) -> Column: ...
332
+ def bitwise_not(column: Column) -> Column: ...
333
+ def bit_count(column: Column) -> Column: ...
334
+ def bit_get(column: Column, bit: int) -> Column: ...
335
+ def getbit(column: Column, bit: int) -> Column: ...
336
+ def shiftleft(column: Column, n: int) -> Column: ...
337
+ def shiftright(column: Column, n: int) -> Column: ...
338
+ def shift_left(column: Column, n: int) -> Column: ...
339
+ def shift_right(column: Column, n: int) -> Column: ...
340
+ def typeof(column: Column) -> Column: ...
341
+ def bround(column: Column, scale: int = 0) -> Column: ...
342
+ def negate(column: Column) -> Column: ...
343
+ def positive(column: Column) -> Column: ...
344
+ def cot(column: Column) -> Column: ...
345
+ def csc(column: Column) -> Column: ...
346
+ def sec(column: Column) -> Column: ...
347
+ def e() -> Column: ...
348
+ def pi() -> Column: ...
349
+ def median(column: Column) -> Column: ...
350
+ def mode(column: Column) -> Column: ...
351
+ def stddev_pop(column: Column) -> Column: ...
352
+ def var_pop(column: Column) -> Column: ...
353
+ def count_distinct(column: Column) -> Column: ...
354
+ def approx_count_distinct(column: Column) -> Column: ...
355
+ def first(column: Column) -> Column: ...
356
+ def last(column: Column) -> Column: ...
357
+ def collect_list(column: Column) -> Column: ...
358
+ def collect_set(column: Column) -> Column: ...
File without changes
Binary file
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: robin-sparkless
3
+ Version: 0.1.1
4
+ Classifier: Development Status :: 3 - Alpha
5
+ Classifier: License :: OSI Approved :: MIT License
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Programming Language :: Rust
8
+ Classifier: Topic :: Scientific/Engineering
9
+ Summary: PySpark-like DataFrame API in Rust (Polars backend), with Python bindings via PyO3
10
+ Author: Robin Sparkless contributors
11
+ License: MIT
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
14
+
15
+ # robin-sparkless (Python)
16
+
17
+ [![PyPI version](https://badge.fury.io/py/robin-sparkless.svg)](https://pypi.org/project/robin-sparkless/)
18
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
19
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
20
+ [![Documentation](https://readthedocs.org/projects/robin-sparkless/badge/?version=latest)](https://robin-sparkless.readthedocs.io/)
21
+ [![Source](https://img.shields.io/badge/source-GitHub-black.svg)](https://github.com/eddiethedean/robin-sparkless)
22
+
23
+ **PySpark-style DataFrames in Python—no JVM.** Uses [Polars](https://www.pola.rs/) under the hood for fast execution.
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install robin-sparkless
29
+ ```
30
+
31
+ **Requirements:** Python 3.8+
32
+
33
+ ## Quick start
34
+
35
+ ```python
36
+ import robin_sparkless as rs
37
+
38
+ spark = rs.SparkSession.builder().app_name("demo").get_or_create()
39
+ df = spark.create_dataframe(
40
+ [(1, 25, "Alice"), (2, 30, "Bob"), (3, 35, "Charlie")],
41
+ ["id", "age", "name"],
42
+ )
43
+ filtered = df.filter(rs.col("age").gt(rs.lit(26)))
44
+ print(filtered.collect())
45
+ # [{"id": 2, "age": 30, "name": "Bob"}, {"id": 3, "age": 35, "name": "Charlie"}]
46
+ ```
47
+
48
+ Read from files:
49
+
50
+ ```python
51
+ df = spark.read_csv("data.csv")
52
+ df = spark.read_parquet("data.parquet")
53
+ df = spark.read_json("data.json")
54
+ ```
55
+
56
+ Filter, select, group, join, and use window functions with a PySpark-like API. See the [full documentation](https://robin-sparkless.readthedocs.io/) for details.
57
+
58
+ ## Optional features (install from source)
59
+
60
+ Building from source requires [Rust](https://rustup.rs/) and [maturin](https://www.maturin.rs/). Clone the repo, then:
61
+
62
+ ```bash
63
+ pip install maturin
64
+ maturin develop --features pyo3 # default: DataFrame API
65
+ maturin develop --features "pyo3,sql" # spark.sql() and temp views
66
+ maturin develop --features "pyo3,delta" # read_delta / write_delta
67
+ maturin develop --features "pyo3,sql,delta" # all optional features
68
+ ```
69
+
70
+ ## Type checking
71
+
72
+ The package ships with PEP 561 type stubs (`robin_sparkless.pyi`, also installed as `robin_sparkless/__init__.pyi` next to a `py.typed` marker). Use mypy, pyright, or another checker:
73
+
74
+ ```bash
75
+ pip install robin-sparkless mypy
76
+ mypy your_script.py
77
+ ```
78
+
79
+ For **Python 3.8** compatibility, use mypy <1.10 (newer mypy drops support for `python_version = "3.8"` in config). The project’s `pyproject.toml` includes `[tool.mypy]` with `python_version` and `[tool.ruff]` with `target-version`, both set for 3.8.
80
+
81
+ ## Development
82
+
83
+ From a clone of the repo:
84
+
85
+ ```bash
86
+ python -m venv .venv
87
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
88
+ pip install -r requirements-ci.txt
89
+ maturin develop --features pyo3 --release
90
+ pytest tests/python/ -v
91
+ ```
92
+
93
+ Lint and format:
94
+
95
+ ```bash
96
+ ruff format .
97
+ ruff check .
98
+ mypy .
99
+ ```
100
+
101
+ CI uses `requirements-ci.txt` (maturin, pytest, mypy<1.10 for Python 3.8).
102
+
103
+ ## Links
104
+
105
+ - **Documentation:** [robin-sparkless.readthedocs.io](https://robin-sparkless.readthedocs.io/)
106
+ - **Source:** [github.com/eddiethedean/robin-sparkless](https://github.com/eddiethedean/robin-sparkless)
107
+ - **Rust crate:** [crates.io/crates/robin-sparkless](https://crates.io/crates/robin-sparkless)
108
+
109
+ ## License
110
+
111
+ MIT
112
+
@@ -0,0 +1,8 @@
1
+ robin_sparkless\__init__.py,sha256=h26i0kv9czksgzJ0zbFJvX-5IZV8hrWVEerFQOVG3fA,143
2
+ robin_sparkless\__init__.pyi,sha256=RUzDhVjxwH14YRwqwwqsIPaiLNDWbYGoc51vYjhbo54,15677
3
+ robin_sparkless\py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ robin_sparkless\robin_sparkless.pyd,sha256=4YCUz0p_2-JN8_1FZ1R-YH9aCeQzkzY-YWRbMtryZgg,58458624
5
+ robin_sparkless-0.1.1.dist-info\METADATA,sha256=BqIYlQkQG4LeSX-yh-FdHc5v1YrI2EnttpuZGx8Y1PQ,3748
6
+ robin_sparkless-0.1.1.dist-info\WHEEL,sha256=gPqN4EsdiAyGvmfrYy_ONrF276O8o0hPitI2CKZrEFA,95
7
+ robin_sparkless.pyi,sha256=RUzDhVjxwH14YRwqwwqsIPaiLNDWbYGoc51vYjhbo54,15677
8
+ robin_sparkless-0.1.1.dist-info\RECORD,,
robin_sparkless.pyi ADDED
@@ -0,0 +1,358 @@
1
+ """
2
+ Type stubs for robin_sparkless (PySpark-like DataFrame API on Polars).
3
+ Use for static type checking (mypy, pyright, etc.).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ # --- Classes ---
11
+
12
+ class SparkSession:
13
+ @classmethod
14
+ def builder(cls) -> SparkSessionBuilder: ...
15
+ def is_case_sensitive(self) -> bool: ...
16
+ def create_dataframe(
17
+ self,
18
+ data: Any,
19
+ column_names: list[str],
20
+ ) -> DataFrame: ...
21
+ def createDataFrame(
22
+ self,
23
+ data: Any,
24
+ column_names: list[str],
25
+ ) -> DataFrame: ...
26
+ def create_dataframe_from_rows(
27
+ self,
28
+ data: Any,
29
+ schema: list[tuple[str, str]],
30
+ ) -> DataFrame: ...
31
+ def read_csv(self, path: str) -> DataFrame: ...
32
+ def read_parquet(self, path: str) -> DataFrame: ...
33
+ def read_json(self, path: str) -> DataFrame: ...
34
+ def read_delta(self, path: str) -> DataFrame: ...
35
+ def read_delta_version(
36
+ self, path: str, version: int | None = None
37
+ ) -> DataFrame: ...
38
+ def create_or_replace_temp_view(self, name: str, df: DataFrame) -> None: ...
39
+ def table(self, name: str) -> DataFrame: ...
40
+ def sql(self, query: str) -> DataFrame: ...
41
+
42
+ class SparkSessionBuilder:
43
+ def app_name(self, name: str) -> SparkSessionBuilder: ...
44
+ def master(self, master: str) -> SparkSessionBuilder: ...
45
+ def config(self, key: str, value: str) -> SparkSessionBuilder: ...
46
+ def get_or_create(self) -> SparkSession: ...
47
+
48
+ class DataFrame:
49
+ def count(self) -> int: ...
50
+ def show(self, n: int | None = None) -> None: ...
51
+ def collect(self) -> list[dict[str, Any]]: ...
52
+ def filter(self, condition: Column) -> DataFrame: ...
53
+ def select(self, cols: list[str]) -> DataFrame: ...
54
+ def select_expr(self, exprs: list[str]) -> DataFrame: ...
55
+ def with_column(self, column_name: str, expr: Column) -> DataFrame: ...
56
+ def with_columns(
57
+ self, mapping: dict[str, Column] | list[tuple[str, Column]]
58
+ ) -> DataFrame: ...
59
+ def with_columns_renamed(
60
+ self,
61
+ mapping: dict[str, str] | list[tuple[str, str]],
62
+ ) -> DataFrame: ...
63
+ def with_column_renamed(self, old: str, new: str) -> DataFrame: ...
64
+ def order_by(
65
+ self,
66
+ cols: list[str],
67
+ ascending: list[bool] | None = None,
68
+ ) -> DataFrame: ...
69
+ def order_by_exprs(self, sort_orders: list[SortOrder]) -> DataFrame: ...
70
+ def group_by(self, cols: list[str]) -> GroupedData: ...
71
+ def limit(self, n: int) -> DataFrame: ...
72
+ def distinct(self, subset: list[str] | None = None) -> DataFrame: ...
73
+ def union(self, other: DataFrame) -> DataFrame: ...
74
+ def union_by_name(self, other: DataFrame) -> DataFrame: ...
75
+ def join(
76
+ self,
77
+ other: DataFrame,
78
+ on: str | list[str],
79
+ how: str = "inner",
80
+ ) -> DataFrame: ...
81
+ def drop(self, *cols: str) -> DataFrame: ...
82
+ def dropna(self, subset: list[str] | None = None) -> DataFrame: ...
83
+ def col_regex(self, pattern: str) -> DataFrame: ...
84
+ def stat(self) -> DataFrameStat: ...
85
+ def na(self) -> DataFrameNa: ...
86
+ def to_pandas(self) -> list[dict[str, Any]]: ...
87
+ def corr(self) -> DataFrame: ...
88
+ def write(self) -> DataFrameWriter: ...
89
+
90
+ class DataFrameStat:
91
+ def cov(self, col1: str, col2: str) -> float: ...
92
+ def corr(self, col1: str, col2: str) -> float: ...
93
+ def corr_matrix(self) -> DataFrame: ...
94
+
95
+ class DataFrameNa:
96
+ def fill(self, value: Column) -> DataFrame: ...
97
+ def drop(self, subset: list[str] | None = None) -> DataFrame: ...
98
+
99
+ class Column:
100
+ def alias(self, name: str) -> Column: ...
101
+ def gt(self, other: Column) -> Column: ...
102
+ def ge(self, other: Column) -> Column: ...
103
+ def lt(self, other: Column) -> Column: ...
104
+ def le(self, other: Column) -> Column: ...
105
+ def eq(self, other: Column) -> Column: ...
106
+ def neq(self, other: Column) -> Column: ...
107
+ def __eq__(self, other: object) -> Column: ... # type: ignore[override]
108
+ def __ne__(self, other: object) -> Column: ... # type: ignore[override]
109
+ def __lt__(self, other: Column) -> Column: ...
110
+ def __le__(self, other: Column) -> Column: ...
111
+ def __gt__(self, other: Column) -> Column: ...
112
+ def __ge__(self, other: Column) -> Column: ...
113
+ def __add__(self, other: Column) -> Column: ...
114
+ def __sub__(self, other: Column) -> Column: ...
115
+ def __mul__(self, other: Column) -> Column: ...
116
+ def __truediv__(self, other: Column) -> Column: ...
117
+ def __mod__(self, other: Column) -> Column: ...
118
+ def __and__(self, other: Column) -> Column: ...
119
+ def __or__(self, other: Column) -> Column: ...
120
+ def map_concat(self, other: Column) -> Column: ...
121
+
122
+ class SortOrder: ...
123
+
124
+ class WhenBuilder:
125
+ def when(self, condition: Column) -> ThenBuilder: ...
126
+ def otherwise(self, value: Column) -> Column: ...
127
+
128
+ class ThenBuilder:
129
+ def then(self, value: Column) -> WhenBuilder: ...
130
+ def otherwise(self, value: Column) -> Column: ...
131
+
132
+ class GroupedData:
133
+ def count(self) -> DataFrame: ...
134
+ def sum(self, column: str) -> DataFrame: ...
135
+ def avg(self, column: str) -> DataFrame: ...
136
+ def min(self, column: str) -> DataFrame: ...
137
+ def max(self, column: str) -> DataFrame: ...
138
+ def agg(self, exprs: list[Column]) -> DataFrame: ...
139
+ def collect_list(self, column: str) -> DataFrame: ...
140
+ def collect_set(self, column: str) -> DataFrame: ...
141
+ def count_if(self, column: str) -> DataFrame: ...
142
+ def any_value(self, column: str) -> DataFrame: ...
143
+ def bool_and(self, column: str) -> DataFrame: ...
144
+ def bool_or(self, column: str) -> DataFrame: ...
145
+ def product(self, column: str) -> DataFrame: ...
146
+ def kurtosis(self, column: str) -> DataFrame: ...
147
+ def skewness(self, column: str) -> DataFrame: ...
148
+
149
+ class CubeRollupData:
150
+ def agg(self, exprs: list[Column]) -> DataFrame: ...
151
+
152
+ class DataFrameWriter:
153
+ def mode(self, mode: str) -> DataFrameWriter: ...
154
+ def format(self, format: str) -> DataFrameWriter: ...
155
+ def save(self, path: str) -> None: ...
156
+
157
+ # --- Module-level functions (expression builders return Column) ---
158
+
159
+ def col(name: str) -> Column: ...
160
+ def lit(value: None | int | float | bool | str) -> Column: ...
161
+ def when(condition: Column) -> ThenBuilder: ...
162
+ def coalesce(*cols: Column) -> Column: ...
163
+ def sum(column: Column) -> Column: ...
164
+ def avg(column: Column) -> Column: ...
165
+ def min(column: Column) -> Column: ...
166
+ def max(column: Column) -> Column: ...
167
+ def count(column: Column) -> Column: ...
168
+ def execute_plan(
169
+ data: Any,
170
+ schema: list[tuple[str, str]],
171
+ plan_json: str,
172
+ ) -> DataFrame: ...
173
+
174
+ # Single-column functions (Column -> Column; cast/try_cast also take a dtype string, and the asc/desc helpers return SortOrder)
175
+ def ascii(column: Column) -> Column: ...
176
+ def base64(column: Column) -> Column: ...
177
+ def unbase64(column: Column) -> Column: ...
178
+ def cast(column: Column, dtype: str) -> Column: ...
179
+ def try_cast(column: Column, dtype: str) -> Column: ...
180
+ def isnull(column: Column) -> Column: ...
181
+ def isnotnull(column: Column) -> Column: ...
182
+ def isnan(column: Column) -> Column: ...
183
+ def asc(column: Column) -> SortOrder: ...
184
+ def asc_nulls_first(column: Column) -> SortOrder: ...
185
+ def asc_nulls_last(column: Column) -> SortOrder: ...
186
+ def desc(column: Column) -> SortOrder: ...
187
+ def desc_nulls_first(column: Column) -> SortOrder: ...
188
+ def desc_nulls_last(column: Column) -> SortOrder: ...
189
+
190
+ # Binary / variadic (common patterns; others follow same style)
191
+ def greatest(*columns: Column) -> Column: ...
192
+ def least(*columns: Column) -> Column: ...
193
+ def add_months(column: Column, n: int) -> Column: ...
194
+ def substring(column: Column, pos: int, len: int) -> Column: ...
195
+ def overlay(src: Column, replace: Column, pos: int, len: int = -1) -> Column: ...
196
+ def format_number(column: Column, d: int) -> Column: ...
197
+ def format_string(fmt: str, *columns: Column) -> Column: ...
198
+ def concat(*columns: Column) -> Column: ...
199
+ def concat_ws(sep: str, *columns: Column) -> Column: ...
200
+ def array(*columns: Column) -> Column: ...
201
+ def struct(*columns: Column) -> Column: ...
202
+ def named_struct(*name_column_pairs: Any) -> Column: ...
203
+ def create_map(*key_value_columns: Column) -> Column: ...
204
+ def map_concat(a: Column, b: Column) -> Column: ...
205
+ def equal_null(a: Column, b: Column) -> Column: ...
206
+ def get(column: Column, key: Any) -> Column: ...
207
+ def isin(column: Column, values: Any) -> Column: ...
208
+ def rand(seed: int | None = None) -> Column: ...
209
+ def randn(seed: int | None = None) -> Column: ...
210
+
211
+ # Placeholder for remaining 200+ expression functions (typical pattern: Column in, Column out; some take extra scalar arguments, take no arguments, or operate on a DataFrame)
212
+ def acos(column: Column) -> Column: ...
213
+ def acosh(column: Column) -> Column: ...
214
+ def asin(column: Column) -> Column: ...
215
+ def asinh(column: Column) -> Column: ...
216
+ def atan(column: Column) -> Column: ...
217
+ def atan2(y: Column, x: Column) -> Column: ...
218
+ def atanh(column: Column) -> Column: ...
219
+ def sin(column: Column) -> Column: ...
220
+ def cos(column: Column) -> Column: ...
221
+ def tan(column: Column) -> Column: ...
222
+ def sinh(column: Column) -> Column: ...
223
+ def cosh(column: Column) -> Column: ...
224
+ def tanh(column: Column) -> Column: ...
225
+ def degrees(column: Column) -> Column: ...
226
+ def radians(column: Column) -> Column: ...
227
+ def cbrt(column: Column) -> Column: ...
228
+ def ceiling(column: Column) -> Column: ...
229
+ def floor(column: Column) -> Column: ...
230
+ def exp(column: Column) -> Column: ...
231
+ def expm1(column: Column) -> Column: ...
232
+ def ln(column: Column) -> Column: ...
233
+ def log10(column: Column) -> Column: ...
234
+ def log1p(column: Column) -> Column: ...
235
+ def log2(column: Column) -> Column: ...
236
+ def power(base: Column, exp: Column) -> Column: ...
237
+ def rint(column: Column) -> Column: ...
238
+ def round(column: Column, scale: int = 0) -> Column: ...
239
+ def signum(column: Column) -> Column: ...
240
+ def sqrt(column: Column) -> Column: ...
241
+ def left(column: Column, n: int) -> Column: ...
242
+ def right(column: Column, n: int) -> Column: ...
243
+ def length(column: Column) -> Column: ...
244
+ def lower(column: Column) -> Column: ...
245
+ def upper(column: Column) -> Column: ...
246
+ def ltrim(column: Column) -> Column: ...
247
+ def rtrim(column: Column) -> Column: ...
248
+ def trim(column: Column) -> Column: ...
249
+ def reverse(column: Column) -> Column: ...
250
+ def repeat(column: Column, n: int) -> Column: ...
251
+ def replace(src: Column, search: str, replacement: str) -> Column: ...
252
+ def contains(column: Column, literal: str) -> Column: ...
253
+ def startswith(column: Column, literal: str) -> Column: ...
254
+ def endswith(column: Column, literal: str) -> Column: ...
255
+ def like(column: Column, pattern: str) -> Column: ...
256
+ def ilike(column: Column, pattern: str) -> Column: ...
257
+ def rlike(column: Column, pattern: str) -> Column: ...
258
+ def regexp_extract(column: Column, pattern: str, idx: int = 0) -> Column: ...
259
+ def regexp_replace(column: Column, pattern: str, replacement: str) -> Column: ...
260
+ def split(column: Column, pattern: str) -> Column: ...
261
+ def substring_index(column: Column, delim: str, count: int) -> Column: ...
262
+ def chr(column: Column) -> Column: ...
263
+ def char(column: Column) -> Column: ...
264
+ def md5(column: Column) -> Column: ...
265
+ def sha1(column: Column) -> Column: ...
266
+ def sha2(column: Column, numBits: int) -> Column: ...
267
+ def dayofmonth(column: Column) -> Column: ...
268
+ def dayofweek(column: Column) -> Column: ...
269
+ def dayofyear(column: Column) -> Column: ...
270
+ def hour(column: Column) -> Column: ...
271
+ def minute(column: Column) -> Column: ...
272
+ def month(column: Column) -> Column: ...
273
+ def quarter(column: Column) -> Column: ...
274
+ def second(column: Column) -> Column: ...
275
+ def weekofyear(column: Column) -> Column: ...
276
+ def year(column: Column) -> Column: ...
277
+ def date_add(column: Column, days: int) -> Column: ...
278
+ def date_sub(column: Column, days: int) -> Column: ...
279
+ def datediff(end: Column, start: Column) -> Column: ...
280
+ def months_between(end: Column, start: Column) -> Column: ...
281
+ def to_date(column: Column) -> Column: ...
282
+ def to_timestamp(column: Column) -> Column: ...
283
+ def unix_timestamp(column: Column, fmt: str | None = None) -> Column: ...
284
+ def from_unixtime(column: Column, fmt: str = "yyyy-MM-dd HH:mm:ss") -> Column: ...
285
+ def current_timestamp() -> Column: ...
286
+ def current_date() -> Column: ...
287
+ def date_format(column: Column, fmt: str) -> Column: ...
288
+ def nvl(column: Column, replacement: Column) -> Column: ...  # same signature as ifnull below; NOTE(review): presumably aliases — confirm against the compiled module
289
+ def nvl2(column: Column, if_not_null: Column, if_null: Column) -> Column: ...  # three-column form: separate results for the non-null and null cases (per parameter names)
290
+ def ifnull(column: Column, replacement: Column) -> Column: ...  # same signature as nvl above
291
+ def array_contains(column: Column, value: Any) -> Column: ...
292
+ def array_distinct(column: Column) -> Column: ...
293
+ def array_except(a: Column, b: Column) -> Column: ...
294
+ def array_intersect(a: Column, b: Column) -> Column: ...
295
+ def array_join(
296
+ column: Column, delimiter: str, null_replacement: str | None = None
297
+ ) -> Column: ...
298
+ def array_max(column: Column) -> Column: ...
299
+ def array_min(column: Column) -> Column: ...
300
+ def array_position(column: Column, value: Any) -> Column: ...
301
+ def array_remove(column: Column, value: Any) -> Column: ...
302
+ def array_sort(column: Column) -> Column: ...
303
+ def array_union(a: Column, b: Column) -> Column: ...
304
+ def arrays_zip(*columns: Column) -> Column: ...
305
+ def explode(column: Column) -> Column: ...
306
+ def explode_outer(column: Column) -> Column: ...
307
+ def posexplode(column: Column) -> Column: ...
308
+ def size(column: Column) -> Column: ...
309
+ def slice(column: Column, start: int, length: int) -> Column: ...
310
+ def sort_array(column: Column, asc: bool = True) -> Column: ...
311
+ def element_at(column: Column, idx: int) -> Column: ...
312
+ def map_keys(column: Column) -> Column: ...
313
+ def map_values(column: Column) -> Column: ...
314
+ def map_contains_key(column: Column, key: Any) -> Column: ...
315
+ def row_number() -> Column: ...  # takes no input column; NOTE(review): presumably meant to be combined with .over() on a window — confirm
316
+ def rank() -> Column: ...  # takes no input column
317
+ def dense_rank() -> Column: ...  # takes no input column
318
+ def ntile(n: int) -> Column: ...  # only argument is a plain int, not a Column
319
+ def broadcast(df: DataFrame) -> DataFrame: ...
320
+ def input_file_name() -> Column: ...
321
+ def monotonically_increasing_id() -> Column: ...
322
+ def spark_partition_id() -> Column: ...
323
+ def version() -> Column: ...
324
+ def current_user() -> Column: ...
325
+ def user() -> Column: ...
326
+ def hash(*columns: Column) -> Column: ...
327
+ def crc32(column: Column) -> Column: ...
328
+ def xxhash64(*columns: Column) -> Column: ...
329
+ def assert_true(column: Column) -> Column: ...
330
+ def raise_error(column: Column) -> Column: ...
331
+ def bitwiseNOT(column: Column) -> Column: ...  # camelCase spelling; same signature as bitwise_not — NOTE(review): presumably an alias, confirm
332
+ def bitwise_not(column: Column) -> Column: ...
333
+ def bit_count(column: Column) -> Column: ...
334
+ def bit_get(column: Column, bit: int) -> Column: ...  # same signature as getbit — NOTE(review): presumably an alias, confirm
335
+ def getbit(column: Column, bit: int) -> Column: ...
336
+ def shiftleft(column: Column, n: int) -> Column: ...  # same signature as shift_left — NOTE(review): presumably an alias, confirm
337
+ def shiftright(column: Column, n: int) -> Column: ...  # same signature as shift_right — NOTE(review): presumably an alias, confirm
338
+ def shift_left(column: Column, n: int) -> Column: ...
339
+ def shift_right(column: Column, n: int) -> Column: ...
340
+ def typeof(column: Column) -> Column: ...
341
+ def bround(column: Column, scale: int = 0) -> Column: ...
342
+ def negate(column: Column) -> Column: ...
343
+ def positive(column: Column) -> Column: ...
344
+ def cot(column: Column) -> Column: ...
345
+ def csc(column: Column) -> Column: ...
346
+ def sec(column: Column) -> Column: ...
347
+ def e() -> Column: ...
348
+ def pi() -> Column: ...
349
+ def median(column: Column) -> Column: ...
350
+ def mode(column: Column) -> Column: ...
351
+ def stddev_pop(column: Column) -> Column: ...
352
+ def var_pop(column: Column) -> Column: ...
353
+ def count_distinct(column: Column) -> Column: ...
354
+ def approx_count_distinct(column: Column) -> Column: ...
355
+ def first(column: Column) -> Column: ...
356
+ def last(column: Column) -> Column: ...
357
+ def collect_list(column: Column) -> Column: ...
358
+ def collect_set(column: Column) -> Column: ...
@@ -1,166 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: robin-sparkless
3
- Version: 0.1.0
4
- Classifier: Development Status :: 3 - Alpha
5
- Classifier: License :: OSI Approved :: MIT License
6
- Classifier: Programming Language :: Python :: 3
7
- Classifier: Programming Language :: Rust
8
- Classifier: Topic :: Scientific/Engineering
9
- Summary: PySpark-like DataFrame API in Rust (Polars backend), with Python bindings via PyO3
10
- Author: Robin Sparkless contributors
11
- License: MIT
12
- Requires-Python: >=3.8
13
- Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
14
-
15
- # Robin Sparkless
16
-
17
- **PySpark-style DataFrames in Rust—no JVM.** A DataFrame library that mirrors PySpark’s API and semantics while using [Polars](https://www.pola.rs/) as the execution engine.
18
-
19
- [![crates.io](https://img.shields.io/crates/v/robin-sparkless.svg)](https://crates.io/crates/robin-sparkless)
20
- [![docs.rs](https://docs.rs/robin-sparkless/badge.svg)](https://docs.rs/robin-sparkless)
21
- [![Documentation](https://readthedocs.org/projects/robin-sparkless/badge/?version=latest)](https://robin-sparkless.readthedocs.io/en/latest/)
22
- [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
23
-
24
- ---
25
-
26
- ## Why Robin Sparkless?
27
-
28
- - **Familiar API** — `SparkSession`, `DataFrame`, `Column`, and PySpark-like functions so you can reuse patterns without the JVM.
29
- - **Polars under the hood** — Fast, native Rust execution with Polars for IO, expressions, and aggregations.
30
- - **Rust-first, Python optional** — Use it as a Rust library or build the Python extension via PyO3 for a drop-in style API.
31
- - **Sparkless backend target** — Designed to power [Sparkless](https://github.com/eddiethedean/sparkless) (the Python PySpark replacement) so Sparkless can run on this engine via PyO3.
32
-
33
- ---
34
-
35
- ## Features
36
-
37
- | Area | What’s included |
38
- |------|------------------|
39
- | **Core** | `SparkSession`, `DataFrame`, `Column`; `filter`, `select`, `with_column`, `order_by`, `group_by`, joins |
40
- | **IO** | CSV, Parquet, JSON via `SparkSession::read_*` |
41
- | **Expressions** | `col()`, `lit()`, `when`/`then`/`otherwise`, `coalesce`, cast, type/conditional helpers |
42
- | **Aggregates** | `count`, `sum`, `avg`, `min`, `max`, and more; multi-column groupBy |
43
- | **Window** | `row_number`, `rank`, `dense_rank`, `lag`, `lead`, `first_value`, `last_value`, and others with `.over()` |
44
- | **Arrays & maps** | `array_*`, `explode`, `create_map`, `map_keys`, `map_values`, and related functions |
45
- | **Strings & JSON** | String functions (`upper`, `lower`, `substring`, `regexp_*`, etc.), `get_json_object`, `from_json`, `to_json` |
46
- | **Datetime & math** | Date/time extractors and arithmetic, `year`/`month`/`day`, math (`sin`, `cos`, `sqrt`, `pow`, …) |
47
- | **Optional SQL** | `spark.sql("SELECT ...")` with temp views (`createOrReplaceTempView`, `table`) — enable with `--features sql` |
48
- | **Optional Delta** | `read_delta`, `read_delta_with_version`, `write_delta` — enable with `--features delta` |
49
-
50
- Known differences from PySpark are documented in [docs/PYSPARK_DIFFERENCES.md](docs/PYSPARK_DIFFERENCES.md). Parity status and roadmap are in [docs/PARITY_STATUS.md](docs/PARITY_STATUS.md) and [docs/ROADMAP.md](docs/ROADMAP.md).
51
-
52
- ---
53
-
54
- ## Installation
55
-
56
- ### Rust
57
-
58
- Add to your `Cargo.toml`:
59
-
60
- ```toml
61
- [dependencies]
62
- robin-sparkless = "0.1.0"
63
- ```
64
-
65
- Optional features:
66
-
67
- ```toml
68
- robin-sparkless = { version = "0.1.0", features = ["sql"] } # spark.sql(), temp views
69
- robin-sparkless = { version = "0.1.0", features = ["delta"] } # Delta Lake read/write
70
- ```
71
-
72
- ### Python (PyO3)
73
-
74
- Build the Python extension with [maturin](https://www.maturin.rs/) (Rust + Python 3.8+):
75
-
76
- ```bash
77
- pip install maturin
78
- maturin develop --features pyo3
79
- # With optional SQL and/or Delta:
80
- maturin develop --features "pyo3,sql"
81
- maturin develop --features "pyo3,delta"
82
- maturin develop --features "pyo3,sql,delta"
83
- ```
84
-
85
- Then use the `robin_sparkless` module; see [docs/PYTHON_API.md](docs/PYTHON_API.md).
86
-
87
- ---
88
-
89
- ## Quick start
90
-
91
- ### Rust
92
-
93
- ```rust
94
- use robin_sparkless::{col, lit_i64, SparkSession};
95
-
96
- fn main() -> Result<(), Box<dyn std::error::Error>> {
97
- let spark = SparkSession::builder().app_name("demo").get_or_create();
98
-
99
- // Create a DataFrame from rows (id, age, name)
100
- let df = spark.create_dataframe(
101
- vec![
102
- (1, 25, "Alice".to_string()),
103
- (2, 30, "Bob".to_string()),
104
- (3, 35, "Charlie".to_string()),
105
- ],
106
- vec!["id", "age", "name"],
107
- )?;
108
-
109
- // Filter and show
110
- let adults = df.filter(col("age").gt(lit_i64(26)))?;
111
- adults.show(Some(10))?;
112
-
113
- Ok(())
114
- }
115
- ```
116
-
117
- You can also wrap an existing Polars `DataFrame` with `DataFrame::from_polars(polars_df)`. See [docs/QUICKSTART.md](docs/QUICKSTART.md) for joins, window functions, and more.
118
-
119
- ### Python
120
-
121
- ```python
122
- import robin_sparkless as rs
123
-
124
- spark = rs.SparkSession.builder().app_name("demo").get_or_create()
125
- df = spark.create_dataframe([(1, 25, "Alice"), (2, 30, "Bob")], ["id", "age", "name"])
126
- filtered = df.filter(rs.col("age").gt(rs.lit(26)))
127
- print(filtered.collect()) # [{"id": 2, "age": 30, "name": "Bob"}]
128
- ```
129
-
130
- ---
131
-
132
- ## Development
133
-
134
- **Prerequisites:** Rust (see [rust-toolchain.toml](rust-toolchain.toml)), and for Python tests: Python 3.8+, `maturin`, `pytest`.
135
-
136
- | Command | Description |
137
- |---------|-------------|
138
- | `cargo build` | Build (Rust only) |
139
- | `cargo build --features pyo3` | Build with Python extension |
140
- | `cargo test` | Run Rust tests |
141
- | `make test` | Run Rust + Python tests (creates venv, `maturin develop`, `pytest`) |
142
- | `make check` | Format, clippy, audit, deny, tests |
143
- | `cargo bench` | Benchmarks (robin-sparkless vs Polars) |
144
- | `cargo doc --open` | Build and open API docs |
145
-
146
- CI runs the same checks on push/PR (see [.github/workflows/ci.yml](.github/workflows/ci.yml)).
147
-
148
- ---
149
-
150
- ## Documentation
151
-
152
- - [**Full documentation (Read the Docs)**](https://robin-sparkless.readthedocs.io/) — Quickstart, Python API, reference, and Sparkless integration (MkDocs)
153
- - [**API reference (docs.rs)**](https://docs.rs/robin-sparkless) — Crate API
154
- - [**QUICKSTART**](docs/QUICKSTART.md) — Build, usage, optional features, benchmarks
155
- - [**ROADMAP**](docs/ROADMAP.md) — Development roadmap and Sparkless integration
156
- - [**PYSPARK_DIFFERENCES**](docs/PYSPARK_DIFFERENCES.md) — Known divergences from PySpark
157
- - [**RELEASING**](docs/RELEASING.md) — Releasing and publishing to crates.io
158
-
159
- See also [CHANGELOG.md](CHANGELOG.md) for version history.
160
-
161
- ---
162
-
163
- ## License
164
-
165
- MIT
166
-
@@ -1,5 +0,0 @@
1
- robin_sparkless\__init__.py,sha256=h26i0kv9czksgzJ0zbFJvX-5IZV8hrWVEerFQOVG3fA,143
2
- robin_sparkless\robin_sparkless.pyd,sha256=st7JPqkmNtOfSiCGQvS-d_95cMDUBy0zB5XX1bejwOI,58464256
3
- robin_sparkless-0.1.0.dist-info\METADATA,sha256=oriVbTGk7qSpqb53OaL4l3-NG0almNl-CGrctUIxHtQ,6544
4
- robin_sparkless-0.1.0.dist-info\WHEEL,sha256=gPqN4EsdiAyGvmfrYy_ONrF276O8o0hPitI2CKZrEFA,95
5
- robin_sparkless-0.1.0.dist-info\RECORD,,