dpyr 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dpyr/__init__.py +17 -0
- dpyr-0.0.1.dist-info/METADATA +69 -0
- dpyr-0.0.1.dist-info/RECORD +4 -0
- dpyr-0.0.1.dist-info/WHEEL +4 -0
dpyr/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""dpyr — dplyr for Python, fronting polars and duckdb.
|
|
2
|
+
|
|
3
|
+
This release reserves the package name while the library is under active
|
|
4
|
+
development. See https://github.com/maximerivest/dpyr for the design
|
|
5
|
+
documents and roadmap.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.0.1"


def __getattr__(name: str):
    """Report that a requested module attribute is not implemented yet.

    This is the module-level attribute hook (PEP 562): Python calls it only
    after normal lookup in the module namespace has failed.

    Args:
        name: The attribute name that was looked up on the module.

    Raises:
        AttributeError: If ``name`` starts with an underscore. Such lookups
            are protocol probes rather than requests for the public API.
        NotImplementedError: For every other name, with a message explaining
            that this version is a name-reservation release.
    """
    # Probes such as ``__all__`` (from ``from dpyr import *``), ``__path__``,
    # ``__wrapped__`` or ``_repr_html_`` come from callers that only handle
    # AttributeError: ``hasattr``, ``getattr`` with a default, and the import
    # system. Any other exception type escapes them and crashes the caller.
    if name.startswith("_"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    raise NotImplementedError(
        f"dpyr.{name} is not available yet: version {__version__} is a "
        "name-reservation release. The dplyr-style API (filter, mutate, "
        "group_by, summarize, ...) is under development — see "
        "https://github.com/maximerivest/dpyr for the roadmap."
    )
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dpyr
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: dplyr for Python: tidy piped verbs over polars and duckdb, with real autocompletion. Name reservation — API under active development.
|
|
5
|
+
Project-URL: Repository, https://github.com/maximerivest/dataframe
|
|
6
|
+
Author-email: Maxime Rivest <mrive052@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: data-analysis,dataframe,dplyr,duckdb,polars,tidyverse
|
|
9
|
+
Classifier: Development Status :: 1 - Planning
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# dpyr
|
|
18
|
+
|
|
19
|
+
**dplyr for Python.** A tidy, pipe-style data manipulation API — fronting
|
|
20
|
+
[polars](https://pola.rs) and [duckdb](https://duckdb.org) — with real IDE
|
|
21
|
+
autocompletion and dplyr-faithful semantics, verified by differential testing
|
|
22
|
+
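against dplyr itself. The example below shows the planned API; version 0.0.1 is a name-reservation release and does not implement it yet.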
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from dpyr import read_parquet, col, n, desc, starts_with
|
|
26
|
+
|
|
27
|
+
(
|
|
28
|
+
starwars
|
|
29
|
+
.filter(col.height > 180, col.mass < 100)
|
|
30
|
+
.mutate(bmi = col.mass / (col.height / 100) ** 2)
|
|
31
|
+
.group_by(col.species)
|
|
32
|
+
.summarize(
|
|
33
|
+
n = n(),
|
|
34
|
+
mean_bmi = col.bmi.mean(),
|
|
35
|
+
)
|
|
36
|
+
.arrange(desc(col.mean_bmi))
|
|
37
|
+
)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Principles (the elevator pitch)
|
|
41
|
+
|
|
42
|
+
1. **dplyr's vocabulary, Python's idiom.** The verbs are dplyr's, verbatim
|
|
43
|
+
(`filter`, `mutate`, `select`, `arrange`, `group_by`, `summarize`,
|
|
44
|
+
joins, tidyselect). The pipe is Python's: method chaining.
|
|
45
|
+
2. **As lazy as possible internally, as eager as possible observably.**
|
|
46
|
+
Verbs build a plan; schema errors raise immediately on the offending
|
|
47
|
+
line; displaying/exporting auto-collects. Interactive feel, query-engine
|
|
48
|
+
performance.
|
|
49
|
+
3. **Autocompletion is a feature, not an accident.** The `col` proxy and
|
|
50
|
+
per-schema stub generation make column names and column-typed methods
|
|
51
|
+
complete in any IDE.
|
|
52
|
+
4. **Two backends, one semantics.** polars (in-memory/files) and duckdb
|
|
53
|
+
(SQL pushdown) must agree, bit-for-bit modulo the documented semantics
|
|
54
|
+
spec. Verified continuously.
|
|
55
|
+
5. **dplyr is the oracle.** Compatibility is demonstrated, not claimed:
|
|
56
|
+
golden outputs are generated by actual dplyr in CI.
|
|
57
|
+
|
|
58
|
+
## Project documents
|
|
59
|
+
|
|
60
|
+
| Doc | What it pins down |
|
|
61
|
+
|---|---|
|
|
62
|
+
| [docs/DESIGN.md](docs/DESIGN.md) | API design, laziness/materialization model, autocompletion strategy, architecture |
|
|
63
|
+
| [docs/SEMANTICS.md](docs/SEMANTICS.md) | The conformance spec: every deliberate decision where R, polars and duckdb disagree |
|
|
64
|
+
| [docs/TESTING.md](docs/TESTING.md) | Test strategy: dplyr-as-oracle, backend differential tests, Hypothesis properties |
|
|
65
|
+
| [docs/ROADMAP.md](docs/ROADMAP.md) | Epics and stories to MVP, in dependency order |
|
|
66
|
+
|
|
67
|
+
## Status
|
|
68
|
+
|
|
69
|
+
Pre-MVP. The plan is in [docs/ROADMAP.md](docs/ROADMAP.md).
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
dpyr/__init__.py,sha256=bSJaO4IuayOewIX-pdVNtFxGhZD7bcI_PeDWesPPNrc,600
|
|
2
|
+
dpyr-0.0.1.dist-info/METADATA,sha256=HEQlbYkM-LMNZ1X9JoavjwFHWQoA4mpsnje_8T06EEo,2824
|
|
3
|
+
dpyr-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
4
|
+
dpyr-0.0.1.dist-info/RECORD,,
|