pyframe-xpy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. pyframe_xpy-0.1.0/LICENSE +21 -0
  2. pyframe_xpy-0.1.0/MANIFEST.in +9 -0
  3. pyframe_xpy-0.1.0/PKG-INFO +275 -0
  4. pyframe_xpy-0.1.0/README.md +222 -0
  5. pyframe_xpy-0.1.0/framex/__init__.py +110 -0
  6. pyframe_xpy-0.1.0/framex/_version.py +1 -0
  7. pyframe_xpy-0.1.0/framex/backends/__init__.py +9 -0
  8. pyframe_xpy-0.1.0/framex/backends/array_accel.py +201 -0
  9. pyframe_xpy-0.1.0/framex/backends/c_backend.py +391 -0
  10. pyframe_xpy-0.1.0/framex/backends/c_kernels.c +207 -0
  11. pyframe_xpy-0.1.0/framex/compat.py +265 -0
  12. pyframe_xpy-0.1.0/framex/config.py +244 -0
  13. pyframe_xpy-0.1.0/framex/core/__init__.py +7 -0
  14. pyframe_xpy-0.1.0/framex/core/array.py +480 -0
  15. pyframe_xpy-0.1.0/framex/core/dataframe.py +946 -0
  16. pyframe_xpy-0.1.0/framex/core/dtypes.py +98 -0
  17. pyframe_xpy-0.1.0/framex/core/index.py +86 -0
  18. pyframe_xpy-0.1.0/framex/core/series.py +272 -0
  19. pyframe_xpy-0.1.0/framex/interchange/__init__.py +11 -0
  20. pyframe_xpy-0.1.0/framex/interchange/dataframe_protocol.py +114 -0
  21. pyframe_xpy-0.1.0/framex/interchange/numpy_protocols.py +23 -0
  22. pyframe_xpy-0.1.0/framex/io/__init__.py +20 -0
  23. pyframe_xpy-0.1.0/framex/io/arrow_ipc.py +28 -0
  24. pyframe_xpy-0.1.0/framex/io/csv.py +46 -0
  25. pyframe_xpy-0.1.0/framex/io/file.py +316 -0
  26. pyframe_xpy-0.1.0/framex/io/json.py +152 -0
  27. pyframe_xpy-0.1.0/framex/io/parquet.py +31 -0
  28. pyframe_xpy-0.1.0/framex/memory/__init__.py +5 -0
  29. pyframe_xpy-0.1.0/framex/memory/buffer.py +196 -0
  30. pyframe_xpy-0.1.0/framex/memory/pool.py +82 -0
  31. pyframe_xpy-0.1.0/framex/memory/transport.py +198 -0
  32. pyframe_xpy-0.1.0/framex/ops/__init__.py +23 -0
  33. pyframe_xpy-0.1.0/framex/ops/elementwise.py +174 -0
  34. pyframe_xpy-0.1.0/framex/ops/filter.py +66 -0
  35. pyframe_xpy-0.1.0/framex/ops/groupby.py +70 -0
  36. pyframe_xpy-0.1.0/framex/ops/join.py +81 -0
  37. pyframe_xpy-0.1.0/framex/ops/projection.py +32 -0
  38. pyframe_xpy-0.1.0/framex/ops/reduction.py +169 -0
  39. pyframe_xpy-0.1.0/framex/ops/sort.py +41 -0
  40. pyframe_xpy-0.1.0/framex/ops/window.py +275 -0
  41. pyframe_xpy-0.1.0/framex/pandas_engine.py +52 -0
  42. pyframe_xpy-0.1.0/framex/runtime/__init__.py +16 -0
  43. pyframe_xpy-0.1.0/framex/runtime/executor.py +256 -0
  44. pyframe_xpy-0.1.0/framex/runtime/partition.py +67 -0
  45. pyframe_xpy-0.1.0/framex/runtime/scheduler.py +95 -0
  46. pyframe_xpy-0.1.0/framex/runtime/streaming.py +64 -0
  47. pyframe_xpy-0.1.0/framex/runtime/task.py +103 -0
  48. pyframe_xpy-0.1.0/pyframe_xpy.egg-info/PKG-INFO +275 -0
  49. pyframe_xpy-0.1.0/pyframe_xpy.egg-info/SOURCES.txt +52 -0
  50. pyframe_xpy-0.1.0/pyframe_xpy.egg-info/dependency_links.txt +1 -0
  51. pyframe_xpy-0.1.0/pyframe_xpy.egg-info/requires.txt +38 -0
  52. pyframe_xpy-0.1.0/pyframe_xpy.egg-info/top_level.txt +1 -0
  53. pyframe_xpy-0.1.0/pyproject.toml +93 -0
  54. pyframe_xpy-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Theerayut Bubpamala
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include README.md
2
+ include LICENSE
3
+ include pyproject.toml
4
+ recursive-include framex *.py *.c
5
+ recursive-exclude framex *.so *.dylib *.pyd
6
+ prune tests
7
+ prune benchmarks
8
+ global-exclude __pycache__
9
+ global-exclude *.py[cod]
@@ -0,0 +1,275 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyframe-xpy
3
+ Version: 0.1.0
4
+ Summary: High-performance parallel dataframe and array processing with Arrow-backed storage
5
+ Author: FrameX Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/aeiwz/FrameX
8
+ Project-URL: Repository, https://github.com/aeiwz/FrameX
9
+ Project-URL: Issues, https://github.com/aeiwz/FrameX/issues
10
+ Project-URL: Documentation, https://github.com/aeiwz/FrameX/tree/main/docs
11
+ Keywords: dataframe,array,analytics,arrow,dask,ray,numpy,parallel
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Software Development :: Libraries
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pyarrow>=14.0
26
+ Requires-Dist: numpy>=1.24
27
+ Provides-Extra: pandas-compat
28
+ Requires-Dist: pandas>=2.0; extra == "pandas-compat"
29
+ Provides-Extra: distributed
30
+ Requires-Dist: dask[dataframe,distributed]>=2024.1.0; extra == "distributed"
31
+ Requires-Dist: ray[data]>=2.9.0; extra == "distributed"
32
+ Provides-Extra: accel
33
+ Requires-Dist: numexpr>=2.9; extra == "accel"
34
+ Requires-Dist: numba>=0.59; extra == "accel"
35
+ Provides-Extra: gpu
36
+ Requires-Dist: cupy-cuda12x>=13.0; platform_system != "Windows" and extra == "gpu"
37
+ Provides-Extra: ml-accel
38
+ Requires-Dist: torch>=2.2; extra == "ml-accel"
39
+ Requires-Dist: jax>=0.4.30; extra == "ml-accel"
40
+ Provides-Extra: pandas-fast
41
+ Requires-Dist: modin[ray]>=0.30; extra == "pandas-fast"
42
+ Provides-Extra: dev
43
+ Requires-Dist: pytest>=7.0; extra == "dev"
44
+ Requires-Dist: pytest-benchmark; extra == "dev"
45
+ Requires-Dist: hypothesis; extra == "dev"
46
+ Provides-Extra: bench
47
+ Requires-Dist: matplotlib>=3.8; extra == "bench"
48
+ Requires-Dist: psutil>=5.9; extra == "bench"
49
+ Provides-Extra: release
50
+ Requires-Dist: build>=1.2.2; extra == "release"
51
+ Requires-Dist: twine>=5.1.1; extra == "release"
52
+ Dynamic: license-file
53
+
54
+ # FrameX
55
+
56
+ FrameX is an Arrow-backed Python library for parallel dataframe and array processing on a single machine.
57
+
58
+ It combines:
59
+
60
+ - Pandas-like tabular APIs (`DataFrame`, `Series`, `GroupBy`)
61
+ - NumPy-compatible chunked arrays (`NDArray` with NumPy protocol support)
62
+ - Arrow-native storage/interop (`to_arrow`, Parquet/IPC I/O)
63
+ - Eager execution with optional lazy pipelines (`.lazy().collect()`)
64
+ - Runtime backends for local threads/processes plus optional Ray/Dask executors
65
+
66
+ ## Why FrameX
67
+
68
+ FrameX is aimed at local analytics workflows that are bigger than comfortable single-threaded scripts but do not yet require distributed infrastructure.
69
+
70
+ Typical fit:
71
+
72
+ - ETL and analytics pipelines on medium-to-large local datasets
73
+ - feature engineering workflows that mix table and array operations
74
+ - migration paths from Pandas scripts where API familiarity matters
75
+
76
+ ## Installation
77
+
78
+ From PyPI:
79
+
80
+ ```bash
81
+ pip install pyframe-xpy
82
+ ```
83
+
84
+ From source:
85
+
86
+ ```bash
87
+ git clone https://github.com/aeiwz/FrameX.git
88
+ cd FrameX
89
+ pip install -e .
90
+ ```
91
+
92
+ Requirements:
93
+
94
+ - Python `>=3.10`
95
+ - Core dependencies: `pyarrow`, `numpy`
96
+ - Optional compatibility: `pandas` (`pip install pyframe-xpy[pandas-compat]`)
97
+
98
+ ## Quick Start
99
+
100
+ ```python
101
+ import framex as fx
102
+
103
+ df = fx.DataFrame(
104
+ {
105
+ "group": ["a", "a", "b"],
106
+ "value": [10, 20, 30],
107
+ "is_refund": [False, True, False],
108
+ }
109
+ )
110
+
111
+ result = (
112
+ df.filter(~df["is_refund"])
113
+ .groupby("group")
114
+ .agg({"value": ["sum", "mean", "count"]})
115
+ .sort("value_sum", ascending=False)
116
+ )
117
+
118
+ print(result.to_pandas())
119
+ ```
120
+
121
+ ## Core API
122
+
123
+ Top-level imports:
124
+
125
+ ```python
126
+ import framex as fx
127
+ ```
128
+
129
+ Main objects and helpers:
130
+
131
+ - `fx.DataFrame`, `fx.Series`, `fx.Index`, `fx.LazyFrame`
132
+ - `fx.NDArray`, `fx.array(...)`
133
+ - `fx.read_parquet`, `fx.write_parquet`, `fx.read_ipc`, `fx.write_ipc`, `fx.read_csv`, `fx.write_csv`
134
+ - `fx.read_json`, `fx.write_json`, `fx.read_ndjson`, `fx.write_ndjson`
135
+ - `fx.read_file`, `fx.write_file` for format auto-detection
136
+
137
+ Compression:
138
+ - transparent extension-based compression for `read_file` / `write_file`
139
+ - supported wrappers: `.gz`, `.bz2`, `.xz`, `.zip`, and `.zst`/`.zstd` (when `zstandard` is installed)
140
+ - `fx.from_pandas`, `fx.from_dask`, `fx.from_ray`, `fx.from_dataframe`
141
+ - `fx.get_config`, `fx.set_backend`, `fx.set_workers`, `fx.set_serializer`, `fx.set_kernel_backend`
142
+ - `fx.set_array_backend` for auto/NumExpr/Numba/JAX/PyTorch/CuPy acceleration modes
143
+ - `fx.recommend_best_performance_config()` to inspect hardware-tuned settings
144
+ - `fx.auto_configure_hardware()` to apply best-performance config automatically
145
+ - `fx.StreamProcessor` for micro-batch streaming pipelines
146
+
147
+ Acceleration extras:
148
+
149
+ ```bash
150
+ pip install pyframe-xpy[accel] # numexpr + numba
151
+ pip install pyframe-xpy[gpu] # cupy (CUDA)
152
+ pip install pyframe-xpy[ml-accel] # jax + pytorch
153
+ pip install pyframe-xpy[pandas-fast] # modin backend
154
+ pip install pyframe-xpy[distributed] # Dask + Ray distributed/HPC backends
155
+ pip install zstandard # .zst/.zstd file compression
156
+ ```
157
+
158
+ Backend notes:
159
+
160
+ - `fx.set_backend("threads" | "processes" | "ray" | "dask" | "hpc")`
161
+ - Ray and Dask execution backends require their respective runtimes to be installed/available.
162
+ - HPC mode (`"hpc"`) uses cluster-oriented execution via Dask or Ray:
163
+ - `FRAMEX_HPC_ENGINE=dask|ray`
164
+ - `FRAMEX_DASK_SCHEDULER_ADDRESS=<tcp://...>` to connect existing Dask clusters
165
+ - `FRAMEX_RAY_ADDRESS=<ray://...>` to connect existing Ray clusters
166
+ - optional SLURM bootstrap: `FRAMEX_DASK_SLURM=1` (requires `dask-jobqueue`)
167
+
168
+ Test support notes:
169
+
170
+ - Some tests are optional-backend gated and intentionally `skipped` when deps are not installed.
171
+ - Typical skip reasons: missing `dask.distributed`, `dask.dataframe`, `ray`, or `ray.data`.
172
+ - Run full optional matrix locally:
173
+
174
+ ```bash
175
+ pip install pyframe-xpy[distributed]
176
+ pytest -q
177
+ ```
178
+
179
+ ## Documentation
180
+
181
+ Canonical docs are in [`docs/documents`](docs/documents):
182
+
183
+ - [Overview](docs/documents/overview.md)
184
+ - [Features](docs/documents/features.md)
185
+ - [Getting Started](docs/documents/getting_started.md)
186
+ - [Installation](docs/documents/installation.md)
187
+ - [Tutorial: ETL Pipeline](docs/documents/tutorial_etl_pipeline.md)
188
+ - [Tutorial: NumPy NDArray Interop](docs/documents/tutorial_numpy_array.md)
189
+ - [Use Cases](docs/documents/use_cases.md)
190
+ - [Configuration Guide](docs/documents/configuration_guide.md)
191
+ - [Performance Test](docs/documents/performance_test.md)
192
+ - [Architecture](docs/documents/architecture.md)
193
+ - [API Reference](docs/documents/api_reference.md)
194
+ - [Roadmap](docs/documents/roadmap.md)
195
+ - [FAQ](docs/documents/faq.md)
196
+
197
+ ## Website (Docs UI)
198
+
199
+ The docs website lives in [`website`](website) (Next.js App Router).
200
+
201
+ Main docs routes:
202
+
203
+ - `http://localhost:3000/docs/features`
204
+ - `http://localhost:3000/docs/tutorial_etl_pipeline`
205
+ - `http://localhost:3000/docs/use_cases`
206
+ - `http://localhost:3000/docs/configuration_guide`
207
+ - `http://localhost:3000/docs/performance_test`
208
+
209
+ Run locally:
210
+
211
+ ```bash
212
+ cd website
213
+ npm install
214
+ npm run dev
215
+ ```
216
+
217
+ Production build:
218
+
219
+ ```bash
220
+ npm run build
221
+ npm run start
222
+ ```
223
+
224
+ ## Development
225
+
226
+ Install dev dependencies:
227
+
228
+ ```bash
229
+ pip install -e .[dev]
230
+ ```
231
+
232
+ Run tests:
233
+
234
+ ```bash
235
+ pytest
236
+ ```
237
+
238
+ ## Benchmarks
239
+
240
+ Benchmark code and generated reports are in [`benchmarks`](benchmarks).
241
+
242
+ Run the full benchmark suite (includes in-terminal progress bar and report generation):
243
+
244
+ ```bash
245
+ python3 -m benchmarks.benchmark_suite
246
+ ```
247
+
248
+ Run workload capability matrix checks:
249
+
250
+ ```bash
251
+ python3 -m benchmarks.check_framex_workloads
252
+ ```
253
+
254
+ Benchmark outputs are written to `benchmarks/results`:
255
+
256
+ - `benchmark_results.json`
257
+ - `benchmark_results.csv`
258
+ - `benchmark_report.md`
259
+ - `framex_workload_check.json`
260
+ - `performance_speedup.png`
261
+ - `parallel_processing_scaling.png`
262
+ - `multiprocessing_scaling.png`
263
+ - `memory_peak_rss.png`
264
+
265
+ ## Project Status
266
+
267
+ FrameX is pre-1.0 (`0.1.0`) and in active development.
268
+
269
+ - APIs are usable and documented
270
+ - compatibility/performance behavior will continue to evolve
271
+ - pin versions for production-critical workloads
272
+
273
+ ## License
274
+
275
+ [MIT](LICENSE)
@@ -0,0 +1,222 @@
1
+ # FrameX
2
+
3
+ FrameX is an Arrow-backed Python library for parallel dataframe and array processing on a single machine.
4
+
5
+ It combines:
6
+
7
+ - Pandas-like tabular APIs (`DataFrame`, `Series`, `GroupBy`)
8
+ - NumPy-compatible chunked arrays (`NDArray` with NumPy protocol support)
9
+ - Arrow-native storage/interop (`to_arrow`, Parquet/IPC I/O)
10
+ - Eager execution with optional lazy pipelines (`.lazy().collect()`)
11
+ - Runtime backends for local threads/processes plus optional Ray/Dask executors
12
+
13
+ ## Why FrameX
14
+
15
+ FrameX is aimed at local analytics workflows that are bigger than comfortable single-threaded scripts but do not yet require distributed infrastructure.
16
+
17
+ Typical fit:
18
+
19
+ - ETL and analytics pipelines on medium-to-large local datasets
20
+ - feature engineering workflows that mix table and array operations
21
+ - migration paths from Pandas scripts where API familiarity matters
22
+
23
+ ## Installation
24
+
25
+ From PyPI:
26
+
27
+ ```bash
28
+ pip install pyframe-xpy
29
+ ```
30
+
31
+ From source:
32
+
33
+ ```bash
34
+ git clone https://github.com/aeiwz/FrameX.git
35
+ cd FrameX
36
+ pip install -e .
37
+ ```
38
+
39
+ Requirements:
40
+
41
+ - Python `>=3.10`
42
+ - Core dependencies: `pyarrow`, `numpy`
43
+ - Optional compatibility: `pandas` (`pip install pyframe-xpy[pandas-compat]`)
44
+
45
+ ## Quick Start
46
+
47
+ ```python
48
+ import framex as fx
49
+
50
+ df = fx.DataFrame(
51
+ {
52
+ "group": ["a", "a", "b"],
53
+ "value": [10, 20, 30],
54
+ "is_refund": [False, True, False],
55
+ }
56
+ )
57
+
58
+ result = (
59
+ df.filter(~df["is_refund"])
60
+ .groupby("group")
61
+ .agg({"value": ["sum", "mean", "count"]})
62
+ .sort("value_sum", ascending=False)
63
+ )
64
+
65
+ print(result.to_pandas())
66
+ ```
67
+
68
+ ## Core API
69
+
70
+ Top-level imports:
71
+
72
+ ```python
73
+ import framex as fx
74
+ ```
75
+
76
+ Main objects and helpers:
77
+
78
+ - `fx.DataFrame`, `fx.Series`, `fx.Index`, `fx.LazyFrame`
79
+ - `fx.NDArray`, `fx.array(...)`
80
+ - `fx.read_parquet`, `fx.write_parquet`, `fx.read_ipc`, `fx.write_ipc`, `fx.read_csv`, `fx.write_csv`
81
+ - `fx.read_json`, `fx.write_json`, `fx.read_ndjson`, `fx.write_ndjson`
82
+ - `fx.read_file`, `fx.write_file` for format auto-detection
83
+
84
+ Compression:
85
+ - transparent extension-based compression for `read_file` / `write_file`
86
+ - supported wrappers: `.gz`, `.bz2`, `.xz`, `.zip`, and `.zst`/`.zstd` (when `zstandard` is installed)
87
+ - `fx.from_pandas`, `fx.from_dask`, `fx.from_ray`, `fx.from_dataframe`
88
+ - `fx.get_config`, `fx.set_backend`, `fx.set_workers`, `fx.set_serializer`, `fx.set_kernel_backend`
89
+ - `fx.set_array_backend` for auto/NumExpr/Numba/JAX/PyTorch/CuPy acceleration modes
90
+ - `fx.recommend_best_performance_config()` to inspect hardware-tuned settings
91
+ - `fx.auto_configure_hardware()` to apply best-performance config automatically
92
+ - `fx.StreamProcessor` for micro-batch streaming pipelines
93
+
94
+ Acceleration extras:
95
+
96
+ ```bash
97
+ pip install pyframe-xpy[accel] # numexpr + numba
98
+ pip install pyframe-xpy[gpu] # cupy (CUDA)
99
+ pip install pyframe-xpy[ml-accel] # jax + pytorch
100
+ pip install pyframe-xpy[pandas-fast] # modin backend
101
+ pip install pyframe-xpy[distributed] # Dask + Ray distributed/HPC backends
102
+ pip install zstandard # .zst/.zstd file compression
103
+ ```
104
+
105
+ Backend notes:
106
+
107
+ - `fx.set_backend("threads" | "processes" | "ray" | "dask" | "hpc")`
108
+ - Ray and Dask execution backends require their respective runtimes to be installed/available.
109
+ - HPC mode (`"hpc"`) uses cluster-oriented execution via Dask or Ray:
110
+ - `FRAMEX_HPC_ENGINE=dask|ray`
111
+ - `FRAMEX_DASK_SCHEDULER_ADDRESS=<tcp://...>` to connect existing Dask clusters
112
+ - `FRAMEX_RAY_ADDRESS=<ray://...>` to connect existing Ray clusters
113
+ - optional SLURM bootstrap: `FRAMEX_DASK_SLURM=1` (requires `dask-jobqueue`)
114
+
115
+ Test support notes:
116
+
117
+ - Some tests are optional-backend gated and intentionally `skipped` when deps are not installed.
118
+ - Typical skip reasons: missing `dask.distributed`, `dask.dataframe`, `ray`, or `ray.data`.
119
+ - Run full optional matrix locally:
120
+
121
+ ```bash
122
+ pip install pyframe-xpy[distributed]
123
+ pytest -q
124
+ ```
125
+
126
+ ## Documentation
127
+
128
+ Canonical docs are in [`docs/documents`](docs/documents):
129
+
130
+ - [Overview](docs/documents/overview.md)
131
+ - [Features](docs/documents/features.md)
132
+ - [Getting Started](docs/documents/getting_started.md)
133
+ - [Installation](docs/documents/installation.md)
134
+ - [Tutorial: ETL Pipeline](docs/documents/tutorial_etl_pipeline.md)
135
+ - [Tutorial: NumPy NDArray Interop](docs/documents/tutorial_numpy_array.md)
136
+ - [Use Cases](docs/documents/use_cases.md)
137
+ - [Configuration Guide](docs/documents/configuration_guide.md)
138
+ - [Performance Test](docs/documents/performance_test.md)
139
+ - [Architecture](docs/documents/architecture.md)
140
+ - [API Reference](docs/documents/api_reference.md)
141
+ - [Roadmap](docs/documents/roadmap.md)
142
+ - [FAQ](docs/documents/faq.md)
143
+
144
+ ## Website (Docs UI)
145
+
146
+ The docs website lives in [`website`](website) (Next.js App Router).
147
+
148
+ Main docs routes:
149
+
150
+ - `http://localhost:3000/docs/features`
151
+ - `http://localhost:3000/docs/tutorial_etl_pipeline`
152
+ - `http://localhost:3000/docs/use_cases`
153
+ - `http://localhost:3000/docs/configuration_guide`
154
+ - `http://localhost:3000/docs/performance_test`
155
+
156
+ Run locally:
157
+
158
+ ```bash
159
+ cd website
160
+ npm install
161
+ npm run dev
162
+ ```
163
+
164
+ Production build:
165
+
166
+ ```bash
167
+ npm run build
168
+ npm run start
169
+ ```
170
+
171
+ ## Development
172
+
173
+ Install dev dependencies:
174
+
175
+ ```bash
176
+ pip install -e .[dev]
177
+ ```
178
+
179
+ Run tests:
180
+
181
+ ```bash
182
+ pytest
183
+ ```
184
+
185
+ ## Benchmarks
186
+
187
+ Benchmark code and generated reports are in [`benchmarks`](benchmarks).
188
+
189
+ Run the full benchmark suite (includes in-terminal progress bar and report generation):
190
+
191
+ ```bash
192
+ python3 -m benchmarks.benchmark_suite
193
+ ```
194
+
195
+ Run workload capability matrix checks:
196
+
197
+ ```bash
198
+ python3 -m benchmarks.check_framex_workloads
199
+ ```
200
+
201
+ Benchmark outputs are written to `benchmarks/results`:
202
+
203
+ - `benchmark_results.json`
204
+ - `benchmark_results.csv`
205
+ - `benchmark_report.md`
206
+ - `framex_workload_check.json`
207
+ - `performance_speedup.png`
208
+ - `parallel_processing_scaling.png`
209
+ - `multiprocessing_scaling.png`
210
+ - `memory_peak_rss.png`
211
+
212
+ ## Project Status
213
+
214
+ FrameX is pre-1.0 (`0.1.0`) and in active development.
215
+
216
+ - APIs are usable and documented
217
+ - compatibility/performance behavior will continue to evolve
218
+ - pin versions for production-critical workloads
219
+
220
+ ## License
221
+
222
+ [MIT](LICENSE)
@@ -0,0 +1,110 @@
1
"""FrameX: High-performance parallel dataframe and array processing.

Top-level package namespace: re-exports the public API (core types, I/O
helpers, configuration, interchange, window ops, runtime) and registers
the ``__dataframe__`` interchange protocol on :class:`DataFrame`.
"""

from framex._version import __version__
from framex.config import (
    ArrayBackendType,
    Config,
    auto_configure_hardware,
    config,
    get_config,
    recommend_best_performance_config,
    set_array_backend,
    set_backend,
    set_kernel_backend,
    set_serializer,
    set_workers,
)
from framex.core.array import NDArray
from framex.core.dataframe import DataFrame, LazyFrame
from framex.core.dtypes import DType
from framex.core.index import Index
from framex.core.series import Series
from framex.interchange.dataframe_protocol import (
    add_dataframe_protocol,
    from_dask,
    from_dataframe,
    from_pandas,
    from_ray,
)
from framex.io.arrow_ipc import read_ipc, write_ipc
from framex.io.csv import read_csv, write_csv
from framex.io.file import read_file, write_file
from framex.io.json import read_json, read_ndjson, write_json, write_ndjson
from framex.io.parquet import read_parquet, write_parquet
from framex.ops.window import (
    rank,
    rolling_max,
    rolling_mean,
    rolling_min,
    rolling_std,
    rolling_sum,
    top_k,
)
from framex.runtime.executor import detect_backend
from framex.runtime.streaming import StreamProcessor, StreamStats
from framex.compat import DIVERGENCES, check_pandas_compat, list_divergences

# Apply the __dataframe__ interchange protocol to DataFrame so that
# third-party consumers can ingest FrameX frames via the standard protocol.
add_dataframe_protocol(DataFrame)


def array(
    data: list | None = None,
    *,
    dtype: str | None = None,
    chunks: int | None = None,
) -> NDArray:
    """Convenience constructor for :class:`NDArray`.

    Args:
        data: Values used to initialise the array; forwarded to
            ``NDArray`` unchanged (``None`` is passed through as-is).
        dtype: Optional dtype name understood by ``NDArray``.
        chunks: Optional chunk count for the underlying chunked storage.

    Returns:
        A new :class:`NDArray` wrapping ``data``.
    """
    return NDArray(data, dtype=dtype, chunks=chunks)


__all__ = [
    "__version__",
    # Config
    "Config",
    "ArrayBackendType",
    "recommend_best_performance_config",
    "auto_configure_hardware",
    "config",
    "get_config",
    "set_array_backend",
    "set_backend",
    "set_kernel_backend",
    "set_serializer",
    "set_workers",
    # Core types
    "DataFrame",
    "DType",
    "Index",
    "LazyFrame",
    "NDArray",
    "Series",
    # Interchange
    "from_dataframe",
    "from_pandas",
    "from_dask",
    "from_ray",
    # IO
    "read_csv",
    "read_file",
    "read_ipc",
    "read_json",
    "read_ndjson",
    "read_parquet",
    "write_csv",
    "write_file",
    "write_ipc",
    "write_json",
    "write_ndjson",
    "write_parquet",
    # Convenience
    "array",
    # Window ops
    "rolling_mean",
    "rolling_sum",
    "rolling_std",
    "rolling_min",
    "rolling_max",
    "top_k",
    "rank",
    # Runtime
    "detect_backend",
    "StreamProcessor",
    "StreamStats",
    # Compatibility
    "DIVERGENCES",
    "check_pandas_compat",
    "list_divergences",
]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,9 @@
1
+ """FrameX compute backends.
2
+
3
+ ``c_backend`` — C kernels compiled at first import via ctypes.
4
+ ``C_AVAILABLE`` — True when compilation succeeded.
5
+ """
6
+
7
+ from framex.backends.c_backend import C_AVAILABLE
8
+
9
+ __all__ = ["C_AVAILABLE"]