pycanopy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycanopy-0.1.0/.cargo/config.toml +5 -0
- pycanopy-0.1.0/.github/workflows/CI.yml +37 -0
- pycanopy-0.1.0/.github/workflows/release.yml +84 -0
- pycanopy-0.1.0/.gitignore +38 -0
- pycanopy-0.1.0/CLAUDE.md +129 -0
- pycanopy-0.1.0/Cargo.lock +1028 -0
- pycanopy-0.1.0/Cargo.toml +33 -0
- pycanopy-0.1.0/LICENSE +21 -0
- pycanopy-0.1.0/PKG-INFO +91 -0
- pycanopy-0.1.0/README.md +65 -0
- pycanopy-0.1.0/benches/calibrate.rs +1 -0
- pycanopy-0.1.0/benches/index_comparison.rs +1 -0
- pycanopy-0.1.0/examples/basic_nearest.py +0 -0
- pycanopy-0.1.0/examples/range_query.py +0 -0
- pycanopy-0.1.0/pyproject.toml +66 -0
- pycanopy-0.1.0/python/pycanopy/__init__.py +4 -0
- pycanopy-0.1.0/python/pycanopy/engine.py +163 -0
- pycanopy-0.1.0/python/pycanopy/query.py +18 -0
- pycanopy-0.1.0/rustfmt.toml +3 -0
- pycanopy-0.1.0/src/index/brute.rs +152 -0
- pycanopy-0.1.0/src/index/grid.rs +287 -0
- pycanopy-0.1.0/src/index/kdtree.rs +161 -0
- pycanopy-0.1.0/src/index/mod.rs +35 -0
- pycanopy-0.1.0/src/index/rtree.rs +114 -0
- pycanopy-0.1.0/src/lib.rs +186 -0
- pycanopy-0.1.0/src/planner/calibration.rs +19 -0
- pycanopy-0.1.0/src/planner/cost.rs +86 -0
- pycanopy-0.1.0/src/planner/mod.rs +3 -0
- pycanopy-0.1.0/src/planner/selector.rs +139 -0
- pycanopy-0.1.0/src/query/join.rs +1 -0
- pycanopy-0.1.0/src/query/mod.rs +4 -0
- pycanopy-0.1.0/src/query/nearest.rs +14 -0
- pycanopy-0.1.0/src/query/range.rs +119 -0
- pycanopy-0.1.0/src/query/types.rs +17 -0
- pycanopy-0.1.0/src/stats/collector.rs +223 -0
- pycanopy-0.1.0/src/stats/mod.rs +2 -0
- pycanopy-0.1.0/src/stats/types.rs +91 -0
- pycanopy-0.1.0/tests/python/test_engine.py +168 -0
- pycanopy-0.1.0/tests/python/test_query.py +100 -0
- pycanopy-0.1.0/tests/rust/index_tests.rs +172 -0
- pycanopy-0.1.0/tests/rust/planner_tests.rs +133 -0
- pycanopy-0.1.0/tests/rust/stats_tests.rs +76 -0
- pycanopy-0.1.0/tests/rust.rs +8 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
rust:
|
|
11
|
+
name: Rust tests
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
16
|
+
- uses: Swatinem/rust-cache@v2
|
|
17
|
+
- run: cargo test
|
|
18
|
+
- run: cargo fmt --check
|
|
19
|
+
- run: cargo clippy -- -D warnings
|
|
20
|
+
|
|
21
|
+
python:
|
|
22
|
+
name: Python tests
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
27
|
+
- uses: Swatinem/rust-cache@v2
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: "3.12"
|
|
31
|
+
- name: Install uv
|
|
32
|
+
uses: astral-sh/setup-uv@v4
|
|
33
|
+
- run: uv sync --group dev
|
|
34
|
+
- run: uv run maturin develop
|
|
35
|
+
- run: uv run ruff check python/ tests/python/
|
|
36
|
+
- run: uv run ruff format --check python/ tests/python/
|
|
37
|
+
- run: uv run pytest
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
linux:
|
|
10
|
+
name: Linux wheels
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
target: [x86_64, aarch64]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: PyO3/maturin-action@v1
|
|
18
|
+
with:
|
|
19
|
+
target: ${{ matrix.target }}
|
|
20
|
+
args: --release --out dist --features pyo3/abi3-py39
|
|
21
|
+
manylinux: auto
|
|
22
|
+
- uses: actions/upload-artifact@v4
|
|
23
|
+
with:
|
|
24
|
+
name: wheels-linux-${{ matrix.target }}
|
|
25
|
+
path: dist
|
|
26
|
+
|
|
27
|
+
macos:
|
|
28
|
+
name: macOS wheels
|
|
29
|
+
runs-on: macos-latest
|
|
30
|
+
strategy:
|
|
31
|
+
matrix:
|
|
32
|
+
target: [x86_64, aarch64]
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
- uses: PyO3/maturin-action@v1
|
|
36
|
+
with:
|
|
37
|
+
target: ${{ matrix.target }}
|
|
38
|
+
args: --release --out dist --features pyo3/abi3-py39
|
|
39
|
+
- uses: actions/upload-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: wheels-macos-${{ matrix.target }}
|
|
42
|
+
path: dist
|
|
43
|
+
|
|
44
|
+
windows:
|
|
45
|
+
name: Windows wheels
|
|
46
|
+
runs-on: windows-latest
|
|
47
|
+
steps:
|
|
48
|
+
- uses: actions/checkout@v4
|
|
49
|
+
- uses: PyO3/maturin-action@v1
|
|
50
|
+
with:
|
|
51
|
+
args: --release --out dist --features pyo3/abi3-py39
|
|
52
|
+
- uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: wheels-windows-x86_64
|
|
55
|
+
path: dist
|
|
56
|
+
|
|
57
|
+
sdist:
|
|
58
|
+
name: Source distribution
|
|
59
|
+
runs-on: ubuntu-latest
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
- uses: PyO3/maturin-action@v1
|
|
63
|
+
with:
|
|
64
|
+
command: sdist
|
|
65
|
+
args: --out dist
|
|
66
|
+
- uses: actions/upload-artifact@v4
|
|
67
|
+
with:
|
|
68
|
+
name: wheels-sdist
|
|
69
|
+
path: dist
|
|
70
|
+
|
|
71
|
+
publish:
|
|
72
|
+
name: Publish to PyPI
|
|
73
|
+
runs-on: ubuntu-latest
|
|
74
|
+
needs: [linux, macos, windows, sdist]
|
|
75
|
+
environment: pypi
|
|
76
|
+
permissions:
|
|
77
|
+
id-token: write
|
|
78
|
+
steps:
|
|
79
|
+
- uses: actions/download-artifact@v4
|
|
80
|
+
with:
|
|
81
|
+
pattern: wheels-*
|
|
82
|
+
merge-multiple: true
|
|
83
|
+
path: dist
|
|
84
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Rust
|
|
2
|
+
target/
|
|
3
|
+
Cargo.lock
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
*.pyo
|
|
9
|
+
*.pyd
|
|
10
|
+
*.so
|
|
11
|
+
*.dylib
|
|
12
|
+
*.egg-info/
|
|
13
|
+
dist/
|
|
14
|
+
build/
|
|
15
|
+
.eggs/
|
|
16
|
+
*.egg
|
|
17
|
+
wheels/
|
|
18
|
+
|
|
19
|
+
# Virtual environments
|
|
20
|
+
.venv/
|
|
21
|
+
venv/
|
|
22
|
+
env/
|
|
23
|
+
|
|
24
|
+
# maturin
|
|
25
|
+
*.whl
|
|
26
|
+
|
|
27
|
+
# pytest
|
|
28
|
+
.pytest_cache/
|
|
29
|
+
|
|
30
|
+
# ruff
|
|
31
|
+
.ruff_cache/
|
|
32
|
+
|
|
33
|
+
# IDE
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
.DS_Store
|
pycanopy-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# PyCanopy
|
|
2
|
+
|
|
3
|
+
Geospatial query engine with dynamic index selection. Rust core, Python bindings via PyO3/Maturin.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## What This Is
|
|
8
|
+
|
|
9
|
+
A standalone Python-callable geospatial query engine that automatically selects the best spatial index based on dataset statistics and query type. The novel part is the query planner — the index implementations wrap existing crates.
|
|
10
|
+
|
|
11
|
+
Not a geometry operations library. Not a distributed engine. Scope: local, single-node, in-memory, pure spatial lookup (no attribute storage).
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Key Decisions
|
|
16
|
+
|
|
17
|
+
**Language:** Rust core + PyO3/Maturin. R-tree and KD-tree use `geo-index` packed immutable implementations. Grid written from scratch. `rstar` not used.
|
|
18
|
+
|
|
19
|
+
**Memory:** Hard cap at ~10GB. Fail with a clear error above the threshold.
|
|
20
|
+
|
|
21
|
+
**Wire format:** GeoArrow at the Python→Rust boundary (Arrow C Data Interface), converted to `Vec<Geometry<f64>>` (geo-types) at load time. geo-types used throughout Rust core.
|
|
22
|
+
|
|
23
|
+
**Query model:** Pure spatial lookup. Returns indices into the caller's dataset. No attribute storage or compound predicates in v1.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Query Planner Flow
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
query arrives
|
|
31
|
+
│
|
|
32
|
+
├─ size check → error if > 10GB
|
|
33
|
+
│
|
|
34
|
+
├─ selectivity estimate (query_bbox_area / total_extent_area)
|
|
35
|
+
│ selectivity > 0.5 or k/N > 0.1 → bypass index, full scan
|
|
36
|
+
│
|
|
37
|
+
├─ index selection
|
|
38
|
+
│ N < 500 → brute force
|
|
39
|
+
│ points + kNN → KD-tree
|
|
40
|
+
│ points + uniform + range → grid
|
|
41
|
+
│ points + clustered + range → KD-tree
|
|
42
|
+
│ polygons / mixed → R-tree
|
|
43
|
+
│ N > 1M + uniform → grid
|
|
44
|
+
│
|
|
45
|
+
├─ execution strategy
|
|
46
|
+
│ range → two-phase: MBR candidates → exact check
|
|
47
|
+
│ kNN → density-based radius estimate → bounded traversal
|
|
48
|
+
│ contains → two-phase
|
|
49
|
+
│
|
|
50
|
+
├─ parallelism
|
|
51
|
+
│ batch queries or N > 100K → rayon
|
|
52
|
+
│
|
|
53
|
+
└─ result pre-allocation from selectivity estimate
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Query Types (v1)
|
|
59
|
+
|
|
60
|
+
```rust
|
|
61
|
+
pub enum Query {
|
|
62
|
+
Knn { point: Point, k: usize, approximate: bool },
|
|
63
|
+
Range { bbox: Rect },
|
|
64
|
+
Contains { point: Point },
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Implementation Order
|
|
71
|
+
|
|
72
|
+
1. `Cargo.toml` + `pyproject.toml`
|
|
73
|
+
2. `src/stats/types.rs` — `DatasetStats`, `GeometryKind`
|
|
74
|
+
3. `src/stats/collector.rs`
|
|
75
|
+
4. `src/index/mod.rs` — `SpatialIndex` trait ✓
|
|
76
|
+
5. `src/index/brute.rs`
|
|
77
|
+
6. `src/index/rtree.rs` — wraps geo-index packed R-tree
|
|
78
|
+
7. `src/index/kdtree.rs` — wraps geo-index packed KD-tree
|
|
79
|
+
8. `src/index/grid.rs`
|
|
80
|
+
9. `src/planner/cost.rs`
|
|
81
|
+
10. `src/planner/calibration.rs`
|
|
82
|
+
11. `src/planner/selector.rs`
|
|
83
|
+
12. `src/query/types.rs`
|
|
84
|
+
13. `src/query/nearest.rs` + `range.rs`
|
|
85
|
+
14. `src/lib.rs` — PyO3 bindings
|
|
86
|
+
15. `python/` layer
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Deferred
|
|
91
|
+
|
|
92
|
+
- Spatial joins + join order (v2)
|
|
93
|
+
- Predicate pushdown (v2)
|
|
94
|
+
- Histogram-based selectivity estimation (v2; v1 uses `query_bbox_area / total_extent_area`)
|
|
95
|
+
- Learned/ML planner (v2)
|
|
96
|
+
- Out-of-core / mmap support (v2)
|
|
97
|
+
- **Delta buffer for incremental inserts (v2):** The packed immutable indices (geo-index RTree/KDTree) cannot accept point inserts after construction. v1 is load-once — recreate the Engine when data changes. v2 should add a write buffer: accumulate inserts into a small `delta: Vec<Geometry>`, answer queries against both the main index and a brute-force scan of the delta, and flush (rebuild the full index) when `delta.len()` exceeds a threshold (e.g. 5% of N).
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Style Guide
|
|
102
|
+
|
|
103
|
+
### Period convention (Rust and Python)
|
|
104
|
+
|
|
105
|
+
- Isolated single-line doc comment or docstring: **no period**
|
|
106
|
+
- Any single line within a multi-line block (summary line, body line, `Args`/`Returns` entry): **period**
|
|
107
|
+
- Last line of a multi-line block: **period**
|
|
108
|
+
|
|
109
|
+
Never use em dashes in comments or docstrings.
|
|
110
|
+
|
|
111
|
+
### Rust
|
|
112
|
+
|
|
113
|
+
- `///` rustdoc on all `pub` items
|
|
114
|
+
- Single-line: `/// Short description`
|
|
115
|
+
- Multi-line: summary line ends with period, body lines end with period
|
|
116
|
+
- Formatting enforced by `rustfmt` (`rustfmt.toml` at repo root)
|
|
117
|
+
- Linting via `cargo clippy`
|
|
118
|
+
|
|
119
|
+
### Python
|
|
120
|
+
|
|
121
|
+
- Google-style docstrings (`Args:` / `Returns:` sections, not NumPy `---` separators)
|
|
122
|
+
- Single-line: `"""Short description"""`
|
|
123
|
+
- Multi-line: summary line ends with period, `Args`/`Returns` entries end with period
|
|
124
|
+
- Hard dependencies imported at module level
|
|
125
|
+
- Optional dependencies imported inside the function that needs them (not in `try/except` at module level)
|
|
126
|
+
- Formatting and linting enforced by `ruff` (configured in `pyproject.toml`)
|
|
127
|
+
|
|
128
|
+
## Notes
|
|
129
|
+
|