pycanopy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pycanopy-0.1.0/.cargo/config.toml +5 -0
  2. pycanopy-0.1.0/.github/workflows/CI.yml +37 -0
  3. pycanopy-0.1.0/.github/workflows/release.yml +84 -0
  4. pycanopy-0.1.0/.gitignore +38 -0
  5. pycanopy-0.1.0/CLAUDE.md +129 -0
  6. pycanopy-0.1.0/Cargo.lock +1028 -0
  7. pycanopy-0.1.0/Cargo.toml +33 -0
  8. pycanopy-0.1.0/LICENSE +21 -0
  9. pycanopy-0.1.0/PKG-INFO +91 -0
  10. pycanopy-0.1.0/README.md +65 -0
  11. pycanopy-0.1.0/benches/calibrate.rs +1 -0
  12. pycanopy-0.1.0/benches/index_comparison.rs +1 -0
  13. pycanopy-0.1.0/examples/basic_nearest.py +0 -0
  14. pycanopy-0.1.0/examples/range_query.py +0 -0
  15. pycanopy-0.1.0/pyproject.toml +66 -0
  16. pycanopy-0.1.0/python/pycanopy/__init__.py +4 -0
  17. pycanopy-0.1.0/python/pycanopy/engine.py +163 -0
  18. pycanopy-0.1.0/python/pycanopy/query.py +18 -0
  19. pycanopy-0.1.0/rustfmt.toml +3 -0
  20. pycanopy-0.1.0/src/index/brute.rs +152 -0
  21. pycanopy-0.1.0/src/index/grid.rs +287 -0
  22. pycanopy-0.1.0/src/index/kdtree.rs +161 -0
  23. pycanopy-0.1.0/src/index/mod.rs +35 -0
  24. pycanopy-0.1.0/src/index/rtree.rs +114 -0
  25. pycanopy-0.1.0/src/lib.rs +186 -0
  26. pycanopy-0.1.0/src/planner/calibration.rs +19 -0
  27. pycanopy-0.1.0/src/planner/cost.rs +86 -0
  28. pycanopy-0.1.0/src/planner/mod.rs +3 -0
  29. pycanopy-0.1.0/src/planner/selector.rs +139 -0
  30. pycanopy-0.1.0/src/query/join.rs +1 -0
  31. pycanopy-0.1.0/src/query/mod.rs +4 -0
  32. pycanopy-0.1.0/src/query/nearest.rs +14 -0
  33. pycanopy-0.1.0/src/query/range.rs +119 -0
  34. pycanopy-0.1.0/src/query/types.rs +17 -0
  35. pycanopy-0.1.0/src/stats/collector.rs +223 -0
  36. pycanopy-0.1.0/src/stats/mod.rs +2 -0
  37. pycanopy-0.1.0/src/stats/types.rs +91 -0
  38. pycanopy-0.1.0/tests/python/test_engine.py +168 -0
  39. pycanopy-0.1.0/tests/python/test_query.py +100 -0
  40. pycanopy-0.1.0/tests/rust/index_tests.rs +172 -0
  41. pycanopy-0.1.0/tests/rust/planner_tests.rs +133 -0
  42. pycanopy-0.1.0/tests/rust/stats_tests.rs +76 -0
  43. pycanopy-0.1.0/tests/rust.rs +8 -0
@@ -0,0 +1,5 @@
1
+ [target.aarch64-apple-darwin]
2
+ rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"]
3
+
4
+ [target.x86_64-apple-darwin]
5
+ rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"]
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ rust:
11
+ name: Rust tests
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: dtolnay/rust-toolchain@stable
16
+ - uses: Swatinem/rust-cache@v2
17
+ - run: cargo test
18
+ - run: cargo fmt --check
19
+ - run: cargo clippy -- -D warnings
20
+
21
+ python:
22
+ name: Python tests
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: dtolnay/rust-toolchain@stable
27
+ - uses: Swatinem/rust-cache@v2
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: "3.12"
31
+ - name: Install uv
32
+ uses: astral-sh/setup-uv@v4
33
+ - run: uv sync --group dev
34
+ - run: uv run maturin develop
35
+ - run: uv run ruff check python/ tests/python/
36
+ - run: uv run ruff format --check python/ tests/python/
37
+ - run: uv run pytest
@@ -0,0 +1,84 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ linux:
10
+ name: Linux wheels
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ target: [x86_64, aarch64]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: PyO3/maturin-action@v1
18
+ with:
19
+ target: ${{ matrix.target }}
20
+ args: --release --out dist --features pyo3/abi3-py39
21
+ manylinux: auto
22
+ - uses: actions/upload-artifact@v4
23
+ with:
24
+ name: wheels-linux-${{ matrix.target }}
25
+ path: dist
26
+
27
+ macos:
28
+ name: macOS wheels
29
+ runs-on: macos-latest
30
+ strategy:
31
+ matrix:
32
+ target: [x86_64, aarch64]
33
+ steps:
34
+ - uses: actions/checkout@v4
35
+ - uses: PyO3/maturin-action@v1
36
+ with:
37
+ target: ${{ matrix.target }}
38
+ args: --release --out dist --features pyo3/abi3-py39
39
+ - uses: actions/upload-artifact@v4
40
+ with:
41
+ name: wheels-macos-${{ matrix.target }}
42
+ path: dist
43
+
44
+ windows:
45
+ name: Windows wheels
46
+ runs-on: windows-latest
47
+ steps:
48
+ - uses: actions/checkout@v4
49
+ - uses: PyO3/maturin-action@v1
50
+ with:
51
+ args: --release --out dist --features pyo3/abi3-py39
52
+ - uses: actions/upload-artifact@v4
53
+ with:
54
+ name: wheels-windows-x86_64
55
+ path: dist
56
+
57
+ sdist:
58
+ name: Source distribution
59
+ runs-on: ubuntu-latest
60
+ steps:
61
+ - uses: actions/checkout@v4
62
+ - uses: PyO3/maturin-action@v1
63
+ with:
64
+ command: sdist
65
+ args: --out dist
66
+ - uses: actions/upload-artifact@v4
67
+ with:
68
+ name: wheels-sdist
69
+ path: dist
70
+
71
+ publish:
72
+ name: Publish to PyPI
73
+ runs-on: ubuntu-latest
74
+ needs: [linux, macos, windows, sdist]
75
+ environment: pypi
76
+ permissions:
77
+ id-token: write
78
+ steps:
79
+ - uses: actions/download-artifact@v4
80
+ with:
81
+ pattern: wheels-*
82
+ merge-multiple: true
83
+ path: dist
84
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,38 @@
1
+ # Rust
2
+ target/
3
+ Cargo.lock
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.pyo
9
+ *.pyd
10
+ *.so
11
+ *.dylib
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+ .eggs/
16
+ *.egg
17
+ wheels/
18
+
19
+ # Virtual environments
20
+ .venv/
21
+ venv/
22
+ env/
23
+
24
+ # maturin
25
+ *.whl
26
+
27
+ # pytest
28
+ .pytest_cache/
29
+
30
+ # ruff
31
+ .ruff_cache/
32
+
33
+ # IDE
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ .DS_Store
@@ -0,0 +1,129 @@
1
+ # PyCanopy
2
+
3
+ Geospatial query engine with dynamic index selection. Rust core, Python bindings via PyO3/Maturin.
4
+
5
+ ---
6
+
7
+ ## What This Is
8
+
9
+ A standalone Python-callable geospatial query engine that automatically selects the best spatial index based on dataset statistics and query type. The novel part is the query planner — the index implementations wrap existing crates.
10
+
11
+ Not a geometry operations library. Not a distributed engine. Scope: local, single-node, in-memory, pure spatial lookup (no attribute storage).
12
+
13
+ ---
14
+
15
+ ## Key Decisions
16
+
17
+ **Language:** Rust core + PyO3/Maturin. R-tree and KD-tree use `geo-index` packed immutable implementations. Grid written from scratch. `rstar` not used.
18
+
19
+ **Memory:** Hard cap at ~10GB. Fail with a clear error above the threshold.
20
+
21
+ **Wire format:** GeoArrow at the Python→Rust boundary (Arrow C Data Interface), converted to `Vec<Geometry<f64>>` (geo-types) at load time. `geo-types` is used throughout the Rust core.
22
+
23
+ **Query model:** Pure spatial lookup. Returns indices into the caller's dataset. No attribute storage or compound predicates in v1.
24
+
25
+ ---
26
+
27
+ ## Query Planner Flow
28
+
29
+ ```
30
+ query arrives
31
+
32
+ ├─ size check → error if > 10GB
33
+
34
+ ├─ selectivity estimate (query_bbox_area / total_extent_area)
35
+ │ > 0.5 or k/N > 0.1 → bypass index, full scan
36
+
37
+ ├─ index selection
38
+ │ N < 500 → brute force
39
+ │ points + kNN → KD-tree
40
+ │ points + uniform + range → grid
41
+ │ points + clustered + range → KD-tree
42
+ │ polygons / mixed → R-tree
43
+ │ N > 1M + uniform → grid
44
+
45
+ ├─ execution strategy
46
+ │ range → two-phase: MBR candidates → exact check
47
+ │ kNN → density-based radius estimate → bounded traversal
48
+ │ contains → two-phase
49
+
50
+ ├─ parallelism
51
+ │ batch queries or N > 100K → rayon
52
+
53
+ └─ result pre-allocation from selectivity estimate
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Query Types (v1)
59
+
60
+ ```rust
61
+ pub enum Query {
62
+ Knn { point: Point, k: usize, approximate: bool },
63
+ Range { bbox: Rect },
64
+ Contains { point: Point },
65
+ }
66
+ ```
67
+
68
+ ---
69
+
70
+ ## Implementation Order
71
+
72
+ 1. `Cargo.toml` + `pyproject.toml`
73
+ 2. `src/stats/types.rs` — `DatasetStats`, `GeometryKind`
74
+ 3. `src/stats/collector.rs`
75
+ 4. `src/index/mod.rs` — `SpatialIndex` trait ✓
76
+ 5. `src/index/brute.rs`
77
+ 6. `src/index/rtree.rs` — wraps geo-index packed R-tree
78
+ 7. `src/index/kdtree.rs` — wraps geo-index packed KD-tree
79
+ 8. `src/index/grid.rs`
80
+ 9. `src/planner/cost.rs`
81
+ 10. `src/planner/calibration.rs`
82
+ 11. `src/planner/selector.rs`
83
+ 12. `src/query/types.rs`
84
+ 13. `src/query/nearest.rs` + `range.rs`
85
+ 14. `src/lib.rs` — PyO3 bindings
86
+ 15. `python/` layer
87
+
88
+ ---
89
+
90
+ ## Deferred
91
+
92
+ - Spatial joins + join order (v2)
93
+ - Predicate pushdown (v2)
94
+ - Histogram-based selectivity estimation (v2; v1 uses `query_bbox_area / total_extent_area`)
95
+ - Learned/ML planner (v2)
96
+ - Out-of-core / mmap support (v2)
97
+ - **Delta buffer for incremental inserts (v2):** The packed immutable indices (geo-index RTree/KDTree) cannot accept point inserts after construction. v1 is load-once — recreate the Engine when data changes. v2 should add a write buffer: accumulate inserts into a small `delta: Vec<Geometry>`, answer queries against both the main index and a brute-force scan of the delta, and flush (rebuild the full index) when `delta.len()` exceeds a threshold (e.g. 5% of N).
98
+
99
+ ---
100
+
101
+ ## Style Guide
102
+
103
+ ### Period convention (Rust and Python)
104
+
105
+ - Isolated single-line doc comment or docstring: **no period**
106
+ - Single-line within a multi-line block: **period**
107
+ - Last line of a multi-line block: **period**
108
+
109
+ Never use em dashes in comments or docstrings.
110
+
111
+ ### Rust
112
+
113
+ - `///` rustdoc on all `pub` items
114
+ - Single-line: `/// Short description`
115
+ - Multi-line: summary line ends with period, body lines end with period
116
+ - Formatting enforced by `rustfmt` (`rustfmt.toml` at repo root)
117
+ - Linting via `cargo clippy`
118
+
119
+ ### Python
120
+
121
+ - Google-style docstrings (`Args:` / `Returns:` sections, not NumPy `---` separators)
122
+ - Single-line: `"""Short description"""`
123
+ - Multi-line: summary line ends with period, `Args`/`Returns` entries end with period
124
+ - Hard dependencies imported at module level
125
+ - Optional dependencies imported inside the function that needs them (not in `try/except` at module level)
126
+ - Formatting and linting enforced by `ruff` (configured in `pyproject.toml`)
127
+
128
+ ## Notes
129
+