portolan-cli 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/workflows/nightly.yml +2 -2
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.pre-commit-config.yaml +1 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/CHANGELOG.md +6 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/CLAUDE.md +48 -31
- portolan_cli-0.1.3/PKG-INFO +130 -0
- portolan_cli-0.1.3/README.md +83 -0
- portolan_cli-0.1.3/ROADMAP.md +252 -0
- portolan_cli-0.1.3/context/architecture.md +183 -0
- portolan_cli-0.1.3/context/shared/adr/0003-plugin-architecture.md +83 -0
- portolan_cli-0.1.3/context/shared/adr/0004-iceberg-as-plugin.md +79 -0
- portolan_cli-0.1.3/context/shared/adr/0005-versions-json-source-of-truth.md +92 -0
- portolan_cli-0.1.3/context/shared/adr/0006-remote-ownership-model.md +71 -0
- portolan_cli-0.1.3/context/shared/adr/0007-cli-wraps-api.md +95 -0
- portolan_cli-0.1.3/context/shared/adr/0008-pipx-for-installation.md +110 -0
- portolan_cli-0.1.3/docs/BRANDING.md +109 -0
- portolan_cli-0.1.3/docs/assets/images/cover.png +0 -0
- portolan_cli-0.1.3/docs/assets/images/icon-white.svg +5 -0
- portolan_cli-0.1.3/docs/assets/images/icon.svg +11 -0
- portolan_cli-0.1.3/docs/assets/images/logo.png +0 -0
- portolan_cli-0.1.3/docs/assets/images/logo.svg +1 -0
- portolan_cli-0.1.3/docs/assets/images/social-card.png +0 -0
- portolan_cli-0.1.3/docs/assets/stylesheets/extra.css +192 -0
- portolan_cli-0.1.3/docs/changelog.md +1 -0
- portolan_cli-0.1.3/docs/index.md +170 -0
- portolan_cli-0.1.3/docs/roadmap.md +1 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/mkdocs.yml +27 -5
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/pyproject.toml +2 -2
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/uv.lock +1 -1
- portolan_cli-0.1.2/PKG-INFO +0 -49
- portolan_cli-0.1.2/README.md +0 -2
- portolan_cli-0.1.2/context/architecture.md +0 -0
- portolan_cli-0.1.2/docs/changelog.md +0 -0
- portolan_cli-0.1.2/docs/index.md +0 -0
- portolan_cli-0.1.2/docs/roadmap.md +0 -30
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.claude/hooks/post-bash-remind.sh +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.claude/hooks/pre-read-check.sh +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.claude/hooks/prompt-inject.sh +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.coderabbit.yaml +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/CODEOWNERS +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/dependabot.yml +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/pull_request_template.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/workflows/ci.yml +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/workflows/docs.yml +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.github/workflows/release.yml +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.gitignore +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/.python-version +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/LICENSE +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/SECURITY.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/adr/0000-template.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/adr/0001-agentic-first-development.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/adr/0002-click-for-cli.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/documentation/ci.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/documentation/distill-mcp.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/context/shared/known-issues/example.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/docs/contributing.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/portolan_cli/__init__.py +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/portolan_cli/cli.py +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/portolan_cli/output.py +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/tests/conftest.py +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/tests/specs/README.md +0 -0
- {portolan_cli-0.1.2 → portolan_cli-0.1.3}/tests/test_placeholder.py +0 -0
|
@@ -65,7 +65,7 @@ jobs:
|
|
|
65
65
|
run: uv run mutmut html || true
|
|
66
66
|
|
|
67
67
|
- name: Upload mutation report
|
|
68
|
-
uses: actions/upload-artifact@
|
|
68
|
+
uses: actions/upload-artifact@v6
|
|
69
69
|
with:
|
|
70
70
|
name: mutation-report
|
|
71
71
|
path: html/
|
|
@@ -113,7 +113,7 @@ jobs:
|
|
|
113
113
|
fi
|
|
114
114
|
|
|
115
115
|
- name: Upload benchmark results
|
|
116
|
-
uses: actions/upload-artifact@
|
|
116
|
+
uses: actions/upload-artifact@v6
|
|
117
117
|
with:
|
|
118
118
|
name: benchmark-results
|
|
119
119
|
path: benchmark-results.json
|
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# Portolan CLI - Development Guide
|
|
2
2
|
|
|
3
|
+
## What is Portolan?
|
|
4
|
+
|
|
5
|
+
Portolan is a CLI for publishing and managing **cloud-native geospatial data catalogs**. It orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage (S3, GCS, Azure)—no running servers, just static files.
|
|
6
|
+
|
|
7
|
+
**Key concepts:**
|
|
8
|
+
- **STAC** (SpatioTemporal Asset Catalog) — The catalog metadata format
|
|
9
|
+
- **GeoParquet** — Cloud-optimized vector data (columnar, spatial indexing)
|
|
10
|
+
- **COG** (Cloud-Optimized GeoTIFF) — Cloud-optimized raster data (HTTP range requests)
|
|
11
|
+
- **versions.json** — Single source of truth for version history, sync state, and checksums
|
|
12
|
+
|
|
13
|
+
Portolan doesn't do the heavy lifting—it orchestrates libraries like `geoparquet-io` and `rio-cogeo`.
|
|
14
|
+
|
|
15
|
+
**Key dependencies (check these repos for API docs):**
|
|
16
|
+
- [geoparquet-io](https://github.com/geoparquet/geoparquet-io) — Vector format conversion
|
|
17
|
+
- [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles) — PMTiles generation from GeoParquet
|
|
18
|
+
- [rio-cogeo](https://github.com/cogeotiff/rio-cogeo) — Raster conversion to COG
|
|
19
|
+
|
|
3
20
|
## Guiding Principle
|
|
4
21
|
|
|
5
22
|
AI agents will write most of the code. Human review does not scale to match AI output volume. Therefore: every quality gate must be automated, every convention must be enforceable, and tests must be verified to actually test something.
|
|
@@ -8,7 +25,9 @@ AI agents will write most of the code. Human review does not scale to match AI o
|
|
|
8
25
|
|
|
9
26
|
| Resource | Location |
|
|
10
27
|
|----------|----------|
|
|
28
|
+
| **Roadmap** | `ROADMAP.md` |
|
|
11
29
|
| Contributing guide | `docs/contributing.md` |
|
|
30
|
+
| Architecture | `context/architecture.md` |
|
|
12
31
|
| CI/CD documentation | `context/shared/documentation/ci.md` |
|
|
13
32
|
| Distill MCP tools | `context/shared/documentation/distill-mcp.md` |
|
|
14
33
|
| ADRs | `context/shared/adr/` |
|
|
@@ -16,6 +35,19 @@ AI agents will write most of the code. Human review does not scale to match AI o
|
|
|
16
35
|
|
|
17
36
|
**Target Python version:** 3.10+ (matches geoparquet-io dependency)
|
|
18
37
|
|
|
38
|
+
### ADR Index
|
|
39
|
+
|
|
40
|
+
| ADR | Decision |
|
|
41
|
+
|-----|----------|
|
|
42
|
+
| [0001](context/shared/adr/0001-agentic-first-development.md) | Agentic-first: automate all quality gates, TDD mandatory |
|
|
43
|
+
| [0002](context/shared/adr/0002-click-for-cli.md) | Click for CLI framework |
|
|
44
|
+
| [0003](context/shared/adr/0003-plugin-architecture.md) | Plugin architecture for formats (GeoParquet/COG core, others optional) |
|
|
45
|
+
| [0004](context/shared/adr/0004-iceberg-as-plugin.md) | Iceberg as plugin, STAC remains catalog layer |
|
|
46
|
+
| [0005](context/shared/adr/0005-versions-json-source-of-truth.md) | versions.json as single source of truth |
|
|
47
|
+
| [0006](context/shared/adr/0006-remote-ownership-model.md) | Portolan owns bucket contents (no external edits) |
|
|
48
|
+
| [0007](context/shared/adr/0007-cli-wraps-api.md) | CLI wraps Python API (all logic in library layer) |
|
|
49
|
+
| [0008](context/shared/adr/0008-pipx-for-installation.md) | pipx for global installation, uv for development |
|
|
50
|
+
|
|
19
51
|
## Common Commands
|
|
20
52
|
|
|
21
53
|
```bash
|
|
@@ -125,11 +157,7 @@ Store small, representative data files in `tests/fixtures/`. Fixtures should be:
|
|
|
125
157
|
|
|
126
158
|
### Pre-commit Hooks
|
|
127
159
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
Hooks run: trailing-whitespace, end-of-file-fixer, check-yaml, check-toml, check-merge-conflict, mixed-line-ending, check-added-large-files, ruff (fix + format), vulture, xenon, mypy, fast unit tests, commitizen (commit-msg).
|
|
131
|
-
|
|
132
|
-
If a hook fails, fix the issue before committing. No `--no-verify`.
|
|
160
|
+
Install: `uv run pre-commit install`. All hooks are blocking—never bypass them with `--no-verify`. See `.pre-commit-config.yaml` for the full list.
|
|
133
161
|
|
|
134
162
|
## Code Quality
|
|
135
163
|
|
|
@@ -202,13 +230,6 @@ Releases are automated via commitizen on push to main. See `.github/workflows/re
|
|
|
202
230
|
| API contracts | Docstrings | All public functions/classes |
|
|
203
231
|
| Gotchas/quirks | CLAUDE.md or inline | Anything that surprised you |
|
|
204
232
|
|
|
205
|
-
### Why This Matters
|
|
206
|
-
|
|
207
|
-
- **AI agents start fresh each session** — They don't remember past conversations
|
|
208
|
-
- **Context files are their memory** — ADRs, known-issues, and CLAUDE.md persist knowledge
|
|
209
|
-
- **Documentation compounds** — Each documented decision helps all future sessions
|
|
210
|
-
- **Undocumented knowledge is lost** — If it's not written down, it doesn't exist for agents
|
|
211
|
-
|
|
212
233
|
### ADR Guidelines
|
|
213
234
|
|
|
214
235
|
Create an ADR (`context/shared/adr/NNNN-title.md`) when:
|
|
@@ -222,25 +243,10 @@ Use the template at `context/shared/adr/0000-template.md`.
|
|
|
222
243
|
|
|
223
244
|
### Two Documentation Audiences
|
|
224
245
|
|
|
225
|
-
| Audience | Location |
|
|
226
|
-
|----------|----------|
|
|
227
|
-
| **Humans** | `docs/` (mkdocs) |
|
|
228
|
-
| **AI agents** | Docstrings, CLAUDE.md, ADRs |
|
|
229
|
-
|
|
230
|
-
**Human docs (`docs/`):**
|
|
231
|
-
- Rendered website via mkdocs
|
|
232
|
-
- Prose-heavy with examples and screenshots
|
|
233
|
-
- Organized by user journey (getting started → advanced topics)
|
|
234
|
-
- Can be verbose — humans skim and navigate
|
|
235
|
-
|
|
236
|
-
**AI docs (in-repo):**
|
|
237
|
-
- Docstrings: Complete API contracts (args, returns, raises, examples)
|
|
238
|
-
- CLAUDE.md: Development patterns, commands, gotchas
|
|
239
|
-
- ADRs: Decision rationale with alternatives considered
|
|
240
|
-
- Inline comments: Non-obvious code behavior
|
|
241
|
-
- Dense and structured (tables, bullet lists) — agents parse linearly
|
|
242
|
-
|
|
243
|
-
**Key difference:** Human docs explain *how to use* the tool. AI docs explain *how to modify* the codebase.
|
|
246
|
+
| Audience | Location | Purpose |
|
|
247
|
+
|----------|----------|---------|
|
|
248
|
+
| **Humans** | `docs/` (mkdocs) | *How to use* — tutorials, visual guides |
|
|
249
|
+
| **AI agents** | Docstrings, CLAUDE.md, ADRs | *How to modify* — dense, structured, co-located with code |
|
|
244
250
|
|
|
245
251
|
## Standardized Terminal Output
|
|
246
252
|
|
|
@@ -256,6 +262,17 @@ error("No geometry column (required)") # ✗ Red X
|
|
|
256
262
|
detail("Processing chunk 3/10...") # Dimmed text
|
|
257
263
|
```
|
|
258
264
|
|
|
265
|
+
## Design Principles
|
|
266
|
+
|
|
267
|
+
| Principle | Meaning | ADR |
|
|
268
|
+
|-----------|---------|-----|
|
|
269
|
+
| **Don't duplicate** | Orchestrate libraries (geoparquet-io, rio-cogeo), never reimplement | — |
|
|
270
|
+
| **YAGNI** | No speculative features; complexity is expensive | — |
|
|
271
|
+
| **Interactive + automatable** | Every prompt has `--auto` fallback | — |
|
|
272
|
+
| **versions.json is truth** | Drives sync, validation, history | [ADR-0005](context/shared/adr/0005-versions-json-source-of-truth.md) |
|
|
273
|
+
| **Plugin interface early** | Handlers follow consistent interface for future plugins | [ADR-0003](context/shared/adr/0003-plugin-architecture.md) |
|
|
274
|
+
| **CLI wraps API** | All logic in library; CLI is thin Click layer | [ADR-0007](context/shared/adr/0007-cli-wraps-api.md) |
|
|
275
|
+
|
|
259
276
|
## Tool Usage
|
|
260
277
|
|
|
261
278
|
| Tool | Purpose | Documentation |
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: portolan-cli
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: A CLI tool for managing cloud-native geospatial data
|
|
5
|
+
Project-URL: Homepage, https://github.com/portolan-sdi/portolan-cli
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/portolan-sdi/portolan-cli/issues
|
|
7
|
+
Project-URL: Documentation, https://github.com/portolan-sdi/portolan-cli#readme
|
|
8
|
+
Project-URL: Source, https://github.com/portolan-sdi/portolan-cli
|
|
9
|
+
Author-email: Nissim Lebovits <nlebovits@pm.me>
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: cloud-native,geospatial,gis,io
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: click>=8.3.1
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: bandit>=1.9.3; extra == 'dev'
|
|
27
|
+
Requires-Dist: codespell>=2.4.1; extra == 'dev'
|
|
28
|
+
Requires-Dist: commitizen>=4.13.4; extra == 'dev'
|
|
29
|
+
Requires-Dist: hypothesis>=6.151.5; extra == 'dev'
|
|
30
|
+
Requires-Dist: mutmut>=3.4.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: mypy>=1.19.1; extra == 'dev'
|
|
32
|
+
Requires-Dist: pip-audit>=2.10.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pre-commit>=4.5.1; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-benchmark>=5.0.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-timeout>=2.3.1; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=9.0.2; extra == 'dev'
|
|
38
|
+
Requires-Dist: radon>=6.0.1; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.14.11; extra == 'dev'
|
|
40
|
+
Requires-Dist: vulture>=2.14; extra == 'dev'
|
|
41
|
+
Requires-Dist: xenon>=0.9.3; extra == 'dev'
|
|
42
|
+
Provides-Extra: docs
|
|
43
|
+
Requires-Dist: mkdocs-material>=9.7.1; extra == 'docs'
|
|
44
|
+
Requires-Dist: mkdocs>=1.6.1; extra == 'docs'
|
|
45
|
+
Requires-Dist: mkdocstrings[python]>=1.0.0; extra == 'docs'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
<div align="center">
|
|
49
|
+
<img src="docs/assets/images/logo.svg" alt="Portolan Logo" width="200"/>
|
|
50
|
+
<h1>Portolan CLI</h1>
|
|
51
|
+
<p><strong>Cloud-native geospatial data catalogs, simplified</strong></p>
|
|
52
|
+
</div>
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
A CLI for publishing and managing **cloud-native geospatial data catalogs**. Portolan orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage—no running servers, just static files.
|
|
57
|
+
|
|
58
|
+
## Why Portolan?
|
|
59
|
+
|
|
60
|
+
| Benefit | How |
|
|
61
|
+
|---------|-----|
|
|
62
|
+
| **Scalable** | Cloud object storage that scales to petabytes |
|
|
63
|
+
| **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
|
|
64
|
+
| **AI-Ready** | STAC metadata enables semantic search and LLM integration |
|
|
65
|
+
| **Cheap** | Pay only for storage + egress — no servers to run |
|
|
66
|
+
| **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
|
|
67
|
+
| **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
|
|
68
|
+
|
|
69
|
+
## What It Does
|
|
70
|
+
|
|
71
|
+
- **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
|
|
72
|
+
- **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
|
|
73
|
+
- **Version** datasets with checksums and history tracking
|
|
74
|
+
- **Sync** to S3, GCS, Azure, or any S3-compatible storage
|
|
75
|
+
|
|
76
|
+
## Quick Example
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
portolan init
|
|
80
|
+
portolan dataset add census.parquet --title "Census 2022" --auto
|
|
81
|
+
portolan remote add prod s3://my-bucket/catalog
|
|
82
|
+
portolan sync
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Installation
|
|
86
|
+
|
|
87
|
+
### Recommended: pipx (for global use)
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pipx install portolan-cli
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
This installs `portolan` in an isolated environment while making the command globally available.
|
|
94
|
+
|
|
95
|
+
If you don't have pipx installed:
|
|
96
|
+
```bash
|
|
97
|
+
python3 -m pip install --user pipx
|
|
98
|
+
python3 -m pipx ensurepath
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Alternative: pip
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install portolan-cli
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Note:** This installs into your global or user site-packages and may conflict with other packages.
|
|
108
|
+
|
|
109
|
+
### For Development
|
|
110
|
+
|
|
111
|
+
Use [uv](https://github.com/astral-sh/uv) for local development:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
git clone https://github.com/portolan-sdi/portolan-cli.git
|
|
115
|
+
cd portolan-cli
|
|
116
|
+
uv sync --all-extras
|
|
117
|
+
uv run portolan --help
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
See [Contributing Guide](docs/contributing.md) for full development setup.
|
|
121
|
+
|
|
122
|
+
## Documentation
|
|
123
|
+
|
|
124
|
+
- [Contributing Guide](docs/contributing.md)
|
|
125
|
+
- [Architecture](context/architecture.md)
|
|
126
|
+
- [Roadmap](ROADMAP.md)
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
Apache 2.0 — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="docs/assets/images/logo.svg" alt="Portolan Logo" width="200"/>
|
|
3
|
+
<h1>Portolan CLI</h1>
|
|
4
|
+
<p><strong>Cloud-native geospatial data catalogs, simplified</strong></p>
|
|
5
|
+
</div>
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
A CLI for publishing and managing **cloud-native geospatial data catalogs**. Portolan orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage—no running servers, just static files.
|
|
10
|
+
|
|
11
|
+
## Why Portolan?
|
|
12
|
+
|
|
13
|
+
| Benefit | How |
|
|
14
|
+
|---------|-----|
|
|
15
|
+
| **Scalable** | Cloud object storage that scales to petabytes |
|
|
16
|
+
| **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
|
|
17
|
+
| **AI-Ready** | STAC metadata enables semantic search and LLM integration |
|
|
18
|
+
| **Cheap** | Pay only for storage + egress — no servers to run |
|
|
19
|
+
| **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
|
|
20
|
+
| **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
|
|
21
|
+
|
|
22
|
+
## What It Does
|
|
23
|
+
|
|
24
|
+
- **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
|
|
25
|
+
- **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
|
|
26
|
+
- **Version** datasets with checksums and history tracking
|
|
27
|
+
- **Sync** to S3, GCS, Azure, or any S3-compatible storage
|
|
28
|
+
|
|
29
|
+
## Quick Example
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
portolan init
|
|
33
|
+
portolan dataset add census.parquet --title "Census 2022" --auto
|
|
34
|
+
portolan remote add prod s3://my-bucket/catalog
|
|
35
|
+
portolan sync
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
### Recommended: pipx (for global use)
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pipx install portolan-cli
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
This installs `portolan` in an isolated environment while making the command globally available.
|
|
47
|
+
|
|
48
|
+
If you don't have pipx installed:
|
|
49
|
+
```bash
|
|
50
|
+
python3 -m pip install --user pipx
|
|
51
|
+
python3 -m pipx ensurepath
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Alternative: pip
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install portolan-cli
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Note:** This installs into your global or user site-packages and may conflict with other packages.
|
|
61
|
+
|
|
62
|
+
### For Development
|
|
63
|
+
|
|
64
|
+
Use [uv](https://github.com/astral-sh/uv) for local development:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
git clone https://github.com/portolan-sdi/portolan-cli.git
|
|
68
|
+
cd portolan-cli
|
|
69
|
+
uv sync --all-extras
|
|
70
|
+
uv run portolan --help
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
See [Contributing Guide](docs/contributing.md) for full development setup.
|
|
74
|
+
|
|
75
|
+
## Documentation
|
|
76
|
+
|
|
77
|
+
- [Contributing Guide](docs/contributing.md)
|
|
78
|
+
- [Architecture](context/architecture.md)
|
|
79
|
+
- [Roadmap](ROADMAP.md)
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
Apache 2.0 — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# Portolan Ecosystem Roadmap
|
|
2
|
+
|
|
3
|
+
## Vision
|
|
4
|
+
|
|
5
|
+
Portolan makes it easy to publish and consume cloud-native geospatial data. The ecosystem includes a spec, CLI, format plugins, a QGIS plugin, and a global data bootstrapper—each designed to work standalone or together.
|
|
6
|
+
|
|
7
|
+
Development is **spec-driven but implementation-informed**: the [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) evolves alongside the CLI.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Phase 1: Core CLI + Spec
|
|
12
|
+
|
|
13
|
+
The foundation. A complete, working CLI with Python API underneath.
|
|
14
|
+
|
|
15
|
+
### Epic: Dataset Lifecycle
|
|
16
|
+
|
|
17
|
+
Convert files to cloud-native formats, manage metadata, organize into a local catalog. Vector conversion uses [geoparquet-io](https://github.com/geoparquet/geoparquet-io).
|
|
18
|
+
|
|
19
|
+
| Capability | Description |
|
|
20
|
+
|------------|-------------|
|
|
21
|
+
| `portolan init` | Create `.portolan/` catalog structure |
|
|
22
|
+
| `portolan dataset add` | Detect format → convert (GeoParquet/COG) → extract metadata → stage |
|
|
23
|
+
| `portolan dataset remove` | Remove datasets from catalog |
|
|
24
|
+
| `portolan dataset list/info` | Catalog exploration |
|
|
25
|
+
| Interactive + `--auto` | Works for humans and agents |
|
|
26
|
+
|
|
27
|
+
### Epic: Cloud Sync
|
|
28
|
+
|
|
29
|
+
Push catalogs to object storage. Portolan owns the bucket contents.
|
|
30
|
+
|
|
31
|
+
| Capability | Description |
|
|
32
|
+
|------------|-------------|
|
|
33
|
+
| `portolan remote add/list` | Configure S3, GCS, Azure backends |
|
|
34
|
+
| `portolan sync` | Push `.portolan/` to remote |
|
|
35
|
+
| `versions.json` | Version history, checksums, sync state |
|
|
36
|
+
|
|
37
|
+
### Epic: Validation & Repair
|
|
38
|
+
|
|
39
|
+
Ensure catalogs meet the Portolan spec. Detect and fix drift.
|
|
40
|
+
|
|
41
|
+
| Capability | Description |
|
|
42
|
+
|------------|-------------|
|
|
43
|
+
| `portolan check` | Validate local catalog against spec |
|
|
44
|
+
| `portolan check --remote` | Detect drift (external edits to bucket) |
|
|
45
|
+
| `portolan repair` | Re-sync remote from local truth |
|
|
46
|
+
| `portolan prune` | Clean up old versions |
|
|
47
|
+
| Actionable output | Specific guidance, not just pass/fail |
|
|
48
|
+
|
|
49
|
+
### Epic: Styling & Thumbnails
|
|
50
|
+
|
|
51
|
+
Make datasets visually browsable.
|
|
52
|
+
|
|
53
|
+
| Capability | Description |
|
|
54
|
+
|------------|-------------|
|
|
55
|
+
| `style.json` | MapLibre-compatible style definitions |
|
|
56
|
+
| Thumbnail generation | Auto-render preview images |
|
|
57
|
+
| Smart defaults | Infer styles from data characteristics |
|
|
58
|
+
|
|
59
|
+
### Epic: PMTiles Generation
|
|
60
|
+
|
|
61
|
+
Generate vector tile overviews from GeoParquet datasets using [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles).
|
|
62
|
+
|
|
63
|
+
| Capability | Description |
|
|
64
|
+
|------------|-------------|
|
|
65
|
+
| PMTiles as derivative | Generated from GeoParquet for web display |
|
|
66
|
+
| Automatic on `dataset add` | Optional; controlled by flag or config |
|
|
67
|
+
| Stored alongside source | Part of the dataset, not a separate dataset |
|
|
68
|
+
|
|
69
|
+
**Note:** PMTiles are a *view* of the data for rendering, not the source of truth. GeoParquet remains the canonical format. (PMTiles *could* be added as standalone datasets, but the primary use case is as overviews.)
|
|
70
|
+
|
|
71
|
+
### Epic: COPC Support
|
|
72
|
+
|
|
73
|
+
Cloud-optimized point clouds for LiDAR and similar data.
|
|
74
|
+
|
|
75
|
+
| Capability | Description |
|
|
76
|
+
|------------|-------------|
|
|
77
|
+
| COPC conversion | Convert point cloud formats to COPC |
|
|
78
|
+
| Metadata extraction | Bounds, point count, CRS |
|
|
79
|
+
| Styling conventions | Point cloud visualization defaults |
|
|
80
|
+
|
|
81
|
+
### Epic: Python API
|
|
82
|
+
|
|
83
|
+
All functionality is implemented as a Python library; CLI wraps it.
|
|
84
|
+
|
|
85
|
+
| Capability | Description |
|
|
86
|
+
|------------|-------------|
|
|
87
|
+
| `Catalog` class | `init()`, `add()`, `sync()`, `check()` |
|
|
88
|
+
| Built simultaneously | API *is* the implementation; CLI is the interface |
|
|
89
|
+
| Agent-friendly | Clear errors, predictable outputs |
|
|
90
|
+
| `SKILLS.md` | LLM-optimized documentation |
|
|
91
|
+
|
|
92
|
+
### Spec Evolution (Phase 1)
|
|
93
|
+
|
|
94
|
+
The [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) develops in lockstep:
|
|
95
|
+
|
|
96
|
+
- Required metadata fields
|
|
97
|
+
- Catalog structure and naming
|
|
98
|
+
- Validation rules
|
|
99
|
+
- Remote structure and versioning
|
|
100
|
+
- PMTiles and COPC conventions
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Parallel: Iceberg Plugin
|
|
105
|
+
|
|
106
|
+
Tabular analytics on geospatial data. Developed by Javier alongside Phase 1.
|
|
107
|
+
|
|
108
|
+
| Capability | Description |
|
|
109
|
+
|------------|-------------|
|
|
110
|
+
| `portolan-iceberg` | Apache Iceberg tables alongside STAC |
|
|
111
|
+
| Query integration | SQL/DataFrame access to versioned data |
|
|
112
|
+
|
|
113
|
+
**Note:** Separate package, separate maintainer, but expected to land around the same time as Phase 1. STAC remains the catalog layer; Iceberg is the analytics layer.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Phase 2: QGIS Plugin
|
|
118
|
+
|
|
119
|
+
Bring Portolan catalogs into desktop GIS workflows.
|
|
120
|
+
|
|
121
|
+
| Capability | Description |
|
|
122
|
+
|------------|-------------|
|
|
123
|
+
| Browse catalogs | Connect to Portolan remotes, explore datasets |
|
|
124
|
+
| Pull data | Load GeoParquet/COG into QGIS layers |
|
|
125
|
+
| Edit metadata | Update titles, descriptions, licenses |
|
|
126
|
+
| Spec validation | Check datasets from within QGIS |
|
|
127
|
+
|
|
128
|
+
**Dependency:** Phase 1 complete.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Phase 3: Global Data Bootstrapper
|
|
133
|
+
|
|
134
|
+
Subset global datasets to bootstrap local catalogs.
|
|
135
|
+
|
|
136
|
+
| Capability | Description |
|
|
137
|
+
|------------|-------------|
|
|
138
|
+
| Source registry | Curated global datasets (Overture, ESA, etc.) |
|
|
139
|
+
| Region extraction | Clip to bounding box or admin boundary |
|
|
140
|
+
| One-command bootstrap | `portolan bootstrap --region "Nairobi"` |
|
|
141
|
+
|
|
142
|
+
**Dependency:** Phase 1 complete.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## TBD: Access Control & Visibility
|
|
147
|
+
|
|
148
|
+
Multi-tenant access control for teams sharing a Portolan catalog. Timing and scope to be determined.
|
|
149
|
+
|
|
150
|
+
### Potential Capabilities
|
|
151
|
+
|
|
152
|
+
| Capability | Description |
|
|
153
|
+
|------------|-------------|
|
|
154
|
+
| Visibility metadata | Mark datasets as `public` or `private` with optional tenant assignment |
|
|
155
|
+
| User management | Create/list/remove users with credentials |
|
|
156
|
+
| Access policies | Grant/revoke user access to specific dataset paths |
|
|
157
|
+
| Policy enforcement | Integration with storage IAM (MinIO, S3, GCS) |
|
|
158
|
+
|
|
159
|
+
### Open Questions
|
|
160
|
+
|
|
161
|
+
- **Where does auth live?** Portolan is "static files only" — user management likely requires a sidecar service or delegation to storage provider IAM
|
|
162
|
+
- **MinIO vs generic?** MinIO has rich policy APIs; S3/GCS use IAM. Do we abstract or specialize?
|
|
163
|
+
- **Scope boundary:** Portolan may just *tag* visibility; enforcement happens at the storage layer
|
|
164
|
+
|
|
165
|
+
### Example Workflow (Conceptual)
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Add a public dataset
|
|
169
|
+
portolan dataset add satellite.parquet --visibility public
|
|
170
|
+
|
|
171
|
+
# Add a private dataset for a tenant
|
|
172
|
+
portolan dataset add confidential.parquet --visibility private --tenant acme
|
|
173
|
+
|
|
174
|
+
# User management (if Portolan handles it)
|
|
175
|
+
portolan user add analyst
|
|
176
|
+
portolan access grant analyst 'acme/*'
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**See also:** [ADR-0006 (Remote Ownership Model)](https://github.com/portolan-sdi/portolan-cli/blob/main/context/shared/adr/0006-remote-ownership-model.md) — explains why multi-user collaboration is complex under the current ownership model.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## TBD: Data Consumption & SQL Engines
|
|
184
|
+
|
|
185
|
+
Documentation and tooling for consuming Portolan catalogs from analytics engines. Timing to be determined.
|
|
186
|
+
|
|
187
|
+
### Potential Capabilities
|
|
188
|
+
|
|
189
|
+
| Capability | Description |
|
|
190
|
+
|------------|-------------|
|
|
191
|
+
| Consumption guides | How to query Portolan catalogs from popular engines |
|
|
192
|
+
| Connection snippets | Copy-paste connection strings for each engine |
|
|
193
|
+
| `portolan connect` | Generate connection config for a specific engine |
|
|
194
|
+
|
|
195
|
+
### Target Engines
|
|
196
|
+
|
|
197
|
+
| Engine | Protocol | Notes |
|
|
198
|
+
|--------|----------|-------|
|
|
199
|
+
| **DuckDB** | S3/HTTP | Native GeoParquet support |
|
|
200
|
+
| **Snowflake** | External tables | Via stage or external access |
|
|
201
|
+
| **BigQuery** | BigLake / external tables | GCS-native |
|
|
202
|
+
| **Databricks** | Unity Catalog / S3 | Delta Lake interop via Iceberg |
|
|
203
|
+
| **Trino/Presto** | Hive connector | S3-backed |
|
|
204
|
+
| **Oracle** | External tables | Via object storage |
|
|
205
|
+
| **Pandas/GeoPandas** | obstore / fsspec | Direct Python access |
|
|
206
|
+
|
|
207
|
+
### Example: DuckDB
|
|
208
|
+
|
|
209
|
+
```sql
|
|
210
|
+
-- Configure S3 access
|
|
211
|
+
SET s3_endpoint = 'storage.example.com';
|
|
212
|
+
SET s3_access_key_id = 'analyst';
|
|
213
|
+
SET s3_secret_access_key = '...';
|
|
214
|
+
SET s3_use_ssl = true;
|
|
215
|
+
|
|
216
|
+
-- Query a Portolan dataset directly
|
|
217
|
+
SELECT * FROM 's3://catalog/public/census/census.parquet';
|
|
218
|
+
|
|
219
|
+
-- Or with spatial filtering
|
|
220
|
+
SELECT * FROM 's3://catalog/public/census/census.parquet'
|
|
221
|
+
WHERE ST_Within(geometry, ST_GeomFromText('POLYGON((...))'));
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Why This Matters
|
|
225
|
+
|
|
226
|
+
Portolan's value is "publish once, consume anywhere." Without consumption docs, users can publish data but have no guidance on how to query it. Consumption guides and connection tooling close that loop.
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Out of Scope for v1.0
|
|
231
|
+
|
|
232
|
+
| Item | Reason |
|
|
233
|
+
|------|--------|
|
|
234
|
+
| 3D Tiles | Niche; can be community-contributed later |
|
|
235
|
+
| Browser/Map UI | May be unnecessary with agentic workflows; revisit post-v1 |
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Summary
|
|
240
|
+
|
|
241
|
+
| Phase | Scope | Timing |
|
|
242
|
+
|-------|-------|--------|
|
|
243
|
+
| **Phase 1** | Core CLI, Python API, Spec, PMTiles, COPC | Now |
|
|
244
|
+
| **Parallel** | Iceberg Plugin (Javier) | Alongside Phase 1 |
|
|
245
|
+
| **Phase 2** | QGIS Plugin | After Phase 1 |
|
|
246
|
+
| **Phase 3** | Global Bootstrapper | After Phase 1 |
|
|
247
|
+
| **TBD** | Access Control & Visibility | To be scoped |
|
|
248
|
+
| **TBD** | Data Consumption & SQL Engines | To be scoped |
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
*Portolan is an open-source project under [Radiant Earth](https://radiant.earth).*
|