portolan-cli 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. portolan_cli-0.2.0/.github/SOCIAL_PREVIEW.md +30 -0
  2. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/workflows/nightly.yml +2 -2
  3. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.pre-commit-config.yaml +1 -0
  4. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/CHANGELOG.md +12 -0
  5. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/CLAUDE.md +48 -31
  6. portolan_cli-0.2.0/PKG-INFO +133 -0
  7. portolan_cli-0.2.0/README.md +83 -0
  8. portolan_cli-0.2.0/ROADMAP.md +252 -0
  9. portolan_cli-0.2.0/context/architecture.md +183 -0
  10. portolan_cli-0.2.0/context/shared/adr/0003-plugin-architecture.md +83 -0
  11. portolan_cli-0.2.0/context/shared/adr/0004-iceberg-as-plugin.md +79 -0
  12. portolan_cli-0.2.0/context/shared/adr/0005-versions-json-source-of-truth.md +92 -0
  13. portolan_cli-0.2.0/context/shared/adr/0006-remote-ownership-model.md +71 -0
  14. portolan_cli-0.2.0/context/shared/adr/0007-cli-wraps-api.md +95 -0
  15. portolan_cli-0.2.0/context/shared/adr/0008-pipx-for-installation.md +110 -0
  16. portolan_cli-0.2.0/context/shared/known-issues/pyarrow-abseil-abi.md +36 -0
  17. portolan_cli-0.2.0/docs/BRANDING.md +109 -0
  18. portolan_cli-0.2.0/docs/assets/images/cover.png +0 -0
  19. portolan_cli-0.2.0/docs/assets/images/icon-white.svg +5 -0
  20. portolan_cli-0.2.0/docs/assets/images/icon.svg +11 -0
  21. portolan_cli-0.2.0/docs/assets/images/logo.png +0 -0
  22. portolan_cli-0.2.0/docs/assets/images/logo.svg +1 -0
  23. portolan_cli-0.2.0/docs/assets/images/social-card.png +0 -0
  24. portolan_cli-0.2.0/docs/assets/stylesheets/extra.css +192 -0
  25. portolan_cli-0.2.0/docs/changelog.md +1 -0
  26. portolan_cli-0.2.0/docs/index.md +174 -0
  27. portolan_cli-0.2.0/docs/roadmap.md +1 -0
  28. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/mkdocs.yml +27 -5
  29. portolan_cli-0.2.0/portolan_cli/__init__.py +5 -0
  30. portolan_cli-0.2.0/portolan_cli/catalog.py +94 -0
  31. portolan_cli-0.2.0/portolan_cli/cli.py +38 -0
  32. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/pyproject.toml +6 -3
  33. portolan_cli-0.2.0/tests/unit/test_catalog_init.py +65 -0
  34. portolan_cli-0.2.0/tests/unit/test_cli_init.py +63 -0
  35. portolan_cli-0.2.0/uv.lock +3702 -0
  36. portolan_cli-0.1.2/PKG-INFO +0 -49
  37. portolan_cli-0.1.2/README.md +0 -2
  38. portolan_cli-0.1.2/context/architecture.md +0 -0
  39. portolan_cli-0.1.2/docs/changelog.md +0 -0
  40. portolan_cli-0.1.2/docs/index.md +0 -0
  41. portolan_cli-0.1.2/docs/roadmap.md +0 -30
  42. portolan_cli-0.1.2/portolan_cli/cli.py +0 -0
  43. portolan_cli-0.1.2/uv.lock +0 -1975
  44. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.claude/hooks/post-bash-remind.sh +0 -0
  45. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.claude/hooks/pre-read-check.sh +0 -0
  46. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.claude/hooks/prompt-inject.sh +0 -0
  47. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.coderabbit.yaml +0 -0
  48. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/CODEOWNERS +0 -0
  49. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/dependabot.yml +0 -0
  50. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/pull_request_template.md +0 -0
  51. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/workflows/ci.yml +0 -0
  52. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/workflows/docs.yml +0 -0
  53. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.github/workflows/release.yml +0 -0
  54. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.gitignore +0 -0
  55. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/.python-version +0 -0
  56. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/LICENSE +0 -0
  57. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/SECURITY.md +0 -0
  58. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/adr/0000-template.md +0 -0
  59. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/adr/0001-agentic-first-development.md +0 -0
  60. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/adr/0002-click-for-cli.md +0 -0
  61. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/documentation/ci.md +0 -0
  62. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/documentation/distill-mcp.md +0 -0
  63. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/context/shared/known-issues/example.md +0 -0
  64. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/docs/contributing.md +0 -0
  65. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/portolan_cli/output.py +0 -0
  66. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/tests/conftest.py +0 -0
  67. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/tests/specs/README.md +0 -0
  68. {portolan_cli-0.1.2 → portolan_cli-0.2.0}/tests/test_placeholder.py +0 -0
  69. {portolan_cli-0.1.2/portolan_cli → portolan_cli-0.2.0/tests/unit}/__init__.py +0 -0
@@ -0,0 +1,30 @@
1
+ # GitHub Social Preview Setup
2
+
3
+ To configure the repository's social preview image:
4
+
5
+ 1. Go to repository **Settings** → **General**
6
+ 2. Scroll to **Social preview**
7
+ 3. Click **Edit**
8
+ 4. Upload the image at `docs/assets/images/social-card.png` (3500x1440px)
9
+
10
+ This will make the Portolan logo and cover image appear when sharing the repository on social media, Slack, Discord, etc.
11
+
12
+ ## Files Available
13
+
14
+ - **Logo (square)**: `docs/assets/images/logo.png` (1000x1000)
15
+ - **Logo (vector)**: `docs/assets/images/logo.svg`
16
+ - **Social card**: `docs/assets/images/social-card.png` (3500x1440)
17
+
18
+ ## Brand Colors
19
+
20
+ - **Background**: `#eaedf9`
21
+ - **Dark text**: `#202a4f`
22
+ - **Primary**: `#4163cc`
23
+ - **Gradient**: `#395eca` → `#848bd8`
24
+
25
+ ## Typography
26
+
27
+ - **Font**: Archivo Medium
28
+ - **Designer**: Omnibus-Type
29
+ - **License**: Open Font License
30
+ - **Icon**: Designed by Icons By Alfredo
@@ -65,7 +65,7 @@ jobs:
65
65
  run: uv run mutmut html || true
66
66
 
67
67
  - name: Upload mutation report
68
- uses: actions/upload-artifact@v4
68
+ uses: actions/upload-artifact@v6
69
69
  with:
70
70
  name: mutation-report
71
71
  path: html/
@@ -113,7 +113,7 @@ jobs:
113
113
  fi
114
114
 
115
115
  - name: Upload benchmark results
116
- uses: actions/upload-artifact@v4
116
+ uses: actions/upload-artifact@v6
117
117
  with:
118
118
  name: benchmark-results
119
119
  path: benchmark-results.json
@@ -10,6 +10,7 @@ repos:
10
10
  - id: trailing-whitespace
11
11
  - id: end-of-file-fixer
12
12
  - id: check-yaml
13
+ args: ['--unsafe'] # Allow Python tags in mkdocs.yml
13
14
  - id: check-added-large-files
14
15
  args: ['--maxkb=500']
15
16
  - id: check-merge-conflict
@@ -1,3 +1,15 @@
1
+ ## v0.2.0 (2026-02-05)
2
+
3
+ ### Feat
4
+
5
+ - **cli**: add `portolan init` command (#20)
6
+
7
+ ## v0.1.3 (2026-02-05)
8
+
9
+ ### Fix
10
+
11
+ - **docs**: use absolute GitHub URL for ADR link in roadmap
12
+
1
13
  ## v0.1.2 (2026-02-04)
2
14
 
3
15
  ### Fix
@@ -1,5 +1,22 @@
1
1
  # Portolan CLI - Development Guide
2
2
 
3
+ ## What is Portolan?
4
+
5
+ Portolan is a CLI for publishing and managing **cloud-native geospatial data catalogs**. It orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage (S3, GCS, Azure)—no running servers, just static files.
6
+
7
+ **Key concepts:**
8
+ - **STAC** (SpatioTemporal Asset Catalog) — The catalog metadata format
9
+ - **GeoParquet** — Cloud-optimized vector data (columnar, spatial indexing)
10
+ - **COG** (Cloud-Optimized GeoTIFF) — Cloud-optimized raster data (HTTP range requests)
11
+ - **versions.json** — Single source of truth for version history, sync state, and checksums
12
+
13
+ Portolan doesn't do the heavy lifting—it orchestrates libraries like `geoparquet-io` and `rio-cogeo`.
14
+
15
+ **Key dependencies (check these repos for API docs):**
16
+ - [geoparquet-io](https://github.com/geoparquet/geoparquet-io) — Vector format conversion
17
+ - [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles) — PMTiles generation from GeoParquet
18
+ - [rio-cogeo](https://github.com/cogeotiff/rio-cogeo) — Raster conversion to COG
19
+
3
20
  ## Guiding Principle
4
21
 
5
22
  AI agents will write most of the code. Human review does not scale to match AI output volume. Therefore: every quality gate must be automated, every convention must be enforceable, and tests must be verified to actually test something.
@@ -8,7 +25,9 @@ AI agents will write most of the code. Human review does not scale to match AI o
8
25
 
9
26
  | Resource | Location |
10
27
  |----------|----------|
28
+ | **Roadmap** | `ROADMAP.md` |
11
29
  | Contributing guide | `docs/contributing.md` |
30
+ | Architecture | `context/architecture.md` |
12
31
  | CI/CD documentation | `context/shared/documentation/ci.md` |
13
32
  | Distill MCP tools | `context/shared/documentation/distill-mcp.md` |
14
33
  | ADRs | `context/shared/adr/` |
@@ -16,6 +35,19 @@ AI agents will write most of the code. Human review does not scale to match AI o
16
35
 
17
36
  **Target Python version:** 3.10+ (matches geoparquet-io dependency)
18
37
 
38
+ ### ADR Index
39
+
40
+ | ADR | Decision |
41
+ |-----|----------|
42
+ | [0001](context/shared/adr/0001-agentic-first-development.md) | Agentic-first: automate all quality gates, TDD mandatory |
43
+ | [0002](context/shared/adr/0002-click-for-cli.md) | Click for CLI framework |
44
+ | [0003](context/shared/adr/0003-plugin-architecture.md) | Plugin architecture for formats (GeoParquet/COG core, others optional) |
45
+ | [0004](context/shared/adr/0004-iceberg-as-plugin.md) | Iceberg as plugin, STAC remains catalog layer |
46
+ | [0005](context/shared/adr/0005-versions-json-source-of-truth.md) | versions.json as single source of truth |
47
+ | [0006](context/shared/adr/0006-remote-ownership-model.md) | Portolan owns bucket contents (no external edits) |
48
+ | [0007](context/shared/adr/0007-cli-wraps-api.md) | CLI wraps Python API (all logic in library layer) |
49
+ | [0008](context/shared/adr/0008-pipx-for-installation.md) | pipx for global installation, uv for development |
50
+
19
51
  ## Common Commands
20
52
 
21
53
  ```bash
@@ -125,11 +157,7 @@ Store small, representative data files in `tests/fixtures/`. Fixtures should be:
125
157
 
126
158
  ### Pre-commit Hooks
127
159
 
128
- Pre-commit blocks on ALL checks. Install with `uv run pre-commit install`.
129
-
130
- Hooks run: trailing-whitespace, end-of-file-fixer, check-yaml, check-toml, check-merge-conflict, mixed-line-ending, check-added-large-files, ruff (fix + format), vulture, xenon, mypy, fast unit tests, commitizen (commit-msg).
131
-
132
- If a hook fails, fix the issue before committing. No `--no-verify`.
160
+ Install: `uv run pre-commit install`. Every hook is blocking: fix the failure and re-commit; never bypass with `--no-verify`. See `.pre-commit-config.yaml` for the full list.
133
161
 
134
162
  ## Code Quality
135
163
 
@@ -202,13 +230,6 @@ Releases are automated via commitizen on push to main. See `.github/workflows/re
202
230
  | API contracts | Docstrings | All public functions/classes |
203
231
  | Gotchas/quirks | CLAUDE.md or inline | Anything that surprised you |
204
232
 
205
- ### Why This Matters
206
-
207
- - **AI agents start fresh each session** — They don't remember past conversations
208
- - **Context files are their memory** — ADRs, known-issues, and CLAUDE.md persist knowledge
209
- - **Documentation compounds** — Each documented decision helps all future sessions
210
- - **Undocumented knowledge is lost** — If it's not written down, it doesn't exist for agents
211
-
212
233
  ### ADR Guidelines
213
234
 
214
235
  Create an ADR (`context/shared/adr/NNNN-title.md`) when:
@@ -222,25 +243,10 @@ Use the template at `context/shared/adr/0000-template.md`.
222
243
 
223
244
  ### Two Documentation Audiences
224
245
 
225
- | Audience | Location | Optimized For |
226
- |----------|----------|---------------|
227
- | **Humans** | `docs/` (mkdocs) | Readability, navigation, tutorials, visual presentation |
228
- | **AI agents** | Docstrings, CLAUDE.md, ADRs, inline comments | Context windows, searchability, co-location with code |
229
-
230
- **Human docs (`docs/`):**
231
- - Rendered website via mkdocs
232
- - Prose-heavy with examples and screenshots
233
- - Organized by user journey (getting started → advanced topics)
234
- - Can be verbose — humans skim and navigate
235
-
236
- **AI docs (in-repo):**
237
- - Docstrings: Complete API contracts (args, returns, raises, examples)
238
- - CLAUDE.md: Development patterns, commands, gotchas
239
- - ADRs: Decision rationale with alternatives considered
240
- - Inline comments: Non-obvious code behavior
241
- - Dense and structured (tables, bullet lists) — agents parse linearly
242
-
243
- **Key difference:** Human docs explain *how to use* the tool. AI docs explain *how to modify* the codebase.
246
+ | Audience | Location | Purpose |
247
+ |----------|----------|---------|
248
+ | **Humans** | `docs/` (mkdocs) | *How to use* — tutorials, visual guides |
249
+ | **AI agents** | Docstrings, CLAUDE.md, ADRs | *How to modify* — dense, structured, co-located with code |
244
250
 
245
251
  ## Standardized Terminal Output
246
252
 
@@ -256,6 +262,17 @@ error("No geometry column (required)") # ✗ Red X
256
262
  detail("Processing chunk 3/10...") # Dimmed text
257
263
  ```
258
264
 
265
+ ## Design Principles
266
+
267
+ | Principle | Meaning | ADR |
268
+ |-----------|---------|-----|
269
+ | **Don't duplicate** | Orchestrate libraries (geoparquet-io, rio-cogeo), never reimplement | — |
270
+ | **YAGNI** | No speculative features; complexity is expensive | — |
271
+ | **Interactive + automatable** | Every prompt has `--auto` fallback | — |
272
+ | **versions.json is truth** | Drives sync, validation, history | [ADR-0005](context/shared/adr/0005-versions-json-source-of-truth.md) |
273
+ | **Plugin interface early** | Handlers follow consistent interface for future plugins | [ADR-0003](context/shared/adr/0003-plugin-architecture.md) |
274
+ | **CLI wraps API** | All logic in library; CLI is thin Click layer | [ADR-0007](context/shared/adr/0007-cli-wraps-api.md) |
275
+
259
276
  ## Tool Usage
260
277
 
261
278
  | Tool | Purpose | Documentation |
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: portolan-cli
3
+ Version: 0.2.0
4
+ Summary: A CLI tool for managing cloud-native geospatial data
5
+ Project-URL: Homepage, https://github.com/portolan-sdi/portolan-cli
6
+ Project-URL: Bug Tracker, https://github.com/portolan-sdi/portolan-cli/issues
7
+ Project-URL: Documentation, https://github.com/portolan-sdi/portolan-cli#readme
8
+ Project-URL: Source, https://github.com/portolan-sdi/portolan-cli
9
+ Author-email: Nissim Lebovits <nlebovits@pm.me>
10
+ License-Expression: Apache-2.0
11
+ License-File: LICENSE
12
+ Keywords: cloud-native,geospatial,gis,io
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: GIS
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: click>=8.3.1
25
+ Requires-Dist: geoparquet-io>=0.3.0
26
+ Requires-Dist: pyarrow<22.0.0,>=12.0.0
27
+ Requires-Dist: typing-extensions>=4.0.0; python_version < '3.11'
28
+ Provides-Extra: dev
29
+ Requires-Dist: bandit>=1.9.3; extra == 'dev'
30
+ Requires-Dist: codespell>=2.4.1; extra == 'dev'
31
+ Requires-Dist: commitizen>=4.13.4; extra == 'dev'
32
+ Requires-Dist: hypothesis>=6.151.5; extra == 'dev'
33
+ Requires-Dist: mutmut>=3.4.0; extra == 'dev'
34
+ Requires-Dist: mypy>=1.19.1; extra == 'dev'
35
+ Requires-Dist: pip-audit>=2.10.0; extra == 'dev'
36
+ Requires-Dist: pre-commit>=4.5.1; extra == 'dev'
37
+ Requires-Dist: pytest-benchmark>=5.0.0; extra == 'dev'
38
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
39
+ Requires-Dist: pytest-timeout>=2.3.1; extra == 'dev'
40
+ Requires-Dist: pytest>=9.0.2; extra == 'dev'
41
+ Requires-Dist: radon>=6.0.1; extra == 'dev'
42
+ Requires-Dist: ruff>=0.14.11; extra == 'dev'
43
+ Requires-Dist: vulture>=2.14; extra == 'dev'
44
+ Requires-Dist: xenon>=0.9.3; extra == 'dev'
45
+ Provides-Extra: docs
46
+ Requires-Dist: mkdocs-material>=9.7.1; extra == 'docs'
47
+ Requires-Dist: mkdocs>=1.6.1; extra == 'docs'
48
+ Requires-Dist: mkdocstrings[python]>=1.0.0; extra == 'docs'
49
+ Description-Content-Type: text/markdown
50
+
51
+ <div align="center">
52
+ <img src="docs/assets/images/cover.png" alt="Portolan" width="600"/>
53
+ </div>
54
+
55
+ ---
56
+
57
+ Portolan enables organizations to share geospatial data in a low-cost, accessible, sovereign, and reliable way. Built on [cloud-native geospatial](https://cloudnativegeo.org) formats, a Portolan catalog is as interactive as any geospatial portal—but faster, more scalable, and much cheaper to run. A small government's vector data costs a few dollars a month; even full imagery and point clouds typically stay under $50/month.
58
+
59
+ This CLI converts data to cloud-native formats (GeoParquet, COG), generates rich STAC metadata, and syncs to any object storage—no servers required.
60
+
61
+ ## Why Portolan?
62
+
63
+ | Benefit | How |
64
+ |---------|-----|
65
+ | **Scalable** | Cloud object storage that scales to petabytes |
66
+ | **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
67
+ | **AI-Ready** | STAC metadata enables semantic search and LLM integration |
68
+ | **Cheap** | Pay only for storage + egress — no servers to run |
69
+ | **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
70
+ | **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
71
+
72
+ ## What It Does
73
+
74
+ - **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
75
+ - **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
76
+ - **Version** datasets with checksums and history tracking
77
+ - **Sync** to S3, GCS, Azure, or any S3-compatible storage
78
+
79
+ ## Quick Example
80
+
81
+ ```bash
82
+ portolan init
83
+ portolan dataset add census.parquet --title "Census 2022" --auto
84
+ portolan remote add prod s3://my-bucket/catalog
85
+ portolan sync
86
+ ```
87
+
88
+ ## Installation
89
+
90
+ ### Recommended: pipx (for global use)
91
+
92
+ ```bash
93
+ pipx install portolan-cli
94
+ ```
95
+
96
+ This installs `portolan` in an isolated environment while making the command globally available.
97
+
98
+ If you don't have pipx installed:
99
+ ```bash
100
+ python3 -m pip install --user pipx
101
+ python3 -m pipx ensurepath
102
+ ```
103
+
104
+ ### Alternative: pip
105
+
106
+ ```bash
107
+ pip install portolan-cli
108
+ ```
109
+
110
+ **Note:** This installs into your global or user site-packages and may conflict with other packages.
111
+
112
+ ### For Development
113
+
114
+ Use [uv](https://github.com/astral-sh/uv) for local development:
115
+
116
+ ```bash
117
+ git clone https://github.com/portolan-sdi/portolan-cli.git
118
+ cd portolan-cli
119
+ uv sync --all-extras
120
+ uv run portolan --help
121
+ ```
122
+
123
+ See [Contributing Guide](docs/contributing.md) for full development setup.
124
+
125
+ ## Documentation
126
+
127
+ - [Contributing Guide](docs/contributing.md)
128
+ - [Architecture](context/architecture.md)
129
+ - [Roadmap](ROADMAP.md)
130
+
131
+ ## License
132
+
133
+ Apache 2.0 — see [LICENSE](LICENSE)
@@ -0,0 +1,83 @@
1
+ <div align="center">
2
+ <img src="https://raw.githubusercontent.com/portolan-sdi/portolan-cli/main/docs/assets/images/cover.png" alt="Portolan" width="600"/>
3
+ </div>
4
+
5
+ ---
6
+
7
+ Portolan enables organizations to share geospatial data in a low-cost, accessible, sovereign, and reliable way. Built on [cloud-native geospatial](https://cloudnativegeo.org) formats, a Portolan catalog is as interactive as any geospatial portal—but faster, more scalable, and much cheaper to run. A small government's vector data costs a few dollars a month; even full imagery and point clouds typically stay under $50/month.
8
+
9
+ This CLI converts data to cloud-native formats (GeoParquet, COG), generates rich STAC metadata, and syncs to any object storage—no servers required.
10
+
11
+ ## Why Portolan?
12
+
13
+ | Benefit | How |
14
+ |---------|-----|
15
+ | **Scalable** | Cloud object storage that scales to petabytes |
16
+ | **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
17
+ | **AI-Ready** | STAC metadata enables semantic search and LLM integration |
18
+ | **Cheap** | Pay only for storage + egress — no servers to run |
19
+ | **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
20
+ | **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
21
+
22
+ ## What It Does
23
+
24
+ - **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
25
+ - **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
26
+ - **Version** datasets with checksums and history tracking
27
+ - **Sync** to S3, GCS, Azure, or any S3-compatible storage
28
+
29
+ ## Quick Example
30
+
31
+ ```bash
32
+ portolan init
33
+ portolan dataset add census.parquet --title "Census 2022" --auto
34
+ portolan remote add prod s3://my-bucket/catalog
35
+ portolan sync
36
+ ```
37
+
38
+ ## Installation
39
+
40
+ ### Recommended: pipx (for global use)
41
+
42
+ ```bash
43
+ pipx install portolan-cli
44
+ ```
45
+
46
+ This installs `portolan` in an isolated environment while making the command globally available.
47
+
48
+ If you don't have pipx installed:
49
+ ```bash
50
+ python3 -m pip install --user pipx
51
+ python3 -m pipx ensurepath
52
+ ```
53
+
54
+ ### Alternative: pip
55
+
56
+ ```bash
57
+ pip install portolan-cli
58
+ ```
59
+
60
+ **Note:** This installs into your global or user site-packages and may conflict with other packages.
61
+
62
+ ### For Development
63
+
64
+ Use [uv](https://github.com/astral-sh/uv) for local development:
65
+
66
+ ```bash
67
+ git clone https://github.com/portolan-sdi/portolan-cli.git
68
+ cd portolan-cli
69
+ uv sync --all-extras
70
+ uv run portolan --help
71
+ ```
72
+
73
+ See [Contributing Guide](docs/contributing.md) for full development setup.
74
+
75
+ ## Documentation
76
+
77
+ - [Contributing Guide](docs/contributing.md)
78
+ - [Architecture](context/architecture.md)
79
+ - [Roadmap](ROADMAP.md)
80
+
81
+ ## License
82
+
83
+ Apache 2.0 — see [LICENSE](LICENSE)
@@ -0,0 +1,252 @@
1
+ # Portolan Ecosystem Roadmap
2
+
3
+ ## Vision
4
+
5
+ Portolan makes it easy to publish and consume cloud-native geospatial data. The ecosystem includes a spec, CLI, format plugins, a QGIS plugin, and a global data bootstrapper—each designed to work standalone or together.
6
+
7
+ Development is **spec-driven but implementation-informed**: the [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) evolves alongside the CLI.
8
+
9
+ ---
10
+
11
+ ## Phase 1: Core CLI + Spec
12
+
13
+ The foundation. A complete, working CLI with Python API underneath.
14
+
15
+ ### Epic: Dataset Lifecycle
16
+
17
+ Convert files to cloud-native formats, manage metadata, organize into a local catalog. Vector conversion uses [geoparquet-io](https://github.com/geoparquet/geoparquet-io).
18
+
19
+ | Capability | Description |
20
+ |------------|-------------|
21
+ | `portolan init` | Create `.portolan/` catalog structure |
22
+ | `portolan dataset add` | Detect format → convert (GeoParquet/COG) → extract metadata → stage |
23
+ | `portolan dataset remove` | Remove datasets from catalog |
24
+ | `portolan dataset list/info` | Catalog exploration |
25
+ | Interactive + `--auto` | Works for humans and agents |
26
+
27
+ ### Epic: Cloud Sync
28
+
29
+ Push catalogs to object storage. Portolan owns the bucket contents.
30
+
31
+ | Capability | Description |
32
+ |------------|-------------|
33
+ | `portolan remote add/list` | Configure S3, GCS, Azure backends |
34
+ | `portolan sync` | Push `.portolan/` to remote |
35
+ | `versions.json` | Version history, checksums, sync state |
36
+
37
+ ### Epic: Validation & Repair
38
+
39
+ Ensure catalogs meet the Portolan spec. Detect and fix drift.
40
+
41
+ | Capability | Description |
42
+ |------------|-------------|
43
+ | `portolan check` | Validate local catalog against spec |
44
+ | `portolan check --remote` | Detect drift (external edits to bucket) |
45
+ | `portolan repair` | Re-sync remote from local truth |
46
+ | `portolan prune` | Clean up old versions |
47
+ | Actionable output | Specific guidance, not just pass/fail |
48
+
49
+ ### Epic: Styling & Thumbnails
50
+
51
+ Make datasets visually browsable.
52
+
53
+ | Capability | Description |
54
+ |------------|-------------|
55
+ | `style.json` | MapLibre-compatible style definitions |
56
+ | Thumbnail generation | Auto-render preview images |
57
+ | Smart defaults | Infer styles from data characteristics |
58
+
59
+ ### Epic: PMTiles Generation
60
+
61
+ Generate vector tile overviews from GeoParquet datasets using [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles).
62
+
63
+ | Capability | Description |
64
+ |------------|-------------|
65
+ | PMTiles as derivative | Generated from GeoParquet for web display |
66
+ | Automatic on `dataset add` | Optional; controlled by flag or config |
67
+ | Stored alongside source | Part of the dataset, not a separate dataset |
68
+
69
+ **Note:** PMTiles are a *view* of the data for rendering, not the source of truth. GeoParquet remains the canonical format. (PMTiles *could* be added as standalone datasets, but the primary use case is as overviews.)
70
+
71
+ ### Epic: COPC Support
72
+
73
+ Cloud-optimized point clouds for LiDAR and similar data.
74
+
75
+ | Capability | Description |
76
+ |------------|-------------|
77
+ | COPC conversion | Convert point cloud formats to COPC |
78
+ | Metadata extraction | Bounds, point count, CRS |
79
+ | Styling conventions | Point cloud visualization defaults |
80
+
81
+ ### Epic: Python API
82
+
83
+ All functionality is implemented as a Python library; CLI wraps it.
84
+
85
+ | Capability | Description |
86
+ |------------|-------------|
87
+ | `Catalog` class | `init()`, `add()`, `sync()`, `check()` |
88
+ | Built simultaneously | API *is* the implementation; CLI is the interface |
89
+ | Agent-friendly | Clear errors, predictable outputs |
90
+ | `SKILLS.md` | LLM-optimized documentation |
91
+
92
+ ### Spec Evolution (Phase 1)
93
+
94
+ The [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) develops in lockstep:
95
+
96
+ - Required metadata fields
97
+ - Catalog structure and naming
98
+ - Validation rules
99
+ - Remote structure and versioning
100
+ - PMTiles and COPC conventions
101
+
102
+ ---
103
+
104
+ ## Parallel: Iceberg Plugin
105
+
106
+ Tabular analytics on geospatial data. Developed by Javier alongside Phase 1.
107
+
108
+ | Capability | Description |
109
+ |------------|-------------|
110
+ | `portolan-iceberg` | Apache Iceberg tables alongside STAC |
111
+ | Query integration | SQL/DataFrame access to versioned data |
112
+
113
+ **Note:** Separate package, separate maintainer, but expected to land around the same time as Phase 1. STAC remains the catalog layer; Iceberg is the analytics layer.
114
+
115
+ ---
116
+
117
+ ## Phase 2: QGIS Plugin
118
+
119
+ Bring Portolan catalogs into desktop GIS workflows.
120
+
121
+ | Capability | Description |
122
+ |------------|-------------|
123
+ | Browse catalogs | Connect to Portolan remotes, explore datasets |
124
+ | Pull data | Load GeoParquet/COG into QGIS layers |
125
+ | Edit metadata | Update titles, descriptions, licenses |
126
+ | Spec validation | Check datasets from within QGIS |
127
+
128
+ **Dependency:** Phase 1 complete.
129
+
130
+ ---
131
+
132
+ ## Phase 3: Global Data Bootstrapper
133
+
134
+ Subset global datasets to bootstrap local catalogs.
135
+
136
+ | Capability | Description |
137
+ |------------|-------------|
138
+ | Source registry | Curated global datasets (Overture, ESA, etc.) |
139
+ | Region extraction | Clip to bounding box or admin boundary |
140
+ | One-command bootstrap | `portolan bootstrap --region "Nairobi"` |
141
+
142
+ **Dependency:** Phase 1 complete.
143
+
144
+ ---
145
+
146
+ ## TBD: Access Control & Visibility
147
+
148
+ Multi-tenant access control for teams sharing a Portolan catalog. Timing and scope to be determined.
149
+
150
+ ### Potential Capabilities
151
+
152
+ | Capability | Description |
153
+ |------------|-------------|
154
+ | Visibility metadata | Mark datasets as `public` or `private` with optional tenant assignment |
155
+ | User management | Create/list/remove users with credentials |
156
+ | Access policies | Grant/revoke user access to specific dataset paths |
157
+ | Policy enforcement | Integration with storage IAM (MinIO, S3, GCS) |
158
+
159
+ ### Open Questions
160
+
161
+ - **Where does auth live?** Portolan is "static files only" — user management likely requires a sidecar service or delegation to storage provider IAM
162
+ - **MinIO vs generic?** MinIO has rich policy APIs; S3/GCS use IAM. Do we abstract or specialize?
163
+ - **Scope boundary:** Portolan may just *tag* visibility; enforcement happens at the storage layer
164
+
165
+ ### Example Workflow (Conceptual)
166
+
167
+ ```bash
168
+ # Add a public dataset
169
+ portolan dataset add satellite.parquet --visibility public
170
+
171
+ # Add a private dataset for a tenant
172
+ portolan dataset add confidential.parquet --visibility private --tenant acme
173
+
174
+ # User management (if Portolan handles it)
175
+ portolan user add analyst
176
+ portolan access grant analyst 'acme/*'
177
+ ```
178
+
179
+ **See also:** [ADR-0006 (Remote Ownership Model)](https://github.com/portolan-sdi/portolan-cli/blob/main/context/shared/adr/0006-remote-ownership-model.md) — explains why multi-user collaboration is complex under the current ownership model.
180
+
181
+ ---
182
+
183
+ ## TBD: Data Consumption & SQL Engines
184
+
185
+ Documentation and tooling for consuming Portolan catalogs from analytics engines. Timing to be determined.
186
+
187
+ ### Potential Capabilities
188
+
189
+ | Capability | Description |
190
+ |------------|-------------|
191
+ | Consumption guides | How to query Portolan catalogs from popular engines |
192
+ | Connection snippets | Copy-paste connection strings for each engine |
193
+ | `portolan connect` | Generate connection config for a specific engine |
194
+
195
+ ### Target Engines
196
+
197
+ | Engine | Protocol | Notes |
198
+ |--------|----------|-------|
199
+ | **DuckDB** | S3/HTTP | Native GeoParquet support |
200
+ | **Snowflake** | External tables | Via stage or external access |
201
+ | **BigQuery** | BigLake / external tables | GCS-native |
202
+ | **Databricks** | Unity Catalog / S3 | Delta Lake interop via Iceberg |
203
+ | **Trino/Presto** | Hive connector | S3-backed |
204
+ | **Oracle** | External tables | Via object storage |
205
+ | **Pandas/GeoPandas** | obstore / fsspec | Direct Python access |
206
+
207
+ ### Example: DuckDB
208
+
209
+ ```sql
210
+ -- Configure S3 access
211
+ SET s3_endpoint = 'storage.example.com';
212
+ SET s3_access_key_id = 'analyst';
213
+ SET s3_secret_access_key = '...';
214
+ SET s3_use_ssl = true;
215
+
216
+ -- Query a Portolan dataset directly
217
+ SELECT * FROM 's3://catalog/public/census/census.parquet';
218
+
219
+ -- Or with spatial filtering (requires the spatial extension: INSTALL spatial; LOAD spatial;)
220
+ SELECT * FROM 's3://catalog/public/census/census.parquet'
221
+ WHERE ST_Within(geometry, ST_GeomFromText('POLYGON((...))'));
222
+ ```
223
+
224
+ ### Why This Matters
225
+
226
+ Portolan's value is "publish once, consume anywhere." Without consumption docs, users publish data but don't know how to use it. This closes the loop.
227
+
228
+ ---
229
+
230
+ ## Out of Scope for v1.0
231
+
232
+ | Item | Reason |
233
+ |------|--------|
234
+ | 3D Tiles | Niche; can be community-contributed later |
235
+ | Browser/Map UI | May be unnecessary with agentic workflows; revisit post-v1 |
236
+
237
+ ---
238
+
239
+ ## Summary
240
+
241
+ | Phase | Scope | Timing |
242
+ |-------|-------|--------|
243
+ | **Phase 1** | Core CLI, Python API, Spec, PMTiles, COPC | Now |
244
+ | **Parallel** | Iceberg Plugin (Javier) | Alongside Phase 1 |
245
+ | **Phase 2** | QGIS Plugin | After Phase 1 |
246
+ | **Phase 3** | Global Bootstrapper | After Phase 1 |
247
+ | **TBD** | Access Control & Visibility | To be scoped |
248
+ | **TBD** | Data Consumption & SQL Engines | To be scoped |
249
+
250
+ ---
251
+
252
+ *Portolan is an open source project under [Radiant Earth](https://radiant.earth).*