portolan-cli 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/workflows/nightly.yml +2 -2
  2. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/workflows/release.yml +12 -5
  3. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.pre-commit-config.yaml +1 -0
  4. portolan_cli-0.1.3/CHANGELOG.md +19 -0
  5. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/CLAUDE.md +48 -31
  6. portolan_cli-0.1.3/PKG-INFO +130 -0
  7. portolan_cli-0.1.3/README.md +83 -0
  8. portolan_cli-0.1.3/ROADMAP.md +252 -0
  9. portolan_cli-0.1.3/context/architecture.md +183 -0
  10. portolan_cli-0.1.3/context/shared/adr/0003-plugin-architecture.md +83 -0
  11. portolan_cli-0.1.3/context/shared/adr/0004-iceberg-as-plugin.md +79 -0
  12. portolan_cli-0.1.3/context/shared/adr/0005-versions-json-source-of-truth.md +92 -0
  13. portolan_cli-0.1.3/context/shared/adr/0006-remote-ownership-model.md +71 -0
  14. portolan_cli-0.1.3/context/shared/adr/0007-cli-wraps-api.md +95 -0
  15. portolan_cli-0.1.3/context/shared/adr/0008-pipx-for-installation.md +110 -0
  16. portolan_cli-0.1.3/docs/BRANDING.md +109 -0
  17. portolan_cli-0.1.3/docs/assets/images/cover.png +0 -0
  18. portolan_cli-0.1.3/docs/assets/images/icon-white.svg +5 -0
  19. portolan_cli-0.1.3/docs/assets/images/icon.svg +11 -0
  20. portolan_cli-0.1.3/docs/assets/images/logo.png +0 -0
  21. portolan_cli-0.1.3/docs/assets/images/logo.svg +1 -0
  22. portolan_cli-0.1.3/docs/assets/images/social-card.png +0 -0
  23. portolan_cli-0.1.3/docs/assets/stylesheets/extra.css +192 -0
  24. portolan_cli-0.1.3/docs/changelog.md +1 -0
  25. portolan_cli-0.1.3/docs/index.md +170 -0
  26. portolan_cli-0.1.3/docs/roadmap.md +1 -0
  27. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/mkdocs.yml +27 -5
  28. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/pyproject.toml +2 -2
  29. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/uv.lock +960 -960
  30. portolan_cli-0.1.1/CHANGELOG.md +0 -6
  31. portolan_cli-0.1.1/PKG-INFO +0 -49
  32. portolan_cli-0.1.1/README.md +0 -2
  33. portolan_cli-0.1.1/context/architecture.md +0 -0
  34. portolan_cli-0.1.1/docs/changelog.md +0 -0
  35. portolan_cli-0.1.1/docs/index.md +0 -0
  36. portolan_cli-0.1.1/docs/roadmap.md +0 -30
  37. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.claude/hooks/post-bash-remind.sh +0 -0
  38. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.claude/hooks/pre-read-check.sh +0 -0
  39. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.claude/hooks/prompt-inject.sh +0 -0
  40. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.coderabbit.yaml +0 -0
  41. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/CODEOWNERS +0 -0
  42. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/dependabot.yml +0 -0
  43. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/pull_request_template.md +0 -0
  44. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/workflows/ci.yml +0 -0
  45. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.github/workflows/docs.yml +0 -0
  46. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.gitignore +0 -0
  47. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/.python-version +0 -0
  48. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/LICENSE +0 -0
  49. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/SECURITY.md +0 -0
  50. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/adr/0000-template.md +0 -0
  51. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/adr/0001-agentic-first-development.md +0 -0
  52. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/adr/0002-click-for-cli.md +0 -0
  53. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/documentation/ci.md +0 -0
  54. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/documentation/distill-mcp.md +0 -0
  55. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/context/shared/known-issues/example.md +0 -0
  56. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/docs/contributing.md +0 -0
  57. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/portolan_cli/__init__.py +0 -0
  58. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/portolan_cli/cli.py +0 -0
  59. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/portolan_cli/output.py +0 -0
  60. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/tests/conftest.py +0 -0
  61. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/tests/specs/README.md +0 -0
  62. {portolan_cli-0.1.1 → portolan_cli-0.1.3}/tests/test_placeholder.py +0 -0
@@ -65,7 +65,7 @@ jobs:
65
65
  run: uv run mutmut html || true
66
66
 
67
67
  - name: Upload mutation report
68
- uses: actions/upload-artifact@v4
68
+ uses: actions/upload-artifact@v6
69
69
  with:
70
70
  name: mutation-report
71
71
  path: html/
@@ -113,7 +113,7 @@ jobs:
113
113
  fi
114
114
 
115
115
  - name: Upload benchmark results
116
- uses: actions/upload-artifact@v4
116
+ uses: actions/upload-artifact@v6
117
117
  with:
118
118
  name: benchmark-results
119
119
  path: benchmark-results.json
@@ -48,8 +48,11 @@ jobs:
48
48
  id: check
49
49
  run: |
50
50
  # Check if there are any commits that would trigger a version bump
51
- # Exit code 21 = NO_COMMITS_TO_BUMP (commits exist but none are feat/fix/breaking)
52
- # Exit code 3 = NO_COMMITS_FOUND (no commits at all since last tag)
51
+ # Commitizen exit codes:
52
+ # 0 = Success, release needed
53
+ # 3 = NO_COMMITS_FOUND (no commits at all since last tag)
54
+ # 16 = NO_REVISION (no tag found for incremental changelog, but bump would succeed)
55
+ # 21 = NO_INCREMENT (commits exist but none are feat/fix/breaking, so nothing to bump)
53
56
  # --yes flag auto-confirms "is this the first tag?" prompt in CI
54
57
  set +e
55
58
  output=$(uv run cz bump --dry-run --yes 2>&1)
@@ -58,9 +61,9 @@ jobs:
58
61
 
59
62
  echo "$output"
60
63
 
61
- if [ $exit_code -eq 0 ]; then
64
+ if [ $exit_code -eq 0 ] || [ $exit_code -eq 16 ]; then
62
65
  echo "release_needed=true" >> $GITHUB_OUTPUT
63
- echo "✓ Release-worthy commits found"
66
+ echo "✓ Release-worthy commits found (exit code: $exit_code)"
64
67
  elif [ $exit_code -eq 21 ]; then
65
68
  echo "release_needed=false" >> $GITHUB_OUTPUT
66
69
  echo "✓ No release needed (commits are docs/refactor/test/chore only)"
@@ -91,11 +94,15 @@ jobs:
91
94
  - name: Create GitHub Release
92
95
  if: steps.check.outputs.release_needed == 'true'
93
96
  run: |
94
- # Get the latest tag
97
+ # Get the latest tag and its commit SHA
95
98
  TAG=$(git describe --tags --abbrev=0)
99
+ TAG_SHA=$(git rev-list -n 1 "$TAG")
96
100
  # Create a GitHub release from the tag
101
+ # Use --target to specify the tagged commit SHA, which works even if
102
+ # the tag push hasn't fully propagated to GitHub yet
97
103
  gh release create "$TAG" \
98
104
  --title "$TAG" \
105
+ --target "$TAG_SHA" \
99
106
  --notes "See [CHANGELOG](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details." \
100
107
  dist/*
101
108
  env:
@@ -10,6 +10,7 @@ repos:
10
10
  - id: trailing-whitespace
11
11
  - id: end-of-file-fixer
12
12
  - id: check-yaml
13
+ args: ['--unsafe'] # Allow Python tags in mkdocs.yml
13
14
  - id: check-added-large-files
14
15
  args: ['--maxkb=500']
15
16
  - id: check-merge-conflict
@@ -0,0 +1,19 @@
1
+ ## v0.1.3 (2026-02-05)
2
+
3
+ ### Fix
4
+
5
+ - **docs**: use absolute GitHub URL for ADR link in roadmap
6
+
7
+ ## v0.1.2 (2026-02-04)
8
+
9
+ ### Fix
10
+
11
+ - **ci**: handle commitizen exit code 16 (NO_PATTERN_MAP)
12
+ - **ci**: use commit SHA for GitHub release target
13
+
14
+ ## v0.1.1 (2026-02-04)
15
+
16
+ ### Fix
17
+
18
+ - **ci**: add --yes flag to commitizen dry-run for first tag
19
+ - **ci**: handle commitizen NO_COMMITS_TO_BUMP exit code gracefully
@@ -1,5 +1,22 @@
1
1
  # Portolan CLI - Development Guide
2
2
 
3
+ ## What is Portolan?
4
+
5
+ Portolan is a CLI for publishing and managing **cloud-native geospatial data catalogs**. It orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage (S3, GCS, Azure)—no running servers, just static files.
6
+
7
+ **Key concepts:**
8
+ - **STAC** (SpatioTemporal Asset Catalog) — The catalog metadata format
9
+ - **GeoParquet** — Cloud-optimized vector data (columnar, spatial indexing)
10
+ - **COG** (Cloud-Optimized GeoTIFF) — Cloud-optimized raster data (HTTP range requests)
11
+ - **versions.json** — Single source of truth for version history, sync state, and checksums
12
+
13
+ Portolan doesn't do the heavy lifting—it orchestrates libraries like `geoparquet-io` and `rio-cogeo`.
14
+
15
+ **Key dependencies (check these repos for API docs):**
16
+ - [geoparquet-io](https://github.com/geoparquet/geoparquet-io) — Vector format conversion
17
+ - [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles) — PMTiles generation from GeoParquet
18
+ - [rio-cogeo](https://github.com/cogeotiff/rio-cogeo) — Raster conversion to COG
19
+
3
20
  ## Guiding Principle
4
21
 
5
22
  AI agents will write most of the code. Human review does not scale to match AI output volume. Therefore: every quality gate must be automated, every convention must be enforceable, and tests must be verified to actually test something.
@@ -8,7 +25,9 @@ AI agents will write most of the code. Human review does not scale to match AI o
8
25
 
9
26
  | Resource | Location |
10
27
  |----------|----------|
28
+ | **Roadmap** | `ROADMAP.md` |
11
29
  | Contributing guide | `docs/contributing.md` |
30
+ | Architecture | `context/architecture.md` |
12
31
  | CI/CD documentation | `context/shared/documentation/ci.md` |
13
32
  | Distill MCP tools | `context/shared/documentation/distill-mcp.md` |
14
33
  | ADRs | `context/shared/adr/` |
@@ -16,6 +35,19 @@ AI agents will write most of the code. Human review does not scale to match AI o
16
35
 
17
36
  **Target Python version:** 3.10+ (matches geoparquet-io dependency)
18
37
 
38
+ ### ADR Index
39
+
40
+ | ADR | Decision |
41
+ |-----|----------|
42
+ | [0001](context/shared/adr/0001-agentic-first-development.md) | Agentic-first: automate all quality gates, TDD mandatory |
43
+ | [0002](context/shared/adr/0002-click-for-cli.md) | Click for CLI framework |
44
+ | [0003](context/shared/adr/0003-plugin-architecture.md) | Plugin architecture for formats (GeoParquet/COG core, others optional) |
45
+ | [0004](context/shared/adr/0004-iceberg-as-plugin.md) | Iceberg as plugin, STAC remains catalog layer |
46
+ | [0005](context/shared/adr/0005-versions-json-source-of-truth.md) | versions.json as single source of truth |
47
+ | [0006](context/shared/adr/0006-remote-ownership-model.md) | Portolan owns bucket contents (no external edits) |
48
+ | [0007](context/shared/adr/0007-cli-wraps-api.md) | CLI wraps Python API (all logic in library layer) |
49
+ | [0008](context/shared/adr/0008-pipx-for-installation.md) | pipx for global installation, uv for development |
50
+
19
51
  ## Common Commands
20
52
 
21
53
  ```bash
@@ -125,11 +157,7 @@ Store small, representative data files in `tests/fixtures/`. Fixtures should be:
125
157
 
126
158
  ### Pre-commit Hooks
127
159
 
128
- Pre-commit blocks on ALL checks. Install with `uv run pre-commit install`.
129
-
130
- Hooks run: trailing-whitespace, end-of-file-fixer, check-yaml, check-toml, check-merge-conflict, mixed-line-ending, check-added-large-files, ruff (fix + format), vulture, xenon, mypy, fast unit tests, commitizen (commit-msg).
131
-
132
- If a hook fails, fix the issue before committing. No `--no-verify`.
160
+ Install with `uv run pre-commit install`. Every hook is blocking: fix the failure before committing, and never bypass hooks with `--no-verify`. See `.pre-commit-config.yaml` for the full list of hooks.
133
161
 
134
162
  ## Code Quality
135
163
 
@@ -202,13 +230,6 @@ Releases are automated via commitizen on push to main. See `.github/workflows/re
202
230
  | API contracts | Docstrings | All public functions/classes |
203
231
  | Gotchas/quirks | CLAUDE.md or inline | Anything that surprised you |
204
232
 
205
- ### Why This Matters
206
-
207
- - **AI agents start fresh each session** — They don't remember past conversations
208
- - **Context files are their memory** — ADRs, known-issues, and CLAUDE.md persist knowledge
209
- - **Documentation compounds** — Each documented decision helps all future sessions
210
- - **Undocumented knowledge is lost** — If it's not written down, it doesn't exist for agents
211
-
212
233
  ### ADR Guidelines
213
234
 
214
235
  Create an ADR (`context/shared/adr/NNNN-title.md`) when:
@@ -222,25 +243,10 @@ Use the template at `context/shared/adr/0000-template.md`.
222
243
 
223
244
  ### Two Documentation Audiences
224
245
 
225
- | Audience | Location | Optimized For |
226
- |----------|----------|---------------|
227
- | **Humans** | `docs/` (mkdocs) | Readability, navigation, tutorials, visual presentation |
228
- | **AI agents** | Docstrings, CLAUDE.md, ADRs, inline comments | Context windows, searchability, co-location with code |
229
-
230
- **Human docs (`docs/`):**
231
- - Rendered website via mkdocs
232
- - Prose-heavy with examples and screenshots
233
- - Organized by user journey (getting started → advanced topics)
234
- - Can be verbose — humans skim and navigate
235
-
236
- **AI docs (in-repo):**
237
- - Docstrings: Complete API contracts (args, returns, raises, examples)
238
- - CLAUDE.md: Development patterns, commands, gotchas
239
- - ADRs: Decision rationale with alternatives considered
240
- - Inline comments: Non-obvious code behavior
241
- - Dense and structured (tables, bullet lists) — agents parse linearly
242
-
243
- **Key difference:** Human docs explain *how to use* the tool. AI docs explain *how to modify* the codebase.
246
+ | Audience | Location | Purpose |
247
+ |----------|----------|---------|
248
+ | **Humans** | `docs/` (mkdocs) | *How to use* — tutorials, visual guides |
249
+ | **AI agents** | Docstrings, CLAUDE.md, ADRs | *How to modify* — dense, structured, co-located with code |
244
250
 
245
251
  ## Standardized Terminal Output
246
252
 
@@ -256,6 +262,17 @@ error("No geometry column (required)") # ✗ Red X
256
262
  detail("Processing chunk 3/10...") # Dimmed text
257
263
  ```
258
264
 
265
+ ## Design Principles
266
+
267
+ | Principle | Meaning | ADR |
268
+ |-----------|---------|-----|
269
+ | **Don't duplicate** | Orchestrate libraries (geoparquet-io, rio-cogeo), never reimplement | — |
270
+ | **YAGNI** | No speculative features; complexity is expensive | — |
271
+ | **Interactive + automatable** | Every prompt has an `--auto` fallback | — |
272
+ | **versions.json is truth** | Drives sync, validation, history | [ADR-0005](context/shared/adr/0005-versions-json-source-of-truth.md) |
273
+ | **Plugin interface early** | Handlers follow consistent interface for future plugins | [ADR-0003](context/shared/adr/0003-plugin-architecture.md) |
274
+ | **CLI wraps API** | All logic in library; CLI is thin Click layer | [ADR-0007](context/shared/adr/0007-cli-wraps-api.md) |
275
+
259
276
  ## Tool Usage
260
277
 
261
278
  | Tool | Purpose | Documentation |
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.4
2
+ Name: portolan-cli
3
+ Version: 0.1.3
4
+ Summary: A CLI tool for managing cloud-native geospatial data
5
+ Project-URL: Homepage, https://github.com/portolan-sdi/portolan-cli
6
+ Project-URL: Bug Tracker, https://github.com/portolan-sdi/portolan-cli/issues
7
+ Project-URL: Documentation, https://github.com/portolan-sdi/portolan-cli#readme
8
+ Project-URL: Source, https://github.com/portolan-sdi/portolan-cli
9
+ Author-email: Nissim Lebovits <nlebovits@pm.me>
10
+ License-Expression: Apache-2.0
11
+ License-File: LICENSE
12
+ Keywords: cloud-native,geospatial,gis,io
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: GIS
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: click>=8.3.1
25
+ Provides-Extra: dev
26
+ Requires-Dist: bandit>=1.9.3; extra == 'dev'
27
+ Requires-Dist: codespell>=2.4.1; extra == 'dev'
28
+ Requires-Dist: commitizen>=4.13.4; extra == 'dev'
29
+ Requires-Dist: hypothesis>=6.151.5; extra == 'dev'
30
+ Requires-Dist: mutmut>=3.4.0; extra == 'dev'
31
+ Requires-Dist: mypy>=1.19.1; extra == 'dev'
32
+ Requires-Dist: pip-audit>=2.10.0; extra == 'dev'
33
+ Requires-Dist: pre-commit>=4.5.1; extra == 'dev'
34
+ Requires-Dist: pytest-benchmark>=5.0.0; extra == 'dev'
35
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
36
+ Requires-Dist: pytest-timeout>=2.3.1; extra == 'dev'
37
+ Requires-Dist: pytest>=9.0.2; extra == 'dev'
38
+ Requires-Dist: radon>=6.0.1; extra == 'dev'
39
+ Requires-Dist: ruff>=0.14.11; extra == 'dev'
40
+ Requires-Dist: vulture>=2.14; extra == 'dev'
41
+ Requires-Dist: xenon>=0.9.3; extra == 'dev'
42
+ Provides-Extra: docs
43
+ Requires-Dist: mkdocs-material>=9.7.1; extra == 'docs'
44
+ Requires-Dist: mkdocs>=1.6.1; extra == 'docs'
45
+ Requires-Dist: mkdocstrings[python]>=1.0.0; extra == 'docs'
46
+ Description-Content-Type: text/markdown
47
+
48
+ <div align="center">
49
+ <img src="docs/assets/images/logo.svg" alt="Portolan Logo" width="200"/>
50
+ <h1>Portolan CLI</h1>
51
+ <p><strong>Cloud-native geospatial data catalogs, simplified</strong></p>
52
+ </div>
53
+
54
+ ---
55
+
56
+ A CLI for publishing and managing **cloud-native geospatial data catalogs**. Portolan orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage—no running servers, just static files.
57
+
58
+ ## Why Portolan?
59
+
60
+ | Benefit | How |
61
+ |---------|-----|
62
+ | **Scalable** | Cloud object storage that scales to petabytes |
63
+ | **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
64
+ | **AI-Ready** | STAC metadata enables semantic search and LLM integration |
65
+ | **Cheap** | Pay only for storage + egress — no servers to run |
66
+ | **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
67
+ | **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
68
+
69
+ ## What It Does
70
+
71
+ - **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
72
+ - **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
73
+ - **Version** datasets with checksums and history tracking
74
+ - **Sync** to S3, GCS, Azure, or any S3-compatible storage
75
+
76
+ ## Quick Example
77
+
78
+ ```bash
79
+ portolan init
80
+ portolan dataset add census.parquet --title "Census 2022" --auto
81
+ portolan remote add prod s3://my-bucket/catalog
82
+ portolan sync
83
+ ```
84
+
85
+ ## Installation
86
+
87
+ ### Recommended: pipx (for global use)
88
+
89
+ ```bash
90
+ pipx install portolan-cli
91
+ ```
92
+
93
+ This installs `portolan` in an isolated environment while making the command globally available.
94
+
95
+ If you don't have pipx installed:
96
+ ```bash
97
+ python3 -m pip install --user pipx
98
+ python3 -m pipx ensurepath
99
+ ```
100
+
101
+ ### Alternative: pip
102
+
103
+ ```bash
104
+ pip install portolan-cli
105
+ ```
106
+
107
+ **Note:** This installs into your global or user site-packages and may conflict with other packages.
108
+
109
+ ### For Development
110
+
111
+ Use [uv](https://github.com/astral-sh/uv) for local development:
112
+
113
+ ```bash
114
+ git clone https://github.com/portolan-sdi/portolan-cli.git
115
+ cd portolan-cli
116
+ uv sync --all-extras
117
+ uv run portolan --help
118
+ ```
119
+
120
+ See [Contributing Guide](docs/contributing.md) for full development setup.
121
+
122
+ ## Documentation
123
+
124
+ - [Contributing Guide](docs/contributing.md)
125
+ - [Architecture](context/architecture.md)
126
+ - [Roadmap](ROADMAP.md)
127
+
128
+ ## License
129
+
130
+ Apache 2.0 — see [LICENSE](LICENSE)
@@ -0,0 +1,83 @@
1
+ <div align="center">
2
+ <img src="docs/assets/images/logo.svg" alt="Portolan Logo" width="200"/>
3
+ <h1>Portolan CLI</h1>
4
+ <p><strong>Cloud-native geospatial data catalogs, simplified</strong></p>
5
+ </div>
6
+
7
+ ---
8
+
9
+ A CLI for publishing and managing **cloud-native geospatial data catalogs**. Portolan orchestrates format conversion (GeoParquet, COG), versioning, and sync to object storage—no running servers, just static files.
10
+
11
+ ## Why Portolan?
12
+
13
+ | Benefit | How |
14
+ |---------|-----|
15
+ | **Scalable** | Cloud object storage that scales to petabytes |
16
+ | **Open** | 100% open source, open formats (GeoParquet, COG, STAC, Iceberg) |
17
+ | **AI-Ready** | STAC metadata enables semantic search and LLM integration |
18
+ | **Cheap** | Pay only for storage + egress — no servers to run |
19
+ | **Sovereign** | Host anywhere (AWS, GCS, Azure, MinIO, Cloudflare R2) |
20
+ | **Breaks the GIS silo** | Query with DuckDB, Snowflake, BigQuery, Databricks, Pandas — not just GIS tools |
21
+
22
+ ## What It Does
23
+
24
+ - **Convert** vector/raster data to cloud-native formats (GeoParquet, COG)
25
+ - **Generate** STAC catalogs with rich metadata, thumbnails, and MapLibre styles
26
+ - **Version** datasets with checksums and history tracking
27
+ - **Sync** to S3, GCS, Azure, or any S3-compatible storage
28
+
29
+ ## Quick Example
30
+
31
+ ```bash
32
+ portolan init
33
+ portolan dataset add census.parquet --title "Census 2022" --auto
34
+ portolan remote add prod s3://my-bucket/catalog
35
+ portolan sync
36
+ ```
37
+
38
+ ## Installation
39
+
40
+ ### Recommended: pipx (for global use)
41
+
42
+ ```bash
43
+ pipx install portolan-cli
44
+ ```
45
+
46
+ This installs `portolan` in an isolated environment while making the command globally available.
47
+
48
+ If you don't have pipx installed:
49
+ ```bash
50
+ python3 -m pip install --user pipx
51
+ python3 -m pipx ensurepath
52
+ ```
53
+
54
+ ### Alternative: pip
55
+
56
+ ```bash
57
+ pip install portolan-cli
58
+ ```
59
+
60
+ **Note:** This installs into your global or user site-packages and may conflict with other packages.
61
+
62
+ ### For Development
63
+
64
+ Use [uv](https://github.com/astral-sh/uv) for local development:
65
+
66
+ ```bash
67
+ git clone https://github.com/portolan-sdi/portolan-cli.git
68
+ cd portolan-cli
69
+ uv sync --all-extras
70
+ uv run portolan --help
71
+ ```
72
+
73
+ See [Contributing Guide](docs/contributing.md) for full development setup.
74
+
75
+ ## Documentation
76
+
77
+ - [Contributing Guide](docs/contributing.md)
78
+ - [Architecture](context/architecture.md)
79
+ - [Roadmap](ROADMAP.md)
80
+
81
+ ## License
82
+
83
+ Apache 2.0 — see [LICENSE](LICENSE)
@@ -0,0 +1,252 @@
1
+ # Portolan Ecosystem Roadmap
2
+
3
+ ## Vision
4
+
5
+ Portolan makes it easy to publish and consume cloud-native geospatial data. The ecosystem includes a spec, CLI, format plugins, a QGIS plugin, and a global data bootstrapper—each designed to work standalone or together.
6
+
7
+ Development is **spec-driven but implementation-informed**: the [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) evolves alongside the CLI.
8
+
9
+ ---
10
+
11
+ ## Phase 1: Core CLI + Spec
12
+
13
+ The foundation. A complete, working CLI with Python API underneath.
14
+
15
+ ### Epic: Dataset Lifecycle
16
+
17
+ Convert files to cloud-native formats, manage metadata, organize into a local catalog. Vector conversion uses [geoparquet-io](https://github.com/geoparquet/geoparquet-io).
18
+
19
+ | Capability | Description |
20
+ |------------|-------------|
21
+ | `portolan init` | Create `.portolan/` catalog structure |
22
+ | `portolan dataset add` | Detect format → convert (GeoParquet/COG) → extract metadata → stage |
23
+ | `portolan dataset remove` | Remove datasets from catalog |
24
+ | `portolan dataset list/info` | Catalog exploration |
25
+ | Interactive + `--auto` | Works for humans and agents |
26
+
27
+ ### Epic: Cloud Sync
28
+
29
+ Push catalogs to object storage. Portolan owns the bucket contents.
30
+
31
+ | Capability | Description |
32
+ |------------|-------------|
33
+ | `portolan remote add/list` | Configure S3, GCS, Azure backends |
34
+ | `portolan sync` | Push `.portolan/` to remote |
35
+ | `versions.json` | Version history, checksums, sync state |
36
+
37
+ ### Epic: Validation & Repair
38
+
39
+ Ensure catalogs meet the Portolan spec. Detect and fix drift.
40
+
41
+ | Capability | Description |
42
+ |------------|-------------|
43
+ | `portolan check` | Validate local catalog against spec |
44
+ | `portolan check --remote` | Detect drift (external edits to bucket) |
45
+ | `portolan repair` | Re-sync remote from local truth |
46
+ | `portolan prune` | Clean up old versions |
47
+ | Actionable output | Specific guidance, not just pass/fail |
48
+
49
+ ### Epic: Styling & Thumbnails
50
+
51
+ Make datasets visually browsable.
52
+
53
+ | Capability | Description |
54
+ |------------|-------------|
55
+ | `style.json` | MapLibre-compatible style definitions |
56
+ | Thumbnail generation | Auto-render preview images |
57
+ | Smart defaults | Infer styles from data characteristics |
58
+
59
+ ### Epic: PMTiles Generation
60
+
61
+ Generate vector tile overviews from GeoParquet datasets using [gpio-pmtiles](https://github.com/geoparquet-io/gpio-pmtiles).
62
+
63
+ | Capability | Description |
64
+ |------------|-------------|
65
+ | PMTiles as derivative | Generated from GeoParquet for web display |
66
+ | Automatic on `dataset add` | Optional; controlled by flag or config |
67
+ | Stored alongside source | Part of the dataset, not a separate dataset |
68
+
69
+ **Note:** PMTiles are a *view* of the data for rendering, not the source of truth. GeoParquet remains the canonical format. (PMTiles *could* be added as standalone datasets, but the primary use case is as overviews.)
70
+
71
+ ### Epic: COPC Support
72
+
73
+ Cloud-optimized point clouds for LiDAR and similar data.
74
+
75
+ | Capability | Description |
76
+ |------------|-------------|
77
+ | COPC conversion | Convert point cloud formats to COPC |
78
+ | Metadata extraction | Bounds, point count, CRS |
79
+ | Styling conventions | Point cloud visualization defaults |
80
+
81
+ ### Epic: Python API
82
+
83
+ All functionality is implemented as a Python library; CLI wraps it.
84
+
85
+ | Capability | Description |
86
+ |------------|-------------|
87
+ | `Catalog` class | `init()`, `add()`, `sync()`, `check()` |
88
+ | Built simultaneously | API *is* the implementation; CLI is the interface |
89
+ | Agent-friendly | Clear errors, predictable outputs |
90
+ | `SKILLS.md` | LLM-optimized documentation |
91
+
92
+ ### Spec Evolution (Phase 1)
93
+
94
+ The [Portolan Spec](https://github.com/portolan-sdi/portolan-spec) develops in lockstep:
95
+
96
+ - Required metadata fields
97
+ - Catalog structure and naming
98
+ - Validation rules
99
+ - Remote structure and versioning
100
+ - PMTiles and COPC conventions
101
+
102
+ ---
103
+
104
+ ## Parallel: Iceberg Plugin
105
+
106
+ Tabular analytics on geospatial data. Developed by Javier alongside Phase 1.
107
+
108
+ | Capability | Description |
109
+ |------------|-------------|
110
+ | `portolan-iceberg` | Apache Iceberg tables alongside STAC |
111
+ | Query integration | SQL/DataFrame access to versioned data |
112
+
113
+ **Note:** Separate package, separate maintainer, but expected to land around the same time as Phase 1. STAC remains the catalog layer; Iceberg is the analytics layer.
114
+
115
+ ---
116
+
117
+ ## Phase 2: QGIS Plugin
118
+
119
+ Bring Portolan catalogs into desktop GIS workflows.
120
+
121
+ | Capability | Description |
122
+ |------------|-------------|
123
+ | Browse catalogs | Connect to Portolan remotes, explore datasets |
124
+ | Pull data | Load GeoParquet/COG into QGIS layers |
125
+ | Edit metadata | Update titles, descriptions, licenses |
126
+ | Spec validation | Check datasets from within QGIS |
127
+
128
+ **Dependency:** Phase 1 complete.
129
+
130
+ ---
131
+
132
+ ## Phase 3: Global Data Bootstrapper
133
+
134
+ Subset global datasets to bootstrap local catalogs.
135
+
136
+ | Capability | Description |
137
+ |------------|-------------|
138
+ | Source registry | Curated global datasets (Overture, ESA, etc.) |
139
+ | Region extraction | Clip to bounding box or admin boundary |
140
+ | One-command bootstrap | `portolan bootstrap --region "Nairobi"` |
141
+
142
+ **Dependency:** Phase 1 complete.
143
+
144
+ ---
145
+
146
+ ## TBD: Access Control & Visibility
147
+
148
+ Multi-tenant access control for teams sharing a Portolan catalog. Timing and scope to be determined.
149
+
150
+ ### Potential Capabilities
151
+
152
+ | Capability | Description |
153
+ |------------|-------------|
154
+ | Visibility metadata | Mark datasets as `public` or `private` with optional tenant assignment |
155
+ | User management | Create/list/remove users with credentials |
156
+ | Access policies | Grant/revoke user access to specific dataset paths |
157
+ | Policy enforcement | Integration with storage IAM (MinIO, S3, GCS) |
158
+
159
+ ### Open Questions
160
+
161
+ - **Where does auth live?** Portolan is "static files only" — user management likely requires a sidecar service or delegation to storage provider IAM
162
+ - **MinIO vs generic?** MinIO has rich policy APIs; S3/GCS use IAM. Do we abstract or specialize?
163
+ - **Scope boundary:** Portolan may just *tag* visibility; enforcement happens at the storage layer
164
+
165
+ ### Example Workflow (Conceptual)
166
+
167
+ ```bash
168
+ # Add a public dataset
169
+ portolan dataset add satellite.parquet --visibility public
170
+
171
+ # Add a private dataset for a tenant
172
+ portolan dataset add confidential.parquet --visibility private --tenant acme
173
+
174
+ # User management (if Portolan handles it)
175
+ portolan user add analyst
176
+ portolan access grant analyst 'acme/*'
177
+ ```
178
+
179
+ **See also:** [ADR-0006 (Remote Ownership Model)](https://github.com/portolan-sdi/portolan-cli/blob/main/context/shared/adr/0006-remote-ownership-model.md) — explains why multi-user collaboration is complex under the current ownership model.
180
+
181
+ ---
182
+
183
+ ## TBD: Data Consumption & SQL Engines
184
+
185
+ Documentation and tooling for consuming Portolan catalogs from analytics engines. Timing to be determined.
186
+
187
+ ### Potential Capabilities
188
+
189
+ | Capability | Description |
190
+ |------------|-------------|
191
+ | Consumption guides | How to query Portolan catalogs from popular engines |
192
+ | Connection snippets | Copy-paste connection strings for each engine |
193
+ | `portolan connect` | Generate connection config for a specific engine |
194
+
195
+ ### Target Engines
196
+
197
+ | Engine | Protocol | Notes |
198
+ |--------|----------|-------|
199
+ | **DuckDB** | S3/HTTP | Native GeoParquet support |
200
+ | **Snowflake** | External tables | Via stage or external access |
201
+ | **BigQuery** | BigLake / external tables | GCS-native |
202
+ | **Databricks** | Unity Catalog / S3 | Delta Lake interop via Iceberg |
203
+ | **Trino/Presto** | Hive connector | S3-backed |
204
+ | **Oracle** | External tables | Via object storage |
205
+ | **Pandas/GeoPandas** | obstore / fsspec | Direct Python access |
206
+
207
+ ### Example: DuckDB
208
+
209
+ ```sql
210
+ -- Configure S3 access
211
+ SET s3_endpoint = 'storage.example.com';
212
+ SET s3_access_key_id = 'analyst';
213
+ SET s3_secret_access_key = '...';
214
+ SET s3_use_ssl = true;
215
+
216
+ -- Query a Portolan dataset directly
217
+ SELECT * FROM 's3://catalog/public/census/census.parquet';
218
+
219
+ -- Or with spatial filtering
220
+ SELECT * FROM 's3://catalog/public/census/census.parquet'
221
+ WHERE ST_Within(geometry, ST_GeomFromText('POLYGON((...))'));
222
+ ```
223
+
224
+ ### Why This Matters
225
+
226
+ Portolan's value is "publish once, consume anywhere." Without consumption docs, users can publish data but don't know how to query it from their own tools. Consumption guides and connection tooling close that loop.
227
+
228
+ ---
229
+
230
+ ## Out of Scope for v1.0
231
+
232
+ | Item | Reason |
233
+ |------|--------|
234
+ | 3D Tiles | Niche; can be community-contributed later |
235
+ | Browser/Map UI | May be unnecessary with agentic workflows; revisit post-v1 |
236
+
237
+ ---
238
+
239
+ ## Summary
240
+
241
+ | Phase | Scope | Timing |
242
+ |-------|-------|--------|
243
+ | **Phase 1** | Core CLI, Python API, Spec, PMTiles, COPC | Now |
244
+ | **Parallel** | Iceberg Plugin (Javier) | Alongside Phase 1 |
245
+ | **Phase 2** | QGIS Plugin | After Phase 1 |
246
+ | **Phase 3** | Global Bootstrapper | After Phase 1 |
247
+ | **TBD** | Access Control & Visibility | To be scoped |
248
+ | **TBD** | Data Consumption & SQL Engines | To be scoped |
249
+
250
+ ---
251
+
252
+ *Portolan is an open source project under [Radiant Earth](https://radiant.earth).*