mapular-geo-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ data/
2
+ .env
3
+ __pycache__/
4
+ *.pyc
5
+ .venv/
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: mapular-geo-mcp
3
+ Version: 0.1.0
4
+ Summary: Geospatial analysis MCP server powered by DuckDB spatial + H3
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: duckdb>=0.10.0
7
+ Requires-Dist: mcp[cli]>=1.0.0
8
+ Requires-Dist: python-dotenv>=1.0.0
@@ -0,0 +1,102 @@
1
+ # mapular-geo-mcp
2
+
3
+ Spatial SQL analysis for AI agents — DuckDB with spatial + H3 extensions.
4
+
5
+ ## Quick start
6
+
7
+ ```bash
8
+ uvx mapular-geo-mcp
9
+ ```
10
+
11
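+ No credentials needed. Run SQL queries over local geospatial files (Parquet, CSV, GeoJSON) with full spatial and H3 support. The `geocode` tool calls the public Nominatim service by default; set `MAPBOX_ACCESS_TOKEN` only if you want to use the optional Mapbox provider.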
12
+
13
+ ## Claude Code setup
14
+
15
+ Add to your `.mcp.json`:
16
+
17
+ ```json
18
+ {
19
+ "mcpServers": {
20
+ "mapular-geo-mcp": {
21
+ "command": "uvx",
22
+ "args": ["mapular-geo-mcp"]
23
+ }
24
+ }
25
+ }
26
+ ```
27
+
28
+ ## Tools
29
+
30
+ | Tool | Description |
31
+ |------|-------------|
32
+ | `query` | Run DuckDB SQL with spatial + H3 extensions; returns file path + preview rows |
33
+ | `export` | Export SQL results to Parquet, CSV, or GeoJSON |
34
+ | `geocode` | Convert a free-form address to lat/lon coordinates |
35
+
36
+ **Typical workflow:** `geocode` an address -> `query` nearby POIs with ST_DWithin -> `export` results to GeoJSON.
37
+
38
+ ## Usage examples
39
+
40
+ ### Basic SQL query on a local file
41
+
42
+ ```
43
+ query(sql="SELECT * FROM read_parquet('/tmp/pois.parquet') WHERE category = 'restaurant' LIMIT 10")
44
+ ```
45
+
46
+ ### Spatial proximity query (ST_DWithin)
47
+
48
+ ```
49
+ query(sql="""
50
+ SELECT name, category, latitude, longitude
51
+ FROM read_parquet('/tmp/pois.parquet')
52
+ WHERE ST_DWithin(
53
+ ST_Point(longitude, latitude),
54
+ ST_Point(13.377, 52.516), -- Brandenburg Gate
55
+ 0.01 -- distance in degrees: ~1.1 km north-south, ~0.7 km east-west at Berlin's latitude
56
+ )
57
+ """)
58
+ ```
59
+
60
+ ### H3 spatial indexing
61
+
62
+ ```
63
+ query(sql="""
64
+ SELECT h3_latlng_to_cell(latitude, longitude, 7) AS h3_index,
65
+ COUNT(*) AS poi_count
66
+ FROM read_parquet('/tmp/pois.parquet')
67
+ GROUP BY 1
68
+ ORDER BY poi_count DESC
69
+ LIMIT 10
70
+ """)
71
+ ```
72
+
73
+ ### Export to GeoJSON
74
+
75
+ ```
76
+ export(
77
+ sql="SELECT * FROM read_parquet('/tmp/pois.parquet') LIMIT 100",
78
+ format="geojson"
79
+ )
80
+ ```
81
+
82
+ ### Geocode an address
83
+
84
+ ```
85
+ geocode(address="Brandenburg Gate, Berlin")
86
+ # -> {"lat": 52.5163, "lon": 13.3777, "display_name": "...", "confidence": "high", "provider": "nominatim"}
87
+ ```
88
+
89
+ ## File path restrictions
90
+
91
+ The `query` tool restricts `read_parquet()`, `read_csv_auto()`, `read_json()`, and `st_read()` calls to these directories:
92
+
93
+ - System temp directory (`/tmp` or `$TMPDIR`) — where `mpoi_fetch` writes its output
94
+ - `~/Downloads`
95
+
96
+ To allow an additional directory, set `MAPULAR_GEO_DATA_DIR=/path/to/data` in the server's environment.
97
+
98
+ ## Companion servers
99
+
100
+ - **mapular-mpoi-mcp** — Pre-indexed POI data (Overture/OSM). Use `mpoi_fetch` to download a Parquet file, then analyze it here with `query`.
101
+ - **mapular-mapbook-mcp** — Map visualization (future).
102
+
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "mapular-geo-mcp"
3
+ version = "0.1.0"
4
+ description = "Geospatial analysis MCP server powered by DuckDB spatial + H3"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "mcp[cli]>=1.0.0",
8
+ "duckdb>=0.10.0",
9
+ "python-dotenv>=1.0.0",
10
+ ]
11
+
12
+ [project.scripts]
13
+ mapular-geo-mcp = "mapular_geo_mcp.server:main"
14
+
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["src/mapular_geo_mcp"]
21
+
22
+ [dependency-groups]
23
+ dev = ["pytest"]
@@ -0,0 +1 @@
1
+ """mapular-geo-mcp: Geospatial analysis MCP server powered by DuckDB spatial + H3."""
@@ -0,0 +1,166 @@
1
+ """Agent-friendly error classification for DuckDB queries.
2
+
3
+ Wraps raw DuckDB exceptions into structured, actionable error dicts
4
+ so the LLM agent can understand what went wrong and how to fix it.
5
+ """
6
+
7
+ import re
8
+
9
+ import duckdb
10
+
11
+
12
def classify_error(exc: Exception) -> dict:
    """Turn an exception into a structured, agent-actionable error dict.

    The checks run in a fixed order and the first match wins:
    syntax_error, missing_extension, file_not_found, timeout, oom and
    finally the catch-all unknown.

    Args:
        exc: The exception raised while running a query.

    Returns:
        {"error": True, "category": str, "message": str, "hint": str},
        plus a "recovery" dict for every category except "unknown".
    """
    text = str(exc)
    recovery = None

    if isinstance(exc, duckdb.ParserException):
        # SQL parse failure.
        category = "syntax_error"
        hint = (
            "Check SQL syntax. DuckDB uses PostgreSQL-style SQL. "
            "Common issues: missing quotes, unsupported keywords, or wrong function names."
        )
        recovery = {"action": "rewrite_sql"}
    elif isinstance(exc, duckdb.CatalogException) and _looks_like_missing_function(text):
        # A spatial/H3 (or other) function was not found in the catalog.
        category = "missing_extension"
        missing = _extract_function_name(text)
        hint = _extension_hint(missing)
        recovery = {
            "action": "retry_with_setup",
            "setup_sql": _extension_setup_sql(missing),
        }
    elif isinstance(exc, FileNotFoundError):
        # Python-level missing file.
        category = "file_not_found"
        hint = "The file path does not exist. Verify the path and check for typos."
        recovery = {"action": "check_path"}
    elif isinstance(exc, duckdb.IOException) and _looks_like_file_not_found(text):
        # DuckDB-level missing file; quote the path back to the agent.
        category = "file_not_found"
        hint = f"File not found: {_extract_path(text)}. Verify the path exists and is accessible."
        recovery = {"action": "check_path"}
    elif _looks_like_timeout(exc, text):
        category = "timeout"
        hint = (
            "Query exceeded the time limit. Try adding LIMIT, filtering with WHERE, "
            "or reducing the dataset size."
        )
        recovery = {"action": "simplify_query"}
    elif isinstance(exc, MemoryError) or _looks_like_oom(text):
        category = "oom"
        hint = (
            "Query exceeded available memory. Try using LIMIT, sampling with "
            "TABLESAMPLE, or filtering to reduce the data size."
        )
        recovery = {"action": "simplify_query"}
    else:
        # Nothing matched: report the raw message with a generic hint.
        category = "unknown"
        hint = "An unexpected error occurred. Check the message for details."

    return _build(category, text, hint, recovery=recovery)
85
+
86
+
87
+ def _build(category: str, message: str, hint: str, recovery: dict | None = None) -> dict:
88
+ d = {
89
+ "error": True,
90
+ "category": category,
91
+ "message": message,
92
+ "hint": hint,
93
+ }
94
+ if recovery:
95
+ d["recovery"] = recovery
96
+ return d
97
+
98
+
99
+ def _looks_like_missing_function(msg: str) -> bool:
100
+ lower = msg.lower()
101
+ return "does not exist" in lower and ("function" in lower or "macro" in lower)
102
+
103
+
104
+ def _extract_function_name(msg: str) -> str:
105
+ # "Scalar Function with name st_point does not exist!"
106
+ match = re.search(r"name\s+(\w+)\s+does not exist", msg, re.IGNORECASE)
107
+ return match.group(1) if match else ""
108
+
109
+
110
+ def _extension_hint(func_name: str) -> str:
111
+ lower = func_name.lower()
112
+ if lower.startswith("st_"):
113
+ return (
114
+ f"Function '{func_name}' requires the spatial extension. "
115
+ "Run: INSTALL spatial; LOAD spatial;"
116
+ )
117
+ if lower.startswith("h3"):
118
+ return (
119
+ f"Function '{func_name}' requires the H3 extension. "
120
+ "Run: INSTALL h3 FROM community; LOAD h3;"
121
+ )
122
+ return (
123
+ f"Function '{func_name}' not found. It may require an extension. "
124
+ "Check https://duckdb.org/docs/extensions/overview for available extensions."
125
+ )
126
+
127
+
128
+ def _extension_setup_sql(func_name: str) -> str:
129
+ """Return the SQL needed to install and load the extension for a function."""
130
+ lower = func_name.lower()
131
+ if lower.startswith("st_"):
132
+ return "INSTALL spatial; LOAD spatial;"
133
+ if lower.startswith("h3"):
134
+ return "INSTALL h3 FROM community; LOAD h3;"
135
+ return f"-- Unknown extension for '{func_name}'. Check DuckDB docs."
136
+
137
+
138
+ def _looks_like_file_not_found(msg: str) -> bool:
139
+ lower = msg.lower()
140
+ return any(
141
+ pattern in lower
142
+ for pattern in [
143
+ "no such file",
144
+ "file not found",
145
+ "does not exist",
146
+ "cannot open",
147
+ "no files found",
148
+ ]
149
+ )
150
+
151
+
152
+ def _extract_path(msg: str) -> str:
153
+ match = re.search(r"['\"]([^'\"]+)['\"]", msg)
154
+ return match.group(1) if match else "(unknown path)"
155
+
156
+
157
+ def _looks_like_timeout(exc: Exception, msg: str) -> bool:
158
+ lower = msg.lower()
159
+ if "timeout" in lower or "cancelled" in lower or "interrupted" in lower:
160
+ return True
161
+ return isinstance(exc, duckdb.InvalidInputException) and "interrupt" in lower
162
+
163
+
164
+ def _looks_like_oom(msg: str) -> bool:
165
+ lower = msg.lower()
166
+ return "out of memory" in lower or ("memory" in lower and "exceed" in lower)
@@ -0,0 +1,108 @@
1
+ """Export query results to Parquet, CSV, or GeoJSON.
2
+
3
+ Reuses the persistent GeoSession from session.py.
4
+ Views and tables persist across calls within the same process.
5
+
6
+ SECURITY: LOCAL-ONLY tool — DuckDB SQL can access the local filesystem.
7
+ """
8
+
9
+ import tempfile
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from uuid import uuid4
13
+
14
+ from mapular_geo_mcp.errors import classify_error
15
+ from mapular_geo_mcp.query import _copy_with_geometry_fallback
16
+ from mapular_geo_mcp.session import _session
17
+
18
# Output formats accepted by export_query; any other value is rejected
# before any SQL is executed.
SUPPORTED_FORMATS = ("parquet", "csv", "geojson")
19
+
20
+
21
def export_query(
    sql: str,
    format: str,
    output_path: str | None = None,
) -> dict:
    """Export DuckDB SQL results to a file.

    The query is materialized once into a uniquely named temp table, the
    rows are counted, and the table is then written out in the requested
    format. The temp table is dropped afterwards whether or not the
    export succeeded.

    Args:
        sql: A single DuckDB SQL query. Reference files via
            read_parquet(), read_csv_auto(), etc. Trailing semicolons
            are tolerated.
        format: Output format — "parquet", "csv", or "geojson".
        output_path: Optional absolute path for the output file.
            If None, auto-generates in a temp directory.

    Returns:
        On success: {"path": str, "format": str, "row_count": int}
        On error: the structured dict produced by classify_error, i.e.
            {"error": True, "category": str, "message": str, "hint": str}
            with an optional "recovery" entry.
    """
    if format not in SUPPORTED_FORMATS:
        return classify_error(
            ValueError(f"Unsupported format: {format!r}. Use one of: {', '.join(SUPPORTED_FORMATS)}.")
        )

    # Auto-create views for any read_parquet() references.
    # NOTE(review): ensure_views is defined in session.py; a ValueError from
    # it is reported to the caller as a classified error.
    try:
        _session.ensure_views(sql)
    except ValueError as e:
        return classify_error(e)

    # The query is wrapped in "AS (...)" below; a trailing ';' would end up
    # inside the parentheses and make the wrapper fail to parse.
    statement = sql.rstrip("; \t\r\n")

    result_table = f"__export_{uuid4().hex[:8]}"
    cursor = _session.conn.cursor()
    try:
        # Materialize user SQL exactly once
        cursor.execute(f"CREATE TEMP TABLE {result_table} AS ({statement})")
        try:
            row_count = cursor.execute(f"SELECT count(*) FROM {result_table}").fetchone()[0]

            # Resolve output path
            dest = _resolve_output_path(output_path, format)
            dest.parent.mkdir(parents=True, exist_ok=True)

            if format == "geojson":
                _export_geojson(cursor, result_table, dest, row_count)
            else:
                _copy_with_geometry_fallback(cursor, result_table, dest, format)

            return {
                "path": str(dest.resolve()),
                "format": format,
                "row_count": row_count,
            }
        finally:
            # Always remove the scratch table, even when the export failed.
            cursor.execute(f"DROP TABLE IF EXISTS {result_table}")

    except Exception as e:
        # Tool boundary: every failure is returned as a classified dict
        # instead of being raised to the MCP caller.
        return classify_error(e)
    finally:
        cursor.close()
78
+
79
+
80
+ def _resolve_output_path(output_path: str | None, format: str) -> Path:
81
+ """Resolve the output file path, auto-generating if needed."""
82
+ if output_path is not None:
83
+ return Path(output_path)
84
+
85
+ output_dir = Path(tempfile.gettempdir()) / "mapular-geo-mcp"
86
+ output_dir.mkdir(parents=True, exist_ok=True)
87
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
88
+ ext = format if format != "geojson" else "geojson"
89
+ return output_dir / f"export_{timestamp}.{ext}"
90
+
91
+
92
+ def _export_geojson(
93
+ conn,
94
+ table_name: str,
95
+ dest: Path,
96
+ row_count: int,
97
+ ) -> None:
98
+ """Export to GeoJSON using DuckDB's GDAL driver.
99
+
100
+ For zero-row results, writes a minimal empty FeatureCollection.
101
+ """
102
+ if row_count == 0:
103
+ dest.write_text('{"type":"FeatureCollection","features":[]}')
104
+ return
105
+
106
+ conn.execute(
107
+ f"COPY (SELECT * FROM {table_name}) TO '{dest}' WITH (FORMAT GDAL, DRIVER 'GeoJSON')"
108
+ )
@@ -0,0 +1,125 @@
1
+ """Geocode addresses to lat/lon coordinates.
2
+
3
+ Uses Nominatim (OpenStreetMap) by default. Optionally supports Mapbox
4
+ when MAPBOX_ACCESS_TOKEN is set.
5
+
6
+ Nominatim usage policy: identifiable User-Agent, max 1 req/s.
7
+ """
8
+
9
+ import json
10
+ import os
11
+ import time
12
+ from urllib.error import URLError
13
+ from urllib.parse import quote_plus
14
+ from urllib.request import Request, urlopen
15
+
16
# Identifying User-Agent header value attached to every geocoding request
# this module sends.
USER_AGENT = "mapular-geo-mcp/0.1.0 (https://github.com/mapular/mapular-geo-mcp)"

# Simple rate limiter: time.monotonic() reading taken after the most recent
# Nominatim request; 0.0 means no request has been made yet.
_last_nominatim_request: float = 0.0
20
+
21
+
22
+ def _importance_to_confidence(importance: float) -> str:
23
+ """Map Nominatim importance score to confidence level."""
24
+ if importance >= 0.7:
25
+ return "high"
26
+ elif importance >= 0.4:
27
+ return "medium"
28
+ return "low"
29
+
30
+
31
def _geocode_nominatim(address: str) -> dict:
    """Geocode via Nominatim (OpenStreetMap).

    Sleeps as needed so that consecutive calls are at least one second
    apart, then requests the single best match for *address*.

    Args:
        address: Free-form address string.

    Returns:
        On success: {"lat": float, "lon": float, "display_name": str,
            "confidence": "high"|"medium"|"low", "provider": "nominatim"}
        On failure (network error, timeout, unparsable response, or no
            match): {"error": True, "message": str}
    """
    global _last_nominatim_request

    # Rate limit: max 1 req/s
    now = time.monotonic()
    elapsed = now - _last_nominatim_request
    if _last_nominatim_request > 0 and elapsed < 1.0:
        time.sleep(1.0 - elapsed)

    url = (
        f"https://nominatim.openstreetmap.org/search"
        f"?q={quote_plus(address)}&format=json&limit=1"
    )
    req = Request(url)
    req.add_header("User-Agent", USER_AGENT)

    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
    except (URLError, TimeoutError):
        # A timeout while reading the body is raised as TimeoutError rather
        # than URLError, so both are reported as "unreachable".
        return {"error": True, "message": "Geocoding service unreachable. Check network connection."}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        return {"error": True, "message": "Geocoding service returned an invalid response. Try again later."}
    finally:
        # Record the attempt even on failure so retries stay rate limited.
        _last_nominatim_request = time.monotonic()

    if not data:
        return {"error": True, "message": "No results for address. Try a more specific query."}

    hit = data[0]
    importance = float(hit.get("importance", 0))

    return {
        "lat": float(hit["lat"]),
        "lon": float(hit["lon"]),
        "display_name": hit.get("display_name", ""),
        "confidence": _importance_to_confidence(importance),
        "provider": "nominatim",
    }
69
+
70
+
71
def _geocode_mapbox(address: str) -> dict:
    """Geocode via the Mapbox Geocoding API.

    When MAPBOX_ACCESS_TOKEN is not set, the lookup is delegated to
    Nominatim and a "warning" entry is added to whatever that returns.

    Args:
        address: Free-form address string.

    Returns:
        On success: {"lat": float, "lon": float, "display_name": str,
            "confidence": "high"|"medium"|"low", "provider": "mapbox"}
        On failure (network error, timeout, unparsable response, or no
            match): {"error": True, "message": str}
    """
    # Imported here because the module-level imports only bring in quote_plus.
    from urllib.parse import quote

    token = os.environ.get("MAPBOX_ACCESS_TOKEN")
    if not token:
        # Fall back to nominatim with warning
        result = _geocode_nominatim(address)
        result["warning"] = "MAPBOX_ACCESS_TOKEN not set. Using nominatim as fallback."
        return result

    # The address is a URL *path* segment here. quote_plus would turn spaces
    # into '+', which only means "space" inside a query string, so the text
    # is percent-encoded instead (including '/' and ';').
    url = (
        f"https://api.mapbox.com/geocoding/v5/mapbox.places"
        f"/{quote(address, safe='')}.json?access_token={quote(token, safe='')}&limit=1"
    )
    req = Request(url)
    req.add_header("User-Agent", USER_AGENT)

    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
    except (URLError, TimeoutError):
        # A timeout while reading the body is raised as TimeoutError rather
        # than URLError, so both are reported as "unreachable".
        return {"error": True, "message": "Geocoding service unreachable. Check network connection."}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        return {"error": True, "message": "Geocoding service returned an invalid response. Try again later."}

    features = data.get("features", [])
    if not features:
        return {"error": True, "message": "No results for address. Try a more specific query."}

    feat = features[0]
    # The code reads "center" as a [lon, lat] pair.
    lon, lat = feat["center"]
    relevance = feat.get("relevance", 0)

    return {
        # Coerce to float so whole-number coordinates parsed as int still
        # match the documented return type.
        "lat": float(lat),
        "lon": float(lon),
        "display_name": feat.get("place_name", ""),
        "confidence": _importance_to_confidence(relevance),
        "provider": "mapbox",
    }
108
+
109
+
110
def geocode_address(address: str, provider: str = "nominatim") -> dict:
    """Look up lat/lon coordinates for a free-form address.

    Args:
        address: Free-form address string.
        provider: "mapbox" selects the Mapbox backend; any other value
            (including the default "nominatim") uses Nominatim.

    Returns:
        On success: {"lat": float, "lon": float, "display_name": str,
                     "confidence": "high"|"medium"|"low", "provider": str}
        On no results: {"error": True, "message": str}
        On network error: {"error": True, "message": str}
    """
    backend = _geocode_mapbox if provider == "mapbox" else _geocode_nominatim
    return backend(address)