oex 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oex/__init__.py +32 -0
- oex/boundary.py +120 -0
- oex/cli.py +207 -0
- oex/config/__init__.py +35 -0
- oex/config/loader.py +118 -0
- oex/config/schema.py +143 -0
- oex/defaults/__init__.py +1 -0
- oex/defaults/base.yaml +372 -0
- oex/duckdb_session.py +56 -0
- oex/exporter.py +366 -0
- oex/hdx_publisher.py +108 -0
- oex/logging_setup.py +29 -0
- oex/metadata.py +160 -0
- oex/osm/__init__.py +7 -0
- oex/osm/build_cache.py +129 -0
- oex/osm/fetch_planet.py +103 -0
- oex/osm/geofabrik.py +110 -0
- oex/osm/runner.py +225 -0
- oex/overture/__init__.py +5 -0
- oex/overture/runner.py +101 -0
- oex/py.typed +0 -0
- oex/sources/__init__.py +10 -0
- oex/sources/base.py +37 -0
- oex/sql.py +59 -0
- oex/system.py +19 -0
- oex/writers.py +120 -0
- oex/zip_bundle.py +56 -0
- oex-0.2.0.dist-info/METADATA +132 -0
- oex-0.2.0.dist-info/RECORD +32 -0
- oex-0.2.0.dist-info/WHEEL +4 -0
- oex-0.2.0.dist-info/entry_points.txt +3 -0
- oex-0.2.0.dist-info/licenses/LICENSE +674 -0
oex/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""oex: country-scale OSM and Overture vector exports."""
|
|
2
|
+
|
|
3
|
+
from oex.config.schema import (
|
|
4
|
+
BoundaryConfig,
|
|
5
|
+
CategoryConfig,
|
|
6
|
+
DuckdbConfig,
|
|
7
|
+
HdxConfig,
|
|
8
|
+
LoggingConfig,
|
|
9
|
+
OsmSourceConfig,
|
|
10
|
+
OutputConfig,
|
|
11
|
+
OvertureSourceConfig,
|
|
12
|
+
ParallelConfig,
|
|
13
|
+
RootConfig,
|
|
14
|
+
)
|
|
15
|
+
from oex.exporter import Exporter, ExportResult
|
|
16
|
+
|
|
17
|
+
__version__ = "0.2.0"
|
|
18
|
+
__all__ = [
|
|
19
|
+
"BoundaryConfig",
|
|
20
|
+
"CategoryConfig",
|
|
21
|
+
"DuckdbConfig",
|
|
22
|
+
"ExportResult",
|
|
23
|
+
"Exporter",
|
|
24
|
+
"HdxConfig",
|
|
25
|
+
"LoggingConfig",
|
|
26
|
+
"OsmSourceConfig",
|
|
27
|
+
"OutputConfig",
|
|
28
|
+
"OvertureSourceConfig",
|
|
29
|
+
"ParallelConfig",
|
|
30
|
+
"RootConfig",
|
|
31
|
+
"__version__",
|
|
32
|
+
]
|
oex/boundary.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Country boundary resolution: user-supplied geom or geoBoundaries ADM0."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import threading
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from oex.config.schema import BoundaryConfig
|
|
11
|
+
from oex.logging_setup import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
_GEOBOUNDARIES_TPL = "https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{level}/"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class Boundary:
    """A resolved country boundary, ready to clip exports against."""

    # ISO 3166-1 alpha-3 code, always upper-cased by the constructors in this module.
    iso3: str
    # (min_x, min_y, max_x, max_y) derived from the geometry's coordinates.
    bbox: tuple[float, float, float, float]
    # The geometry serialized as a GeoJSON string (single geometry or GeometryCollection).
    geojson: str
    # Provenance label: "user-provided" or "geoBoundaries <release> <level>".
    source: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
_lock = threading.Lock()
|
|
27
|
+
_cache: dict[tuple[str, str, str], Boundary] = {}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _bbox_from_geometry(geometry: dict[str, Any]) -> tuple[float, float, float, float]:
|
|
31
|
+
coords: list[float] = []
|
|
32
|
+
|
|
33
|
+
def walk(node: Any) -> None:
|
|
34
|
+
if (
|
|
35
|
+
isinstance(node, list)
|
|
36
|
+
and len(node) == 2
|
|
37
|
+
and all(isinstance(v, (int, float)) for v in node)
|
|
38
|
+
):
|
|
39
|
+
coords.extend(node)
|
|
40
|
+
elif isinstance(node, list):
|
|
41
|
+
for item in node:
|
|
42
|
+
walk(item)
|
|
43
|
+
|
|
44
|
+
walk(geometry.get("coordinates", []))
|
|
45
|
+
if not coords:
|
|
46
|
+
raise ValueError("No coordinates found in geometry")
|
|
47
|
+
xs = coords[0::2]
|
|
48
|
+
ys = coords[1::2]
|
|
49
|
+
return (min(xs), min(ys), max(xs), max(ys))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _featurecollection_to_geometry(fc: dict[str, Any]) -> dict[str, Any]:
|
|
53
|
+
# ST_GeomFromGeoJSON accepts a single geometry or a GeometryCollection,
|
|
54
|
+
# not a FeatureCollection.
|
|
55
|
+
features = fc.get("features", [])
|
|
56
|
+
geometries = [f["geometry"] for f in features if f.get("geometry")]
|
|
57
|
+
if len(geometries) == 1:
|
|
58
|
+
return geometries[0]
|
|
59
|
+
return {"type": "GeometryCollection", "geometries": geometries}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _fetch_geoboundaries(iso3: str, release: str, level: str) -> Boundary:
    """Fetch the boundary for `iso3` from the geoBoundaries API.

    Two HTTP round-trips: the metadata endpoint supplies the GeoJSON download
    URL (preferring the full geometry over the simplified one), then the
    geometry itself is downloaded and normalized.

    Raises RuntimeError when the metadata lacks a GeoJSON URL, and
    requests.HTTPError on any non-2xx response.
    """
    iso = iso3.upper()
    meta_url = _GEOBOUNDARIES_TPL.format(iso3=iso, level=level)
    logger.info("Fetching boundary metadata: %s", meta_url)
    meta_resp = requests.get(meta_url, timeout=60)
    meta_resp.raise_for_status()
    meta = meta_resp.json()

    geojson_url = meta.get("gjDownloadURL") or meta.get("simplifiedGeometryGeoJSON")
    if not geojson_url:
        raise RuntimeError(f"geoBoundaries response missing GeoJSON URL for {iso3}")

    logger.info("Downloading boundary geometry: %s", geojson_url)
    geom_resp = requests.get(geojson_url, timeout=180)
    geom_resp.raise_for_status()
    payload = geom_resp.json()

    if payload.get("type") == "FeatureCollection":
        geometry = _featurecollection_to_geometry(payload)
    else:
        geometry = payload
    return Boundary(
        iso3=iso,
        bbox=_bbox_from_geometry(geometry),
        geojson=json.dumps(geometry),
        source=f"geoBoundaries {release} {level}",
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _from_user_geom(iso3: str, geom_str: str) -> Boundary:
    """Build a Boundary from a user-supplied GeoJSON string.

    Accepts either a bare geometry or a FeatureCollection (which is collapsed
    first). Raises json.JSONDecodeError on malformed input.
    """
    parsed = json.loads(geom_str)
    if parsed.get("type") == "FeatureCollection":
        geometry = _featurecollection_to_geometry(parsed)
    else:
        geometry = parsed
    return Boundary(
        iso3=iso3.upper(),
        bbox=_bbox_from_geometry(geometry),
        geojson=json.dumps(geometry),
        source="user-provided",
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def resolve_boundary(iso3: str, cfg: BoundaryConfig) -> Boundary:
    """Return the boundary for `iso3`, preferring cfg.geom over geoBoundaries.

    Results are memoized per (iso3, release, level) in a module-level cache.
    The lock guards only the cache dict itself, so concurrent first calls for
    the same key may both fetch — last writer wins, which is harmless here.

    Raises ValueError when iso3 is empty.
    """
    if not iso3:
        raise ValueError("iso3 must be set on the config")

    key = (iso3.upper(), cfg.geoboundaries_release, cfg.geoboundaries_level)
    with _lock:
        if key in _cache:
            return _cache[key]

    # Fetch outside the lock so a slow download doesn't serialize other lookups.
    if cfg.geom:
        boundary = _from_user_geom(iso3, cfg.geom)
    else:
        boundary = _fetch_geoboundaries(iso3, cfg.geoboundaries_release, cfg.geoboundaries_level)

    with _lock:
        _cache[key] = boundary
    logger.info(
        "Resolved boundary for %s from %s; bbox=%s",
        boundary.iso3,
        boundary.source,
        boundary.bbox,
    )
    return boundary
|
oex/cli.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Typer CLI for oex."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from oex.config.loader import (
|
|
8
|
+
apply_overrides,
|
|
9
|
+
iter_configs,
|
|
10
|
+
load_config,
|
|
11
|
+
select_categories,
|
|
12
|
+
)
|
|
13
|
+
from oex.config.schema import RootConfig
|
|
14
|
+
from oex.exporter import Exporter, ExportResult
|
|
15
|
+
from oex.logging_setup import get_logger, setup_logging
|
|
16
|
+
from oex.osm.runner import OsmRunner
|
|
17
|
+
from oex.overture.runner import OvertureRunner
|
|
18
|
+
|
|
19
|
+
app = typer.Typer(
|
|
20
|
+
add_completion=False,
|
|
21
|
+
no_args_is_help=True,
|
|
22
|
+
help="Country-scale OSM and Overture vector exports.",
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@app.callback()
def _global(
    log_level: str = typer.Option("INFO", envvar="LOG_LEVEL"),
) -> None:
    # Runs before every sub-command: configure logging once, honoring the
    # LOG_LEVEL environment variable. (No docstring on purpose — typer would
    # surface it as help text; the app-level help is set on the Typer instance.)
    setup_logging(level=log_level)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _resolve_config(
|
|
34
|
+
iso3_or_yaml: str | None,
|
|
35
|
+
configs_dir: Path | None,
|
|
36
|
+
config: Path | None,
|
|
37
|
+
) -> list[Path | None]:
|
|
38
|
+
if configs_dir is not None:
|
|
39
|
+
return list(iter_configs(configs_dir))
|
|
40
|
+
if config is not None:
|
|
41
|
+
return [config]
|
|
42
|
+
if iso3_or_yaml:
|
|
43
|
+
candidate = Path("configs") / f"{iso3_or_yaml.lower()}.yaml"
|
|
44
|
+
if candidate.exists():
|
|
45
|
+
return [candidate]
|
|
46
|
+
return [None]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _build_overrides(
|
|
50
|
+
iso3_or_yaml: str | None,
|
|
51
|
+
hdx_push: bool | None,
|
|
52
|
+
output_dir: Path | None,
|
|
53
|
+
osm_engine: str | None = None,
|
|
54
|
+
) -> dict[str, object]:
|
|
55
|
+
overrides: dict[str, object] = {}
|
|
56
|
+
if iso3_or_yaml and len(iso3_or_yaml) <= 3 and iso3_or_yaml.isalpha():
|
|
57
|
+
overrides["iso3"] = iso3_or_yaml.upper()
|
|
58
|
+
if hdx_push is True:
|
|
59
|
+
overrides["hdx.push"] = True
|
|
60
|
+
if hdx_push is False:
|
|
61
|
+
overrides["hdx.push"] = False
|
|
62
|
+
if output_dir is not None:
|
|
63
|
+
overrides["output.dir"] = str(output_dir)
|
|
64
|
+
if osm_engine is not None:
|
|
65
|
+
overrides["source.osm.engine"] = osm_engine
|
|
66
|
+
return overrides
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _summarise(results: list[ExportResult]) -> int:
    """Log one summary line per export result and return the process exit code.

    Returns 0 when no category in any run failed, otherwise 1.
    """
    log = get_logger("oex.cli")
    for result in results:
        log.info(
            "%s/%s: %d ok, %d empty, %d skipped, %d failed in %.1fs",
            result.iso3,
            result.source_name,
            result.succeeded,
            result.empty,
            result.skipped,
            result.failed,
            result.total_duration_s,
        )
    return 1 if any(r.failed for r in results) else 0
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _run_one(
    yaml_path: Path | None,
    overrides: dict[str, object],
    theme: str | None,
    runner_factory,
) -> ExportResult:
    """Load one config, apply CLI overrides and theme filter, then export.

    `runner_factory` is a zero-arg callable (e.g. OsmRunner or OvertureRunner)
    constructing the source-specific runner.
    """
    cfg: RootConfig = select_categories(
        apply_overrides(load_config(yaml_path), overrides),
        theme,
    )
    return Exporter(cfg, runner_factory()).run()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _resolve_args(
|
|
99
|
+
arg1: str | None,
|
|
100
|
+
arg2: str | None,
|
|
101
|
+
configs_dir: Path | None,
|
|
102
|
+
config: Path | None,
|
|
103
|
+
) -> tuple[str | None, str | None]:
|
|
104
|
+
# When --config or --configs-dir is given, the first positional is the theme,
|
|
105
|
+
# not the iso3 (iso3 comes from the YAML).
|
|
106
|
+
if configs_dir is not None or config is not None:
|
|
107
|
+
return None, arg1 if arg2 is None else arg2
|
|
108
|
+
return arg1, arg2
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.command("overture")
def cmd_overture(
    iso3_or_yaml: str | None = typer.Argument(
        None, help="ISO3 like NPL, or name of a YAML in ./configs/"
    ),
    theme: str | None = typer.Argument(None, help="Optional theme override (e.g. buildings)"),
    configs_dir: Path | None = typer.Option(
        None, "--configs-dir", help="Run every YAML in this directory"
    ),
    config: Path | None = typer.Option(None, "--config", "-c", help="Explicit config YAML path"),
    output_dir: Path | None = typer.Option(None, "--output-dir", "-o"),
    hdx_push: bool | None = typer.Option(None, "--hdx-push/--no-hdx-push"),
) -> None:
    """Export Overture data."""
    # With --config/--configs-dir the first positional is the theme, not the iso3.
    iso3_resolved, theme_resolved = _resolve_args(iso3_or_yaml, theme, configs_dir, config)
    yamls = _resolve_config(iso3_resolved, configs_dir, config)
    overrides = _build_overrides(iso3_resolved, hdx_push, output_dir)
    # One export per resolved YAML; a [None] entry means bundled defaults only.
    results = [_run_one(y, overrides, theme_resolved, OvertureRunner) for y in yamls]
    # Exit 1 if any run reported failed categories (see _summarise).
    raise typer.Exit(code=_summarise(results))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@app.command("osm")
def cmd_osm(
    iso3_or_yaml: str | None = typer.Argument(
        None, help="ISO3 like NPL, or name of a YAML in ./configs/"
    ),
    theme: str | None = typer.Argument(None, help="Optional theme override (e.g. buildings)"),
    configs_dir: Path | None = typer.Option(None, "--configs-dir"),
    config: Path | None = typer.Option(None, "--config", "-c"),
    output_dir: Path | None = typer.Option(None, "--output-dir", "-o"),
    hdx_push: bool | None = typer.Option(None, "--hdx-push/--no-hdx-push"),
    engine: str | None = typer.Option(
        None,
        "--engine",
        help="OSM engine: 'geofabrik' (default) or 'planet_parquet'",
    ),
) -> None:
    """Export OSM data via the configured engine."""
    # With --config/--configs-dir the first positional is the theme, not the iso3.
    iso3_resolved, theme_resolved = _resolve_args(iso3_or_yaml, theme, configs_dir, config)
    yamls = _resolve_config(iso3_resolved, configs_dir, config)
    # --engine flows into the source.osm.engine config key via the dotted override.
    overrides = _build_overrides(iso3_resolved, hdx_push, output_dir, osm_engine=engine)
    results = [_run_one(y, overrides, theme_resolved, OsmRunner) for y in yamls]
    # Exit 1 if any run reported failed categories (see _summarise).
    raise typer.Exit(code=_summarise(results))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@app.command("osm-build-cache")
def cmd_osm_build_cache(
    pbf: Path | None = typer.Option(None, "--pbf", help="Local PBF path"),
    planet: bool = typer.Option(False, "--planet", help="Download the latest planet PBF"),
    config: Path | None = typer.Option(
        None, "--config", "-c", help="Config to drive theme tag filters"
    ),
    snapshot: str | None = typer.Option(
        None, "--snapshot", help="Snapshot label, defaults to today"
    ),
    themes: str | None = typer.Option(
        None, "--themes", help="Comma-separated theme slugs to limit"
    ),
) -> None:
    """Build the planet OSM PBF -> per-theme parquet cache at <cache_dir>/planet/<snapshot>/."""
    # Deferred imports: only this command needs the cache-building machinery.
    from oex.osm.build_cache import build_cache
    from oex.osm.fetch_planet import download_pbf

    cfg: RootConfig = load_config(config)

    # --planet and --pbf are mutually exclusive sources for the input PBF;
    # exactly one must be supplied.
    if planet and pbf is not None:
        raise typer.BadParameter("Pass either --planet or --pbf, not both")
    if not planet and pbf is None:
        raise typer.BadParameter("One of --planet or --pbf is required")

    if planet:
        src = cfg.source["osm"]
        # download_pbf stores the planet file under <cache_dir>/_pbf; md5_url is
        # presumably used for checksum verification — confirm in fetch_planet.
        result = download_pbf(src.pbf_url, src.cache_dir + "/_pbf", md5_url=src.md5_url)
        pbf_path = result.path
    else:
        assert pbf is not None  # narrowed by the mutual-exclusion checks above
        pbf_path = pbf

    # --themes "a, b" -> ["a", "b"]; None means build all themes.
    theme_list = [t.strip() for t in themes.split(",")] if themes else None
    cache_root = Path(cfg.source["osm"].cache_dir) / "planet"
    manifest = build_cache(
        cfg,
        pbf_path,
        cache_root=cache_root,
        snapshot=snapshot,
        themes_filter=theme_list,
    )
    typer.echo(f"Cache snapshot: {manifest.snapshot}")
    typer.echo(f"Themes built: {[t.theme for t in manifest.themes]}")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def main() -> None:
    """CLI entry point: dispatch to the Typer application."""
    app()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
if __name__ == "__main__":
|
|
207
|
+
main()
|
oex/config/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Typed configuration loading."""
|
|
2
|
+
|
|
3
|
+
from oex.config.loader import (
|
|
4
|
+
ConfigError,
|
|
5
|
+
apply_overrides,
|
|
6
|
+
iter_configs,
|
|
7
|
+
load_config,
|
|
8
|
+
select_categories,
|
|
9
|
+
)
|
|
10
|
+
from oex.config.schema import (
|
|
11
|
+
BoundaryConfig,
|
|
12
|
+
CategoryConfig,
|
|
13
|
+
HdxConfig,
|
|
14
|
+
OsmSourceConfig,
|
|
15
|
+
OutputConfig,
|
|
16
|
+
OvertureSourceConfig,
|
|
17
|
+
ParallelConfig,
|
|
18
|
+
RootConfig,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"BoundaryConfig",
|
|
23
|
+
"CategoryConfig",
|
|
24
|
+
"ConfigError",
|
|
25
|
+
"HdxConfig",
|
|
26
|
+
"OsmSourceConfig",
|
|
27
|
+
"OutputConfig",
|
|
28
|
+
"OvertureSourceConfig",
|
|
29
|
+
"ParallelConfig",
|
|
30
|
+
"RootConfig",
|
|
31
|
+
"apply_overrides",
|
|
32
|
+
"iter_configs",
|
|
33
|
+
"load_config",
|
|
34
|
+
"select_categories",
|
|
35
|
+
]
|
oex/config/loader.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Layered YAML config: bundled defaults < user YAML < dotlist overrides."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from importlib import resources
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, cast
|
|
8
|
+
|
|
9
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
|
10
|
+
|
|
11
|
+
from oex.config.schema import RootConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ConfigError(ValueError):
    """Raised when a configuration is malformed.

    Subclasses ValueError so callers may catch either type.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _load_yaml(source: str | os.PathLike[str]) -> DictConfig:
    """Parse YAML from a file path (when it exists) or from a raw YAML string.

    Raises ConfigError when the source is neither, or when the parsed
    document is not a top-level mapping.
    """
    if isinstance(source, (str, os.PathLike)) and Path(source).exists():
        # An existing path wins over treating the value as inline YAML.
        document = Path(source).read_text(encoding="utf-8")
    elif isinstance(source, str):
        document = source
    else:
        raise ConfigError(f"Cannot load config from {source!r}")

    parsed = OmegaConf.create(document)
    if isinstance(parsed, DictConfig):
        return parsed
    raise ConfigError("Top-level YAML must be a mapping")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _load_defaults() -> DictConfig:
    """Load the bundled defaults from the packaged oex/defaults/base.yaml."""
    base_text = (resources.files("oex.defaults") / "base.yaml").read_text(encoding="utf-8")
    parsed = OmegaConf.create(base_text)
    if isinstance(parsed, DictConfig):
        return parsed
    raise ConfigError("base.yaml is malformed")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _load_categories_file(path: str | os.PathLike[str]) -> ListConfig:
    """Read an external categories YAML and return its `categories:` list.

    Raises ConfigError when the document lacks a top-level `categories:` list.
    """
    # _load_yaml already prefers an existing path and falls back to treating
    # the value as inline YAML, so the previous exists()-based branching
    # (which called _load_yaml either way) was dead logic.
    raw = _load_yaml(str(path))
    if "categories" in raw and isinstance(raw.categories, ListConfig):
        return raw.categories
    raise ConfigError(f"categories_file {path!r} must contain a top-level `categories:` list")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def load_config(
    user_config: str | os.PathLike[str] | None = None,
    overrides: list[str] | None = None,
) -> RootConfig:
    """Build a RootConfig. categories precedence: defaults < categories_file < inline `categories:`.

    Layering order: bundled defaults, then the user YAML (path or inline
    string), then dotlist overrides. The final merge against the structured
    RootConfig schema type-checks the result.

    Raises ConfigError when any layer is malformed or the merge does not
    produce a RootConfig.
    """
    # Merge plain (untyped) configs first so user YAML can replace the
    # categories list wholesale without tripping the structured-list type check.
    merged: DictConfig = _load_defaults()

    if user_config is not None:
        user = _load_yaml(user_config)

        # categories_file replaces the default categories before any inline list.
        if "categories_file" in user and user.categories_file:
            merged.categories = _load_categories_file(str(user.categories_file))

        # An inline `categories:` list wins over categories_file; it is removed
        # from the user config so the later merge can't element-wise combine it.
        if "categories" in user and isinstance(user.categories, ListConfig):
            merged.categories = user.categories
            del user["categories"]

        merged = cast(DictConfig, OmegaConf.merge(merged, user))

    if overrides:
        merged = cast(DictConfig, OmegaConf.merge(merged, OmegaConf.from_dotlist(overrides)))

    # Resolve ${...} interpolations before type-checking against the schema.
    OmegaConf.resolve(merged)

    schema = OmegaConf.structured(RootConfig)
    typed = cast(DictConfig, OmegaConf.merge(schema, merged))
    container: Any = OmegaConf.to_object(typed)
    if not isinstance(container, RootConfig):
        raise ConfigError("Merged config did not resolve to RootConfig")
    return container
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def apply_overrides(cfg: RootConfig, overrides: dict[str, Any]) -> RootConfig:
    """Apply a dict of dotted-path overrides to an already-loaded config.

    None values are skipped (meaning "not supplied on the CLI"). Returns a new
    RootConfig; raises ConfigError if the merge fails to round-trip.
    """
    merged: DictConfig = cast(DictConfig, OmegaConf.structured(cfg))
    dotted = [f"{key}={value}" for key, value in overrides.items() if value is not None]
    if dotted:
        merged = cast(DictConfig, OmegaConf.merge(merged, OmegaConf.from_dotlist(dotted)))
    OmegaConf.resolve(merged)
    result: Any = OmegaConf.to_object(merged)
    if isinstance(result, RootConfig):
        return result
    raise ConfigError("Override merge did not resolve to RootConfig")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def select_categories(cfg: RootConfig, theme: str | None) -> RootConfig:
    """Restrict the config to the categories whose slugified name matches `theme`.

    Returns `cfg` unchanged when theme is None. Raises ConfigError when no
    category matches.
    """
    if theme is None:
        return cfg

    def _slug(name: str) -> str:
        return name.strip().lower().replace("-", "_").replace(" ", "_")

    # Normalise both sides identically: previously only the needle mapped
    # "-" to "_", so a theme like "land-use" could never match a category
    # literally named "land-use".
    needle = _slug(theme)
    kept = [c for c in cfg.categories if _slug(c.name) == needle]
    if not kept:
        available = ", ".join(c.name for c in cfg.categories) or "<none>"
        raise ConfigError(f"Theme {theme!r} not found. Available: {available}")
    # Round-trip through OmegaConf to deep-copy instead of mutating the caller's config.
    new_cfg = OmegaConf.to_object(OmegaConf.structured(cfg))
    if not isinstance(new_cfg, RootConfig):
        raise ConfigError("select_categories failed to round-trip RootConfig")
    new_cfg.categories = kept
    return new_cfg
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def iter_configs(configs_dir: str | os.PathLike[str]) -> Iterator[Path]:
|
|
112
|
+
root = Path(configs_dir)
|
|
113
|
+
if not root.is_dir():
|
|
114
|
+
raise ConfigError(f"Not a directory: {root}")
|
|
115
|
+
for path in sorted(root.glob("*.yaml")):
|
|
116
|
+
yield path
|
|
117
|
+
for path in sorted(root.glob("*.yml")):
|
|
118
|
+
yield path
|
oex/config/schema.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Typed run configuration."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
OsmTagFilter = dict[str, Any]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class HdxConfig:
    """Options controlling publication of exports to HDX."""

    # When False, exports are produced without pushing to HDX.
    push: bool = False
    # HDX site alias — presumably selects demo vs production; confirm in hdx_publisher.
    site: str = "demo"
    api_key: str | None = None
    owner_org: str | None = None
    maintainer: str | None = None
    user_agent: str = "oex"
    methodology: str = "Other"
    methodology_other: str = "Open Source Geographic information"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class DuckdbConfig:
    """HTTP/temp-storage tuning for the DuckDB session."""

    # 8 retries / 500 ms initial / 2x backoff and a 120 s timeout absorb
    # transient S3 blips so a 200-country batch doesn't abort on one shard.
    http_retries: int = 8
    http_retry_wait_ms: int = 500
    http_retry_backoff: float = 2.0
    http_timeout_ms: int = 120_000
    # Spill directory for DuckDB temporary data.
    temp_dir: str = "/tmp/duckdb_temp"
    enable_object_cache: bool = True
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class LoggingConfig:
    """Logging level and optional format override."""

    level: str = "INFO"
    # Custom log format string; None means the setup's default format.
    fmt: str | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class OutputConfig:
    """Where and in which vector formats exports are written."""

    dir: str = "output"
    # Output format slugs, e.g. "gpkg" and "shp".
    formats: list[str] = field(default_factory=lambda: ["gpkg", "shp"])
    # Whether to also emit metadata alongside the data files.
    metadata: bool = False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class ParallelConfig:
    """Parallelism limits; None values mean auto-detect downstream."""

    enabled: bool = True
    threads: int | None = None
    memory_gb: int | None = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class BoundaryConfig:
    """Boundary source: user-supplied GeoJSON or a geoBoundaries lookup."""

    # Optional user-supplied GeoJSON string; when set it takes precedence
    # over the geoBoundaries fetch (see boundary.resolve_boundary).
    geom: str | None = None
    geoboundaries_release: str = "CGAZ"
    geoboundaries_level: str = "ADM0"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class OvertureSourceConfig:
    """Overture Maps source settings (S3-hosted releases read via DuckDB)."""

    enabled: bool = True
    engine: str = "duckdb"
    # Overture release label; "latest" resolved downstream.
    release: str = "latest"
    s3_region: str = "us-west-2"
    s3_bucket: str = "overturemaps-us-west-2"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class OsmSourceConfig:
    """OSM source settings: engine choice, cache location, and upstream URLs."""

    enabled: bool = True
    # "geofabrik" (default) or "planet_parquet" — see the CLI --engine option.
    engine: str = "geofabrik"
    cache_dir: str = "data/osm"
    snapshot: str = "latest"
    # Keep the downloaded PBF after processing instead of deleting it.
    keep_pbf: bool = False
    pbf_url: str = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"
    md5_url: str = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.md5"
    geofabrik_index_url: str = "https://download.geofabrik.de/index-v1.json"
    geofabrik_clip_to_boundary: bool = True
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class CategoryHdx:
    """Per-category HDX dataset metadata."""

    # Dataset title; None presumably falls back to a generated title — confirm in hdx_publisher.
    title: str | None = None
    notes: str = "Vector data export."
    tags: list[str] = field(default_factory=lambda: ["geodata"])
    license: str = "hdx-odc-odbl"
    license_url: str | None = None
    caveats: str = (
        "Data may contain errors. Verified at the community level only; "
        "individual features may need correction."
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class CategoryOverture:
    """Overture query for one category: theme/type plus SELECT/WHERE fragments."""

    enabled: bool = True
    theme: str = ""
    feature_type: str = ""
    select: list[str] = field(default_factory=list)
    where: list[str] = field(default_factory=list)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class CategoryOsm:
    """OSM query for one category."""

    # `filter` is the quackosm tag filter applied at parquet BUILD time.
    # `where` is SQL applied at QUERY time over the already-built parquet.
    enabled: bool = True
    select: list[str] = field(default_factory=list)
    where: list[str] = field(default_factory=list)
    filter: OsmTagFilter = field(default_factory=dict)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class CategoryConfig:
    """One export category: name, formats, plus per-source query configs."""

    name: str = ""
    # Per-category format override; None inherits OutputConfig.formats.
    formats: list[str] | None = None
    hdx: CategoryHdx = field(default_factory=CategoryHdx)
    overture: CategoryOverture = field(default_factory=CategoryOverture)
    osm: CategoryOsm = field(default_factory=CategoryOsm)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
class RootConfig:
    """Top-level run configuration assembled by config.loader.load_config."""

    # ISO 3166-1 alpha-3 country code for the run.
    iso3: str = ""
    key: str = ""
    dataset_name: str | None = None
    subnational: bool = False
    frequency: str = "yearly"
    # Optional path to an external YAML providing the `categories:` list
    # (precedence: defaults < categories_file < inline categories).
    categories_file: str | None = None
    boundary: BoundaryConfig = field(default_factory=BoundaryConfig)
    output: OutputConfig = field(default_factory=OutputConfig)
    parallel: ParallelConfig = field(default_factory=ParallelConfig)
    duckdb: DuckdbConfig = field(default_factory=DuckdbConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)
    hdx: HdxConfig = field(default_factory=HdxConfig)
    # Keyed by source name ("overture", "osm"); dict-typed so OmegaConf merges
    # per-source settings without a fixed schema for this mapping.
    source: dict[str, Any] = field(
        default_factory=lambda: {
            "overture": OvertureSourceConfig(),
            "osm": OsmSourceConfig(),
        }
    )
    categories: list[CategoryConfig] = field(default_factory=list)
|
oex/defaults/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Bundled default YAML configuration."""
|