oex 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oex/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ """oex: country-scale OSM and Overture vector exports."""
2
+
3
+ from oex.config.schema import (
4
+ BoundaryConfig,
5
+ CategoryConfig,
6
+ DuckdbConfig,
7
+ HdxConfig,
8
+ LoggingConfig,
9
+ OsmSourceConfig,
10
+ OutputConfig,
11
+ OvertureSourceConfig,
12
+ ParallelConfig,
13
+ RootConfig,
14
+ )
15
+ from oex.exporter import Exporter, ExportResult
16
+
17
# Version string of the oex package.
__version__ = "0.2.0"
# Names re-exported as the package's public API: the config dataclasses
# imported from oex.config.schema, the exporter entry points, and
# `__version__` itself.
__all__ = [
    "BoundaryConfig",
    "CategoryConfig",
    "DuckdbConfig",
    "ExportResult",
    "Exporter",
    "HdxConfig",
    "LoggingConfig",
    "OsmSourceConfig",
    "OutputConfig",
    "OvertureSourceConfig",
    "ParallelConfig",
    "RootConfig",
    "__version__",
]
oex/boundary.py ADDED
@@ -0,0 +1,120 @@
1
+ """Country boundary resolution: user-supplied geom or geoBoundaries ADM0."""
2
+
3
+ import json
4
+ import threading
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+ import requests
9
+
10
+ from oex.config.schema import BoundaryConfig
11
+ from oex.logging_setup import get_logger
12
+
13
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

# URL template for the geoBoundaries metadata endpoint. Only the ISO3 code
# and the admin level are substituted.
# NOTE(review): the release segment is hard-coded to "gbOpen"; the configured
# `geoboundaries_release` never reaches the URL -- confirm this is intended.
_GEOBOUNDARIES_TPL = "https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{level}/"
16
+
17
+
18
@dataclass(frozen=True)
class Boundary:
    """Immutable result of resolving a country boundary."""

    # Upper-cased ISO3 code the boundary was resolved for.
    iso3: str
    # Bounding box as (min_x, min_y, max_x, max_y), as produced by
    # `_bbox_from_geometry`.
    bbox: tuple[float, float, float, float]
    # The boundary geometry serialized with `json.dumps`.
    geojson: str
    # Provenance label: "user-provided" or "geoBoundaries <release> <level>".
    source: str
24
+
25
+
26
# Lock held by `resolve_boundary` while it reads or fills `_cache`.
_lock = threading.Lock()
# Process-wide cache of resolved boundaries, keyed by a 3-tuple of strings
# built in `resolve_boundary`.
_cache: dict[tuple[str, str, str], Boundary] = {}
28
+
29
+
30
+ def _bbox_from_geometry(geometry: dict[str, Any]) -> tuple[float, float, float, float]:
31
+ coords: list[float] = []
32
+
33
+ def walk(node: Any) -> None:
34
+ if (
35
+ isinstance(node, list)
36
+ and len(node) == 2
37
+ and all(isinstance(v, (int, float)) for v in node)
38
+ ):
39
+ coords.extend(node)
40
+ elif isinstance(node, list):
41
+ for item in node:
42
+ walk(item)
43
+
44
+ walk(geometry.get("coordinates", []))
45
+ if not coords:
46
+ raise ValueError("No coordinates found in geometry")
47
+ xs = coords[0::2]
48
+ ys = coords[1::2]
49
+ return (min(xs), min(ys), max(xs), max(ys))
50
+
51
+
52
+ def _featurecollection_to_geometry(fc: dict[str, Any]) -> dict[str, Any]:
53
+ # ST_GeomFromGeoJSON accepts a single geometry or a GeometryCollection,
54
+ # not a FeatureCollection.
55
+ features = fc.get("features", [])
56
+ geometries = [f["geometry"] for f in features if f.get("geometry")]
57
+ if len(geometries) == 1:
58
+ return geometries[0]
59
+ return {"type": "GeometryCollection", "geometries": geometries}
60
+
61
+
62
def _fetch_geoboundaries(iso3: str, release: str, level: str) -> Boundary:
    """Download a boundary via the geoBoundaries API and wrap it in a ``Boundary``.

    Two HTTP requests are made: one for the metadata document (60 s timeout)
    and one for the GeoJSON it points to (180 s timeout). HTTP errors
    propagate from ``raise_for_status``.

    Args:
        iso3: Country code; upper-cased before use.
        release: Only recorded in the returned ``source`` label.
        level: Admin level substituted into the metadata URL.

    Raises:
        RuntimeError: if the metadata has neither ``gjDownloadURL`` nor
            ``simplifiedGeometryGeoJSON``.
    """
    meta_url = _GEOBOUNDARIES_TPL.format(iso3=iso3.upper(), level=level)
    logger.info("Fetching boundary metadata: %s", meta_url)
    meta_resp = requests.get(meta_url, timeout=60)
    meta_resp.raise_for_status()
    meta = meta_resp.json()

    # Prefer the full-resolution download, fall back to the simplified one.
    geojson_url = meta.get("gjDownloadURL") or meta.get("simplifiedGeometryGeoJSON")
    if not geojson_url:
        raise RuntimeError(f"geoBoundaries response missing GeoJSON URL for {iso3}")

    logger.info("Downloading boundary geometry: %s", geojson_url)
    geo_resp = requests.get(geojson_url, timeout=180)
    geo_resp.raise_for_status()
    document = geo_resp.json()

    if document.get("type") == "FeatureCollection":
        geometry = _featurecollection_to_geometry(document)
    else:
        geometry = document
    extent = _bbox_from_geometry(geometry)
    return Boundary(
        iso3=iso3.upper(),
        bbox=extent,
        geojson=json.dumps(geometry),
        source=f"geoBoundaries {release} {level}",
    )
84
+
85
+
86
def _from_user_geom(iso3: str, geom_str: str) -> Boundary:
    """Build a ``Boundary`` from a user-supplied GeoJSON string.

    A FeatureCollection is first collapsed to a single geometry; any other
    document is used unchanged. The result is labelled ``"user-provided"``.
    """
    document = json.loads(geom_str)
    if document.get("type") == "FeatureCollection":
        geometry = _featurecollection_to_geometry(document)
    else:
        geometry = document
    extent = _bbox_from_geometry(geometry)
    serialized = json.dumps(geometry)
    return Boundary(
        iso3=iso3.upper(),
        bbox=extent,
        geojson=serialized,
        source="user-provided",
    )
96
+
97
+
98
def resolve_boundary(iso3: str, cfg: BoundaryConfig) -> Boundary:
    """Return the boundary for *iso3*, resolving it at most once per cache key.

    A user-supplied ``cfg.geom`` takes precedence over a geoBoundaries
    download. Results are memoized in the module-level cache.

    The cache key distinguishes the two origins: a user geometry is keyed by
    its own text, a download by (release, level). Without that, a
    user-provided boundary and a downloaded one for the same country would
    overwrite / shadow each other.

    Raises:
        ValueError: if *iso3* is empty.
    """
    if not iso3:
        raise ValueError("iso3 must be set on the config")
    code = iso3.upper()
    if cfg.geom:
        # "<user-geom>" cannot collide with a real release name in practice.
        key = (code, "<user-geom>", cfg.geom)
    else:
        key = (code, cfg.geoboundaries_release, cfg.geoboundaries_level)

    # The lock is held across the fetch, as before, so concurrent callers
    # never download the same boundary twice.
    with _lock:
        cached = _cache.get(key)
        if cached is not None:
            return cached

        if cfg.geom:
            boundary = _from_user_geom(iso3, cfg.geom)
        else:
            boundary = _fetch_geoboundaries(
                iso3, cfg.geoboundaries_release, cfg.geoboundaries_level
            )

        _cache[key] = boundary
        logger.info(
            "Resolved boundary for %s from %s; bbox=%s",
            boundary.iso3,
            boundary.source,
            boundary.bbox,
        )
        return boundary
oex/cli.py ADDED
@@ -0,0 +1,207 @@
1
+ """Typer CLI for oex."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+
7
+ from oex.config.loader import (
8
+ apply_overrides,
9
+ iter_configs,
10
+ load_config,
11
+ select_categories,
12
+ )
13
+ from oex.config.schema import RootConfig
14
+ from oex.exporter import Exporter, ExportResult
15
+ from oex.logging_setup import get_logger, setup_logging
16
+ from oex.osm.runner import OsmRunner
17
+ from oex.overture.runner import OvertureRunner
18
+
19
# Root Typer application; the commands defined below register on it.
# Completion options are turned off and invoking the CLI with no arguments
# shows the help text.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Country-scale OSM and Overture vector exports.",
)
24
+
25
+
26
# App-level callback: configures logging from the `log_level` option, which
# defaults to "INFO" and can also be supplied through the LOG_LEVEL
# environment variable.
@app.callback()
def _global(
    log_level: str = typer.Option("INFO", envvar="LOG_LEVEL"),
) -> None:
    setup_logging(level=log_level)
31
+
32
+
33
+ def _resolve_config(
34
+ iso3_or_yaml: str | None,
35
+ configs_dir: Path | None,
36
+ config: Path | None,
37
+ ) -> list[Path | None]:
38
+ if configs_dir is not None:
39
+ return list(iter_configs(configs_dir))
40
+ if config is not None:
41
+ return [config]
42
+ if iso3_or_yaml:
43
+ candidate = Path("configs") / f"{iso3_or_yaml.lower()}.yaml"
44
+ if candidate.exists():
45
+ return [candidate]
46
+ return [None]
47
+
48
+
49
+ def _build_overrides(
50
+ iso3_or_yaml: str | None,
51
+ hdx_push: bool | None,
52
+ output_dir: Path | None,
53
+ osm_engine: str | None = None,
54
+ ) -> dict[str, object]:
55
+ overrides: dict[str, object] = {}
56
+ if iso3_or_yaml and len(iso3_or_yaml) <= 3 and iso3_or_yaml.isalpha():
57
+ overrides["iso3"] = iso3_or_yaml.upper()
58
+ if hdx_push is True:
59
+ overrides["hdx.push"] = True
60
+ if hdx_push is False:
61
+ overrides["hdx.push"] = False
62
+ if output_dir is not None:
63
+ overrides["output.dir"] = str(output_dir)
64
+ if osm_engine is not None:
65
+ overrides["source.osm.engine"] = osm_engine
66
+ return overrides
67
+
68
+
69
def _summarise(results: list[ExportResult]) -> int:
    """Log one summary line per result and return the process exit code.

    Returns 0 when no result reports a failure, otherwise 1.
    """
    log = get_logger("oex.cli")
    failed_total = sum(item.failed for item in results)
    for item in results:
        counts = (item.succeeded, item.empty, item.skipped, item.failed)
        log.info(
            "%s/%s: %d ok, %d empty, %d skipped, %d failed in %.1fs",
            item.iso3,
            item.source_name,
            *counts,
            item.total_duration_s,
        )
    if failed_total == 0:
        return 0
    return 1
84
+
85
+
86
def _run_one(
    yaml_path: Path | None,
    overrides: dict[str, object],
    theme: str | None,
    runner_factory,
) -> ExportResult:
    """Load one config, apply overrides and theme selection, then export.

    *runner_factory* is called with no arguments to create the runner handed
    to ``Exporter``.
    """
    loaded: RootConfig = load_config(yaml_path)
    overridden = apply_overrides(loaded, overrides)
    selected = select_categories(overridden, theme)
    exporter = Exporter(selected, runner_factory())
    return exporter.run()
96
+
97
+
98
+ def _resolve_args(
99
+ arg1: str | None,
100
+ arg2: str | None,
101
+ configs_dir: Path | None,
102
+ config: Path | None,
103
+ ) -> tuple[str | None, str | None]:
104
+ # When --config or --configs-dir is given, the first positional is the theme,
105
+ # not the iso3 (iso3 comes from the YAML).
106
+ if configs_dir is not None or config is not None:
107
+ return None, arg1 if arg2 is None else arg2
108
+ return arg1, arg2
109
+
110
+
111
@app.command("overture")
def cmd_overture(
    iso3_or_yaml: str | None = typer.Argument(
        None, help="ISO3 like NPL, or name of a YAML in ./configs/"
    ),
    theme: str | None = typer.Argument(None, help="Optional theme override (e.g. buildings)"),
    configs_dir: Path | None = typer.Option(
        None, "--configs-dir", help="Run every YAML in this directory"
    ),
    config: Path | None = typer.Option(None, "--config", "-c", help="Explicit config YAML path"),
    output_dir: Path | None = typer.Option(None, "--output-dir", "-o"),
    hdx_push: bool | None = typer.Option(None, "--hdx-push/--no-hdx-push"),
) -> None:
    """Export Overture data."""
    # Sort out which positional is the country and which is the theme.
    country, wanted_theme = _resolve_args(iso3_or_yaml, theme, configs_dir, config)
    config_paths = _resolve_config(country, configs_dir, config)
    overrides = _build_overrides(country, hdx_push, output_dir)

    # One export per resolved config; the exit code reflects any failure.
    results: list[ExportResult] = []
    for path in config_paths:
        results.append(_run_one(path, overrides, wanted_theme, OvertureRunner))
    exit_code = _summarise(results)
    raise typer.Exit(code=exit_code)
130
+
131
+
132
@app.command("osm")
def cmd_osm(
    iso3_or_yaml: str | None = typer.Argument(
        None, help="ISO3 like NPL, or name of a YAML in ./configs/"
    ),
    theme: str | None = typer.Argument(None, help="Optional theme override (e.g. buildings)"),
    configs_dir: Path | None = typer.Option(None, "--configs-dir"),
    config: Path | None = typer.Option(None, "--config", "-c"),
    output_dir: Path | None = typer.Option(None, "--output-dir", "-o"),
    hdx_push: bool | None = typer.Option(None, "--hdx-push/--no-hdx-push"),
    engine: str | None = typer.Option(
        None,
        "--engine",
        help="OSM engine: 'geofabrik' (default) or 'planet_parquet'",
    ),
) -> None:
    """Export OSM data via the configured engine."""
    # Sort out which positional is the country and which is the theme.
    country, wanted_theme = _resolve_args(iso3_or_yaml, theme, configs_dir, config)
    config_paths = _resolve_config(country, configs_dir, config)
    # --engine, when given, overrides source.osm.engine from the config.
    overrides = _build_overrides(country, hdx_push, output_dir, osm_engine=engine)

    results: list[ExportResult] = []
    for path in config_paths:
        results.append(_run_one(path, overrides, wanted_theme, OsmRunner))
    exit_code = _summarise(results)
    raise typer.Exit(code=exit_code)
154
+
155
+
156
@app.command("osm-build-cache")
def cmd_osm_build_cache(
    pbf: Path | None = typer.Option(None, "--pbf", help="Local PBF path"),
    planet: bool = typer.Option(False, "--planet", help="Download the latest planet PBF"),
    config: Path | None = typer.Option(
        None, "--config", "-c", help="Config to drive theme tag filters"
    ),
    snapshot: str | None = typer.Option(
        None, "--snapshot", help="Snapshot label, defaults to today"
    ),
    themes: str | None = typer.Option(
        None, "--themes", help="Comma-separated theme slugs to limit"
    ),
) -> None:
    """Build the planet OSM PBF -> per-theme parquet cache at <cache_dir>/planet/<snapshot>/."""
    # Imported lazily so the other commands do not pay for these modules.
    from oex.osm.build_cache import build_cache
    from oex.osm.fetch_planet import download_pbf

    cfg: RootConfig = load_config(config)

    # Exactly one PBF origin must be chosen.
    if planet:
        if pbf is not None:
            raise typer.BadParameter("Pass either --planet or --pbf, not both")
    elif pbf is None:
        raise typer.BadParameter("One of --planet or --pbf is required")

    if not planet:
        assert pbf is not None
        pbf_path = pbf
    else:
        osm_src = cfg.source["osm"]
        downloaded = download_pbf(
            osm_src.pbf_url, osm_src.cache_dir + "/_pbf", md5_url=osm_src.md5_url
        )
        pbf_path = downloaded.path

    theme_list = None
    if themes:
        theme_list = [part.strip() for part in themes.split(",")]

    cache_root = Path(cfg.source["osm"].cache_dir) / "planet"
    manifest = build_cache(
        cfg,
        pbf_path,
        cache_root=cache_root,
        snapshot=snapshot,
        themes_filter=theme_list,
    )
    built = [t.theme for t in manifest.themes]
    typer.echo(f"Cache snapshot: {manifest.snapshot}")
    typer.echo(f"Themes built: {built}")
200
+
201
+
202
def main() -> None:
    """Run the Typer application."""
    app()
204
+
205
+
206
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
oex/config/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ """Typed configuration loading."""
2
+
3
+ from oex.config.loader import (
4
+ ConfigError,
5
+ apply_overrides,
6
+ iter_configs,
7
+ load_config,
8
+ select_categories,
9
+ )
10
+ from oex.config.schema import (
11
+ BoundaryConfig,
12
+ CategoryConfig,
13
+ HdxConfig,
14
+ OsmSourceConfig,
15
+ OutputConfig,
16
+ OvertureSourceConfig,
17
+ ParallelConfig,
18
+ RootConfig,
19
+ )
20
+
21
# Public names of the oex.config package: the schema dataclasses plus the
# loader helpers and their error type.
# NOTE(review): DuckdbConfig and LoggingConfig are re-exported by the
# top-level `oex` package but not here -- confirm whether that is intentional.
__all__ = [
    "BoundaryConfig",
    "CategoryConfig",
    "ConfigError",
    "HdxConfig",
    "OsmSourceConfig",
    "OutputConfig",
    "OvertureSourceConfig",
    "ParallelConfig",
    "RootConfig",
    "apply_overrides",
    "iter_configs",
    "load_config",
    "select_categories",
]
oex/config/loader.py ADDED
@@ -0,0 +1,118 @@
1
+ """Layered YAML config: bundled defaults < user YAML < dotlist overrides."""
2
+
3
+ import os
4
+ from collections.abc import Iterator
5
+ from importlib import resources
6
+ from pathlib import Path
7
+ from typing import Any, cast
8
+
9
+ from omegaconf import DictConfig, ListConfig, OmegaConf
10
+
11
+ from oex.config.schema import RootConfig
12
+
13
+
14
class ConfigError(ValueError):
    """Signals a configuration that cannot be loaded or is malformed.

    Subclasses ``ValueError`` so callers catching that still see it.
    """
16
+
17
+
18
def _load_yaml(source: str | os.PathLike[str]) -> DictConfig:
    """Load a YAML mapping from a file path or from inline YAML text.

    *source* is read as a file when it names an existing path; otherwise a
    ``str`` is parsed as YAML text.

    Raises:
        ConfigError: if *source* is neither an existing path nor a string,
            or if the top-level YAML node is not a mapping.
    """
    try:
        on_disk = isinstance(source, (str, os.PathLike)) and Path(source).exists()
    except (OSError, ValueError):
        # Inline YAML text is usually not a legal file name: probing it can
        # fail with e.g. "File name too long" rather than "not found".
        # Treat any such failure as "not a file".
        on_disk = False

    text: str
    if on_disk:
        text = Path(source).read_text(encoding="utf-8")
    elif isinstance(source, str):
        text = source
    else:
        raise ConfigError(f"Cannot load config from {source!r}")
    cfg = OmegaConf.create(text)
    if not isinstance(cfg, DictConfig):
        raise ConfigError("Top-level YAML must be a mapping")
    return cfg
30
+
31
+
32
def _load_defaults() -> DictConfig:
    """Parse the ``base.yaml`` bundled in the ``oex.defaults`` package.

    Raises:
        ConfigError: if the bundled file does not parse to a mapping.
    """
    base_yaml = resources.files("oex.defaults").joinpath("base.yaml")
    parsed = OmegaConf.create(base_yaml.read_text(encoding="utf-8"))
    if isinstance(parsed, DictConfig):
        return parsed
    raise ConfigError("base.yaml is malformed")
39
+
40
+
41
def _load_categories_file(path: str | os.PathLike[str]) -> ListConfig:
    """Return the ``categories`` list from a separate YAML source.

    Raises:
        ConfigError: if the loaded mapping has no top-level ``categories``
            list.
    """
    if Path(str(path)).exists():
        raw = _load_yaml(path)
    else:
        # Not a file on disk: hand the string form to the YAML loader.
        raw = _load_yaml(str(path))
    has_list = "categories" in raw and isinstance(raw.categories, ListConfig)
    if not has_list:
        raise ConfigError(f"categories_file {path!r} must contain a top-level `categories:` list")
    return raw.categories
46
+
47
+
48
def load_config(
    user_config: str | os.PathLike[str] | None = None,
    overrides: list[str] | None = None,
) -> RootConfig:
    """Build a RootConfig. categories precedence: defaults < categories_file < inline `categories:`.

    Layers, lowest to highest priority: bundled defaults, *user_config*
    (path or YAML text), then *overrides* given as ``key=value`` dotlist
    strings. The merged result is validated against the ``RootConfig``
    schema.

    Raises:
        ConfigError: if any layer is malformed or the result is not a
            ``RootConfig``.
    """
    # Merge plain (untyped) configs first so user YAML can replace the
    # categories list wholesale without tripping the structured-list type check.
    layered: DictConfig = _load_defaults()

    if user_config is not None:
        user = _load_yaml(user_config)

        if "categories_file" in user and user.categories_file:
            layered.categories = _load_categories_file(str(user.categories_file))

        # Inline categories replace (not merge with) whatever is set so far,
        # and are removed from `user` so the merge below leaves them alone.
        if "categories" in user and isinstance(user.categories, ListConfig):
            layered.categories = user.categories
            del user["categories"]

        layered = cast(DictConfig, OmegaConf.merge(layered, user))

    if overrides:
        dotted = OmegaConf.from_dotlist(overrides)
        layered = cast(DictConfig, OmegaConf.merge(layered, dotted))

    OmegaConf.resolve(layered)

    # Typing happens last: merge onto the structured schema, then materialize.
    schema = OmegaConf.structured(RootConfig)
    typed = cast(DictConfig, OmegaConf.merge(schema, layered))
    result: Any = OmegaConf.to_object(typed)
    if isinstance(result, RootConfig):
        return result
    raise ConfigError("Merged config did not resolve to RootConfig")
80
+
81
+
82
def apply_overrides(cfg: RootConfig, overrides: dict[str, Any]) -> RootConfig:
    """Apply a dict of dotted overrides to an already-loaded config.

    Keys are dotted paths (``"output.dir"``); entries whose value is ``None``
    are ignored. Values are applied as the Python objects they already are.
    They are deliberately not rendered to ``key=value`` text and re-parsed as
    YAML, which would reinterpret strings such as ``"1e3"``, ``"0123"``,
    ``"~"`` or ``"a #b"``.

    Returns:
        A new ``RootConfig``; *cfg* is not modified.

    Raises:
        ConfigError: if the merged result is not a ``RootConfig``.
    """
    structured: DictConfig = cast(DictConfig, OmegaConf.structured(cfg))

    patch = OmegaConf.create()
    has_patch = False
    for dotted_key, value in overrides.items():
        if value is None:
            continue
        # Creates the intermediate nodes for each dotted segment.
        OmegaConf.update(patch, dotted_key, value, merge=True)
        has_patch = True

    if has_patch:
        structured = cast(DictConfig, OmegaConf.merge(structured, patch))
    OmegaConf.resolve(structured)
    container: Any = OmegaConf.to_object(structured)
    if not isinstance(container, RootConfig):
        raise ConfigError("Override merge did not resolve to RootConfig")
    return container
93
+
94
+
95
def select_categories(cfg: RootConfig, theme: str | None) -> RootConfig:
    """Restrict the config to the categories whose slugified name matches `theme`.

    Both the requested theme and each category name are slugified the same
    way (lower-case, ``-`` and spaces mapped to ``_``), so a category named
    ``"Health-Facilities"`` is reachable as ``health-facilities`` or
    ``health_facilities``. Previously only the theme had hyphens mapped, so
    hyphenated category names could never match.

    Returns:
        *cfg* itself when *theme* is ``None``; otherwise a copy whose
        ``categories`` holds only the matches.

    Raises:
        ConfigError: if no category matches, or the copy is not a
            ``RootConfig``.
    """
    if theme is None:
        return cfg

    def slug(text: str) -> str:
        return text.lower().replace("-", "_").replace(" ", "_")

    needle = slug(theme.strip())
    kept = [c for c in cfg.categories if slug(c.name) == needle]
    if not kept:
        available = ", ".join(c.name for c in cfg.categories) or "<none>"
        raise ConfigError(f"Theme {theme!r} not found. Available: {available}")
    # Round-trip through OmegaConf to get an independent copy of the config.
    new_cfg = OmegaConf.to_object(OmegaConf.structured(cfg))
    if not isinstance(new_cfg, RootConfig):
        raise ConfigError("select_categories failed to round-trip RootConfig")
    new_cfg.categories = kept
    return new_cfg
109
+
110
+
111
+ def iter_configs(configs_dir: str | os.PathLike[str]) -> Iterator[Path]:
112
+ root = Path(configs_dir)
113
+ if not root.is_dir():
114
+ raise ConfigError(f"Not a directory: {root}")
115
+ for path in sorted(root.glob("*.yaml")):
116
+ yield path
117
+ for path in sorted(root.glob("*.yml")):
118
+ yield path
oex/config/schema.py ADDED
@@ -0,0 +1,143 @@
1
+ """Typed run configuration."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
# Alias for the tag-filter mapping stored in `CategoryOsm.filter`.
OsmTagFilter = dict[str, Any]
7
+
8
+
9
@dataclass
class HdxConfig:
    """Settings stored under the ``hdx`` key of ``RootConfig``."""

    # Off by default; the CLI sets it through --hdx-push/--no-hdx-push.
    push: bool = False
    site: str = "demo"
    # Optional values with no built-in default.
    api_key: str | None = None
    owner_org: str | None = None
    maintainer: str | None = None
    user_agent: str = "oex"
    methodology: str = "Other"
    methodology_other: str = "Open Source Geographic information"
19
+
20
+
21
@dataclass
class DuckdbConfig:
    """HTTP retry/timeout and scratch settings under ``RootConfig.duckdb``."""

    # 8 retries / 500 ms initial / 2x backoff and a 120 s timeout absorb
    # transient S3 blips so a 200-country batch doesn't abort on one shard.
    http_retries: int = 8
    http_retry_wait_ms: int = 500
    http_retry_backoff: float = 2.0
    http_timeout_ms: int = 120_000
    # NOTE(review): POSIX-style default path -- confirm behavior on other platforms.
    temp_dir: str = "/tmp/duckdb_temp"
    enable_object_cache: bool = True
31
+
32
+
33
@dataclass
class LoggingConfig:
    """Logging settings under ``RootConfig.logging``."""

    level: str = "INFO"
    # Optional format string; None when not configured.
    fmt: str | None = None
37
+
38
+
39
@dataclass
class OutputConfig:
    """Output settings under ``RootConfig.output``."""

    # Output directory; the CLI overrides it through --output-dir.
    dir: str = "output"
    # Built per instance so the default list is never shared.
    formats: list[str] = field(default_factory=lambda: ["gpkg", "shp"])
    metadata: bool = False
44
+
45
+
46
@dataclass
class ParallelConfig:
    """Parallelism settings under ``RootConfig.parallel``."""

    enabled: bool = True
    # None means no explicit value was configured.
    threads: int | None = None
    memory_gb: int | None = None
51
+
52
+
53
@dataclass
class BoundaryConfig:
    """Boundary settings under ``RootConfig.boundary``; consumed by ``resolve_boundary``."""

    # Optional GeoJSON text; when set, it is used instead of a geoBoundaries download.
    geom: str | None = None
    # Release and level passed to the geoBoundaries fetch.
    geoboundaries_release: str = "CGAZ"
    geoboundaries_level: str = "ADM0"
58
+
59
+
60
@dataclass
class OvertureSourceConfig:
    """Default value of ``RootConfig.source["overture"]``."""

    enabled: bool = True
    engine: str = "duckdb"
    release: str = "latest"
    # S3 location settings.
    s3_region: str = "us-west-2"
    s3_bucket: str = "overturemaps-us-west-2"
67
+
68
+
69
@dataclass
class OsmSourceConfig:
    """Default value of ``RootConfig.source["osm"]``."""

    enabled: bool = True
    # The CLI's --engine help lists 'geofabrik' (default) and 'planet_parquet'.
    engine: str = "geofabrik"
    # Root cache directory; `osm-build-cache` uses `<cache_dir>/_pbf` and
    # `<cache_dir>/planet` beneath it.
    cache_dir: str = "data/osm"
    snapshot: str = "latest"
    keep_pbf: bool = False
    # Planet PBF download URL and its MD5 companion, passed to `download_pbf`.
    pbf_url: str = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"
    md5_url: str = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.md5"
    geofabrik_index_url: str = "https://download.geofabrik.de/index-v1.json"
    geofabrik_clip_to_boundary: bool = True
80
+
81
+
82
@dataclass
class CategoryHdx:
    """Per-category settings stored under ``CategoryConfig.hdx``."""

    title: str | None = None
    notes: str = "Vector data export."
    # Built per instance so the default list is never shared.
    tags: list[str] = field(default_factory=lambda: ["geodata"])
    license: str = "hdx-odc-odbl"
    license_url: str | None = None
    caveats: str = (
        "Data may contain errors. Verified at the community level only; "
        "individual features may need correction."
    )
93
+
94
+
95
@dataclass
class CategoryOverture:
    """Per-category Overture settings stored under ``CategoryConfig.overture``."""

    enabled: bool = True
    theme: str = ""
    feature_type: str = ""
    # Both lists default to empty.
    select: list[str] = field(default_factory=list)
    where: list[str] = field(default_factory=list)
102
+
103
+
104
@dataclass
class CategoryOsm:
    """Per-category OSM settings stored under ``CategoryConfig.osm``."""

    # `filter` is the quackosm tag filter applied at parquet BUILD time.
    # `where` is SQL applied at QUERY time over the already-built parquet.
    enabled: bool = True
    select: list[str] = field(default_factory=list)
    where: list[str] = field(default_factory=list)
    filter: OsmTagFilter = field(default_factory=dict)
112
+
113
+
114
@dataclass
class CategoryConfig:
    """One entry of ``RootConfig.categories``."""

    # `select_categories` matches a requested theme against this name.
    name: str = ""
    # None by default.
    # NOTE(review): presumably falls back to OutputConfig.formats -- confirm.
    formats: list[str] | None = None
    hdx: CategoryHdx = field(default_factory=CategoryHdx)
    overture: CategoryOverture = field(default_factory=CategoryOverture)
    osm: CategoryOsm = field(default_factory=CategoryOsm)
121
+
122
+
123
@dataclass
class RootConfig:
    """Top-level run configuration; the type returned by ``load_config``."""

    # Country code; `resolve_boundary` rejects an empty value.
    iso3: str = ""
    key: str = ""
    dataset_name: str | None = None
    subnational: bool = False
    frequency: str = "yearly"
    # Optional separate YAML whose `categories:` list is loaded by `load_config`.
    categories_file: str | None = None
    boundary: BoundaryConfig = field(default_factory=BoundaryConfig)
    output: OutputConfig = field(default_factory=OutputConfig)
    parallel: ParallelConfig = field(default_factory=ParallelConfig)
    duckdb: DuckdbConfig = field(default_factory=DuckdbConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)
    hdx: HdxConfig = field(default_factory=HdxConfig)
    # Per-source settings keyed by source name; the default provides
    # "overture" and "osm" entries.
    source: dict[str, Any] = field(
        default_factory=lambda: {
            "overture": OvertureSourceConfig(),
            "osm": OsmSourceConfig(),
        }
    )
    categories: list[CategoryConfig] = field(default_factory=list)
@@ -0,0 +1 @@
1
+ """Bundled default YAML configuration."""