buildingdata 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+ from .config import write_config
3
+ from .reference import get_census, get_diagnosis, get_districts, get_gas_network
4
+ from .simulation import get_bdtopo, get_era5_climate
5
+
6
+
7
def configure(bucket=None, cache_dir=None, credentials=None):
    """Persist buildingdata settings to ~/.config/buildingdata/config.ini.

    Thin public wrapper: every argument is forwarded unchanged, by keyword,
    to ``write_config``. A value of None is passed through as None; how None
    is interpreted is decided by ``write_config``.

    Args:
        bucket (str, optional): GCS bucket name.
        cache_dir (str or pathlib.Path, optional): local cache directory.
        credentials (str or pathlib.Path, optional): path to a GCS service
            account JSON file.
    """
    settings = {
        "bucket": bucket,
        "cache_dir": cache_dir,
        "credentials": credentials,
    }
    write_config(**settings)
buildingdata/_cli.py ADDED
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+ import argparse
3
+
4
+ from .config import write_config
5
+
6
+
7
def main(argv=None):
    """Run the ``buildingdata`` command-line interface.

    Supports a single sub-command, ``configure``, which forwards its options
    to ``write_config`` and prints a confirmation. When no sub-command is
    given the help text is printed instead.

    Args:
        argv (list of str, optional): arguments to parse, without the program
            name. Defaults to None, in which case argparse reads
            ``sys.argv[1:]`` (the behavior of the original zero-argument
            entry point).
    """
    parser = argparse.ArgumentParser(
        prog="buildingdata",
        description="Configure the buildingdata package.",
    )
    sub = parser.add_subparsers(dest="command")

    cfg_parser = sub.add_parser("configure", help="Write ~/.config/buildingdata/config.ini")
    cfg_parser.add_argument("--bucket", default=None, help="GCS bucket name")
    cfg_parser.add_argument("--cache-dir", default=None, dest="cache_dir", help="Local cache directory")
    cfg_parser.add_argument("--credentials", default=None, help="Path to GCS service account JSON")

    # Passing argv explicitly makes the entry point callable from code;
    # None keeps the console-script behavior of reading sys.argv.
    args = parser.parse_args(argv)

    if args.command == "configure":
        write_config(
            bucket=args.bucket,
            cache_dir=args.cache_dir,
            credentials=args.credentials,
        )
        print("Configuration saved.")
    else:
        # No sub-command supplied: args.command is None.
        parser.print_help()
buildingdata/cache.py ADDED
@@ -0,0 +1,92 @@
1
+ # -*- coding: utf-8 -*-
2
+ import json
3
+ from datetime import datetime, timezone
4
+ from pathlib import Path
5
+
6
+ from .config import get_cache_dir
7
+ from .exceptions import CacheError
8
+
9
+
10
def cache_path(name):
    """Build the local filesystem location for a cached artifact.

    Args:
        name (str): logical artifact name, e.g. "census_latest.parquet".

    Returns:
        pathlib.Path: ``name`` joined onto the directory returned by
            ``get_cache_dir()``.
    """
    cache_root = get_cache_dir()
    return cache_root / name
20
+
21
+
22
def _sidecar_path(name):
    """Return the path of the ``<name>.meta.json`` sidecar in the cache dir."""
    cache_root = get_cache_dir()
    return cache_root.joinpath(name + ".meta.json")
24
+
25
+
26
def is_cached(name):
    """Tell whether both the artifact file and its sidecar exist locally.

    Only file existence is checked; the sidecar content is not parsed or
    validated, and GCS is not contacted. Use needs_refresh() for a
    freshness check.

    Args:
        name (str): logical artifact name.

    Returns:
        bool: True if the artifact and its ``.meta.json`` sidecar are both
            present in the cache directory.
    """
    if not cache_path(name).exists():
        return False
    return _sidecar_path(name).exists()
38
+
39
+
40
def needs_refresh(name, blob):
    """Compare the cached generation with the blob's current generation.

    Args:
        name (str): logical artifact name.
        blob (google.cloud.storage.Blob): live blob object from GCS.

    Returns:
        bool: True if the sidecar is missing, cannot be read or interpreted,
            or records a generation lower than ``blob.generation``;
            False otherwise.
    """
    sidecar = _sidecar_path(name)
    if sidecar.exists():
        try:
            with open(sidecar) as handle:
                recorded = json.load(handle)
            # A sidecar without a "generation" key counts as older than anything.
            cached_generation = int(recorded.get("generation", -1))
            return cached_generation < blob.generation
        except Exception:
            # Best effort: any failure to read or compare means "refresh".
            return True
    return True
59
+
60
+
61
def write_sidecar(name, blob):
    """Record download metadata for an artifact in its .meta.json sidecar.

    The sidecar stores the blob name, the blob generation and the current
    UTC time in ISO format.

    Args:
        name (str): logical artifact name.
        blob (google.cloud.storage.Blob): the blob that was downloaded.

    Raises:
        CacheError: if the sidecar file cannot be written (OSError).
    """
    sidecar = _sidecar_path(name)
    record = dict(
        blob_name=blob.name,
        generation=blob.generation,
        downloaded_at=datetime.now(timezone.utc).isoformat(),
    )
    try:
        with open(sidecar, "w") as handle:
            json.dump(record, handle, indent=2)
    except OSError as e:
        raise CacheError(f"Failed to write cache sidecar {sidecar}: {e}") from e
79
+
80
+
81
def ensure_subdir(subdir):
    """Make sure a named subdirectory of the cache dir exists and return it.

    Args:
        subdir (str): subdirectory name, e.g. "bdtopo" or "era5".

    Returns:
        pathlib.Path: ``subdir`` joined onto the directory returned by
            ``get_cache_dir()``; created (with parents) when missing.
    """
    target = get_cache_dir().joinpath(subdir)
    target.mkdir(parents=True, exist_ok=True)
    return target
buildingdata/config.py ADDED
@@ -0,0 +1,119 @@
1
+ # -*- coding: utf-8 -*-
2
+ import configparser
3
+ import hashlib
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from platformdirs import user_data_dir
8
+
9
+ _DEFAULT_BUCKET = "building-inference-data"
10
+ _CONFIG_FILE = Path.home() / ".config" / "buildingdata" / "config.ini"
11
+
12
+
13
def _install_id():
    """Compute a short identifier tied to where this package is installed.

    The id is the first 8 hex characters of the SHA-256 of the resolved
    directory containing this module, so two installs in different
    locations get different ids while the same location always yields the
    same id.

    Returns:
        str: 8-char hex digest identifying this installation.
    """
    package_dir = Path(__file__).resolve().parent
    digest = hashlib.sha256(str(package_dir).encode("utf-8"))
    return digest.hexdigest()[:8]
25
+
26
+
27
def _default_cache_dir():
    """Compute the cache directory used when none is configured.

    The location is ``<user_data_dir("buildingdata")>/cache/<install id>``,
    i.e. the platformdirs user-data directory namespaced by the value of
    ``_install_id()`` so that separate installs get separate caches.

    Returns:
        pathlib.Path: the default cache directory (not created here).
    """
    base_dir = Path(user_data_dir("buildingdata"))
    return base_dir.joinpath("cache", _install_id())
38
+
39
+
40
def _read_config():
    """Load the user config file into a ConfigParser.

    Returns:
        configparser.ConfigParser: parser populated from ``_CONFIG_FILE`` when
            that file exists, otherwise an empty parser.
    """
    parser = configparser.ConfigParser()
    if not _CONFIG_FILE.exists():
        return parser
    parser.read(_CONFIG_FILE)
    return parser
45
+
46
+
47
def get_bucket():
    """Resolve the GCS bucket name to use.

    Lookup order: the BUILDINGDATA_BUCKET environment variable, then the
    ``bucket`` option of the ``[gcs]`` section in the config file, then the
    built-in default.

    Returns:
        str: the bucket name exactly as found in the winning source.
    """
    from_env = os.environ.get("BUILDINGDATA_BUCKET")
    if from_env is not None:
        return from_env
    return _read_config().get("gcs", "bucket", fallback=_DEFAULT_BUCKET)
59
+
60
+
61
def get_cache_dir():
    """Return the local cache directory, creating it if absent.

    Precedence: BUILDINGDATA_CACHE_DIR env var → ``dir`` option of the
    ``[cache]`` section in the config file → per-installation default.

    A leading ``~`` in the chosen value is expanded to the user's home
    directory. Values from the config file (and quoted env values) never go
    through shell expansion, so without this step ``~/cache`` would create a
    literal ``~`` directory under the current working directory.

    Returns:
        pathlib.Path: the cache directory, with ``~`` expanded. The directory
            (and any missing parents) exists when this function returns.
    """
    if "BUILDINGDATA_CACHE_DIR" in os.environ:
        raw = os.environ["BUILDINGDATA_CACHE_DIR"]
    else:
        cfg = _read_config()
        raw = cfg.get("cache", "dir", fallback=str(_default_cache_dir()))
    path = Path(raw).expanduser()
    path.mkdir(parents=True, exist_ok=True)
    return path
77
+
78
+
79
def get_credentials_file():
    """Resolve the path to a GCS service account JSON file, if any.

    Lookup order: the GOOGLE_APPLICATION_CREDENTIALS environment variable,
    then the ``credentials`` option of the ``[gcs]`` section in the config
    file. When neither provides a non-empty value, None is returned; the
    storage client factory in this package then falls back to an anonymous
    client.

    Returns:
        pathlib.Path or None: path to credentials JSON, or None.
    """
    from_env = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    if from_env is not None:
        return Path(from_env)
    configured = _read_config().get("gcs", "credentials", fallback=None)
    if not configured:
        # Missing option or empty string: no credentials file.
        return None
    return Path(configured)
95
+
96
+
97
def write_config(bucket=None, cache_dir=None, credentials=None):
    """Create or update ~/.config/buildingdata/config.ini.

    Existing settings are read first, so only the arguments that are not
    None are overwritten; an argument left as None keeps whatever value the
    file already had. The ``[gcs]`` and ``[cache]`` sections are created
    when missing, and the file is rewritten in full.

    Args:
        bucket (str, optional): GCS bucket name, stored as ``[gcs] bucket``.
        cache_dir (str or pathlib.Path, optional): local cache directory,
            stored as ``[cache] dir``.
        credentials (str or pathlib.Path, optional): path to GCS service
            account JSON, stored as ``[gcs] credentials``.
    """
    _CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
    cfg = _read_config()

    for section in ("gcs", "cache"):
        if not cfg.has_section(section):
            cfg.add_section(section)

    # Path-like values are stored as strings; the bucket is stored as given.
    updates = (
        ("gcs", "bucket", bucket),
        ("gcs", "credentials", None if credentials is None else str(credentials)),
        ("cache", "dir", None if cache_dir is None else str(cache_dir)),
    )
    for section, option, value in updates:
        if value is not None:
            cfg.set(section, option, value)

    with open(_CONFIG_FILE, "w") as handle:
        cfg.write(handle)
@@ -0,0 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+
4
class BuildingDataError(Exception):
    """Root of the buildingdata exception hierarchy."""


class CacheError(BuildingDataError):
    """Signals a failed read from or write to the local cache."""


class RemoteNotAvailableError(BuildingDataError):
    """Signals that a remote resource is missing or answered with an error."""


class ConfigurationError(BuildingDataError):
    """Signals missing required configuration (credentials, API key)."""
buildingdata/gcs.py ADDED
@@ -0,0 +1,82 @@
1
+ # -*- coding: utf-8 -*-
2
+ from pathlib import Path
3
+
4
+ from tqdm import tqdm
5
+
6
+ from .config import get_bucket, get_credentials_file
7
+
8
+
9
def get_client():
    """Build a google.cloud.storage.Client for the configured credentials.

    When ``get_credentials_file()`` yields a path, the client is created
    from that service account JSON file. When it yields None, an anonymous
    client is created instead.

    Returns:
        google.cloud.storage.Client: storage client.
    """
    # Imported lazily so importing this module does not require the SDK.
    from google.cloud import storage

    creds_file = get_credentials_file()
    if creds_file is None:
        return storage.Client.create_anonymous_client()
    return storage.Client.from_service_account_json(str(creds_file))
25
+
26
+
27
def get_blob(name):
    """Return the Blob object for a given name, or None if not found.

    Uses ``Bucket.get_blob``, which fetches the object's metadata in a
    single request and returns None when the object does not exist. The
    previous implementation called ``blob.reload()`` first, which raises for
    a missing object, so the documented None result could never be produced
    and a second request (``exists()``) was issued for objects that do exist.

    Args:
        name (str): blob name inside the configured bucket.

    Returns:
        google.cloud.storage.Blob or None: the blob with its metadata
            (e.g. generation, size) loaded, or None if it does not exist.
    """
    client = get_client()
    bucket = client.bucket(get_bucket())
    return bucket.get_blob(name)
41
+
42
+
43
def download_blob(name, dest_path, show_progress=True):
    """Download a blob from the configured bucket to dest_path.

    The data is first written to ``<dest_path>.tmp`` and moved onto
    ``dest_path`` only after the download finished, so a failed download
    never leaves a truncated file at ``dest_path``. If anything fails, the
    temporary file is removed before the error is re-raised.

    The same download options (``checksum="md5"``, ``raw_download=True``)
    are used whether or not the progress bar is shown, so the resulting file
    does not depend on ``show_progress``. These are the options the default
    (``show_progress=True``) path already used.

    Args:
        name (str): blob name inside the bucket, e.g. "census_latest.parquet".
        dest_path (str or pathlib.Path): local destination file path. Missing
            parent directories are created.
        show_progress (bool): display a tqdm progress bar that advances as
            bytes are written. Defaults to True.

    Returns:
        google.cloud.storage.Blob: the blob that was downloaded, with its
            metadata loaded.
    """
    dest_path = Path(dest_path)
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    client = get_client()
    bucket = client.bucket(get_bucket())
    blob = bucket.blob(name)
    # Load metadata (size, generation); this raises if the object is missing.
    blob.reload()

    total = blob.size or 0
    tmp_path = dest_path.with_suffix(dest_path.suffix + ".tmp")
    download_kwargs = {"checksum": "md5", "raw_download": True}

    try:
        with open(tmp_path, "wb") as f:
            if show_progress:
                # wrapattr returns a proxy of f whose write() also advances
                # the bar by the number of bytes written. bytes=False keeps
                # the unit settings given here instead of tqdm's byte preset.
                with tqdm.wrapattr(
                    f,
                    "write",
                    total=total,
                    bytes=False,
                    unit="B",
                    unit_scale=True,
                    desc=name,
                    leave=True,
                ) as tracked:
                    blob.download_to_file(tracked, **download_kwargs)
            else:
                blob.download_to_file(f, **download_kwargs)
        tmp_path.replace(dest_path)
    except BaseException:
        # Do not leave a partial .tmp file behind (also on Ctrl-C).
        try:
            tmp_path.unlink()
        except OSError:
            # Already gone or not removable; the original error matters more.
            pass
        raise
    return blob
@@ -0,0 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+ from .census import get_census
3
+ from .diagnosis import get_diagnosis
4
+ from .districts import get_districts
5
+ from .gas_network import get_gas_network
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+ import polars as pl
3
+
4
+ from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
5
+ from ..exceptions import RemoteNotAvailableError
6
+ from ..gcs import download_blob, get_blob
7
+
8
+ _BLOB_NAME = "census_latest.parquet"
9
+
10
+
11
def get_census(city_group_list=None, refresh=False):
    """Return the processed INSEE census DataFrame, optionally pre-filtered.

    Downloads census_latest.parquet from GCS to the local cache when no
    cached copy exists or when refresh=True. Filtering by city_group_list
    is applied on a Polars lazy scan before the data is collected.

    The former ``elif refresh:`` branch (a generation check before
    downloading) could never run, because ``refresh`` is already handled by
    the first condition; it has been removed without changing behavior.

    Args:
        city_group_list (list of str, optional): EPCI codes to filter on. If
            None, the full national census is returned. Defaults to None.
        refresh (bool): when True, always download the blob again, even if a
            cached copy exists. Defaults to False.

    Returns:
        polars.DataFrame: census records with columns occupancy_type,
            occupant_status, living_area_class, occupant_count, heating_system,
            residential_type, construction_year_class, district, city,
            city_group, department, region, main_cooking_energy,
            secondary_cooking_energy, ipondl.

    Raises:
        RemoteNotAvailableError: if a download is required and the blob is
            not found in the bucket.
    """
    dest = cache_path(_BLOB_NAME)

    if refresh or not is_cached(_BLOB_NAME):
        blob = get_blob(_BLOB_NAME)
        if blob is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        download_blob(_BLOB_NAME, dest)
        write_sidecar(_BLOB_NAME, blob)

    lf = pl.scan_parquet(dest)
    if city_group_list is not None:
        lf = lf.filter(pl.col("city_group").is_in(city_group_list))
    return lf.collect()
@@ -0,0 +1,58 @@
1
+ # -*- coding: utf-8 -*-
2
+ import polars as pl
3
+
4
+ from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
5
+ from ..exceptions import RemoteNotAvailableError
6
+ from ..gcs import download_blob, get_blob
7
+
8
+ _BLOB_NAME = "energy_performance_diagnosis_latest.parquet"
9
+
10
+ # Heating/DHW energies excluded from inference (no meaningful DPE data for coal)
11
+ _EXCLUDED_ENERGIES = ["Charbon"]
12
+
13
+
14
def get_diagnosis(refresh=False):
    """Load the DPE energy performance diagnosis data, cleaned for inference.

    The parquet file is taken from the local cache. GCS is contacted when no
    cached copy exists or when refresh=True; the file is then downloaded if
    there is no cached copy or if ``needs_refresh`` reports the cached copy
    as older than the remote blob.

    After loading, rows whose backup heating energy or DHW energy is one of
    the excluded energies are removed, ``heating_system`` is cast to
    Categorical and ``region`` to Int64, and the ``living_area`` and
    ``living_area_class`` columns are dropped when present.

    Args:
        refresh (bool): when True, check the remote blob and re-download if
            the cached copy is stale. Defaults to False.

    Returns:
        polars.DataFrame: the filtered and re-typed DPE records.

    Raises:
        RemoteNotAvailableError: if GCS is contacted and the blob is not found.
    """
    dest = cache_path(_BLOB_NAME)

    cache_is_usable = is_cached(_BLOB_NAME) and not refresh
    if not cache_is_usable:
        blob = get_blob(_BLOB_NAME)
        if blob is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        up_to_date = is_cached(_BLOB_NAME) and not needs_refresh(_BLOB_NAME, blob)
        if not up_to_date:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, blob)

    df = pl.read_parquet(dest)

    # Keep only rows where neither energy column holds an excluded energy.
    # NOTE(review): rows with a null in either column may also be dropped,
    # depending on how is_in/filter treat nulls — confirm this is intended.
    heating_ok = ~pl.col("backup_heating_energy").is_in(_EXCLUDED_ENERGIES)
    dhw_ok = ~pl.col("dhw_energy").is_in(_EXCLUDED_ENERGIES)
    df = df.filter(heating_ok & dhw_ok)

    casts = [
        pl.col("heating_system").cast(pl.Categorical),
        pl.col("region").cast(pl.Int64),
    ]
    df = df.with_columns(casts)

    for column in ("living_area", "living_area_class"):
        if column in df.columns:
            df = df.drop([column])

    return df
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
+ import geopandas as gpd
3
+
4
+ from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
5
+ from ..exceptions import RemoteNotAvailableError
6
+ from ..gcs import download_blob, get_blob
7
+
8
+ _BLOB_NAME = "districts_latest.parquet"
9
+
10
+
11
def get_districts(bbox=None, refresh=False):
    """Load the IRIS district boundary GeoDataFrame from the local cache.

    GCS is contacted when no cached copy of districts_latest.parquet exists
    or when refresh=True; the file is then downloaded if there is no cached
    copy or if ``needs_refresh`` reports the cached copy as older than the
    remote blob.

    Args:
        bbox (tuple of float, optional): (minx, miny, maxx, maxy) bounding box
            forwarded to geopandas.read_parquet as its ``bbox`` argument.
            If None, the file is read without a bbox. Defaults to None.
        refresh (bool): when True, check the remote blob and re-download if
            the cached copy is stale. Defaults to False.

    Returns:
        geopandas.GeoDataFrame: the content of the cached parquet file
            (documented upstream as IRIS polygons in EPSG:2154 with columns
            district, city, city_name, city_group, department, region,
            has_network_city_level, has_network_grdf_data, geometry).

    Raises:
        RemoteNotAvailableError: if GCS is contacted and the blob is not found.
    """
    dest = cache_path(_BLOB_NAME)

    cache_is_usable = is_cached(_BLOB_NAME) and not refresh
    if not cache_is_usable:
        blob = get_blob(_BLOB_NAME)
        if blob is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        up_to_date = is_cached(_BLOB_NAME) and not needs_refresh(_BLOB_NAME, blob)
        if not up_to_date:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, blob)

    # Only pass bbox through when the caller supplied one.
    read_options = {"bbox": bbox} if bbox is not None else {}
    return gpd.read_parquet(dest, **read_options)
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+ import geopandas as gpd
3
+
4
+ from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
5
+ from ..exceptions import RemoteNotAvailableError
6
+ from ..gcs import download_blob, get_blob
7
+
8
+ _BLOB_NAME = "gas_network_route_latest.gpkg"
9
+
10
+
11
def get_gas_network(bbox=None, refresh=False):
    """Load the GRDF gas pipeline network GeoDataFrame from the local cache.

    GCS is contacted when no cached copy of gas_network_route_latest.gpkg
    exists or when refresh=True; the file is then downloaded if there is no
    cached copy or if ``needs_refresh`` reports the cached copy as older
    than the remote blob.

    Args:
        bbox (tuple of float, optional): (minx, miny, maxx, maxy) forwarded
            to geopandas.read_file as its ``bbox`` argument. If None, the
            file is read without a bbox. Defaults to None.
        refresh (bool): when True, check the remote blob and re-download if
            the cached copy is stale. Defaults to False.

    Returns:
        geopandas.GeoDataFrame: the content of the cached GeoPackage
            (documented upstream as gas pipeline routes in EPSG:2154).

    Raises:
        RemoteNotAvailableError: if GCS is contacted and the blob is not found.
    """
    dest = cache_path(_BLOB_NAME)

    cache_is_usable = is_cached(_BLOB_NAME) and not refresh
    if not cache_is_usable:
        blob = get_blob(_BLOB_NAME)
        if blob is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        up_to_date = is_cached(_BLOB_NAME) and not needs_refresh(_BLOB_NAME, blob)
        if not up_to_date:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, blob)

    # Only pass bbox through when the caller supplied one.
    read_options = {"bbox": bbox} if bbox is not None else {}
    return gpd.read_file(dest, **read_options)
@@ -0,0 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+ from .bdtopo import get_bdtopo
3
+ from .era5 import get_era5_climate
@@ -0,0 +1,87 @@
1
+ # -*- coding: utf-8 -*-
2
+ import geopandas as gpd
3
+ import requests
4
+
5
+ from ..cache import ensure_subdir
6
+ from ..exceptions import RemoteNotAvailableError
7
+ from ..reference.districts import get_districts
8
+
9
+ # IGN Géoplateforme WFS endpoint for BDTOPO buildings
10
+ _IGN_BASE_URL = (
11
+ "https://data.geopf.fr/wfs/ows"
12
+ "?SERVICE=WFS"
13
+ "&VERSION=2.0.0"
14
+ "&REQUEST=GetFeature"
15
+ "&TYPENAMES=BDTOPO_V3:batiment"
16
+ "&OUTPUTFORMAT=application/json"
17
+ "&SRSNAME=EPSG:2154"
18
+ "&BBOX={minx},{miny},{maxx},{maxy},urn:ogc:def:crs:EPSG::2154"
19
+ )
20
+
21
+ _TIMEOUT = 120 # seconds
22
+
23
+
24
def _cache_path(iris_code):
    """Return the parquet cache file path for one IRIS in the bdtopo subdir."""
    filename = f"bdtopo_iris_{iris_code}.parquet"
    return ensure_subdir("bdtopo") / filename
27
+
28
+
29
def _sidecar_path(iris_code):
    """Return the .meta.json sidecar path for one IRIS in the bdtopo subdir."""
    filename = f"bdtopo_iris_{iris_code}.parquet.meta.json"
    return ensure_subdir("bdtopo") / filename
32
+
33
+
34
def get_bdtopo(iris_code, refresh=False):
    """Fetch BDTOPO buildings for one IRIS from the IGN Géoplateforme WFS.

    The result is cached as ``bdtopo/bdtopo_iris_<iris_code>.parquet`` under
    the configured cache directory and read back from there on later calls
    unless refresh=True.

    On a cache miss, the district polygon for ``iris_code`` is looked up via
    ``get_districts()``, its bounding box is used as the WFS BBOX filter, and
    the returned features are then reduced to those whose geometry
    intersects the district polygon before being cached.

    Args:
        iris_code (str): 9-digit IRIS code, e.g. "461290000".
        refresh (bool): when True, query the WFS again even if a cached file
            exists. Defaults to False.

    Returns:
        geopandas.GeoDataFrame: buildings built from the WFS features with
            CRS EPSG:2154, with the attribute columns as returned by the
            service.

    Raises:
        RemoteNotAvailableError: if the IRIS code is not present in the
            districts layer, if the IGN endpoint returns a non-200 status,
            or if it returns no features for the district's bounding box.
    """
    dest = _cache_path(iris_code)

    use_cache = dest.exists() and not refresh
    if use_cache:
        return gpd.read_parquet(dest)

    # The district polygon drives both the WFS bounding box and the
    # intersection filter applied to the response.
    all_districts = get_districts()
    matches = all_districts[all_districts["district"] == iris_code]
    if matches.empty:
        raise RemoteNotAvailableError(f"IRIS code {iris_code!r} not found in districts.")

    district_geom = matches.iloc[0].geometry
    bounds = district_geom.bounds  # (minx, miny, maxx, maxy)

    url = _IGN_BASE_URL.format(
        minx=bounds[0],
        miny=bounds[1],
        maxx=bounds[2],
        maxy=bounds[3],
    )
    response = requests.get(url, timeout=_TIMEOUT)
    if response.status_code != 200:
        raise RemoteNotAvailableError(
            f"IGN Géoplateforme returned HTTP {response.status_code} for IRIS {iris_code!r}."
        )

    features = response.json()["features"]
    if not features:
        raise RemoteNotAvailableError(
            f"IGN Géoplateforme returned no buildings for IRIS {iris_code!r}."
        )

    buildings = gpd.GeoDataFrame.from_features(features, crs="EPSG:2154")

    # The bbox query over-selects; keep only buildings touching the district.
    inside_district = buildings.geometry.intersects(district_geom)
    buildings = buildings[inside_district].copy()

    buildings.to_parquet(dest)
    return buildings