fimeval 0.1.57__py3-none-any.whl → 0.1.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ """
2
+ Author: Supath Dhital
3
+ Date Created: January 2026
4
+
5
+ Description: This will extract Microsoft Building Footprints using ArcGIS REST API for a given boundary.
6
+ """
7
+
8
+ import geopandas as gpd
9
+ import requests
10
+ import pandas as pd
11
+ from pathlib import Path
12
+ from typing import Union, Optional
13
+
14
+
15
# Main class
class getBuildingFootprint:
    """Extract Microsoft Building Footprints within a boundary using spatial queries.

    Constructing the class loads the boundary, resolves the output directory,
    and immediately runs :meth:`extract`, which writes the footprints to disk.
    """

    # Public Esri feature service hosting the Microsoft Building Footprints layer.
    MSBFP_URL = "https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/MSBFP2/FeatureServer/0"

    def __init__(
        self,
        boundary: Union[str, Path, gpd.GeoDataFrame],
        layer: Optional[str] = None,
        output_dir: Optional[Union[str, Path]] = None,
        service_url: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        boundary : str, Path, or GeoDataFrame
            Boundary as file path or GeoDataFrame
        layer : str, optional
            Layer name if boundary is a geopackage with multiple layers
        output_dir : str or Path, optional
            Directory for outputs (defaults to cwd/BFOutputs)
        service_url : str, optional
            Custom ArcGIS feature service URL
        """
        self.boundary = self._load_boundary(boundary, layer)
        self.service_url = service_url or self.MSBFP_URL

        # Setup output directory
        if output_dir is None:
            output_dir = Path.cwd() / "BFOutputs"
        else:
            output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Run the extraction immediately; the result is also saved inside extract().
        self.extract(output_dir=output_dir)

    def _load_boundary(
        self, boundary: Union[str, Path, gpd.GeoDataFrame], layer: Optional[str]
    ) -> gpd.GeoDataFrame:
        """Load and validate the boundary, reprojecting to WGS84 if needed."""
        if isinstance(boundary, gpd.GeoDataFrame):
            gdf = boundary.copy()
        else:
            kwargs = {"layer": layer} if layer else {}
            gdf = gpd.read_file(boundary, **kwargs)

        # Ensure WGS84 — the service queries below are issued in EPSG:4326.
        if gdf.crs != "EPSG:4326":
            gdf = gdf.to_crs("EPSG:4326")

        return gdf

    def extract(
        self,
        output_dir: Optional[Union[str, Path]] = None,
        output_filename: str = "building_footprints.gpkg",
        batch_size: int = 2000,
        timeout: int = 60,
        verbose: bool = True,
        max_retries: int = 3,
    ) -> gpd.GeoDataFrame:
        """
        Extract building footprints within the boundary.

        Parameters
        ----------
        output_dir : str or Path, optional
            Output directory (defaults to cwd/BFOutputs)
        output_filename : str, default="building_footprints.gpkg"
            Output filename
        batch_size : int, default=2000
            Number of features to retrieve per request
        timeout : int, default=60
            Request timeout in seconds
        verbose : bool, default=True
            Print progress messages
        max_retries : int, default=3
            Maximum consecutive timed-out requests before aborting

        Returns
        -------
        GeoDataFrame
            Extracted building footprints (empty if none found)
        """
        # BUG FIX: extract() could be called directly with output_dir=None and
        # previously crashed at save time; resolve and create the directory here.
        if output_dir is None:
            output_dir = Path.cwd() / "BFOutputs"
        else:
            output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Get bounding box
        xmin, ymin, xmax, ymax = self.boundary.total_bounds

        if verbose:
            print(f"Querying {self.service_url}...")

        # Query the service page by page using resultOffset pagination.
        all_features = []
        offset = 0
        retries = 0
        query_url = f"{self.service_url}/query"

        while True:
            params = {
                "f": "geojson",
                "where": "1=1",
                "geometry": f"{xmin},{ymin},{xmax},{ymax}",
                "geometryType": "esriGeometryEnvelope",
                "inSR": "4326",
                "spatialRel": "esriSpatialRelIntersects",
                "outFields": "*",
                "returnGeometry": "true",
                "outSR": "4326",
                "resultOffset": offset,
                "resultRecordCount": batch_size,
            }

            try:
                response = requests.get(query_url, params=params, timeout=timeout)

                if response.status_code != 200:
                    if verbose:
                        print(f"Error {response.status_code}")
                    break

                data = response.json()

                if "error" in data:
                    if verbose:
                        print(f"Server error: {data['error'].get('message')}")
                    break

                if "features" in data and data["features"]:
                    batch_gdf = gpd.GeoDataFrame.from_features(
                        data["features"], crs="EPSG:4326"
                    )
                    all_features.append(batch_gdf)
                    retries = 0  # successful page: reset the timeout counter

                    if verbose:
                        # BUG FIX: the running total was computed but never shown.
                        total = sum(len(gdf) for gdf in all_features)
                        print(f"  Retrieved {total} features so far...")

                    # A short page means the service has no more features.
                    if len(data["features"]) < batch_size:
                        break
                    offset += batch_size
                else:
                    break

            except requests.exceptions.Timeout:
                # BUG FIX: retries were previously unbounded, so an unresponsive
                # service made this loop hang forever.
                retries += 1
                if retries >= max_retries:
                    if verbose:
                        print(" Too many timeouts, aborting.")
                    break
                if verbose:
                    print(" Request timed out, retrying...")
                continue
            except Exception as e:
                if verbose:
                    print(f" Error: {e}")
                break

        if not all_features:
            if verbose:
                print("No features found.")
            return gpd.GeoDataFrame()

        # Combine and process
        gdf = pd.concat(all_features, ignore_index=True)
        gdf = gpd.GeoDataFrame(gdf, crs="EPSG:4326")

        # Remove duplicates returned by overlapping pages, keyed on the first
        # available feature-id column.
        for id_field in ["OBJECTID", "FID", "ID"]:
            if id_field in gdf.columns:
                initial = len(gdf)
                gdf = gdf.drop_duplicates(subset=[id_field])
                if verbose and (initial - len(gdf)) > 0:
                    print(f"Removed {initial - len(gdf)} duplicates")
                break

        # Clip to exact boundary (the envelope query over-selects).
        if verbose:
            print(f"Clipping {len(gdf)} features to boundary...")
        gdf = gpd.clip(gdf, self.boundary)

        # Save
        output_path = output_dir / output_filename
        gdf.to_file(output_path, driver="GPKG")

        if verbose:
            print(f"\n{'='*60}")
            print(f"SUCCESS: Saved {len(gdf)} buildings to:")
            print(f"  {output_path}")
            print(f"{'='*60}")

        return gdf
@@ -10,6 +10,9 @@ import seaborn as sns
10
10
  import matplotlib.pyplot as plt
11
11
  import matplotlib.gridspec as gridspec
12
12
 
13
+ # Import building footprint module
14
+ from .arcgis_API import getBuildingFootprint
15
+
13
16
 
14
17
  def Changeintogpkg(input_path, output_dir, layer_name):
15
18
  input_path = str(input_path)
@@ -355,38 +358,12 @@ def detect_shapefile(folder):
355
358
  return None
356
359
 
357
360
 
358
- def ensure_pyspark(version: str | None = "3.5.4") -> None:
359
- """Install pyspark at runtime via `uv pip` into this env (no-op if present)."""
360
- import importlib, shutil, subprocess, sys, re
361
-
362
- try:
363
- import importlib.util
364
-
365
- if importlib.util.find_spec("pyspark"):
366
- return
367
- except Exception:
368
- pass
369
- uv = shutil.which("uv")
370
- if not uv:
371
- raise RuntimeError(
372
- "`uv` not found on PATH. Please install uv or add it to PATH."
373
- )
374
- if version is None:
375
- spec = "pyspark"
376
- else:
377
- v = version.strip()
378
- spec = f"pyspark{v}" if re.match(r"^[<>=!~]", v) else f"pyspark=={v}"
379
- subprocess.check_call([uv, "pip", "install", "--python", sys.executable, spec])
380
-
381
-
382
361
  def EvaluationWithBuildingFootprint(
383
362
  main_dir,
384
363
  method_name,
385
364
  output_dir,
386
- country=None,
387
365
  building_footprint=None,
388
366
  shapefile_dir=None,
389
- geeprojectID=None,
390
367
  ):
391
368
  tif_files_main = glob.glob(os.path.join(main_dir, "*.tif"))
392
369
  if tif_files_main:
@@ -410,31 +387,23 @@ def EvaluationWithBuildingFootprint(
410
387
 
411
388
  building_footprintMS = building_footprint
412
389
 
390
+ # If no building footprint provided, extract using ArcGIS API
413
391
  if building_footprintMS is None:
414
- ensure_pyspark()
415
- from .microsoftBF import BuildingFootprintwithISO
416
-
417
392
  out_dir = os.path.join(method_path, "BuildingFootprint")
418
393
  if not os.path.exists(out_dir):
419
394
  os.makedirs(out_dir)
420
395
  EX_building_footprint = find_existing_footprint(out_dir)
421
- if not EX_building_footprint:
396
+ if EX_building_footprint:
397
+ building_footprintMS = EX_building_footprint
398
+ else:
422
399
  boundary_dir = shapefile_dir if shapefile_dir else boundary
423
-
424
- if geeprojectID:
425
- BuildingFootprintwithISO(
426
- country,
427
- boundary_dir,
428
- out_dir,
429
- geeprojectID=geeprojectID,
430
- )
431
- else:
432
- BuildingFootprintwithISO(country, boundary_dir, out_dir)
433
- building_footprintMS = os.path.join(
434
- out_dir, f"building_footprint.gpkg"
400
+ getBuildingFootprint(
401
+ boundary=boundary_dir,
402
+ output_dir=out_dir,
435
403
  )
436
- else:
437
- building_footprintMS = EX_building_footprint
404
+ # After downloading, find the newly created footprint
405
+ building_footprintMS = find_existing_footprint(out_dir)
406
+
438
407
  process_TIFF(
439
408
  tif_files,
440
409
  contingency_files,
@@ -471,33 +440,22 @@ def EvaluationWithBuildingFootprint(
471
440
  building_footprintMS = building_footprint
472
441
 
473
442
  if building_footprintMS is None:
474
- ensure_pyspark()
475
- from .microsoftBF import BuildingFootprintwithISO
476
-
477
443
  out_dir = os.path.join(method_path, "BuildingFootprint")
478
444
  if not os.path.exists(out_dir):
479
445
  os.makedirs(out_dir)
480
446
  EX_building_footprint = find_existing_footprint(out_dir)
481
- if not EX_building_footprint:
447
+ if EX_building_footprint:
448
+ building_footprintMS = EX_building_footprint
449
+ else:
482
450
  boundary_dir = (
483
451
  shapefile_dir if shapefile_dir else boundary
484
452
  )
485
- if geeprojectID:
486
- BuildingFootprintwithISO(
487
- country,
488
- boundary_dir,
489
- out_dir,
490
- geeprojectID=geeprojectID,
491
- )
492
- else:
493
- BuildingFootprintwithISO(
494
- country, boundary_dir, out_dir
495
- )
496
- building_footprintMS = os.path.join(
497
- out_dir, f"building_footprint.gpkg"
453
+ getBuildingFootprint(
454
+ boundary=boundary_dir,
455
+ output_dir=out_dir,
498
456
  )
499
- else:
500
- building_footprintMS = EX_building_footprint
457
+ # After downloading, find the newly created footprint
458
+ building_footprintMS = find_existing_footprint(out_dir)
501
459
 
502
460
  process_TIFF(
503
461
  tif_files,
@@ -1,6 +1,6 @@
1
1
  from .evaluationFIM import EvaluateFIM
2
2
  from .printcontingency import PrintContingencyMap
3
3
  from .plotevaluationmetrics import PlotEvaluationMetrics
4
- from .PWBs3 import get_PWB
4
+ from .water_bodies import get_PWB, ExtractPWB
5
5
 
6
- __all__ = ["EvaluateFIM", "PrintContingencyMap", "PlotEvaluationMetrics", "get_PWB"]
6
+ __all__ = ["EvaluateFIM", "PrintContingencyMap", "PlotEvaluationMetrics", "get_PWB", "ExtractPWB"]
@@ -1,3 +1,7 @@
1
+ """
2
+ Author: Supath Dhital
3
+ Date Updated: January 2026
4
+ """
1
5
  import os
2
6
  import re
3
7
  import numpy as np
@@ -11,6 +15,7 @@ import pandas as pd
11
15
  from rasterio.warp import reproject, Resampling
12
16
  from rasterio.io import MemoryFile
13
17
  from rasterio import features
18
+ from shapely.geometry import shape
14
19
  from rasterio.mask import mask
15
20
 
16
21
  os.environ["CHECK_DISK_FREE_SPACE"] = "NO"
@@ -19,9 +24,9 @@ import warnings
19
24
 
20
25
  warnings.filterwarnings("ignore", category=rasterio.errors.ShapeSkipWarning)
21
26
 
22
- from .methods import AOI, smallest_extent, convex_hull, get_smallest_raster_path
27
+ from .methods import AOI, convex_hull, smallest_extent, get_smallest_raster_path
23
28
  from .metrics import evaluationmetrics
24
- from .PWBs3 import get_PWB
29
+ from .water_bodies import ExtractPWB
25
30
  from ..utilis import MakeFIMsUniform, benchmark_name, find_best_boundary
26
31
  from ..setup_benchFIM import ensure_benchmark
27
32
 
@@ -68,7 +73,7 @@ def fix_permissions(path):
68
73
 
69
74
  # Function for the evalution of the model
70
75
  def evaluateFIM(
71
- benchmark_path, candidate_paths, gdf, folder, method, output_dir, shapefile=None
76
+ benchmark_path, candidate_paths, PWB_Dir, folder, method, output_dir, shapefile=None
72
77
  ):
73
78
  # Lists to store evaluation metrics
74
79
  csi_values = []
@@ -129,8 +134,23 @@ def evaluateFIM(
129
134
  benchmark_nodata = src1.nodata
130
135
  benchmark_crs = src1.crs
131
136
  b_profile = src1.profile
137
+
138
+ #Getting the correct geometry shape and crs to extract PWB
139
+ boundary_shape = shape(bounding_geom[0])
140
+ boundary_gdf = gpd.GeoDataFrame(geometry=[boundary_shape], crs=benchmark_crs)
141
+
142
+ #Proceed the masking
132
143
  out_image1[out_image1 == benchmark_nodata] = 0
133
144
  out_image1 = np.where(out_image1 > 0, 2, 0).astype(np.float32)
145
+
146
+ #If PWB_Dir is provided, use the local PWB shapefile, else download from ArcGIS API
147
+ if PWB_Dir is not None:
148
+ gdf = gpd.read_file(PWB_Dir)
149
+ else:
150
+ #Get the permanent water bodies from ArcGIS REST API
151
+ pwb_obj = ExtractPWB(boundary = boundary_gdf, save = False)
152
+ gdf = pwb_obj.gdf
153
+
134
154
  gdf = gdf.to_crs(benchmark_crs)
135
155
  shapes1 = [
136
156
  geom for geom in gdf.geometry if geom is not None and not geom.is_empty
@@ -394,23 +414,18 @@ def safe_delete_folder(folder_path):
394
414
 
395
415
  def EvaluateFIM(
396
416
  main_dir,
397
- method_name=None,
398
- output_dir=None,
417
+ method_name=None,
418
+ output_dir=None,
399
419
  PWB_dir=None,
400
420
  shapefile_dir=None,
401
421
  target_crs=None,
402
422
  target_resolution=None,
403
423
  benchmark_dict=None,
404
424
  ):
405
- if output_dir is None:
425
+ if output_dir is None:
406
426
  output_dir = os.path.join(os.getcwd(), "Evaluation_Results")
407
427
 
408
428
  main_dir = Path(main_dir)
409
- # Read the permanent water bodies
410
- if PWB_dir is None:
411
- gdf = get_PWB()
412
- else:
413
- gdf = gpd.read_file(PWB_dir)
414
429
 
415
430
  # Grant the permission to the main directory
416
431
  fix_permissions(main_dir)
@@ -429,8 +444,8 @@ def EvaluateFIM(
429
444
  if benchmark_path and candidate_path:
430
445
  if method_name is None:
431
446
  local_method = "AOI"
432
-
433
- #For single case, if user have explicitly send boundary, use that, else use the boundary from the benchmark FIM evaluation
447
+
448
+ # For single case, if user have explicitly send boundary, use that, else use the boundary from the benchmark FIM evaluation
434
449
  if shapefile_dir is not None:
435
450
  local_shapefile = shapefile_dir
436
451
  else:
@@ -444,20 +459,29 @@ def EvaluateFIM(
444
459
  local_shapefile = str(boundary)
445
460
  else:
446
461
  local_method = method_name
447
- local_shapefile = shapefile_dir
462
+ local_shapefile = shapefile_dir
448
463
 
449
464
  print(f"**Flood Inundation Evaluation of {folder_dir.name}**")
450
465
  try:
451
466
  Metrics = evaluateFIM(
452
467
  benchmark_path,
453
468
  candidate_path,
454
- gdf,
469
+ PWB_dir,
455
470
  folder_dir,
456
471
  local_method,
457
472
  output_dir,
458
- shapefile=local_shapefile,
473
+ shapefile=local_shapefile,
459
474
  )
460
- print("\n", Metrics, "\n")
475
+
476
+ # Print results in structured table format with 3 decimal points
477
+ candidate_names = [os.path.splitext(os.path.basename(path))[0] for path in candidate_path]
478
+ df_display = pd.DataFrame.from_dict(Metrics, orient='index')
479
+ df_display.columns = candidate_names
480
+ df_display.reset_index(inplace=True)
481
+ df_display.rename(columns={'index': 'Metrics'}, inplace=True)
482
+ print("\n")
483
+ print(df_display.to_string(index=False, float_format='%.3f'))
484
+ print("\n")
461
485
  except Exception as e:
462
486
  print(f"Error evaluating {folder_dir.name}: {e}")
463
487
  else:
@@ -470,7 +494,7 @@ def EvaluateFIM(
470
494
  if TIFFfiles_main_dir:
471
495
 
472
496
  # Ensure benchmark is present if needed
473
- TIFFfiles_main_dir = ensure_benchmark(
497
+ TIFFfiles_main_dir = ensure_benchmark(
474
498
  main_dir, TIFFfiles_main_dir, benchmark_dict
475
499
  )
476
500
 
@@ -494,12 +518,10 @@ def EvaluateFIM(
494
518
  tif_files = list(folder.glob("*.tif"))
495
519
 
496
520
  if tif_files:
497
- processing_folder = folder / "processing"
521
+ processing_folder = folder / "processing"
498
522
  try:
499
523
  # Ensure benchmark is present if needed
500
- tif_files = ensure_benchmark(
501
- folder, tif_files, benchmark_dict
502
- )
524
+ tif_files = ensure_benchmark(folder, tif_files, benchmark_dict)
503
525
 
504
526
  MakeFIMsUniform(
505
527
  folder,
@@ -517,5 +539,4 @@ def EvaluateFIM(
517
539
  else:
518
540
  print(
519
541
  f"Skipping {folder.name} as it doesn't contain any tif files."
520
- )
521
-
542
+ )
@@ -101,7 +101,9 @@ def getContingencyMap(raster_path, method_path):
101
101
  base_name = os.path.basename(raster_path).split(".")[0]
102
102
  output_path = os.path.join(plot_dir, f"{base_name}.png")
103
103
  plt.savefig(output_path, dpi=500, bbox_inches="tight")
104
- plt.show()
104
+ plt.show(block=False)
105
+ plt.pause(5.0)
106
+ plt.close()
105
107
 
106
108
 
107
109
  def PrintContingencyMap(main_dir, method_name, out_dir):
@@ -0,0 +1,175 @@
1
+ """
2
+ Author: Supath Dhital
3
+ Date Created: January 2026
4
+
5
+ Description: This module extracts permanent water bodies
6
+ using the ArcGIS REST API and AWS S3 for a given boundary file. The AWS path retrieves the full US permanent-water-bodies shapefile from the S3 bucket and then clips it to the boundary.
7
+
8
+ This FIMeval module now uses the ArcGIS REST API to extract water bodies within a specified boundary. Because it queries data only for the specified boundary, it is faster and more efficient than downloading the entire US water bodies dataset.
9
+ """
10
+
11
+ # import Libraries
12
+ import geopandas as gpd
13
+ import boto3
14
+ import botocore
15
+ import os
16
+ import tempfile
17
+ import requests
18
+ import pandas as pd
19
+ import numpy as np
20
+ import json
21
+ from pathlib import Path
22
+ from typing import Union, Optional
23
+ from shapely.geometry import box
24
+
25
+ #USING ANONYMOUS S3 CLIENT TO ACCESS PUBLIC DATA
26
+ # Initialize an anonymous S3 client
27
+ s3 = boto3.client(
28
+ "s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED)
29
+ )
30
+
31
+ bucket_name = "sdmlab"
32
+ pwb_folder = "PWB/"
33
+
34
+
35
def PWB_inS3(s3_client, bucket, prefix):
    """Download all components of a shapefile from S3 into a temporary directory."""
    tmp_dir = tempfile.mkdtemp()
    listing = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix)
    if "Contents" not in listing:
        raise ValueError("No files found in the specified S3 folder.")

    # Fetch only the sidecar extensions that make up a shapefile.
    wanted_suffixes = (".shp", ".shx", ".dbf", ".prj", ".cpg")
    for entry in listing["Contents"]:
        key = entry["Key"]
        name = os.path.basename(key)
        if name.endswith(wanted_suffixes):
            s3_client.download_file(bucket, key, os.path.join(tmp_dir, name))

    shp_candidates = [f for f in os.listdir(tmp_dir) if f.endswith(".shp")]
    if not shp_candidates:
        raise ValueError("No .shp file found after download.")

    return os.path.join(tmp_dir, shp_candidates[0])
55
+
56
+
57
def get_PWB():
    """Fetch the full US permanent-water-bodies layer from the public S3 bucket."""
    local_shp = PWB_inS3(s3, bucket_name, pwb_folder)
    return gpd.read_file(local_shp)
61
+
62
+
63
#USING ARCGIS REST TO ACCESS PUBLIC DATA- More fast
class ExtractPWB:
    """Extract permanent water bodies within a boundary via the ArcGIS REST API.

    Constructing the class runs :meth:`extract`; the clipped result is kept on
    ``self.gdf`` and, when ``save`` is True, written as a GeoPackage to
    ``output_dir / output_filename``.
    """

    # Esri-hosted detailed US water bodies feature service.
    SERVICE_URL = "https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_Detailed_Water_Bodies/FeatureServer/0"

    def __init__(
        self,
        boundary: Union[str, Path, gpd.GeoDataFrame],
        layer: Optional[str] = None,
        output_dir: Optional[Union[str, Path]] = None,
        save: bool = True,
        output_filename: str = "permanent_water.gpkg"
    ):
        """
        Parameters
        ----------
        boundary : str, Path, or GeoDataFrame
            Boundary as file path or GeoDataFrame
        layer : str, optional
            Layer name if boundary is a geopackage with multiple layers
        output_dir : str or Path, optional
            Output directory (defaults to cwd/PWBOutputs)
        save : bool, default=True
            Whether to write the clipped result to disk
        output_filename : str, default="permanent_water.gpkg"
            Output filename used when save is True
        """
        self.boundary_gdf = self._load_boundary(boundary, layer)
        self.output_dir = Path(output_dir) if output_dir else Path.cwd() / "PWBOutputs"

        # We store the final result in self.gdf so it can be accessed after init
        self.gdf = self.extract(save=save, output_filename=output_filename)

    def _load_boundary(self, boundary, layer):
        """Load the boundary and reproject to WGS84 (the service query CRS)."""
        if isinstance(boundary, gpd.GeoDataFrame):
            gdf = boundary.copy()
        else:
            kwargs = {"layer": layer} if layer else {}
            gdf = gpd.read_file(boundary, **kwargs)
        return gdf.to_crs("EPSG:4326") if gdf.crs != "EPSG:4326" else gdf

    def _get_query_envelopes(self, threshold=1.0):
        """Tile the boundary's bounding box into ~threshold-degree envelopes.

        Querying small tiles keeps each request under the service's transfer
        limits for large boundaries.
        """
        xmin, ymin, xmax, ymax = self.boundary_gdf.total_bounds
        cols = list(np.arange(xmin, xmax, threshold)) + [xmax]
        rows = list(np.arange(ymin, ymax, threshold)) + [ymax]

        grid = []
        for i in range(len(cols) - 1):
            for j in range(len(rows) - 1):
                grid.append({
                    "xmin": cols[i], "ymin": rows[j],
                    "xmax": cols[i + 1], "ymax": rows[j + 1],
                    "spatialReference": {"wkid": 4326}
                })
        return grid

    def extract(self, save: bool = True, output_filename: str = "permanent_water.gpkg", verbose: bool = True) -> gpd.GeoDataFrame:
        """Query, merge, deduplicate and clip water-body features.

        Parameters
        ----------
        save : bool, default=True
            Write the clipped result to ``self.output_dir / output_filename``
        output_filename : str, default="permanent_water.gpkg"
            Output filename used when save is True
        verbose : bool, default=True
            Print progress messages

        Returns
        -------
        GeoDataFrame
            Water bodies clipped to the boundary (empty if none found)
        """
        all_features = []
        query_url = f"{self.SERVICE_URL}/query"
        envelopes = self._get_query_envelopes()

        # Restrict the query to feature types considered permanent water.
        permanent_filter = "FTYPE IN ('Lake/Pond', 'Stream/River', 'Reservoir', 'Canal/Ditch')"

        for env_idx, env in enumerate(envelopes):
            offset = 0
            limit = 1000

            # Paginate within each envelope using resultOffset.
            while True:
                payload = {
                    "f": "geojson",
                    "where": permanent_filter,
                    "geometry": json.dumps(env),
                    "geometryType": "esriGeometryEnvelope",
                    "inSR": "4326",
                    "spatialRel": "esriSpatialRelIntersects",
                    "outFields": "NAME,FTYPE,FCODE,SQKM",
                    "returnGeometry": "true",
                    "outSR": "4326",
                    "resultOffset": offset,
                    "resultRecordCount": limit
                }

                try:
                    response = requests.post(query_url, data=payload, timeout=60)
                    response.raise_for_status()
                    data = response.json()

                    features = data.get("features", [])
                    if not features:
                        break

                    batch_gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
                    all_features.append(batch_gdf)

                    if verbose and offset > 0:
                        print(f"  Grid {env_idx}: Paginated to offset {offset}...")

                    # A short page means this envelope is exhausted.
                    if len(features) < limit:
                        break

                    offset += limit

                except Exception as e:
                    print(f"Error at grid {env_idx}, offset {offset}: {e}")
                    break

        if not all_features:
            print("No water bodies found.")
            return gpd.GeoDataFrame()

        # Combine and deduplicate features returned by overlapping envelopes,
        # keeping the first occurrence of each geometry (compared by WKT).
        full_gdf = pd.concat(all_features, ignore_index=True)
        full_gdf = gpd.GeoDataFrame(full_gdf, crs="EPSG:4326")
        full_gdf = full_gdf[~full_gdf.geometry.to_wkt().duplicated()]

        # Clip to exact AOI (the envelope query over-selects).
        final_gdf = gpd.clip(full_gdf, self.boundary_gdf)

        # Conditional Saving
        if save:
            self.output_dir.mkdir(parents=True, exist_ok=True)
            output_path = self.output_dir / output_filename
            final_gdf.to_file(output_path, driver="GPKG")
            if verbose:
                print(f"Saved {len(final_gdf)} features to {output_path}")
        elif verbose:
            # FIX: plain string (was an f-string with no placeholders).
            print("PWB Extraction complete.")

        return final_gdf