geoai-py 0.4.2__py2.py3-none-any.whl → 0.5.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/download.py CHANGED
@@ -10,8 +10,9 @@ import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  import planetary_computer as pc
+ import pystac
  import requests
- import rioxarray
+ import rioxarray as rxr
  import xarray as xr
  from pystac_client import Client
  from shapely.geometry import box
@@ -121,7 +122,7 @@ def download_naip(
  #
  else:
  # Fallback to direct rioxarray opening (less common case)
- data = rioxarray.open_rasterio(rgb_asset.href)
+ data = rxr.open_rasterio(rgb_asset.href)
  data.rio.to_raster(output_path)

  downloaded_files.append(output_path)
@@ -129,7 +130,7 @@ def download_naip(

  # Optional: Display a preview (uncomment if needed)
  if preview:
- data = rioxarray.open_rasterio(output_path)
+ data = rxr.open_rasterio(output_path)
  preview_raster(data)

  except Exception as e:
@@ -206,98 +207,130 @@ def json_serializable(obj: Any) -> Any:
  return obj


+ def get_overture_latest_release(patch=True) -> str:
+ """
+ Retrieves the value of the 'latest' key from the Overture Maps release JSON file.
+
+ Args:
+ patch (bool): If True, returns the full version string (e.g., "2025-02-19.0").
+
+ Returns:
+ str: The value of the 'latest' key from the releases.json file.
+
+ Raises:
+ requests.RequestException: If there's an issue with the HTTP request.
+ KeyError: If the 'latest' key is not found in the JSON data.
+ json.JSONDecodeError: If the response cannot be parsed as JSON.
+ """
+ url = "https://labs.overturemaps.org/data/releases.json"
+
+ try:
+ response = requests.get(url)
+ response.raise_for_status() # Raise an exception for HTTP errors
+
+ data = response.json()
+ if patch:
+ latest_release = data.get("latest")
+ else:
+ latest_release = data.get("latest").split(".")[
+ 0
+ ] # Extract the version number
+
+ if latest_release is None:
+ raise KeyError("The 'latest' key was not found in the releases.json file")
+
+ return latest_release
+
+ except requests.RequestException as e:
+ print(f"Error making the request: {e}")
+ raise
+ except json.JSONDecodeError as e:
+ print(f"Error parsing JSON response: {e}")
+ raise
+ except KeyError as e:
+ print(f"Key error: {e}")
+ raise
+
+
+ def get_all_overture_types():
+ """Get a list of all available Overture Maps data types.
+
+ Returns:
+ list: List of available Overture Maps data types.
+ """
+ from overturemaps import core
+
+ return core.get_all_overture_types()
+
+
  def download_overture_buildings(
  bbox: Tuple[float, float, float, float],
- output_file: str,
- output_format: str = "geojson",
- data_type: str = "building",
- verbose: bool = True,
+ output: str,
+ overture_type: str = "building",
+ **kwargs: Any,
  ) -> str:
  """Download building data from Overture Maps for a given bounding box using the overturemaps CLI tool.

  Args:
  bbox: Bounding box in the format (min_lon, min_lat, max_lon, max_lat) in WGS84 coordinates.
- output_file: Path to save the output file.
- output_format: Format to save the output, one of "geojson", "geojsonseq", or "geoparquet".
- data_type: The Overture Maps data type to download (building, place, etc.).
- verbose: Whether to print verbose output.
+ output: Path to save the output file.
+ overture_type: The Overture Maps data type to download (building, place, etc.).

  Returns:
  Path to the output file.
  """
- # Create output directory if needed
- output_dir = os.path.dirname(output_file)
- if output_dir and not os.path.exists(output_dir):
- os.makedirs(output_dir, exist_ok=True)
-
- # Format the bounding box string for the command
- west, south, east, north = bbox
- bbox_str = f"{west},{south},{east},{north}"
-
- # Build the command
- cmd = [
- "overturemaps",
- "download",
- "--bbox",
- bbox_str,
- "-f",
- output_format,
- "--type",
- data_type,
- "--output",
- output_file,
- ]
-
- if verbose:
- logger.info(f"Running command: {' '.join(cmd)}")
- logger.info("Downloading %s data for area: %s", data_type, bbox_str)
+
+ return get_overture_data(
+ overture_type=overture_type, bbox=bbox, output=output, **kwargs
+ )
+
+
+ def get_overture_data(
+ overture_type: str,
+ bbox: Tuple[float, float, float, float] = None,
+ columns: List[str] = None,
+ output: str = None,
+ **kwargs: Any,
+ ) -> "gpd.GeoDataFrame":
+ """Fetches overture data and returns it as a GeoDataFrame.
+
+ Args:
+ overture_type (str): The type of overture data to fetch. It can be one of the following:
+ address|building|building_part|division|division_area|division_boundary|place|
+ segment|connector|infrastructure|land|land_cover|land_use|water
+ bbox (Tuple[float, float, float, float], optional): The bounding box to
+ filter the data. Defaults to None.
+ columns (List[str], optional): The columns to include in the output.
+ Defaults to None.
+ output (str, optional): The file path to save the output GeoDataFrame.
+ Defaults to None.
+
+ Returns:
+ gpd.GeoDataFrame: The fetched overture data as a GeoDataFrame.
+
+ Raises:
+ ImportError: If the overture package is not installed.
+ """

  try:
- # Run the command
- result = subprocess.run(
- cmd,
- check=True,
- stdout=subprocess.PIPE if not verbose else None,
- stderr=subprocess.PIPE,
- text=True,
- )
+ from overturemaps import core
+ except ImportError:
+ raise ImportError("The overturemaps package is required to use this function")

- # Check if the file was created
- if os.path.exists(output_file):
- file_size = os.path.getsize(output_file) / (1024 * 1024) # Size in MB
- logger.info(
- f"Successfully downloaded data to {output_file} ({file_size:.2f} MB)"
- )
+ gdf = core.geodataframe(overture_type, bbox=bbox)
+ if columns is not None:
+ gdf = gdf[columns]

- # Optionally show some stats about the downloaded data
- if output_format == "geojson" and os.path.getsize(output_file) > 0:
- try:
- gdf = gpd.read_file(output_file)
- logger.info(f"Downloaded {len(gdf)} features")
-
- if len(gdf) > 0 and verbose:
- # Show a sample of the attribute names
- attrs = list(gdf.columns)
- attrs.remove("geometry")
- logger.info(f"Available attributes: {', '.join(attrs[:10])}...")
- except Exception as e:
- logger.warning(f"Could not read the GeoJSON file: {str(e)}")
+ gdf.crs = "EPSG:4326"

- return output_file
- else:
- logger.error(f"Command completed but file {output_file} was not created")
- if result.stderr:
- logger.error(f"Command error output: {result.stderr}")
- return None
-
- except subprocess.CalledProcessError as e:
- logger.error(f"Error running overturemaps command: {str(e)}")
- if e.stderr:
- logger.error(f"Command error output: {e.stderr}")
- raise RuntimeError(f"Failed to download Overture Maps data: {str(e)}")
- except Exception as e:
- logger.error(f"Unexpected error: {str(e)}")
- raise
+ out_dir = os.path.dirname(os.path.abspath(output))
+ if not os.path.exists(out_dir):
+ os.makedirs(out_dir, exist_ok=True)
+
+ if output is not None:
+ gdf.to_file(output, **kwargs)
+
+ return gdf


  def convert_vector_format(
@@ -360,18 +393,23 @@ def convert_vector_format(
  raise


- def extract_building_stats(geojson_file: str) -> Dict[str, Any]:
+ def extract_building_stats(data: str) -> Dict[str, Any]:
  """Extract statistics from the building data.

  Args:
- geojson_file: Path to the GeoJSON file.
+ data: Path to the GeoJSON file or GeoDataFrame containing building data.

  Returns:
  Dictionary with statistics.
  """
  try:
  # Read the GeoJSON file
- gdf = gpd.read_file(geojson_file)
+
+ if isinstance(data, gpd.GeoDataFrame):
+ gdf = data
+ else:
+
+ gdf = gpd.read_file(data)

  # Calculate statistics
  bbox = gdf.total_bounds.tolist()
@@ -516,7 +554,7 @@ def download_pc_stac_item(
  )
  # Still need to open the file to get the data for merging
  if merge_bands:
- band_data = rioxarray.open_rasterio(file_path)
+ band_data = rxr.open_rasterio(file_path)
  band_data_arrays.append((band, band_data))
  band_names.append(band)
  result[band] = file_path
@@ -525,7 +563,7 @@ def download_pc_stac_item(
  if show_progress and not isinstance(progress_iter, list):
  progress_iter.set_description(f"Downloading {band}")

- band_data = rioxarray.open_rasterio(band_url)
+ band_data = rxr.open_rasterio(band_url)

  # Store the data array for potential merging later
  if merge_bands:
@@ -866,10 +904,9 @@ def pc_stac_search(
  def pc_stac_download(
  items,
  output_dir=".",
- asset_keys=None,
+ assets=None,
  max_workers=4,
  skip_existing=True,
- sign_urls=True,
  ):
  """
  Download assets from STAC items retrieved from the Planetary Computer.
@@ -882,7 +919,7 @@ def pc_stac_download(
  items (list or pystac.Item): STAC Item object or list of STAC Item objects.
  output_dir (str, optional): Directory where assets will be saved.
  Defaults to current directory.
- asset_keys (list, optional): List of asset keys to download. If None,
+ assets (list, optional): List of asset keys to download. If None,
  downloads all available assets. Defaults to None.
  max_workers (int, optional): Maximum number of concurrent download threads.
  Defaults to 4.
@@ -899,11 +936,11 @@ def pc_stac_download(
  TypeError: If items is not a STAC Item or list of STAC Items.
  IOError: If there's an error writing the downloaded assets to disk.
  """
- import pystac
+
  from concurrent.futures import ThreadPoolExecutor, as_completed

  # Handle single item case
- if isinstance(items, pystac.Item):
+ if isinstance(items, pystac.Item) or isinstance(items, str):
  items = [items]
  elif not isinstance(items, list):
  raise TypeError("items must be a STAC Item or list of STAC Items")
@@ -911,31 +948,13 @@ def pc_stac_download(
  # Create output directory if it doesn't exist
  os.makedirs(output_dir, exist_ok=True)

- # Function to sign URLs if needed
- def get_signed_url(href):
- if not sign_urls:
- return href
-
- # Planetary Computer typically requires signing URLs for accessing data
- # Check if the URL is from Microsoft Planetary Computer
- if "planetarycomputer" in href:
- try:
- sign_url = "https://planetarycomputer.microsoft.com/api/sas/v1/sign"
- response = requests.get(sign_url, params={"href": href})
- response.raise_for_status()
- return response.json().get("href", href)
- except Exception as e:
- print(f"Warning: Failed to sign URL {href}: {str(e)}")
- return href
- return href
-
  # Function to download a single asset
  def download_asset(item, asset_key, asset):
+ item = pc.sign(item)
  item_id = item.id

  # Get the asset URL and sign it if needed
- asset_url = get_signed_url(asset.href)
-
+ asset_url = item.assets[asset_key].href
  # Determine output filename
  if asset.media_type:
  # Use appropriate file extension based on media type
@@ -991,17 +1010,17 @@ def pc_stac_download(

  for item in items:
  item_assets = {}
+ if isinstance(item, str):
+ item = pystac.Item.from_file(item)
  item_id = item.id
  print(f"Processing STAC item: {item_id}")

  # Determine which assets to download
- if asset_keys:
- assets_to_download = {
- k: v for k, v in item.assets.items() if k in asset_keys
- }
+ if assets:
+ assets_to_download = {k: v for k, v in item.assets.items() if k in assets}
  if not assets_to_download:
  print(
- f"Warning: None of the specified asset keys {asset_keys} found in item {item_id}"
+ f"Warning: None of the specified asset keys {assets} found in item {item_id}"
  )
  print(f"Available asset keys: {list(item.assets.keys())}")
  continue
@@ -1038,3 +1057,127 @@ def pc_stac_download(
  print(f"\nDownloaded {total_assets} assets for {len(results)} items")

  return results
+
+
+ def pc_item_asset_list(item):
+ """
+ Retrieve the list of asset keys from a STAC item in the Planetary Computer catalog.
+
+ Args:
+ item (str): The URL of the STAC item.
+
+ Returns:
+ list: A list of asset keys available in the signed STAC item.
+ """
+ if isinstance(item, str):
+ item = pystac.Item.from_file(item)
+
+ if not isinstance(item, pystac.Item):
+ raise ValueError("item_url must be a string (URL) or a pystac.Item object")
+
+ return list(item.assets.keys())
+
+
+ def read_pc_item_asset(item, asset, output=None, as_cog=True, **kwargs):
+ """
+ Read a specific asset from a STAC item in the Planetary Computer catalog.
+
+ Args:
+ item (str): The URL of the STAC item.
+ asset (str): The key of the asset to read.
+ output (str, optional): If specified, the path to save the asset as a raster file.
+ as_cog (bool, optional): If True, save the asset as a Cloud Optimized GeoTIFF (COG).
+
+ Returns:
+ xarray.DataArray: The data array for the specified asset.
+ """
+ if isinstance(item, str):
+ item = pystac.Item.from_file(item)
+
+ if not isinstance(item, pystac.Item):
+ raise ValueError("item must be a string (URL) or a pystac.Item object")
+
+ signed_item = pc.sign(item)
+
+ if asset not in signed_item.assets:
+ raise ValueError(
+ f"Asset '{asset}' not found in item '{item.id}'. It has available assets: {list(signed_item.assets.keys())}"
+ )
+
+ asset_url = signed_item.assets[asset].href
+ ds = rxr.open_rasterio(asset_url)
+
+ if as_cog:
+ kwargs["driver"] = "COG" # Ensure the output is a Cloud Optimized GeoTIFF
+
+ if output:
+ print(f"Saving asset '{asset}' to {output}...")
+ ds.rio.to_raster(output, **kwargs)
+ print(f"Asset '{asset}' saved successfully.")
+ return ds
+
+
+ def view_pc_item(
+ url=None,
+ collection=None,
+ item=None,
+ assets=None,
+ bands=None,
+ titiler_endpoint=None,
+ name="STAC Item",
+ attribution="Planetary Computer",
+ opacity=1.0,
+ shown=True,
+ fit_bounds=True,
+ layer_index=None,
+ backend="folium",
+ basemap=None,
+ map_args=None,
+ **kwargs,
+ ):
+
+ if backend == "folium":
+ import leafmap.foliumap as leafmap
+
+ elif backend == "ipyleaflet":
+ import leafmap.leafmap as leafmap
+
+ else:
+ raise ValueError(
+ f"Unsupported backend: {backend}. Supported backends are 'folium' and 'ipyleaflet'."
+ )
+
+ if map_args is None:
+ map_args = {}
+
+ if "draw_control" not in map_args:
+ map_args["draw_control"] = False
+
+ if url is not None:
+
+ item = pystac.Item.from_file(url)
+
+ if isinstance(item, pystac.Item):
+ collection = item.collection_id
+ if assets is None:
+ assets = [list(item.assets.keys())[0]]
+ item = item.id
+
+ m = leafmap.Map(**map_args)
+ if basemap is not None:
+ m.add_basemap(basemap)
+ m.add_stac_layer(
+ collection=collection,
+ item=item,
+ assets=assets,
+ bands=bands,
+ titiler_endpoint=titiler_endpoint,
+ name=name,
+ attribution=attribution,
+ opacity=opacity,
+ shown=shown,
+ fit_bounds=fit_bounds,
+ layer_index=layer_index,
+ **kwargs,
+ )
+ return m
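
A minimal usage sketch of the new Overture Maps helpers introduced in 0.5.0 (get_overture_latest_release, get_all_overture_types, get_overture_data). It assumes the overturemaps and geopandas packages are installed; the bounding box and output path are illustrative, not taken from the diff.

from geoai.download import (
    get_all_overture_types,
    get_overture_data,
    get_overture_latest_release,
)

# Latest Overture Maps release tag, e.g. "2025-02-19.0"
print(get_overture_latest_release())

# Themes accepted as overture_type (address, building, place, segment, ...)
print(get_all_overture_types())

# Hypothetical bounding box (min_lon, min_lat, max_lon, max_lat) in WGS84
bbox = (-122.52, 37.70, -122.35, 37.82)

# Fetch building footprints as a GeoDataFrame and also write them to GeoJSON
gdf = get_overture_data(overture_type="building", bbox=bbox, output="buildings.geojson")
print(f"Downloaded {len(gdf)} buildings")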
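A similar sketch for the new Planetary Computer helpers (pc_item_asset_list, read_pc_item_asset, view_pc_item). The STAC item URL is a hypothetical placeholder, the "image" asset key assumes the item exposes one, and the map preview requires leafmap.

from geoai.download import pc_item_asset_list, read_pc_item_asset, view_pc_item

# Hypothetical STAC item URL from the Planetary Computer STAC API
item_url = (
    "https://planetarycomputer.microsoft.com/api/stac/v1/collections/"
    "naip/items/<item-id>"
)

# List the asset keys available on the item
print(pc_item_asset_list(item_url))

# Read one asset as an xarray.DataArray and save it as a Cloud Optimized GeoTIFF
data = read_pc_item_asset(item_url, "image", output="image_cog.tif")

# Preview the item on an interactive map (folium backend by default)
m = view_pc_item(url=item_url, assets=["image"])
m  # display in a notebook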