geoai-py 0.1.6__py2.py3-none-any.whl → 0.2.0__py2.py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
geoai/__init__.py CHANGED
@@ -2,7 +2,32 @@
 
 __author__ = """Qiusheng Wu"""
 __email__ = "giswqs@gmail.com"
-__version__ = "0.1.6"
+__version__ = "0.2.0"
 
 
+import os
+import sys
+
+
+def set_proj_lib_path():
+    """Set the PROJ_LIB environment variable based on the current conda environment."""
+    try:
+        # Get conda environment path
+        conda_env_path = os.environ.get("CONDA_PREFIX") or sys.prefix
+
+        # Set PROJ_LIB environment variable
+        proj_path = os.path.join(conda_env_path, "share", "proj")
+        gdal_path = os.path.join(conda_env_path, "share", "gdal")
+
+        # Check if the directory exists before setting
+        if os.path.exists(proj_path):
+            os.environ["PROJ_LIB"] = proj_path
+        if os.path.exists(gdal_path):
+            os.environ["GDAL_DATA"] = gdal_path
+    except Exception as e:
+        print(e)
+        return
+
+
+set_proj_lib_path()
 from .geoai import *
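Since `set_proj_lib_path()` now runs at import time, a quick sanity check is to confirm that the environment variables point into the active conda environment. A minimal sketch (assuming geoai is installed in a conda environment that ships the `share/proj` and `share/gdal` directories):

```python
import os

import geoai  # importing the package calls set_proj_lib_path()

# Each variable is only set if the corresponding directory exists
print(os.environ.get("PROJ_LIB"))   # e.g. $CONDA_PREFIX/share/proj
print(os.environ.get("GDAL_DATA"))  # e.g. $CONDA_PREFIX/share/gdal
```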
geoai/common.py CHANGED
@@ -5,8 +5,12 @@ from collections.abc import Iterable
 from typing import Any, Dict, List, Optional, Tuple, Type, Union, Callable
 import matplotlib.pyplot as plt
 
+import leafmap
 import torch
 import numpy as np
+import xarray as xr
+import rioxarray
+import rasterio as rio
 from torch.utils.data import DataLoader
 from torchgeo.datasets import RasterDataset, stack_samples, unbind_samples, utils
 from torchgeo.samplers import RandomGeoSampler, Units
@@ -55,10 +59,12 @@ def viz_raster(
     Returns:
         leafmap.Map: The map object with the raster layer added.
     """
-    import leafmap
 
     m = leafmap.Map(basemap=basemap)
 
+    if isinstance(source, dict):
+        source = dict_to_image(source)
+
     m.add_raster(
         source=source,
         indexes=indexes,
@@ -86,6 +92,7 @@ def viz_image(
     scale_factor: float = 1.0,
     figsize: Tuple[int, int] = (10, 5),
     axis_off: bool = True,
+    title: Optional[str] = None,
     **kwargs: Any,
 ) -> None:
     """
@@ -98,6 +105,7 @@
         scale_factor (float, optional): The scale factor to apply to the image. Defaults to 1.0.
         figsize (Tuple[int, int], optional): The size of the figure. Defaults to (10, 5).
         axis_off (bool, optional): Whether to turn off the axis. Defaults to True.
+        title (Optional[str], optional): The title of the plot. Defaults to None.
         **kwargs (Any): Additional keyword arguments for plt.imshow().
 
     Returns:
@@ -124,6 +132,8 @@
     plt.imshow(image, **kwargs)
     if axis_off:
         plt.axis("off")
+    if title is not None:
+        plt.title(title)
     plt.show()
     plt.close()
 
@@ -277,3 +287,150 @@ def calc_stats(
     # at the end, we shall have 2 vectors with length n=chnls
     # we will average them considering the number of images
     return accum_mean / len(files), accum_std / len(files)
+
+
+def dict_to_rioxarray(data_dict: Dict) -> xr.DataArray:
+    """Convert a dictionary to a xarray DataArray. The dictionary should contain the
+    following keys: "crs", "bounds", and "image". It can be generated from a TorchGeo
+    dataset sampler.
+
+    Args:
+        data_dict (Dict): The dictionary containing the data.
+
+    Returns:
+        xr.DataArray: The xarray DataArray.
+    """
+
+    from affine import Affine
+
+    # Extract components from the dictionary
+    crs = data_dict["crs"]
+    bounds = data_dict["bounds"]
+    image_tensor = data_dict["image"]
+
+    # Convert tensor to numpy array if needed
+    if hasattr(image_tensor, "numpy"):
+        # For PyTorch tensors
+        image_array = image_tensor.numpy()
+    else:
+        # If it's already a numpy array or similar
+        image_array = np.array(image_tensor)
+
+    # Calculate pixel resolution
+    width = image_array.shape[2]  # Width is the size of the last dimension
+    height = image_array.shape[1]  # Height is the size of the middle dimension
+
+    res_x = (bounds.maxx - bounds.minx) / width
+    res_y = (bounds.maxy - bounds.miny) / height
+
+    # Create the transform matrix
+    transform = Affine(res_x, 0.0, bounds.minx, 0.0, -res_y, bounds.maxy)
+
+    # Create dimensions
+    x_coords = np.linspace(bounds.minx + res_x / 2, bounds.maxx - res_x / 2, width)
+    y_coords = np.linspace(bounds.maxy - res_y / 2, bounds.miny + res_y / 2, height)
+
+    # If time dimension exists in the bounds
+    if hasattr(bounds, "mint") and hasattr(bounds, "maxt"):
+        # Create a single time value or range if needed
+        t_coords = [
+            bounds.mint
+        ]  # Or np.linspace(bounds.mint, bounds.maxt, num_time_steps)
+
+        # Create DataArray with time dimension
+        dims = (
+            ("band", "y", "x")
+            if image_array.shape[0] <= 10
+            else ("time", "band", "y", "x")
+        )
+
+        if dims[0] == "band":
+            # For multi-band single time
+            da = xr.DataArray(
+                image_array,
+                dims=dims,
+                coords={
+                    "band": np.arange(1, image_array.shape[0] + 1),
+                    "y": y_coords,
+                    "x": x_coords,
+                },
+            )
+        else:
+            # For multi-time multi-band
+            da = xr.DataArray(
+                image_array,
+                dims=dims,
+                coords={
+                    "time": t_coords,
+                    "band": np.arange(1, image_array.shape[1] + 1),
+                    "y": y_coords,
+                    "x": x_coords,
+                },
+            )
+    else:
+        # Create DataArray without time dimension
+        da = xr.DataArray(
+            image_array,
+            dims=("band", "y", "x"),
+            coords={
+                "band": np.arange(1, image_array.shape[0] + 1),
+                "y": y_coords,
+                "x": x_coords,
+            },
+        )
+
+    # Set spatial attributes
+    da.rio.write_crs(crs, inplace=True)
+    da.rio.write_transform(transform, inplace=True)
+
+    return da
+
+
+def dict_to_image(
+    data_dict: Dict[str, Any], output: Optional[str] = None, **kwargs
+) -> rio.DatasetReader:
+    """Convert a dictionary containing spatial data to a rasterio dataset or save it to
+    a file. The dictionary should contain the following keys: "crs", "bounds", and "image".
+    It can be generated from a TorchGeo dataset sampler.
+
+    This function transforms a dictionary with CRS, bounding box, and image data
+    into a rasterio DatasetReader using leafmap's array_to_image utility after
+    first converting to a rioxarray DataArray.
+
+    Args:
+        data_dict: A dictionary containing:
+            - 'crs': A pyproj CRS object
+            - 'bounds': A BoundingBox object with minx, maxx, miny, maxy attributes
+              and optionally mint, maxt for temporal bounds
+            - 'image': A tensor or array-like object with image data
+        output: Optional path to save the image to a file. If not provided, the image
+            will be returned as a rasterio DatasetReader object.
+        **kwargs: Additional keyword arguments to pass to leafmap.array_to_image.
+            Common options include:
+            - colormap: str, name of the colormap (e.g., 'viridis', 'terrain')
+            - vmin: float, minimum value for colormap scaling
+            - vmax: float, maximum value for colormap scaling
+
+    Returns:
+        A rasterio DatasetReader object that can be used for visualization or
+        further processing.
+
+    Examples:
+        >>> image = dict_to_image(
+        ...     {'crs': CRS.from_epsg(26911), 'bounds': bbox, 'image': tensor},
+        ...     colormap='terrain'
+        ... )
+        >>> fig, ax = plt.subplots(figsize=(10, 10))
+        >>> show(image, ax=ax)
+    """
+    da = dict_to_rioxarray(data_dict)
+
+    if output is not None:
+        out_dir = os.path.abspath(os.path.dirname(output))
+        if not os.path.exists(out_dir):
+            os.makedirs(out_dir, exist_ok=True)
+        da.rio.to_raster(output)
+        return output
+    else:
+        image = leafmap.array_to_image(da, **kwargs)
+        return image
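With `dict_to_rioxarray` and `dict_to_image` in place, `viz_raster` can now take a TorchGeo-style sample dict directly. A minimal sketch with a hand-built sample (the CRS, bounds, and random tensor are illustrative stand-ins for what a TorchGeo sampler would return):

```python
import torch
from pyproj import CRS
from torchgeo.datasets import BoundingBox

from geoai.common import dict_to_image, viz_raster

sample = {
    "crs": CRS.from_epsg(26911),
    "bounds": BoundingBox(minx=0, maxx=2560, miny=0, maxy=2560, mint=0, maxt=0),
    "image": torch.rand(3, 256, 256),  # (band, y, x)
}

image = dict_to_image(sample)                      # in-memory rasterio dataset
path = dict_to_image(sample, output="sample.tif")  # or write a GeoTIFF instead
m = viz_raster(sample)                             # dicts are converted automatically
```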
geoai/download.py ADDED
@@ -0,0 +1,395 @@
+"""This module provides functions to download data, including NAIP imagery and building data from Overture Maps."""
+
+import os
+from typing import List, Tuple, Optional, Dict, Any
+import rioxarray
+import numpy as np
+import matplotlib.pyplot as plt
+from pystac_client import Client
+import planetary_computer as pc
+import geopandas as gpd
+from shapely.geometry import box
+from tqdm import tqdm
+import requests
+import subprocess
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+def download_naip(
+    bbox: Tuple[float, float, float, float],
+    output_dir: str,
+    year: Optional[int] = None,
+    max_items: int = 10,
+    overwrite: bool = False,
+    preview: bool = False,
+    **kwargs: Any,
+) -> List[str]:
+    """Download NAIP imagery from Planetary Computer based on a bounding box.
+
+    This function searches for NAIP (National Agriculture Imagery Program) imagery
+    from Microsoft's Planetary Computer that intersects with the specified bounding box.
+    It downloads the imagery and saves it as GeoTIFF files.
+
+    Args:
+        bbox: Bounding box in the format (min_lon, min_lat, max_lon, max_lat) in WGS84 coordinates.
+        output_dir: Directory to save the downloaded imagery.
+        year: Specific year of NAIP imagery to download (e.g., 2020). If None, returns imagery from all available years.
+        max_items: Maximum number of items to download.
+        overwrite: If True, overwrite existing files with the same name.
+        preview: If True, display a preview of the downloaded imagery.
+
+    Returns:
+        List of downloaded file paths.
+
+    Raises:
+        Exception: If there is an error downloading or saving the imagery.
+    """
+    # Create output directory if it doesn't exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Create a geometry from the bounding box
+    geometry = box(*bbox)
+
+    # Connect to Planetary Computer STAC API
+    catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
+
+    # Build query for NAIP data
+    search_params = {
+        "collections": ["naip"],
+        "intersects": geometry,
+        "limit": max_items,
+    }
+
+    # Add year filter if specified
+    if year:
+        search_params["query"] = {"naip:year": {"eq": year}}
+
+    for key, value in kwargs.items():
+        search_params[key] = value
+
+    # Search for NAIP imagery
+    search_results = catalog.search(**search_params)
+    items = list(search_results.items())
+
+    if len(items) > max_items:
+        items = items[:max_items]
+
+    if not items:
+        print("No NAIP imagery found for the specified region and parameters.")
+        return []
+
+    print(f"Found {len(items)} NAIP items.")
+
+    # Download and save each item
+    downloaded_files = []
+    for i, item in enumerate(items):
+        # Sign the assets (required for Planetary Computer)
+        signed_item = pc.sign(item)
+
+        # Get the RGB asset URL
+        rgb_asset = signed_item.assets.get("image")
+        if not rgb_asset:
+            print(f"No RGB asset found for item {i+1}")
+            continue
+
+        # Use the original filename from the asset
+        original_filename = os.path.basename(
+            rgb_asset.href.split("?")[0]
+        )  # Remove query parameters
+        output_path = os.path.join(output_dir, original_filename)
+        if not overwrite and os.path.exists(output_path):
+            print(f"Skipping existing file: {output_path}")
+            downloaded_files.append(output_path)
+            continue
+
+        print(f"Downloading item {i+1}/{len(items)}: {original_filename}")
+
+        try:
+            # Save the data, with a progress bar
+            # for direct HTTP file downloads
+            if rgb_asset.href.startswith("http"):
+                download_with_progress(rgb_asset.href, output_path)
+            #
+            else:
+                # Fallback to direct rioxarray opening (less common case)
+                data = rioxarray.open_rasterio(rgb_asset.href)
+                data.rio.to_raster(output_path)
+
+            downloaded_files.append(output_path)
+            print(f"Successfully saved to {output_path}")
+
+            # Optional: Display a preview if requested
+            if preview:
+                data = rioxarray.open_rasterio(output_path)
+                preview_raster(data)
+
+        except Exception as e:
+            print(f"Error downloading item {i+1}: {str(e)}")
+
+    return downloaded_files
+
+
+def download_with_progress(url: str, output_path: str) -> None:
+    """Download a file with a progress bar.
+
+    Args:
+        url: URL of the file to download.
+        output_path: Path where the file will be saved.
+    """
+    response = requests.get(url, stream=True)
+    total_size = int(response.headers.get("content-length", 0))
+    block_size = 1024  # 1 Kibibyte
+
+    with (
+        open(output_path, "wb") as file,
+        tqdm(
+            desc=os.path.basename(output_path),
+            total=total_size,
+            unit="iB",
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as bar,
+    ):
+        for data in response.iter_content(block_size):
+            size = file.write(data)
+            bar.update(size)
+
+
+def preview_raster(data: Any, title: Optional[str] = None) -> None:
+    """Display a preview of the downloaded imagery.
+
+    This function creates a visualization of the downloaded NAIP imagery
+    by converting it to an RGB array and displaying it with matplotlib.
+
+    Args:
+        data: The raster data as a rioxarray object.
+        title: The title for the preview plot.
+    """
+    # Convert to 8-bit RGB for display
+    rgb_data = data.transpose("y", "x", "band").values[:, :, 0:3]
+    rgb_data = np.where(rgb_data > 255, 255, rgb_data).astype(np.uint8)
+
+    plt.figure(figsize=(10, 10))
+    plt.imshow(rgb_data)
+    if title is not None:
+        plt.title(title)
+    plt.axis("off")
+    plt.show()
+
+
+# Helper function to convert NumPy types to native Python types for JSON serialization
+def json_serializable(obj: Any) -> Any:
+    """Convert NumPy types to native Python types for JSON serialization.
+
+    Args:
+        obj: Any object to convert.
+
+    Returns:
+        JSON serializable version of the object.
+    """
+    if isinstance(obj, np.integer):
+        return int(obj)
+    elif isinstance(obj, np.floating):
+        return float(obj)
+    elif isinstance(obj, np.ndarray):
+        return obj.tolist()
+    else:
+        return obj
+
+
+def download_overture_buildings(
+    bbox: Tuple[float, float, float, float],
+    output_file: str,
+    output_format: str = "geojson",
+    data_type: str = "building",
+    verbose: bool = True,
+) -> str:
+    """Download building data from Overture Maps for a given bounding box using the overturemaps CLI tool.
+
+    Args:
+        bbox: Bounding box in the format (min_lon, min_lat, max_lon, max_lat) in WGS84 coordinates.
+        output_file: Path to save the output file.
+        output_format: Format to save the output, one of "geojson", "geojsonseq", or "geoparquet".
+        data_type: The Overture Maps data type to download (building, place, etc.).
+        verbose: Whether to print verbose output.
+
+    Returns:
+        Path to the output file.
+    """
+    # Create output directory if needed
+    output_dir = os.path.dirname(output_file)
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir, exist_ok=True)
+
+    # Format the bounding box string for the command
+    west, south, east, north = bbox
+    bbox_str = f"{west},{south},{east},{north}"
+
+    # Build the command
+    cmd = [
+        "overturemaps",
+        "download",
+        "--bbox",
+        bbox_str,
+        "-f",
+        output_format,
+        "--type",
+        data_type,
+        "--output",
+        output_file,
+    ]
+
+    if verbose:
+        logger.info(f"Running command: {' '.join(cmd)}")
+        logger.info("Downloading %s data for area: %s", data_type, bbox_str)
+
+    try:
+        # Run the command
+        result = subprocess.run(
+            cmd,
+            check=True,
+            stdout=subprocess.PIPE if not verbose else None,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+
+        # Check if the file was created
+        if os.path.exists(output_file):
+            file_size = os.path.getsize(output_file) / (1024 * 1024)  # Size in MB
+            logger.info(
+                f"Successfully downloaded data to {output_file} ({file_size:.2f} MB)"
+            )
+
+            # Optionally show some stats about the downloaded data
+            if output_format == "geojson" and os.path.getsize(output_file) > 0:
+                try:
+                    gdf = gpd.read_file(output_file)
+                    logger.info(f"Downloaded {len(gdf)} features")
+
+                    if len(gdf) > 0 and verbose:
+                        # Show a sample of the attribute names
+                        attrs = list(gdf.columns)
+                        attrs.remove("geometry")
+                        logger.info(f"Available attributes: {', '.join(attrs[:10])}...")
+                except Exception as e:
+                    logger.warning(f"Could not read the GeoJSON file: {str(e)}")
+
+            return output_file
+        else:
+            logger.error(f"Command completed but file {output_file} was not created")
+            if result.stderr:
+                logger.error(f"Command error output: {result.stderr}")
+            return None
+
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Error running overturemaps command: {str(e)}")
+        if e.stderr:
+            logger.error(f"Command error output: {e.stderr}")
+        raise RuntimeError(f"Failed to download Overture Maps data: {str(e)}")
+    except Exception as e:
+        logger.error(f"Unexpected error: {str(e)}")
+        raise
+
+
+def convert_vector_format(
+    input_file: str,
+    output_format: str = "geojson",
+    filter_expression: Optional[str] = None,
+) -> str:
+    """Convert the downloaded data to a different format or filter it.
+
+    Args:
+        input_file: Path to the input file.
+        output_format: Format to convert to, one of "geojson", "parquet", "shapefile", "csv".
+        filter_expression: Optional GeoDataFrame query expression to filter the data.
+
+    Returns:
+        Path to the converted file.
+    """
+    try:
+        # Read the input file
+        logger.info(f"Reading {input_file}")
+        gdf = gpd.read_file(input_file)
+
+        # Apply filter if specified
+        if filter_expression:
+            logger.info(f"Filtering data using expression: {filter_expression}")
+            gdf = gdf.query(filter_expression)
+            logger.info(f"After filtering: {len(gdf)} features")
+
+        # Define output file path
+        base_path = os.path.splitext(input_file)[0]
+
+        if output_format == "geojson":
+            output_file = f"{base_path}.geojson"
+            logger.info(f"Converting to GeoJSON: {output_file}")
+            gdf.to_file(output_file, driver="GeoJSON")
+        elif output_format == "parquet":
+            output_file = f"{base_path}.parquet"
+            logger.info(f"Converting to Parquet: {output_file}")
+            gdf.to_parquet(output_file)
+        elif output_format == "shapefile":
+            output_file = f"{base_path}.shp"
+            logger.info(f"Converting to Shapefile: {output_file}")
+            gdf.to_file(output_file)
+        elif output_format == "csv":
+            output_file = f"{base_path}.csv"
+            logger.info(f"Converting to CSV: {output_file}")
+
+            # For CSV, we need to convert geometry to WKT
+            gdf["geometry_wkt"] = gdf.geometry.apply(lambda g: g.wkt)
+
+            # Save to CSV with geometry as WKT
+            gdf.drop(columns=["geometry"]).to_csv(output_file, index=False)
+        else:
+            raise ValueError(f"Unsupported output format: {output_format}")
+
+        return output_file
+
+    except Exception as e:
+        logger.error(f"Error converting data: {str(e)}")
+        raise
+
+
+def extract_building_stats(geojson_file: str) -> Dict[str, Any]:
+    """Extract statistics from the building data.
+
+    Args:
+        geojson_file: Path to the GeoJSON file.
+
+    Returns:
+        Dictionary with statistics.
+    """
+    try:
+        # Read the GeoJSON file
+        gdf = gpd.read_file(geojson_file)
+
+        # Calculate statistics
+        bbox = gdf.total_bounds.tolist()
+        # Convert numpy values to Python native types
+        bbox = [float(x) for x in bbox]
+
+        stats = {
+            "total_buildings": int(len(gdf)),
+            "has_height": (
+                int(gdf["height"].notna().sum()) if "height" in gdf.columns else 0
+            ),
+            "has_name": (
+                int(gdf["names.common.value"].notna().sum())
+                if "names.common.value" in gdf.columns
+                else 0
+            ),
+            "bbox": bbox,
+        }
+
+        return stats
+
+    except Exception as e:
+        logger.error(f"Error extracting statistics: {str(e)}")
+        return {"error": str(e)}