geoai-py 0.4.3__py2.py3-none-any.whl → 0.5.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/download.py CHANGED
@@ -207,98 +207,130 @@ def json_serializable(obj: Any) -> Any:
207
207
  return obj
208
208
 
209
209
 
210
def get_overture_latest_release(patch=True) -> str:
    """Retrieves the value of the 'latest' key from the Overture Maps release JSON file.

    Args:
        patch (bool): If True, returns the full version string (e.g., "2025-02-19.0").
            If False, returns the version without the trailing patch component
            (e.g., "2025-02-19").

    Returns:
        str: The latest Overture Maps release version.

    Raises:
        requests.RequestException: If there's an issue with the HTTP request.
        KeyError: If the 'latest' key is not found in the JSON data.
        json.JSONDecodeError: If the response cannot be parsed as JSON.
    """
    url = "https://labs.overturemaps.org/data/releases.json"

    try:
        # Bound the request so a stalled connection fails instead of hanging forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for HTTP errors

        data = response.json()
        latest_release = data.get("latest")

        # Validate BEFORE any string manipulation: calling .split() on a missing
        # (None) value would raise AttributeError instead of the documented KeyError.
        if latest_release is None:
            raise KeyError("The 'latest' key was not found in the releases.json file")

        if not patch:
            # Drop the trailing ".N" patch suffix, keeping only the date portion.
            latest_release = latest_release.split(".")[0]

        return latest_release

    except requests.RequestException as e:
        print(f"Error making the request: {e}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}")
        raise
    except KeyError as e:
        print(f"Key error: {e}")
        raise
253
+
254
+
255
def get_all_overture_types():
    """List every Overture Maps data type known to the overturemaps package.

    Returns:
        list: Names of the available Overture Maps data types.
    """
    # Imported lazily so the module loads even when overturemaps is absent.
    from overturemaps import core as overture_core

    available_types = overture_core.get_all_overture_types()
    return available_types
264
+
265
+
210
266
def download_overture_buildings(
    bbox: Tuple[float, float, float, float],
    output: str,
    overture_type: str = "building",
    **kwargs: Any,
) -> "gpd.GeoDataFrame":
    """Download building data from Overture Maps for a given bounding box.

    Thin convenience wrapper around :func:`get_overture_data` kept for
    backward compatibility with earlier releases.

    Args:
        bbox: Bounding box in the format (min_lon, min_lat, max_lon, max_lat)
            in WGS84 coordinates.
        output: Path to save the output file.
        overture_type: The Overture Maps data type to download (building, place, etc.).
        **kwargs: Additional keyword arguments forwarded to get_overture_data
            (and ultimately to GeoDataFrame.to_file).

    Returns:
        gpd.GeoDataFrame: The downloaded data. Note: despite the function name
        suggesting a file path, the underlying get_overture_data returns the
        GeoDataFrame; the data is also written to ``output``.
    """
    return get_overture_data(
        overture_type=overture_type, bbox=bbox, output=output, **kwargs
    )
286
+
287
+
288
def get_overture_data(
    overture_type: str,
    bbox: Tuple[float, float, float, float] = None,
    columns: List[str] = None,
    output: str = None,
    **kwargs: Any,
) -> "gpd.GeoDataFrame":
    """Fetches overture data and returns it as a GeoDataFrame.

    Args:
        overture_type (str): The type of overture data to fetch. It can be one of the following:
            address|building|building_part|division|division_area|division_boundary|place|
            segment|connector|infrastructure|land|land_cover|land_use|water
        bbox (Tuple[float, float, float, float], optional): The bounding box to
            filter the data. Defaults to None.
        columns (List[str], optional): The columns to include in the output.
            Defaults to None.
        output (str, optional): The file path to save the output GeoDataFrame.
            Defaults to None, in which case nothing is written to disk.
        **kwargs: Additional keyword arguments passed to GeoDataFrame.to_file().

    Returns:
        gpd.GeoDataFrame: The fetched overture data as a GeoDataFrame.

    Raises:
        ImportError: If the overturemaps package is not installed.
    """
    try:
        from overturemaps import core
    except ImportError:
        raise ImportError("The overturemaps package is required to use this function")

    gdf = core.geodataframe(overture_type, bbox=bbox)
    if columns is not None:
        gdf = gdf[columns]

    # Overture data is distributed in WGS84; tag the frame accordingly.
    gdf.crs = "EPSG:4326"

    if output is not None:
        # Only touch the filesystem when a path was actually supplied:
        # os.path.abspath(None) would raise TypeError, which previously made
        # the documented default output=None unusable.
        out_dir = os.path.dirname(os.path.abspath(output))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)
        gdf.to_file(output, **kwargs)

    return gdf
302
334
 
303
335
 
304
336
  def convert_vector_format(
@@ -361,18 +393,23 @@ def convert_vector_format(
361
393
  raise
362
394
 
363
395
 
364
- def extract_building_stats(geojson_file: str) -> Dict[str, Any]:
396
+ def extract_building_stats(data: str) -> Dict[str, Any]:
365
397
  """Extract statistics from the building data.
366
398
 
367
399
  Args:
368
- geojson_file: Path to the GeoJSON file.
400
+ data: Path to the GeoJSON file or GeoDataFrame containing building data.
369
401
 
370
402
  Returns:
371
403
  Dictionary with statistics.
372
404
  """
373
405
  try:
374
406
  # Read the GeoJSON file
375
- gdf = gpd.read_file(geojson_file)
407
+
408
+ if isinstance(data, gpd.GeoDataFrame):
409
+ gdf = data
410
+ else:
411
+
412
+ gdf = gpd.read_file(data)
376
413
 
377
414
  # Calculate statistics
378
415
  bbox = gdf.total_bounds.tolist()
@@ -903,7 +940,7 @@ def pc_stac_download(
903
940
  from concurrent.futures import ThreadPoolExecutor, as_completed
904
941
 
905
942
  # Handle single item case
906
- if isinstance(items, pystac.Item):
943
+ if isinstance(items, pystac.Item) or isinstance(items, str):
907
944
  items = [items]
908
945
  elif not isinstance(items, list):
909
946
  raise TypeError("items must be a STAC Item or list of STAC Items")
@@ -973,6 +1010,8 @@ def pc_stac_download(
973
1010
 
974
1011
  for item in items:
975
1012
  item_assets = {}
1013
+ if isinstance(item, str):
1014
+ item = pystac.Item.from_file(item)
976
1015
  item_id = item.id
977
1016
  print(f"Processing STAC item: {item_id}")
978
1017
 
geoai/geoai.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Main module."""
2
2
 
3
3
  import leafmap
4
+ import leafmap.maplibregl as maplibregl
4
5
 
5
6
  from .download import (
6
7
  download_naip,
@@ -13,10 +14,11 @@ from .download import (
13
14
  read_pc_item_asset,
14
15
  view_pc_item,
15
16
  )
17
+ from .classify import train_classifier, classify_image, classify_images
16
18
  from .extract import *
17
19
  from .hf import *
18
20
  from .segment import *
19
- from .train import object_detection, train_MaskRCNN_model
21
+ from .train import object_detection, object_detection_batch, train_MaskRCNN_model
20
22
  from .utils import *
21
23
 
22
24
 
@@ -26,3 +28,93 @@ class Map(leafmap.Map):
26
28
  def __init__(self, *args, **kwargs):
27
29
  """Initialize the Map class."""
28
30
  super().__init__(*args, **kwargs)
31
+
32
+
33
class MapLibre(maplibregl.Map):
    """A subclass of maplibregl.Map for GeoAI applications.

    Inherits all behavior from ``leafmap.maplibregl.Map`` unchanged; exists so
    GeoAI has a single place to extend the MapLibre-based map class later.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the MapLibre class.

        Args:
            *args: Positional arguments forwarded to maplibregl.Map.
            **kwargs: Keyword arguments forwarded to maplibregl.Map.
        """
        super().__init__(*args, **kwargs)
40
+
41
def create_vector_data(
    m: Optional[Map] = None,
    properties: Optional[Dict[str, List[Any]]] = None,
    time_format: str = "%Y%m%dT%H%M%S",
    column_widths: Optional[List[int]] = (9, 3),
    map_height: str = "600px",
    out_dir: Optional[str] = None,
    filename_prefix: str = "",
    file_ext: str = "geojson",
    add_mapillary: bool = False,
    style: str = "photo",
    radius: float = 0.00005,
    width: int = 300,
    height: int = 420,
    frame_border: int = 0,
    **kwargs: Any,
):
    """Generates a widget-based interface for creating and managing vector data on a map.

    Builds an interactive widget layout (map plus sidebar) that lets users draw
    features (points, lines, polygons), attach property values to them, and
    export the result as GeoJSON. Delegates entirely to
    ``leafmap.maplibregl.create_vector_data``.

    Args:
        m (Map, optional): An existing Map object. If None, a default map with
            basemaps and drawing controls is created. Defaults to None.
        properties (Dict[str, List[Any]], optional): Mapping of property names to
            the list of allowed values users can assign to drawn features.
            Defaults to None.
        time_format (str, optional): Timestamp format used in exported filenames.
            Defaults to "%Y%m%dT%H%M%S".
        column_widths (Optional[List[int]], optional): Relative widths of the map
            and sidebar columns. Defaults to (9, 3).
        map_height (str, optional): Height of the map widget. Defaults to "600px".
        out_dir (str, optional): Directory for exported GeoJSON files; current
            working directory when None. Defaults to None.
        filename_prefix (str, optional): Prefix for exported filenames. Defaults to "".
        file_ext (str, optional): File extension for exports. Defaults to "geojson".
        add_mapillary (bool, optional): Whether to show a Mapillary image widget
            for the nearest image to a clicked point. Defaults to False.
        style (str, optional): Mapillary widget style: "classic", "photo", or
            "split". Defaults to "photo".
        radius (float, optional): Search radius (degrees) for the nearest
            Mapillary image. Defaults to 0.00005.
        width (int, optional): Width of the Mapillary widget. Defaults to 300.
        height (int, optional): Height of the Mapillary widget. Defaults to 420.
        frame_border (int, optional): Frame border width for the Mapillary
            widget. Defaults to 0.
        **kwargs (Any): Additional keyword arguments forwarded unchanged.

    Returns:
        widgets.VBox: A vertical box widget containing the map, sidebar, and
        control buttons.

    Example:
        >>> properties = {
        ...     "Type": ["Residential", "Commercial", "Industrial"],
        ...     "Area": [100, 200, 300],
        ... }
        >>> widget = create_vector_data(properties=properties)
        >>> display(widget)  # Display the widget in a Jupyter notebook
    """
    # Collect the named options once, then forward them together with any
    # caller-supplied extras (duplicates still raise TypeError, as before).
    options = dict(
        m=m,
        properties=properties,
        time_format=time_format,
        column_widths=column_widths,
        map_height=map_height,
        out_dir=out_dir,
        filename_prefix=filename_prefix,
        file_ext=file_ext,
        add_mapillary=add_mapillary,
        style=style,
        radius=radius,
        width=width,
        height=height,
        frame_border=frame_border,
    )
    return maplibregl.create_vector_data(**options, **kwargs)
geoai/train.py CHANGED
@@ -1,3 +1,4 @@
1
+ import glob
1
2
  import math
2
3
  import os
3
4
  import random
@@ -20,6 +21,8 @@ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
20
21
  from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
21
22
  from tqdm import tqdm
22
23
 
24
+ from .utils import download_model_from_hf
25
+
23
26
 
24
27
  def get_instance_segmentation_model(num_classes=2, num_channels=3, pretrained=True):
25
28
  """
@@ -352,7 +355,9 @@ def collate_fn(batch):
352
355
  return tuple(zip(*batch))
353
356
 
354
357
 
355
- def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10):
358
+ def train_one_epoch(
359
+ model, optimizer, data_loader, device, epoch, print_freq=10, verbose=True
360
+ ):
356
361
  """
357
362
  Train the model for one epoch.
358
363
 
@@ -363,6 +368,7 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
363
368
  device (torch.device): Device to train on.
364
369
  epoch (int): Current epoch number.
365
370
  print_freq (int): How often to print progress.
371
+ verbose (bool): Whether to print detailed progress.
366
372
 
367
373
  Returns:
368
374
  float: Average loss for the epoch.
@@ -392,9 +398,10 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
392
398
  # Print progress
393
399
  if i % print_freq == 0:
394
400
  elapsed_time = time.time() - start_time
395
- print(
396
- f"Epoch: {epoch}, Batch: {i}/{len(data_loader)}, Loss: {losses.item():.4f}, Time: {elapsed_time:.2f}s"
397
- )
401
+ if verbose:
402
+ print(
403
+ f"Epoch: {epoch}, Batch: {i}/{len(data_loader)}, Loss: {losses.item():.4f}, Time: {elapsed_time:.2f}s"
404
+ )
398
405
  start_time = time.time()
399
406
 
400
407
  # Calculate average loss
@@ -579,6 +586,8 @@ def train_MaskRCNN_model(
579
586
  val_split=0.2,
580
587
  visualize=False,
581
588
  resume_training=False,
589
+ print_freq=10,
590
+ verbose=True,
582
591
  ):
583
592
  """Train and evaluate Mask R-CNN model for instance segmentation.
584
593
 
@@ -605,7 +614,8 @@ def train_MaskRCNN_model(
605
614
  Defaults to False.
606
615
  resume_training (bool): If True and pretrained_model_path is provided,
607
616
  will try to load optimizer and scheduler states as well. Defaults to False.
608
-
617
+ print_freq (int): Frequency of printing training progress. Defaults to 10.
618
+ verbose (bool): If True, prints detailed training progress. Defaults to True.
609
619
  Returns:
610
620
  None: Model weights are saved to output_dir.
611
621
 
@@ -756,7 +766,9 @@ def train_MaskRCNN_model(
756
766
  # Training loop
757
767
  for epoch in range(start_epoch, num_epochs):
758
768
  # Train one epoch
759
- train_loss = train_one_epoch(model, optimizer, train_loader, device, epoch)
769
+ train_loss = train_one_epoch(
770
+ model, optimizer, train_loader, device, epoch, print_freq, verbose
771
+ )
760
772
 
761
773
  # Update learning rate
762
774
  lr_scheduler.step()
@@ -1107,6 +1119,13 @@ def object_detection(
1107
1119
  model = get_instance_segmentation_model(
1108
1120
  num_classes=2, num_channels=num_channels, pretrained=pretrained
1109
1121
  )
1122
+
1123
+ if not os.path.exists(model_path):
1124
+ try:
1125
+ model_path = download_model_from_hf(model_path)
1126
+ except Exception as e:
1127
+ raise FileNotFoundError(f"Model file not found: {model_path}")
1128
+
1110
1129
  model.load_state_dict(torch.load(model_path, map_location=device))
1111
1130
  model.to(device)
1112
1131
  model.eval()
@@ -1123,3 +1142,93 @@ def object_detection(
1123
1142
  device=device,
1124
1143
  **kwargs,
1125
1144
  )
1145
+
1146
+
1147
+ def object_detection_batch(
1148
+ input_paths,
1149
+ output_dir,
1150
+ model_path,
1151
+ filenames=None,
1152
+ window_size=512,
1153
+ overlap=256,
1154
+ confidence_threshold=0.5,
1155
+ batch_size=4,
1156
+ num_channels=3,
1157
+ pretrained=True,
1158
+ device=None,
1159
+ **kwargs,
1160
+ ):
1161
+ """
1162
+ Perform object detection on a GeoTIFF using a pre-trained Mask R-CNN model.
1163
+
1164
+ Args:
1165
+ input_paths (str or list): Path(s) to input GeoTIFF file(s). If a directory is provided,
1166
+ all .tif files in that directory will be processed.
1167
+ output_dir (str): Directory to save output mask GeoTIFF files.
1168
+ model_path (str): Path to trained model weights.
1169
+ filenames (list, optional): List of output filenames. If None, defaults to
1170
+ "<input_filename>_mask.tif" for each input file.
1171
+ If provided, must match the number of input files.
1172
+ window_size (int): Size of sliding window for inference.
1173
+ overlap (int): Overlap between adjacent windows.
1174
+ confidence_threshold (float): Confidence threshold for predictions (0-1).
1175
+ batch_size (int): Batch size for inference.
1176
+ num_channels (int): Number of channels in the input image and model.
1177
+ pretrained (bool): Whether to use pretrained backbone for model loading.
1178
+ device (torch.device, optional): Device to run inference on. If None, uses CUDA if available.
1179
+ **kwargs: Additional arguments passed to inference_on_geotiff.
1180
+
1181
+ Returns:
1182
+ None: Output mask is saved to output_path.
1183
+ """
1184
+ # Load your trained model
1185
+ if device is None:
1186
+ device = (
1187
+ torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
1188
+ )
1189
+ model = get_instance_segmentation_model(
1190
+ num_classes=2, num_channels=num_channels, pretrained=pretrained
1191
+ )
1192
+
1193
+ if not os.path.exists(output_dir):
1194
+ os.makedirs(output_dir, exist_ok=True)
1195
+
1196
+ if not os.path.exists(model_path):
1197
+ try:
1198
+ model_path = download_model_from_hf(model_path)
1199
+ except Exception as e:
1200
+ raise FileNotFoundError(f"Model file not found: {model_path}")
1201
+
1202
+ model.load_state_dict(torch.load(model_path, map_location=device))
1203
+ model.to(device)
1204
+ model.eval()
1205
+
1206
+ if isinstance(input_paths, str) and (not input_paths.endswith(".tif")):
1207
+ files = glob.glob(os.path.join(input_paths, "*.tif"))
1208
+ files.sort()
1209
+ elif isinstance(input_paths, str):
1210
+ files = [input_paths]
1211
+
1212
+ if filenames is None:
1213
+ filenames = [
1214
+ os.path.join(output_dir, os.path.basename(f).replace(".tif", "_mask.tif"))
1215
+ for f in files
1216
+ ]
1217
+ else:
1218
+ if len(filenames) != len(files):
1219
+ raise ValueError("Number of filenames must match number of input files.")
1220
+
1221
+ for index, file in enumerate(files):
1222
+ print(f"Processing file {index + 1}/{len(files)}: {file}")
1223
+ inference_on_geotiff(
1224
+ model=model,
1225
+ geotiff_path=file,
1226
+ output_path=filenames[index],
1227
+ window_size=window_size, # Adjust based on your model and memory
1228
+ overlap=overlap, # Overlap to avoid edge artifacts
1229
+ confidence_threshold=confidence_threshold,
1230
+ batch_size=batch_size, # Adjust based on your GPU memory
1231
+ num_channels=num_channels,
1232
+ device=device,
1233
+ **kwargs,
1234
+ )