PyPI - eoml - Versions diffs - 0.9.0__py3-none-any.whl - Mend

eoml 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

eoml/__init__.py +74 -0
eoml/automation/__init__.py +7 -0
eoml/automation/configuration.py +105 -0
eoml/automation/dag.py +233 -0
eoml/automation/experience.py +618 -0
eoml/automation/tasks.py +825 -0
eoml/bin/__init__.py +6 -0
eoml/bin/clean_checkpoint.py +146 -0
eoml/bin/land_cover_mapping_toml.py +435 -0
eoml/bin/mosaic_images.py +137 -0
eoml/data/__init__.py +7 -0
eoml/data/basic_geo_data.py +214 -0
eoml/data/dataset_utils.py +98 -0
eoml/data/persistence/__init__.py +7 -0
eoml/data/persistence/generic.py +253 -0
eoml/data/persistence/lmdb.py +379 -0
eoml/data/persistence/serializer.py +82 -0
eoml/raster/__init__.py +7 -0
eoml/raster/band.py +141 -0
eoml/raster/dataset/__init__.py +6 -0
eoml/raster/dataset/extractor.py +604 -0
eoml/raster/raster_reader.py +602 -0
eoml/raster/raster_utils.py +116 -0
eoml/torch/__init__.py +7 -0
eoml/torch/cnn/__init__.py +7 -0
eoml/torch/cnn/augmentation.py +150 -0
eoml/torch/cnn/dataset_evaluator.py +68 -0
eoml/torch/cnn/db_dataset.py +605 -0
eoml/torch/cnn/map_dataset.py +579 -0
eoml/torch/cnn/map_dataset_const_mem.py +135 -0
eoml/torch/cnn/outputs_transformer.py +130 -0
eoml/torch/cnn/torch_utils.py +404 -0
eoml/torch/cnn/training_dataset.py +241 -0
eoml/torch/cnn/windows_dataset.py +120 -0
eoml/torch/dataset/__init__.py +6 -0
eoml/torch/dataset/shade_dataset_tester.py +46 -0
eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
eoml/torch/model_low_use.py +507 -0
eoml/torch/models.py +282 -0
eoml/torch/resnet.py +437 -0
eoml/torch/sample_statistic.py +260 -0
eoml/torch/trainer.py +782 -0
eoml/torch/trainer_v2.py +253 -0
eoml-0.9.0.dist-info/METADATA +93 -0
eoml-0.9.0.dist-info/RECORD +47 -0
eoml-0.9.0.dist-info/WHEEL +4 -0
eoml-0.9.0.dist-info/entry_points.txt +3 -0

eoml/bin/mosaic_images.py ADDED Viewed

@@ -0,0 +1,137 @@
+import typer
+from pathlib import Path
+from rasterio.enums import Resampling
+from eoml import get_read_profile, get_write_profile
+from eoml.automation.tasks import tiled_task
+from rasterop.tiled_op.operation import CopyFirstNonNullOP
+from rasterop.tiled_op.tiled_raster_op import get_image_file, TiledOPExecutor
+# Typer application object; the merge_rasters command is registered on it via @app.command().
+# The help text below is what the command-line interface displays to the user.
+app = typer.Typer(help="Raster merging utility that take all the TIFF file in the input directories sorted by"
+                       " alphabetical order and copies the first non-nan value to the final TIFF")
+def parse_resampling(value: str) -> Resampling:
+    """Convert a user-supplied string into a ``Resampling`` enum member.
+    The lookup by name is case-insensitive and ignores surrounding whitespace.
+    A string made only of digits is interpreted as the numeric value of a member.
+    Args:
+        value: Resampling name (e.g. ``"bilinear"``) or numeric enum value as text.
+    Returns:
+        The matching ``Resampling`` member.
+    Raises:
+        typer.BadParameter: If ``value`` is neither a known member name nor a
+            valid numeric value.
+    """
+    value_norm = value.strip().lower()
+    by_name = {m.name.lower(): m for m in Resampling}
+    if value_norm in by_name:
+        return by_name[value_norm]
+    # Optional: allow numeric values too (handy for backwards-compat)
+    if value_norm.isdigit():
+        try:
+            return Resampling(int(value_norm))
+        except ValueError:
+            # Number without a matching member: fall through so the user gets the
+            # same BadParameter as for an unknown name instead of a raw ValueError.
+            pass
+    raise typer.BadParameter(
+        f"Invalid resampling '{value}'. Choose one of: {', '.join(sorted(by_name))}"
+    )
+# NOTE: the docstring of merge_rasters doubles as the help text Typer shows for the
+# command, so implementation notes are kept in comments rather than in the docstring.
+@app.command()
+def merge_rasters(
+        input_dir: Path = typer.Argument(
+            ...,
+            help="Input directory containing TIFF files",
+            exists=True,
+            dir_okay=True,
+            file_okay=False
+        ),
+        output_file: Path = typer.Argument(
+            ...,
+            help="Output TIFF file path"
+        ),
+        num_threads: str = typer.Option(
+            "all_cpus",
+            "--threads", "-t",
+            help="Number of threads to use by gdal for compression"
+        ),
+        block_size: int = typer.Option(
+            256,
+            "--block-size", "-b",
+            help="Block size for x and y dimensions for the geotiff internal structure"
+        ),
+        tile_size: int = typer.Option(
+            2028,
+            "--tile-size", "-T",
+            help="Block size for the operation"
+        ),
+        num_workers: int = typer.Option(
+            8,
+            "--workers", "-w",
+            help="Number of workers for processing"
+        ),
+        resampling: Resampling = typer.Option(
+            Resampling.nearest,  # internal default as enum
+            "--resampling", "-r",
+            callback=lambda v: parse_resampling(v) if isinstance(v, str) else v,
+            help="Resampling method by name (nearest, bilinear, cubic, ...)",
+        )
+):
+    """
+    Merge multiple raster files by copying the first non-nan value to the final TIFF.
+    """
+    # NOTE(review): `tile_size` is accepted on the command line but is not forwarded
+    # anywhere in this function - confirm how it should reach the tiled operation.
+    try:
+        # Get the list of raster files
+        rasters = get_image_file(input_dir,  extension = ["tif", "tiff", "TIF", "TIFF"])
+        rasters.sort()
+        if len(rasters) == 0:
+            raise typer.BadParameter(f"No raster files found in {input_dir}")
+        # Set up writing and reading profiles
+        read_profile = get_read_profile()
+        profile = get_write_profile()
+        # NOTE(review): read_profile is prepared here but never handed to the task
+        # ("src_kwds" stays None below) - confirm whether it should be passed on.
+        read_profile.update({'num_threads': num_threads})
+        profile.update({
+            "driver": "COG",
+            'num_threads': num_threads,
+            'blockxsize': block_size,
+            'blockysize': block_size
+        })
+        # Generic operation parameters
+        operator_param = {
+            "bounds": None,
+            "res": None,
+            "resampling": resampling,
+            "target_aligned_pixels": False,
+            "indexes": None,
+            "src_kwds": None,
+            "num_workers": num_workers
+        }
+        # Create operator and layer the call-specific parameters on top of the
+        # generic ones. They must be applied last: merging the generic defaults
+        # afterwards would reset "dst_kwds" to None and drop the write profile
+        # (driver, threads, block size) configured above.
+        operator = CopyFirstNonNullOP.same_as(rasters[0])
+        operator_param.update({
+            "maps": rasters,
+            "raster_out": str(output_file),
+            "operation": operator,
+            "dst_kwds": profile
+        })
+        # Execute tiled task
+        typer.echo(f"Merging {len(rasters)} raster files...")
+        tiled_task(**operator_param)
+        typer.echo(f"Successfully merged rasters to {output_file}")
+    except Exception as e:
+        # Top-level CLI boundary: report the problem on stderr and exit with code 1.
+        typer.echo(f"Error: {str(e)}", err=True)
+        raise typer.Exit(1)
+# Run the Typer application when this module is executed as a script.
+if __name__ == "__main__":
+    app()

eoml/data/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+Data Module for EOML.
+This module provides data structures and utilities for handling geospatial
+data in Earth observation applications. It includes basic geodata classes,
+dataset utilities, and persistence mechanisms.
+"""

eoml/data/basic_geo_data.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""Basic geographical data structures for storing raster samples with metadata.
+This module defines core data structures for representing geospatial training samples,
+including headers with geometry information and complete samples combining raster data
+with labels and metadata.
+"""
+import math
+import numpy as np
+import rasterio
+from eoml import get_read_profile, get_write_profile
+class GeoDataHeader:
+    """Metadata record attached to one geospatial sample.
+    Bundles the sample identifier, its geometry and the name of the file it
+    originates from.
+    Attributes:
+        idx: Identifier of the sample
+        geometry: Geometry object of the sample; must expose ``wkt`` for ``repr``
+        file_name: Name of the source file of the sample
+    """
+    def __init__(self, idx, geometry, file_name):
+        """Store the three header fields unchanged.
+        Args:
+            idx: Identifier of the sample
+            geometry: Geometry object locating the sample
+            file_name: Name of the file the sample comes from
+        """
+        self.idx = idx
+        self.geometry = geometry
+        self.file_name = file_name
+    def __eq__(self, other):
+        """Compare two headers field by field.
+        Args:
+            other: Object to compare with
+        Returns:
+            Result of comparing idx, geometry and file_name in that order, or
+            NotImplemented when ``other`` is not a GeoDataHeader
+        """
+        if not isinstance(other, GeoDataHeader):
+            return NotImplemented
+        return (
+            self.idx == other.idx
+            and self.geometry == other.geometry
+            and self.file_name == other.file_name
+        )
+    def __repr__(self):
+        """Build a readable one-line description of the header.
+        Returns:
+            String with the id, the WKT of the geometry and the file name
+        """
+        template = "GeoDataHeader(id:{}, geometry:{}, file_name:{})"
+        return template.format(self.idx, self.geometry.wkt, self.file_name)
+class BasicGeoData:
+    """Complete geospatial sample with header, raster data, and target label.
+    Represents a training sample combining metadata (header), multi-band raster data
+    (typically a small image window), and a target value (class label or regression value).
+    Attributes:
+        header: GeoDataHeader containing sample metadata
+        data: NumPy array of raster data; ``to_file`` reads it as (bands, height, width)
+        target: Target value (int for classification, float for regression, or array)
+    """
+    def __init__(self, header, data, target):
+        """Initialize a BasicGeoData sample.
+        Args:
+            header: GeoDataHeader with sample metadata
+            data: NumPy array containing raster data
+            target: Target value(s) for supervised learning
+        """
+        self.header = header
+        self.data = data
+        self.target = target
+    @property
+    def header(self):
+        """Get the sample header.
+        Returns:
+            GeoDataHeader instance
+        """
+        return self._header
+    @header.setter
+    def header(self, value):
+        """Set the sample header.
+        Args:
+            value: GeoDataHeader instance
+        """
+        self._header = value
+    @property
+    def data(self):
+        """Get the raster data array.
+        Returns:
+            NumPy array of raster data
+        """
+        return self._data
+    @data.setter
+    def data(self, value):
+        """Set the raster data array.
+        Args:
+            value: NumPy array containing raster data
+        """
+        self._data = value
+    @property
+    def target(self):
+        """Get the target label or value.
+        Returns:
+            Target value (scalar or array)
+        """
+        return self._target
+    @target.setter
+    def target(self, value):
+        """Set the target label or value.
+        Args:
+            value: Target value for the sample
+        """
+        self._target = value
+    def __eq__(self, other):
+        """Check equality based on header, data, and target.
+        Args:
+            other: Another object to compare against
+        Returns:
+            True if header, data (NaN positions count as equal) and target all match,
+            False otherwise; NotImplemented when ``other`` is not a BasicGeoData
+        """
+        if not isinstance(other, BasicGeoData):
+            return NotImplemented
+        # The target may itself be an array: a plain ``==`` would then yield an
+        # element-wise array instead of a single truth value, so it is reduced with
+        # np.array_equal (which also handles scalars).
+        return bool(self.header == other.header
+                    and np.array_equal(self.data, other.data, equal_nan=True)
+                    and np.array_equal(self.target, other.target))
+    def to_file(self, path, ref):
+        """Write the raster data to a GeoTIFF file with proper georeferencing.
+        Exports the sample's raster data to a georeferenced GeoTIFF using the coordinate
+        reference system of a reference raster. The output raster is centered on the
+        sample's geometry point and ``data`` is read as (bands, height, width).
+        Args:
+            path: Output path for the GeoTIFF file
+            ref: Path to reference raster file used for the CRS and the pixel grid
+        Returns:
+            None. Writes GeoTIFF to specified path
+        Raises:
+            IOError: If reference file cannot be opened or output cannot be written
+        """
+        height = self.data.shape[1]
+        width = self.data.shape[2]
+        with rasterio.open(ref) as src:
+            crs = src.crs
+            x = self.header.geometry.x
+            y = self.header.geometry.y
+            row, col = src.index(x, y, op=math.floor)
+            # Rows span the height of the window and columns span its width, so each
+            # axis gets its own half extent (they only coincide for square windows).
+            half_height = height / 2
+            half_width = width / 2
+            west, north = src.xy(row - half_height, col - half_width)
+            east, south = src.xy(row + half_height, col + half_width)
+            # from_bounds expects the raster size as (width, height).
+            transform = rasterio.transform.from_bounds(west, south, east, north,
+                                                       width, height)
+        profile = get_write_profile()
+        profile.update({"height": height,
+                        "width": width,
+                        "count": self.data.shape[0],
+                        "dtype": self.data.dtype,
+                        "crs": crs,
+                        "transform": transform})
+        with rasterio.open(path, "w", **profile) as dst:
+            dst.write(self.data)
+    def __repr__(self):
+        """Return string representation of the sample.
+        Returns:
+            String showing the repr of the header, of the data array and of the target
+        """
+        return f"BasicGeoData(header:{self.header.__repr__()}, data:{self.data.__repr__()}, target:{self.target.__repr__()})"

eoml/data/dataset_utils.py ADDED Viewed

@@ -0,0 +1,98 @@
+"""
+Dataset utility functions for machine learning workflows.
+This module provides utility functions for splitting and organizing datasets
+for machine learning experiments, including random splitting and k-fold
+cross-validation setup.
+"""
+import math
+import random
+import numpy as np
+import rasterio
+def random_split(id_list, counts_list, relative=False) -> list:
+    """
+    Randomly split a list of IDs into multiple subsets.
+    This function splits a sequence of identifiers into multiple disjoint subsets
+    according to specified counts. Useful for creating train/validation/test splits.
+    The input sequence is not modified.
+    Args:
+        id_list (list): Sequence of identifiers to split.
+        counts_list (list): List of counts for each split. If relative=True,
+            these are interpreted as proportions; otherwise as absolute counts.
+        relative (bool, optional): If True, counts_list values are proportions
+            of the total and each is rounded to a whole number of samples. When
+            the proportions sum to at most 1 but rounding overshoots the number of
+            samples, the excess is removed from the largest splits. Defaults to False.
+    Returns:
+        list: List of lists, where each sublist contains IDs for one split.
+    Raises:
+        ValueError: If the requested number of samples exceeds the list length.
+    Examples:
+        >>> ids = list(range(100))
+        >>> train, val, test = random_split(ids, [0.7, 0.15, 0.15], relative=True)
+        >>> # or with absolute counts:
+        >>> train, val = random_split(ids, [80, 20], relative=False)
+    """
+    n_el = len(id_list)
+    ids = list(id_list)
+    counts = list(counts_list)
+    if relative:
+        fractions = counts
+        counts = [round(fraction * n_el) for fraction in fractions]
+        # Rounding each split independently can request more samples than exist
+        # even for valid proportions (e.g. [0.5, 0.5] of 3 items -> 2 + 2). Give
+        # the excess back, one sample at a time, from the currently largest split.
+        overshoot = sum(counts) - n_el
+        if overshoot > 0 and sum(fractions) <= 1 + 1e-9:
+            for _ in range(overshoot):
+                largest = max(range(len(counts)), key=counts.__getitem__)
+                counts[largest] -= 1
+    random.shuffle(ids)
+    if sum(counts) > n_el:
+        raise ValueError("number of sample requested higher than list length")
+    split = []
+    start = 0
+    # Consecutive, non-overlapping slices of the shuffled copy.
+    for count in counts:
+        split.append(ids[start:start + count])
+        start += count
+    return split
+def k_fold_sample(id_list, n_fold):
+    """
+    Create k-fold cross-validation splits from a list of IDs.
+    This function creates n_fold partitions of the data and generates fold
+    definitions for k-fold cross-validation, where each fold is used once
+    as validation while the remaining folds are used for training.
+    The IDs are shuffled on a copy, so the sequence passed in keeps its order.
+    Args:
+        id_list (list): Sequence of sample identifiers to split.
+        n_fold (int): Number of folds to create.
+    Returns:
+        tuple: A tuple containing:
+            - folds (list): List of n_fold lists, each containing sample IDs for that fold.
+            - fold_id (list): List of tuples defining train/validation splits, where each
+              tuple contains ([training_fold_indices], [validation_fold_index]).
+    Examples:
+        >>> ids = list(range(100))
+        >>> folds, fold_splits = k_fold_sample(ids, n_fold=5)
+        >>> # folds[0] contains ~20 samples, fold_splits[0] is ([1,2,3,4], [0])
+    """
+    # Shuffle a private copy: shuffling id_list itself would reorder the caller's data.
+    shuffled = list(id_list)
+    random.shuffle(shuffled)
+    # create n partition of the data by taking every n_fold-th element
+    folds = [shuffled[offset::n_fold] for offset in range(n_fold)]
+    # make the fold definitions, holding out one fold each time for validation
+    fold_id = [([j for j in range(n_fold) if j != i], [i]) for i in range(n_fold)]
+    return folds, fold_id

eoml/data/persistence/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+Persistence Submodule for Data.
+This submodule provides persistence mechanisms for geodata, including
+database access objects (DAOs), readers, writers, and serializers for
+efficient storage and retrieval of geospatial machine learning datasets.
+"""