eoml-0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eoml/__init__.py +74 -0
- eoml/automation/__init__.py +7 -0
- eoml/automation/configuration.py +105 -0
- eoml/automation/dag.py +233 -0
- eoml/automation/experience.py +618 -0
- eoml/automation/tasks.py +825 -0
- eoml/bin/__init__.py +6 -0
- eoml/bin/clean_checkpoint.py +146 -0
- eoml/bin/land_cover_mapping_toml.py +435 -0
- eoml/bin/mosaic_images.py +137 -0
- eoml/data/__init__.py +7 -0
- eoml/data/basic_geo_data.py +214 -0
- eoml/data/dataset_utils.py +98 -0
- eoml/data/persistence/__init__.py +7 -0
- eoml/data/persistence/generic.py +253 -0
- eoml/data/persistence/lmdb.py +379 -0
- eoml/data/persistence/serializer.py +82 -0
- eoml/raster/__init__.py +7 -0
- eoml/raster/band.py +141 -0
- eoml/raster/dataset/__init__.py +6 -0
- eoml/raster/dataset/extractor.py +604 -0
- eoml/raster/raster_reader.py +602 -0
- eoml/raster/raster_utils.py +116 -0
- eoml/torch/__init__.py +7 -0
- eoml/torch/cnn/__init__.py +7 -0
- eoml/torch/cnn/augmentation.py +150 -0
- eoml/torch/cnn/dataset_evaluator.py +68 -0
- eoml/torch/cnn/db_dataset.py +605 -0
- eoml/torch/cnn/map_dataset.py +579 -0
- eoml/torch/cnn/map_dataset_const_mem.py +135 -0
- eoml/torch/cnn/outputs_transformer.py +130 -0
- eoml/torch/cnn/torch_utils.py +404 -0
- eoml/torch/cnn/training_dataset.py +241 -0
- eoml/torch/cnn/windows_dataset.py +120 -0
- eoml/torch/dataset/__init__.py +6 -0
- eoml/torch/dataset/shade_dataset_tester.py +46 -0
- eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
- eoml/torch/model_low_use.py +507 -0
- eoml/torch/models.py +282 -0
- eoml/torch/resnet.py +437 -0
- eoml/torch/sample_statistic.py +260 -0
- eoml/torch/trainer.py +782 -0
- eoml/torch/trainer_v2.py +253 -0
- eoml-0.9.0.dist-info/METADATA +93 -0
- eoml-0.9.0.dist-info/RECORD +47 -0
- eoml-0.9.0.dist-info/WHEEL +4 -0
- eoml-0.9.0.dist-info/entry_points.txt +3 -0
eoml/torch/cnn/map_dataset_const_mem.py

@@ -0,0 +1,135 @@
import torch
from eoml.torch.cnn.map_dataset import MapResultAggregator, BatchMeta, IterableMapDataset
from rasterio.windows import Window


class ConstMemBatchMeta(BatchMeta):

    def __init__(self, window, is_finished, count, worker):
        super().__init__(window, is_finished, worker)
        self.window = window
        self.is_finished = is_finished
        self.count = count
        self.worker = worker


class Buffer:

    def __init__(self, bands, height, width, device):
        self.buffer = None
        self.device = device
        self.stored_height = 0
        self.stored_width = 0
        self.bands = bands

    def store(self, data):
        channel, height, width = data.shape
        # allocate a new buffer to avoid writing into the buffer currently used for computation on the GPU;
        # prefetching a pool of buffers would avoid the repeated allocation
        self.buffer = torch.empty((channel, height, width), device=self.device)
        self.buffer[:, 0:height, 0:width] = data
        self.stored_height = height
        self.stored_width = width

    def __getitem__(self, item):
        return self.buffer[item]

    def __setitem__(self, key, value):
        self.buffer[key] = value

    @property
    def shape(self):
        return self.bands, self.stored_height, self.stored_width


class IterableMapDatasetConstMem(IterableMapDataset):
    # Create an aligned raster with a cropped border to take the convolution into account.
    # If stride is > 1, the windows starting at the top-left corner with size stride x stride
    # will be filled with the value returned by the NN.

    def __init__(self, raster_reader, kernel_size, target_windows, off_x, off_y, stride=1, batch_size=1024,
                 device="cpu"):

        super().__init__(raster_reader, kernel_size, target_windows, off_x, off_y, stride, batch_size, device)

        self.max_width, self.max_height = self._max_win_size(target_windows, kernel_size)
        self.buffer = Buffer(raster_reader.n_band, self.max_width, self.max_height, device)

    def _max_win_size(self, windows, size):
        # each element of windows is a tuple ((i, j), window)
        width = max(windows, key=lambda w: w[1].width)[1].width + size
        height = max(windows, key=lambda w: w[1].height)[1].height + size
        return width, height

    def __iter__(self):
        """
        Iterate over the dataset. Yields at most batch_size samples, or the number of samples needed to finish
        the current block of data.
        :return: data, (target_windows, is_block_finished, worker_id)
        """
        for ji, window in self.target_windows:

            (col_off, row_off, w_width, w_height) = window.flatten()
            # compute the source window
            window_source = Window(col_off + self.off_x - self.half_size, row_off + self.off_y - self.half_size,
                                   w_width + self.size - 1, w_height + self.size - 1)

            np_buff = self.read_windows(window_source)

            self.buffer.store(torch.from_numpy(np_buff))

            for sample, meta in self.extract_tensor_iter(self.buffer, self.batch_size):
                meta.window = window
                yield sample, meta

    def extract_tensor_iter(self, data, batch_size):
        channel, height, width = data.shape

        height = height - self.size + 1
        width = width - self.size + 1

        samples = []
        count = 0

        for i in range(0, height):
            for j in range(0, width):
                if count == batch_size:
                    yield torch.stack(samples, dim=0), ConstMemBatchMeta(None, False, count, self.worker_id)
                    samples = []
                    count = 0
                source_w = self.buffer.buffer.narrow(1, i, self.size).narrow(2, j, self.size)
                samples.append(source_w)
                count += 1

        # fill the last batch with dummy samples so every batch keeps the same shape;
        # reusing a view of the buffer is a bit faster than allocating empty tensors
        valid_count = count
        while count < batch_size:
            samples.append(self.buffer.buffer.narrow(1, 0, self.size).narrow(2, 0, self.size))
            count += 1

        yield torch.stack(samples, dim=0), ConstMemBatchMeta(None, True, valid_count, self.worker_id)


class MapResultAggregatorConstMem(MapResultAggregator):

    def __init__(self, path_out, transform_result_f, n_windows, write_profile):
        super().__init__(path_out, transform_result_f, n_windows, write_profile)

    def submit_result(self, values, meta: ConstMemBatchMeta):
        # keep only the valid (non-padding) predictions of the batch
        values = values[:meta.count]
        super().submit_result(values, meta)
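The count field on ConstMemBatchMeta exists because the last batch of a block is padded up to batch_size so every batch keeps the same shape, and the aggregator then discards the padding. A minimal sketch of that trimming step (shapes and values below are hypothetical, not taken from the package):

    import torch

    batch_size = 8
    valid_count = 5                          # only 5 real samples in the final batch of a block
    predictions = torch.rand(batch_size, 4)  # model output for the padded batch
    kept = predictions[:valid_count]         # what submit_result keeps before writing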
eoml/torch/cnn/outputs_transformer.py

@@ -0,0 +1,130 @@
"""Output transformation classes for neural network predictions.

This module provides classes for transforming raw neural network outputs into
usable formats for geospatial mapping, including classification, regression,
and probability outputs.
"""

from typing import List

import numpy as np


class OutputTransformer:
    """Abstract base class for transforming neural network outputs.

    Defines the interface for converting raw NN outputs to map-ready values.

    Attributes:
        _shape: Shape of the output data.
        _dtype: Data type for output values.
        _nodata: No-data value for invalid outputs.
    """

    def __init__(self, shape, dtype, nodata):
        self._shape = shape
        self._dtype = dtype
        self._nodata = nodata

    def __call__(self, v):
        ...

    @property
    def shape(self):
        """Shape of the output."""
        return self._shape

    @property
    def bands(self):
        """Number of output bands (first dimension of the output shape)."""
        return self.shape[0]

    @property
    def dtype(self):
        """Data type of the output."""
        return self._dtype

    @property
    def nodata(self):
        """No-data value of the output."""
        return self._nodata


class ArgMax(OutputTransformer):
    """Return the index of the highest neural network output.

    Performs an argmax operation for classification tasks.

    Attributes:
        dtype: Data type for output indices. Defaults to "int16".
        nodata: Value for invalid outputs. Defaults to -1.
    """

    def __init__(self, dtype="int16", nodata=-1):
        super().__init__([1], dtype, nodata)

    def __call__(self, vec):
        return np.argmax(vec, axis=1).astype(self.dtype)


class ArgMaxToCategory(ArgMax):
    """Transform neural network categories to map category values.

    Performs argmax to find the highest output, then maps the index to a
    specific category value from the provided mapping.

    Attributes:
        category_map: List mapping NN output indices to category values.
        dtype: Data type for output values. Defaults to "int16".
        nodata: Value for invalid outputs. Defaults to -1.
    """

    def __init__(self, category_map: List, dtype="int16", nodata=-1):
        super().__init__(dtype, nodata)
        self.category_map = category_map

    def __call__(self, vec):
        return np.array([self.category_map[x] for x in super().__call__(vec)], dtype=self.dtype)


class Identity(OutputTransformer):
    """Return neural network output as-is with type casting.

    Passes through the NN output but casts it to the specified map format. The
    output shape must be specified in the constructor.

    Attributes:
        shape: Shape of the output data.
        dtype: Data type for output values.
        nodata: Value for invalid outputs.
    """

    def __init__(self, shape, dtype, nodata):
        super().__init__(shape, dtype, nodata)

    def __call__(self, vec):
        return vec.astype(self.dtype)


class ToPercentage(OutputTransformer):
    """Convert neural network output to percentage values.

    Multiplies the output by 100 and casts to the specified type, useful for
    probability or confidence outputs.

    Attributes:
        shape: Shape of the output data. Defaults to [1].
        dtype: Data type for output values. Defaults to "int16".
        nodata: Value for invalid outputs. Defaults to -255.
    """

    def __init__(self, shape=[1], dtype="int16", nodata=-255):
        super().__init__(shape, dtype, nodata)

    def __call__(self, vec):
        return (100 * vec).astype(self.dtype)
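A minimal usage sketch of the classification transformers above; the scores and category codes are hypothetical and the classes are assumed to be imported from this module:

    import numpy as np

    scores = np.array([[0.10, 0.70, 0.10, 0.10],    # per-class scores for 3 samples
                       [0.60, 0.20, 0.10, 0.10],
                       [0.05, 0.05, 0.20, 0.70]])

    to_class = ArgMaxToCategory(category_map=[10, 20, 30, 40])  # NN index -> map class code
    to_class(scores)                # -> array([20, 10, 40], dtype=int16)

    ToPercentage()(scores[:, 1])    # -> array([70, 20,  5], dtype=int16)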
eoml/torch/cnn/torch_utils.py

@@ -0,0 +1,404 @@
"""PyTorch utility functions for neural network operations.

This module provides helper functions for PyTorch-based deep learning, including
convolution size calculations, custom collation functions for data loaders, pixel
extraction utilities, and grid alignment functions for geospatial raster data.
"""

import logging
import math

import numpy as np
import torch
from rasterio.transform import xy, rowcol, guard_transform
from rasterio.warp import Affine
from rasterio.windows import Window, transform
from torch.utils.data import default_collate

logger = logging.getLogger(__name__)


def int_to_list(var, size):
    """Convert an integer to a list or validate a list's size.

    Used for managing convolution size inputs. Repeats an integer value into a
    list of the specified size, or validates that an existing list has the correct size.

    Args:
        var: Integer value to repeat, or list to validate.
        size: Target list size.

    Returns:
        list: List of the specified size containing the value(s).

    Raises:
        ValueError: If var is a list of the wrong size.
    """
    if isinstance(var, int):
        list_var = [var for i in range(size)]
    else:
        if len(var) != size:
            raise ValueError(f"Input should have size {size}")
        list_var = var

    return list_var


def conv_out_size(in_size, conv, stride, padding):
    """Calculate the output size of a convolution along one dimension.

    Args:
        in_size: Input size in pixels.
        conv: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding size.

    Returns:
        int: Output size after the convolution.
    """
    return math.floor((in_size - conv + 2 * padding) / stride + 1)


def conv_out_sizes(in_size, convs, strides, paddings):
    """Calculate the output sizes of a series of convolutions.

    Args:
        in_size: Initial input size in pixels.
        convs: List of convolution kernel sizes (or single value).
        strides: List of strides (or single value).
        paddings: List of padding sizes (or single value).

    Returns:
        list: List of output sizes after each convolution, including the initial size.
    """
    n_layer = len(convs) if hasattr(convs, '__len__') else 1
    n_layer = len(strides) if hasattr(strides, '__len__') else n_layer
    n_layer = len(paddings) if hasattr(paddings, '__len__') else n_layer

    convs = int_to_list(convs, n_layer)
    strides = int_to_list(strides, n_layer)
    paddings = int_to_list(paddings, n_layer)

    sizes = [in_size]
    for conv, stride, padding in zip(convs, strides, paddings):
        sizes.append(conv_out_size(sizes[-1], conv, stride, padding))
    return sizes

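For instance, a quick sketch of conv_out_sizes with made-up layer settings (three 3x3 convolutions, stride 1, no padding, on a 33-pixel input):

    conv_out_sizes(33, convs=[3, 3, 3], strides=1, paddings=0)
    # -> [33, 31, 29, 27]   (the initial size followed by the size after each layer)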
class PixelAt:
    """Extract specific pixel values from an array.

    Can use lists to get multiple values at once.

    Attributes:
        c: Channel index(es).
        h: Height/row index(es).
        w: Width/column index(es).
    """

    def __init__(self, c, h, w):
        self.c = c
        self.h = h
        self.w = w

    def __call__(self, array):
        return pixel_at(array, self.c, self.h, self.w)


class PixelAtBand:

    def __init__(self, h, w):
        self.h = h
        self.w = w

    def __call__(self, array):
        return pixel_at_band(array, self.h, self.w)


class PixelAtBandSkipValue:

    def __init__(self, h, w, skip):
        """
        :param h: height/row index(es)
        :param w: width/column index(es)
        :param skip: if this value appears anywhere in the extracted array, the sample is skipped (None is returned)
        """
        self.h = h
        self.w = w
        self.skip = skip

    def __call__(self, array):
        out = pixel_at_band(array, self.h, self.w)

        if (out == self.skip).any():
            return None

        return out


def center_pixel(array):
    """
    Find the central pixel of an array and return it. pixel_at is more error-prone but more efficient.
    :param array: (band, height, width) array to extract values from
    :return: the center pixel values (all bands)
    """
    _, h, w = array.shape

    if h % 2 != 1 or w % 2 != 1:
        raise ValueError("h, w have no clear center; sizes should be odd")

    center_h = h // 2
    center_w = w // 2

    return array[:, center_h, center_w]


def pixel_at(array, c, h, w):
    """
    Return the pixel at a given position as an array (scalar values are transformed to a vector of size 1).
    :param array: array to extract values from
    :param c: channel index (use a list to extract multiple values)
    :param h: height index (use a list to extract multiple values)
    :param w: width index (use a list to extract multiple values)
    :return:
    """
    v = array[c, h, w]
    if not isinstance(v, np.ndarray):
        v = np.array([v])
    return v


def pixel_at_band(array, h, w):
    """
    Return the pixel at a given position, across all bands, as an array (scalar values are transformed to a
    vector of size 1).
    :param array: array to extract values from
    :param h: height index (use a list to extract multiple values)
    :param w: width index (use a list to extract multiple values)
    :return:
    """
    v = array[:, h, w]
    if not isinstance(v, np.ndarray):
        v = np.array([v])
    return v

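A minimal sketch of the pixel helpers on a hypothetical 2-band, 5x5 chip (functions assumed imported from this module):

    import numpy as np

    chip = np.arange(2 * 5 * 5).reshape(2, 5, 5)   # (band, height, width)

    center_pixel(chip)         # -> array([12, 37])  (center pixel of both bands)
    pixel_at_band(chip, 1, 3)  # -> array([ 8, 33])  (row 1, column 3, all bands)
    pixel_at(chip, 0, 1, 3)    # -> array([8])       (band 0 only, wrapped in a vector)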
def multi_input_training_collate(batch):
    """
    Custom collate function, used when there are multiple inputs per sample.
    :param batch:
    :return: collated inputs and collated targets, separated
    """
    data_entries = [[] for _ in range(len(batch[0]))]

    for b in batch:
        for i, entry in enumerate(b):
            data_entries[i].append(entry)

    out = list(map(default_collate, data_entries))

    # return the inputs and the target separated
    return out[:-1], out[-1]


def batch_collate(batch):
    """
    Custom collate function, used with a batch sampler (the dataset already returns whole batches).
    :param batch:
    :return: the already-built batch
    """
    return batch[0]


def meta_data_collate(batch):
    """
    Custom collate function, used when __getitem__/the iterator returns (data, metadata). Applies the default
    collate to the data and leaves the metadata untransformed.
    :param batch:
    :return: collated data and metadata, separated
    """
    data = []
    meta = []
    for b in batch:
        data.append(b[0])
        meta.append(b[1])

    return default_collate(data), meta


def multi_input_meta_data_collate(batch):
    """
    Custom collate function, used when __getitem__/the iterator returns multiple data entries and one metadata
    entry. Applies the default collate to each data entry independently and leaves the metadata untransformed.
    :param batch:
    :return: collated data and metadata, separated
    """
    data_entries = [[] for _ in range(len(batch[0][0]))]
    sample, meta = batch[0]

    for i, entry in enumerate(sample):
        data_entries[i].append(entry)

    # default_collate adds an outer batch dimension of size 1, so take index 0
    out = list(map(lambda x: default_collate(x)[0], data_entries))

    return out, [meta]


def no_collate(batch):
    """
    Do not transform the data to tensors. Flatten the numpy arrays and separate the data from the metadata.
    """
    data = []
    meta = []

    for b in batch:
        d = b[0].numpy().reshape(len(b[0]), len(b[0][0]))
        np.nan_to_num(d, copy=False)
        data.append(d)
        meta.append(b[1])

    return data, meta

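A minimal sketch of meta_data_collate, assuming each dataset item is a (tensor, metadata) pair; the shapes and metadata are hypothetical:

    import torch

    batch = [(torch.zeros(3, 5, 5), {"window": (0, 0)}),
             (torch.ones(3, 5, 5), {"window": (0, 1)})]

    data, meta = meta_data_collate(batch)
    data.shape   # torch.Size([2, 3, 5, 5])  (stacked by default_collate)
    meta         # [{'window': (0, 0)}, {'window': (0, 1)}]  (left untouched)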
def align_grid_deprecated(source_meta, bounds, size):
    """
    Given the bounds we want to apply a convolution to, align the bounds to the best matching pixels.
    The bounds are computed so that the center pixel of the window is always inside. TODO add offset?
    :param source_meta:
    :param bounds:
    :param size:
    :return:
    """
    # align the grids taking the convolution on the border into account
    # take into account a shifted coordinate system

    half_size = size // 2

    src_transform = source_meta["transform"]

    # inverted y coordinate
    assert src_transform.e < 0

    # grid bounds in the source grid coordinates
    (bottom, left) = rowcol(src_transform, bounds.left, bounds.bottom)
    (top, right) = rowcol(src_transform, bounds.right, bounds.top)

    # compute the target bounds taking the convolution into account
    left = max(0, left - half_size) + half_size
    bottom = min(bottom + half_size, source_meta["height"]) - half_size

    top = max(0, top - half_size) + half_size
    right = min(right + half_size, source_meta["width"]) - half_size

    # dimensions of the bounded grid
    width = right - left
    height = bottom - top

    (west, north) = xy(src_transform, top, left, offset="ul")

    # transformation built from the bounds: the left/top pixel is specified and the pixel size is kept from the original
    target_transform = Affine.translation(west, north) * Affine.scale(src_transform.a, src_transform.e)

    # left, top is the offset
    return target_transform, width, height, left, top


def align_grid(src_transform, bounds, r_width, r_height, size, shrink_for_conv=False, precision=0.01):
    """
    Given the bounds we want to apply a convolution to, align the bounds to the best matching pixels.
    The bounds are computed so that the center pixel of the window is always inside. TODO add offset?
    :param src_transform:
    :param bounds:
    :param r_width:
    :param r_height:
    :param size:
    :param shrink_for_conv:
    :param precision:
    :return:
    """
    # align the grids taking the convolution on the border into account
    # take into account a shifted coordinate system

    half_size = size // 2

    # grid bounds in the source grid coordinates
    window = aligned_bound(bounds.left, bounds.bottom, bounds.right, bounds.top, src_transform, precision=precision)

    left = window.col_off
    right = left + window.width

    top = window.row_off
    bottom = top + window.height

    # compute the target bounds taking the convolution into account
    if shrink_for_conv:
        left = max(0, left - half_size) + half_size
        bottom = min(bottom + half_size, r_height) - half_size + 1

        top = max(0, top - half_size) + half_size
        right = min(right + half_size, r_width) - half_size + 1

    # new window with the convolution border inside
    window = Window(left, top, right - left, bottom - top)

    width = right - left
    height = bottom - top

    gtransform = guard_transform(src_transform)
    target_transform = transform(window, gtransform)

    # left, top is the offset
    return target_transform, width, height, left, top


def aligned_bound(left, bottom, right, top, transform, precision=0.01):
    """
    Compute the input window with a shrink of 1 pixel.
    Pixels are treated as areas: the pixel "point" is located at the top left of the pixel and the bounds are the
    real bounding box. This means the bounding box effectively runs from pixel (0, 0) to (length, length), while
    the actual array has its last pixel at (length - 1, length - 1).

    A pixel is included if at least `precision` (as a fraction) of it is covered by the raw raster.
    """

    def idx(x):
        return x

    # rowcol inverts the coordinate order, i.e. returns (row, col)
    bottom, left = rowcol(transform, left, bottom, op=idx)
    top, right = rowcol(transform, right, top, op=idx)

    # top/left: if the pixel is covered by more than `precision` from its top-left corner, round up
    left = _round_high(left, precision)  # the index matches the bound
    top = _round_high(top, precision)

    # bottom/right: the bound must be very close to the bottom-right of the pixel (i.e. it covers almost all of
    # the pixel); otherwise round down
    bottom = _round_low(bottom, precision) - 1  # the bound is at index + 1
    right = _round_low(right, precision) - 1

    return Window(left, top, right - left, bottom - top)


def _round_low(value, precision):
    if math.ceil(value) - value < precision:
        value = math.ceil(value)
    return math.floor(value)


def _round_high(value, precision):
    # if the pixel of the input is almost fully covered we use it anyway (for the left and top pixels,
    # covered means close to floor)
    if value - math.floor(value) < precision:
        value = math.floor(value)

    return math.ceil(value)