PyPI - eoml - Versions diffs - 0.9.0__py3-none-any.whl - Mend

eoml 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

eoml/__init__.py +74 -0
eoml/automation/__init__.py +7 -0
eoml/automation/configuration.py +105 -0
eoml/automation/dag.py +233 -0
eoml/automation/experience.py +618 -0
eoml/automation/tasks.py +825 -0
eoml/bin/__init__.py +6 -0
eoml/bin/clean_checkpoint.py +146 -0
eoml/bin/land_cover_mapping_toml.py +435 -0
eoml/bin/mosaic_images.py +137 -0
eoml/data/__init__.py +7 -0
eoml/data/basic_geo_data.py +214 -0
eoml/data/dataset_utils.py +98 -0
eoml/data/persistence/__init__.py +7 -0
eoml/data/persistence/generic.py +253 -0
eoml/data/persistence/lmdb.py +379 -0
eoml/data/persistence/serializer.py +82 -0
eoml/raster/__init__.py +7 -0
eoml/raster/band.py +141 -0
eoml/raster/dataset/__init__.py +6 -0
eoml/raster/dataset/extractor.py +604 -0
eoml/raster/raster_reader.py +602 -0
eoml/raster/raster_utils.py +116 -0
eoml/torch/__init__.py +7 -0
eoml/torch/cnn/__init__.py +7 -0
eoml/torch/cnn/augmentation.py +150 -0
eoml/torch/cnn/dataset_evaluator.py +68 -0
eoml/torch/cnn/db_dataset.py +605 -0
eoml/torch/cnn/map_dataset.py +579 -0
eoml/torch/cnn/map_dataset_const_mem.py +135 -0
eoml/torch/cnn/outputs_transformer.py +130 -0
eoml/torch/cnn/torch_utils.py +404 -0
eoml/torch/cnn/training_dataset.py +241 -0
eoml/torch/cnn/windows_dataset.py +120 -0
eoml/torch/dataset/__init__.py +6 -0
eoml/torch/dataset/shade_dataset_tester.py +46 -0
eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
eoml/torch/model_low_use.py +507 -0
eoml/torch/models.py +282 -0
eoml/torch/resnet.py +437 -0
eoml/torch/sample_statistic.py +260 -0
eoml/torch/trainer.py +782 -0
eoml/torch/trainer_v2.py +253 -0
eoml-0.9.0.dist-info/METADATA +93 -0
eoml-0.9.0.dist-info/RECORD +47 -0
eoml-0.9.0.dist-info/WHEEL +4 -0
eoml-0.9.0.dist-info/entry_points.txt +3 -0

eoml/raster/raster_utils.py ADDED Viewed

@@ -0,0 +1,116 @@
+import json
+import numpy as np
+import rasterio
+from rasterio.transform import TransformMethodsMixin
+from rasterio.windows import WindowMethodsMixin
+class RasterInfo(WindowMethodsMixin, TransformMethodsMixin):
+    def __init__(self, transform, height, width, crs, bounds):
+        self.transform = transform
+        self.height = height
+        self.width = width
+        self.crs = crs
+        self.bounds = bounds
+    @classmethod
+    def from_file(cls, path):
+        with rasterio.open(path) as src:
+            return cls(src.transform, src.height, src.width, src.crs, src.bounds)
+def read_gdal_stats(path):
+    with open(path) as file:
+        # returns JSON object as VN
+        # a dictionary
+        data = json.load(file)
+        bands = data["bands"]
+        stats = np.zeros((len(bands),2))
+        for b in data["bands"]:
+            stats[b["band"]-1]= np.array([b["mean"], b["stdDev"]])
+        return stats
+def normalize_sigma(data, means, std_devs, n, truncate=False, transform_no_data=None):
+    """
+    Normalize in place, the values between mean +- n*sigma are compressed between 0 and 1
+    :param data: to normalize in place
+    :param means: of the original data
+    :param std_devs: of the original data
+    :param n: number of sigma to map betweren 0 and 1
+    :param truncate: weather to truncat value smaller or bigger than 0 or 1 to 0 or 1
+    :return: The array changed in place
+    """
+    for b in range(len(data)):
+        data[b] = (1 + (data[b] - means[b]) / (n * std_devs[b])) / 2
+    if transform_no_data is not None:
+        np.nan_to_num(data, copy=False, nan=transform_no_data, posinf=None, neginf=None)
+    if truncate:
+        bigger = data > 1
+        data[bigger] = 1
+        smaller = data < 0
+        data[smaller] = 0
+class NaNToNumber:
+    def __init__(self, number):
+        """
+        """
+        self.number = number
+    def __call__(self, data):
+        np.nan_to_num(data, copy=False, nan=self.number, posinf=None, neginf=None)
+        return data
+class SigmaNormalizer:
+    def __init__(self, means, std_devs, n, truncate=False, transform_no_data=None):
+        """
+        Normalize in place, the values between mean +- n*sigma are compressed between 0 and 1
+        Object version of function. Usefull for multi threading usinf the spawn methode
+        :param means: of the original data
+        :param std_devs: of the original data
+        :param n: number of sigma to map betweren 0 and 1
+        :param truncate: weather to truncat value smaller or bigger than 0 or 1 to 0 or 1
+        :return: The array changed in place
+        """
+        self.means = means
+        self.std_devs = std_devs
+        self.n = n
+        self.truncate = truncate
+        self.transform_no_data = transform_no_data
+    def __call__(self, data):
+        normalize_sigma(data, self.means, self.std_devs, self.n, self.truncate, self.transform_no_data)
+class CastSigmaNormalizer:
+    def __init__(self, means, std_devs, n, truncate=False, transform_no_data=None, dtype=None):
+        """
+        Normalize in place, the values between mean +- n*sigma are compressed between 0 and 1
+        Object version of function. Usefull for multi threading usinf the spawn methode
+        :param means: of the original data
+        :param std_devs: of the original data
+        :param n: number of sigma to map betweren 0 and 1
+        :param truncate: weather to truncat value smaller or bigger than 0 or 1 to 0 or 1
+        :return: The array changed in place
+        """
+        self.means = means
+        self.std_devs = std_devs
+        self.n = n
+        self.truncate = truncate
+        self.transform_no_data = transform_no_data
+        self.dtype = dtype
+    def __call__(self, data):
+        if self.dtype is not None:
+            data.astype(self.dtype, copy=False)
+        normalize_sigma(data, self.means, self.std_devs, self.n, self.truncate, self.transform_no_data)

eoml/torch/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+PyTorch Module for EOML.
+This module provides PyTorch-based machine learning utilities for Earth
+observation applications, including neural network architectures, training
+utilities, dataset loaders, and model evaluation tools.
+"""

eoml/torch/cnn/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+CNN Submodule for PyTorch.
+This submodule provides convolutional neural network utilities including
+dataset loaders, augmentation strategies, training datasets, and mapping
+utilities for applying trained models to large raster datasets.
+"""

eoml/torch/cnn/augmentation.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""Data augmentation transformations for PyTorch image datasets.
+This module provides transformation classes and functions for augmenting image data
+during training. Includes rotation, flipping, cropping, scaling, shearing, and
+blurring operations using torchvision's functional API.
+"""
+import logging
+import math
+import random
+import torchvision.transforms.functional as TF
+logger = logging.getLogger(__name__)
+def rotate_crop_flip_transform(img, size=13, angle=180, vflip=False):
+    """Apply rotation, crop, and optional flip to an image.
+    Args:
+        img: Input image tensor.
+        size: Size to crop to after rotation. Defaults to 13.
+        angle: Rotation angle in degrees. Defaults to 180.
+        vflip: Whether to apply vertical flip. Defaults to False.
+    Returns:
+        Transformed image tensor.
+    """
+    img = TF.rotate(img, angle=angle)
+    img = TF.center_crop(img, size)
+    if vflip:
+        img = TF.vflip(img)
+    return img
+# Due to memory bug in dataset, the dataset return numpy type that we cast to int Todo need to be fixed in dataset
+def rotate_flip_transform(img, angle=180, vflip=False):
+    """Apply rotation and optional flip to an image.
+    Note:
+        Due to dataset memory bug, parameters are cast from numpy types.
+    Todo:
+        Fix dataset to avoid numpy type issue.
+    Args:
+        img: Input image tensor.
+        angle: Rotation angle in degrees. Defaults to 180.
+        vflip: Whether to apply vertical flip. Defaults to False.
+    Returns:
+        Transformed image tensor.
+    """
+    img = TF.rotate(img, angle=int(angle))
+    if bool(vflip):
+        img = TF.vflip(img)
+    return img
+class CropTransform:
+    """Crop images to a specified size.
+    Useful for working with databases containing samples larger than needed,
+    allowing on-the-fly cropping to the desired size.
+    Attributes:
+        width: Target width/size for square crop.
+    """
+    def __init__(self, width):
+        self.width = width
+    def __call__(self, inputs):
+        inputs = TF.center_crop(inputs, self.width)
+        return inputs
+    def __repr__(self):
+        return f'CropTransform(width: {self.width})'
+class RandomTransform:
+    """Randomly apply augmentation transformations to images.
+    Applies random combinations of rotation, flip, scale, shear, and blur,
+    then crops to the specified size. Useful for data augmentation during training.
+    Attributes:
+        width: Target width for final crop.
+        p_rot: Probability of applying rotation. Defaults to 0.50.
+        p_flip: Probability of applying vertical flip. Defaults to 0.50.
+        p_scale: Probability of applying scaling. Defaults to 0.2.
+        p_shear: Probability of applying shear. Defaults to 0.2.
+        p_blur: Probability of applying Gaussian blur. Defaults to 0.2.
+    """
+    def __init__(self, width, p_rot=0.50, p_flip=0.50, p_scale=0.2, p_shear= 0.2, p_blur= 0.2):
+        self.width = width
+        self.p_rot = p_rot
+        self.p_flip = p_flip
+        self.p_scale = p_scale
+        self.p_shear = p_shear
+        self.p_blur = p_blur
+    def __repr__(self):
+        return f'RandomTransform(width: {self.width}, ' \
+               f'p_rot: {self.p_rot} , ' \
+               f'p_flip: {self.p_flip} , ' \
+               f'p_scale: {self.p_scale} , ' \
+               f'p_shear: {self.p_shear} , ' \
+               f'p_blur: {self.p_blur})' \
+    # size need to be multiplied by to avoid dark pixel
+    # 1.4145
+    def __call__(self, inputs):
+        c, i_h, i_w = inputs.shape
+        rotation_angle = random.randint(-180, 180) if self.p_rot > random.uniform(0, 1) else 0
+        shear = random.randint(-15, 15) if self.p_shear > random.uniform(0, 1) else 0
+        scale = random.randint(2, 4) if self.p_scale > random.uniform(0, 1) else 1
+        flip = True if self.p_flip > random.uniform(0, 1) else False
+        blur = True if self.p_blur > random.uniform(0, 1) else False
+        # distance to the border to avoid black border du to rotation
+        safe_width = math.ceil(1.4143* self.width)
+        if i_h < safe_width:
+            logger.warning("Transformation: the width of the input is not big enough and may be truncated.")
+        #input = TF.rotate(input, rotation_angle, interpolation=TF.InterpolationMode.BILINEAR)
+        # affine
+        inputs = TF.affine(inputs, angle=rotation_angle, translate=[0,0], scale=scale, shear=shear,
+                          interpolation=TF.InterpolationMode.NEAREST)
+        inputs = TF.center_crop(inputs, self.width)
+        # flip
+        if flip:
+            inputs = TF.vflip(inputs)
+        # gaussian
+        if blur:
+            inputs = TF.gaussian_blur(inputs, kernel_size=[3, 3], sigma=[0.45, 0.45])
+        return inputs

eoml/torch/cnn/dataset_evaluator.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Model evaluation utilities for PyTorch datasets.
+This module provides classes for evaluating trained neural network models on
+datasets, collecting predictions and reference values for analysis.
+"""
+import numpy as np
+import torch
+from tqdm import tqdm
+class DatasetEvaluator:
+    """Evaluate a neural network model on a dataset.
+    Runs inference on a complete dataset and collects predictions along with
+    reference labels for evaluation metrics.
+    Todo:
+        Implement aggressive/optimized version.
+    Attributes:
+        model: PyTorch model or path to JIT-compiled model.
+    """
+    def __init__(self, model):
+        if isinstance(model, str):
+            self.model = torch.jit.load(model)
+        else:
+            self.model = model
+    def evaluate(self, loader, device="cpu"):
+        """Evaluate model on dataset and collect predictions.
+        Args:
+            loader: PyTorch DataLoader providing test samples.
+            device: Device to run inference on ('cpu' or 'cuda'). Defaults to "cpu".
+        Returns:
+            tuple: (reference_labels, predictions) as numpy arrays.
+        """
+        # Make sure gradient tracking is off, and do a pass over the data
+        self.model.train(False)
+        results=[]
+        reference=[]
+        with torch.inference_mode():
+            with tqdm(total=len(loader),desc="Batch") as pbar:
+                for i, data in enumerate(loader):
+                    # Every data instance is an input + label pair
+                    inputs, labels = data
+                    if device is not None:
+                        if isinstance(inputs, (list, tuple)):
+                            inputs = map(lambda x: x.to(device, non_blocking=True), inputs)
+                        else:
+                            inputs = inputs.to(device, non_blocking=True)
+                    # Make predictions for this batch
+                    outputs = self.model(*inputs)
+                    results.extend(outputs.cpu())
+                    reference.extend(labels)
+        return np.array(reference), np.array(results)