eoml 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. eoml/__init__.py +74 -0
  2. eoml/automation/__init__.py +7 -0
  3. eoml/automation/configuration.py +105 -0
  4. eoml/automation/dag.py +233 -0
  5. eoml/automation/experience.py +618 -0
  6. eoml/automation/tasks.py +825 -0
  7. eoml/bin/__init__.py +6 -0
  8. eoml/bin/clean_checkpoint.py +146 -0
  9. eoml/bin/land_cover_mapping_toml.py +435 -0
  10. eoml/bin/mosaic_images.py +137 -0
  11. eoml/data/__init__.py +7 -0
  12. eoml/data/basic_geo_data.py +214 -0
  13. eoml/data/dataset_utils.py +98 -0
  14. eoml/data/persistence/__init__.py +7 -0
  15. eoml/data/persistence/generic.py +253 -0
  16. eoml/data/persistence/lmdb.py +379 -0
  17. eoml/data/persistence/serializer.py +82 -0
  18. eoml/raster/__init__.py +7 -0
  19. eoml/raster/band.py +141 -0
  20. eoml/raster/dataset/__init__.py +6 -0
  21. eoml/raster/dataset/extractor.py +604 -0
  22. eoml/raster/raster_reader.py +602 -0
  23. eoml/raster/raster_utils.py +116 -0
  24. eoml/torch/__init__.py +7 -0
  25. eoml/torch/cnn/__init__.py +7 -0
  26. eoml/torch/cnn/augmentation.py +150 -0
  27. eoml/torch/cnn/dataset_evaluator.py +68 -0
  28. eoml/torch/cnn/db_dataset.py +605 -0
  29. eoml/torch/cnn/map_dataset.py +579 -0
  30. eoml/torch/cnn/map_dataset_const_mem.py +135 -0
  31. eoml/torch/cnn/outputs_transformer.py +130 -0
  32. eoml/torch/cnn/torch_utils.py +404 -0
  33. eoml/torch/cnn/training_dataset.py +241 -0
  34. eoml/torch/cnn/windows_dataset.py +120 -0
  35. eoml/torch/dataset/__init__.py +6 -0
  36. eoml/torch/dataset/shade_dataset_tester.py +46 -0
  37. eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
  38. eoml/torch/model_low_use.py +507 -0
  39. eoml/torch/models.py +282 -0
  40. eoml/torch/resnet.py +437 -0
  41. eoml/torch/sample_statistic.py +260 -0
  42. eoml/torch/trainer.py +782 -0
  43. eoml/torch/trainer_v2.py +253 -0
  44. eoml-0.9.0.dist-info/METADATA +93 -0
  45. eoml-0.9.0.dist-info/RECORD +47 -0
  46. eoml-0.9.0.dist-info/WHEEL +4 -0
  47. eoml-0.9.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,82 @@
1
+ """MessagePack-based serializers for geospatial data and headers.
2
+
3
+ This module provides efficient binary serialization using MessagePack format
4
+ for geospatial dataset headers and numpy array data. MessagePack offers compact
5
+ representation and fast serialization/deserialization, making it ideal for
6
+ high-performance machine learning pipelines.
7
+
8
+ Geometries are serialized as WKT (Well-Known Text) strings for portability
9
+ and ease of reconstruction.
10
+ """
11
+
12
+ import msgpack
13
+ import numpy as np
14
+ import shapely
15
+ import shapely.wkt
16
+
17
+ from eoml.data.basic_geo_data import GeoDataHeader
18
+ from eoml.data.persistence.generic import GeoDataHeaderSerializer, GeoDataSerializer
19
+
20
+
21
class MsgpackGeoDataHeaderSerializer(GeoDataHeaderSerializer):
    """MessagePack serializer for GeoDataHeader objects.

    A header is packed as a plain dictionary tagged with a ``__header__``
    key; its geometry is stored as a WKT string, next to the sample
    identifier and the source file name.
    """

    def decode_geodata_header(self, obj):
        """Rebuild a GeoDataHeader from a tagged dictionary.

        Used as the msgpack ``object_hook``. Dictionaries that do not carry
        the ``__header__`` key are returned untouched.
        """
        if '__header__' not in obj:
            return obj

        idx = obj["id"]
        geometry = shapely.wkt.loads(obj["geometry"])
        file_name = obj["file_name"]
        return GeoDataHeader(idx, geometry, file_name)

    def encode_geodata_header(self, obj):
        """Turn a GeoDataHeader into a tagged dictionary of basic types.

        Used as the msgpack ``default`` hook. Objects that are not
        GeoDataHeader instances are returned untouched.
        """
        if not isinstance(obj, GeoDataHeader):
            return obj

        # NOTE(review): only ``file_name.stem`` is stored (file_name is
        # presumably a pathlib.Path), so directory and extension are not
        # round-tripped -- confirm this is intended.
        return {
            '__header__': True,
            "id": obj.idx,
            "geometry": obj.geometry.wkt,
            "file_name": obj.file_name.stem,
        }

    def serialize(self, header) -> bytes:
        """Pack ``header`` into MessagePack bytes."""
        packed = msgpack.packb(
            header,
            default=self.encode_geodata_header,
            use_bin_type=True,
        )
        return packed

    def deserialize(self, msg):
        """Unpack MessagePack bytes produced by ``serialize``."""
        unpacked = msgpack.unpackb(
            msg,
            object_hook=self.decode_geodata_header,
            raw=False,
        )
        return unpacked
51
+
52
+
53
class MsgpackGeoDataSerializer(GeoDataSerializer):
    """MessagePack serializer for numpy array data.

    An array is packed as a dictionary tagged with an ``__array__`` key that
    holds the dtype string, the shape and the raw bytes, which is enough to
    rebuild the array on the way back.
    """

    def encode_geodata(self, obj):
        """Turn a numpy array into a tagged dictionary of basic types.

        Used as the msgpack ``default`` hook. Objects that are not numpy
        arrays are returned untouched.
        """
        if not isinstance(obj, np.ndarray):
            return obj

        return {
            '__array__': True,
            "dtype": obj.dtype.str,
            "shape": obj.shape,
            "data": obj.tobytes(),
        }

    def decode_geodata(self, obj):
        """Rebuild a numpy array from a tagged dictionary.

        Used as the msgpack ``object_hook``. Dictionaries that do not carry
        the ``__array__`` key are returned untouched.
        """
        if '__array__' not in obj:
            return obj

        # The array is built directly on top of the unpacked byte buffer.
        flat = np.frombuffer(obj["data"], dtype=np.dtype(obj["dtype"]))
        return flat.reshape(obj["shape"])

    def serialize(self, data) -> bytes:
        """Pack ``data`` into MessagePack bytes."""
        packed = msgpack.packb(
            data,
            default=self.encode_geodata,
            use_bin_type=True,
        )
        return packed

    def deserialize(self, msg):
        """Unpack MessagePack bytes produced by ``serialize``."""
        unpacked = msgpack.unpackb(
            msg,
            object_hook=self.decode_geodata,
            raw=False,
        )
        return unpacked
@@ -0,0 +1,7 @@
1
+ """
2
+ Raster Module for EOML.
3
+
4
+ This module provides utilities for reading, processing, and extracting data
5
+ from raster files (GeoTIFFs). It includes raster readers, band management,
6
+ and dataset extraction tools for machine learning applications.
7
+ """
eoml/raster/band.py ADDED
@@ -0,0 +1,141 @@
1
+ """
2
+ Band management module for raster operations.
3
+
4
+ This module provides the Band class for managing raster band selections,
5
+ supporting both 0-indexed and 1-indexed band numbering systems used by
6
+ different rasterio operations.
7
+ """
8
+ from pathlib import Path
9
+
10
+ import rasterio
11
+
12
+
13
class Band:
    """
    Manage band numbers with support for both 0-indexed and 1-indexed access.

    This class facilitates working with raster bands by maintaining both
    0-indexed (Python-style) and 1-indexed (rasterio-style) band lists.
    This is particularly useful when interfacing with rasterio, which uses
    1-based indexing for band operations.

    Attributes:
        length (int): Number of bands in the selection. Kept in sync by
            ``append`` and ``remove``.

    Properties:
        selected (list): 0-indexed list of band numbers (Python-style).
        selected1 (list): 1-indexed list of band numbers (rasterio-style).

    Examples:
        >>> # Create a band selection for bands 0, 1, 2
        >>> bands = Band([0, 1, 2])
        >>> print(bands.selected)  # [0, 1, 2]
        >>> print(bands.selected1)  # [1, 2, 3]
        >>>
        >>> # Create from a file
        >>> bands = Band.from_file("path/to/raster.tif")
    """

    def __init__(self, selected):
        """
        Initialize a Band instance with a list of band numbers.

        Args:
            selected (iterable of int): 0-indexed band numbers to select.
                The values are copied into a new list, so the caller's
                object is never modified.

        Raises:
            ValueError: If selected is None.

        Examples:
            >>> bands = Band([0, 2, 4])  # Select bands 0, 2, and 4
        """
        # Identity check: ``== None`` is evaluated element-wise by array-like
        # inputs (e.g. numpy arrays) and cannot be used as a truth value.
        if selected is None:
            raise ValueError("specify a band range")

        # list() accepts any iterable (list, tuple, range, ...), whereas
        # ``.copy()`` only exists on some of them.
        self._selected = list(selected)
        self._selected1 = [b + 1 for b in self._selected]
        self.length = len(self._selected)

    @classmethod
    def from_file(cls, raster_path: Path):
        """
        Create a Band instance from a raster file, selecting all bands.

        The file is opened with rasterio only to read its band count.

        Args:
            raster_path (str or Path): Path to the raster file.

        Returns:
            Band: Band instance containing all bands from the raster.

        Examples:
            >>> bands = Band.from_file("/path/to/satellite_image.tif")
            >>> print(len(bands))  # Number of bands in the file
        """
        with rasterio.open(raster_path) as src:
            count = src.count

        return cls(list(range(count)))

    def remove(self, band):
        """
        Remove a band from the selection.

        Args:
            band (int): 0-indexed band number to remove.

        Raises:
            ValueError: If band is not part of the selection.

        Examples:
            >>> bands = Band([0, 1, 2, 3])
            >>> bands.remove(2)  # Remove band 2
            >>> print(bands.selected)  # [0, 1, 3]
        """
        self._selected.remove(band)
        self._selected1.remove(band + 1)
        # Keep the public counter in step with the lists.
        self.length = len(self._selected)

    def append(self, band):
        """
        Add a band to the selection.

        Args:
            band (int): 0-indexed band number to add.

        Examples:
            >>> bands = Band([0, 1])
            >>> bands.append(2)  # Add band 2
            >>> print(bands.selected)  # [0, 1, 2]
        """
        self._selected.append(band)
        self._selected1.append(band + 1)
        # Keep the public counter in step with the lists.
        self.length = len(self._selected)

    @property
    def selected(self):
        """
        Get the 0-indexed band list (Python-style).

        Returns:
            list: List of 0-indexed band numbers.
        """
        return self._selected

    @property
    def selected1(self):
        """
        Get the 1-indexed band list (rasterio-style).

        Returns:
            list: List of 1-indexed band numbers.
        """
        return self._selected1

    def __len__(self):
        """
        Get the number of bands in the selection.

        Returns:
            int: Number of selected bands, read from the underlying list so
            it always reflects the current selection.
        """
        return len(self._selected)
@@ -0,0 +1,6 @@
1
+ """
2
+ Raster Dataset Submodule.
3
+
4
+ This submodule provides tools for extracting training datasets from raster
5
+ files, including efficient window extraction and multi-threaded processing.
6
+ """