eoml 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eoml/__init__.py +74 -0
- eoml/automation/__init__.py +7 -0
- eoml/automation/configuration.py +105 -0
- eoml/automation/dag.py +233 -0
- eoml/automation/experience.py +618 -0
- eoml/automation/tasks.py +825 -0
- eoml/bin/__init__.py +6 -0
- eoml/bin/clean_checkpoint.py +146 -0
- eoml/bin/land_cover_mapping_toml.py +435 -0
- eoml/bin/mosaic_images.py +137 -0
- eoml/data/__init__.py +7 -0
- eoml/data/basic_geo_data.py +214 -0
- eoml/data/dataset_utils.py +98 -0
- eoml/data/persistence/__init__.py +7 -0
- eoml/data/persistence/generic.py +253 -0
- eoml/data/persistence/lmdb.py +379 -0
- eoml/data/persistence/serializer.py +82 -0
- eoml/raster/__init__.py +7 -0
- eoml/raster/band.py +141 -0
- eoml/raster/dataset/__init__.py +6 -0
- eoml/raster/dataset/extractor.py +604 -0
- eoml/raster/raster_reader.py +602 -0
- eoml/raster/raster_utils.py +116 -0
- eoml/torch/__init__.py +7 -0
- eoml/torch/cnn/__init__.py +7 -0
- eoml/torch/cnn/augmentation.py +150 -0
- eoml/torch/cnn/dataset_evaluator.py +68 -0
- eoml/torch/cnn/db_dataset.py +605 -0
- eoml/torch/cnn/map_dataset.py +579 -0
- eoml/torch/cnn/map_dataset_const_mem.py +135 -0
- eoml/torch/cnn/outputs_transformer.py +130 -0
- eoml/torch/cnn/torch_utils.py +404 -0
- eoml/torch/cnn/training_dataset.py +241 -0
- eoml/torch/cnn/windows_dataset.py +120 -0
- eoml/torch/dataset/__init__.py +6 -0
- eoml/torch/dataset/shade_dataset_tester.py +46 -0
- eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
- eoml/torch/model_low_use.py +507 -0
- eoml/torch/models.py +282 -0
- eoml/torch/resnet.py +437 -0
- eoml/torch/sample_statistic.py +260 -0
- eoml/torch/trainer.py +782 -0
- eoml/torch/trainer_v2.py +253 -0
- eoml-0.9.0.dist-info/METADATA +93 -0
- eoml-0.9.0.dist-info/RECORD +47 -0
- eoml-0.9.0.dist-info/WHEEL +4 -0
- eoml-0.9.0.dist-info/entry_points.txt +3 -0
eoml/__init__.py
ADDED
@@ -0,0 +1,74 @@
+"""
+Earth Observation Machine Learning (EOML) Package.
+
+This package provides tools and utilities for processing Earth observation data
+and machine learning workflows for remote sensing applications. It includes modules
+for data processing, raster operations, neural network training, and automation.
+
+The package is organized into the following main modules:
+- automation: Task automation and experiment configuration
+- data: Data structures and persistence utilities
+- ee: Google Earth Engine integration
+- raster: Raster data reading and processing
+- torch: PyTorch-based machine learning models and training
+- bin: Command-line utilities
+"""
+
+default_read_profile = {'num_threads': 'all_cpus'}
+
+default_write_profile = {'driver': 'GTiff',
+                         'BIGTIFF': 'IF_SAFER',
+                         'num_threads': 'all_cpus',
+                         'tiled': True,
+                         'blockxsize': 512,
+                         'blockysize': 512,
+                         'compress': 'zstd'}
+
+
+def get_read_profile(**kargs):
+    """
+    Get a default reasonable TIFF reader profile.
+
+    Returns a profile dictionary with default settings for reading GeoTIFF files
+    using rasterio. The default profile uses all available CPU threads for reading.
+
+    Args:
+        **kargs: Additional keyword arguments to override default profile settings.
+
+    Returns:
+        dict: A profile dictionary suitable for use with rasterio.open().
+
+    Examples:
+        >>> profile = get_read_profile()
+        >>> profile = get_read_profile(num_threads=4)
+    """
+    profile = default_read_profile.copy()
+    profile.update(kargs)
+    return profile
+
+
+def get_write_profile(**kargs):
+    """
+    Get a default reasonable TIFF writer profile.
+
+    Returns a profile dictionary with default settings for writing GeoTIFF files
+    using rasterio. The default profile uses:
+    - GTiff driver
+    - ZSTD compression
+    - Tiled format with 512x512 blocks
+    - All available CPU threads
+    - BIGTIFF when safer
+
+    Args:
+        **kargs: Additional keyword arguments to override default profile settings.
+
+    Returns:
+        dict: A profile dictionary suitable for use with rasterio.open() in write mode.
+
+    Examples:
+        >>> profile = get_write_profile()
+        >>> profile = get_write_profile(compress='lzw', num_threads=4)
+    """
+    profile = default_write_profile.copy()
+    profile.update(kargs)
+    return profile
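For orientation, these profiles are plain dicts meant to be merged into the keyword arguments of rasterio.open(). A minimal sketch of writing a single-band GeoTIFF with the write profile follows; the output path, array, CRS, and transform are illustrative and not part of the package:

import numpy as np
import rasterio
from rasterio.transform import from_origin

from eoml import get_write_profile

# Hypothetical single-band array with placeholder georeferencing.
data = np.zeros((1024, 1024), dtype="float32")

# The returned dict already carries driver, tiling, and zstd compression;
# the overrides below add the per-dataset fields rasterio requires.
profile = get_write_profile(width=data.shape[1], height=data.shape[0],
                            count=1, dtype="float32",
                            crs="EPSG:4326",
                            transform=from_origin(0.0, 0.0, 0.001, 0.001))

with rasterio.open("example_output.tif", "w", **profile) as dst:
    dst.write(data, 1)
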
eoml/automation/configuration.py
ADDED
@@ -0,0 +1,105 @@
+"""
+Configuration management module for EOML automation.
+
+This module provides system-wide configuration management for EOML experiments,
+including paths to data directories, raster processing profiles, and neural
+network device settings.
+"""
+
+import os
+
+import toml
+from typing import Any, Dict, Union, Optional
+
+from eoml import default_read_profile, default_write_profile
+
+# Pydantic model for configuration (v2)
+try:
+    from pydantic import BaseModel, Field, AliasChoices
+except Exception:  # pragma: no cover - optional import hint
+    BaseModel = object  # type: ignore
+    Field = lambda *args, **kwargs: None  # type: ignore
+    class AliasChoices:  # type: ignore
+        def __init__(self, *args, **kwargs):
+            pass
+
+SYSTEM_CONFIG = {}  # maps configuration names to SystemConfigModel instances
+
+class SystemConfigModel(BaseModel):
+    """
+    Pydantic representation of the system configuration.
+
+    This class stores configuration settings for data directories, raster
+    processing profiles, and neural network execution parameters.
+
+    Attributes:
+        data_dir (str): Base directory for data storage.
+        raster_dir (str): Directory containing raster files.
+        shade_dir (str): Directory for shade/canopy data.
+        land_cover_dir (str): Directory for land cover data.
+        raster_read_profile (dict): Rasterio profile for reading rasters.
+        raster_write_profile (dict): Rasterio profile for writing rasters.
+        device (str): Device for neural network execution ('cpu', 'cuda', etc.).
+        mapping_mode (int): Mode for map generation (0=CPU, 1=GPU with pinned memory, etc.).
+    """
+
+    data_dir: str
+    raster_dir: str
+    shade_dir: str
+    land_cover_dir: str
+    raster_read_profile: Dict[str, Any]
+    raster_write_profile: Dict[str, Any]
+    device: str
+    mapping_mode: int
+
+    @classmethod
+    def load_toml(cls, path: str) -> "SystemConfigModel":
+        """Load a configuration model from a TOML file."""
+        data = toml.load(path)
+        return cls(**data)
+
+    def set_as_global(self, name: str = "default", set_default: bool = True) -> None:
+        """
+        Set this configuration as a global system configuration.
+
+        This method stores the configuration in the global SYSTEM_CONFIG dictionary
+        and optionally updates the default raster read/write profiles.
+
+        Args:
+            name (str, optional): Name to store this configuration under in the
+                SYSTEM_CONFIG dictionary. Defaults to "default".
+            set_default (bool, optional): If True, updates the global default_read_profile
+                and default_write_profile with values from this configuration.
+                Defaults to True.
+
+        Side Effects:
+            - Updates the global SYSTEM_CONFIG dictionary
+            - Optionally updates the global default_read_profile and default_write_profile
+
+        Examples:
+            >>> config = SystemConfigModel.load_toml("/path/to/config.toml")
+            >>> config.set_as_global("my_config")
+        """
+        global SYSTEM_CONFIG
+
+        SYSTEM_CONFIG[name] = self
+
+        if set_default:
+            default_read_profile.update(self.raster_read_profile)
+            default_write_profile.update(self.raster_write_profile)
+
+
+
+
+def get_config(name: str = "default") -> Optional[SystemConfigModel]:
+    """
+    Retrieve a stored system configuration by name.
+
+    Returns:
+        SystemConfigModel: The requested configuration object, or None if not registered.
+
+    Examples:
+        >>> config = get_config()
+    """
+    return SYSTEM_CONFIG.get(name)
+
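Assuming the fields declared above, a configuration file and its registration might look like the following sketch. The file name, paths, and values are illustrative only, and get_config is used with the by-name lookup shown in the hunk above:

# config.toml (hypothetical):
#   data_dir       = "/data/eoml"
#   raster_dir     = "/data/eoml/rasters"
#   shade_dir      = "/data/eoml/shade"
#   land_cover_dir = "/data/eoml/land_cover"
#   device         = "cuda"
#   mapping_mode   = 1
#   [raster_read_profile]
#   num_threads = "all_cpus"
#   [raster_write_profile]
#   compress = "zstd"

from eoml.automation.configuration import SystemConfigModel, get_config

config = SystemConfigModel.load_toml("config.toml")
config.set_as_global()        # registers under "default" and refreshes the default rasterio profiles
assert get_config() is config
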
eoml/automation/dag.py
ADDED
@@ -0,0 +1,233 @@
+"""
+Directed Acyclic Graph (DAG) module for task automation.
+
+This module provides classes for creating and managing directed acyclic graphs
+of tasks with dependencies. It enables workflow automation and task orchestration
+for machine learning experiments.
+"""
+
+
+class TaskGraph:
+    """
+    Container for a directed acyclic graph of tasks.
+
+    The TaskGraph manages a collection of tasks with dependencies, ensuring
+    unique task names and maintaining the execution order.
+
+    Attributes:
+        task_list (list): Ordered list of tasks in the graph.
+        task_dic (dict): Dictionary mapping task names to task objects.
+    """
+
+    def __init__(self, task_list):
+        """
+        Initialize a TaskGraph with a list of tasks.
+
+        Args:
+            task_list (list): List of Task objects to include in the graph.
+
+        Raises:
+            Exception: If duplicate task names are found.
+        """
+        self.task_list = task_list
+        self.task_dic = self._make_task_dic(task_list)
+
+        for t in task_list:
+            t.task_graph = self
+
+    def _make_task_dic(self, task_list):
+        """
+        Create a dictionary mapping task names to task objects.
+
+        Args:
+            task_list (list): List of Task objects.
+
+        Returns:
+            dict: Dictionary with task names as keys and Task objects as values.
+
+        Raises:
+            Exception: If duplicate task names are found.
+        """
+        task_dic = {}
+        for task in task_list:
+            if task.name in task_dic:
+                raise Exception(f"Duplicate task id {task.name}; task ids must be unique")
+
+            task_dic[task.name] = task
+
+        return task_dic
+
+    def manage_outputs(self, task_id, outputs):
+        """
+        Manage outputs of a task and propagate them to dependent tasks.
+
+        Args:
+            task_id: Identifier of the task producing outputs.
+            outputs: Outputs produced by the task.
+
+        Note:
+            This method is a placeholder and needs implementation.
+        """
+        # TODO: needs implementation
+        index = self.task_list.index(task_id)
+
+        for n in range(index, len(self.task_list)):
+            pass
+
+
+class Task:
+    """
+    Base class representing a task in a task graph.
+
+    A task has a name, inputs, and can be executed as part of a workflow.
+    Tasks can depend on outputs from other tasks in the graph.
+
+    Attributes:
+        name (str): Unique identifier for the task.
+        inputs (list): List of TaskInput objects defining task inputs.
+        need (dict): Dictionary of dependencies on other tasks.
+        task_graph (TaskGraph): Reference to the containing task graph.
+    """
+
+    def __init__(self, name, inputs):
+        """
+        Initialize a Task.
+
+        Args:
+            name (str): Unique identifier for the task.
+            inputs (list): List of TaskInput objects defining inputs.
+        """
+        self.name = name
+        self.inputs = inputs
+
+        self.need = self._need()
+
+        self.task_graph: TaskGraph = None  # set by TaskGraph.__init__
+
+    def make_input(self):
+        """
+        Resolve task inputs from the task graph.
+
+        Returns:
+            dict: Dictionary of resolved input values, with input names as keys.
+        """
+        inputs_dic = {}
+
+        for task_input in self.inputs:
+
+            if task_input.depend:
+                inputs_dic[task_input.name] = self.task_graph.task_dic[task_input.value]
+            else:
+                inputs_dic[task_input.name] = task_input.value
+
+        return inputs_dic
+
+    def _need(self):
+        """
+        Compute the dependencies the task needs from other tasks.
+
+        Returns:
+            dict: Dictionary mapping dependency names to lists of input names.
+
+        Note:
+            This method is marked TODO and may be incomplete.
+        """
+        # TODO
+
+        need = {}
+        for i in self.inputs:
+            if i.depend:
+                need.setdefault(i.value, []).append(i.name)
+
+        return need
+
+    def execute(self):
+        """
+        Execute the task by resolving inputs and calling process().
+
+        This method resolves all inputs from the task graph and passes them
+        to the process() method for execution.
+        """
+        inputs = self.make_input()
+        self.process(**inputs)
+
+    def process(self, **inputs):
+        """
+        Process the task with resolved inputs.
+
+        This is a placeholder method to be overridden by subclasses to implement
+        specific task logic.
+
+        Args:
+            **inputs: Resolved input values as keyword arguments.
+        """
+        pass
+
+
+class TaskInput:
+    """
+    Represents an input to a task.
+
+    A TaskInput can either be a direct value or a dependency on another task's output.
+
+    Attributes:
+        name (str): Name of the input parameter.
+        value: The value or name of the dependency.
+        depend (bool): If True, value refers to another task name; if False, value is used directly.
+    """
+
+    def __init__(self, name, value, depend=False):
+        """
+        Initialize a TaskInput.
+
+        Args:
+            name (str): Name of the input parameter.
+            value: The value or name of the dependency.
+            depend (bool, optional): Whether this input depends on another task. Defaults to False.
+        """
+        self.name = name
+        self.value = value
+        self.depend = depend
+
+
+class Value(Task):
+    """
+    A task that represents a constant value.
+
+    This is a simple task type that holds a value without any inputs or processing.
+    Useful for providing constant parameters to other tasks in the graph.
+
+    Attributes:
+        value: The constant value held by this task.
+    """
+
+    def __init__(self, name, value):
+        """
+        Initialize a Value task.
+
+        Args:
+            name (str): Unique identifier for the task.
+            value: The constant value to hold.
+        """
+        super().__init__(name, [])
+        self.value = value
+
+
+class ReadProfile(Task):
+    """
+    A task for reading a profile or configuration.
+
+    This task is intended to read and provide configuration profiles to other tasks.
+
+    Note:
+        This appears to be a placeholder class requiring implementation.
+    """
+
+    def __init__(self, name):
+        """
+        Initialize a ReadProfile task.
+
+        Args:
+            name (str): Unique identifier for the task.
+        """
+        super().__init__(name, [])