eoml 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. eoml/__init__.py +74 -0
  2. eoml/automation/__init__.py +7 -0
  3. eoml/automation/configuration.py +105 -0
  4. eoml/automation/dag.py +233 -0
  5. eoml/automation/experience.py +618 -0
  6. eoml/automation/tasks.py +825 -0
  7. eoml/bin/__init__.py +6 -0
  8. eoml/bin/clean_checkpoint.py +146 -0
  9. eoml/bin/land_cover_mapping_toml.py +435 -0
  10. eoml/bin/mosaic_images.py +137 -0
  11. eoml/data/__init__.py +7 -0
  12. eoml/data/basic_geo_data.py +214 -0
  13. eoml/data/dataset_utils.py +98 -0
  14. eoml/data/persistence/__init__.py +7 -0
  15. eoml/data/persistence/generic.py +253 -0
  16. eoml/data/persistence/lmdb.py +379 -0
  17. eoml/data/persistence/serializer.py +82 -0
  18. eoml/raster/__init__.py +7 -0
  19. eoml/raster/band.py +141 -0
  20. eoml/raster/dataset/__init__.py +6 -0
  21. eoml/raster/dataset/extractor.py +604 -0
  22. eoml/raster/raster_reader.py +602 -0
  23. eoml/raster/raster_utils.py +116 -0
  24. eoml/torch/__init__.py +7 -0
  25. eoml/torch/cnn/__init__.py +7 -0
  26. eoml/torch/cnn/augmentation.py +150 -0
  27. eoml/torch/cnn/dataset_evaluator.py +68 -0
  28. eoml/torch/cnn/db_dataset.py +605 -0
  29. eoml/torch/cnn/map_dataset.py +579 -0
  30. eoml/torch/cnn/map_dataset_const_mem.py +135 -0
  31. eoml/torch/cnn/outputs_transformer.py +130 -0
  32. eoml/torch/cnn/torch_utils.py +404 -0
  33. eoml/torch/cnn/training_dataset.py +241 -0
  34. eoml/torch/cnn/windows_dataset.py +120 -0
  35. eoml/torch/dataset/__init__.py +6 -0
  36. eoml/torch/dataset/shade_dataset_tester.py +46 -0
  37. eoml/torch/dataset/shade_tree_dataset_creators.py +537 -0
  38. eoml/torch/model_low_use.py +507 -0
  39. eoml/torch/models.py +282 -0
  40. eoml/torch/resnet.py +437 -0
  41. eoml/torch/sample_statistic.py +260 -0
  42. eoml/torch/trainer.py +782 -0
  43. eoml/torch/trainer_v2.py +253 -0
  44. eoml-0.9.0.dist-info/METADATA +93 -0
  45. eoml-0.9.0.dist-info/RECORD +47 -0
  46. eoml-0.9.0.dist-info/WHEEL +4 -0
  47. eoml-0.9.0.dist-info/entry_points.txt +3 -0
eoml/__init__.py ADDED
@@ -0,0 +1,74 @@
1
+ """
2
+ Earth Observation Machine Learning (EOML) Package.
3
+
4
+ This package provides tools and utilities for processing Earth observation data
5
+ and machine learning workflows for remote sensing applications. It includes modules
6
+ for data processing, raster operations, neural network training, and automation.
7
+
8
+ The package is organized into the following main modules:
9
+ - automation: Task automation and experiment configuration
10
+ - data: Data structures and persistence utilities
11
+ - ee: Google Earth Engine integration
12
+ - raster: Raster data reading and processing
13
+ - torch: PyTorch-based machine learning models and training
14
+ - bin: Command-line utilities
15
+ """
16
+
17
# Default options used when opening rasters for reading.
default_read_profile = dict(num_threads='all_cpus')

# Default GeoTIFF creation options used when writing rasters:
# tiled 512x512 blocks, ZSTD compression, BigTIFF only when safer.
default_write_profile = dict(
    driver='GTiff',
    BIGTIFF='IF_SAFER',
    num_threads='all_cpus',
    tiled=True,
    blockxsize=512,
    blockysize=512,
    compress='zstd',
)
26
+
27
+
28
def get_read_profile(**kargs):
    """
    Build a TIFF reader profile from the package defaults.

    A fresh dictionary is returned on every call, so callers may modify the
    result without touching ``default_read_profile``.

    Args:
        **kargs: Profile entries that are added to, or take precedence over,
            the entries of ``default_read_profile``.

    Returns:
        dict: The default read profile merged with ``kargs``.

    Examples:
        >>> profile = get_read_profile()
        >>> profile = get_read_profile(num_threads=4)
    """
    # Later unpacking wins, so caller-supplied entries override the defaults.
    return {**default_read_profile, **kargs}
48
+
49
+
50
def get_write_profile(**kargs):
    """
    Build a TIFF writer profile from the package defaults.

    A fresh dictionary is returned on every call, so callers may modify the
    result without touching ``default_write_profile``.

    Args:
        **kargs: Profile entries that are added to, or take precedence over,
            the entries of ``default_write_profile``.

    Returns:
        dict: The default write profile merged with ``kargs``.

    Examples:
        >>> profile = get_write_profile()
        >>> profile = get_write_profile(compress='lzw', num_threads=4)
    """
    # Later unpacking wins, so caller-supplied entries override the defaults.
    return {**default_write_profile, **kargs}
@@ -0,0 +1,7 @@
1
+ """
2
+ Automation Module for EOML.
3
+
4
+ This module provides automation utilities for machine learning experiments
5
+ in Earth observation applications. It includes configuration management,
6
+ task scheduling, and experiment workflow orchestration.
7
+ """
@@ -0,0 +1,105 @@
1
+ """
2
+ Configuration management module for EOML automation.
3
+
4
+ This module provides system-wide configuration management for EOML experiments,
5
+ including paths to data directories, raster processing profiles, and neural
6
+ network device settings.
7
+ """
8
+
9
+ import os
10
+
11
+ import toml
12
+ from typing import Any, Dict, Union, Optional
13
+
14
+ from eoml import default_read_profile, default_write_profile
15
+
16
+ # Pydantic model for configuration (v2)
17
+ try:
18
+ from pydantic import BaseModel, Field, AliasChoices
19
+ except Exception: # pragma: no cover - optional import hint
20
+ BaseModel = object # type: ignore
21
+ Field = lambda *args, **kwargs: None # type: ignore
22
+ class AliasChoices: # type: ignore
23
+ def __init__(self, *args, **kwargs):
24
+ pass
25
+
26
# Registry of named system configurations. It stays ``None`` until the first
# call to ``SystemConfigModel.set_as_global`` creates the dictionary.
SYSTEM_CONFIG: Optional[Dict[str, "SystemConfigModel"]] = None


class SystemConfigModel(BaseModel):
    """
    Pydantic representation of System configuration.

    This class stores configuration settings for data directories, raster
    processing profiles, and neural network execution parameters.

    Attributes:
        data_dir (str): Base directory for data storage.
        raster_dir (str): Directory containing raster files.
        shade_dir (str): Directory for shade/canopy data.
        land_cover_dir (str): Directory for land cover data.
        raster_read_profile (dict): Rasterio profile for reading rasters.
        raster_write_profile (dict): Rasterio profile for writing rasters.
        device (str): Device for neural network execution ('cpu', 'cuda', etc.).
        mapping_mode (int): Mode for map generation (0=CPU, 1=GPU with pinned memory, etc.).
    """

    data_dir: str
    raster_dir: str
    shade_dir: str
    land_cover_dir: str
    raster_read_profile: Dict[str, Any]
    raster_write_profile: Dict[str, Any]
    device: str
    mapping_mode: int

    @classmethod
    def load_toml(cls, path: str) -> "SystemConfigModel":
        """
        Load a configuration model from a TOML file.

        Args:
            path (str): Path of the TOML file. Its top-level keys are passed
                to the model constructor as keyword arguments.

        Returns:
            SystemConfigModel: The configuration built from the file content.
        """
        data = toml.load(path)
        return cls(**data)

    def set_as_global(self, name: str = "default", set_default: bool = True) -> None:
        """
        Register this configuration in the global SYSTEM_CONFIG registry.

        Args:
            name (str, optional): Key under which this configuration is stored
                in SYSTEM_CONFIG. An existing entry with the same name is
                replaced. Defaults to "default".
            set_default (bool, optional): If True, updates the shared
                default_read_profile and default_write_profile dictionaries in
                place with the profiles of this configuration.
                Defaults to True.

        Side Effects:
            - Creates the SYSTEM_CONFIG dictionary on first use and stores
              this configuration in it
            - Optionally updates default_read_profile and default_write_profile

        Examples:
            >>> config = SystemConfigModel.load_toml("/path/to/config.toml")
            >>> config.set_as_global("my_config")
        """
        global SYSTEM_CONFIG

        # SYSTEM_CONFIG starts as None; item assignment on None would raise
        # TypeError, so the registry is created lazily here.
        if SYSTEM_CONFIG is None:
            SYSTEM_CONFIG = {}
        SYSTEM_CONFIG[name] = self

        if set_default:
            # update() mutates the dictionaries imported from ``eoml`` in
            # place, so every module sharing them sees the new defaults.
            default_read_profile.update(self.raster_read_profile)
            default_write_profile.update(self.raster_write_profile)


def get_config(name: str = "default") -> Optional[SystemConfigModel]:
    """
    Retrieve a stored system configuration by name.

    Args:
        name (str, optional): Name the configuration was registered under with
            ``SystemConfigModel.set_as_global``. Defaults to "default".

    Returns:
        Optional[SystemConfigModel]: The requested configuration, or None when
        no configuration has been registered under ``name``.

    Examples:
        >>> config = get_config()
        >>> config = get_config("my_config")
    """
    if SYSTEM_CONFIG is None:
        # Nothing has been registered yet.
        return None
    return SYSTEM_CONFIG.get(name)
105
+
eoml/automation/dag.py ADDED
@@ -0,0 +1,233 @@
1
+ """
2
+ Directed Acyclic Graph (DAG) module for task automation.
3
+
4
+ This module provides classes for creating and managing directed acyclic graphs
5
+ of tasks with dependencies. It enables workflow automation and task orchestration
6
+ for machine learning experiments.
7
+ """
8
+
9
+
10
class TaskGraph:
    """
    Container for a directed acyclic graph of tasks.

    The TaskGraph manages a collection of tasks with dependencies, ensuring
    unique task names and keeping the tasks in the order they were given.

    Attributes:
        task_list (list): Ordered list of tasks in the graph.
        task_dic (dict): Dictionary mapping task names to task objects.
    """

    def __init__(self, task_list):
        """
        Initialize a TaskGraph with a list of tasks.

        Every task receives a back-reference to this graph through its
        ``task_graph`` attribute.

        Args:
            task_list (list): List of Task objects to include in the graph.

        Raises:
            ValueError: If duplicate task names are found.
        """
        self.task_list = task_list
        self.task_dic = self._make_task_dic(task_list)

        # Back-reference used by the tasks to look up the tasks they depend on.
        for t in task_list:
            t.task_graph = self

    def _make_task_dic(self, task_list):
        """
        Create a dictionary mapping task names to task objects.

        Args:
            task_list (list): List of Task objects.

        Returns:
            dict: Dictionary with task names as keys and Task objects as values.

        Raises:
            ValueError: If duplicate task names are found.
        """
        task_dic = {}
        for task in task_list:
            if task.name in task_dic:
                raise ValueError(f"Duplicate task with task id {task.name}, task id should be unique ")

            task_dic[task.name] = task

        return task_dic

    def manage_outputs(self, task_id, outputs):
        """
        Manage outputs of a task and propagate them to dependent tasks.

        Args:
            task_id: Name of the task producing outputs. The task object
                itself is accepted as well.
            outputs: Outputs produced by the task. Currently unused.

        Raises:
            ValueError: If the task is not part of this graph.

        Note:
            This method is a placeholder: it only validates that the task
            belongs to the graph. Propagation of ``outputs`` still needs to
            be implemented.
        """
        # task_list holds task objects, so a task name must be resolved
        # through task_dic before its position can be looked up.
        try:
            task = self.task_dic[task_id]
        except (KeyError, TypeError):
            # Not a registered name (or not hashable): treat it as a task object.
            task = task_id

        # list.index raises ValueError when the task is not in the graph.
        self.task_list.index(task)

        # TODO: propagate ``outputs`` to the tasks that depend on this one.
76
+
77
+
78
class Task:
    """
    Base class representing a task in a task graph.

    A task has a name, inputs, and can be executed as part of a workflow.
    Tasks can depend on other tasks in the graph.

    Attributes:
        name (str): Unique identifier for the task.
        inputs (list): List of TaskInput objects defining task inputs.
        need (dict): Maps the name of each task this task depends on to the
            list of input names fed by that task.
        task_graph (TaskGraph): Reference to the containing task graph. It is
            None until the task is added to a TaskGraph.
    """

    def __init__(self, name, inputs):
        """
        Initialize a Task.

        Args:
            name (str): Unique identifier for the task.
            inputs (list): List of TaskInput objects defining inputs.
        """
        self.name = name
        self.inputs = inputs

        self.need = self._need()

        # Assigned by TaskGraph.__init__ when the task joins a graph.
        self.task_graph: TaskGraph = None

    def make_input(self):
        """
        Resolve task inputs from the task graph.

        Inputs flagged as dependencies are replaced by the task object
        registered in the graph under the name stored in their value; all
        other inputs are passed through unchanged.

        Returns:
            dict: Dictionary of resolved input values, with input names as keys.
        """
        inputs_dic = {}

        for task_input in self.inputs:
            if task_input.depend:
                inputs_dic[task_input.name] = self.task_graph.task_dic[task_input.value]
            else:
                inputs_dic[task_input.name] = task_input.value

        return inputs_dic

    def _need(self):
        """
        Compute the dependencies the task needs from other tasks.

        Returns:
            dict: Dictionary mapping the name of each required task to the
            list of input names that depend on it, in input order.
        """
        need = {}
        for i in self.inputs:
            if i.depend:
                # setdefault stores the list in the dictionary; dict.get with
                # a default would append to a temporary list and lose it.
                need.setdefault(i.value, []).append(i.name)

        return need

    def execute(self):
        """
        Execute the task by resolving inputs and calling process().

        This method resolves all inputs from the task graph and passes them
        to the process() method as keyword arguments.
        """
        inputs = self.make_input()
        self.process(**inputs)

    def process(self, **inputs):
        """
        Process the task with resolved inputs.

        This is a placeholder method to be overridden by subclasses to implement
        specific task logic.

        Args:
            **inputs: Resolved input values as keyword arguments.
        """
        pass
165
+
166
+
167
class TaskInput:
    """
    Describes a single named input of a task.

    The input either carries its value directly or points to another task of
    the graph by name.

    Attributes:
        name (str): Name of the input parameter.
        value: The literal value, or the name of the task depended upon.
        depend (bool): True when ``value`` is the name of another task,
            False when ``value`` is used as is.
    """

    def __init__(self, name, value, depend=False):
        """
        Create a task input.

        Args:
            name (str): Name of the input parameter.
            value: The literal value, or the name of the task depended upon.
            depend (bool, optional): Whether ``value`` names another task.
                Defaults to False.
        """
        self.name, self.value, self.depend = name, value, depend
191
+
192
+
193
class Value(Task):
    """
    Task wrapping a constant.

    The task declares no inputs and simply stores the value it was given.

    Attributes:
        value: The constant value held by this task.
    """

    def __init__(self, name, value):
        """
        Create a constant-value task.

        Args:
            name (str): Unique identifier for the task.
            value: The constant value to hold.
        """
        # A constant has no inputs, hence the empty input list.
        super().__init__(name, inputs=[])
        self.value = value
214
+
215
+
216
class ReadProfile(Task):
    """
    Task meant to read a profile or configuration.

    Note:
        Placeholder: the class only registers a task without inputs and adds
        no behavior of its own yet.
    """

    def __init__(self, name):
        """
        Create a ReadProfile task.

        Args:
            name (str): Unique identifier for the task.
        """
        # The task declares no inputs, hence the empty input list.
        super().__init__(name, inputs=[])