openeo-gfmap 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- openeo_gfmap/__init__.py +23 -0
- openeo_gfmap/backend.py +122 -0
- openeo_gfmap/features/__init__.py +17 -0
- openeo_gfmap/features/feature_extractor.py +389 -0
- openeo_gfmap/fetching/__init__.py +21 -0
- openeo_gfmap/fetching/commons.py +213 -0
- openeo_gfmap/fetching/fetching.py +98 -0
- openeo_gfmap/fetching/generic.py +165 -0
- openeo_gfmap/fetching/meteo.py +126 -0
- openeo_gfmap/fetching/s1.py +195 -0
- openeo_gfmap/fetching/s2.py +236 -0
- openeo_gfmap/inference/__init__.py +3 -0
- openeo_gfmap/inference/model_inference.py +347 -0
- openeo_gfmap/manager/__init__.py +31 -0
- openeo_gfmap/manager/job_manager.py +469 -0
- openeo_gfmap/manager/job_splitters.py +144 -0
- openeo_gfmap/metadata.py +24 -0
- openeo_gfmap/preprocessing/__init__.py +22 -0
- openeo_gfmap/preprocessing/cloudmasking.py +268 -0
- openeo_gfmap/preprocessing/compositing.py +74 -0
- openeo_gfmap/preprocessing/interpolation.py +12 -0
- openeo_gfmap/preprocessing/sar.py +64 -0
- openeo_gfmap/preprocessing/scaling.py +65 -0
- openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
- openeo_gfmap/preprocessing/udf_rank.py +37 -0
- openeo_gfmap/preprocessing/udf_score.py +103 -0
- openeo_gfmap/spatial.py +53 -0
- openeo_gfmap/stac/__init__.py +2 -0
- openeo_gfmap/stac/constants.py +51 -0
- openeo_gfmap/temporal.py +22 -0
- openeo_gfmap/utils/__init__.py +23 -0
- openeo_gfmap/utils/build_df.py +48 -0
- openeo_gfmap/utils/catalogue.py +248 -0
- openeo_gfmap/utils/intervals.py +64 -0
- openeo_gfmap/utils/netcdf.py +25 -0
- openeo_gfmap/utils/tile_processing.py +64 -0
- openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
- openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
- openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
- openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
openeo_gfmap/inference/model_inference.py
@@ -0,0 +1,347 @@
+"""Inference functionalities, such as a base class to assist the implementation
+of inference models in a UDF.
+"""
+import functools
+import inspect
+import logging
+import re
+import shutil
+import sys
+import urllib.request
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+import numpy as np
+import openeo
+import requests
+import xarray as xr
+from openeo.udf import XarrayDataCube
+from openeo.udf import inspect as udf_inspect
+from openeo.udf.udf_data import UdfData
+
+sys.path.insert(0, "onnx_deps")  # Directory containing the ONNX runtime dependencies
+import onnxruntime as ort  # noqa: E402
+
+EPSG_HARMONIZED_NAME = "GEO-EPSG"
+
+
+class ModelInference(ABC):
+    """Base class for all model inference UDFs. It provides some common
+    methods and attributes to be used by other model inference classes.
+    """
+
+    def __init__(self) -> None:
+        """
+        Initializes the ModelInference object, starting a logger.
+        """
+        logging.basicConfig(level=logging.INFO)
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+    @classmethod
+    @functools.lru_cache(maxsize=6)
+    def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:
+        """Extracts the dependencies from the given URL, unpacking the zip
+        archive in the current working directory, and returns the path to
+        the unpacked directory.
+
+        Parameters:
+        - base_url: The base public URL where the dependencies are stored.
+        - dependency_name: The name of the dependency file to download. This
+        parameter is appended to `base_url` as the download path to the .zip
+        archive.
+        Returns:
+        - The absolute path to the extracted dependencies directory, to be added
+        to the python path with the `sys.path.append` method.
+        """
+
+        # Generate absolute path for the dependencies folder
+        dependencies_dir = Path.cwd() / "dependencies"
+
+        # Create the directory if it doesn't exist
+        dependencies_dir.mkdir(exist_ok=True, parents=True)
+
+        # Download and extract the model file
+        modelfile_url = f"{base_url}/{dependency_name}"
+        modelfile, _ = urllib.request.urlretrieve(
+            modelfile_url, filename=dependencies_dir / Path(modelfile_url).name
+        )
+        shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)
+
+        # Add the model directory to system path if it's not already there
+        abs_path = str(
+            dependencies_dir / Path(modelfile_url).name.split(".zip")[0]
+        )  # NOQA
+
+        return abs_path
+
+    @functools.lru_cache(maxsize=6)
+    def load_ort_session(self, model_url: str):
+        """Loads an ONNX session from a publicly available URL. The URL must be a direct
+        download link to the ONNX session file.
+        The `lru_cache` decorator avoids loading the model multiple times within the same worker.
+        """
+        # Two minutes timeout to download the model
+        response = requests.get(model_url, timeout=120)
+        model = response.content
+
+        return ort.InferenceSession(model)
+
+    def apply_ml(
+        self, tensor: np.ndarray, session: ort.InferenceSession, input_name: str
+    ) -> np.ndarray:
+        """Applies the machine learning model to the input data as a tensor.
+
+        Parameters
+        ----------
+        tensor: np.ndarray
+            The input data with shape (bands, instance). If the input data is a tile (bands, y, x),
+            then the y, x dimensions must be flattened before being passed to this function.
+        session: ort.InferenceSession
+            The ONNX Session object, loaded from the `load_ort_session` class method.
+        input_name: str
+            The name of the input tensor in the ONNX session. Depends on how the serialized
+            ONNX model was generated. For example, CatBoost models have their input tensor named
+            features: https://catboost.ai/en/docs/concepts/apply-onnx-ml
+        """
+        return session.run(None, {input_name: tensor})[0]
+
+    def _common_preparations(
+        self, inarr: xr.DataArray, parameters: dict
+    ) -> xr.DataArray:
+        """Common preparations for all inference models. This method will be
+        executed at the very beginning of the process.
+        """
+        self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)
+        self._parameters = parameters
+        return inarr
+
+    def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:
+        arr = cube.get_array().transpose("bands", "y", "x")
+        arr = self._common_preparations(arr, parameters)
+        arr = self.execute(arr).transpose("bands", "y", "x")
+        return XarrayDataCube(arr)
+
+    @property
+    def epsg(self) -> int:
+        """EPSG code of the input data."""
+        return self._epsg
+
+    def dependencies(self) -> list:
+        """Returns the additional dependencies, such as wheels or zip files.
+        Dependencies should be returned as a list of strings, which will be set up at the top
+        of the generated UDF. More information can be found at:
+        https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies
+        """
+        self.logger.warning(
+            "Only onnxruntime is defined as a dependency. If you wish to add "
+            "dependencies to your model inference, override the "
+            "`dependencies` method in your class."
+        )
+        return ["onnxruntime"]
+
+    @abstractmethod
+    def output_labels(self) -> list:
+        """Returns the labels of the output data."""
+        raise NotImplementedError(
+            "ModelInference is a base abstract class, please implement the "
+            "output_labels property."
+        )
+
+    @abstractmethod
+    def execute(self, inarr: xr.DataArray) -> xr.DataArray:
+        """Executes the model inference."""
+        raise NotImplementedError(
+            "ModelInference is a base abstract class, please implement the "
+            "execute method."
+        )
+
+
+class ONNXModelInference(ModelInference):
+    """Basic implementation of model inference that loads an ONNX model and runs the data
+    through it. The input data, as for all model inference classes, is expected to have
+    ('bands', 'y', 'x') as its dimension order, where 'bands' are the features computed
+    the same way as for the training data.
+
+    The following parameters are necessary:
+    - `model_url`: URL to download the ONNX model.
+    - `input_name`: Name of the input tensor in the ONNX model.
+    - `output_labels`: Labels of the output data.
+
+    """
+
+    def dependencies(self) -> list:
+        return []  # Disable dependencies
+
+    def output_labels(self) -> list:
+        return self._parameters["output_labels"]
+
+    def execute(self, inarr: xr.DataArray) -> xr.DataArray:
+        if self._parameters.get("model_url") is None:
+            raise ValueError("The model_url must be defined in the parameters.")
+
+        # Load the model and the input_name parameters
+        session = self.load_ort_session(self._parameters.get("model_url"))
+
+        input_name = self._parameters.get("input_name")
+        if input_name is None:
+            input_name = session.get_inputs()[0].name
+            udf_inspect(
+                message=f"Input name not defined. Using the input name from the model session: {input_name}.",
+                level="warning",
+            )
+
+        # Run the model inference on the input data
+        input_data = inarr.values.astype(np.float32)
+        n_bands, height, width = input_data.shape
+
+        # Flatten the x and y coordinates into one dimension
+        input_data = input_data.reshape(n_bands, -1).T
+
+        # Make the prediction
+        output = self.apply_ml(input_data, session, input_name)
+
+        output = output.reshape(len(self.output_labels()), height, width)
+
+        return xr.DataArray(
+            output,
+            dims=["bands", "y", "x"],
+            coords={"bands": self.output_labels(), "x": inarr.x, "y": inarr.y},
+        )
+
+
+def apply_udf_data(udf_data: UdfData) -> UdfData:
+    model_inference_class = "<model_inference_class>"  # Replaced by the class name at generation time
+
+    # The user-defined model inference class, initialized here
+    model_inference = model_inference_class()
+
+    cube = udf_data.datacube_list[0]
+    parameters = udf_data.user_context
+
+    proj = udf_data.proj
+    if proj is not None:
+        proj = proj.get("EPSG")
+
+    parameters[EPSG_HARMONIZED_NAME] = proj
+
+    cube = model_inference._execute(cube, parameters=parameters)
+
+    udf_data.datacube_list = [cube]
+
+    return udf_data
+
+
+def _get_imports() -> str:
+    with open(__file__, "r", encoding="UTF-8") as f:
+        script_source = f.read()
+
+    lines = script_source.split("\n")
+
+    imports = []
+    static_globals = []
+
+    for line in lines:
+        if line.strip().startswith(
+            ("import ", "from ", "sys.path.insert(", "sys.path.append(")
+        ):
+            imports.append(line)
+        elif re.match(r"^[A-Z_0-9]+\s*=.*$", line):
+            static_globals.append(line)
+
+    return "\n".join(imports) + "\n\n" + "\n".join(static_globals)
+
+
+def _get_apply_udf_data(model_inference: ModelInference) -> str:
+    source_lines = inspect.getsource(apply_udf_data)
+    source = "".join(source_lines)
+    # Replace the `model_inference_class` placeholder in the source function
+    return source.replace('"<model_inference_class>"', model_inference.__name__)
+
+
+def _generate_udf_code(
+    model_inference_class: ModelInference, dependencies: list
+) -> str:
+    """Generates the UDF code by packing the imports of this file, the necessary
+    superclass and subclasses, as well as the user-defined model inference
+    class and the apply_udf_data function.
+    """
+
+    # UDF code that will be built here
+    udf_code = ""
+
+    assert issubclass(
+        model_inference_class, ModelInference
+    ), "The model inference class must be a subclass of ModelInference."
+
+    dependencies_code = ""
+    dependencies_code += "# /// script\n"
+    dependencies_code += "# dependencies = {}\n".format(
+        str(dependencies).replace("'", '"')
+    )
+    dependencies_code += "# ///\n"
+
+    udf_code += dependencies_code + "\n"
+    udf_code += _get_imports() + "\n\n"
+    udf_code += f"{inspect.getsource(ModelInference)}\n\n"
+    udf_code += f"{inspect.getsource(model_inference_class)}\n\n"
+    udf_code += _get_apply_udf_data(model_inference_class)
+    return udf_code
+
+
+def apply_model_inference(
+    model_inference_class: ModelInference,
+    cube: openeo.rest.datacube.DataCube,
+    parameters: dict,
+    size: list,
+    overlap: list = [],
+) -> openeo.rest.datacube.DataCube:
+    """Applies a user-defined model inference on the cube by using the
+    `openeo.DataCube.apply_neighborhood` method. The defined class as well as the
+    required subclasses will be packed into a generated UDF file that will be
+    executed.
+    """
+    model_inference = model_inference_class()
+    model_inference._parameters = parameters
+    output_labels = model_inference.output_labels()
+    dependencies = model_inference.dependencies()
+
+    udf_code = _generate_udf_code(model_inference_class, dependencies)
+
+    udf = openeo.UDF(code=udf_code, context=parameters)
+
+    cube = cube.apply_neighborhood(process=udf, size=size, overlap=overlap)
+    return cube.rename_labels(dimension="bands", target=output_labels)
+
+
+def apply_model_inference_local(
+    model_inference_class: ModelInference, cube: xr.DataArray, parameters: dict
+) -> xr.DataArray:
+    """Applies a user-defined model inference, but locally. The
+    parameters are the same as in the `apply_model_inference` function,
+    except for the `cube` parameter, which expects an `xarray.DataArray`
+    instead of an `openeo.rest.datacube.DataCube` object.
+    """
+    # Try to get the local EPSG code
+    if EPSG_HARMONIZED_NAME not in parameters:
+        raise ValueError(
+            f"Please specify an EPSG code in the parameters with key: {EPSG_HARMONIZED_NAME} when "
+            "running a Model Inference locally."
+        )
+
+    model_inference = model_inference_class()
+    output_labels = model_inference.output_labels()
+    dependencies = model_inference.dependencies()
+
+    if len(dependencies) > 0:
+        model_inference.logger.warning(
+            "Running UDFs locally with pip dependencies is not supported yet, "
+            "dependencies will not be installed."
+        )
+
+    cube = XarrayDataCube(cube)
+
+    return (
+        model_inference._execute(cube, parameters)
+        .get_array()
+        .assign_coords({"bands": output_labels})
+    )
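Taken together, the workflow of model_inference.py is: subclass `ModelInference` (or use `ONNXModelInference` directly), then call `apply_model_inference`, which instantiates the class, packs this module's scaffolding plus the subclass into a generated UDF (with a PEP 723-style `# /// script` dependency header), and runs it through `apply_neighborhood`. Below is a minimal usage sketch; the backend URL, collection, extents, band names, and `model_url` are placeholder assumptions, not values from the package:

import openeo

from openeo_gfmap.inference.model_inference import (
    ONNXModelInference,
    apply_model_inference,
)

# Hypothetical backend and input cube; any openEO collection carrying the
# bands the model was trained on works the same way.
connection = openeo.connect("openeo.example.org").authenticate_oidc()
cube = connection.load_collection(
    "SENTINEL2_L2A",
    spatial_extent={"west": 5.00, "south": 51.20, "east": 5.10, "north": 51.30},
    temporal_extent=["2023-06-01", "2023-06-30"],
    bands=["B04", "B08"],
)

parameters = {
    # Hypothetical direct-download link to a serialized ONNX model.
    "model_url": "https://example.com/models/my_model.onnx",
    # Name of the model's input tensor; if omitted, the first input of the
    # ONNX session is used and a warning is emitted through udf_inspect.
    "input_name": "features",
    # Labels assigned to the output bands via rename_labels.
    "output_labels": ["probability"],
}

inference = apply_model_inference(
    model_inference_class=ONNXModelInference,
    cube=cube,
    parameters=parameters,
    # Patch size for apply_neighborhood, in pixels.
    size=[
        {"dimension": "x", "unit": "px", "value": 128},
        {"dimension": "y", "unit": "px", "value": 128},
    ],
)
inference.execute_batch("inference.nc")

For debugging without a backend, `apply_model_inference_local` takes an `xarray.DataArray` with ('bands', 'y', 'x') dimensions and the same parameters, except that the EPSG code must be supplied explicitly under the `GEO-EPSG` key.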
openeo_gfmap/manager/__init__.py
@@ -0,0 +1,31 @@
+"""OpenEO GFMAP Manager submodule. Implements the logic of splitting jobs into
+subjobs and managing the subjobs.
+"""
+
+import logging
+
+_log = logging.getLogger(__name__)
+
+_log.setLevel(logging.INFO)
+
+stream_handler = logging.StreamHandler()
+_log.addHandler(stream_handler)
+
+formatter = logging.Formatter("%(asctime)s|%(name)s|%(levelname)s: %(message)s")
+stream_handler.setFormatter(formatter)
+
+
+# Exclude loggers from other libraries
+class ManagerLoggerFilter(logging.Filter):
+    """Filter to only accept the OpenEO-GFMAP manager logs."""
+
+    def filter(self, record):
+        return record.name in [_log.name]
+
+
+stream_handler.addFilter(ManagerLoggerFilter())
+
+
+def set_log_level(level):
+    """Set the log level of the OpenEO-GFMAP manager logger."""
+    _log.setLevel(level)
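This module configures the shared `openeo_gfmap.manager` logger once at import time; the attached `ManagerLoggerFilter` keeps records from other libraries out of its stream handler. Users normally only interact with `set_log_level`, as in this small sketch:

import logging

from openeo_gfmap.manager import set_log_level

# Raise verbosity of the GFMAP manager logs only; records from other
# libraries are filtered out by ManagerLoggerFilter.
set_log_level(logging.DEBUG)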