openeo-gfmap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. openeo_gfmap/__init__.py +23 -0
  2. openeo_gfmap/backend.py +122 -0
  3. openeo_gfmap/features/__init__.py +17 -0
  4. openeo_gfmap/features/feature_extractor.py +389 -0
  5. openeo_gfmap/fetching/__init__.py +21 -0
  6. openeo_gfmap/fetching/commons.py +213 -0
  7. openeo_gfmap/fetching/fetching.py +98 -0
  8. openeo_gfmap/fetching/generic.py +165 -0
  9. openeo_gfmap/fetching/meteo.py +126 -0
  10. openeo_gfmap/fetching/s1.py +195 -0
  11. openeo_gfmap/fetching/s2.py +236 -0
  12. openeo_gfmap/inference/__init__.py +3 -0
  13. openeo_gfmap/inference/model_inference.py +347 -0
  14. openeo_gfmap/manager/__init__.py +31 -0
  15. openeo_gfmap/manager/job_manager.py +469 -0
  16. openeo_gfmap/manager/job_splitters.py +144 -0
  17. openeo_gfmap/metadata.py +24 -0
  18. openeo_gfmap/preprocessing/__init__.py +22 -0
  19. openeo_gfmap/preprocessing/cloudmasking.py +268 -0
  20. openeo_gfmap/preprocessing/compositing.py +74 -0
  21. openeo_gfmap/preprocessing/interpolation.py +12 -0
  22. openeo_gfmap/preprocessing/sar.py +64 -0
  23. openeo_gfmap/preprocessing/scaling.py +65 -0
  24. openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
  25. openeo_gfmap/preprocessing/udf_rank.py +37 -0
  26. openeo_gfmap/preprocessing/udf_score.py +103 -0
  27. openeo_gfmap/spatial.py +53 -0
  28. openeo_gfmap/stac/__init__.py +2 -0
  29. openeo_gfmap/stac/constants.py +51 -0
  30. openeo_gfmap/temporal.py +22 -0
  31. openeo_gfmap/utils/__init__.py +23 -0
  32. openeo_gfmap/utils/build_df.py +48 -0
  33. openeo_gfmap/utils/catalogue.py +248 -0
  34. openeo_gfmap/utils/intervals.py +64 -0
  35. openeo_gfmap/utils/netcdf.py +25 -0
  36. openeo_gfmap/utils/tile_processing.py +64 -0
  37. openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
  38. openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
  39. openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
  40. openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
openeo_gfmap/inference/model_inference.py
@@ -0,0 +1,347 @@
+ """Inference functionalities, such as a base class to assist the implementation
+ of inference models in a UDF.
+ """
+ import functools
+ import inspect
+ import logging
+ import re
+ import shutil
+ import sys
+ import urllib.request
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+
+ import numpy as np
+ import openeo
+ import requests
+ import xarray as xr
+ from openeo.udf import XarrayDataCube
+ from openeo.udf import inspect as udf_inspect
+ from openeo.udf.udf_data import UdfData
+
+ sys.path.insert(0, "onnx_deps")
+ import onnxruntime as ort  # noqa: E402
+
+ EPSG_HARMONIZED_NAME = "GEO-EPSG"
+
+
+ class ModelInference(ABC):
+     """Base class for all model inference UDFs. It provides some common
+     methods and attributes to be used by other model inference classes.
+     """
+
+     def __init__(self) -> None:
+         """Initializes the ModelInference object, starting a logger."""
+         logging.basicConfig(level=logging.INFO)
+         self.logger = logging.getLogger(self.__class__.__name__)
+
+     @classmethod
+     @functools.lru_cache(maxsize=6)
+     def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:
+         """Extracts the dependencies from the given URL, unpacking the zip
+         archive in the current working directory and returning the path to
+         the unpacked directory.
+
+         Parameters:
+         - base_url: The base public URL where the dependencies are stored.
+         - dependency_name: The name of the dependency file to download. This
+           parameter is appended to `base_url` as the download path of the
+           .zip archive.
+         Returns:
+         - The absolute path to the extracted dependencies directory, to be
+           added to the python path with the `sys.path.append` method.
+         """
+
+         # Generate absolute path for the dependencies folder
+         dependencies_dir = Path.cwd() / "dependencies"
+
+         # Create the directory if it doesn't exist
+         dependencies_dir.mkdir(exist_ok=True, parents=True)
+
+         # Download and extract the dependencies archive
+         modelfile_url = f"{base_url}/{dependency_name}"
+         modelfile, _ = urllib.request.urlretrieve(
+             modelfile_url, filename=dependencies_dir / Path(modelfile_url).name
+         )
+         shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)
+
+         # Absolute path to the extracted directory, named after the archive
+         abs_path = str(
+             dependencies_dir / Path(modelfile_url).name.split(".zip")[0]
+         )  # NOQA
+
+         return abs_path
+
+     @functools.lru_cache(maxsize=6)
+     def load_ort_session(self, model_url: str):
+         """Loads an ONNX session from a publicly available URL. The URL must be
+         a direct download link to the ONNX session file.
+         The `lru_cache` decorator avoids loading the model multiple times within
+         the same worker.
+         """
+         # Two minutes timeout to download the model
+         response = requests.get(model_url, timeout=120)
+         model = response.content
+
+         return ort.InferenceSession(model)
+
+     def apply_ml(
+         self, tensor: np.ndarray, session: ort.InferenceSession, input_name: str
+     ) -> np.ndarray:
+         """Applies the machine learning model to the input data as a tensor.
+
+         Parameters
+         ----------
+         tensor: np.ndarray
+             The input data with shape (bands, instance). If the input data is a
+             tile (bands, y, x), then the y, x dimensions must be flattened
+             before being passed to this function.
+         session: ort.InferenceSession
+             The ONNX Session object, loaded from the `load_ort_session` method.
+         input_name: str
+             The name of the input tensor in the ONNX session. Depends on how
+             the serialized ONNX model was generated. For example, CatBoost
+             models have their input tensor named as
+             features: https://catboost.ai/en/docs/concepts/apply-onnx-ml
+         """
+         return session.run(None, {input_name: tensor})[0]
+
+     def _common_preparations(
+         self, inarr: xr.DataArray, parameters: dict
+     ) -> xr.DataArray:
+         """Common preparations for all inference models. This method will be
+         executed at the very beginning of the process.
+         """
+         self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)
+         self._parameters = parameters
+         return inarr
+
+     def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:
+         arr = cube.get_array().transpose("bands", "y", "x")
+         arr = self._common_preparations(arr, parameters)
+         arr = self.execute(arr).transpose("bands", "y", "x")
+         return XarrayDataCube(arr)
+
+     @property
+     def epsg(self) -> int:
+         """EPSG code of the input data."""
+         return self._epsg
+
+     def dependencies(self) -> list:
+         """Returns the additional dependencies such as wheels or zip files.
+         Dependencies should be returned as a list of strings, which will be set
+         up at the top of the generated UDF. More information can be found at:
+         https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies
+         """
+         self.logger.warning(
+             "Only onnx is defined as dependency. If you wish to add "
+             "dependencies to your model inference, override the "
+             "`dependencies` method in your class."
+         )
+         return ["onnxruntime"]
+
+     @abstractmethod
+     def output_labels(self) -> list:
+         """Returns the labels of the output data."""
+         raise NotImplementedError(
+             "ModelInference is a base abstract class, please implement the "
+             "output_labels method."
+         )
+
+     @abstractmethod
+     def execute(self, inarr: xr.DataArray) -> xr.DataArray:
+         """Executes the model inference."""
+         raise NotImplementedError(
+             "ModelInference is a base abstract class, please implement the "
+             "execute method."
+         )
+
+
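As an illustration (not part of the package), a minimal subclass only needs to implement `output_labels` and `execute`. The class name and `threshold` parameter below are invented; the sketch relies on `_common_preparations` having populated `self._parameters` before `execute` runs:

class ThresholdInference(ModelInference):
    """Hypothetical inference that thresholds the first band of the input."""

    def output_labels(self) -> list:
        return ["above_threshold"]

    def execute(self, inarr: xr.DataArray) -> xr.DataArray:
        # `_parameters` is set by `_common_preparations` before this is called
        threshold = self._parameters.get("threshold", 0.5)
        mask = (inarr.isel(bands=0) > threshold).astype("uint8")
        return mask.expand_dims(dim="bands").assign_coords(bands=self.output_labels())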
+ class ONNXModelInference(ModelInference):
+     """Basic implementation of model inference that loads an ONNX model and
+     runs the data through it. The input data, as for all model inference
+     classes, is expected to have ('bands', 'y', 'x') as dimension order, where
+     'bands' are the features that were computed the same way as for the
+     training data.
+
+     The following parameters are necessary:
+     - `model_url`: URL to download the ONNX model.
+     - `input_name`: Name of the input tensor in the ONNX model.
+     - `output_labels`: Labels of the output data.
+     """
+
+     def dependencies(self) -> list:
+         return []  # Disable dependencies
+
+     def output_labels(self) -> list:
+         return self._parameters["output_labels"]
+
+     def execute(self, inarr: xr.DataArray) -> xr.DataArray:
+         if self._parameters.get("model_url") is None:
+             raise ValueError("The model_url must be defined in the parameters.")
+
+         # Load the model and the input_name parameters
+         session = self.load_ort_session(self._parameters.get("model_url"))
+
+         input_name = self._parameters.get("input_name")
+         if input_name is None:
+             input_name = session.get_inputs()[0].name
+             udf_inspect(
+                 message=f"Input name not defined. Using name of parameters from the model session: {input_name}.",
+                 level="warning",
+             )
+
+         # Run the model inference on the input data
+         input_data = inarr.values.astype(np.float32)
+         n_bands, height, width = input_data.shape
+
+         # Flatten the x and y coordinates into one
+         input_data = input_data.reshape(n_bands, -1).T
+
+         # Make the prediction
+         output = self.apply_ml(input_data, session, input_name)
+
+         output = output.reshape(len(self.output_labels()), height, width)
+
+         return xr.DataArray(
+             output,
+             dims=["bands", "y", "x"],
+             coords={"bands": self.output_labels(), "x": inarr.x, "y": inarr.y},
+         )
+
+
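For reference, a parameters dictionary consumed by `ONNXModelInference` could look like the sketch below; the URL and labels are placeholders, not real artifacts:

parameters = {
    "model_url": "https://example.com/models/model.onnx",  # placeholder URL
    "input_name": "features",  # optional: inferred from the session when omitted
    "output_labels": ["probability"],
}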
+ def apply_udf_data(udf_data: UdfData) -> UdfData:
+     model_inference_class = "<model_inference_class>"
+
+     # The user-defined model inference class is substituted in and
+     # initialized here
+     model_inference = model_inference_class()
+
+     cube = udf_data.datacube_list[0]
+     parameters = udf_data.user_context
+
+     proj = udf_data.proj
+     if proj is not None:
+         proj = proj.get("EPSG")
+
+     parameters[EPSG_HARMONIZED_NAME] = proj
+
+     cube = model_inference._execute(cube, parameters=parameters)
+
+     udf_data.datacube_list = [cube]
+
+     return udf_data
+
+
+ def _get_imports() -> str:
+     with open(__file__, "r", encoding="UTF-8") as f:
+         script_source = f.read()
+
+     lines = script_source.split("\n")
+
+     imports = []
+     static_globals = []
+
+     for line in lines:
+         if line.strip().startswith(
+             ("import ", "from ", "sys.path.insert(", "sys.path.append(")
+         ):
+             imports.append(line)
+         elif re.match(r"^[A-Z_0-9]+\s*=.*$", line):
+             static_globals.append(line)
+
+     return "\n".join(imports) + "\n\n" + "\n".join(static_globals)
+
+
+ def _get_apply_udf_data(model_inference: ModelInference) -> str:
+     source = inspect.getsource(apply_udf_data)
+     # Replace the `model_inference_class` placeholder in the source function
+     return source.replace('"<model_inference_class>"', model_inference.__name__)
+
+
+ def _generate_udf_code(
+     model_inference_class: ModelInference, dependencies: list
+ ) -> str:
+     """Generates the UDF code by packing the imports of this file, the
+     necessary superclass and subclasses, as well as the user-defined model
+     inference class and the apply_datacube function.
+     """
+
+     # UDF code that will be built here
+     udf_code = ""
+
+     assert issubclass(
+         model_inference_class, ModelInference
+     ), "The model inference class must be a subclass of ModelInference."
+
+     dependencies_code = ""
+     dependencies_code += "# /// script\n"
+     dependencies_code += "# dependencies = {}\n".format(
+         str(dependencies).replace("'", '"')
+     )
+     dependencies_code += "# ///\n"
+
+     udf_code += dependencies_code + "\n"
+     udf_code += _get_imports() + "\n\n"
+     udf_code += f"{inspect.getsource(ModelInference)}\n\n"
+     udf_code += f"{inspect.getsource(model_inference_class)}\n\n"
+     udf_code += _get_apply_udf_data(model_inference_class)
+     return udf_code
+
+
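To make the generated header concrete: for `dependencies=["onnxruntime"]`, the inline script metadata block built above would read:

# /// script
# dependencies = ["onnxruntime"]
# ///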
+ def apply_model_inference(
+     model_inference_class: ModelInference,
+     cube: openeo.rest.datacube.DataCube,
+     parameters: dict,
+     size: list,
+     overlap: list = [],
+ ) -> openeo.rest.datacube.DataCube:
+     """Applies a user-defined model inference on the cube by using the
+     `openeo.Cube.apply_neighborhood` method. The defined class as well as the
+     required subclasses will be packed into a generated UDF file that will be
+     executed.
+     """
+     model_inference = model_inference_class()
+     model_inference._parameters = parameters
+     output_labels = model_inference.output_labels()
+     dependencies = model_inference.dependencies()
+
+     udf_code = _generate_udf_code(model_inference_class, dependencies)
+
+     udf = openeo.UDF(code=udf_code, context=parameters)
+
+     cube = cube.apply_neighborhood(process=udf, size=size, overlap=overlap)
+     return cube.rename_labels(dimension="bands", target=output_labels)
+
+
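A usage sketch for `apply_model_inference`; the backend URL, collection, model URL and tile size are assumptions, not prescribed by the package:

import openeo

from openeo_gfmap.inference.model_inference import (
    ONNXModelInference,
    apply_model_inference,
)

# Connect to an openEO backend (placeholder URL) and load two bands
connection = openeo.connect("openeo.example.org").authenticate_oidc()
cube = connection.load_collection("SENTINEL2_L2A", bands=["B04", "B08"])

# Run the packed UDF over 128x128 pixel neighborhoods
cube = apply_model_inference(
    ONNXModelInference,
    cube,
    parameters={
        "model_url": "https://example.com/models/model.onnx",  # placeholder
        "output_labels": ["probability"],
    },
    size=[
        {"dimension": "x", "unit": "px", "value": 128},
        {"dimension": "y", "unit": "px", "value": 128},
    ],
)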
+ def apply_model_inference_local(
+     model_inference_class: ModelInference, cube: xr.DataArray, parameters: dict
+ ) -> xr.DataArray:
+     """Applies a user-defined model inference, but locally. The parameters are
+     the same as in the `apply_model_inference` function, except for the cube
+     parameter which expects an `xarray.DataArray` instead of an
+     `openeo.rest.datacube.DataCube` object.
+     """
+     # Trying to get the local EPSG code
+     if EPSG_HARMONIZED_NAME not in parameters:
+         raise ValueError(
+             f"Please specify an EPSG code in the parameters with key: {EPSG_HARMONIZED_NAME} when "
+             f"running a Model Inference locally."
+         )
+
+     model_inference = model_inference_class()
+     # Set the parameters before querying the output labels, as subclasses
+     # such as ONNXModelInference read them from `_parameters`
+     model_inference._parameters = parameters
+     output_labels = model_inference.output_labels()
+     dependencies = model_inference.dependencies()
+
+     if len(dependencies) > 0:
+         model_inference.logger.warning(
+             "Running UDFs locally with pip dependencies is not supported yet, "
+             "dependencies will not be installed."
+         )
+
+     cube = XarrayDataCube(cube)
+
+     return (
+         model_inference._execute(cube, parameters)
+         .get_array()
+         .assign_coords({"bands": output_labels})
+     )
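And a local counterpart for `apply_model_inference_local`, again a sketch with invented data; note that the `EPSG_HARMONIZED_NAME` key is mandatory here:

import numpy as np
import xarray as xr

from openeo_gfmap.inference.model_inference import (
    EPSG_HARMONIZED_NAME,
    ONNXModelInference,
    apply_model_inference_local,
)

# Invented 2-band tile; a real workflow would load actual features
inarr = xr.DataArray(
    np.random.rand(2, 64, 64).astype(np.float32),
    dims=["bands", "y", "x"],
    coords={"bands": ["B04", "B08"], "y": np.arange(64), "x": np.arange(64)},
)

result = apply_model_inference_local(
    ONNXModelInference,
    inarr,
    parameters={
        EPSG_HARMONIZED_NAME: 32631,
        "model_url": "https://example.com/models/model.onnx",  # placeholder
        "output_labels": ["probability"],
    },
)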
openeo_gfmap/manager/__init__.py
@@ -0,0 +1,31 @@
+ """OpenEO GFMAP Manager submodule. Implements the logic of splitting the jobs
+ into subjobs and managing the subjobs.
+ """
+
+ import logging
+
+ _log = logging.getLogger(__name__)
+
+ _log.setLevel(logging.INFO)
+
+ stream_handler = logging.StreamHandler()
+ _log.addHandler(stream_handler)
+
+ formatter = logging.Formatter("%(asctime)s|%(name)s|%(levelname)s: %(message)s")
+ stream_handler.setFormatter(formatter)
+
+
+ # Exclude log records coming from other libraries' loggers
+ class ManagerLoggerFilter(logging.Filter):
+     """Filter to only accept the OpenEO-GFMAP manager logs."""
+
+     def filter(self, record):
+         return record.name in [_log.name]
+
+
+ stream_handler.addFilter(ManagerLoggerFilter())
+
+
+ def set_log_level(level):
+     """Set the log level of the OpenEO-GFMAP manager logger."""
+     _log.setLevel(level)
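For example, to only see warnings and errors from the manager logger (a usage sketch):

import logging

from openeo_gfmap.manager import set_log_level

set_log_level(logging.WARNING)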