eotdl 2025.2.10__py3-none-any.whl → 2025.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. eotdl/__init__.py +1 -1
  2. eotdl/access/__init__.py +13 -3
  3. eotdl/access/download.py +47 -14
  4. eotdl/access/search.py +33 -5
  5. eotdl/access/sentinelhub/__init__.py +6 -2
  6. eotdl/access/sentinelhub/client.py +7 -6
  7. eotdl/access/sentinelhub/evalscripts.py +266 -0
  8. eotdl/access/sentinelhub/parameters.py +101 -23
  9. eotdl/access/sentinelhub/utils.py +54 -15
  10. eotdl/cli.py +2 -2
  11. eotdl/commands/datasets.py +28 -31
  12. eotdl/commands/models.py +27 -30
  13. eotdl/commands/stac.py +57 -0
  14. eotdl/curation/__init__.py +0 -8
  15. eotdl/curation/stac/__init__.py +1 -8
  16. eotdl/curation/stac/api.py +58 -0
  17. eotdl/curation/stac/stac.py +31 -341
  18. eotdl/datasets/__init__.py +2 -2
  19. eotdl/datasets/ingest.py +36 -161
  20. eotdl/datasets/retrieve.py +0 -9
  21. eotdl/datasets/stage.py +64 -0
  22. eotdl/files/__init__.py +0 -2
  23. eotdl/files/ingest.bck +178 -0
  24. eotdl/files/ingest.py +237 -166
  25. eotdl/{datasets → files}/metadata.py +16 -17
  26. eotdl/models/__init__.py +1 -1
  27. eotdl/models/ingest.py +35 -158
  28. eotdl/models/stage.py +63 -0
  29. eotdl/repos/APIRepo.py +1 -1
  30. eotdl/repos/DatasetsAPIRepo.py +56 -43
  31. eotdl/repos/FilesAPIRepo.py +260 -167
  32. eotdl/repos/ModelsAPIRepo.py +50 -42
  33. eotdl/repos/STACAPIRepo.py +40 -0
  34. eotdl/repos/__init__.py +1 -0
  35. eotdl/tools/time_utils.py +3 -3
  36. {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/METADATA +1 -1
  37. eotdl-2025.4.2.dist-info/RECORD +66 -0
  38. eotdl/curation/stac/assets.py +0 -110
  39. eotdl/curation/stac/dataframe.py +0 -172
  40. eotdl/curation/stac/dataframe_bck.py +0 -253
  41. eotdl/curation/stac/dataframe_labeling.py +0 -63
  42. eotdl/curation/stac/extensions/__init__.py +0 -23
  43. eotdl/curation/stac/extensions/base.py +0 -30
  44. eotdl/curation/stac/extensions/dem.py +0 -18
  45. eotdl/curation/stac/extensions/eo.py +0 -117
  46. eotdl/curation/stac/extensions/label/__init__.py +0 -7
  47. eotdl/curation/stac/extensions/label/base.py +0 -136
  48. eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
  49. eotdl/curation/stac/extensions/label/scaneo.py +0 -219
  50. eotdl/curation/stac/extensions/ml_dataset.py +0 -648
  51. eotdl/curation/stac/extensions/projection.py +0 -44
  52. eotdl/curation/stac/extensions/raster.py +0 -53
  53. eotdl/curation/stac/extensions/sar.py +0 -55
  54. eotdl/curation/stac/extent.py +0 -158
  55. eotdl/curation/stac/parsers.py +0 -61
  56. eotdl/datasets/download.py +0 -104
  57. eotdl/files/list_files.py +0 -13
  58. eotdl/models/metadata.py +0 -43
  59. eotdl-2025.2.10.dist-info/RECORD +0 -81
  60. {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/WHEEL +0 -0
  61. {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/entry_points.txt +0 -0
@@ -1,219 +0,0 @@
1
- """
2
- Module for the STAC label extension ScaneoLabeler object
3
- """
4
-
5
- import json
6
- from os.path import join, dirname, exists, splitext, basename, abspath
7
- from typing import List, Optional, Union
8
- from glob import glob
9
-
10
- import pystac
11
-
12
- from tqdm import tqdm
13
- from pystac.extensions.label import LabelExtension
14
-
15
- from .base import LabelExtensionObject
16
- from ...extent import get_unknow_extent
17
-
18
-
19
class ScaneoLabeler(LabelExtensionObject):
    """
    STAC label extension ScaneoLabeler object in EOTDL.

    Builds a ``labels`` collection inside a STAC catalog from the GeoJSON
    label files found in a folder (one ``<item id>_labels.geojson`` per item,
    optionally next to a ``labels.json`` file listing the label classes).
    """

    def __init__(self) -> None:
        super().__init__()

    def generate_stac_labels(
        self,
        catalog: Union[pystac.Catalog, str],
        root_folder: str,
        collection: Optional[Union[pystac.Collection, str]] = "source",
        label_description: Optional[str] = "Item label",
        label_type: Optional[str] = "vector",
        # NOTE: shared default list kept for signature compatibility; this
        # method itself never mutates it.
        label_names: Optional[List[str]] = ["label"],
        **kwargs,
    ) -> None:
        """
        Generate a ``labels`` collection for the items of a source collection.
        This method should be used when the items have been labeled using
        SCANEO, as it relies on the SCANEO labeling format.

        The catalog is validated and saved in place (self-contained) once the
        labels collection has been populated.

        :param catalog: catalog (or path to a catalog file) to add the labels collection to
        :param root_folder: folder holding the ``*.geojson`` label files and,
            optionally, a ``labels.json`` file
        :param collection: source collection (or its ID) whose items are labeled
        :param label_description: label description
        :param label_type: label type
        :param label_names: list of label names
        :param kwargs: optional arguments forwarded to ``add_extension_to_item``;
            ``label_tasks`` is overwritten for every item
        :param kwargs.label_properties: list of label properties
        :param kwargs.label_methods: list of label methods

        :raises ValueError: if no source collection is given, or if no GeoJSON
            file is found in ``root_folder``
        :raises pystac.STACError: if the resulting catalog does not validate
        """
        if isinstance(catalog, str):
            catalog = pystac.Catalog.from_file(catalog)

        if not collection:
            raise ValueError(
                "No source collection provided, please provide a source collection"
            )

        # ``Catalog.get_child`` looks children up by ID, so a Collection
        # object has to be resolved to its ID first.
        if isinstance(collection, pystac.Collection):
            source_id = collection.id
        else:
            source_id = collection

        # If the source collection exists, reuse its extent and label its items
        source_collection = catalog.get_child(source_id)
        if source_collection:
            extent = source_collection.extent
            source_items = source_collection.get_stac_objects(pystac.RelType.ITEM)
        else:
            # Source collection not found in the catalog: fall back to an
            # unknown extent and an empty set of items, so the loop below is
            # a no-op instead of failing on an unbound variable.
            extent = get_unknow_extent()
            source_items = []

        # Create the labels collection and add it to the catalog,
        # replacing any previous collection with the same ID
        labels_collection = pystac.Collection(
            id="labels", description="Labels", extent=extent
        )
        if any(
            child.id == labels_collection.id for child in catalog.get_children()
        ):
            catalog.remove_child(labels_collection.id)
        catalog.add_child(labels_collection)

        # Get the GeoJSON files
        geojson_files = glob(join(root_folder, "*.geojson"))
        if not geojson_files:
            raise ValueError(
                "No GeoJSON files found in the root folder, please provide a root folder with the GeoJSON files"
            )

        # Get the label classes
        label_classes = self.get_label_classes(root_folder, geojson_files)

        # Generate the labels items
        for source_item in tqdm(source_items, desc="Generating labels collection..."):
            # Get the GeoJSON label of the item and the tasks it declares
            geojson_label = self.get_geojson_of_item(source_item, geojson_files)
            kwargs["label_tasks"] = (
                self.get_tasks_from_geojson(geojson_label) if geojson_label else None
            )

            # Create the label item
            label_item = self.add_extension_to_item(
                source_item,
                label_description=label_description,
                label_type=label_type,
                label_names=label_names,
                label_classes=label_classes,
                **kwargs,
            )
            # Add the self href to the label item, following the Best Practices Layout
            # https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
            label_item.set_self_href(
                join(
                    dirname(labels_collection.get_self_href()),
                    label_item.id,
                    f"{label_item.id}.json",
                )
            )
            # Match the GeoJSON label with the label item, if it exists
            if geojson_label and exists(geojson_label):
                self.add_geojson_to_item(label_item, geojson_label, label_type)
            # Add the item to the collection
            labels_collection.add_item(label_item)

        # Add the extension to the collection.
        # NOTE(review): label_classes is already wrapped in a list by
        # get_label_classes; the extra nesting is kept as in the original --
        # confirm it matches what add_extension_to_collection expects.
        self.add_extension_to_collection(
            labels_collection,
            label_names=[label_names],
            label_classes=[label_classes],
            label_type=label_type,
        )

        # Validate and save the catalog
        try:
            pystac.validation.validate(catalog)
            catalog.normalize_and_save(
                dirname(catalog.get_self_href()), pystac.CatalogType.SELF_CONTAINED
            )
            print("Success on labels generation!")
        except pystac.STACValidationError as e:
            # Chain the original error so the validation details are not lost
            raise pystac.STACError(f"Catalog validation error: {e}") from e

    def add_geojson_to_item(
        self, item: pystac.Item, geojson_path: str, label_type: str
    ) -> None:
        """
        Add a GeoJSON FeatureCollection asset to a label item, as recommended by the spec
        https://github.com/stac-extensions/label#assets

        :param item: label item to attach the GeoJSON labels to
        :param geojson_path: path to the GeoJSON file; stored as an absolute href
        :param label_type: label type, used to build the ``labels-<label_type>`` role
        """
        properties = {"roles": ["labels", f"labels-{label_type}"]}

        label_ext = LabelExtension.ext(item, add_if_missing=True)
        item.make_asset_hrefs_absolute()
        label_ext.add_geojson_labels(
            href=abspath(geojson_path), title="Label", properties=properties
        )
        item.make_asset_hrefs_relative()

    def get_label_classes(
        self, root_folder: str, geojsons: List[str]
    ) -> List[List[str]]:
        """
        Get the label classes from the labels.json file if it exists, or from the GeoJSON files instead

        :param root_folder: folder where the ``labels.json`` file is looked up
        :param geojsons: GeoJSON files to read the classes from when there is
            no ``labels.json`` file

        :return: a one-element list holding the list of unique label classes,
            in order of first appearance
        """
        labels_json = glob(join(root_folder, "labels.json"))
        if labels_json and exists(labels_json[0]):
            with open(labels_json[0], "r", encoding="utf-8") as f:
                labels = json.load(f)
            candidates = [value["name"] for value in labels["labels"]]
        else:
            candidates = []
            for geojson in geojsons:
                with open(geojson, "r", encoding="utf-8") as f:
                    labels = json.load(f)
                candidates.extend(
                    value["properties"]["labels"] for value in labels["features"]
                )

        # De-duplicate while keeping the order of first appearance
        label_classes = []
        for candidate in candidates:
            if candidate not in label_classes:
                label_classes.append(candidate)

        return [label_classes]

    def get_geojson_of_item(
        self, item: pystac.Item, geojsons: List[str]
    ) -> Optional[str]:
        """
        Get the GeoJSON label of the item from a list of GeoJSON files

        :param item: item to get the GeoJSON label
        :param geojsons: list of GeoJSON files

        :return: path to the GeoJSON file named ``<item id>_labels``, or None
            if there is no such file in the list
        """
        geojson_name = f"{item.id}_labels"
        # Map <geojson filename without extension> -> <geojson path>, as the
        # filename must match the item ID
        geojsons_by_name = {
            splitext(basename(geojson))[0]: geojson for geojson in geojsons
        }

        return geojsons_by_name.get(geojson_name)

    def get_tasks_from_geojson(self, geojson_path: str) -> List[str]:
        """
        Get the tasks from the GeoJSON label

        :param geojson_path: path to the GeoJSON label

        :return: list of unique tasks, in order of first appearance
        """
        with open(geojson_path, "r", encoding="utf-8") as f:
            geojson = json.load(f)

        tasks = []
        for feature in geojson["features"]:
            for task in feature["properties"]["tasks"]:
                if task not in tasks:
                    tasks.append(task)

        return tasks