eotdl 2025.2.10__py3-none-any.whl → 2025.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/__init__.py +13 -3
- eotdl/access/download.py +47 -14
- eotdl/access/search.py +33 -5
- eotdl/access/sentinelhub/__init__.py +6 -2
- eotdl/access/sentinelhub/client.py +7 -6
- eotdl/access/sentinelhub/evalscripts.py +266 -0
- eotdl/access/sentinelhub/parameters.py +101 -23
- eotdl/access/sentinelhub/utils.py +54 -15
- eotdl/cli.py +2 -2
- eotdl/commands/datasets.py +28 -31
- eotdl/commands/models.py +27 -30
- eotdl/commands/stac.py +57 -0
- eotdl/curation/__init__.py +0 -8
- eotdl/curation/stac/__init__.py +1 -8
- eotdl/curation/stac/api.py +58 -0
- eotdl/curation/stac/stac.py +31 -341
- eotdl/datasets/__init__.py +2 -2
- eotdl/datasets/ingest.py +36 -161
- eotdl/datasets/retrieve.py +0 -9
- eotdl/datasets/stage.py +64 -0
- eotdl/files/__init__.py +0 -2
- eotdl/files/ingest.bck +178 -0
- eotdl/files/ingest.py +237 -166
- eotdl/{datasets → files}/metadata.py +16 -17
- eotdl/models/__init__.py +1 -1
- eotdl/models/ingest.py +35 -158
- eotdl/models/stage.py +63 -0
- eotdl/repos/APIRepo.py +1 -1
- eotdl/repos/DatasetsAPIRepo.py +56 -43
- eotdl/repos/FilesAPIRepo.py +260 -167
- eotdl/repos/ModelsAPIRepo.py +50 -42
- eotdl/repos/STACAPIRepo.py +40 -0
- eotdl/repos/__init__.py +1 -0
- eotdl/tools/time_utils.py +3 -3
- {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/METADATA +1 -1
- eotdl-2025.4.2.dist-info/RECORD +66 -0
- eotdl/curation/stac/assets.py +0 -110
- eotdl/curation/stac/dataframe.py +0 -172
- eotdl/curation/stac/dataframe_bck.py +0 -253
- eotdl/curation/stac/dataframe_labeling.py +0 -63
- eotdl/curation/stac/extensions/__init__.py +0 -23
- eotdl/curation/stac/extensions/base.py +0 -30
- eotdl/curation/stac/extensions/dem.py +0 -18
- eotdl/curation/stac/extensions/eo.py +0 -117
- eotdl/curation/stac/extensions/label/__init__.py +0 -7
- eotdl/curation/stac/extensions/label/base.py +0 -136
- eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
- eotdl/curation/stac/extensions/label/scaneo.py +0 -219
- eotdl/curation/stac/extensions/ml_dataset.py +0 -648
- eotdl/curation/stac/extensions/projection.py +0 -44
- eotdl/curation/stac/extensions/raster.py +0 -53
- eotdl/curation/stac/extensions/sar.py +0 -55
- eotdl/curation/stac/extent.py +0 -158
- eotdl/curation/stac/parsers.py +0 -61
- eotdl/datasets/download.py +0 -104
- eotdl/files/list_files.py +0 -13
- eotdl/models/metadata.py +0 -43
- eotdl-2025.2.10.dist-info/RECORD +0 -81
- {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/WHEEL +0 -0
- {eotdl-2025.2.10.dist-info → eotdl-2025.4.2.dist-info}/entry_points.txt +0 -0
@@ -1,219 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for the STAC label extension ScaneoLabeler object
|
3
|
-
"""
|
4
|
-
|
5
|
-
import json
|
6
|
-
from os.path import join, dirname, exists, splitext, basename, abspath
|
7
|
-
from typing import List, Optional, Union
|
8
|
-
from glob import glob
|
9
|
-
|
10
|
-
import pystac
|
11
|
-
|
12
|
-
from tqdm import tqdm
|
13
|
-
from pystac.extensions.label import LabelExtension
|
14
|
-
|
15
|
-
from .base import LabelExtensionObject
|
16
|
-
from ...extent import get_unknow_extent
|
17
|
-
|
18
|
-
|
19
|
-
class ScaneoLabeler(LabelExtensionObject):
    """
    STAC label extension ScaneoLabeler object in EOTDL

    Builds a ``labels`` collection inside a STAC catalog from GeoJSON files
    that follow the SCANEO labeling format.
    """

    def __init__(self) -> None:
        super().__init__()

    def generate_stac_labels(
        self,
        catalog: Union[pystac.Catalog, str],
        root_folder: str,
        collection: Optional[Union[pystac.Collection, str]] = "source",
        label_description: Optional[str] = "Item label",
        label_type: Optional[str] = "vector",
        label_names: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        """
        Generate a labels collection from the items of a source collection.
        This class should be used when the items have been labeled using SCANEO, as it is
        implemented taking into account the SCANEO labeling format.

        A collection with id ``labels`` is (re)created in the catalog, one label item is
        generated per source item, and the catalog is validated and saved as a
        self-contained catalog next to its own self href.

        :param catalog: catalog to add the labels collection to, or the path to its file
        :param root_folder: root folder where are the images and the labels as GeoJSON files,
            following the SCANEO labeling format (``<item_id>_labels.geojson``)
        :param collection: source collection (or its id inside the catalog) whose items are
            labeled
        :param label_description: label description
        :param label_type: label type
        :param label_names: list of label names, ``["label"]`` when omitted
        :param kwargs: optional arguments forwarded to ``add_extension_to_item``
        :param kwargs.label_properties: list of label properties
        :param kwargs.label_methods: list of label methods

        :raises ValueError: if no source collection is provided or no GeoJSON file is found
            in the root folder
        :raises pystac.STACError: if the resulting catalog does not validate
        """
        # Build the default per call so it is never shared between calls
        if label_names is None:
            label_names = ["label"]

        if isinstance(catalog, str):
            catalog = pystac.Catalog.from_file(catalog)

        # Resolve the source collection. A Collection object is used as is,
        # because catalog.get_child() looks children up by id.
        if isinstance(collection, pystac.Collection):
            source_collection = collection
        elif collection:
            source_collection = catalog.get_child(collection)
        else:
            raise ValueError(
                "No source collection provided, please provide a source collection"
            )

        # If the source collection exists, reuse its extent and label its items.
        # Otherwise fall back to an unknown extent; there are no items to label,
        # so the labels collection ends up empty.
        if source_collection is not None:
            extent = source_collection.extent
            source_items = source_collection.get_stac_objects(pystac.RelType.ITEM)
        else:
            extent = get_unknow_extent()
            source_items = []

        # Create the labels collection, replacing a previous one if it exists
        labels_collection = pystac.Collection(
            id="labels", description="Labels", extent=extent
        )
        if any(child.id == labels_collection.id for child in catalog.get_children()):
            catalog.remove_child(labels_collection.id)
        catalog.add_child(labels_collection)

        # Get the GeoJSON files
        geojson_files = glob(join(root_folder, "*.geojson"))
        if not geojson_files:
            raise ValueError(
                "No GeoJSON files found in the root folder, please provide a root folder with the GeoJSON files"
            )

        # Get the label classes
        label_classes = self.get_label_classes(root_folder, geojson_files)

        # Generate the labels items
        for source_item in tqdm(source_items, desc="Generating labels collection..."):
            # Get the GeoJSON label of the item (None when the item has no label file)
            geojson_label = self.get_geojson_of_item(source_item, geojson_files)
            # The tasks found in the GeoJSON label travel with the other kwargs
            kwargs["label_tasks"] = (
                self.get_tasks_from_geojson(geojson_label) if geojson_label else None
            )

            # Create the label item
            # NOTE(review): add_extension_to_item is inherited and not visible here
            label_item = self.add_extension_to_item(
                source_item,
                label_description=label_description,
                label_type=label_type,
                label_names=label_names,
                label_classes=label_classes,
                **kwargs,
            )
            # Add the self href to the label item, following the Best Practices Layout
            # https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
            label_item.set_self_href(
                join(
                    dirname(labels_collection.get_self_href()),
                    label_item.id,
                    f"{label_item.id}.json",
                )
            )
            # Match the GeoJSON label with the label item, if exists
            if geojson_label and exists(geojson_label):
                self.add_geojson_to_item(label_item, geojson_label, label_type)
            # Add the item to the collection
            labels_collection.add_item(label_item)

        # Add the extension to the collection
        # NOTE(review): the extra list wrapping is kept exactly as it was; confirm the
        # shape expected by the inherited add_extension_to_collection
        self.add_extension_to_collection(
            labels_collection,
            label_names=[label_names],
            label_classes=[label_classes],
            label_type=label_type,
        )

        # Validate and save the catalog
        try:
            pystac.validation.validate(catalog)
        except pystac.STACValidationError as e:
            # Chain the original error so the validation details are not lost
            raise pystac.STACError(f"Catalog validation error: {e}") from e
        else:
            catalog.normalize_and_save(
                dirname(catalog.get_self_href()), pystac.CatalogType.SELF_CONTAINED
            )
            print("Success on labels generation!")

    def add_geojson_to_item(
        self, item: pystac.Item, geojson_path: str, label_type: str
    ) -> None:
        """
        Add a GeoJSON FeatureCollection asset to a label item, as recommended by the spec
        https://github.com/stac-extensions/label#assets

        :param item: label item to add the GeoJSON asset to
        :param geojson_path: path to the GeoJSON file with the labels of the item
        :param label_type: label type, used to build the ``labels-<label_type>`` role
        """
        properties = {"roles": ["labels", f"labels-{label_type}"]}

        label_ext = LabelExtension.ext(item, add_if_missing=True)
        # The asset is registered with an absolute href between the two calls below
        item.make_asset_hrefs_absolute()
        label_ext.add_geojson_labels(
            href=abspath(geojson_path), title="Label", properties=properties
        )
        item.make_asset_hrefs_relative()

    def get_label_classes(
        self, root_folder: str, geojsons: List[str]
    ) -> List[List[str]]:
        """
        Get the label classes from the labels.json file if exists, or from the GeoJSON files instead

        :param root_folder: folder where the labels.json file is looked for
        :param geojsons: list of GeoJSON files, only read when there is no labels.json

        :return: a one-element list holding the list of unique label classes, in order
            of first appearance
        """
        label_classes = []

        labels_json = join(root_folder, "labels.json")
        if exists(labels_json):
            with open(labels_json, "r", encoding="utf-8") as f:
                labels = json.load(f)
            for value in labels["labels"]:
                if value["name"] not in label_classes:
                    label_classes.append(value["name"])
        else:
            for geojson in geojsons:
                with open(geojson, "r", encoding="utf-8") as f:
                    labels = json.load(f)
                for value in labels["features"]:
                    feature_labels = value["properties"]["labels"]
                    # List membership (not a set): the value may be unhashable
                    if feature_labels not in label_classes:
                        label_classes.append(feature_labels)

        return [label_classes]

    def get_geojson_of_item(
        self, item: pystac.Item, geojsons: List[str]
    ) -> Optional[str]:
        """
        Get the GeoJSON label of the item from a list of GeoJSON files

        :param item: item to get the GeoJSON label
        :param geojsons: list of GeoJSON files

        :return: path to the GeoJSON label of the item, None if there is no
            file named ``<item_id>_labels``
        """
        geojson_name = f"{item.id}_labels"
        # Map <geojson_filename>: <geojson_path>, as the geojson_filename
        # (without extension) must match the item ID plus the "_labels" suffix
        geojsons_dict = {
            splitext(basename(geojson))[0]: geojson for geojson in geojsons
        }

        return geojsons_dict.get(geojson_name)

    def get_tasks_from_geojson(self, geojson_path: str) -> List[str]:
        """
        Get the tasks from the GeoJSON label

        :param geojson_path: path to the GeoJSON label

        :return: list of unique tasks, in order of first appearance
        """
        with open(geojson_path, "r", encoding="utf-8") as f:
            geojson = json.load(f)

        tasks = []
        for feature in geojson["features"]:
            for task in feature["properties"]["tasks"]:
                if task not in tasks:
                    tasks.append(task)

        return tasks
|