eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/search.py +0 -2
- eotdl/access/sentinelhub/parameters.py +1 -1
- eotdl/cli.py +2 -2
- eotdl/commands/datasets.py +28 -31
- eotdl/commands/models.py +27 -30
- eotdl/commands/stac.py +57 -0
- eotdl/curation/__init__.py +0 -8
- eotdl/curation/stac/__init__.py +1 -8
- eotdl/curation/stac/api.py +58 -0
- eotdl/curation/stac/stac.py +31 -341
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +28 -159
- eotdl/datasets/retrieve.py +0 -9
- eotdl/datasets/stage.py +64 -0
- eotdl/files/__init__.py +0 -2
- eotdl/files/ingest.bck +178 -0
- eotdl/files/ingest.py +229 -164
- eotdl/{datasets → files}/metadata.py +16 -17
- eotdl/models/__init__.py +1 -1
- eotdl/models/ingest.py +28 -159
- eotdl/models/stage.py +60 -0
- eotdl/repos/APIRepo.py +1 -1
- eotdl/repos/DatasetsAPIRepo.py +56 -43
- eotdl/repos/FilesAPIRepo.py +260 -167
- eotdl/repos/STACAPIRepo.py +40 -0
- eotdl/repos/__init__.py +1 -0
- eotdl/tools/geo_utils.py +7 -2
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/METADATA +5 -4
- eotdl-2025.3.25.dist-info/RECORD +65 -0
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/WHEEL +1 -1
- eotdl/curation/stac/assets.py +0 -110
- eotdl/curation/stac/dataframe.py +0 -172
- eotdl/curation/stac/dataframe_bck.py +0 -253
- eotdl/curation/stac/dataframe_labeling.py +0 -63
- eotdl/curation/stac/extensions/__init__.py +0 -23
- eotdl/curation/stac/extensions/base.py +0 -30
- eotdl/curation/stac/extensions/dem.py +0 -18
- eotdl/curation/stac/extensions/eo.py +0 -117
- eotdl/curation/stac/extensions/label/__init__.py +0 -7
- eotdl/curation/stac/extensions/label/base.py +0 -136
- eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
- eotdl/curation/stac/extensions/label/scaneo.py +0 -219
- eotdl/curation/stac/extensions/ml_dataset.py +0 -648
- eotdl/curation/stac/extensions/projection.py +0 -44
- eotdl/curation/stac/extensions/raster.py +0 -53
- eotdl/curation/stac/extensions/sar.py +0 -55
- eotdl/curation/stac/extent.py +0 -158
- eotdl/curation/stac/parsers.py +0 -61
- eotdl/datasets/download.py +0 -104
- eotdl/files/list_files.py +0 -13
- eotdl/models/download.py +0 -101
- eotdl/models/metadata.py +0 -43
- eotdl/wrappers/utils.py +0 -35
- eotdl-2024.10.7.dist-info/RECORD +0 -82
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/entry_points.txt +0 -0
@@ -1,117 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for EO STAC extensions object
|
3
|
-
"""
|
4
|
-
|
5
|
-
from typing import Union
|
6
|
-
|
7
|
-
import pystac
|
8
|
-
import pandas as pd
|
9
|
-
|
10
|
-
from pystac.extensions.eo import Band, EOExtension
|
11
|
-
|
12
|
-
from .base import STACExtensionObject
|
13
|
-
|
14
|
-
|
15
|
-
class EOS2ExtensionObject(STACExtensionObject):
    """
    EO STAC extension object for Sentinel-2 data.

    Holds a lookup table from Sentinel-2 band identifiers (``"B01"`` to
    ``"B12"``, including ``"B08a"``) to ``pystac.extensions.eo.Band``
    definitions, and applies the EO extension to STAC items and assets.
    """

    def __init__(self) -> None:
        super().__init__()
        # Band identifier -> EO extension Band definition.
        self.bands_dict = {
            "B01": Band.create(
                name="B01",
                description="Coastal aerosol, 442.7 nm (S2A), 442.3 nm (S2B)",
                common_name="coastal",
            ),
            "B02": Band.create(
                name="B02",
                description="Blue, 492.4 nm (S2A), 492.1 nm (S2B)",
                common_name="blue",
            ),
            "B03": Band.create(
                name="B03",
                description="Green, 559.8 nm (S2A), 559.0 nm (S2B)",
                common_name="green",
            ),
            "B04": Band.create(
                name="B04",
                description="Red, 664.6 nm (S2A), 665.0 nm (S2B)",
                common_name="red",
            ),
            "B05": Band.create(
                name="B05",
                description="Vegetation red edge, 704.1 nm (S2A), 703.8 nm (S2B)",
                common_name="rededge",
            ),
            "B06": Band.create(
                name="B06",
                description="Vegetation red edge, 740.5 nm (S2A), 739.1 nm (S2B)",
                common_name="rededge",
            ),
            "B07": Band.create(
                name="B07",
                description="Vegetation red edge, 782.8 nm (S2A), 779.7 nm (S2B)",
                common_name="rededge",
            ),
            "B08": Band.create(
                name="B08",
                description="NIR, 832.8 nm (S2A), 833.0 nm (S2B)",
                common_name="nir",
            ),
            "B08a": Band.create(
                name="B08a",
                description="Narrow NIR, 864.7 nm (S2A), 864.0 nm (S2B)",
                common_name="nir08",
            ),
            "B09": Band.create(
                name="B09",
                description="Water vapour, 945.1 nm (S2A), 943.2 nm (S2B)",
                common_name="nir09",
            ),
            "B10": Band.create(
                name="B10",
                description="SWIR – Cirrus, 1373.5 nm (S2A), 1376.9 nm (S2B)",
                common_name="cirrus",
            ),
            "B11": Band.create(
                name="B11",
                description="SWIR, 1613.7 nm (S2A), 1610.4 nm (S2B)",
                common_name="swir16",
            ),
            "B12": Band.create(
                name="B12",
                description="SWIR, 2202.4 nm (S2A), 2185.7 nm (S2B)",
                common_name="swir22",
            ),
        }

    def _bands_from_info(self, obj_info: pd.DataFrame):
        """
        Resolve the first ``bands`` cell of *obj_info* into Band objects.

        :param obj_info: object info from the STACDataFrame

        :return: list of Band objects, or None when no band is listed
        """
        column = obj_info["bands"].values
        # ``column`` is a NumPy array: test its length instead of its truth
        # value, which is ambiguous (raises) for arrays with several elements.
        if len(column) == 0:
            return None

        bands = column[0]
        # NOTE(review): a float cell is presumably pandas' NaN marker for a
        # missing value - confirm against the STACDataFrame producer.
        if bands is None or isinstance(bands, float):
            return None
        if isinstance(bands, str):
            # A single band name must not be iterated character by character.
            bands = [bands]

        # Unknown band names still raise KeyError, as before.
        bands_list = [self.bands_dict[band] for band in bands]
        return bands_list or None

    def add_extension_to_object(
        self, obj: Union[pystac.Item, pystac.Asset], obj_info: pd.DataFrame
    ) -> Union[pystac.Item, pystac.Asset]:
        """
        Add the extension to the given object

        Items, and assets whose title is not a known band name, receive the
        Sentinel-2 common metadata plus the bands listed in *obj_info*.
        Assets whose title is a known band name receive only that band.

        :param obj: object to add the extension
        :param obj_info: object info from the STACDataFrame

        :return: the same object, with the EO extension applied
        """
        # Add EO extension
        eo_ext = EOExtension.ext(obj, add_if_missing=True)

        is_item = isinstance(obj, pystac.Item)
        is_asset = isinstance(obj, pystac.Asset)

        if is_item or (is_asset and obj.title not in self.bands_dict):
            # Add common metadata
            obj.common_metadata.constellation = "Sentinel-2"
            obj.common_metadata.platform = "Sentinel-2"
            obj.common_metadata.instruments = ["Sentinel-2"]
            obj.common_metadata.gsd = 10
            # Add bands
            eo_ext.apply(bands=self._bands_from_info(obj_info))
        elif is_asset:
            # The asset title is itself a band name: describe that single band.
            eo_ext.apply(bands=[self.bands_dict[obj.title]])

        return obj
|
@@ -1,136 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for the STAC label extension base object
|
3
|
-
"""
|
4
|
-
|
5
|
-
from typing import List, Union
|
6
|
-
|
7
|
-
import pystac
|
8
|
-
from pystac.extensions.label import (
|
9
|
-
LabelClasses,
|
10
|
-
LabelExtension,
|
11
|
-
SummariesLabelExtension,
|
12
|
-
)
|
13
|
-
|
14
|
-
from ..base import STACExtensionObject
|
15
|
-
|
16
|
-
|
17
|
-
class LabelExtensionObject(STACExtensionObject):
    """
    STAC Label extension base object in EOTDL

    Provides the shared logic to build label items and to add the label
    extension to a collection. ``generate_stac_labels`` and
    ``add_geojson_to_items`` are no-op placeholders in this class.
    """

    def __init__(self) -> None:
        super().__init__()

    @classmethod
    def generate_stac_labels(cls) -> None:
        """
        Generate a labels collection from a STAC dataframe.

        Placeholder: does nothing in the base class.
        """
        return

    @staticmethod
    def _build_label_classes(label_names, label_classes) -> list:
        """
        Pair every label name with its classes as LabelClasses objects.

        :param label_names: list of label names
        :param label_classes: list with one collection of classes per name

        :return: list of LabelClasses, one per (name, classes) pair
        """
        return [
            LabelClasses.create(name=name, classes=classes)
            for name, classes in zip(label_names, label_classes)
        ]

    def add_extension_to_item(
        self,
        obj: pystac.Item,
        label_description: str,
        label_type: str,
        label_names: List[str],
        label_classes: List[str],
        **kwargs
    ) -> Union[pystac.Item, pystac.Asset]:
        """
        Create a label item for the given source item

        A new item is created with the id, geometry, bbox and datetime of
        *obj*; the label extension is added to it and *obj* is registered as
        its source.

        :param obj: source item to create the label item from
        :param label_description: label description
        :param label_type: label type
        :param label_names: list of label names
        :param label_classes: list of label classes of the item, one entry
            per label name
        :param kwargs: optional arguments
        :param kwargs.label_properties: list of label properties; defaults
            to ``label_names`` when missing or empty
        :param kwargs.label_methods: list of label methods
        :param kwargs.label_tasks: list of label tasks

        :return: the item with the label extension
        """
        properties = {}
        if obj.datetime is None:
            # Items described by a time range have no single datetime; carry
            # the range over so the label item can still be constructed.
            for key in ("start_datetime", "end_datetime"):
                if key in obj.properties:
                    properties[key] = obj.properties[key]

        label_item = pystac.Item(
            id=obj.id,
            geometry=obj.geometry,
            bbox=obj.bbox,
            properties=properties,
            datetime=obj.datetime,
        )

        # Add the label extension to the item
        LabelExtension.add_to(label_item)

        # Access the label extension
        label_ext = LabelExtension.ext(label_item)

        # Add the label classes. All (name, classes) pairs are kept; the
        # field is left untouched when there is no pair at all.
        all_label_classes = self._build_label_classes(label_names, label_classes)
        if all_label_classes:
            label_ext.label_classes = all_label_classes

        # Add the label description
        label_ext.label_description = label_description
        # Add the label type
        label_ext.label_type = label_type
        # Add the label properties, if any
        label_ext.label_properties = kwargs.get("label_properties") or label_names
        # Add the label methods, if any
        label_ext.label_methods = kwargs.get("label_methods") or None
        # Add the label tasks, if any
        label_ext.label_tasks = kwargs.get("label_tasks") or None
        # Add the source
        label_ext.add_source(obj)

        return label_item

    def add_extension_to_collection(
        self,
        obj: pystac.Collection,
        label_names: List[str],
        label_classes: List[Union[list, tuple]],
        label_type: str,
    ) -> None:
        """
        Add the label extension to the given collection

        :param obj: object to add the extension
        :param label_names: list of label names
        :param label_classes: list of label classes, one entry per label name
        :param label_type: label type
        """
        LabelExtension.add_to(obj)

        # Add the label extension to the collection
        label_ext = SummariesLabelExtension(obj)

        # Add the label classes. All (name, classes) pairs are kept; the
        # field is left untouched when there is no pair at all.
        all_label_classes = self._build_label_classes(label_names, label_classes)
        if all_label_classes:
            label_ext.label_classes = all_label_classes

        # Add the label type
        label_ext.label_type = label_type

    def add_geojson_to_items(self) -> None:
        """
        Add a GeoJSON FeatureCollection to every label item, as recommended by the spec
        https://github.com/stac-extensions/label#assets

        Placeholder: does nothing in the base class.
        """
        return
|
@@ -1,203 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for the STAC label extension ImageNameLabeler object
|
3
|
-
"""
|
4
|
-
|
5
|
-
import json
|
6
|
-
from os.path import join, dirname
|
7
|
-
from typing import List, Optional, Union
|
8
|
-
|
9
|
-
import pystac
|
10
|
-
import pandas as pd
|
11
|
-
|
12
|
-
from tqdm import tqdm
|
13
|
-
from pystac.extensions.label import LabelExtension
|
14
|
-
from ...extent import get_unknow_extent
|
15
|
-
from .base import LabelExtensionObject
|
16
|
-
|
17
|
-
|
18
|
-
class ImageNameLabeler(LabelExtensionObject):
    """
    STAC label extension ImageNameLabeler object in EOTDL

    Builds a ``labels`` collection whose items take their label from the
    ``label`` column of a STAC dataframe, matched through the ``id`` column.
    """

    def __init__(self) -> None:
        super().__init__()

    def generate_stac_labels(
        self,
        catalog: Union[pystac.Catalog, str],
        stac_dataframe: Optional[pd.DataFrame] = None,
        collection: Optional[Union[pystac.Collection, str]] = "source",
        label_description: Optional[str] = "Item label",
        label_type: Optional[str] = "vector",
        label_names: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        """
        Generate a labels collection from a STAC dataframe.
        This class uses the label column of the dataframe as the label classes.

        :param catalog: catalog (or path to it) to add the labels collection to
        :param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
        :param collection: source collection (or its id) whose items are labeled
        :param label_description: label description
        :param label_type: label type
        :param label_names: list of label names, ``["label"]`` when omitted
        :param kwargs: optional arguments
        :param kwargs.label_properties: list of label properties
        :param kwargs.label_methods: list of label methods
        :param kwargs.label_tasks: list of label tasks

        :raises ValueError: if no dataframe is given, it is empty, it has no
            ``id`` column, or no source collection is provided
        :raises pystac.STACError: if the catalog does not validate
        """
        # The dataframe defaults to None, so check that before ``.empty``.
        if stac_dataframe is None or stac_dataframe.empty:
            raise ValueError(
                "No STAC dataframe provided, please provide a STAC dataframe or generate it with <get_stac_dataframe> method"
            )
        # There must be an item ID column in the STAC dataframe
        if "id" not in stac_dataframe.columns:
            raise ValueError(
                "No item ID column found in the STAC dataframe, please provide a STAC dataframe with the item ID column"
            )

        if label_names is None:
            label_names = ["label"]
        elif isinstance(label_names, str):
            # A bare string is accepted as a single label name.
            label_names = [label_names]

        if isinstance(catalog, str):
            catalog = pystac.Catalog.from_file(catalog)

        # If exists a source collection, get it extent
        source_id = (
            collection.id if isinstance(collection, pystac.Collection) else collection
        )
        source_collection = catalog.get_child(source_id)
        if source_collection:
            extent = source_collection.extent
            source_items = source_collection.get_stac_objects(pystac.RelType.ITEM)
        else:
            if not collection:
                raise ValueError(
                    "No source collection provided, please provide a source collection"
                )
            extent = get_unknow_extent()
            # Without a source collection there is nothing to label.
            source_items = []

        # Create the labels collection and add it to the catalog,
        # replacing any previous one with the same id
        labels_collection = pystac.Collection(
            id="labels", description="Labels", extent=extent
        )
        if labels_collection.id in [c.id for c in catalog.get_children()]:
            catalog.remove_child(labels_collection.id)
        catalog.add_child(labels_collection)

        # The classes are the same for every item: compute them once, as one
        # list of classes paired with the first label name.
        label_classes = [stac_dataframe["label"].unique().tolist()]

        # Generate the labels items
        print("Generating labels collection...")
        for source_item in tqdm(source_items):
            # Create the label item
            label_item = self.add_extension_to_item(
                source_item,
                label_description=label_description,
                label_type=label_type,
                label_names=label_names,
                label_classes=label_classes,
                **kwargs,
            )
            # Add the self href to the label item, following the Best Practices Layout
            # https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
            label_item.set_self_href(
                join(
                    dirname(labels_collection.get_self_href()),
                    label_item.id,
                    f"{label_item.id}.json",
                )
            )
            labels_collection.add_item(label_item)

        # Add the extension to the collection
        self.add_extension_to_collection(
            labels_collection,
            label_names=label_names,
            label_classes=label_classes,
            label_type=label_type,
        )

        # Validate and save the catalog
        # Before adding the geojson, we need to save the catalog
        # and then iterate over the items to add the geojson
        try:
            pystac.validation.validate(catalog)
            catalog.normalize_and_save(
                dirname(catalog.get_self_href()), pystac.CatalogType.SELF_CONTAINED
            )
        except pystac.STACValidationError as e:
            raise pystac.STACError(f"Catalog validation error: {e}") from e

        # Add a GeoJSON FeatureCollection to every label item, as recommended by the spec
        # https://github.com/stac-extensions/label#assets
        self.add_geojson_to_items(
            labels_collection, stac_dataframe, label_type=label_type
        )
        catalog.normalize_and_save(
            dirname(catalog.get_self_href()), pystac.CatalogType.SELF_CONTAINED
        )
        print("Success on labels generation!")

    def add_geojson_to_items(
        self, collection: pystac.Collection, df: pd.DataFrame, label_type: str
    ) -> None:
        """
        Add a GeoJSON FeatureCollection to every label item, as recommended by the spec
        https://github.com/stac-extensions/label#assets

        For every item a ``<item id>.geojson`` file is written next to the
        item and registered as its labels asset.

        :param collection: labels collection whose items receive the GeoJSON
        :param df: dataframe with the ``id`` and ``label`` columns of the items
        :param label_type: label type
        """
        for item in collection.get_all_items():
            geojson_path = join(dirname(item.get_self_href()), f"{item.id}.geojson")

            properties = {"roles": ["labels", f"labels-{label_type}"]}

            # TODO depending on the tasks ("label:tasks"), there must be extra fields
            # https://github.com/stac-extensions/label#assets

            label_ext = LabelExtension.ext(item)
            label_ext.add_geojson_labels(
                href=geojson_path, title="Label", properties=properties
            )
            item.make_asset_hrefs_relative()

            # ``tolist`` turns NumPy scalars into plain Python values, which
            # ``json.dump`` can serialize. An item without a matching row
            # still raises IndexError.
            labels = [df.loc[df["id"] == item.id, "label"].tolist()[0]]

            # There is data like DEM data that does not have datetime but start and end datetime
            if item.datetime:
                item_datetime = item.datetime.isoformat()
            else:
                # ``properties`` is a dict, so the range is read by key.
                item_datetime = (
                    item.properties.get("start_datetime"),
                    item.properties.get("end_datetime"),
                )

            labels_properties = (
                dict(zip(item.properties["label:properties"], labels))
                if label_type == "vector"
                else {}
            )
            labels_properties["datetime"] = item_datetime

            geojson = {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": item.geometry,
                        "properties": labels_properties,
                    }
                ],
            }

            with open(geojson_path, "w", encoding="utf-8") as f:
                json.dump(geojson, f)
|
@@ -1,219 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for the STAC label extension ScaneoLabeler object
|
3
|
-
"""
|
4
|
-
|
5
|
-
import json
|
6
|
-
from os.path import join, dirname, exists, splitext, basename, abspath
|
7
|
-
from typing import List, Optional, Union
|
8
|
-
from glob import glob
|
9
|
-
|
10
|
-
import pystac
|
11
|
-
|
12
|
-
from tqdm import tqdm
|
13
|
-
from pystac.extensions.label import LabelExtension
|
14
|
-
|
15
|
-
from .base import LabelExtensionObject
|
16
|
-
from ...extent import get_unknow_extent
|
17
|
-
|
18
|
-
|
19
|
-
class ScaneoLabeler(LabelExtensionObject):
    """
    STAC label extension ScaneoLabeler object in EOTDL

    Builds a ``labels`` collection from the ``<item id>_labels.geojson`` files
    found in a root folder.
    """

    def __init__(self) -> None:
        super().__init__()

    def generate_stac_labels(
        self,
        catalog: Union[pystac.Catalog, str],
        root_folder: str,
        collection: Optional[Union[pystac.Collection, str]] = "source",
        label_description: Optional[str] = "Item label",
        label_type: Optional[str] = "vector",
        label_names: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        """
        Generate a labels collection from the GeoJSON labels of a folder.
        This class should be used when the items have been labeled using SCANEO, as is implemented
        taking into account the SCANEO labeling format.

        :param catalog: catalog (or path to it) to add the labels collection to
        :param root_folder: root folder where are the images and the labels as GeoJSON files, following the SCANEO labeling format
        :param collection: source collection (or its id) whose items are labeled
        :param label_description: label description
        :param label_type: label type
        :param label_names: list of label names, ``["label"]`` when omitted
        :param kwargs: optional arguments
        :param kwargs.label_properties: list of label properties
        :param kwargs.label_methods: list of label methods

        :raises ValueError: if no source collection is provided or the root
            folder holds no GeoJSON file
        :raises pystac.STACError: if the catalog does not validate
        """
        if label_names is None:
            label_names = ["label"]
        elif isinstance(label_names, str):
            # A bare string is accepted as a single label name.
            label_names = [label_names]

        if isinstance(catalog, str):
            catalog = pystac.Catalog.from_file(catalog)

        # If exists a source collection, get it extent
        source_id = (
            collection.id if isinstance(collection, pystac.Collection) else collection
        )
        source_collection = catalog.get_child(source_id)
        if source_collection:
            extent = source_collection.extent
            source_items = source_collection.get_stac_objects(pystac.RelType.ITEM)
        else:
            if not collection:
                raise ValueError(
                    "No source collection provided, please provide a source collection"
                )
            extent = get_unknow_extent()
            # Without a source collection there is nothing to label.
            source_items = []

        # Create the labels collection and add it to the catalog,
        # replacing any previous one with the same id
        labels_collection = pystac.Collection(
            id="labels", description="Labels", extent=extent
        )
        if labels_collection.id in [c.id for c in catalog.get_children()]:
            catalog.remove_child(labels_collection.id)
        catalog.add_child(labels_collection)

        # Get the GeoJSON files
        geojson_files = glob(join(root_folder, "*.geojson"))
        if not geojson_files:
            raise ValueError(
                "No GeoJSON files found in the root folder, please provide a root folder with the GeoJSON files"
            )

        # Get the label classes (already wrapped as one list per label name)
        label_classes = self.get_label_classes(root_folder, geojson_files)

        # Generate the labels items
        for source_item in tqdm(source_items, desc="Generating labels collection..."):
            # Get the GeoJSON label of the item
            geojson_label = self.get_geojson_of_item(source_item, geojson_files)
            # The tasks read from the GeoJSON label replace any
            # ``label_tasks`` given by the caller
            tasks = self.get_tasks_from_geojson(geojson_label) if geojson_label else None
            kwargs["label_tasks"] = tasks

            # Create the label item
            label_item = self.add_extension_to_item(
                source_item,
                label_description=label_description,
                label_type=label_type,
                label_names=label_names,
                label_classes=label_classes,
                **kwargs,
            )
            # Add the self href to the label item, following the Best Practices Layout
            # https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
            label_item.set_self_href(
                join(
                    dirname(labels_collection.get_self_href()),
                    label_item.id,
                    f"{label_item.id}.json",
                )
            )
            # Match the GeoJSON label with the label item, if exists
            if geojson_label and exists(geojson_label):
                self.add_geojson_to_item(label_item, geojson_label, label_type)
            # Add the item to the collection
            labels_collection.add_item(label_item)

        # Add the extension to the collection, with the same names and
        # classes used for the items
        self.add_extension_to_collection(
            labels_collection,
            label_names=label_names,
            label_classes=label_classes,
            label_type=label_type,
        )

        # Validate and save the catalog
        try:
            pystac.validation.validate(catalog)
            catalog.normalize_and_save(
                dirname(catalog.get_self_href()), pystac.CatalogType.SELF_CONTAINED
            )
            print("Success on labels generation!")
        except pystac.STACValidationError as e:
            raise pystac.STACError(f"Catalog validation error: {e}") from e

    def add_geojson_to_item(
        self, item: pystac.Item, geojson_path: str, label_type: str
    ) -> None:
        """
        Add a GeoJSON FeatureCollection to a label item, as recommended by the spec
        https://github.com/stac-extensions/label#assets

        :param item: label item to attach the GeoJSON labels to
        :param geojson_path: path to the GeoJSON file with the labels
        :param label_type: label type
        """
        properties = {"roles": ["labels", f"labels-{label_type}"]}

        label_ext = LabelExtension.ext(item, add_if_missing=True)
        item.make_asset_hrefs_absolute()
        label_ext.add_geojson_labels(
            href=abspath(geojson_path), title="Label", properties=properties
        )
        item.make_asset_hrefs_relative()

    def get_label_classes(self, root_folder: str, geojsons: List[str]) -> List[str]:
        """
        Get the label classes from the labels.json file if exists, or from the GeoJSON files instead

        :param root_folder: folder that may contain a ``labels.json`` file
        :param geojsons: list of GeoJSON files, used when there is no ``labels.json``

        :return: a one-element list holding the list of unique label classes,
            in order of first appearance
        """
        label_classes = []

        def add_class(value) -> None:
            # Keep first-seen order and avoid duplicates.
            if value not in label_classes:
                label_classes.append(value)

        labels_json = join(root_folder, "labels.json")
        if exists(labels_json):
            with open(labels_json, "r", encoding="utf-8") as f:
                labels = json.load(f)
            for value in labels["labels"]:
                add_class(value["name"])
        else:
            for geojson in geojsons:
                with open(geojson, "r", encoding="utf-8") as f:
                    labels = json.load(f)
                for value in labels["features"]:
                    feature_labels = value["properties"]["labels"]
                    # NOTE(review): "labels" may hold one label or a list of
                    # labels - a list is flattened so the classes stay a flat
                    # list of values. Confirm against the SCANEO format.
                    if isinstance(feature_labels, (list, tuple)):
                        for feature_label in feature_labels:
                            add_class(feature_label)
                    else:
                        add_class(feature_labels)

        return [label_classes]

    def get_geojson_of_item(
        self, item: pystac.Item, geojsons: List[str]
    ) -> Optional[str]:
        """
        Get the GeoJSON label of the item from a list of GeoJSON files

        The label file of an item is the one named ``<item id>_labels``,
        extension excluded.

        :param item: item to get the GeoJSON label
        :param geojsons: list of GeoJSON files

        :return: path to the GeoJSON label of the item, or None if there is none
        """
        geojson_name = f"{item.id}_labels"
        # Map <geojson_filename> to <geojson_path>; on duplicate file names
        # the last path wins.
        geojsons_dict = {
            splitext(basename(geojson))[0]: geojson for geojson in geojsons
        }

        return geojsons_dict.get(geojson_name)

    def get_tasks_from_geojson(self, geojson_path: str) -> List[str]:
        """
        Get the tasks from the GeoJSON label

        :param geojson_path: path to the GeoJSON label

        :return: list of unique tasks, in order of first appearance
        """
        with open(geojson_path, "r", encoding="utf-8") as f:
            geojson = json.load(f)

        tasks = []
        for feature in geojson["features"]:
            # Features without properties or without tasks contribute nothing.
            feature_tasks = (feature.get("properties") or {}).get("tasks") or []
            for task in feature_tasks:
                if task not in tasks:
                    tasks.append(task)

        return tasks
|