eotdl-2023.11.2.post5-py3-none-any.whl → eotdl-2023.11.3.post2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/__init__.py +6 -3
- eotdl/access/airbus/__init__.py +5 -1
- eotdl/access/airbus/client.py +356 -338
- eotdl/access/airbus/parameters.py +19 -4
- eotdl/access/airbus/utils.py +26 -21
- eotdl/access/download.py +30 -14
- eotdl/access/search.py +17 -6
- eotdl/access/sentinelhub/__init__.py +5 -1
- eotdl/access/sentinelhub/client.py +57 -54
- eotdl/access/sentinelhub/evalscripts.py +38 -39
- eotdl/access/sentinelhub/parameters.py +43 -23
- eotdl/access/sentinelhub/utils.py +38 -28
- eotdl/auth/errors.py +2 -1
- eotdl/commands/auth.py +3 -3
- eotdl/curation/__init__.py +5 -1
- eotdl/curation/stac/__init__.py +5 -1
- eotdl/curation/stac/assets.py +55 -32
- eotdl/curation/stac/dataframe.py +20 -14
- eotdl/curation/stac/dataframe_bck.py +2 -2
- eotdl/curation/stac/dataframe_labeling.py +15 -12
- eotdl/curation/stac/extensions/__init__.py +6 -2
- eotdl/curation/stac/extensions/base.py +8 -4
- eotdl/curation/stac/extensions/dem.py +6 -3
- eotdl/curation/stac/extensions/eo.py +10 -6
- eotdl/curation/stac/extensions/label/__init__.py +5 -1
- eotdl/curation/stac/extensions/label/base.py +40 -26
- eotdl/curation/stac/extensions/label/image_name_labeler.py +64 -43
- eotdl/curation/stac/extensions/label/scaneo.py +59 -56
- eotdl/curation/stac/extensions/ml_dataset.py +154 -56
- eotdl/curation/stac/extensions/projection.py +11 -9
- eotdl/curation/stac/extensions/raster.py +22 -14
- eotdl/curation/stac/extensions/sar.py +12 -7
- eotdl/curation/stac/extent.py +67 -40
- eotdl/curation/stac/parsers.py +18 -10
- eotdl/curation/stac/stac.py +81 -62
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/download.py +42 -55
- eotdl/datasets/ingest.py +68 -11
- eotdl/files/__init__.py +1 -1
- eotdl/files/ingest.py +3 -1
- eotdl/models/download.py +1 -1
- eotdl/repos/AuthAPIRepo.py +0 -1
- eotdl/repos/DatasetsAPIRepo.py +22 -146
- eotdl/repos/FilesAPIRepo.py +7 -92
- eotdl/repos/ModelsAPIRepo.py +0 -1
- eotdl/tools/__init__.py +5 -1
- eotdl/tools/geo_utils.py +78 -48
- eotdl/tools/metadata.py +13 -11
- eotdl/tools/paths.py +14 -14
- eotdl/tools/stac.py +36 -31
- eotdl/tools/time_utils.py +53 -26
- eotdl/tools/tools.py +84 -50
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/METADATA +5 -3
- eotdl-2023.11.3.post2.dist-info/RECORD +84 -0
- eotdl-2023.11.2.post5.dist-info/RECORD +0 -84
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/WHEEL +0 -0
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/entry_points.txt +0 -0
eotdl/curation/stac/extent.py
CHANGED
```diff
@@ -1,16 +1,14 @@
-
+"""
 Module for STAC extent
-
+"""
 
-import pystac
 from datetime import datetime
-import rasterio
-import json
-
-from glob import glob
-from os.path import dirname
 from typing import List
 
+import pystac
+
+import rasterio
+
 from ...tools import get_item_metadata
 
 
@@ -18,33 +16,43 @@ def get_dem_temporal_interval() -> pystac.TemporalExtent:
     """
     Get a temporal interval for DEM data
     """
-    min_date = datetime.strptime(
-    max_date = datetime.strptime(
+    min_date = datetime.strptime("2011-01-01", "%Y-%m-%d")
+    max_date = datetime.strptime("2015-01-07", "%Y-%m-%d")
 
     return pystac.TemporalExtent([(min_date, max_date)])
-
+
+
 def get_unknow_temporal_interval() -> pystac.TemporalExtent:
     """
     Get an unknown temporal interval
     """
-    min_date = datetime.strptime(
-    max_date = datetime.strptime(
+    min_date = datetime.strptime("2000-01-01", "%Y-%m-%d")
+    max_date = datetime.strptime("2023-12-31", "%Y-%m-%d")
 
     return pystac.TemporalExtent([(min_date, max_date)])
-
+
+
 def get_unknow_extent() -> pystac.Extent:
     """
+    Get an unknown extent
     """
-    return pystac.Extent(
-
-
-
+    return pystac.Extent(
+        spatial=pystac.SpatialExtent([[0, 0, 0, 0]]),
+        temporal=pystac.TemporalExtent(
+            [
+                (
+                    datetime.strptime("2000-01-01", "%Y-%m-%d"),
+                    datetime.strptime("2023-12-31", "%Y-%m-%d"),
+                )
+            ]
+        ),
+    )
 
 
 def get_collection_extent(rasters: List[str]) -> pystac.Extent:
     """
     Get the extent of a collection
-
+
     :param rasters: list of rasters
     """
     # Get the spatial extent of the collection
@@ -55,7 +63,8 @@ def get_collection_extent(rasters: List[str]) -> pystac.Extent:
     extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_interval)
 
     return extent
-
+
+
 def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
     """
     Get the spatial extent of a collection
@@ -63,13 +72,15 @@ def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
     :param path: path to the directory
     """
     # Get the bounding boxes of all the given rasters
-    bboxes =
+    bboxes = []
     for raster in rasters:
         with rasterio.open(raster) as ds:
             bounds = ds.bounds
-            dst_crs =
+            dst_crs = "EPSG:4326"
             try:
-                left, bottom, right, top = rasterio.warp.transform_bounds(
+                left, bottom, right, top = rasterio.warp.transform_bounds(
+                    ds.crs, dst_crs, *bounds
+                )
                 bbox = [left, bottom, right, top]
             except rasterio.errors.CRSError:
                 spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
@@ -77,16 +88,17 @@ def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
             bboxes.append(bbox)
     # Get the minimum and maximum values of the bounding boxes
     try:
-        left = min(
-        bottom = min(
-        right = max(
-        top = max(
+        left = min(bbox[0] for bbox in bboxes)
+        bottom = min(bbox[1] for bbox in bboxes)
+        right = max(bbox[2] for bbox in bboxes)
+        top = max(bbox[3] for bbox in bboxes)
         spatial_extent = pystac.SpatialExtent([[left, bottom, right, top]])
     except ValueError:
         spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
     finally:
         return spatial_extent
 
+
 def get_collection_temporal_interval(rasters: List[str]) -> pystac.TemporalExtent:
     """
     Get the temporal interval of a collection
@@ -94,38 +106,53 @@ def get_collection_temporal_interval(rasters: List[str]) -> pystac.TemporalExtent:
     :param path: path to the directory
     """
     # Get all the metadata.json files in the directory of all the given rasters
-    metadata_jsons =
+    metadata_jsons = []
     for raster in rasters:
         metadata_json = get_item_metadata(raster)
         if metadata_json:
             metadata_jsons.append(metadata_json)
 
     if not metadata_jsons:
-        return
-
+        return (
+            get_unknow_temporal_interval()
+        )  # If there is no metadata, set a generic temporal interval
+
     # Get the temporal interval of every metadata.json file and the type of the data
-    data_types =
-    temporal_intervals =
+    data_types = []
+    temporal_intervals = []
     for metadata in metadata_jsons:
         # Append the temporal interval to the list as a datetime object
-        temporal_intervals.append(metadata[
+        temporal_intervals.append(metadata["acquisition-date"]) if metadata[
+            "acquisition-date"
+        ] else None
         # Append the data type to the list
-        data_types.append(metadata[
-
+        data_types.append(metadata["type"]) if metadata["type"] else None
+
     if temporal_intervals:
+        min_date, max_date = None, None
         try:
             # Get the minimum and maximum values of the temporal intervals
-            min_date = min(
-
+            min_date = min(
+                datetime.strptime(interval, "%Y-%m-%d")
+                for interval in temporal_intervals
+            )
+            max_date = max(
+                datetime.strptime(interval, "%Y-%m-%d")
+                for interval in temporal_intervals
+            )
         except ValueError:
-            min_date = datetime.strptime(
-            max_date = datetime.strptime(
+            min_date = datetime.strptime("2000-01-01", "%Y-%m-%d")
+            max_date = datetime.strptime("2023-12-31", "%Y-%m-%d")
         finally:
             # Create the temporal interval
             return pystac.TemporalExtent([(min_date, max_date)])
     else:
         # Check if the collection is composed by DEM data. If not, set a generic temporal interval
-        if
+        if (
+            set(data_types) == {"dem"}
+            or set(data_types) == {"DEM"}
+            or set(data_types) == {"dem", "DEM"}
+        ):
             return get_dem_temporal_interval()
         else:
             return get_unknow_temporal_interval()
```
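For orientation, here is a brief usage sketch of the reworked extent helpers. It is not part of the released code: the raster paths are placeholders, and it assumes `rasterio` and `pystac` are installed alongside `eotdl`.

```python
# Hedged sketch, not from the package diff: exercising the helpers above.
# The GeoTIFF paths are placeholders; any local rasters with a valid CRS work.
from eotdl.curation.stac.extent import get_collection_extent

rasters = ["data/source/img_1.tif", "data/source/img_2.tif"]  # hypothetical paths
extent = get_collection_extent(rasters)

# Spatial extent: bounding boxes reprojected to EPSG:4326 (or [0, 0, 0, 0] on CRS errors)
print(extent.spatial.bboxes)
# Temporal extent: taken from metadata.json files, DEM defaults, or the generic fallback
print(extent.temporal.intervals)
```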
eotdl/curation/stac/parsers.py
CHANGED
```diff
@@ -1,26 +1,31 @@
-
+"""
 Module for STAC parsers
-
+"""
 
 from os.path import dirname, basename
 
 
 class STACIdParser:
-
+    """
+    STAC ID parser base class
+    """
     def get_item_id(self, raster_path: str):
         """
         Get the ID of the STAC Item from the given raster path
 
         :param raster_path: path to the raster file
         """
-
+        return
 
 
 class StructuredParser(STACIdParser):
+    """
+    Structured STAC ID parser
+    """
 
     def __init__(self) -> None:
         super().__init__()
-
+
     def get_item_id(self, raster_path: str):
         """
         Get the ID of the STAC Item from the given raster path.
@@ -30,16 +35,19 @@ class StructuredParser(STACIdParser):
         :param raster_path: path to the raster file
         """
         tiff_dir_path = dirname(raster_path)
-
+        item_id = tiff_dir_path.split("/")[-1]
 
-        return
+        return item_id
 
 
 class UnestructuredParser(STACIdParser):
+    """
+    Unstructured STAC ID parser
+    """
 
     def __init__(self) -> None:
         super().__init__()
-
+
     def get_item_id(self, raster_path: str):
         """
         Get the ID of the STAC Item from the given raster path.
@@ -48,6 +56,6 @@ class UnestructuredParser(STACIdParser):
 
         :param raster_path: path to the raster file
         """
-
+        item_id = basename(raster_path).split(".")[0]
 
-        return
+        return item_id
```
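The two parsers resolve item IDs differently; a small illustration with an invented path:

```python
# Illustrative only; the raster path below is made up.
from eotdl.curation.stac.parsers import StructuredParser, UnestructuredParser

raster_path = "dataset/source/item_0001/image.tif"
print(StructuredParser().get_item_id(raster_path))    # "item_0001" (parent directory name)
print(UnestructuredParser().get_item_id(raster_path)) # "image" (file name without extension)
```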
eotdl/curation/stac/stac.py
CHANGED
```diff
@@ -1,41 +1,39 @@
 """
-Module for generating STAC metadata
+Module for generating STAC metadata
 """
 
-import
-from
-import
-import pystac
-from tqdm import tqdm
-
+import random
+from datetime import datetime
+from typing import Union, Optional
 from os.path import join, basename, dirname
-from shutil import rmtree
 
+import pandas as pd
+import pystac
 import rasterio
-import
-from rasterio.warp import transform_bounds
-from typing import Union, List
-
-from datetime import datetime
+from tqdm import tqdm
 from shapely.geometry import Polygon, mapping
-from glob import glob
-from typing import Union, Optional
 
 from .parsers import STACIdParser, StructuredParser
 from .assets import STACAssetGenerator
 from .dataframe_labeling import LabelingStrategy, UnlabeledStrategy
-from ...tools import (
-
-
-
-
-
-
-
-
+from ...tools import (
+    format_time_acquired,
+    cut_images,
+    get_item_metadata,
+    get_all_images_in_path,
+)
+from .extensions import (
+    type_stac_extensions_dict,
+    SUPPORTED_EXTENSIONS,
+)
+from .extent import get_collection_extent
 
 
 class STACGenerator:
+    """
+    STAC generator class
+    """
+
     def __init__(
         self,
         image_format: str = "tiff",
@@ -63,7 +61,7 @@ class STACGenerator:
 
     def generate_stac_metadata(
         self,
-
+        stac_id: str,
         description: str,
         stac_dataframe: pd.DataFrame = None,
         output_folder: str = "stac",
@@ -84,8 +82,8 @@ class STACGenerator:
            raise ValueError("No STAC dataframe provided")
 
        # Create an empty catalog
-        catalog = pystac.Catalog(id=
-
+        catalog = pystac.Catalog(id=stac_id, description=description, **kwargs)
+
        # Add the collections to the catalog
        collections = self._stac_dataframe.collection.unique()
        for collection_path in collections:
@@ -97,7 +95,9 @@ class STACGenerator:
        # Check there have been generate all the items from the images
        items_count = 0
        for collection in catalog.get_children():
-            items = list(
+            items = list(
+                set([item.id for item in collection.get_items(recursive=True)])
+            )
            items_count += len(items)
        if len(self._stac_dataframe) != items_count:
            raise pystac.STACError(
@@ -117,13 +117,14 @@ class STACGenerator:
            print(f"Catalog validation error: {e}")
            return
 
-    def get_stac_dataframe(
-
-
-
-
-
-
+    def get_stac_dataframe(
+        self,
+        path: str,
+        collections: Optional[Union[str, dict]] = "source",
+        bands: Optional[dict] = None,
+        extensions: Optional[dict] = None,
+        sample: Optional[int] = None,
+    ) -> pd.DataFrame:
        """
        Get a dataframe with the STAC metadata of a given directory containing the assets to generate metadata
 
@@ -134,16 +135,20 @@ class STACGenerator:
        """
        images = get_all_images_in_path(path, self._image_format)
        if len(images) == 0:
-            raise ValueError(
-
-
+            raise ValueError(
+                "No images found in the given path with the given extension. Please check the path and the extension"
+            )
+
+        if self._assets_generator.type == "Extracted":
            images = cut_images(images)
 
        if sample:
            try:
                images = random.sample(images, sample)
            except ValueError:
-                raise ValueError(
+                raise ValueError(
+                    f"Sample size must be smaller than the number of images ({len(images)}). May be there are no images found in the given path with the given extension"
+                )
 
        labels, ixs = self._labeling_strategy.get_images_labels(images)
        bands_values = self._get_items_list_from_dict(labels, bands)
@@ -152,22 +157,32 @@ class STACGenerator:
        if collections == "source":
            # List of path with the same value repeated as many times as the number of images
            collections_values = [join(path, "source") for i in range(len(images))]
-        elif collections ==
-            collections_values = [
+        elif collections == "*":
+            collections_values = [
+                join(path, basename(dirname(image))) for image in images
+            ]
        else:
            try:
-                collections_values = [
+                collections_values = [
+                    join(path, value)
+                    for value in self._get_items_list_from_dict(labels, collections)
+                ]
            except TypeError:
-                raise pystac.STACError(
-
-
-
-
-
-
-
-
-
+                raise pystac.STACError(
+                    "There is an error generating the collections. Please check the collections dictionary"
+                )
+
+        df = pd.DataFrame(
+            {
+                "image": images,
+                "label": labels,
+                "ix": ixs,
+                "collection": collections_values,
+                "extensions": extensions_values,
+                "bands": bands_values,
+            }
+        )
+
        self._stac_dataframe = df
 
        return df
@@ -182,7 +197,7 @@ class STACGenerator:
        if not items:
            # Create list of None with the same length as the labels list
            return [None for _ in labels]
-        items_list =
+        items_list = []
        for label in labels:
            if label in items.keys():
                items_list.append(items[label])
@@ -219,7 +234,7 @@ class STACGenerator:
        # Return the collection
        return collection
 
-    def create_stac_item(self, raster_path: str
+    def create_stac_item(self, raster_path: str) -> pystac.Item:
        """
        Create a STAC item from a directory containing the raster files and the metadata.json file
 
@@ -250,12 +265,16 @@ class STACGenerator:
        )
 
        # Initialize pySTAC item parameters
-        params =
-        params["properties"] =
+        params = {}
+        params["properties"] = {}
 
        # Obtain the date acquired
        start_time, end_time = None, None
-        if
+        if (
+            metadata
+            and metadata["acquisition-date"]
+            and metadata["type"] not in ("dem", "DEM")
+        ):
            time_acquired = format_time_acquired(metadata["acquisition-date"])
        else:
            # Check if the type of the data is DEM
@@ -270,15 +289,15 @@ class STACGenerator:
                time_acquired = datetime.strptime("2000-01-01", "%Y-%m-%d")
 
        # Obtain the item ID. The approach depends on the item parser
-
+        item_id = self._item_parser.get_item_id(raster_path)
        # Add the item ID to the dataframe, to be able to get it later
        self._stac_dataframe.loc[
            self._stac_dataframe["image"] == raster_path, "id"
-        ] =
-
+        ] = item_id
+
        # Instantiate pystac item
        item = pystac.Item(
-            id=
+            id=item_id, geometry=geom, bbox=bbox, datetime=time_acquired, **params
        )
 
        # Get the item info, from the raster path
@@ -319,6 +338,6 @@ class STACGenerator:
            else:
                extension_obj = self._extensions_dict[extension]
                extension_obj.add_extension_to_object(asset, item_info)
-        item.set_self_href(join(dirname(raster_path), f"{
+        item.set_self_href(join(dirname(raster_path), f"{item_id}.json"))
        item.make_asset_hrefs_relative()
        return item
```
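A hedged sketch of the `STACGenerator` flow after this change. The data path and catalog ID are placeholders, and constructor arguments other than `image_format` are left at their defaults because their names are not shown in this diff.

```python
# Hedged sketch; "data/my_dataset" and "my-dataset" are placeholders.
from eotdl.curation.stac.stac import STACGenerator

generator = STACGenerator(image_format="tif")

# Build the dataframe describing images, labels, collections and extensions
df = generator.get_stac_dataframe("data/my_dataset")

# stac_id is the parameter added in this version; description was already required
generator.generate_stac_metadata(
    stac_id="my-dataset",
    description="Example STAC catalog",
    output_folder="stac",
)
```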
eotdl/datasets/__init__.py
CHANGED
eotdl/datasets/download.py
CHANGED
```diff
@@ -4,8 +4,8 @@ from tqdm import tqdm
 
 from ..auth import with_auth
 from .retrieve import retrieve_dataset, retrieve_dataset_files
-from ..
-from ..
+from ..repos import FilesAPIRepo, DatasetsAPIRepo
+from ..curation.stac import STACDataFrame
 
 
 @with_auth
@@ -13,7 +13,7 @@ def download_dataset(
     dataset_name,
     version=None,
     path=None,
-    logger=
+    logger=print,
     assets=False,
     force=False,
     verbose=False,
@@ -45,20 +45,6 @@ def download_dataset(
     if dataset["quality"] == 0:
         if file:
             raise NotImplementedError("Downloading a specific file is not implemented")
-            # files = [f for f in dataset["files"] if f["name"] == file]
-            # if not files:
-            #     raise Exception(f"File {file} not found")
-            # if len(files) > 1:
-            #     raise Exception(f"Multiple files with name {file} found")
-            # dst_path = download(
-            #     dataset,
-            #     dataset["id"],
-            #     file,
-            #     files[0]["checksum"],
-            #     download_path,
-            #     user,
-            # )
-            # return Outputs(dst_path=dst_path)
         dataset_files = retrieve_dataset_files(dataset["id"], version)
         repo = FilesAPIRepo()
         for file in tqdm(dataset_files, disable=verbose, unit="file", position=0):
@@ -76,44 +62,45 @@ def download_dataset(
            # if calculate_checksum(dst_path) != checksum:
            #     logger(f"Checksum for {file} does not match")
        if verbose:
-            logger(
-    return download_path
+            logger("Done")
    else:
-        raise NotImplementedError("Downloading a STAC dataset is not implemented")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # raise NotImplementedError("Downloading a STAC dataset is not implemented")
+        if verbose:
+            logger("Downloading STAC metadata...")
+        repo = DatasetsAPIRepo()
+        gdf, error = repo.download_stac(
+            dataset["id"],
+            user["id_token"],
+        )
+        if error:
+            raise Exception(error)
+        df = STACDataFrame(gdf)
+        # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
+        df.to_stac(download_path)
+        # download assets
+        if assets:
+            if verbose:
+                logger("Downloading assets...")
+            repo = FilesAPIRepo()
+            df = df.dropna(subset=["assets"])
+            for row in tqdm(df.iterrows(), total=len(df)):
+                for k, v in row[1]["assets"].items():
+                    href = v["href"]
+                    _, filename = href.split("/download/")
+                    # will overwrite assets with same name :(
+                    repo.download_file_url(
+                        href, filename, f"{download_path}/assets", user["id_token"]
+                    )
+        else:
+            if verbose:
+                logger("To download assets, set assets=True or -a in the CLI.")
+    return download_path
 
 
-
-
-
-
-
-
-
+@with_auth
+def download_file_url(url, path, progress=True, logger=print, user=None):
+    repo = FilesAPIRepo()
+    _, filename = url.split("/download/")
+    return repo.download_file_url(
+        url, filename, f"{path}/assets", user["id_token"], progress
+    )
```