eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/search.py +0 -2
- eotdl/access/sentinelhub/parameters.py +1 -1
- eotdl/cli.py +2 -2
- eotdl/commands/datasets.py +28 -31
- eotdl/commands/models.py +27 -30
- eotdl/commands/stac.py +57 -0
- eotdl/curation/__init__.py +0 -8
- eotdl/curation/stac/__init__.py +1 -8
- eotdl/curation/stac/api.py +58 -0
- eotdl/curation/stac/stac.py +31 -341
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +28 -159
- eotdl/datasets/retrieve.py +0 -9
- eotdl/datasets/stage.py +64 -0
- eotdl/files/__init__.py +0 -2
- eotdl/files/ingest.bck +178 -0
- eotdl/files/ingest.py +229 -164
- eotdl/{datasets → files}/metadata.py +16 -17
- eotdl/models/__init__.py +1 -1
- eotdl/models/ingest.py +28 -159
- eotdl/models/stage.py +60 -0
- eotdl/repos/APIRepo.py +1 -1
- eotdl/repos/DatasetsAPIRepo.py +56 -43
- eotdl/repos/FilesAPIRepo.py +260 -167
- eotdl/repos/STACAPIRepo.py +40 -0
- eotdl/repos/__init__.py +1 -0
- eotdl/tools/geo_utils.py +7 -2
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/METADATA +5 -4
- eotdl-2025.3.25.dist-info/RECORD +65 -0
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/WHEEL +1 -1
- eotdl/curation/stac/assets.py +0 -110
- eotdl/curation/stac/dataframe.py +0 -172
- eotdl/curation/stac/dataframe_bck.py +0 -253
- eotdl/curation/stac/dataframe_labeling.py +0 -63
- eotdl/curation/stac/extensions/__init__.py +0 -23
- eotdl/curation/stac/extensions/base.py +0 -30
- eotdl/curation/stac/extensions/dem.py +0 -18
- eotdl/curation/stac/extensions/eo.py +0 -117
- eotdl/curation/stac/extensions/label/__init__.py +0 -7
- eotdl/curation/stac/extensions/label/base.py +0 -136
- eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
- eotdl/curation/stac/extensions/label/scaneo.py +0 -219
- eotdl/curation/stac/extensions/ml_dataset.py +0 -648
- eotdl/curation/stac/extensions/projection.py +0 -44
- eotdl/curation/stac/extensions/raster.py +0 -53
- eotdl/curation/stac/extensions/sar.py +0 -55
- eotdl/curation/stac/extent.py +0 -158
- eotdl/curation/stac/parsers.py +0 -61
- eotdl/datasets/download.py +0 -104
- eotdl/files/list_files.py +0 -13
- eotdl/models/download.py +0 -101
- eotdl/models/metadata.py +0 -43
- eotdl/wrappers/utils.py +0 -35
- eotdl-2024.10.7.dist-info/RECORD +0 -82
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/entry_points.txt +0 -0
@@ -1,53 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for raster STAC extensions object
|
3
|
-
"""
|
4
|
-
|
5
|
-
from typing import Union, Optional
|
6
|
-
|
7
|
-
import pystac
|
8
|
-
import rasterio
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
from pystac.extensions.raster import RasterExtension, RasterBand
|
12
|
-
from .base import STACExtensionObject
|
13
|
-
|
14
|
-
|
15
|
-
class RasterExtensionObject(STACExtensionObject):
    """
    Raster extension object

    Adds the STAC raster extension to assets, describing every band of the
    raster file the asset href points to.
    """

    def __init__(self) -> None:
        super().__init__()

    def add_extension_to_object(
        self,
        obj: Union[pystac.Item, pystac.Asset],
        obj_info: Optional[pd.DataFrame] = None,
    ) -> Union[pystac.Item, pystac.Asset]:
        """
        Add the extension to the given object

        Only assets are modified; any other object is returned untouched.

        :param obj: object to add the extension
        :param obj_info: object info from the STACDataFrame (not used here)
        :return: the object that was passed in
        """
        if not isinstance(obj, pystac.Asset):
            return obj

        raster_ext = RasterExtension.ext(obj, add_if_missing=True)
        bands = []
        # Open the raster in a context manager so the dataset handle is
        # released even if building the band list fails.
        with rasterio.open(obj.href) as src:
            for band in src.indexes:
                # The band index is shifted by one to look up the per-band
                # dtype / nodata tuples.
                band_kwargs = {
                    "data_type": src.dtypes[band - 1],
                    "spatial_resolution": src.res,
                }
                if src.nodatavals:
                    band_kwargs["nodata"] = src.nodatavals[band - 1]
                bands.append(RasterBand.create(**band_kwargs))
        raster_ext.apply(bands=bands)

        return obj
|
@@ -1,55 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for SAR STAC extensions object
|
3
|
-
"""
|
4
|
-
|
5
|
-
from typing import Optional, Union
|
6
|
-
|
7
|
-
import pystac
|
8
|
-
import pandas as pd
|
9
|
-
|
10
|
-
from pystac.extensions.sar import SarExtension
|
11
|
-
from pystac.extensions.sar import FrequencyBand, Polarization
|
12
|
-
|
13
|
-
from .base import STACExtensionObject
|
14
|
-
|
15
|
-
|
16
|
-
class SarExtensionObject(STACExtensionObject):
    """
    SAR extension object

    Applies the STAC SAR extension with fixed values: instrument mode "EW",
    frequency band C and product type "GRD".
    """

    def __init__(self) -> None:
        super().__init__()
        self.polarizations = [Polarization.VV, Polarization.VH]
        self.polarizations_dict = {"VV": Polarization.VV, "VH": Polarization.VH}

    def add_extension_to_object(
        self,
        obj: Union[pystac.Item, pystac.Asset],
        obj_info: Optional[pd.DataFrame] = None,
    ) -> Union[pystac.Item, pystac.Asset]:
        """
        Add the extension to the given object

        An asset whose title is a known polarization ("VV" or "VH") gets only
        that polarization; every other object gets both.

        :param obj: object to add the extension
        :param obj_info: object info from the STACDataFrame (not used here)
        :return: the object that was passed in
        """
        # Add SAR extension to the item
        sar_ext = SarExtension.ext(obj, add_if_missing=True)

        # A two-way choice guarantees `polarizations` is always bound before
        # it is handed to `apply`.
        is_single_polarization_asset = (
            isinstance(obj, pystac.Asset) and obj.title in self.polarizations_dict
        )
        if is_single_polarization_asset:
            polarizations = [self.polarizations_dict[obj.title]]
        else:
            polarizations = self.polarizations

        sar_ext.apply(
            instrument_mode="EW",
            polarizations=polarizations,
            frequency_band=FrequencyBand.C,
            product_type="GRD",
        )

        return obj
|
eotdl/curation/stac/extent.py
DELETED
@@ -1,158 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for STAC extent
|
3
|
-
"""
|
4
|
-
|
5
|
-
from datetime import datetime
|
6
|
-
from typing import List
|
7
|
-
|
8
|
-
import pystac
|
9
|
-
|
10
|
-
import rasterio
|
11
|
-
|
12
|
-
from ...tools import get_item_metadata
|
13
|
-
|
14
|
-
|
15
|
-
def get_dem_temporal_interval() -> pystac.TemporalExtent:
    """
    Get a temporal interval for DEM data

    :return: fixed interval from 2011-01-01 to 2015-01-07
    """
    start = datetime(2011, 1, 1)
    end = datetime(2015, 1, 7)
    interval = (start, end)

    return pystac.TemporalExtent([interval])
|
23
|
-
|
24
|
-
|
25
|
-
def get_unknow_temporal_interval() -> pystac.TemporalExtent:
    """
    Get an unknown temporal interval

    :return: generic interval from 2000-01-01 to 2023-12-31
    """
    start = datetime(2000, 1, 1)
    end = datetime(2023, 12, 31)
    interval = (start, end)

    return pystac.TemporalExtent([interval])
|
33
|
-
|
34
|
-
|
35
|
-
def get_unknow_extent() -> pystac.Extent:
    """
    Get an unknown extent

    :return: extent with an all-zero bounding box and the generic
        2000-01-01 to 2023-12-31 temporal interval
    """
    empty_bbox = pystac.SpatialExtent([[0, 0, 0, 0]])
    generic_interval = pystac.TemporalExtent(
        [(datetime(2000, 1, 1), datetime(2023, 12, 31))]
    )
    return pystac.Extent(spatial=empty_bbox, temporal=generic_interval)
|
50
|
-
|
51
|
-
|
52
|
-
def get_collection_extent(rasters: List[str]) -> pystac.Extent:
    """
    Get the extent of a collection

    :param rasters: list of rasters
    :return: extent combining the spatial and temporal extent of the rasters
    """
    # Spatial part first, then temporal, then wrap both in a single Extent.
    return pystac.Extent(
        spatial=get_collection_spatial_extent(rasters),
        temporal=get_collection_temporal_interval(rasters),
    )
|
66
|
-
|
67
|
-
|
68
|
-
def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
    """
    Get the spatial extent of a collection

    The extent is the bounding box enclosing the bounds of every raster,
    transformed to EPSG:4326. An all-zero bounding box is returned when no
    raster is given or as soon as one raster has a CRS that cannot be
    transformed.

    :param rasters: list of rasters
    :return: spatial extent enclosing all the rasters
    """
    # Imported explicitly: `import rasterio` alone does not guarantee that the
    # `rasterio.warp` / `rasterio.errors` submodules are available as
    # attributes of the package.
    from rasterio.errors import CRSError
    from rasterio.warp import transform_bounds

    dst_crs = "EPSG:4326"

    # Get the bounding boxes of all the given rasters
    bboxes = []
    for raster in rasters:
        with rasterio.open(raster) as ds:
            try:
                left, bottom, right, top = transform_bounds(
                    ds.crs, dst_crs, *ds.bounds
                )
            except CRSError:
                # One untransformable raster makes the whole extent unknown.
                return pystac.SpatialExtent([[0, 0, 0, 0]])
        bboxes.append([left, bottom, right, top])

    # Get the minimum and maximum values of the bounding boxes
    try:
        left = min(bbox[0] for bbox in bboxes)
        bottom = min(bbox[1] for bbox in bboxes)
        right = max(bbox[2] for bbox in bboxes)
        top = max(bbox[3] for bbox in bboxes)
        spatial_extent = pystac.SpatialExtent([[left, bottom, right, top]])
    except ValueError:
        # min()/max() raise ValueError on an empty sequence (no rasters).
        spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])

    # Returned after the try block (not from a `finally`) so that unexpected
    # errors propagate instead of being replaced or swallowed.
    return spatial_extent
|
100
|
-
|
101
|
-
|
102
|
-
def get_collection_temporal_interval(rasters: List[str]) -> pystac.TemporalExtent:
    """
    Get the temporal interval of a collection

    The interval spans the earliest and latest "acquisition-date" found in
    the metadata of the rasters. When no date is available, the DEM interval
    is used if every declared type is "dem"/"DEM"; otherwise a generic
    interval is returned.

    :param rasters: list of rasters
    :return: temporal extent of the collection
    """
    # Get the metadata of all the given rasters, skipping the missing ones
    metadata_jsons = []
    for raster in rasters:
        metadata_json = get_item_metadata(raster)
        if metadata_json:
            metadata_jsons.append(metadata_json)

    if not metadata_jsons:
        # If there is no metadata, set a generic temporal interval
        return get_unknow_temporal_interval()

    # Collect the acquisition date and the data type of every metadata entry
    acquisition_dates = []
    data_types = []
    for metadata in metadata_jsons:
        if metadata["acquisition-date"]:
            acquisition_dates.append(metadata["acquisition-date"])
        if metadata["type"]:
            data_types.append(metadata["type"])

    if acquisition_dates:
        try:
            # Parse every date once, then take the extremes
            parsed_dates = [
                datetime.strptime(date, "%Y-%m-%d") for date in acquisition_dates
            ]
        except (ValueError, TypeError):
            # Malformed or non-string dates: fall back to the generic
            # interval instead of emitting an interval with no bounds.
            return get_unknow_temporal_interval()
        return pystac.TemporalExtent([(min(parsed_dates), max(parsed_dates))])

    # Check if the collection is composed by DEM data. If not, set a generic
    # temporal interval
    if data_types and set(data_types) <= {"dem", "DEM"}:
        return get_dem_temporal_interval()
    return get_unknow_temporal_interval()
|
eotdl/curation/stac/parsers.py
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Module for STAC parsers
|
3
|
-
"""
|
4
|
-
|
5
|
-
from os.path import dirname, basename
|
6
|
-
|
7
|
-
|
8
|
-
class STACIdParser:
    """
    STAC ID parser base class

    Subclasses override `get_item_id`; this base implementation yields None.
    """

    def get_item_id(self, raster_path: str):
        """
        Get the ID of the STAC Item from the given raster path

        :param raster_path: path to the raster file
        :return: None in the base class
        """
        return None
|
19
|
-
|
20
|
-
|
21
|
-
class StructuredParser(STACIdParser):
    """
    Structured STAC ID parser
    """

    def __init__(self) -> None:
        super().__init__()

    def get_item_id(self, raster_path: str):
        """
        Get the ID of the STAC Item from the given raster path.
        This function assumes that the project given by the user is structured,
        meaning that the raster files are stored in a folder whose name is
        used as the item ID.

        :param raster_path: path to the raster file
        :return: name of the folder that directly contains the raster
        """
        # basename/dirname honour the platform path separators, unlike
        # splitting on a hard-coded "/".
        return basename(dirname(raster_path))
|
41
|
-
|
42
|
-
|
43
|
-
class UnestructuredParser(STACIdParser):
    """
    Unstructured STAC ID parser
    """

    def __init__(self) -> None:
        super().__init__()

    def get_item_id(self, raster_path: str):
        """
        Get the ID of the STAC Item from the given raster path.
        This function assumes that the project given by the user is
        unstructured, so the ID is taken from the file name itself.

        :param raster_path: path to the raster file
        :return: file name up to (not including) its first dot
        """
        file_name = basename(raster_path)
        stem, _, _ = file_name.partition(".")

        return stem
|
eotdl/datasets/download.py
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from pathlib import Path
|
3
|
-
from tqdm import tqdm
|
4
|
-
|
5
|
-
from ..auth import with_auth
|
6
|
-
from .retrieve import retrieve_dataset, retrieve_dataset_files
|
7
|
-
from ..repos import FilesAPIRepo, DatasetsAPIRepo
|
8
|
-
from ..curation.stac import STACDataFrame
|
9
|
-
from .metadata import generate_metadata
|
10
|
-
|
11
|
-
|
12
|
-
@with_auth
def download_dataset(
    dataset_name,
    version=None,
    path=None,
    logger=print,
    assets=False,
    force=False,
    verbose=False,
    user=None,
    file=None,
):
    """
    Download a dataset version to the local file system

    Datasets with quality 0 are downloaded file by file and get a generated
    README.md; any other quality is downloaded as STAC metadata, optionally
    followed by its assets.

    :param dataset_name: name of the dataset to download
    :param version: version id to download; the highest one when None
    :param path: base directory for the download; when None the
        EOTDL_DOWNLOAD_PATH environment variable or ~/.cache/eotdl/datasets
        is used
    :param logger: callable used to report progress messages
    :param assets: also download the assets of a STAC dataset
    :param force: download even if the destination already exists
    :param verbose: log progress messages (and disable the file progress bar)
    :param user: user passed to the repositories for authentication
    :param file: single file to download (not implemented)
    :return: path where the dataset was downloaded
    :raises AssertionError: if the requested version does not exist
    :raises NotImplementedError: if `file` is given
    :raises Exception: if the destination exists and `force` is False, or if
        the STAC metadata cannot be downloaded
    """
    dataset = retrieve_dataset(dataset_name)
    if version is None:
        version = sorted(dataset["versions"], key=lambda v: v["version_id"])[-1][
            "version_id"
        ]
    else:
        # NOTE(review): assert is stripped under `python -O`; kept so callers
        # still see AssertionError.
        assert version in [
            v["version_id"] for v in dataset["versions"]
        ], f"Version {version} not found"
    download_base_path = os.getenv(
        "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/datasets"
    )
    if path is None:
        download_path = download_base_path + "/" + dataset_name + "/v" + str(version)
    else:
        download_path = path + "/" + dataset_name + "/v" + str(version)
    # check if dataset already exists
    if os.path.exists(download_path) and not force:
        raise Exception(
            f"Dataset `{dataset['name']} v{str(version)}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
        )
    if dataset["quality"] == 0:
        if file:
            raise NotImplementedError("Downloading a specific file is not implemented")
        dataset_files = retrieve_dataset_files(dataset["id"], version)
        repo = FilesAPIRepo()
        # The loop variable is not called `file` so it does not shadow the
        # `file` parameter.
        for dataset_file in tqdm(
            dataset_files, disable=verbose, unit="file", position=0
        ):
            filename = dataset_file["filename"]
            file_version = dataset_file["version"]
            if verbose:
                logger(f"Downloading {filename}...")
            repo.download_file(
                dataset["id"],
                filename,
                user,
                download_path,
                file_version,
                progress=True,
            )
        if verbose:
            logger("Generating README.md ...")
        # Make sure the destination exists before writing the README, e.g.
        # when the version has no files.
        os.makedirs(download_path, exist_ok=True)
        generate_metadata(download_path, dataset)
    else:
        if verbose:
            logger("Downloading STAC metadata...")
        repo = DatasetsAPIRepo()
        gdf, error = repo.download_stac(
            dataset["id"],
            user,
        )
        if error:
            raise Exception(error)
        df = STACDataFrame(gdf)
        df.to_stac(download_path)
        # download assets
        if assets:
            if verbose:
                logger("Downloading assets...")
            repo = FilesAPIRepo()
            df = df.dropna(subset=["assets"])
            for _, row in tqdm(df.iterrows(), total=len(df)):
                for asset in row["assets"].values():
                    href = asset["href"]
                    _, filename = href.split("/download/")
                    # will overwrite assets with same name :(
                    repo.download_file_url(
                        href, filename, f"{download_path}/assets", user
                    )
        else:
            logger("To download assets, set assets=True or -a in the CLI.")
    if verbose:
        logger("Done")
    return download_path
|
98
|
-
|
99
|
-
|
100
|
-
@with_auth
def download_file_url(url, path, progress=True, logger=print, user=None):
    """
    Download a single file from its download URL into `<path>/assets`

    :param url: URL containing exactly one "/download/" segment; the text
        after it is used as the file name
    :param path: base directory; the file is stored under its "assets" folder
    :param progress: forwarded to the repository download call
    :param logger: not used by this function
    :param user: user passed to the repository for authentication
    :return: whatever the repository download call returns
    """
    files_repo = FilesAPIRepo()
    _prefix, filename = url.split("/download/")
    assets_dir = f"{path}/assets"
    return files_repo.download_file_url(url, filename, assets_dir, user, progress)
|
eotdl/files/list_files.py
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
from ..datasets import retrieve_dataset, retrieve_dataset_files
|
2
|
-
from ..models import retrieve_model, retrieve_model_files
|
3
|
-
|
4
|
-
def list_files(dataset_or_model_name, version=1):
    """
    List the files of a dataset or, failing that, of a model

    The name is looked up as a dataset first; if anything fails there it is
    looked up as a model.

    :param dataset_or_model_name: name of the dataset or model
    :param version: version whose files are listed
    :return: result of the dataset or model file retrieval
    :raises Exception: if neither lookup succeeds
    """
    try:
        dataset = retrieve_dataset(dataset_or_model_name)
        return retrieve_dataset_files(dataset["id"], version)
    except Exception:
        # Deliberate fallback: any dataset failure triggers the model lookup.
        try:
            model = retrieve_model(dataset_or_model_name)
            return retrieve_model_files(model["id"], version)
        except Exception as model_error:
            # Chain the cause so the underlying failure stays visible.
            raise Exception(
                f"Dataset or model {dataset_or_model_name} not found."
            ) from model_error
|
eotdl/models/download.py
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from pathlib import Path
|
3
|
-
from tqdm import tqdm
|
4
|
-
|
5
|
-
from ..auth import with_auth
|
6
|
-
from .retrieve import retrieve_model, retrieve_model_files
|
7
|
-
from ..shared import calculate_checksum
|
8
|
-
from ..repos import FilesAPIRepo, ModelsAPIRepo
|
9
|
-
from .metadata import generate_metadata
|
10
|
-
from ..curation.stac import STACDataFrame
|
11
|
-
|
12
|
-
|
13
|
-
@with_auth
def download_model(
    model_name,
    version=None,
    path=None,
    logger=None,
    assets=False,
    force=False,
    verbose=False,
    user=None,
    file=None,
):
    """
    Download a model version to the local file system

    Models with quality 0 are downloaded file by file and get a generated
    README.md; any other quality is downloaded as STAC metadata, optionally
    followed by its assets.

    :param model_name: name of the model to download
    :param version: version id to download; the highest one when None
    :param path: base directory for the download; when None the
        EOTDL_DOWNLOAD_PATH environment variable or ~/.cache/eotdl/models
        is used
    :param logger: callable used to report progress messages; `print` is
        used when None
    :param assets: also download the assets of a STAC model
    :param force: download even if the destination already exists
    :param verbose: log progress messages (and disable the file progress bar)
    :param user: user passed to the repositories for authentication
    :param file: single file to download (not implemented)
    :return: path where the model was downloaded
    :raises AssertionError: if the requested version does not exist
    :raises NotImplementedError: if `file` is given
    :raises Exception: if the destination exists and `force` is False, or if
        the STAC metadata cannot be downloaded
    """
    # The default is None but the logger is called below, so fall back to
    # print instead of failing with "NoneType is not callable".
    if logger is None:
        logger = print
    model = retrieve_model(model_name)
    if version is None:
        version = sorted(model["versions"], key=lambda v: v["version_id"])[-1][
            "version_id"
        ]
    else:
        # NOTE(review): assert is stripped under `python -O`; kept so callers
        # still see AssertionError.
        assert version in [
            v["version_id"] for v in model["versions"]
        ], f"Version {version} not found"
    download_base_path = os.getenv(
        "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/models"
    )
    if path is None:
        download_path = download_base_path + "/" + model_name + "/v" + str(version)
    else:
        download_path = path + "/" + model_name + "/v" + str(version)
    # check if model already exists
    if os.path.exists(download_path) and not force:
        raise Exception(
            f"model `{model['name']} v{str(version)}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
        )
    if model["quality"] == 0:
        if file:
            raise NotImplementedError("Downloading a specific file is not implemented")
        model_files = retrieve_model_files(model["id"], version)
        repo = FilesAPIRepo()
        # The loop variable is not called `file` so it does not shadow the
        # `file` parameter.
        for model_file in tqdm(model_files, disable=verbose, unit="file"):
            filename = model_file["filename"]
            file_version = model_file["version"]
            if verbose:
                logger(f"Downloading {filename}...")
            repo.download_file(
                model["id"],
                filename,
                user,
                download_path,
                file_version,
                endpoint="models",
            )
        if verbose:
            logger("Generating README.md ...")
        # Make sure the destination exists before writing the README, e.g.
        # when the version has no files.
        os.makedirs(download_path, exist_ok=True)
        generate_metadata(download_path, model)
    else:
        if verbose:
            logger("Downloading STAC metadata...")
        repo = ModelsAPIRepo()
        gdf, error = repo.download_stac(
            model["id"],
            user,
        )
        if error:
            raise Exception(error)
        df = STACDataFrame(gdf)
        df.to_stac(download_path)
        # download assets
        if assets:
            if verbose:
                logger("Downloading assets...")
            repo = FilesAPIRepo()
            df = df.dropna(subset=["assets"])
            for _, row in tqdm(df.iterrows(), total=len(df)):
                for asset in row["assets"].values():
                    href = asset["href"]
                    _, filename = href.split("/download/")
                    # will overwrite assets with same name :(
                    repo.download_file_url(
                        href, filename, f"{download_path}/assets", user
                    )
        else:
            logger("To download assets, set assets=True or -a in the CLI.")
    if verbose:
        logger("Done")
    return download_path
|
eotdl/models/metadata.py
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
from pydantic import BaseModel, validator
|
2
|
-
from typing import List, Optional
|
3
|
-
from pathlib import Path
|
4
|
-
|
5
|
-
|
6
|
-
class Metadata(BaseModel):
    """
    Model metadata: authors, license, source URL, name and optional thumbnail
    """

    authors: List[str]
    license: str
    source: str
    name: str
    thumbnail: Optional[str] = ""

    # validate source is a URL
    @validator("source")
    def source_is_url(cls, v):
        # "https" also starts with "http", so one prefix test covers both.
        if v.startswith("http"):
            return v
        raise ValueError("source must be a URL")

    # validate thumbnail is a url
    @validator("thumbnail")
    def thumbnail_is_url(cls, v):
        # "https" also starts with "http", so one prefix test covers both.
        if v.startswith("http"):
            return v
        raise ValueError("thumbnail must be a URL")
|
26
|
-
|
27
|
-
|
28
|
-
def generate_metadata(download_path, model):
    """
    Write the model metadata to `<download_path>/README.md`

    The file starts with a header delimited by `---` lines (name, license,
    source, thumbnail and the list of authors) followed by the description.
    An existing `metadata.yml` in the same directory is removed.

    :param download_path: directory, as a string, where README.md is written
    :param model: mapping with the keys "name", "license", "source",
        "thumbnail", "authors" and "description"
    :return: path of the generated README.md
    """
    readme_path = download_path + "/README.md"

    lines = [
        "---\n",
        f"name: {model['name']}\n",
        f"license: {model['license']}\n",
        f"source: {model['source']}\n",
        f"thumbnail: {model['thumbnail']}\n",
        "authors:\n",
    ]
    lines.extend(f" - {author}\n" for author in model["authors"])
    lines.append("---\n")
    lines.append(model["description"])

    # Explicit UTF-8 so non-ASCII names/descriptions do not depend on the
    # platform default encoding.
    with open(readme_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    # remove metadata.yml if exists
    legacy_metadata = Path(download_path + "/metadata.yml")
    if legacy_metadata.exists():
        legacy_metadata.unlink()
    return readme_path
|
eotdl/wrappers/utils.py
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
from ..curation.stac import STACDataFrame
|
2
|
-
|
3
|
-
def download_model(model_name, dst_path, version, force=False, download=True):
    """
    Retrieve the STAC metadata of a model and optionally download its assets

    :param model_name: name of the model
    :param dst_path: base directory, as a string, for the download
    :param version: version id; 1 is used when None
    :param force: ignore an existing download and fetch again
    :param download: when False, only return the STAC dataframe without
        writing anything
    :return: tuple with the download path and the STAC dataframe
    :raises AssertionError: if the version does not exist
    :raises Exception: if the model or its STAC metadata cannot be
        retrieved, or if the model quality is below 2
    """
    # `os` is used below but is not imported at module level.
    import os

    # NOTE(review): retrieve_model, retrieve_model_stac and download_file_url
    # are not imported in the visible module either - confirm where they
    # should come from before relying on this function.

    # check if model already downloaded
    version = 1 if version is None else version
    download_path = dst_path + "/" + model_name + "/v" + str(version)
    if os.path.exists(download_path) and not force:
        df = STACDataFrame.from_stac_file(download_path + f"/{model_name}/catalog.json")
        return download_path, df
    # check model exists
    model, error = retrieve_model(model_name)
    if error:
        raise Exception(error)
    if model["quality"] < 2:
        raise Exception("Only Q2+ models are supported")
    # check version exist
    assert version in [
        v["version_id"] for v in model["versions"]
    ], f"Version {version} not found"
    # download model files
    gdf, error = retrieve_model_stac(model["id"], version)
    if error:
        raise Exception(error)
    df = STACDataFrame(gdf)
    if not download:
        return download_path, df
    os.makedirs(download_path, exist_ok=True)
    df.to_stac(download_path)
    df = df.dropna(subset=["assets"])
    for _, row in df.iterrows():
        for asset in row["assets"].values():
            href = asset["href"]
            _, filename = href.split("/download/")
            download_file_url(href, filename, f"{download_path}/assets")
    return download_path, df
|