eotdl 2025.3.25-py3-none-any.whl → 2025.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/__init__.py +13 -3
- eotdl/access/download.py +47 -13
- eotdl/access/search.py +33 -5
- eotdl/access/sentinelhub/__init__.py +6 -2
- eotdl/access/sentinelhub/client.py +9 -8
- eotdl/access/sentinelhub/evalscripts.py +266 -0
- eotdl/access/sentinelhub/parameters.py +101 -23
- eotdl/access/sentinelhub/utils.py +55 -20
- eotdl/curation/stac/stac.py +1 -1
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +9 -2
- eotdl/datasets/stage.py +5 -5
- eotdl/files/ingest.py +17 -11
- eotdl/models/download.py +101 -0
- eotdl/models/ingest.py +11 -3
- eotdl/models/stage.py +4 -1
- eotdl/repos/FilesAPIRepo.py +1 -1
- eotdl/repos/ModelsAPIRepo.py +50 -42
- eotdl/tools/time_utils.py +3 -3
- {eotdl-2025.3.25.dist-info → eotdl-2025.4.2.dist-info}/METADATA +19 -32
- {eotdl-2025.3.25.dist-info → eotdl-2025.4.2.dist-info}/RECORD +25 -24
- {eotdl-2025.3.25.dist-info → eotdl-2025.4.2.dist-info}/WHEEL +1 -1
- eotdl-2025.4.2.dist-info/entry_points.txt +2 -0
- eotdl-2025.3.25.dist-info/entry_points.txt +0 -3
eotdl/access/sentinelhub/parameters.py
CHANGED
@@ -2,15 +2,27 @@
 Utils
 """
 
-from sentinelhub import DataCollection, MosaickingOrder
-
+from sentinelhub import DataCollection, MosaickingOrder, MimeType
 from .evalscripts import EvalScripts
 
 
+class OUTPUT_FORMAT:
+    TIFF = MimeType.TIFF
+    JPG = MimeType.JPG
+    PNG = MimeType.PNG
+
+
 class SHParameters:
     """
     Sentinel Hub Parameters base class
     """
+
+    MAX_CLOUD_COVERAGE: float = None
+    FIELDS = None
+    MOSAICKING_ORDER = MosaickingOrder.MOST_RECENT
+    EVALSCRIPT = None
+    OUTPUT_FORMAT = MimeType.TIFF
+
     def __init__(self):
         pass
 
@@ -19,8 +31,8 @@ class SHS2L2AParameters(SHParameters):
     """
     Sentinel-2-L2A parameters
     """
+
     DATA_COLLECTION = DataCollection.SENTINEL2_L2A
-    RESOLUTION = 10
     MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
     EVALSCRIPT = EvalScripts.SENTINEL_2_L2A
     FIELDS = {
@@ -28,14 +40,17 @@ class SHS2L2AParameters(SHParameters):
         "exclude": [],
     }
     FILTER = None
+    RESOLUTION = 10
+    BASE_URL = "https://services.sentinel-hub.com"
+    CLOUD_COVERAGE = True
 
 
 class SHS2L1CParameters(SHParameters):
     """
     Sentinel-2-L1C parameters
     """
+
     DATA_COLLECTION = DataCollection.SENTINEL2_L1C
-    RESOLUTION = 10
     MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
     EVALSCRIPT = EvalScripts.SENTINEL_2_L1C
     FIELDS = {
@@ -43,47 +58,110 @@ class SHS2L1CParameters(SHParameters):
         "exclude": [],
     }
     FILTER = None
+    RESOLUTION = 10
+    BASE_URL = "https://services.sentinel-hub.com"
+    CLOUD_COVERAGE = True
+
 
 class SHS1Parameters(SHParameters):
     """
     Sentinel-1 parameters
     """
+
     DATA_COLLECTION = DataCollection.SENTINEL1
-    RESOLUTION = 3
     EVALSCRIPT = EvalScripts.SENTINEL_1
-    MOSAICKING_ORDER =
+    MOSAICKING_ORDER = MosaickingOrder.MOST_RECENT
     FIELDS = {
-
-
-
-
-
-
-
-
-
-
-
+        "include": [
+            "id",
+            "properties.datetime",
+            "sar:instrument_mode",
+            "s1:polarization",
+            "sat:orbit_state",
+            "s1:resolution",
+            "s1:timeliness",
+        ],
+        "exclude": [],
+    }
     FILTER = None
+    RESOLUTION = 3
+    BASE_URL = "https://services.sentinel-hub.com"
+    CLOUD_COVERAGE = False
 
 
 class SHDEMParameters(SHParameters):
     """
     Copernicus DEM parameters
     """
+
     DATA_COLLECTION = DataCollection.DEM_COPERNICUS_30
-    RESOLUTION = 3
     MOSAICKING_ORDER = None
     EVALSCRIPT = EvalScripts.DEM
+    FIELDS = None
+    FILTER = None
+    RESOLUTION = 3
+    BASE_URL = "https://services.sentinel-hub.com"
+    CLOUD_COVERAGE = False
+
+
+class SHHarmonizedLandsatSentinel(SHParameters):
+    """
+    Harmonized Landsat Sentinel parameters
+    """
+
+    DATA_COLLECTION = DataCollection.HARMONIZED_LANDSAT_SENTINEL
+    MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
+    EVALSCRIPT = EvalScripts.HLS_TRUE_COLOR
+    FIELDS = None
     FILTER = None
+    RESOLUTION = 10
+    BASE_URL = "https://services-uswest2.sentinel-hub.com"
+    CLOUD_COVERAGE = True
+
+
+class SHLandsatOTL2(SHParameters):
+    """
+    Landsat 8-9 Collection 2 imagery processed to level 2
+    """
+
+    DATA_COLLECTION = DataCollection.LANDSAT_OT_L2
+    MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
+    EVALSCRIPT = EvalScripts.LANDSAT_OT_L2_TRUE_COLOR
     FIELDS = None
+    FILTER = None
+    RESOLUTION = 10
+    BASE_URL = "https://services-uswest2.sentinel-hub.com"
+    CLOUD_COVERAGE = True
+
 
+class DATA_COLLECTION_ID:
+    SENTINEL_1_GRD = DataCollection.SENTINEL1.api_id
+    SENTINEL_2_L1C = DataCollection.SENTINEL2_L1C.api_id
+    SENTINEL_2_L2A = DataCollection.SENTINEL2_L2A.api_id
+    DEM = DataCollection.DEM_COPERNICUS_30.api_id
+    HLS = DataCollection.HARMONIZED_LANDSAT_SENTINEL.api_id
+    LANDSAT_OT_L2 = DataCollection.LANDSAT_OT_L2.api_id
 
-
+
+SUPPORTED_COLLECTION_IDS = [
+    value
+    for name, value in DATA_COLLECTION_ID.__dict__.items()
+    if not name.startswith("__")
+]
 
 SH_PARAMETERS_DICT = {
-
-
-
-
+    DATA_COLLECTION_ID.SENTINEL_1_GRD: SHS1Parameters,
+    DATA_COLLECTION_ID.SENTINEL_2_L1C: SHS2L1CParameters,
+    DATA_COLLECTION_ID.SENTINEL_2_L2A: SHS2L2AParameters,
+    DATA_COLLECTION_ID.DEM: SHDEMParameters,
+    DATA_COLLECTION_ID.HLS: SHHarmonizedLandsatSentinel,
+    DATA_COLLECTION_ID.LANDSAT_OT_L2: SHLandsatOTL2,
 }
+
+
+def get_default_parameters(collection_id: str) -> SHParameters:
+    return SH_PARAMETERS_DICT[collection_id]()
+
+
+def supports_cloud_coverage(collection_id: str):
+    return SH_PARAMETERS_DICT[collection_id]().CLOUD_COVERAGE
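The new module-level helpers in parameters.py map a Sentinel Hub collection id to its default request parameters. A minimal usage sketch (the import path is assumed from the package layout listed above; the printed values simply echo the class attributes introduced in this diff):

```python
from eotdl.access.sentinelhub.parameters import (
    DATA_COLLECTION_ID,
    get_default_parameters,
    supports_cloud_coverage,
)

# Default parameters for Sentinel-2 L2A, keyed by the sentinelhub api_id.
params = get_default_parameters(DATA_COLLECTION_ID.SENTINEL_2_L2A)
print(params.RESOLUTION)     # 10
print(params.BASE_URL)       # https://services.sentinel-hub.com
print(params.OUTPUT_FORMAT)  # MimeType.TIFF (the new default)

# Only collections with CLOUD_COVERAGE = True accept a cloud filter.
if supports_cloud_coverage(DATA_COLLECTION_ID.SENTINEL_2_L2A):
    params.MAX_CLOUD_COVERAGE = 20  # validated later to be within 0-100
```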
eotdl/access/sentinelhub/utils.py
CHANGED
@@ -5,22 +5,23 @@ Utils for Sentinel Hub access
 import json
 
 from os import makedirs
-from datetime import datetime
-from typing import Union, Optional
+from datetime import datetime, timedelta
+from typing import Union, Optional, Iterable, List
 from glob import glob
 from shutil import copyfile, rmtree
 
-from .parameters import
+from .parameters import SUPPORTED_COLLECTION_IDS, SHParameters, OUTPUT_FORMAT
 from ...tools.geo_utils import is_bounding_box, get_image_bbox
 from ...tools.time_utils import is_time_interval, get_day_between
 
 
 def evaluate_sentinel_parameters(
-    sensor: str,
     time_interval: Union[str, datetime],
     bounding_box: list,
+    collection_id: Optional[str] = None,
     output: Optional[str] = None,
    output_needed: Optional[bool] = True,
+    parameters: Optional[SHParameters] = None,
 ) -> None:
     """
     Evaluate parameters for Sentinel Hub access
@@ -28,10 +29,20 @@ def evaluate_sentinel_parameters(
     if output_needed:
         if not output:
             raise ValueError("Output path must be specified.")
-
-
-
-
+        if parameters and not parameters.OUTPUT_FORMAT:
+            raise ValueError("Output format must be specified.")
+    if collection_id:
+        if collection_id not in SUPPORTED_COLLECTION_IDS:
+            raise ValueError(
+                f"Collection id {collection_id} is not supported. Supported collections ids are: {SUPPORTED_COLLECTION_IDS}"
+            )
+    else:
+        if not (
+            parameters
+            and hasattr(parameters, "DATA_COLLECTION")
+            and hasattr(parameters.DATA_COLLECTION, "api_id")
+        ):
+            raise ValueError(f"Data collection is not defined properly.")
     if not time_interval:
         raise ValueError("Time interval must be specified.")
     else:
@@ -46,26 +57,34 @@ def evaluate_sentinel_parameters(
         raise ValueError(
             "Bounding box must be a list or tuple with four elements in format (lon_min, lat_min, lon_max, lat_max)."
         )
+    if parameters and parameters.MAX_CLOUD_COVERAGE:
+        if not isinstance(parameters.MAX_CLOUD_COVERAGE, (int, float)) or (
+            parameters.MAX_CLOUD_COVERAGE < 0 or parameters.MAX_CLOUD_COVERAGE > 100
+        ):
+            raise ValueError("Max cloud coverage must be a number between 0 and 100.")
 
 
 def imagery_from_tmp_to_dir(
     output_dir: str,
-
+    bounding_box: List[Union[int, float]],
+    tmp_dir: Optional[str],
     name: Optional[str] = None,
     bulk: Optional[bool] = False,
+    output_format: Optional[str] = OUTPUT_FORMAT.TIFF,
 ) -> None:
     """
     Copy imagery from tmp to output dir
     """
-
+    format = output_format
+    downloaded_files = glob(f"{tmp_dir}/**/response." + format)
+
     if len(downloaded_files) == 0:
         return
-
     makedirs(output_dir, exist_ok=True)
-
     for downloaded_file in downloaded_files:
-        request_json = downloaded_file.replace("response.
-        metadata = generate_raster_metadata(
+        request_json = downloaded_file.replace("response." + format, "request.json")
+        metadata = generate_raster_metadata(request_json, bounding_box)
+
         if name and not bulk:
             output_filename = name
         elif name and bulk:
@@ -75,19 +94,16 @@ def imagery_from_tmp_to_dir(
             output_filename = f"{metadata['type']}_{metadata['acquisition-date']}"
         else:
             output_filename = metadata["type"]
-
-        copyfile(downloaded_file, f"{output_dir}/{output_filename}.tif")
+        copyfile(downloaded_file, f"{output_dir}/{output_filename}." + format)
         with open(f"{output_dir}/{output_filename}.json", "w", encoding="utf-8") as f:
             json.dump(metadata, f)
-
     rmtree(tmp_dir)
 
 
-def generate_raster_metadata(
+def generate_raster_metadata(request_json: str, bounding_box) -> None:
     """
     Generate metadata for raster
     """
-    bbox = get_image_bbox(raster)
     with open(request_json, "r", encoding="utf-8") as f:
         json_content = json.load(f)
 
@@ -102,8 +118,27 @@ def generate_raster_metadata(raster: str, request_json: str) -> None:
 
     metadata = {
         "acquisition-date": acquisition_date,
-        "bounding-box":
+        "bounding-box": bounding_box,
         "type": sensor_type,
     }
 
     return metadata
+
+
+def filter_times(
+    timestamps: Iterable[datetime], time_difference: timedelta
+) -> list[datetime]:
+    """
+    Filters out timestamps within time_difference, preserving only the oldest timestamp.
+    """
+    timestamps = sorted(set(timestamps))
+
+    filtered_timestamps: list[datetime] = []
+    for current_timestamp in timestamps:
+        if (
+            not filtered_timestamps
+            or current_timestamp - filtered_timestamps[-1] > time_difference
+        ):
+            filtered_timestamps.append(current_timestamp)
+
+    return filtered_timestamps
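filter_times, added at the end of utils.py, collapses acquisition timestamps that fall within a given window, keeping only the oldest of each cluster. A small illustrative sketch (the import path is assumed from the file changed above; the timestamps are made up):

```python
from datetime import datetime, timedelta

from eotdl.access.sentinelhub.utils import filter_times

timestamps = [
    datetime(2025, 1, 1, 10, 0),
    datetime(2025, 1, 1, 10, 30),  # within 1 h of the previous one -> dropped
    datetime(2025, 1, 1, 12, 0),
    datetime(2025, 1, 1, 12, 0),   # exact duplicate -> dropped
]

unique = filter_times(timestamps, timedelta(hours=1))
# [datetime(2025, 1, 1, 10, 0), datetime(2025, 1, 1, 12, 0)]
```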
eotdl/curation/stac/stac.py
CHANGED
@@ -11,7 +11,7 @@ def create_stac_catalog(parquet_catalog_path, stac_catalog = None):
     items = []
     for item in tqdm(stac_geoparquet.arrow.stac_table_to_items(table), total=len(table)):
         item = pystac.Item.from_dict(item)
-        item.validate()
+        # item.validate()
         # collection.add_item(item)
         if stac_catalog is not None:
             stac_catalog.add_item(item)
eotdl/datasets/__init__.py
CHANGED
eotdl/datasets/ingest.py
CHANGED
@@ -1,12 +1,11 @@
 from pathlib import Path
 
 from ..repos import DatasetsAPIRepo
-from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
+from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual
 
 def retrieve_dataset(metadata, user):
     repo = DatasetsAPIRepo()
     data, error = repo.retrieve_dataset(metadata.name)
-    # print(data, error)
     if data and data["uid"] != user["uid"]:
         raise Exception("Dataset already exists.")
     if error and error == "Dataset doesn't exist":
@@ -34,3 +33,11 @@ def ingest_dataset(
     return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
 
 
+def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
+    path,
+    links,
+    metadata = None,
+    logger=print,
+    user=None,
+):
+    return ingest_virtual(path, links, DatasetsAPIRepo(), retrieve_dataset, 'datasets', metadata, logger)
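ingest_virtual_dataset is the new public wrapper around ingest_virtual for datasets whose assets stay at external URLs. A hedged sketch of a call (folder path and links are placeholders; authentication comes from the @with_auth decorator on the underlying ingest):

```python
from eotdl.datasets.ingest import ingest_virtual_dataset

# Local folder for the generated README.md / catalog.parquet, plus external
# asset links that are registered but not uploaded to EOTDL.
ingest_virtual_dataset(
    "data/my-virtual-dataset",  # placeholder path
    links=[
        "https://example.com/scene_1.tif",
        "https://example.com/scene_2.tif",
    ],
)
```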
eotdl/datasets/stage.py
CHANGED
@@ -6,6 +6,7 @@ import geopandas as gpd
 from ..auth import with_auth
 from .retrieve import retrieve_dataset
 from ..repos import FilesAPIRepo
+from ..files.metadata import Metadata
 
 @with_auth
 def stage_dataset(
@@ -42,19 +43,18 @@ def stage_dataset(
         raise Exception(
             f"Dataset `{dataset['name']}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
         )
-
     # stage metadata
     repo = FilesAPIRepo()
     catalog_path = repo.stage_file(dataset["id"], f"catalog.v{version}.parquet", user, download_path)
-
-
-
+    # stage README.md
+    metadata = Metadata(**dataset['metadata'], name=dataset['name'])
+    metadata.save_metadata(download_path)
+    # stage assets
     if assets:
         gdf = gpd.read_parquet(catalog_path)
         for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Staging assets"):
             for k, v in row["assets"].items():
                 stage_dataset_file(v["href"], download_path)
-
     return download_path
 
 
eotdl/files/ingest.py
CHANGED
@@ -56,6 +56,15 @@ def prep_ingest_stac(path, logger=None): # in theory should work with a remote c
     # iterate over items
     for item in tqdm(collection.get_items(), desc=f"Ingesting items from collection {collection.id}"):
         assert isinstance(item, pystac.Item)
+        # Process each asset in the item
+        for asset in item.assets.values():
+            if not asset.href.startswith(('http://', 'https://')):
+                # Asset is a local file
+                file_path = Path(asset.href)
+                # Calculate and add file size
+                asset.extra_fields['size'] = file_path.stat().st_size
+                # Calculate and add checksum
+                asset.extra_fields['checksum'] = calculate_checksum(str(file_path))
         items.append(item)
     # save parquet file
     record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(items)
@@ -63,13 +72,14 @@ def prep_ingest_stac(path, logger=None): # in theory should work with a remote c
     stac_geoparquet.arrow.to_parquet(record_batch_reader, output_path)
     return output_path
 
-
-def ingest_virutal_dataset( # could work for a list of paths with minimal changes...
+def ingest_virtual( # could work for a list of paths with minimal changes...
     path,
     links,
+    repo,
+    retrieve,
+    mode,
     metadata = None,
     logger=print,
-    user=None,
 ):
     path = Path(path)
     if metadata is None:
@@ -88,7 +98,7 @@ def ingest_virutal_dataset( # could work for a list of paths with minimal change
     data.append(create_stac_item('README.md', str(path / "README.md")))
     gdf = gpd.GeoDataFrame(data, geometry='geometry')
     gdf.to_parquet(path / "catalog.parquet")
-    return ingest(path)
+    return ingest(path, repo, retrieve, mode)
 
 @with_auth
 def ingest(path, repo, retrieve, mode, user):
@@ -104,8 +114,6 @@ def ingest(path, repo, retrieve, mode, user):
     # retrieve dataset (create if doesn't exist)
     dataset_or_model = retrieve(metadata, user)
     current_version = sorted([v['version_id'] for v in dataset_or_model["versions"]])[-1]
-    print("current version: ", current_version)
-
     # TODO: update README if metadata changed in UI (db)
     # update_metadata = True
     # if "description" in dataset:
@@ -118,12 +126,10 @@ def ingest(path, repo, retrieve, mode, user):
     # return ingest_files(
     #     repo, dataset["id"], folder, verbose, logger, user, endpoint="datasets"
     # )
-
     catalog_path = path.joinpath("catalog.parquet")
     gdf = gpd.read_parquet(catalog_path)
     files_repo = FilesAPIRepo()
-    catalog_url = files_repo.generate_presigned_url(f'catalog.v{current_version}.parquet', dataset_or_model['id'], user)
-
+    catalog_url = files_repo.generate_presigned_url(f'catalog.v{current_version}.parquet', dataset_or_model['id'], user, endpoint=mode)
     # first time ingesting
     if catalog_url is None:
         total_size = 0
@@ -149,7 +155,7 @@ def ingest(path, repo, retrieve, mode, user):
                 print(f"Error uploading asset {row[0]}: {e}")
                 break
         gdf.to_parquet(catalog_path)
-        files_repo.ingest_file(str(catalog_path), f'catalog.v{current_version}.parquet', dataset_or_model['id'], user,
+        files_repo.ingest_file(str(catalog_path), f'catalog.v{current_version}.parquet', dataset_or_model['id'], user, mode)
         data, error = repo.complete_ingestion(dataset_or_model['id'], current_version, total_size, user)
         if error:
             raise Exception(error)
@@ -174,7 +180,7 @@ def ingest(path, repo, retrieve, mode, user):
         if len(df) > 0: # file exists in previous versions
             if df.iloc[0]['assets'][k]["checksum"] == v["checksum"]: # file is the same
                 # still need to update the required fields
-                file_url = f"{repo.url}
+                file_url = f"{repo.url}{mode}/{dataset_or_model['id']}/stage/{item_id}"
                 gdf.loc[row[0], "assets"][k]["href"] = file_url
                 total_size += v["size"]
                 continue
eotdl/models/download.py
ADDED
@@ -0,0 +1,101 @@
+import os
+from pathlib import Path
+from tqdm import tqdm
+
+from ..auth import with_auth
+from .retrieve import retrieve_model, retrieve_model_files
+from ..shared import calculate_checksum
+from ..repos import FilesAPIRepo, ModelsAPIRepo
+from .metadata import generate_metadata
+from ..curation.stac import STACDataFrame
+
+
+@with_auth
+def download_model(
+    model_name,
+    version=None,
+    path=None,
+    logger=None,
+    assets=False,
+    force=False,
+    verbose=False,
+    user=None,
+    file=None,
+):
+    model = retrieve_model(model_name)
+    if version is None:
+        version = sorted(model["versions"], key=lambda v: v["version_id"])[-1][
+            "version_id"
+        ]
+    else:
+        assert version in [
+            v["version_id"] for v in model["versions"]
+        ], f"Version {version} not found"
+    download_base_path = os.getenv(
+        "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/models"
+    )
+    if path is None:
+        download_path = download_base_path + "/" + model_name + "/v" + str(version)
+    else:
+        download_path = path + "/" + model_name + "/v" + str(version)
+    # check if model already exists
+    if os.path.exists(download_path) and not force:
+        os.makedirs(download_path, exist_ok=True)
+        raise Exception(
+            f"model `{model['name']} v{str(version)}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
+        )
+    if model["quality"] == 0:
+        if file:
+            raise NotImplementedError("Downloading a specific file is not implemented")
+        model_files = retrieve_model_files(model["id"], version)
+        repo = FilesAPIRepo()
+        for file in tqdm(model_files, disable=verbose, unit="file"):
+            filename, file_version = file["filename"], file["version"]
+            if verbose:
+                logger(f"Downloading {file['filename']}...")
+            dst_path = repo.download_file(
+                model["id"],
+                filename,
+                user,
+                download_path,
+                file_version,
+                endpoint="models",
+            )
+        if verbose:
+            logger("Generating README.md ...")
+        generate_metadata(download_path, model)
+    else:
+        if verbose:
+            logger("Downloading STAC metadata...")
+        repo = ModelsAPIRepo()
+        gdf, error = repo.download_stac(
+            model["id"],
+            user,
+        )
+        if error:
+            raise Exception(error)
+        # print(gdf)
+        df = STACDataFrame(gdf)
+        # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
+        df.to_stac(download_path)
+        # print("----")
+        # print(df)
+        # download assets
+        if assets:
+            if verbose:
+                logger("Downloading assets...")
+            repo = FilesAPIRepo()
+            df = df.dropna(subset=["assets"])
+            for row in tqdm(df.iterrows(), total=len(df)):
+                for k, v in row[1]["assets"].items():
+                    href = v["href"]
+                    _, filename = href.split("/download/")
+                    # will overwrite assets with same name :(
+                    repo.download_file_url(
+                        href, filename, f"{download_path}/assets", user
+                    )
+        else:
+            logger("To download assets, set assets=True or -a in the CLI.")
+    if verbose:
+        logger("Done")
+    return download_path
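download_model (new in this release) resolves the requested or latest model version and writes it under EOTDL_DOWNLOAD_PATH (default ~/.cache/eotdl/models). A possible call with a placeholder model name; logger is set explicitly because verbose output calls it:

```python
from eotdl.models.download import download_model

path = download_model(
    "MyModel",     # placeholder model name
    assets=True,   # also fetch STAC assets for quality > 0 models
    verbose=True,
    logger=print,
)
print(path)  # e.g. ~/.cache/eotdl/models/MyModel/v1
```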
eotdl/models/ingest.py
CHANGED
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 from ..repos import ModelsAPIRepo
-from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
+from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual
 
 def retrieve_model(metadata, user):
     repo = ModelsAPIRepo()
@@ -12,7 +12,6 @@ def retrieve_model(metadata, user):
     if error and error == "Model doesn't exist":
         # create model
         data, error = repo.create_model(metadata.dict(), user)
-        # print(data, error)
         if error:
             raise Exception(error)
         return data
@@ -31,4 +30,13 @@ def ingest_model(
         prep_ingest_stac(path, logger)
     else:
         prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
-    return ingest(path, ModelsAPIRepo(), retrieve_model, 'models')
+    return ingest(path, ModelsAPIRepo(), retrieve_model, 'models')
+
+def ingest_virtual_model( # could work for a list of paths with minimal changes...
+    path,
+    links,
+    metadata = None,
+    logger=print,
+    user=None,
+):
+    return ingest_virtual(path, links, ModelsAPIRepo(), retrieve_model, 'models', metadata, logger)
eotdl/models/stage.py
CHANGED
@@ -6,6 +6,7 @@ import geopandas as gpd
 from ..auth import with_auth
 from .retrieve import retrieve_model
 from ..repos import FilesAPIRepo
+from ..files.metadata import Metadata
 
 @with_auth
 def stage_model(
@@ -44,7 +45,9 @@ def stage_model(
     repo = FilesAPIRepo()
     catalog_path = repo.stage_file(model["id"], f"catalog.v{version}.parquet", user, download_path)
 
-    #
+    # stage README.md
+    metadata = Metadata(**model['metadata'], name=model['name'])
+    metadata.save_metadata(download_path)
 
     if assets:
         gdf = gpd.read_parquet(catalog_path)
eotdl/repos/FilesAPIRepo.py
CHANGED
@@ -101,7 +101,7 @@ class FilesAPIRepo(APIRepo):
         reponse = requests.get(url, headers=self.generate_headers(user))
         data, error = self.format_response(reponse)
         if error:
-            print("ERROR generate_presigned_url", error)
+            # print("ERROR generate_presigned_url", error)
             return None
         return data["presigned_url"]
 