eotdl 2023.7.19.post4__py3-none-any.whl → 2023.9.14.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/commands/datasets.py +15 -29
- eotdl/curation/__init__.py +5 -5
- eotdl/curation/formatters.py +0 -2
- eotdl/curation/metadata.py +34 -9
- eotdl/curation/stac/assets.py +127 -0
- eotdl/curation/stac/dataframe.py +8 -4
- eotdl/curation/stac/extensions.py +295 -46
- eotdl/curation/stac/extent.py +130 -0
- eotdl/curation/stac/ml_dataset.py +509 -0
- eotdl/curation/stac/parsers.py +2 -0
- eotdl/curation/stac/stac.py +309 -286
- eotdl/curation/stac/utils.py +47 -1
- eotdl/datasets/__init__.py +2 -2
- eotdl/datasets/download.py +16 -3
- eotdl/datasets/ingest.py +21 -10
- eotdl/datasets/retrieve.py +10 -2
- eotdl/src/repos/APIRepo.py +40 -17
- eotdl/src/repos/AuthRepo.py +3 -3
- eotdl/src/usecases/auth/IsLogged.py +5 -3
- eotdl/src/usecases/datasets/DownloadDataset.py +35 -6
- eotdl/src/usecases/datasets/DownloadFileURL.py +22 -0
- eotdl/src/usecases/datasets/IngestFile.py +48 -28
- eotdl/src/usecases/datasets/IngestSTAC.py +43 -8
- eotdl/src/usecases/datasets/RetrieveDatasets.py +3 -2
- eotdl/src/usecases/datasets/__init__.py +1 -0
- eotdl/tools/sen12floods/tools.py +3 -3
- eotdl/tools/stac.py +8 -2
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.post2.dist-info}/METADATA +2 -1
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.post2.dist-info}/RECORD +31 -27
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.post2.dist-info}/WHEEL +1 -1
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.post2.dist-info}/entry_points.txt +0 -0
eotdl/commands/datasets.py
CHANGED
@@ -4,8 +4,7 @@ from pathlib import Path
|
|
4
4
|
from ..datasets import (
|
5
5
|
retrieve_datasets,
|
6
6
|
download_dataset,
|
7
|
-
|
8
|
-
ingest_stac,
|
7
|
+
ingest_dataset,
|
9
8
|
)
|
10
9
|
|
11
10
|
app = typer.Typer()
|
@@ -14,31 +13,21 @@ app = typer.Typer()
|
|
14
13
|
@app.command()
|
15
14
|
def ingest(
|
16
15
|
path: Path,
|
17
|
-
f: bool = typer.Option(
|
18
|
-
|
16
|
+
f: bool = typer.Option(
|
17
|
+
False, "--force", "-f", help="Force ingest even if file exists"
|
18
|
+
),
|
19
|
+
d: bool = typer.Option(
|
20
|
+
False, "--delete", "-d", help="Delete files not in the dataset"
|
21
|
+
),
|
19
22
|
):
|
20
|
-
"""
|
21
|
-
Ingest a dataset
|
22
|
-
|
23
|
-
path: Path to folder with the dataset
|
24
|
-
"""
|
25
23
|
try:
|
26
|
-
|
27
|
-
typer.echo("Path must be a folder")
|
28
|
-
return
|
29
|
-
if "catalog.json" in [f.name for f in path.iterdir()]:
|
30
|
-
ingest_stac(str(path) + "/catalog.json", typer.echo)
|
31
|
-
else:
|
32
|
-
ingest_folder(path, f, d, typer.echo)
|
24
|
+
ingest_dataset(path, f, d, typer.echo)
|
33
25
|
except Exception as e:
|
34
26
|
typer.echo(e)
|
35
27
|
|
36
28
|
|
37
29
|
@app.command()
|
38
30
|
def list():
|
39
|
-
"""
|
40
|
-
List all datasets and files
|
41
|
-
"""
|
42
31
|
datasets = retrieve_datasets()
|
43
32
|
typer.echo(datasets)
|
44
33
|
|
@@ -46,18 +35,15 @@ def list():
|
|
46
35
|
@app.command()
|
47
36
|
def get(
|
48
37
|
dataset: str,
|
49
|
-
path:
|
50
|
-
file:
|
38
|
+
path: Path = typer.Option(None, "--path", "-p", help="Download to a specific path"),
|
39
|
+
file: bool = typer.Option(None, "--file", "-f", help="Download a specific file"),
|
40
|
+
assets: bool = typer.Option(False, "--assets", "-a", help="Download assets"),
|
41
|
+
force: bool = typer.Option(
|
42
|
+
False, "--force", "-f", help="Force download even if file exists"
|
43
|
+
),
|
51
44
|
):
|
52
|
-
"""
|
53
|
-
Download a dataset
|
54
|
-
|
55
|
-
dataset: Name of the dataset
|
56
|
-
file: Name of the file to download (optional, if not provided, the whole dataset will be downloaded)
|
57
|
-
path: Path to download the dataset to (optional, if not provided, the dataset will be downloaded to ~/.eotdl/datasets)
|
58
|
-
"""
|
59
45
|
try:
|
60
|
-
dst_path = download_dataset(dataset, file, path, typer.echo)
|
46
|
+
dst_path = download_dataset(dataset, file, path, typer.echo, assets, force)
|
61
47
|
typer.echo(f"Data available at {dst_path}")
|
62
48
|
except Exception as e:
|
63
49
|
typer.echo(e)
|
eotdl/curation/__init__.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
1
|
+
from .stac.dataframe import STACDataFrame # , read_stac
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
from .stac.stac import STACGenerator, merge_stac_catalogs
|
4
|
+
from .formatters import SHFolderFormatter
|
5
|
+
from .stac.utils import format_time_acquired
|
6
|
+
from .stac.parsers import STACIdParser, StructuredParser, UnestructuredParser
|
eotdl/curation/formatters.py
CHANGED
eotdl/curation/metadata.py
CHANGED
@@ -6,13 +6,16 @@ import datetime
|
|
6
6
|
import json
|
7
7
|
import rasterio
|
8
8
|
from rasterio.warp import transform_bounds
|
9
|
+
from typing import Union, Optional
|
9
10
|
|
11
|
+
from os import remove
|
12
|
+
from glob import glob
|
10
13
|
from os.path import dirname, join, exists
|
11
14
|
|
12
15
|
|
13
16
|
def generate_raster_metadata(raster_path: str,
|
14
17
|
output_folder: str,
|
15
|
-
date_adquired: str
|
18
|
+
date_adquired: Union[str, datetime.datetime]
|
16
19
|
) -> None:
|
17
20
|
"""
|
18
21
|
Generate metadata.json file for a raster file
|
@@ -23,8 +26,10 @@ def generate_raster_metadata(raster_path: str,
|
|
23
26
|
"""
|
24
27
|
with rasterio.open(raster_path) as ds:
|
25
28
|
bounds = ds.bounds
|
26
|
-
dst_crs =
|
27
|
-
left, bottom, right, top = rasterio.warp.transform_bounds(
|
29
|
+
dst_crs = "EPSG:4326" # EPSG identifier for WGS84 coordinate system used by the geojson format
|
30
|
+
left, bottom, right, top = rasterio.warp.transform_bounds(
|
31
|
+
ds.crs, dst_crs, *bounds
|
32
|
+
)
|
28
33
|
bbox = [left, bottom, right, top]
|
29
34
|
|
30
35
|
# Get raster directory path to get the request.json file
|
@@ -32,12 +37,32 @@ def generate_raster_metadata(raster_path: str,
|
|
32
37
|
|
33
38
|
# Read the request.json file and get the request data type
|
34
39
|
if exists(raster_dir_path):
|
35
|
-
with open(join(raster_dir_path,
|
40
|
+
with open(join(raster_dir_path, "request.json"), "r") as f:
|
36
41
|
request = json.load(f)
|
37
|
-
request_data_type = request[
|
42
|
+
request_data_type = request["request"]["payload"]["input"]["data"][0][
|
43
|
+
"type"
|
44
|
+
]
|
38
45
|
|
39
|
-
metadata_path = join(output_folder,
|
40
|
-
metadata = {
|
41
|
-
|
42
|
-
|
46
|
+
metadata_path = join(output_folder, "metadata.json")
|
47
|
+
metadata = {
|
48
|
+
"date-adquired": date_adquired,
|
49
|
+
"bounding-box": bbox,
|
50
|
+
"type": request_data_type,
|
51
|
+
}
|
52
|
+
|
53
|
+
with open(metadata_path, "w") as f:
|
43
54
|
json.dump(metadata, f)
|
55
|
+
|
56
|
+
|
57
|
+
def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
|
58
|
+
"""
|
59
|
+
Remove metadata.json file from a folder
|
60
|
+
|
61
|
+
:param folder: folder path
|
62
|
+
:param metadata_file: metadata file name
|
63
|
+
"""
|
64
|
+
# Search for all the metadata files in the folder
|
65
|
+
metadata_files = glob(join(folder, "**", metadata_file), recursive=True)
|
66
|
+
# Remove all the metadata files
|
67
|
+
for metadata_file in metadata_files:
|
68
|
+
remove(metadata_file)
|
@@ -0,0 +1,127 @@
|
|
1
|
+
'''
|
2
|
+
Module for STAC Asset Generators
|
3
|
+
'''
|
4
|
+
|
5
|
+
from os import remove, listdir
|
6
|
+
from os.path import dirname, join, basename
|
7
|
+
from ..metadata import remove_raster_metadata
|
8
|
+
from pathlib import Path
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
import rasterio
|
12
|
+
import pystac
|
13
|
+
|
14
|
+
|
15
|
+
MEDIA_TYPES_DICT = {
|
16
|
+
'tif': pystac.MediaType.GEOTIFF,
|
17
|
+
'tiff': pystac.MediaType.GEOTIFF,
|
18
|
+
'png': pystac.MediaType.PNG,
|
19
|
+
'jpg': pystac.MediaType.JPEG,
|
20
|
+
'jpeg': pystac.MediaType.JPEG,
|
21
|
+
}
|
22
|
+
|
23
|
+
|
24
|
+
class STACAssetGenerator:
|
25
|
+
|
26
|
+
type = 'None'
|
27
|
+
|
28
|
+
def __init__(self):
|
29
|
+
pass
|
30
|
+
|
31
|
+
@classmethod
|
32
|
+
def extract_assets(self, obj_info: pd.DataFrame):
|
33
|
+
"""
|
34
|
+
Extract the assets from the raster file
|
35
|
+
|
36
|
+
:param raster_path: path to the raster file
|
37
|
+
"""
|
38
|
+
# If there is no bands, create a single band asset from the file, assuming thats a singleband raster
|
39
|
+
raster_path = obj_info["image"].values[0]
|
40
|
+
title = basename(raster_path).split('.')[0]
|
41
|
+
# Get the file extension
|
42
|
+
raster_format = raster_path.split('.')[-1]
|
43
|
+
asset = pystac.Asset(href=Path(raster_path).as_posix(),
|
44
|
+
title=title,
|
45
|
+
media_type=MEDIA_TYPES_DICT[raster_format],
|
46
|
+
roles=['data'])
|
47
|
+
|
48
|
+
return [asset]
|
49
|
+
|
50
|
+
|
51
|
+
class BandsAssetGenerator(STACAssetGenerator):
|
52
|
+
|
53
|
+
type = 'Bands'
|
54
|
+
|
55
|
+
def __init__(self) -> None:
|
56
|
+
super().__init__()
|
57
|
+
|
58
|
+
def extract_assets(self, obj_info: pd.DataFrame):
|
59
|
+
"""
|
60
|
+
Extract the assets from the raster file from the bands column
|
61
|
+
|
62
|
+
:param raster_path: path to the raster file
|
63
|
+
"""
|
64
|
+
asset_list = []
|
65
|
+
# File path
|
66
|
+
raster_path = obj_info["image"].values[0]
|
67
|
+
# Bands
|
68
|
+
bands = obj_info["bands"].values
|
69
|
+
bands = bands[0] if bands else None
|
70
|
+
|
71
|
+
if bands:
|
72
|
+
with rasterio.open(raster_path, 'r') as raster:
|
73
|
+
if isinstance(bands, str):
|
74
|
+
bands = [bands]
|
75
|
+
for band in bands:
|
76
|
+
i = bands.index(band)
|
77
|
+
raster_format = raster_path.split('.')[-1] # Will be used later to save the bands files
|
78
|
+
try:
|
79
|
+
single_band = raster.read(i + 1)
|
80
|
+
except IndexError:
|
81
|
+
single_band = raster.read(1)
|
82
|
+
band_name = f'{band}.{raster_format}'
|
83
|
+
output_band = join(dirname(raster_path), band_name)
|
84
|
+
# Copy the metadata
|
85
|
+
metadata = raster.meta.copy()
|
86
|
+
metadata.update({"count": 1})
|
87
|
+
# Write the band to the output folder
|
88
|
+
with rasterio.open(output_band, "w", **metadata) as dest:
|
89
|
+
dest.write(single_band, 1)
|
90
|
+
# Instantiate pystac asset and append it to the list
|
91
|
+
asset_list.append(pystac.Asset(href=output_band,
|
92
|
+
title=band,
|
93
|
+
media_type=MEDIA_TYPES_DICT[raster_format]))
|
94
|
+
|
95
|
+
# Remove the original raster file and its metadata
|
96
|
+
remove(raster_path)
|
97
|
+
remove_raster_metadata(dirname(raster_path))
|
98
|
+
|
99
|
+
return asset_list
|
100
|
+
|
101
|
+
|
102
|
+
class ExtractedAssets(STACAssetGenerator):
|
103
|
+
|
104
|
+
type = 'Extracted'
|
105
|
+
|
106
|
+
def __init__(self) -> None:
|
107
|
+
super().__init__()
|
108
|
+
|
109
|
+
def extract_assets(self, obj_info: pd.DataFrame):
|
110
|
+
"""
|
111
|
+
Get all the files with the same extension as the image file as assets
|
112
|
+
"""
|
113
|
+
asset_list = []
|
114
|
+
# File path
|
115
|
+
raster_path = obj_info["image"].values[0]
|
116
|
+
raster_dir = dirname(raster_path)
|
117
|
+
# Get the files with the same extension as the image file
|
118
|
+
files = [f for f in listdir(raster_dir) if f.endswith(raster_path.split('.')[-1])]
|
119
|
+
# Instantiate pystac asset and append it to the list
|
120
|
+
for file in files:
|
121
|
+
# Get the file extension
|
122
|
+
raster_format = file.split('.')[-1]
|
123
|
+
asset_list.append(pystac.Asset(href=join(raster_dir, file),
|
124
|
+
title=basename(file),
|
125
|
+
media_type=MEDIA_TYPES_DICT[raster_format]))
|
126
|
+
|
127
|
+
return asset_list
|
eotdl/curation/stac/dataframe.py
CHANGED
@@ -9,9 +9,11 @@ import json
|
|
9
9
|
from geomet import wkt
|
10
10
|
from os.path import join
|
11
11
|
from os import makedirs
|
12
|
-
from typing import Union
|
12
|
+
from typing import Union, Optional
|
13
|
+
|
13
14
|
from math import isnan
|
14
15
|
from .utils import convert_df_geom_to_shape, get_all_children
|
16
|
+
from pathlib import Path
|
15
17
|
|
16
18
|
|
17
19
|
class STACDataFrame(gpd.GeoDataFrame):
|
@@ -19,9 +21,11 @@ class STACDataFrame(gpd.GeoDataFrame):
|
|
19
21
|
super().__init__(*args, **kwargs)
|
20
22
|
|
21
23
|
@classmethod
|
22
|
-
def from_stac_file(self, stac_file):
|
24
|
+
def from_stac_file(self, stac_file: pystac.STACObject):
|
23
25
|
"""
|
24
26
|
Create a STACDataFrame from a STAC file
|
27
|
+
|
28
|
+
:param stac_file: STAC file
|
25
29
|
"""
|
26
30
|
return read_stac(stac_file)
|
27
31
|
|
@@ -120,7 +124,7 @@ class STACDataFrame(gpd.GeoDataFrame):
|
|
120
124
|
|
121
125
|
def read_stac(
|
122
126
|
stac_file: Union[pystac.Catalog, pystac.Collection, str],
|
123
|
-
geometry_column: str = "geometry",
|
127
|
+
geometry_column: Optional[str] = "geometry",
|
124
128
|
) -> STACDataFrame:
|
125
129
|
"""
|
126
130
|
Read a STAC file and return a STACDataFrame
|
@@ -128,7 +132,7 @@ def read_stac(
|
|
128
132
|
:param stac_file: STAC file to read
|
129
133
|
:param geometry_column: name of the geometry column
|
130
134
|
"""
|
131
|
-
if isinstance(stac_file, str):
|
135
|
+
if isinstance(stac_file, str) or isinstance(stac_file, Path):
|
132
136
|
stac_file = pystac.read_file(stac_file)
|
133
137
|
children = get_all_children(stac_file)
|
134
138
|
|