eotdl 2023.6.27__py3-none-any.whl → 2023.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/cli.py +0 -1
- eotdl/commands/datasets.py +19 -9
- eotdl/curation/stac/__init__.py +1 -1
- eotdl/curation/stac/dataframe.py +5 -114
- eotdl/curation/stac/dataframe_bck.py +253 -0
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +24 -33
- eotdl/src/models/__init__.py +1 -0
- eotdl/src/models/metadata.py +16 -0
- eotdl/src/repos/APIRepo.py +52 -72
- eotdl/src/usecases/datasets/DownloadDataset.py +46 -23
- eotdl/src/usecases/datasets/IngestFile.py +13 -9
- eotdl/src/usecases/datasets/IngestFolder.py +63 -2
- eotdl/src/usecases/datasets/IngestSTAC.py +42 -0
- eotdl/src/usecases/datasets/__init__.py +1 -0
- {eotdl-2023.6.27.dist-info → eotdl-2023.7.19.dist-info}/METADATA +1 -1
- {eotdl-2023.6.27.dist-info → eotdl-2023.7.19.dist-info}/RECORD +19 -15
- {eotdl-2023.6.27.dist-info → eotdl-2023.7.19.dist-info}/WHEEL +0 -0
- {eotdl-2023.6.27.dist-info → eotdl-2023.7.19.dist-info}/entry_points.txt +0 -0
eotdl/cli.py
CHANGED
eotdl/commands/datasets.py
CHANGED
@@ -4,26 +4,32 @@ from pathlib import Path
 from ..datasets import (
     retrieve_datasets,
     download_dataset,
-    ingest_file,
     ingest_folder,
+    ingest_stac,
 )

 app = typer.Typer()


 @app.command()
-def ingest(
+def ingest(
+    path: Path,
+    f: bool = typer.Option(False, "--f", help="Force ingest even if file exists"),
+    d: bool = typer.Option(False, "--d", help="Delete files not in the dataset"),
+):
     """
-    Ingest a
+    Ingest a dataset

-    path: Path to folder with
-    dataset: Name of the dataset
+    path: Path to folder with the dataset
     """
     try:
-        if path.is_dir():
-
+        if not path.is_dir():
+            typer.echo("Path must be a folder")
+            return
+        if "catalog.json" in [f.name for f in path.iterdir()]:
+            ingest_stac(str(path) + "/catalog.json", typer.echo)
         else:
-
+            ingest_folder(path, f, d, typer.echo)
     except Exception as e:
         typer.echo(e)

@@ -38,7 +44,11 @@ def list():


 @app.command()
-def get(
+def get(
+    dataset: str,
+    path: str = None,
+    file: str = None,
+):
     """
     Download a dataset

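For orientation, a minimal sketch of how the reworked ingest command can be driven from Python after this change. The folder name "my-dataset" is a hypothetical example, and the CLI wiring in cli.py is not expanded in this diff, so the sketch calls the command function directly; the --f/--d flags map onto the f and d parameters shown above.

from pathlib import Path
from eotdl.commands.datasets import ingest

# Equivalent to forcing re-upload of an existing plain (non-STAC) dataset folder;
# if the folder contained a catalog.json it would be routed to ingest_stac instead.
ingest(Path("my-dataset"), f=True, d=False)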
eotdl/curation/stac/__init__.py
CHANGED
eotdl/curation/stac/dataframe.py
CHANGED
@@ -6,12 +6,10 @@ import pandas as pd
 import geopandas as gpd
 import pystac
 import json
-import os
-from xcube_geodb.core.geodb import GeoDBClient
 from geomet import wkt
 from os.path import join
 from os import makedirs
-
+from typing import Union
 from math import isnan
 from .utils import convert_df_geom_to_shape, get_all_children

@@ -27,113 +25,7 @@ class STACDataFrame(gpd.GeoDataFrame):
         """
         return read_stac(stac_file)

-
-    def from_geodb(
-        self,
-        server_url: str,
-        server_port: int | str,
-        client_id: str,
-        client_secret: str,
-        auth_aud: str,
-        collection: str,
-        database: str = None,
-    ):
-        """
-        Create a STACDataFrame from a GeoDB collection
-
-        :param server_url: GeoDB server url
-        :param server_port: GeoDB server port
-        :param client_id: GeoDB client id
-        :param client_secret: GeoDB client secret
-        :param auth_aud: GeoDB auth aud
-        :param collection: GeoDB collection
-        :param database: GeoDB database
-        """
-        geodb_client = GeoDBClient(
-            server_url=server_url,
-            server_port=server_port,
-            client_id=client_id,
-            client_secret=client_secret,
-            auth_aud=auth_aud,
-        )
-
-        data = geodb_client.get_collection(collection, database=database)
-
-        return STACDataFrame(data, crs="EPSG:4326")
-
-    def ingest(
-        self,
-        collection: str,
-        server_url: str = os.environ["SERVER_URL"],
-        server_port: int = os.environ["SERVER_PORT"],
-        client_id: str = os.environ["CLIENT_ID"],
-        client_secret: str = os.environ["CLIENT_SECRET"],
-        auth_aud: str = os.environ["AUTH_DOMAIN"],
-        database: str = None,
-    ):
-        """
-        Create a GeoDB collection from a STACDataFrame
-
-        :param collection: dataset name (GeoDB collection)
-        :param server_url: GeoDB server url
-        :param server_port: GeoDB server port
-        :param client_id: GeoDB client id
-        :param client_secret: GeoDB client secret
-        :param auth_aud: GeoDB auth aud
-        :param database: GeoDB database
-        """
-
-        geodb_client = GeoDBClient(
-            server_url=server_url,
-            server_port=server_port,
-            client_id=client_id,
-            client_secret=client_secret,
-            auth_aud=auth_aud,
-        )
-
-        # TODO: check name is unique (use eotdl-cli)
-
-        # TODO: ingest assets (only if local)
-        # TODO: rename assets in the dataframe with URLs (only if local)
-
-        # ingest to geodb
-
-        # Check if the collection already exists
-        if geodb_client.collection_exists(collection, database=database):
-            # geodb_client.drop_collection(collection, database=database)
-            raise Exception(f"Collection {collection} already exists")
-
-        # Rename the column id to stac_id, to avoid conflicts with the id column
-        self.rename(columns={"id": "stac_id"}, inplace=True)
-        # Fill the NaN with '' to avoid errors, except in the geometry column
-        copy = self.copy()
-        columns_to_fill = copy.columns.drop("geometry")
-        self[columns_to_fill] = self[columns_to_fill].fillna("")
-
-        # Create the collection if it does not exist
-        # and insert the data
-        collections = {collection: self._create_collection_structure(self.columns)}
-        geodb_client.create_collections(collections, database=database)
-
-        geodb_client.insert_into_collection(collection, database=database, values=self)
-
-        # TODO: save data in eotdl
-
-    def _create_collection_structure(self, columns: list) -> dict:
-        """
-        Create the schema structure of a GeoDB collection from a STACDataFrame
-
-        :param columns: columns of the STACDataFrame
-        """
-        stac_collection = {"crs": 4326, "properties": {}}
-
-        for column in columns:
-            if column not in ("geometry", "id"):
-                stac_collection["properties"][column] = "json"
-
-        return stac_collection
-
-    def to_stac(self):
+    def to_stac(self, path):
         """
         Create a STAC catalog and children from a STACDataFrame
         """
@@ -150,11 +42,10 @@ class STACDataFrame(gpd.GeoDataFrame):
         catalog_df = df[df["type"] == "Catalog"]

         if catalog_df.empty:
-
-            makedirs(root_output_folder, exist_ok=True)
+            makedirs(path, exist_ok=True)
         else:
             for index, row in catalog_df.iterrows():
-                root_output_folder = row[id_column]
+                root_output_folder = path + "/" + row[id_column]
                 makedirs(root_output_folder, exist_ok=True)
                 row_json = row.to_dict()

@@ -228,7 +119,7 @@ class STACDataFrame(gpd.GeoDataFrame):


 def read_stac(
-    stac_file: pystac.Catalog
+    stac_file: Union[pystac.Catalog, pystac.Collection, str],
     geometry_column: str = "geometry",
 ) -> STACDataFrame:
     """
eotdl/curation/stac/dataframe_bck.py
ADDED
@@ -0,0 +1,253 @@
+"""
+Module for the STAC dataframe
+"""
+
+import pandas as pd
+import geopandas as gpd
+import pystac
+import json
+import os
+from xcube_geodb.core.geodb import GeoDBClient
+from geomet import wkt
+from os.path import join
+from os import makedirs
+
+from math import isnan
+from .utils import convert_df_geom_to_shape, get_all_children
+
+
+class STACDataFrame(gpd.GeoDataFrame):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @classmethod
+    def from_stac_file(self, stac_file):
+        """
+        Create a STACDataFrame from a STAC file
+        """
+        return read_stac(stac_file)
+
+    @classmethod
+    def from_geodb(
+        self,
+        server_url: str,
+        server_port: int | str,
+        client_id: str,
+        client_secret: str,
+        auth_aud: str,
+        collection: str,
+        database: str = None,
+    ):
+        """
+        Create a STACDataFrame from a GeoDB collection
+
+        :param server_url: GeoDB server url
+        :param server_port: GeoDB server port
+        :param client_id: GeoDB client id
+        :param client_secret: GeoDB client secret
+        :param auth_aud: GeoDB auth aud
+        :param collection: GeoDB collection
+        :param database: GeoDB database
+        """
+        geodb_client = GeoDBClient(
+            server_url=server_url,
+            server_port=server_port,
+            client_id=client_id,
+            client_secret=client_secret,
+            auth_aud=auth_aud,
+        )
+
+        data = geodb_client.get_collection(collection, database=database)
+
+        return STACDataFrame(data, crs="EPSG:4326")
+
+    def ingest(
+        self,
+        collection: str,
+        server_url: str = os.environ["SERVER_URL"],
+        server_port: int = os.environ["SERVER_PORT"],
+        client_id: str = os.environ["CLIENT_ID"],
+        client_secret: str = os.environ["CLIENT_SECRET"],
+        auth_aud: str = os.environ["AUTH_DOMAIN"],
+        database: str = None,
+    ):
+        """
+        Create a GeoDB collection from a STACDataFrame
+
+        :param collection: dataset name (GeoDB collection)
+        :param server_url: GeoDB server url
+        :param server_port: GeoDB server port
+        :param client_id: GeoDB client id
+        :param client_secret: GeoDB client secret
+        :param auth_aud: GeoDB auth aud
+        :param database: GeoDB database
+        """
+
+        geodb_client = GeoDBClient(
+            server_url=server_url,
+            server_port=server_port,
+            client_id=client_id,
+            client_secret=client_secret,
+            auth_aud=auth_aud,
+        )
+
+        # TODO: check name is unique (use eotdl-cli)
+
+        # TODO: ingest assets (only if local)
+        # TODO: rename assets in the dataframe with URLs (only if local)
+
+        # ingest to geodb
+
+        # Check if the collection already exists
+        if geodb_client.collection_exists(collection, database=database):
+            # geodb_client.drop_collection(collection, database=database)
+            raise Exception(f"Collection {collection} already exists")
+
+        # Rename the column id to stac_id, to avoid conflicts with the id column
+        self.rename(columns={"id": "stac_id"}, inplace=True)
+        # Fill the NaN with '' to avoid errors, except in the geometry column
+        copy = self.copy()
+        columns_to_fill = copy.columns.drop("geometry")
+        self[columns_to_fill] = self[columns_to_fill].fillna("")
+
+        # Create the collection if it does not exist
+        # and insert the data
+        collections = {collection: self._create_collection_structure(self.columns)}
+        geodb_client.create_collections(collections, database=database)
+
+        geodb_client.insert_into_collection(collection, database=database, values=self)
+
+        # TODO: save data in eotdl
+
+    def _create_collection_structure(self, columns: list) -> dict:
+        """
+        Create the schema structure of a GeoDB collection from a STACDataFrame
+
+        :param columns: columns of the STACDataFrame
+        """
+        stac_collection = {"crs": 4326, "properties": {}}
+
+        for column in columns:
+            if column not in ("geometry", "id"):
+                stac_collection["properties"][column] = "json"
+
+        return stac_collection
+
+    def to_stac(self):
+        """
+        Create a STAC catalog and children from a STACDataFrame
+        """
+        df = self.copy()
+
+        if "id" in df.columns and "stac_id" in df.columns:
+            id_column = "stac_id"
+            stac_id_exists = True
+        else:
+            id_column = "id"
+            stac_id_exists = False
+
+        # First, create the catalog and its folder, if exists
+        catalog_df = df[df["type"] == "Catalog"]
+
+        if catalog_df.empty:
+            root_output_folder = "output"
+            makedirs(root_output_folder, exist_ok=True)
+        else:
+            for index, row in catalog_df.iterrows():
+                root_output_folder = row[id_column]
+                makedirs(root_output_folder, exist_ok=True)
+                row_json = row.to_dict()
+
+                # Curate the json row
+                row_json = self.curate_json_row(row_json, stac_id_exists)
+
+                with open(join(root_output_folder, f"catalog.json"), "w") as f:
+                    json.dump(row_json, f)
+
+        # Second, create the collections and their folders, if exist
+        collections = dict()
+        collections_df = df[df["type"] == "Collection"]
+        for index, row in collections_df.iterrows():
+            stac_output_folder = join(root_output_folder, row[id_column])
+            collections[row[id_column]] = stac_output_folder
+            makedirs(stac_output_folder, exist_ok=True)
+            row_json = row.to_dict()
+
+            # Curate the json row
+            row_json = self.curate_json_row(row_json, stac_id_exists)
+
+            with open(join(stac_output_folder, f"collection.json"), "w") as f:
+                json.dump(row_json, f)
+
+        # Then, create the items and their folders, if exist
+        features_df = df[df["type"] == "Feature"]
+        for index, row in features_df.iterrows():
+            collection = row["collection"]
+            stac_output_folder = join(collections[collection], row[id_column])
+
+            # Convert the geometry from WKT back to geojson
+            row["geometry"] = row["geometry"].wkt
+            row["geometry"] = wkt.loads(row["geometry"])
+            makedirs(stac_output_folder, exist_ok=True)
+            row_json = row.to_dict()
+
+            # Curate the json row
+            row_json = self.curate_json_row(row_json, stac_id_exists)
+
+            with open(join(stac_output_folder, f'{row_json["id"]}.json'), "w") as f:
+                json.dump(row_json, f)
+
+    def curate_json_row(self, row: dict, stac_id_exists: bool) -> dict:
+        """
+        Curate the json row of a STACDataFrame, in order to generate a valid STAC file
+
+        :param row: row of a STACDataFrame
+        :param stac_id_exists: if the stac_id column exists
+        """
+        keys_to_remove = list()
+
+        # Remove the created_at and modified_at columns, if the STACDataFrame comes from GeoDB
+        for i in "created_at", "modified_at":
+            if i in row.keys():
+                keys_to_remove.append(i)
+
+        # Rename the stac_id column to id, to avoid conflicts with the id column
+        if stac_id_exists:
+            row["id"] = row["stac_id"]
+            del row["stac_id"]
+
+        # Remove the NaN values and empty strings
+        for k, v in row.items():
+            if (isinstance(v, float) and isnan(v)) or v == "":
+                keys_to_remove.append(k)
+        for key in keys_to_remove:
+            del row[key]
+        del row["geometry"]
+
+        return row
+
+
+def read_stac(
+    stac_file: pystac.Catalog | pystac.Collection | str,
+    geometry_column: str = "geometry",
+) -> STACDataFrame:
+    """
+    Read a STAC file and return a STACDataFrame
+
+    :param stac_file: STAC file to read
+    :param geometry_column: name of the geometry column
+    """
+    if isinstance(stac_file, str):
+        stac_file = pystac.read_file(stac_file)
+    children = get_all_children(stac_file)
+
+    # Convert Dataframe to STACDataFrame
+    dataframe = pd.DataFrame(children)
+    dataframe[geometry_column] = dataframe.apply(convert_df_geom_to_shape, axis=1)
+    stac_dataframe = STACDataFrame(
+        dataframe,
+        crs="EPSG:4326",
+        geometry=gpd.GeoSeries.from_wkt(dataframe[geometry_column]),
+    )
+
+    return stac_dataframe
eotdl/datasets/__init__.py
CHANGED
eotdl/datasets/ingest.py
CHANGED
@@ -1,7 +1,10 @@
+import os
+
 from ..src.repos import APIRepo
-from ..src.usecases.datasets import IngestFile, IngestFolder
+from ..src.usecases.datasets import IngestFile, IngestFolder, IngestSTAC
 from ..auth import with_auth

+
 allowed_extensions = [
     ".zip",
     ".tar",
@@ -11,51 +14,39 @@ allowed_extensions = [
     ".json",
     ".pdf",
     ".md",
+    ".yml",
 ]


+def ingest_q1(dataset, stac_catalog):
+    print("hola")
+    return
+
+
 @with_auth
-def ingest_file(
+def ingest_file(
+    file, dataset_id, logger=None, allowed_extensions=allowed_extensions, user=None
+):
     api_repo = APIRepo()
     ingest = IngestFile(api_repo, allowed_extensions, logger)
-    inputs = ingest.Inputs(file=file,
+    inputs = ingest.Inputs(file=file, dataset_id=dataset_id, user=user)
     outputs = ingest(inputs)
-    return outputs.
+    return outputs.data


 @with_auth
-def ingest_folder(folder,
+def ingest_folder(folder, force, delete, logger=None, user=None):
     api_repo = APIRepo()
     ingest = IngestFolder(api_repo, ingest_file, allowed_extensions, logger)
-    inputs = ingest.Inputs(folder=folder,
+    inputs = ingest.Inputs(folder=folder, user=user, force=force, delete=delete)
     outputs = ingest(inputs)
     return outputs.dataset


-
-
-
-
-
-
-
-# outputs = ingest(inputs)
-# return outputs.dataset
-
-
-# @with_auth
-# def ingest_large_dataset(name, path, logger=None, user=None):
-# api_repo = APIRepo()
-# ingest = IngestLargeDataset(api_repo, logger)
-# inputs = ingest.Inputs(name=name, path=path, user=user)
-# outputs = ingest(inputs)
-# return outputs.dataset
-
-
-# def ingest_q0(dataset, path):
-# return ingest_large_dataset(dataset, path)
-
-
-# def ingest_q1(dataset, stac_catalog):
-# print("holas")
-# return
+@with_auth
+def ingest_stac(stac_catalog, dataset, logger=None, user=None):
+    api_repo = APIRepo()
+    ingest = IngestSTAC(api_repo, ingest_file, allowed_extensions)
+    inputs = ingest.Inputs(stac_catalog=stac_catalog, dataset=dataset, user=user)
+    outputs = ingest(inputs)
+    return outputs.dataset
eotdl/src/models/__init__.py
ADDED
@@ -0,0 +1 @@
+from .metadata import Metadata
eotdl/src/models/metadata.py
ADDED
@@ -0,0 +1,16 @@
+from pydantic import BaseModel, validator
+from typing import List
+
+
+class Metadata(BaseModel):
+    authors: List[str]
+    license: str
+    source: str
+    name: str
+
+    # validate source is a URL
+    @validator("source")
+    def source_is_url(cls, v):
+        if not v.startswith("http") and not v.startswith("https"):
+            raise ValueError("source must be a URL")
+        return v
eotdl/src/repos/APIRepo.py
CHANGED
@@ -6,11 +6,13 @@ from concurrent.futures import ThreadPoolExecutor
 import time
 import multiprocessing
 import hashlib
+import geopandas as gpd


 class APIRepo:
     def __init__(self, url=os.getenv("EOTDL_API_URL", "https://api.eotdl.com/")):
         self.url = url
+        # print(self.url)

     def login(self):
         return requests.get(self.url + "auth/login")
@@ -22,6 +24,16 @@ class APIRepo:
         response = requests.get(self.url + "auth/logout")
         return response.json()["logout_url"]

+    def create_dataset(self, metadata, id_token):
+        response = requests.post(
+            self.url + "datasets",
+            json=metadata,
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        if response.status_code == 200:
+            return response.json(), None
+        return None, response.json()["detail"]
+
     def retrieve_datasets(self):
         return requests.get(self.url + "datasets").json()

@@ -34,12 +46,7 @@
     def download_file(self, dataset, dataset_id, file, id_token, path):
         url = self.url + "datasets/" + dataset_id + "/download/" + file
         headers = {"Authorization": "Bearer " + id_token}
-        if path is None:
-            path = str(Path.home()) + "/.eotdl/datasets/" + dataset
-            os.makedirs(path, exist_ok=True)
         path = f"{path}/{file}"
-        # if os.path.exists(path):
-        #     raise Exception("File already exists")
         with requests.get(url, headers=headers, stream=True) as r:
             r.raise_for_status()
             total_size = int(r.headers.get("content-length", 0))
@@ -55,14 +62,21 @@
             progress_bar.close()
         return path

-    def ingest_file(self, file,
+    def ingest_file(self, file, dataset_id, id_token, checksum=None):
         reponse = requests.post(
-            self.url + "datasets",
+            self.url + "datasets/" + dataset_id,
             files={"file": open(file, "rb")},
-            data={
-
-
-
+            data={"checksum": checksum} if checksum else None,
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        if reponse.status_code != 200:
+            return None, reponse.json()["detail"]
+        return reponse.json(), None
+
+    def ingest_file_url(self, file, dataset, id_token):
+        reponse = requests.post(
+            self.url + "datasets/url",
+            json={"dataset": dataset, "url": file},
             headers={"Authorization": "Bearer " + id_token},
         )
         if reponse.status_code != 200:
@@ -76,11 +90,11 @@
                     break
                 yield data

-    def prepare_large_upload(self, file,
+    def prepare_large_upload(self, file, dataset_id, checksum, id_token):
         filename = Path(file).name
         response = requests.post(
-            self.url + "datasets/uploadId",
-            json={"name": filename, "checksum": checksum
+            self.url + f"datasets/{dataset_id}/uploadId",
+            json={"name": filename, "checksum": checksum},
             headers={"Authorization": "Bearer " + id_token},
         )
         if response.status_code != 200:
@@ -204,63 +218,29 @@
             return None, r.json()["detail"]
         return r.json(), None

-    def
-
-
-
-
-
-
-
-    ):
-        # Create thread pool executor
-        max_workers = threads if threads > 0 else multiprocessing.cpu_count()
-        executor = ThreadPoolExecutor(max_workers=max_workers)
-
-        # Divide file into chunks and create tasks for each chunk
-        offset = 0
-        tasks = []
-        content_path = os.path.abspath(path)
-        content_size = os.stat(content_path).st_size
-        chunk_size = self.get_chunk_size(content_size)
-        total_chunks = content_size // chunk_size
-        while offset < content_size:
-            chunk_end = min(offset + chunk_size, content_size)
-            part = str(offset // chunk_size + 1)
-            if part not in parts:
-                tasks.append((offset, chunk_end, part))
-            offset = chunk_end
-
-        # Define the function that will upload each chunk
-        def upload_chunk(start, end, part):
-            # print(f"Uploading chunk {start} - {end}", part)
-            with open(content_path, "rb") as f:
-                f.seek(start)
-                chunk = f.read(end - start)
-            checksum = hashlib.md5(chunk).hexdigest()
-            response = requests.post(
-                self.url + "datasets/chunk",
-                files={"file": chunk},
-                headers={
-                    "Authorization": "Bearer " + id_token,
-                    "Upload-Id": upload_id,
-                    "Dataset-Id": dataset_id,
-                    "Checksum": checksum,
-                    "Part-Number": str(part),
-                },
-            )
-            if response.status_code != 200:
-                print(f"Failed to upload chunk {start} - {end}")
-            return response
+    def delete_file(self, dataset_id, file_name, id_token):
+        response = requests.delete(
+            self.url + "datasets/" + dataset_id + "/file/" + file_name,
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        if response.status_code != 200:
+            return None, response.json()["detail"]
+        return response.json(), None

-
-
-
-
-
-
-
+    def ingest_stac(self, stac_json, dataset, id_token):
+        reponse = requests.post(
+            self.url + "datasets/stac",
+            json={"dataset": dataset, "stac": stac_json},
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        if reponse.status_code != 200:
+            return None, reponse.json()["detail"]
+        return reponse.json(), None

-
-
-
+    def download_stac(self, dataset_id, id_token):
+        url = self.url + "datasets/" + dataset_id + "/download"
+        headers = {"Authorization": "Bearer " + id_token}
+        response = requests.get(url, headers=headers)
+        if response.status_code != 200:
+            return None, response.json()["detail"]
+        return gpd.GeoDataFrame.from_features(response.json()["features"]), None
eotdl/src/usecases/datasets/DownloadDataset.py
CHANGED
@@ -1,5 +1,8 @@
 from pydantic import BaseModel
 from ....src.utils import calculate_checksum
+from ....curation.stac import STACDataFrame
+from pathlib import Path
+import os


 class DownloadDataset:
@@ -29,28 +32,48 @@ class DownloadDataset:

     def __call__(self, inputs: Inputs) -> Outputs:
         dataset = self.retrieve_dataset(inputs.dataset)
-        if inputs.
-
-
-
-
-
-
-        inputs.
+        if inputs.path is None:
+            download_path = str(Path.home()) + "/.eotdl/datasets/" + inputs.dataset
+        else:
+            download_path = inputs.path + "/" + inputs.dataset
+        os.makedirs(download_path, exist_ok=True)
+        if dataset["quality"] == 0:
+            if inputs.file:
+                files = [f for f in dataset["files"] if f["name"] == inputs.file]
+                if not files:
+                    raise Exception(f"File {inputs.file} not found")
+                if len(files) > 1:
+                    raise Exception(f"Multiple files with name {inputs.file} found")
+                dst_path = self.download(
+                    inputs.dataset,
+                    dataset["id"],
+                    inputs.file,
+                    files[0]["checksum"],
+                    download_path,
+                    inputs.user,
+                )
+                return self.Outputs(dst_path=dst_path)
+            for file in dataset["files"]:
+                dst_path = self.download(
+                    inputs.dataset,
+                    dataset["id"],
+                    file["name"],
+                    file["checksum"],
+                    download_path,
+                    inputs.user,
+                )
+            return self.Outputs(dst_path="/".join(dst_path.split("/")[:-1]))
+        else:
+            gdf, error = self.repo.download_stac(
                 dataset["id"],
-                inputs.
-                files[0]["checksum"],
-                inputs.path,
-                inputs.user,
+                inputs.user["id_token"],
             )
-
-
-
-
-
-
-
-
-
-            )
-            return self.Outputs(dst_path="/".join(dst_path.split("/")[:-1]))
+            if error:
+                raise Exception(error)
+            df = STACDataFrame(gdf)
+            # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
+            path = inputs.path
+            if path is None:
+                path = str(Path.home()) + "/.eotdl/datasets/" + dataset["name"]
+            df.to_stac(path)
+            return self.Outputs(dst_path=path)
eotdl/src/usecases/datasets/IngestFile.py
CHANGED
@@ -13,11 +13,11 @@ class IngestFile:

     class Inputs(BaseModel):
         file: typing.Any
-
+        dataset_id: str
         user: dict

     class Outputs(BaseModel):
-
+        data: dict


     def __call__(self, inputs: Inputs) -> Outputs:
@@ -26,31 +26,35 @@ class IngestFile:
             raise Exception(
                 f"Only {', '.join(self.allowed_extensions)} files are allowed"
             )
+        id_token = inputs.user["id_token"]
         self.logger(f"Uploading file {inputs.file}...")
+        # if inputs.file.startswith("http://") or inputs.file.startswith("https://"):
+        #     data, error = self.repo.ingest_file_url(
+        #         inputs.file, inputs.metadata.name, id_token
+        #     )
+        # else:
         self.logger("Computing checksum...")
         checksum = calculate_checksum(inputs.file)
         self.logger(checksum)
-        self.logger("Ingesting
-        id_token = inputs.user["id_token"]
+        self.logger("Ingesting file...")
         filesize = os.path.getsize(inputs.file)
         # ingest small file
         if filesize < 1024 * 1024 * 16:  # 16 MB
             data, error = self.repo.ingest_file(
-                inputs.file, inputs.
+                inputs.file, inputs.dataset_id, id_token, checksum
             )
             if error:
                 raise Exception(error)
             self.logger("Done")
-            return self.Outputs(
+            return self.Outputs(data=data)
         # ingest large file
         upload_id, parts = self.repo.prepare_large_upload(
-            inputs.file, inputs.
+            inputs.file, inputs.dataset_id, checksum, id_token
         )
-        print(upload_id, parts)
         self.repo.ingest_large_dataset(inputs.file, upload_id, id_token, parts)
         self.logger("\nCompleting upload...")
         data, error = self.repo.complete_upload(id_token, upload_id)
         if error:
             raise Exception(error)
         self.logger("Done")
-        return self.Outputs(
+        return self.Outputs(data=data)
eotdl/src/usecases/datasets/IngestFolder.py
CHANGED
@@ -1,6 +1,8 @@
 from pydantic import BaseModel
 import os
 from pathlib import Path
+import yaml
+from ...models import Metadata


 class IngestFolder:
@@ -12,13 +14,15 @@ class IngestFolder:

     class Inputs(BaseModel):
         folder: Path
-        dataset: str = None
         user: dict
+        force: bool = False
+        delete: bool = False

     class Outputs(BaseModel):
         dataset: dict

     def __call__(self, inputs: Inputs) -> Outputs:
+        # validate folder
         self.logger("Uploading directory (only files, not recursive)")
         items = list(inputs.folder.glob("*"))
         filtered_items = [item for item in items if item.is_file()]
@@ -29,9 +33,66 @@ class IngestFolder:
             raise Exception("No files found in directory")
         if len(filtered_items) > 10:
             raise Exception("Too many files in directory, limited to 10")
+        if "metadata.yml" not in [item.name for item in filtered_items]:
+            raise Exception("metadata.yml not found in directory")
+        # load metadata
+        metadata = yaml.safe_load(
+            open(inputs.folder.joinpath("metadata.yml"), "r").read()
+        )
+        metadata = Metadata(**metadata)
+        # remove metadata.yml from files
+        filtered_items = [
+            item for item in filtered_items if item.name != "metadata.yml"
+        ]
+        # create dataset
+        data, error = self.repo.create_dataset(metadata.dict(), inputs.user["id_token"])
+        # dataset may already exists, but if user is owner continue ingesting files
+        current_files = []
+        if error:
+            data, error2 = self.repo.retrieve_dataset(metadata.name)
+            if error2:
+                raise Exception(error)
+            if data["uid"] != inputs.user["sub"]:
+                raise Exception("Dataset already exists.")
+            data["dataset_id"] = data["id"]
+            current_files = [item["name"] for item in data["files"]]
+            if len(current_files) > 0 and not inputs.force:
+                self.logger(
+                    "The following files already exist and will not be uploaded (use --f to force re-upload):"
+                )
+                for item in current_files:
+                    self.logger(f"{item}")
+            # TODO: delete current_files that are not in filtered_items if --delete
+            hanged_files = [
+                file
+                for file in current_files
+                if file not in [item.name for item in filtered_items]
+            ]
+            if len(hanged_files) > 0:
+                self.logger(
+                    "The following files are no longer in your dataset (use --d to delete):"
+                )
+                for item in hanged_files:
+                    self.logger(f"{item}")
+                    if inputs.delete:
+                        self.logger(f"Deleting file {item}...")
+                        _, error = self.repo.delete_file(
+                            data["dataset_id"], item, inputs.user["id_token"]
+                        )
+                        if error:
+                            self.logger(error)
+                        else:
+                            self.logger("Done")
+            filtered_items = [
+                item for item in filtered_items if item.name not in current_files
+            ]
+        dataset_id = data["dataset_id"]
+        # upload files
+        if len(filtered_items) == 0:
+            raise Exception("No files to upload")
         self.logger("The following files will be uploaded:")
         for item in filtered_items:
             self.logger(f"{item.name}")
         for item in filtered_items:
-            data = self.ingest_file(item,
+            data = self.ingest_file(item, dataset_id, logger=self.logger)
         return self.Outputs(dataset=data)
eotdl/src/usecases/datasets/IngestSTAC.py
ADDED
@@ -0,0 +1,42 @@
+from pydantic import BaseModel
+from ....curation.stac import STACDataFrame
+import json
+
+
+class IngestSTAC:
+    def __init__(self, repo, ingest_file, allowed_extensions):
+        self.repo = repo
+        self.ingest_file = ingest_file
+        self.allowed_extensions = allowed_extensions
+
+    class Inputs(BaseModel):
+        stac_catalog: str
+        dataset: str
+        user: dict
+
+    class Outputs(BaseModel):
+        dataset: dict
+
+    def __call__(self, inputs: Inputs) -> Outputs:
+        # load the STAC catalog as a STACsetFrame
+        df = STACDataFrame.from_stac_file(inputs.stac_catalog)
+        # upload all assets to EOTDL
+        for row in df.dropna(subset=["assets"]).iterrows():
+            # for asset in df.assets.dropna().values[:10]:
+            try:
+                for k, v in row[1]["assets"].items():
+                    data = self.ingest_file(
+                        v["href"],
+                        inputs.dataset,
+                        allowed_extensions=self.allowed_extensions + [".tif", ".tiff"],
+                    )
+                    file_url = f"{self.repo.url}datasets/{data['dataset_id']}/download/{data['file_name']}"
+                    df.loc[row[0], "assets"][k]["href"] = file_url
+            except Exception as e:
+                break
+        data, error = self.repo.ingest_stac(
+            json.loads(df.to_json()), inputs.dataset, inputs.user["id_token"]
+        )
+        if error:
+            raise Exception(error)
+        return self.Outputs(dataset=data)
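End to end, the new STAC ingestion path is exposed through the ingest_stac wrapper added in eotdl/datasets/ingest.py. A minimal sketch based on the signatures shown in this diff; the catalog path and dataset name are hypothetical, and the @with_auth decorator supplies the user argument:

from eotdl.datasets import ingest_stac

# Uploads every asset referenced by the catalog, rewrites the asset hrefs to
# EOTDL download URLs, then posts the resulting STAC JSON to datasets/stac.
dataset = ingest_stac("path/to/catalog.json", "my-dataset")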
{eotdl-2023.6.27.dist-info → eotdl-2023.7.19.dist-info}/RECORD
CHANGED
@@ -6,28 +6,31 @@ eotdl/access/sentinelhub/client.py,sha256=skL3EfVuBLSoOc8gbbsWuAe9UTDYSI1IdpqU8H
 eotdl/access/sentinelhub/utils.py,sha256=4xzAOoxA4A3EmBD1HX7IAbiXl9cMTfYabq0wGkFrx8A,6595
 eotdl/auth/__init__.py,sha256=gN4x9suYveP1eZr5_7IisdTVy13B-Xnm_t6N_nwoI1o,66
 eotdl/auth/main.py,sha256=q3-JcDbLJMEBSXa4Y7fofso4iK-b4QjL8QV0Ok7eqG8,857
-eotdl/cli.py,sha256=
+eotdl/cli.py,sha256=UETgJnO1sxeLeZW1vDNLy_wtGvwXAsY7f_xjbUMZYXo,192
 eotdl/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 eotdl/commands/auth.py,sha256=_Kgud59Wp8Jc1Y-o8XZRXrtd_3QfRJ4d7H8HsCg0smg,937
-eotdl/commands/datasets.py,sha256=
+eotdl/commands/datasets.py,sha256=vAFNOy2AbU0FwPPO8sJSoh-VsbvaIwzErMLMYES56as,1555
 eotdl/curation/__init__.py,sha256=wdcnj-u8fwObyjF8LNebwYyUULEfp3m41X1TSjURO6w,270
 eotdl/curation/formatters.py,sha256=UllCEsWspqzoBOBy4iAz5oSh4AAMva88orFFmE5binA,2929
 eotdl/curation/metadata.py,sha256=m6mcCr7UI3Mgij3FbhQAHWetXyADJIQoL2JTUas8GoU,1552
-eotdl/curation/stac/__init__.py,sha256=
-eotdl/curation/stac/dataframe.py,sha256=
+eotdl/curation/stac/__init__.py,sha256=YLUrLky6k1pvLciy2FrjmTauWNkJnx0L6U2ZN4VSL1c,199
+eotdl/curation/stac/dataframe.py,sha256=ewDJoFKYk6fbsOKgHAsoDDF2bnriGC06w6iVuFtXBjo,4801
+eotdl/curation/stac/dataframe_bck.py,sha256=0sMc00gnYNp58ShEGvXWbBh_5QOSpvKKYA0bKC5fFL4,8495
 eotdl/curation/stac/extensions.py,sha256=I8SRMn9lyw_0sNZ-FiJ7HYPEGozylgf58gVWBSAW6kI,5231
 eotdl/curation/stac/parsers.py,sha256=KumL2ieBt0ATGgKoGyc-AJ99zSMeLD4-tI5MF9ruYPw,1380
 eotdl/curation/stac/stac.py,sha256=COFpXQAvVBpgNf7qVv3DfIalBzdnvyVuDiEDEGgnYsA,17796
 eotdl/curation/stac/utils.py,sha256=WxC1uRY3BJJh0GFgrU3IAV4-_vwMaUyeRYwUcySfj9k,1855
-eotdl/datasets/__init__.py,sha256=
+eotdl/datasets/__init__.py,sha256=KtzMnvAoLOGtXKnTG1EdZuiEPi8TLNjRNkbeq-8iYNY,183
 eotdl/datasets/download.py,sha256=_48fOUIM2SmT-6f80sc0m3ykvo1LtEBPodSZUE_WQ7c,481
-eotdl/datasets/ingest.py,sha256=
+eotdl/datasets/ingest.py,sha256=m71_yX8ZTHDbftqnBqsx1usP90HSayIqj9xDEpliYJM,1306
 eotdl/datasets/retrieve.py,sha256=sXQq5FEAAx48lf6GkaGg8zyMl4KnTRp8dU6JTUjF_RQ,534
 eotdl/hello.py,sha256=bkYLbDXy00l9-wcHDRZT9GUhsgJOHLp3N8yfYaGg6mY,74
 eotdl/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 eotdl/src/errors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 eotdl/src/errors/auth.py,sha256=PpnFU2DvnRo8xrM77wgskKi0tfEJ1Rhle4xv2RD1qpk,306
-eotdl/src/
+eotdl/src/models/__init__.py,sha256=iFRk4hXJjI2wPT6iLhczfCMiGzJFgqBkM6d9fKiR1VU,31
+eotdl/src/models/metadata.py,sha256=L23_EziGVSDJ-WZbYUYNN22GrgbCdMGxwJhgK9uzW0U,390
+eotdl/src/repos/APIRepo.py,sha256=o-rBt_o3I7_aoNkEGTXPjX40BTUhoAUJrdIKa7etUCg,9455
 eotdl/src/repos/AuthRepo.py,sha256=5Gwj7D4MgZWdvGFgoWPQeI_OM7Gv4XWc9lkD2wMy1k4,987
 eotdl/src/repos/__init__.py,sha256=kJTtURg8RZ4GSwhFFyul-SX240r25wvLuwxIhyz9kmI,59
 eotdl/src/usecases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -35,22 +38,23 @@ eotdl/src/usecases/auth/Auth.py,sha256=e36UNqjXSxrkM30lBmEtPd8FZr49Xt_uyTj9yieX7
 eotdl/src/usecases/auth/IsLogged.py,sha256=CTHKct0V1kCx54eQGElkzeDZoWN6CIppfDEFas6ToNI,332
 eotdl/src/usecases/auth/Logout.py,sha256=ZlCMnaSerqBr6XOCcV858qq8IMTAizx6dta-UB5qoVE,420
 eotdl/src/usecases/auth/__init__.py,sha256=w9zv66rZDKgwuzETCkvYefs2gcA1VEzvtQXfBt-KOSk,81
-eotdl/src/usecases/datasets/DownloadDataset.py,sha256=
+eotdl/src/usecases/datasets/DownloadDataset.py,sha256=aosrRxc0mseGuDVjcNvbRc1tjw7AareDg5nuzQ7ww4s,2920
 eotdl/src/usecases/datasets/DownloadFile.py,sha256=2PxQoBepX2AsZAPWt-jbdcI0VuYLtWLK_9Y_4cQ9a5U,940
 eotdl/src/usecases/datasets/IngestDataset.py,sha256=d2H5nPXsEj-LhZpWGwNDSPs9uYNXRO2V07xsTFygQDc,953
-eotdl/src/usecases/datasets/IngestFile.py,sha256=
-eotdl/src/usecases/datasets/IngestFolder.py,sha256=
+eotdl/src/usecases/datasets/IngestFile.py,sha256=BqAtmFi_JsDVNfK3y_zvKGo0elQioV8M8AIKLkZbB74,2136
+eotdl/src/usecases/datasets/IngestFolder.py,sha256=Y9GUe0i9Tn1kYu9pn_KNf3LbKez_8Xh0pffmimeLUM0,4025
 eotdl/src/usecases/datasets/IngestLargeDataset.py,sha256=Q4sR2wyRyUsCwgFucKoNPc2SpbmX74IDXROmSwwyT4Q,1428
 eotdl/src/usecases/datasets/IngestLargeDatasetParallel.py,sha256=egcl54K_Oi0BUSJeIdoQSl8BczBQXY1kCf3SWiLuc6s,1595
+eotdl/src/usecases/datasets/IngestSTAC.py,sha256=kov4SWr7PplccyiLI7pIR5WL--OpK0hLUHLpsxdfFcA,1518
 eotdl/src/usecases/datasets/RetrieveDataset.py,sha256=rmoSLllsPx3PCnIxLhR8yiibbk6xS1Pz9yvZ0RxhsHg,421
 eotdl/src/usecases/datasets/RetrieveDatasets.py,sha256=G5cEs4N6Nq8hZwi9U8Oc5NUvwmQrZdgUUAN7m3iHKWs,455
-eotdl/src/usecases/datasets/__init__.py,sha256=
+eotdl/src/usecases/datasets/__init__.py,sha256=vc1eeE1lFoFw5O_OCHKkhk7NuBBWgztcuz_5hwV8roA,338
 eotdl/src/utils.py,sha256=4IB6N9jRO0chMDNJzpdnFDhC9wcFF9bO5oHq2HodcHw,479
 eotdl/tools/__init__.py,sha256=pyWj9kl-0p-KSUSZ7BV8BoSYxj7j-OfGKt9NE3qw_3Q,277
 eotdl/tools/sen12floods/__init__.py,sha256=J3McIaLi_Bp5-EIVfFWHwm0qYx7PtWydCrWwju8xFW0,215
 eotdl/tools/sen12floods/tools.py,sha256=rPOkwZw_CdecfREUPMrjYCu1jI2OBhk_8PHL2MNTdV8,8124
 eotdl/tools/stac.py,sha256=s-Js3wkqFIQwbWlr4hNTtkUgX_3Suf4A2eUUQEaE-30,636
-eotdl-2023.
-eotdl-2023.
-eotdl-2023.
-eotdl-2023.
+eotdl-2023.7.19.dist-info/entry_points.txt,sha256=s6sfxUfRrSX2IP2UbrzTFTvRCtLgw3_OKcHlOKf_5F8,39
+eotdl-2023.7.19.dist-info/WHEEL,sha256=gSF7fibx4crkLz_A-IKR6kcuq0jJ64KNCkG8_bcaEao,88
+eotdl-2023.7.19.dist-info/METADATA,sha256=XxTgd6Vjw3W7W3QSD7FrgbevS0SfSgX-mAcm-Xa_9xk,739
+eotdl-2023.7.19.dist-info/RECORD,,
File without changes
|
File without changes
|