eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/search.py +0 -2
- eotdl/access/sentinelhub/parameters.py +1 -1
- eotdl/cli.py +2 -2
- eotdl/commands/datasets.py +28 -31
- eotdl/commands/models.py +27 -30
- eotdl/commands/stac.py +57 -0
- eotdl/curation/__init__.py +0 -8
- eotdl/curation/stac/__init__.py +1 -8
- eotdl/curation/stac/api.py +58 -0
- eotdl/curation/stac/stac.py +31 -341
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +28 -159
- eotdl/datasets/retrieve.py +0 -9
- eotdl/datasets/stage.py +64 -0
- eotdl/files/__init__.py +0 -2
- eotdl/files/ingest.bck +178 -0
- eotdl/files/ingest.py +229 -164
- eotdl/{datasets → files}/metadata.py +16 -17
- eotdl/models/__init__.py +1 -1
- eotdl/models/ingest.py +28 -159
- eotdl/models/stage.py +60 -0
- eotdl/repos/APIRepo.py +1 -1
- eotdl/repos/DatasetsAPIRepo.py +56 -43
- eotdl/repos/FilesAPIRepo.py +260 -167
- eotdl/repos/STACAPIRepo.py +40 -0
- eotdl/repos/__init__.py +1 -0
- eotdl/tools/geo_utils.py +7 -2
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/METADATA +5 -4
- eotdl-2025.3.25.dist-info/RECORD +65 -0
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/WHEEL +1 -1
- eotdl/curation/stac/assets.py +0 -110
- eotdl/curation/stac/dataframe.py +0 -172
- eotdl/curation/stac/dataframe_bck.py +0 -253
- eotdl/curation/stac/dataframe_labeling.py +0 -63
- eotdl/curation/stac/extensions/__init__.py +0 -23
- eotdl/curation/stac/extensions/base.py +0 -30
- eotdl/curation/stac/extensions/dem.py +0 -18
- eotdl/curation/stac/extensions/eo.py +0 -117
- eotdl/curation/stac/extensions/label/__init__.py +0 -7
- eotdl/curation/stac/extensions/label/base.py +0 -136
- eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
- eotdl/curation/stac/extensions/label/scaneo.py +0 -219
- eotdl/curation/stac/extensions/ml_dataset.py +0 -648
- eotdl/curation/stac/extensions/projection.py +0 -44
- eotdl/curation/stac/extensions/raster.py +0 -53
- eotdl/curation/stac/extensions/sar.py +0 -55
- eotdl/curation/stac/extent.py +0 -158
- eotdl/curation/stac/parsers.py +0 -61
- eotdl/datasets/download.py +0 -104
- eotdl/files/list_files.py +0 -13
- eotdl/models/download.py +0 -101
- eotdl/models/metadata.py +0 -43
- eotdl/wrappers/utils.py +0 -35
- eotdl-2024.10.7.dist-info/RECORD +0 -82
- {eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/entry_points.txt +0 -0
eotdl/models/ingest.py
CHANGED
```diff
@@ -1,165 +1,34 @@
 from pathlib import Path
-import yaml
-import frontmatter
-from tqdm import tqdm
-import json
-
-from ..auth import with_auth
-from .metadata import Metadata, generate_metadata
-from ..repos import ModelsAPIRepo, FilesAPIRepo
-from ..shared import calculate_checksum
-from ..files import ingest_files, create_new_version
-from .update import update_model
-from ..curation.stac import STACDataFrame
-
-
-def ingest_model(
-    path, verbose=False, logger=print, force_metadata_update=False, sync_metadata=False
-):
-    path = Path(path)
-    if not path.is_dir():
-        raise Exception("Path must be a folder")
-    if "catalog.json" in [f.name for f in path.iterdir()]:
-        return ingest_stac(path / "catalog.json", logger)
-    return ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
 
+from ..repos import ModelsAPIRepo
+from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
 
 def retrieve_model(metadata, user):
-    ...
-    return data
-
-
-@with_auth
-def ingest_folder(
-    folder,
-    verbose=False,
-    logger=print,
-    force_metadata_update=False,
-    sync_metadata=False,
-    user=None,
-):
-    repo = ModelsAPIRepo()
-    # load metadata
-    try:
-        readme = frontmatter.load(folder.joinpath("README.md"))
-        metadata, content = readme.metadata, readme.content
-        metadata = Metadata(**metadata)
-    except FileNotFoundError:
-        # load metadata (legacy)
-        metadata = (
-            yaml.safe_load(open(folder.joinpath("metadata.yml"), "r").read()) or {}
-        )
-        metadata = Metadata(**metadata)
-        content = None
-    except Exception as e:
-        raise Exception(f"Error loading metadata: {e}")
-    # retrieve model (create if doesn't exist)
-    model = retrieve_model(metadata, user)
+    repo = ModelsAPIRepo()
+    data, error = repo.retrieve_model(metadata.name)
+    # print(data, error)
+    if data and data["uid"] != user["uid"]:
+        raise Exception("Model already exists.")
+    if error and error == "Model doesn't exist":
+        # create model
+        data, error = repo.create_model(metadata.dict(), user)
+        # print(data, error)
+        if error:
+            raise Exception(error)
+    return data
 
-    ...
-    if update_metadata:
-        update_model(model["id"], metadata, content, user)
-    # ingest files
-    return ingest_files(
-        repo, model["id"], folder, verbose, logger, user, endpoint="models"
-    )
-
-
-def check_metadata(
-    dataset, metadata, content, force_metadata_update, sync_metadata, folder
+def ingest_model(
+    path,
+    verbose=False,
+    logger=print,
+    force_metadata_update=False,
+    sync_metadata=False,
 ):
-    ...
-    if not force_metadata_update and not sync_metadata:
-        raise Exception(
-            "The provided metadata is not consistent with the current metadata. Use -f to force metadata update or -s to sync your local metadata."
-        )
-    if force_metadata_update:
-        return True
-    if sync_metadata:
-        generate_metadata(str(folder), dataset)
-        return False
-    return False
-
-
-def retrieve_stac_model(model_name, user):
-    repo = ModelsAPIRepo()
-    data, error = repo.retrieve_model(model_name)
-    # print(data, error)
-    if data and data["uid"] != user["uid"]:
-        raise Exception("Model already exists.")
-    if error and error == "Model doesn't exist":
-        # create model
-        data, error = repo.create_stac_model(model_name, user)
-        # print(data, error)
-        if error:
-            raise Exception(error)
-    data["id"] = data["model_id"]
-    return data["id"]
-
-
-@with_auth
-def ingest_stac(stac_catalog, logger=None, user=None):
-    repo, files_repo = ModelsAPIRepo(), FilesAPIRepo()
-    # load catalog
-    logger("Loading STAC catalog...")
-    df = STACDataFrame.from_stac_file(stac_catalog)
-    catalog = df[df["type"] == "Catalog"]
-    assert len(catalog) == 1, "STAC catalog must have exactly one root catalog"
-    dataset_name = catalog.id.iloc[0]
-    # retrieve dataset (create if doesn't exist)
-    model_id = retrieve_stac_model(dataset_name, user)
-    # create new version
-    version = create_new_version(repo, model_id, user)
-    logger("New version created, version: " + str(version))
-    df2 = df.dropna(subset=["assets"])
-    for row in tqdm(df2.iterrows(), total=len(df2)):
-        try:
-            for k, v in row[1]["assets"].items():
-                data, error = files_repo.ingest_file(
-                    v["href"],
-                    model_id,
-                    user,
-                    calculate_checksum(v["href"]),  # is always absolute?
-                    "models",
-                    version,
-                )
-                if error:
-                    raise Exception(error)
-                file_url = (
-                    f"{repo.url}models/{data['model_id']}/download/{data['filename']}"
-                )
-                df.loc[row[0], "assets"][k]["href"] = file_url
-        except Exception as e:
-            logger(f"Error uploading asset {row[0]}: {e}")
-            break
-    # ingest the STAC catalog into geodb
-    logger("Ingesting STAC catalog...")
-    data, error = repo.ingest_stac(json.loads(df.to_json()), model_id, user)
-    if error:
-        # TODO: delete all assets that were uploaded
-        raise Exception(error)
-    logger("Done")
-    return
+    path = Path(path)
+    if not path.is_dir():
+        raise Exception("Path must be a folder")
+    if "catalog.json" in [f.name for f in path.iterdir()]:
+        prep_ingest_stac(path, logger)
+    else:
+        prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+    return ingest(path, ModelsAPIRepo(), retrieve_model, 'models')
```
eotdl/models/stage.py
ADDED
```diff
@@ -0,0 +1,60 @@
+import os
+from pathlib import Path
+from tqdm import tqdm
+import geopandas as gpd
+
+from ..auth import with_auth
+from .retrieve import retrieve_model
+from ..repos import FilesAPIRepo
+
+@with_auth
+def stage_model(
+    model_name,
+    version=None,
+    path=None,
+    logger=print,
+    assets=False,
+    force=False,
+    verbose=False,
+    user=None,
+    file=None,
+):
+    model = retrieve_model(model_name)
+    if version is None:
+        version = sorted([v['version_id'] for v in model["versions"]])[-1]
+    else:
+        assert version in [
+            v["version_id"] for v in model["versions"]
+        ], f"Version {version} not found"
+    download_base_path = os.getenv(
+        "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/models"
+    )
+    if path is None:
+        download_path = download_base_path + "/" + model_name
+    else:
+        download_path = path + "/" + model_name
+    # check if model already exists
+    if os.path.exists(download_path) and not force:
+        os.makedirs(download_path, exist_ok=True)
+        # raise Exception(
+        #     f"model `{model['name']} v{str(version)}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
+        # )
+
+    # stage metadata
+    repo = FilesAPIRepo()
+    catalog_path = repo.stage_file(model["id"], f"catalog.v{version}.parquet", user, download_path)
+
+    # TODO: stage README.md
+
+    if assets:
+        gdf = gpd.read_parquet(catalog_path)
+        for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Staging assets"):
+            for k, v in row["assets"].items():
+                stage_model_file(v["href"], download_path)
+
+    return download_path
+
+@with_auth
+def stage_model_file(file_url, path, user):
+    repo = FilesAPIRepo()
+    return repo.stage_file_url(file_url, path, user)
```
eotdl/repos/APIRepo.py
CHANGED
```diff
@@ -5,7 +5,7 @@ import requests
 class APIRepo:
     def __init__(self, url=None):
         default_url = "https://api.eotdl.com/"
-        # default_url = "http://localhost:
+        # default_url = "http://localhost:8000/"
         self.url = url if url else os.getenv("EOTDL_API_URL", default_url)
 
     def format_response(self, response):
```
eotdl/repos/DatasetsAPIRepo.py
CHANGED
```diff
@@ -19,7 +19,15 @@ class DatasetsAPIRepo(APIRepo):
         url += "&limit=" + str(limit)
         response = requests.get(url)
         return self.format_response(response)
 
+    def retrieve_dataset(self, name):
+        response = requests.get(self.url + "datasets?name=" + name)
+        return self.format_response(response)
+
+    def get_dataset_by_id(self, dataset_id):
+        response = requests.get(self.url + "datasets/" + dataset_id)
+        return self.format_response(response)
+
     def create_dataset(self, metadata, user):
         response = requests.post(
             self.url + "datasets",
@@ -28,53 +36,58 @@ class DatasetsAPIRepo(APIRepo):
         )
         return self.format_response(response)
 
-    def retrieve_dataset(self, name):
-        response = requests.get(self.url + "datasets?name=" + name)
-        return self.format_response(response)
-
-    def create_version(self, dataset_id, user):
+    def complete_ingestion(self, dataset_id, version, size, user):
         response = requests.post(
-            self.url + "datasets/version/" + dataset_id,
+            self.url + "datasets/complete/" + dataset_id,
+            json={"version": version, "size": size},
             headers=self.generate_headers(user),
         )
         return self.format_response(response)
 
-    def create_stac_dataset(self, name, user):
-        response = requests.post(
-            self.url + "datasets/stac",
-            json={"name": name},
-            headers=self.generate_headers(user),
-        )
-        return self.format_response(response)
-
-    def ingest_stac(self, stac_json, dataset_id, user):
-        response = requests.put(
-            self.url + f"datasets/stac/{dataset_id}",
-            json={"stac": stac_json},
-            headers=self.generate_headers(user),
-        )
-        return self.format_response(response)
-
-    def download_stac(self, dataset_id, user):
-        url = self.url + "datasets/" + dataset_id + "/download"
-        headers = self.generate_headers(user)
-        response = requests.get(url, headers=headers)
-        if response.status_code != 200:
-            return None, response.json()["detail"]
-        return gpd.GeoDataFrame.from_features(response.json()["features"]), None
-
-    def update_dataset(
-        self, dataset_id, authors, source, license, thumbnail, content, user
-    ):
-        response = requests.put(
-            self.url + f"datasets/{dataset_id}",
-            json={
-                "authors": authors,
-                "source": source,
-                "license": license,
-                "thumbnail": thumbnail,
-                "description": content,
-            },
-            headers=self.generate_headers(user),
-        )
-        return self.format_response(response)
+    # def create_version(self, dataset_id, user):
+    #     response = requests.post(
+    #         self.url + "datasets/version/" + dataset_id,
+    #         headers=self.generate_headers(user),
+    #     )
+    #     return self.format_response(response)
+
+    # def create_stac_dataset(self, name, user):
+    #     response = requests.post(
+    #         self.url + "datasets/stac",
+    #         json={"name": name},
+    #         headers=self.generate_headers(user),
+    #     )
+    #     return self.format_response(response)
+
+    # def ingest_stac(self, stac_json, dataset_id, user):
+    #     response = requests.put(
+    #         self.url + f"datasets/stac/{dataset_id}",
+    #         json={"stac": stac_json},
+    #         headers=self.generate_headers(user),
+    #     )
+    #     return self.format_response(response)
+
+    # def download_stac(self, dataset_id, user):
+    #     url = self.url + "datasets/" + dataset_id + "/download"
+    #     headers = self.generate_headers(user)
+    #     response = requests.get(url, headers=headers)
+    #     if response.status_code != 200:
+    #         return None, response.json()["detail"]
+    #     return gpd.GeoDataFrame.from_features(response.json()["features"]), None
+
+    # def update_dataset(
+    #     self, dataset_id, authors, source, license, thumbnail, content, user
+    # ):
+    #     response = requests.put(
+    #         self.url + f"datasets/{dataset_id}",
+    #         json={
+    #             "authors": authors,
+    #             "source": source,
+    #             "license": license,
+    #             "thumbnail": thumbnail,
+    #             "description": content,
+    #         },
+    #         headers=self.generate_headers(user),
+    #     )
+    #     return self.format_response(response)
```