eotdl 2024.5.2__py3-none-any.whl → 2024.6.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eotdl/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2024.05.02"
1
+ __version__ = "2024.06.13"
eotdl/models/download.py CHANGED
@@ -5,8 +5,9 @@ from tqdm import tqdm
5
5
  from ..auth import with_auth
6
6
  from .retrieve import retrieve_model, retrieve_model_files
7
7
  from ..shared import calculate_checksum
8
- from ..repos import FilesAPIRepo
8
+ from ..repos import FilesAPIRepo, ModelsAPIRepo
9
9
  from .metadata import generate_metadata
10
+ from ..curation.stac import STACDataFrame
10
11
 
11
12
 
12
13
  @with_auth
@@ -46,20 +47,6 @@ def download_model(
46
47
  if model["quality"] == 0:
47
48
  if file:
48
49
  raise NotImplementedError("Downloading a specific file is not implemented")
49
- # files = [f for f in model["files"] if f["name"] == file]
50
- # if not files:
51
- # raise Exception(f"File {file} not found")
52
- # if len(files) > 1:
53
- # raise Exception(f"Multiple files with name {file} found")
54
- # dst_path = download(
55
- # model,
56
- # model["id"],
57
- # file,
58
- # files[0]["checksum"],
59
- # download_path,
60
- # user,
61
- # )
62
- # return Outputs(dst_path=dst_path)
63
50
  model_files = retrieve_model_files(model["id"], version)
64
51
  repo = FilesAPIRepo()
65
52
  for file in tqdm(model_files, disable=verbose, unit="file"):
@@ -74,41 +61,38 @@ def download_model(
74
61
  file_version,
75
62
  endpoint="models",
76
63
  )
77
- # if calculate_checksum(dst_path) != checksum:
78
- # logger(f"Checksum for {file} does not match")
64
+ if verbose:
65
+ logger("Generating README.md ...")
66
+ generate_metadata(download_path, model)
79
67
  else:
80
- raise NotImplementedError("Downloading a STAC model is not implemented")
81
- # logger("Downloading STAC metadata...")
82
- # gdf, error = repo.download_stac(
83
- # model["id"],
84
- # user["id_token"],
85
- # )
86
- # if error:
87
- # raise Exception(error)
88
- # df = STACDataFrame(gdf)
89
- # # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
90
- # path = path
91
- # if path is None:
92
- # path = download_base_path + "/" + model["name"]
93
- # df.to_stac(path)
94
- # # download assets
95
- # if assets:
96
- # logger("Downloading assets...")
97
- # df = df.dropna(subset=["assets"])
98
- # for row in tqdm(df.iterrows(), total=len(df)):
99
- # id = row[1]["stac_id"]
100
- # # print(row[1]["links"])
101
- # for k, v in row[1]["assets"].items():
102
- # href = v["href"]
103
- # repo.download_file_url(
104
- # href, f"{path}/assets/{id}", user["id_token"]
105
- # )
106
- # else:
107
- # logger("To download assets, set assets=True or -a in the CLI.")
108
- # return Outputs(dst_path=path)
109
- if verbose:
110
- logger("Generating README.md ...")
111
- generate_metadata(download_path, model)
68
+ if verbose:
69
+ logger("Downloading STAC metadata...")
70
+ repo = ModelsAPIRepo()
71
+ gdf, error = repo.download_stac(
72
+ model["id"],
73
+ user,
74
+ )
75
+ if error:
76
+ raise Exception(error)
77
+ df = STACDataFrame(gdf)
78
+ # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
79
+ df.to_stac(download_path)
80
+ # download assets
81
+ if assets:
82
+ if verbose:
83
+ logger("Downloading assets...")
84
+ repo = FilesAPIRepo()
85
+ df = df.dropna(subset=["assets"])
86
+ for row in tqdm(df.iterrows(), total=len(df)):
87
+ for k, v in row[1]["assets"].items():
88
+ href = v["href"]
89
+ _, filename = href.split("/download/")
90
+ # will overwrite assets with same name :(
91
+ repo.download_file_url(
92
+ href, filename, f"{download_path}/assets", user
93
+ )
94
+ else:
95
+ logger("To download assets, set assets=True or -a in the CLI.")
112
96
  if verbose:
113
97
  logger("Done")
114
98
  return download_path
eotdl/models/ingest.py CHANGED
@@ -2,13 +2,16 @@ from pathlib import Path
2
2
  import yaml
3
3
  import frontmatter
4
4
  import markdown
5
+ from tqdm import tqdm
6
+ import json
5
7
 
6
8
  from ..auth import with_auth
7
9
  from .metadata import Metadata, generate_metadata
8
- from ..repos import ModelsAPIRepo
10
+ from ..repos import ModelsAPIRepo, FilesAPIRepo
9
11
  from ..shared import calculate_checksum
10
- from ..files import ingest_files
12
+ from ..files import ingest_files, create_new_version
11
13
  from .update import update_model
14
+ from ..curation.stac import STACDataFrame
12
15
 
13
16
 
14
17
  def ingest_model(
@@ -17,8 +20,8 @@ def ingest_model(
17
20
  path = Path(path)
18
21
  if not path.is_dir():
19
22
  raise Exception("Path must be a folder")
20
- # if "catalog.json" in [f.name for f in path.iterdir()]:
21
- # return ingest_stac(path / "catalog.json", logger)
23
+ if "catalog.json" in [f.name for f in path.iterdir()]:
24
+ return ingest_stac(path / "catalog.json", logger)
22
25
  return ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
23
26
 
24
27
 
@@ -101,3 +104,64 @@ def check_metadata(
101
104
  generate_metadata(str(folder), dataset)
102
105
  return False
103
106
  return False
107
+
108
+
109
+ def retrieve_stac_model(model_name, user):
110
+ repo = ModelsAPIRepo()
111
+ data, error = repo.retrieve_model(model_name)
112
+ # print(data, error)
113
+ if data and data["uid"] != user["uid"]:
114
+ raise Exception("Model already exists.")
115
+ if error and error == "Model doesn't exist":
116
+ # create model
117
+ data, error = repo.create_stac_model(model_name, user)
118
+ # print(data, error)
119
+ if error:
120
+ raise Exception(error)
121
+ data["id"] = data["model_id"]
122
+ return data["id"]
123
+
124
+
125
+ @with_auth
126
+ def ingest_stac(stac_catalog, logger=None, user=None):
127
+ repo, files_repo = ModelsAPIRepo(), FilesAPIRepo()
128
+ # load catalog
129
+ logger("Loading STAC catalog...")
130
+ df = STACDataFrame.from_stac_file(stac_catalog)
131
+ catalog = df[df["type"] == "Catalog"]
132
+ assert len(catalog) == 1, "STAC catalog must have exactly one root catalog"
133
+ dataset_name = catalog.id.iloc[0]
134
+ # retrieve dataset (create if doesn't exist)
135
+ model_id = retrieve_stac_model(dataset_name, user)
136
+ # create new version
137
+ version = create_new_version(repo, model_id, user)
138
+ logger("New version created, version: " + str(version))
139
+ df2 = df.dropna(subset=["assets"])
140
+ for row in tqdm(df2.iterrows(), total=len(df2)):
141
+ try:
142
+ for k, v in row[1]["assets"].items():
143
+ data, error = files_repo.ingest_file(
144
+ v["href"],
145
+ model_id,
146
+ user,
147
+ calculate_checksum(v["href"]), # is always absolute?
148
+ "models",
149
+ version,
150
+ )
151
+ if error:
152
+ raise Exception(error)
153
+ file_url = (
154
+ f"{repo.url}models/{data['model_id']}/download/{data['filename']}"
155
+ )
156
+ df.loc[row[0], "assets"][k]["href"] = file_url
157
+ except Exception as e:
158
+ logger(f"Error uploading asset {row[0]}: {e}")
159
+ break
160
+ # ingest the STAC catalog into geodb
161
+ logger("Ingesting STAC catalog...")
162
+ data, error = repo.ingest_stac(json.loads(df.to_json()), model_id, user)
163
+ if error:
164
+ # TODO: delete all assets that were uploaded
165
+ raise Exception(error)
166
+ logger("Done")
167
+ return
@@ -1,4 +1,5 @@
1
1
  import requests
2
+ import geopandas as gpd
2
3
 
3
4
  from ..repos import APIRepo
4
5
 
@@ -53,3 +54,27 @@ class ModelsAPIRepo(APIRepo):
53
54
  headers=self.generate_headers(user),
54
55
  )
55
56
  return self.format_response(response)
57
+
58
+ def create_stac_model(self, name, user):
59
+ response = requests.post(
60
+ self.url + "models/stac",
61
+ json={"name": name},
62
+ headers=self.generate_headers(user),
63
+ )
64
+ return self.format_response(response)
65
+
66
+ def ingest_stac(self, stac_json, model_id, user):
67
+ response = requests.put(
68
+ self.url + f"models/stac/{model_id}",
69
+ json={"stac": stac_json},
70
+ headers=self.generate_headers(user),
71
+ )
72
+ return self.format_response(response)
73
+
74
+ def download_stac(self, model_id, user):
75
+ url = self.url + "models/" + model_id + "/download"
76
+ headers = self.generate_headers(user)
77
+ response = requests.get(url, headers=headers)
78
+ if response.status_code != 200:
79
+ return None, response.json()["detail"]
80
+ return gpd.GeoDataFrame.from_features(response.json()["features"]), None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: eotdl
3
- Version: 2024.5.2
3
+ Version: 2024.6.13
4
4
  Summary: Earth Observation Training Data Lab
5
5
  License: MIT
6
6
  Author: EarthPulse
@@ -12,7 +12,6 @@ Classifier: Programming Language :: Python :: 3.8
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
15
  Requires-Dist: black (>=23.10.1,<24.0.0)
17
16
  Requires-Dist: geomet (>=1.0.0,<2.0.0)
18
17
  Requires-Dist: geopandas (>=0.13.2,<0.14.0)
@@ -1,4 +1,4 @@
1
- eotdl/__init__.py,sha256=xrNIo8ojW99vbCIfmq4FSOCpbcqkPd11zXtBDaGUxXA,27
1
+ eotdl/__init__.py,sha256=ThfWXF_wVGcoWrP25NDPXeCmuGEi8P8FKQkVKe_JZZA,27
2
2
  eotdl/access/__init__.py,sha256=jbyjD7BRGJURlTNmtcbBBhw3Xk4EiZvkqmEykM-bJ1k,231
3
3
  eotdl/access/airbus/__init__.py,sha256=G_kkRS9eFjXbQ-aehmTLXeAxh7zpAxz_rgB7J_w0NRg,107
4
4
  eotdl/access/airbus/client.py,sha256=zjfgB_NTsCCIszoQesYkyLJgheKg-eTh28vbleXYxfw,12018
@@ -52,8 +52,8 @@ eotdl/files/__init__.py,sha256=2zfAxgLHmU_jWq_3emnfPXsX-R20gSt-yZX0bPa9h0g,87
52
52
  eotdl/files/ingest.py,sha256=dgjZfd-ACCKradDo2B02CPahwEhFtWvnKvTm372K5eo,6185
53
53
  eotdl/files/list_files.py,sha256=k4OgdbQ7u6tUEE9nJZGXw5s5HtvG0ZApOVTy0KbfTqs,519
54
54
  eotdl/models/__init__.py,sha256=-PvGWG0iSRNBqeFWpxol12dYw-QodXjR81n-JX3x6zI,146
55
- eotdl/models/download.py,sha256=uXWUslaBkJ222BfctbO0eKNcw6Unx1-Pa9Yv5RHXJIo,4119
56
- eotdl/models/ingest.py,sha256=KmBFUS1pkUYdegfi0lL1sd__lj9GMoJ7jxvnVXfY8Ts,3381
55
+ eotdl/models/download.py,sha256=4dgxE9ytT8QqiCyx1r19vL5UASfttUN_mCPKhFwMTfs,3410
56
+ eotdl/models/ingest.py,sha256=8xhGlsADi5dZSNbph4WWMk0cs2J_StxNCYRkSjMhUtg,5747
57
57
  eotdl/models/metadata.py,sha256=S5bpIB4e2pivDnReszJKC3bYBZcaHu-KMYOc3AwHbQ4,1443
58
58
  eotdl/models/retrieve.py,sha256=-Ij7dT4J1p7MW4n13OlPB9OW4tBaBXPwk9dW8IuCZPc,664
59
59
  eotdl/models/update.py,sha256=4FWeD95cXvRpefRjw3Foqb30e30otxqWUZ6nQM9cbmM,374
@@ -62,7 +62,7 @@ eotdl/repos/AuthAPIRepo.py,sha256=vYCqFawe3xUm2cx4SqVXCvzl8J_sr9rs_MkipYC0bXE,95
62
62
  eotdl/repos/AuthRepo.py,sha256=jpzzhINCcDZHRCyrPDsp49h17IlXp2HvX3BB3f5cnb4,1154
63
63
  eotdl/repos/DatasetsAPIRepo.py,sha256=rKqSe-UjJOlD0Kbypu4Gs5kx2mzUD7TY05gv2vgUTv4,2660
64
64
  eotdl/repos/FilesAPIRepo.py,sha256=cS6CFTkSYIXcefHEeLt7I69_EyyO-jhoAwQ7UWYlLkc,7171
65
- eotdl/repos/ModelsAPIRepo.py,sha256=7-0c-3OCTCwKUZ-Z7PWtbRusUxpTU7m2H5jIAcFoJ9M,1677
65
+ eotdl/repos/ModelsAPIRepo.py,sha256=79euf5WsfUxG5KSIGhKT8T7kSl-NtISwxvqHnck-bq0,2616
66
66
  eotdl/repos/__init__.py,sha256=WvX5TP49k7yYb5dWWNjv5kzbdluO3dJ4LqjQxRIOUVc,222
67
67
  eotdl/shared/__init__.py,sha256=mF7doJC8Z5eTPmB01UQvPivThZac32DRY33T6qshXfg,41
68
68
  eotdl/shared/checksum.py,sha256=4IB6N9jRO0chMDNJzpdnFDhC9wcFF9bO5oHq2HodcHw,479
@@ -73,7 +73,7 @@ eotdl/tools/paths.py,sha256=yWhOtVxX4NxrDrrBX2fuye5N1mAqrxXFy_eA7dffd84,1152
73
73
  eotdl/tools/stac.py,sha256=ovXdrPm4Sn9AAJmrP88WnxDmq2Ut-xPoscjphxz3Iyo,5763
74
74
  eotdl/tools/time_utils.py,sha256=qJ3-rk1I7ne722SLfAP6-59kahQ0vLQqIf9VpOi0Kpg,4691
75
75
  eotdl/tools/tools.py,sha256=Tl4_v2ejkQo_zyZek8oofJwoYcdVosdOwW1C0lvWaNM,6354
76
- eotdl-2024.5.2.dist-info/METADATA,sha256=O_RtJWIg7MvaJ3PeX4ZPIpQHuxUdSSZvdtK6qWb8ciY,4102
77
- eotdl-2024.5.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
78
- eotdl-2024.5.2.dist-info/entry_points.txt,sha256=s6sfxUfRrSX2IP2UbrzTFTvRCtLgw3_OKcHlOKf_5F8,39
79
- eotdl-2024.5.2.dist-info/RECORD,,
76
+ eotdl-2024.6.13.dist-info/METADATA,sha256=cZRWhvu3uUpHBRQ3D-6hacu4TDy2iU2K48SL7ORBQOw,4052
77
+ eotdl-2024.6.13.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
78
+ eotdl-2024.6.13.dist-info/entry_points.txt,sha256=s6sfxUfRrSX2IP2UbrzTFTvRCtLgw3_OKcHlOKf_5F8,39
79
+ eotdl-2024.6.13.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 1.6.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any