eotdl 2024.5.2__tar.gz → 2024.6.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {eotdl-2024.5.2 → eotdl-2024.6.13}/PKG-INFO +1 -2
  2. eotdl-2024.6.13/eotdl/__init__.py +1 -0
  3. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/download.py +33 -49
  4. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/ingest.py +68 -4
  5. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/ModelsAPIRepo.py +25 -0
  6. {eotdl-2024.5.2 → eotdl-2024.6.13}/pyproject.toml +1 -1
  7. eotdl-2024.5.2/eotdl/__init__.py +0 -1
  8. {eotdl-2024.5.2 → eotdl-2024.6.13}/README.md +0 -0
  9. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/__init__.py +0 -0
  10. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/airbus/__init__.py +0 -0
  11. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/airbus/client.py +0 -0
  12. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/airbus/parameters.py +0 -0
  13. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/airbus/utils.py +0 -0
  14. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/download.py +0 -0
  15. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/search.py +0 -0
  16. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/sentinelhub/__init__.py +0 -0
  17. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/sentinelhub/client.py +0 -0
  18. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/sentinelhub/evalscripts.py +0 -0
  19. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/sentinelhub/parameters.py +0 -0
  20. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/access/sentinelhub/utils.py +0 -0
  21. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/auth/__init__.py +0 -0
  22. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/auth/auth.py +0 -0
  23. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/auth/errors.py +0 -0
  24. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/auth/is_logged.py +0 -0
  25. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/auth/logout.py +0 -0
  26. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/cli.py +0 -0
  27. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/commands/__init__.py +0 -0
  28. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/commands/auth.py +0 -0
  29. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/commands/datasets.py +0 -0
  30. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/commands/models.py +0 -0
  31. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/__init__.py +0 -0
  32. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/__init__.py +0 -0
  33. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/assets.py +0 -0
  34. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/dataframe.py +0 -0
  35. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/dataframe_bck.py +0 -0
  36. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/dataframe_labeling.py +0 -0
  37. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/__init__.py +0 -0
  38. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/base.py +0 -0
  39. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/dem.py +0 -0
  40. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/eo.py +0 -0
  41. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/label/__init__.py +0 -0
  42. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/label/base.py +0 -0
  43. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -0
  44. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/label/scaneo.py +0 -0
  45. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/ml_dataset.py +0 -0
  46. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/projection.py +0 -0
  47. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/raster.py +0 -0
  48. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extensions/sar.py +0 -0
  49. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/extent.py +0 -0
  50. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/parsers.py +0 -0
  51. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/curation/stac/stac.py +0 -0
  52. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/__init__.py +0 -0
  53. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/download.py +0 -0
  54. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/ingest.py +0 -0
  55. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/metadata.py +0 -0
  56. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/retrieve.py +0 -0
  57. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/datasets/update.py +0 -0
  58. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/files/__init__.py +0 -0
  59. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/files/ingest.py +0 -0
  60. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/files/list_files.py +0 -0
  61. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/__init__.py +0 -0
  62. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/metadata.py +0 -0
  63. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/retrieve.py +0 -0
  64. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/models/update.py +0 -0
  65. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/APIRepo.py +0 -0
  66. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/AuthAPIRepo.py +0 -0
  67. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/AuthRepo.py +0 -0
  68. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/DatasetsAPIRepo.py +0 -0
  69. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/FilesAPIRepo.py +0 -0
  70. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/repos/__init__.py +0 -0
  71. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/shared/__init__.py +0 -0
  72. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/shared/checksum.py +0 -0
  73. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/__init__.py +0 -0
  74. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/geo_utils.py +0 -0
  75. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/metadata.py +0 -0
  76. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/paths.py +0 -0
  77. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/stac.py +0 -0
  78. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/time_utils.py +0 -0
  79. {eotdl-2024.5.2 → eotdl-2024.6.13}/eotdl/tools/tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: eotdl
3
- Version: 2024.5.2
3
+ Version: 2024.6.13
4
4
  Summary: Earth Observation Training Data Lab
5
5
  License: MIT
6
6
  Author: EarthPulse
@@ -12,7 +12,6 @@ Classifier: Programming Language :: Python :: 3.8
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
15
  Requires-Dist: black (>=23.10.1,<24.0.0)
17
16
  Requires-Dist: geomet (>=1.0.0,<2.0.0)
18
17
  Requires-Dist: geopandas (>=0.13.2,<0.14.0)
@@ -0,0 +1 @@
1
+ __version__ = "2024.06.13"
@@ -5,8 +5,9 @@ from tqdm import tqdm
5
5
  from ..auth import with_auth
6
6
  from .retrieve import retrieve_model, retrieve_model_files
7
7
  from ..shared import calculate_checksum
8
- from ..repos import FilesAPIRepo
8
+ from ..repos import FilesAPIRepo, ModelsAPIRepo
9
9
  from .metadata import generate_metadata
10
+ from ..curation.stac import STACDataFrame
10
11
 
11
12
 
12
13
  @with_auth
@@ -46,20 +47,6 @@ def download_model(
46
47
  if model["quality"] == 0:
47
48
  if file:
48
49
  raise NotImplementedError("Downloading a specific file is not implemented")
49
- # files = [f for f in model["files"] if f["name"] == file]
50
- # if not files:
51
- # raise Exception(f"File {file} not found")
52
- # if len(files) > 1:
53
- # raise Exception(f"Multiple files with name {file} found")
54
- # dst_path = download(
55
- # model,
56
- # model["id"],
57
- # file,
58
- # files[0]["checksum"],
59
- # download_path,
60
- # user,
61
- # )
62
- # return Outputs(dst_path=dst_path)
63
50
  model_files = retrieve_model_files(model["id"], version)
64
51
  repo = FilesAPIRepo()
65
52
  for file in tqdm(model_files, disable=verbose, unit="file"):
@@ -74,41 +61,38 @@ def download_model(
74
61
  file_version,
75
62
  endpoint="models",
76
63
  )
77
- # if calculate_checksum(dst_path) != checksum:
78
- # logger(f"Checksum for {file} does not match")
64
+ if verbose:
65
+ logger("Generating README.md ...")
66
+ generate_metadata(download_path, model)
79
67
  else:
80
- raise NotImplementedError("Downloading a STAC model is not implemented")
81
- # logger("Downloading STAC metadata...")
82
- # gdf, error = repo.download_stac(
83
- # model["id"],
84
- # user["id_token"],
85
- # )
86
- # if error:
87
- # raise Exception(error)
88
- # df = STACDataFrame(gdf)
89
- # # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
90
- # path = path
91
- # if path is None:
92
- # path = download_base_path + "/" + model["name"]
93
- # df.to_stac(path)
94
- # # download assets
95
- # if assets:
96
- # logger("Downloading assets...")
97
- # df = df.dropna(subset=["assets"])
98
- # for row in tqdm(df.iterrows(), total=len(df)):
99
- # id = row[1]["stac_id"]
100
- # # print(row[1]["links"])
101
- # for k, v in row[1]["assets"].items():
102
- # href = v["href"]
103
- # repo.download_file_url(
104
- # href, f"{path}/assets/{id}", user["id_token"]
105
- # )
106
- # else:
107
- # logger("To download assets, set assets=True or -a in the CLI.")
108
- # return Outputs(dst_path=path)
109
- if verbose:
110
- logger("Generating README.md ...")
111
- generate_metadata(download_path, model)
68
+ if verbose:
69
+ logger("Downloading STAC metadata...")
70
+ repo = ModelsAPIRepo()
71
+ gdf, error = repo.download_stac(
72
+ model["id"],
73
+ user,
74
+ )
75
+ if error:
76
+ raise Exception(error)
77
+ df = STACDataFrame(gdf)
78
+ # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
79
+ df.to_stac(download_path)
80
+ # download assets
81
+ if assets:
82
+ if verbose:
83
+ logger("Downloading assets...")
84
+ repo = FilesAPIRepo()
85
+ df = df.dropna(subset=["assets"])
86
+ for row in tqdm(df.iterrows(), total=len(df)):
87
+ for k, v in row[1]["assets"].items():
88
+ href = v["href"]
89
+ _, filename = href.split("/download/")
90
+ # will overwrite assets with same name :(
91
+ repo.download_file_url(
92
+ href, filename, f"{download_path}/assets", user
93
+ )
94
+ else:
95
+ logger("To download assets, set assets=True or -a in the CLI.")
112
96
  if verbose:
113
97
  logger("Done")
114
98
  return download_path
@@ -2,13 +2,16 @@ from pathlib import Path
2
2
  import yaml
3
3
  import frontmatter
4
4
  import markdown
5
+ from tqdm import tqdm
6
+ import json
5
7
 
6
8
  from ..auth import with_auth
7
9
  from .metadata import Metadata, generate_metadata
8
- from ..repos import ModelsAPIRepo
10
+ from ..repos import ModelsAPIRepo, FilesAPIRepo
9
11
  from ..shared import calculate_checksum
10
- from ..files import ingest_files
12
+ from ..files import ingest_files, create_new_version
11
13
  from .update import update_model
14
+ from ..curation.stac import STACDataFrame
12
15
 
13
16
 
14
17
  def ingest_model(
@@ -17,8 +20,8 @@ def ingest_model(
17
20
  path = Path(path)
18
21
  if not path.is_dir():
19
22
  raise Exception("Path must be a folder")
20
- # if "catalog.json" in [f.name for f in path.iterdir()]:
21
- # return ingest_stac(path / "catalog.json", logger)
23
+ if "catalog.json" in [f.name for f in path.iterdir()]:
24
+ return ingest_stac(path / "catalog.json", logger)
22
25
  return ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
23
26
 
24
27
 
@@ -101,3 +104,64 @@ def check_metadata(
101
104
  generate_metadata(str(folder), dataset)
102
105
  return False
103
106
  return False
107
+
108
+
109
+ def retrieve_stac_model(model_name, user):
110
+ repo = ModelsAPIRepo()
111
+ data, error = repo.retrieve_model(model_name)
112
+ # print(data, error)
113
+ if data and data["uid"] != user["uid"]:
114
+ raise Exception("Model already exists.")
115
+ if error and error == "Model doesn't exist":
116
+ # create model
117
+ data, error = repo.create_stac_model(model_name, user)
118
+ # print(data, error)
119
+ if error:
120
+ raise Exception(error)
121
+ data["id"] = data["model_id"]
122
+ return data["id"]
123
+
124
+
125
+ @with_auth
126
+ def ingest_stac(stac_catalog, logger=None, user=None):
127
+ repo, files_repo = ModelsAPIRepo(), FilesAPIRepo()
128
+ # load catalog
129
+ logger("Loading STAC catalog...")
130
+ df = STACDataFrame.from_stac_file(stac_catalog)
131
+ catalog = df[df["type"] == "Catalog"]
132
+ assert len(catalog) == 1, "STAC catalog must have exactly one root catalog"
133
+ dataset_name = catalog.id.iloc[0]
134
+ # retrieve dataset (create if doesn't exist)
135
+ model_id = retrieve_stac_model(dataset_name, user)
136
+ # create new version
137
+ version = create_new_version(repo, model_id, user)
138
+ logger("New version created, version: " + str(version))
139
+ df2 = df.dropna(subset=["assets"])
140
+ for row in tqdm(df2.iterrows(), total=len(df2)):
141
+ try:
142
+ for k, v in row[1]["assets"].items():
143
+ data, error = files_repo.ingest_file(
144
+ v["href"],
145
+ model_id,
146
+ user,
147
+ calculate_checksum(v["href"]), # is always absolute?
148
+ "models",
149
+ version,
150
+ )
151
+ if error:
152
+ raise Exception(error)
153
+ file_url = (
154
+ f"{repo.url}models/{data['model_id']}/download/{data['filename']}"
155
+ )
156
+ df.loc[row[0], "assets"][k]["href"] = file_url
157
+ except Exception as e:
158
+ logger(f"Error uploading asset {row[0]}: {e}")
159
+ break
160
+ # ingest the STAC catalog into geodb
161
+ logger("Ingesting STAC catalog...")
162
+ data, error = repo.ingest_stac(json.loads(df.to_json()), model_id, user)
163
+ if error:
164
+ # TODO: delete all assets that were uploaded
165
+ raise Exception(error)
166
+ logger("Done")
167
+ return
@@ -1,4 +1,5 @@
1
1
  import requests
2
+ import geopandas as gpd
2
3
 
3
4
  from ..repos import APIRepo
4
5
 
@@ -53,3 +54,27 @@ class ModelsAPIRepo(APIRepo):
53
54
  headers=self.generate_headers(user),
54
55
  )
55
56
  return self.format_response(response)
57
+
58
+ def create_stac_model(self, name, user):
59
+ response = requests.post(
60
+ self.url + "models/stac",
61
+ json={"name": name},
62
+ headers=self.generate_headers(user),
63
+ )
64
+ return self.format_response(response)
65
+
66
+ def ingest_stac(self, stac_json, model_id, user):
67
+ response = requests.put(
68
+ self.url + f"models/stac/{model_id}",
69
+ json={"stac": stac_json},
70
+ headers=self.generate_headers(user),
71
+ )
72
+ return self.format_response(response)
73
+
74
+ def download_stac(self, model_id, user):
75
+ url = self.url + "models/" + model_id + "/download"
76
+ headers = self.generate_headers(user)
77
+ response = requests.get(url, headers=headers)
78
+ if response.status_code != 200:
79
+ return None, response.json()["detail"]
80
+ return gpd.GeoDataFrame.from_features(response.json()["features"]), None
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "eotdl"
3
- version = "2024.05.02"
3
+ version = "2024.06.13"
4
4
  description = "Earth Observation Training Data Lab"
5
5
  authors = ["EarthPulse <it@earthpulse.es>"]
6
6
  license = "MIT"
@@ -1 +0,0 @@
1
- __version__ = "2024.05.02"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes