eotdl 2023.10.25.post10-py3-none-any.whl → 2023.11.2.post2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (45)
  1. eotdl/__init__.py +1 -1
  2. eotdl/cli.py +6 -2
  3. eotdl/commands/auth.py +18 -1
  4. eotdl/commands/datasets.py +61 -11
  5. eotdl/commands/models.py +108 -0
  6. eotdl/curation/__init__.py +1 -4
  7. eotdl/curation/stac/assets.py +2 -1
  8. eotdl/curation/stac/dataframe.py +1 -1
  9. eotdl/curation/stac/extensions/label/image_name_labeler.py +6 -5
  10. eotdl/curation/stac/extensions/ml_dataset.py +15 -25
  11. eotdl/curation/stac/extent.py +1 -1
  12. eotdl/curation/stac/stac.py +1 -1
  13. eotdl/datasets/download.py +5 -4
  14. eotdl/datasets/ingest.py +25 -154
  15. eotdl/datasets/retrieve.py +1 -1
  16. eotdl/files/__init__.py +1 -0
  17. eotdl/files/ingest.py +175 -0
  18. eotdl/models/__init__.py +3 -0
  19. eotdl/models/download.py +119 -0
  20. eotdl/models/ingest.py +47 -0
  21. eotdl/models/metadata.py +16 -0
  22. eotdl/models/retrieve.py +26 -0
  23. eotdl/repos/FilesAPIRepo.py +136 -95
  24. eotdl/repos/ModelsAPIRepo.py +40 -0
  25. eotdl/repos/__init__.py +1 -0
  26. eotdl/shared/__init__.py +1 -0
  27. eotdl/tools/__init__.py +5 -6
  28. eotdl/tools/geo_utils.py +15 -1
  29. eotdl/tools/stac.py +144 -8
  30. eotdl/tools/time_utils.py +19 -6
  31. eotdl/tools/tools.py +2 -3
  32. {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/METADATA +1 -1
  33. {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/RECORD +38 -35
  34. eotdl/curation/folder_formatters/__init__.py +0 -1
  35. eotdl/curation/folder_formatters/base.py +0 -19
  36. eotdl/curation/folder_formatters/sentinel_hub.py +0 -135
  37. eotdl/curation/stac/utils/__init__.py +0 -5
  38. eotdl/curation/stac/utils/geometry.py +0 -22
  39. eotdl/curation/stac/utils/stac.py +0 -143
  40. eotdl/curation/stac/utils/time.py +0 -21
  41. /eotdl/{datasets/utils.py → shared/checksum.py} +0 -0
  42. /eotdl/{curation/stac/utils → tools}/metadata.py +0 -0
  43. /eotdl/{curation/stac/utils → tools}/paths.py +0 -0
  44. {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/WHEEL +0 -0
  45. {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/entry_points.txt +0 -0
eotdl/repos/FilesAPIRepo.py CHANGED
@@ -1,6 +1,7 @@
 import requests
 import os
 from tqdm import tqdm
+import hashlib

 from ..repos import APIRepo

@@ -9,52 +10,85 @@ class FilesAPIRepo(APIRepo):
     def __init__(self, url=None):
         super().__init__(url)

-    def ingest_file(self, file, dataset_id, version, parent, id_token, checksum=None):
+    def ingest_files_batch(
+        self,
+        batch,  # ziped batch of files
+        checksums,
+        dataset_or_model_id,
+        id_token,
+        endpoint,
+        version=None,
+    ):
+        url = self.url + f"{endpoint}/{dataset_or_model_id}"
+        if version is not None:
+            url += "?version=" + str(version)
         reponse = requests.post(
-            self.url + "datasets/" + dataset_id,
-            files={"file": open(file, "rb")},
-            data={"checksum": checksum, "version": version, "parent": parent}
-            if checksum
-            else None,
+            url,
+            files={"batch": ("batch.zip", batch)},
+            data={"checksums": checksums},
             headers={"Authorization": "Bearer " + id_token},
         )
         return self.format_response(reponse)

-    def ingest_existing_file(
+    def add_files_batch_to_version(
         self,
-        filename,
-        dataset_id,
+        batch,
+        dataset_or_model_id,
         version,
-        file_version,
         id_token,
-        checksum=None,
+        endpoint,
     ):
         reponse = requests.post(
-            self.url + "datasets/" + dataset_id,
+            self.url + f"{endpoint}/{dataset_or_model_id}/files?version={str(version)}",
             data={
-                "checksum": checksum,
-                "version": version,
-                "filename": filename,
-                "fileversion": file_version,
-            }
+                "filenames": [f["path"] for f in batch],
+                "checksums": [f["checksum"] for f in batch],
+            },
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        return self.format_response(reponse)
+
+    def ingest_file(
+        self,
+        file,
+        dataset_or_model_id,
+        version,
+        parent,
+        id_token,
+        checksum,
+        endpoint,
+    ):
+        reponse = requests.post(
+            self.url + f"{endpoint}/{dataset_or_model_id}",
+            files={"file": open(file, "rb")},
+            data={"checksum": checksum, "version": version, "parent": parent}
             if checksum
             else None,
             headers={"Authorization": "Bearer " + id_token},
         )
         return self.format_response(reponse)

-    def retrieve_dataset_files(self, dataset_id, version=None):
-        url = self.url + "datasets/" + dataset_id + "/files"
+    def retrieve_files(self, dataset_or_model_id, endpoint, version=None):
+        url = f"{self.url}{endpoint}/{dataset_or_model_id}/files"
         if version is not None:
             url += "?version=" + str(version)
         response = requests.get(url)
         return self.format_response(response)

-    def download_file(self, dataset_id, file_name, id_token, path, file_version):
-        url = self.url + "datasets/" + dataset_id + "/download/" + file_name
+    def download_file(
+        self,
+        dataset_or_model_id,
+        file_name,
+        id_token,
+        path,
+        file_version,
+        endpoint="datasets",
+        progress=False,
+    ):
+        url = self.url + f"{endpoint}/{dataset_or_model_id}/download/{file_name}"
         if file_version is not None:
             url += "?version=" + str(file_version)
-        return self.download_file_url(url, file_name, path, id_token)
+        return self.download_file_url(url, file_name, path, id_token, progress=progress)

     def download_file_url(self, url, filename, path, id_token, progress=False):
         headers = {"Authorization": "Bearer " + id_token}
@@ -66,9 +100,14 @@ class FilesAPIRepo(APIRepo):
             r.raise_for_status()
             total_size = int(r.headers.get("content-length", 0))
             block_size = 1024 * 1024 * 10
+            progress = progress and total_size > 1024 * 1024 * 16
             if progress:
                 progress_bar = tqdm(
-                    total=total_size, unit="iB", unit_scale=True, unit_divisor=1024
+                    total=total_size,
+                    unit="iB",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    position=1,
                 )
             with open(path, "wb") as f:
                 for chunk in r.iter_content(block_size):
@@ -90,79 +129,81 @@ class FilesAPIRepo(APIRepo):
     # return None, reponse.json()["detail"]
     # return reponse.json(), None

-    # def read_in_chunks(self, file_object, CHUNK_SIZE):
-    #     while True:
-    #         data = file_object.read(CHUNK_SIZE)
-    #         if not data:
-    #             break
-    #         yield data
-
-    # def prepare_large_upload(self, file, dataset_id, checksum, id_token):
-    #     filename = Path(file).name
-    #     response = requests.post(
-    #         self.url + f"datasets/{dataset_id}/uploadId",
-    #         json={"name": filename, "checksum": checksum},
-    #         headers={"Authorization": "Bearer " + id_token},
-    #     )
-    #     if response.status_code != 200:
-    #         raise Exception(response.json()["detail"])
-    #     data = response.json()
-    #     upload_id, parts = (
-    #         data["upload_id"],
-    #         data["parts"] if "parts" in data else [],
-    #     )
-    #     return upload_id, parts
-
-    # def get_chunk_size(self, content_size):
-    #     # adapt chunk size to content size to avoid S3 limits (10000 parts, 500MB per part, 5TB per object)
-    #     chunk_size = 1024 * 1024 * 10  # 10 MB (up to 100 GB, 10000 parts)
-    #     if content_size >= 1024 * 1024 * 1024 * 100:  # 100 GB
-    #         chunk_size = 1024 * 1024 * 100  # 100 MB (up to 1 TB, 10000 parts)
-    #     elif content_size >= 1024 * 1024 * 1024 * 1000:  # 1 TB
-    #         chunk_size = 1024 * 1024 * 500  # 0.5 GB (up to 5 TB, 10000 parts)
-    #     return chunk_size
-
-    # def ingest_large_dataset(self, file, upload_id, id_token, parts):
-    #     content_path = os.path.abspath(file)
-    #     content_size = os.stat(content_path).st_size
-    #     chunk_size = self.get_chunk_size(content_size)
-    #     total_chunks = content_size // chunk_size
-    #     # upload chunks sequentially
-    #     pbar = tqdm(
-    #         self.read_in_chunks(open(content_path, "rb"), chunk_size),
-    #         total=total_chunks,
-    #     )
-    #     index = 0
-    #     for chunk in pbar:
-    #         part = index // chunk_size + 1
-    #         offset = index + len(chunk)
-    #         index = offset
-    #         if part not in parts:
-    #             checksum = hashlib.md5(chunk).hexdigest()
-    #             response = requests.post(
-    #                 self.url + "datasets/chunk/" + upload_id,
-    #                 files={"file": chunk},
-    #                 data={"part_number": part, "checksum": checksum},
-    #                 headers={"Authorization": "Bearer " + id_token},
-    #             )
-    #             if response.status_code != 200:
-    #                 raise Exception(response.json()["detail"])
-    #         pbar.set_description(
-    #             "{:.2f}/{:.2f} MB".format(
-    #                 offset / 1024 / 1024, content_size / 1024 / 1024
-    #             )
-    #         )
-    #     pbar.close()
-    #     return
+    def prepare_large_upload(
+        self, filename, dataset_or_model_id, checksum, id_token, endpoint
+    ):
+        response = requests.post(
+            self.url + f"{endpoint}/{dataset_or_model_id}/uploadId",
+            json={"filname": filename, "checksum": checksum},
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        if response.status_code != 200:
+            raise Exception(response.json()["detail"])
+        data = response.json()
+        upload_id, parts = (
+            data["upload_id"],
+            data["parts"] if "parts" in data else [],
+        )
+        return upload_id, parts

-    # def complete_upload(self, id_token, upload_id):
-    #     r = requests.post(
-    #         self.url + "datasets/complete/" + upload_id,
-    #         headers={"Authorization": "Bearer " + id_token},
-    #     )
-    #     if r.status_code != 200:
-    #         return None, r.json()["detail"]
-    #     return r.json(), None
+    def get_chunk_size(self, content_size):
+        # adapt chunk size to content size to avoid S3 limits (10000 parts, 500MB per part, 5TB per object)
+        chunk_size = 1024 * 1024 * 10  # 10 MB (up to 100 GB, 10000 parts)
+        if content_size >= 1024 * 1024 * 1024 * 100:  # 100 GB
+            chunk_size = 1024 * 1024 * 100  # 100 MB (up to 1 TB, 10000 parts)
+        elif content_size >= 1024 * 1024 * 1024 * 1000:  # 1 TB
+            chunk_size = 1024 * 1024 * 500  # 0.5 GB (up to 5 TB, 10000 parts)
+        return chunk_size
+
+    def read_in_chunks(self, file_object, CHUNK_SIZE):
+        while True:
+            data = file_object.read(CHUNK_SIZE)
+            if not data:
+                break
+            yield data
+
+    def ingest_large_file(
+        self, file_path, files_size, upload_id, id_token, parts, endpoint
+    ):
+        print(endpoint)
+        # content_path = os.path.abspath(file)
+        # content_size = os.stat(content_path).st_size
+        chunk_size = self.get_chunk_size(files_size)
+        total_chunks = files_size // chunk_size
+        # upload chunks sequentially
+        pbar = tqdm(
+            self.read_in_chunks(open(file_path, "rb"), chunk_size),
+            total=total_chunks,
+        )
+        index = 0
+        for chunk in pbar:
+            part = index // chunk_size + 1
+            offset = index + len(chunk)
+            index = offset
+            if part not in parts:
+                checksum = hashlib.md5(chunk).hexdigest()
+                response = requests.post(
+                    f"{self.url}{endpoint}/chunk/{upload_id}",
+                    files={"file": chunk},
+                    data={"part_number": part, "checksum": checksum},
+                    headers={"Authorization": "Bearer " + id_token},
+                )
+                if response.status_code != 200:
+                    raise Exception(response.json()["detail"])
+            pbar.set_description(
+                "{:.2f}/{:.2f} MB".format(
+                    offset / 1024 / 1024, files_size / 1024 / 1024
+                )
+            )
+        pbar.close()
+        return
+
+    def complete_upload(self, id_token, upload_id, version, endpoint):
+        r = requests.post(
+            f"{self.url}{endpoint}/complete/{upload_id}?version={version}",
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        return self.format_response(r)

     # def update_dataset(self, name, path, id_token, checksum):
     #     # check that dataset exists
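For orientation, a minimal sketch of how the reworked multipart upload flow above might be driven from client code, assuming the default API URL; the file path, dataset id, checksum and token below are placeholders, not values taken from this diff:

    import os
    from eotdl.repos import FilesAPIRepo

    repo = FilesAPIRepo()          # url=None falls back to the library's default API URL
    id_token = "<auth token>"      # placeholder
    dataset_id = "<dataset id>"    # placeholder
    file_path = "data/large_scene.zip"  # placeholder

    # 1. ask the API for an upload id (and any parts already uploaded, for resuming)
    upload_id, parts = repo.prepare_large_upload(
        "large_scene.zip", dataset_id, "<md5 checksum>", id_token, "datasets"
    )

    # 2. stream the file in chunks sized by get_chunk_size(), skipping parts already present
    repo.ingest_large_file(
        file_path, os.path.getsize(file_path), upload_id, id_token, parts, "datasets"
    )

    # 3. close the multipart upload against a dataset version
    repo.complete_upload(id_token, upload_id, version=1, endpoint="datasets")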
eotdl/repos/ModelsAPIRepo.py ADDED
@@ -0,0 +1,40 @@
+import requests
+import os
+
+from ..repos import APIRepo
+
+
+class ModelsAPIRepo(APIRepo):
+    def __init__(self, url=None):
+        super().__init__(url)
+
+    def retrieve_models(self, name, limit):
+        url = self.url + "models"
+        if name is not None:
+            url += "?match=" + name
+        if limit is not None:
+            if name is None:
+                url += "?limit=" + str(limit)
+            else:
+                url += "&limit=" + str(limit)
+        response = requests.get(url)
+        return self.format_response(response)
+
+    def create_model(self, metadata, id_token):
+        response = requests.post(
+            self.url + "models",
+            json=metadata,
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        return self.format_response(response)
+
+    def retrieve_model(self, name):
+        response = requests.get(self.url + "models?name=" + name)
+        return self.format_response(response)
+
+    def create_version(self, model_id, id_token):
+        response = requests.post(
+            self.url + "models/version/" + model_id,
+            headers={"Authorization": "Bearer " + id_token},
+        )
+        return self.format_response(response)
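A small usage sketch for the new ModelsAPIRepo, based only on the method signatures added above; the model names, metadata fields and token are placeholders, and each call simply returns whatever APIRepo.format_response produces:

    from eotdl.repos import ModelsAPIRepo

    repo = ModelsAPIRepo()

    # list models, optionally filtered by name and limited in number
    models = repo.retrieve_models(name="EuroSAT", limit=5)   # GET .../models?match=EuroSAT&limit=5

    # look up a single model by exact name
    model = repo.retrieve_model("EuroSAT-RGB")               # GET .../models?name=EuroSAT-RGB

    # create a new model entry, then a version for an existing model id
    created = repo.create_model({"name": "my-model"}, "<auth token>")
    version = repo.create_version("<model id>", "<auth token>")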
eotdl/repos/__init__.py CHANGED
@@ -3,3 +3,4 @@ from .APIRepo import APIRepo
 from .AuthAPIRepo import AuthAPIRepo
 from .DatasetsAPIRepo import DatasetsAPIRepo
 from .FilesAPIRepo import FilesAPIRepo
+from .ModelsAPIRepo import ModelsAPIRepo
eotdl/shared/__init__.py ADDED
@@ -0,0 +1 @@
+from .checksum import calculate_checksum
eotdl/tools/__init__.py CHANGED
@@ -1,7 +1,6 @@
-from .stac import stac_items_to_gdf
-from .tools import (generate_location_payload,
-                    format_product_location_payload,
-                    get_images_by_location,
-                    get_tarfile_image_info)
+from .stac import *
+from .tools import *
 from .geo_utils import *
-from .time_utils import *
+from .time_utils import *
+from .metadata import *
+from .paths import *
eotdl/tools/geo_utils.py CHANGED
@@ -5,9 +5,10 @@ import tarfile

 from typing import Union
 from shapely import geometry
-from shapely.geometry import box, Polygon
+from shapely.geometry import box, Polygon, shape
 from pyproj import Transformer
 from sentinelhub import BBox, CRS, bbox_to_dimensions
+from pandas import isna


 def is_bounding_box(bbox: list) -> bool:
@@ -199,3 +200,16 @@ def generate_new_locations_bounding_boxes(gdf: gpd.GeoDataFrame,
            latest_id += 1

    return bbox_by_new_location
+
+
+def convert_df_geom_to_shape(row):
+    """
+    Convert the geometry of a dataframe row to a shapely shape
+    """
+    if not isna(row["geometry"]):
+        geo = shape(row["geometry"])
+        wkt = geo.wkt
+    else:
+        wkt = "POLYGON EMPTY"
+
+    return wkt
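The new convert_df_geom_to_shape helper turns a GeoJSON-style geometry mapping in a dataframe row into its WKT string, falling back to "POLYGON EMPTY" when the geometry is missing. A minimal sketch with a made-up dataframe:

    import pandas as pd
    from eotdl.tools.geo_utils import convert_df_geom_to_shape

    df = pd.DataFrame({
        "geometry": [
            {"type": "Point", "coordinates": [2.17, 41.38]},  # GeoJSON-like mapping
            None,                                             # missing geometry
        ]
    })
    # apply row-wise so each row's geometry is replaced by its WKT representation
    df["geometry"] = df.apply(convert_df_geom_to_shape, axis=1)
    print(df["geometry"].tolist())  # ['POINT (2.17 41.38)', 'POLYGON EMPTY']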
eotdl/tools/stac.py CHANGED
@@ -1,11 +1,20 @@
 """
 Module for data engineering with STAC elements
 """
+
 import geopandas as gpd
-from pystac import ItemCollection
+import pystac
+
+from os.path import dirname, join, abspath
+from os import makedirs
+from json import dumps
+from typing import Union, Optional
+from tqdm import tqdm
+from traceback import print_exc
+from shutil import rmtree


-def stac_items_to_gdf(items: ItemCollection) -> gpd.GeoDataFrame:
+def stac_items_to_gdf(items: pystac.ItemCollection) -> gpd.GeoDataFrame:
     """
     Get a GeoDataFrame from a given pystac.ItemCollection.

@@ -18,14 +27,141 @@ def stac_items_to_gdf(items: ItemCollection) -> gpd.GeoDataFrame:
     features = []
     for f in _features:
         if f not in features:
-            # Add the id, type and stac_extensions as properties, in order to retrieve
-            # them as columns in the GeoDataFrame
-            # TODO put all the properties in the GeoDataFrame
-            f['properties']['id'] = f['id']
+            # Add all the keys in the properties dict as columns in the GeoDataFrame
+            for k, v in f.items():
+                if k not in f['properties'] and k != 'geometry':
+                    f['properties'][k] = v
             if 'scene_id' in f['properties']:
                 f['properties']['scene_id'] = f['id'].split('_')[3]
-            f['properties']['type'] = f['type']
-            f['properties']['stac_extensions'] = f['stac_extensions']
             features.append(f)

     return gpd.GeoDataFrame.from_features(features)
+
+
+def get_all_children(obj: pystac.STACObject) -> list:
+    """
+    Get all the children of a STAC object
+    """
+    children = []
+    # Append the current object to the list
+    children.append(obj.to_dict())
+
+    # Collections
+    collections = list(obj.get_collections())
+    for collection in collections:
+        children.append(collection.to_dict())
+
+    # Items
+    items = obj.get_items()
+    for item in items:
+        children.append(item.to_dict())
+
+    # Items from collections
+    for collection in collections:
+        items = collection.get_items()
+        for item in items:
+            children.append(item.to_dict())
+
+    return children
+
+
+def make_links_relative_to_path(path: str,
+                                catalog: Union[pystac.Catalog, str],
+                                ) -> pystac.Catalog:
+    """
+    Makes all asset HREFs and links in the STAC catalog relative to a given path
+    """
+    if isinstance(catalog, str):
+        catalog = pystac.read_file(catalog)
+    path = abspath(path)
+
+    # Create a temporary catalog in the destination path to set as root
+    future_path = join(path, 'catalog.json')
+    makedirs(path, exist_ok=True)
+    with open(future_path, 'w') as f:
+        f.write(dumps(catalog.to_dict(), indent=4))
+    temp_catalog = pystac.Catalog.from_file(future_path)
+
+    catalog.set_root(temp_catalog)
+    catalog.make_all_asset_hrefs_absolute()
+
+    for collection in catalog.get_children():
+        # Create new collection
+        new_collection = collection.clone()
+        new_collection.set_self_href(join(path, collection.id, f"collection.json"))
+        new_collection.set_root(catalog)
+        new_collection.set_parent(catalog)
+        # Remove old collection and add new one to catalog
+        catalog.remove_child(collection.id)
+        catalog.add_child(new_collection)
+        for item in collection.get_all_items():
+            # Create new item from old collection and add it to the new collection
+            new_item = item.clone()
+            new_item.set_self_href(join(path, collection.id, item.id, f"{item.id}.json"))
+            new_item.set_parent(collection)
+            new_item.set_root(catalog)
+            new_item.make_asset_hrefs_relative()
+            new_collection.add_item(new_item)
+
+    catalog.make_all_asset_hrefs_relative()
+
+    return catalog
+
+
+def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
+                        catalog_2: Union[pystac.Catalog, str],
+                        destination: Optional[str] = None,
+                        keep_extensions: Optional[bool] = False,
+                        catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED
+                        ) -> None:
+    """
+    Merge two STAC catalogs, keeping the properties, collection and items of both catalogs
+    """
+    if isinstance(catalog_1, str):
+        catalog_1 = pystac.Catalog.from_file(catalog_1)
+    if isinstance(catalog_2, str):
+        catalog_2 = pystac.Catalog.from_file(catalog_2)
+
+    for col1 in tqdm(catalog_1.get_children(), desc='Merging catalogs...'):
+        # Check if the collection exists in catalog_2
+        col2 = catalog_2.get_child(col1.id)
+        if col2 is None:
+            # If it does not exist, add it
+            col1_ = col1.clone()
+            catalog_2.add_child(col1)
+            col2 = catalog_2.get_child(col1.id)
+            col2.clear_items()
+            for i in col1_.get_stac_objects(pystac.RelType.ITEM):
+                col2.add_item(i)
+        else:
+            # If it exists, merge the items
+            for item1 in col1.get_items():
+                if col2.get_item(item1.id) is None:
+                    col2.add_item(item1)
+
+    if keep_extensions:
+        for ext in catalog_1.stac_extensions:
+            if ext not in catalog_2.stac_extensions:
+                catalog_2.stac_extensions.append(ext)
+
+        for extra_field_name, extra_field_value in catalog_1.extra_fields.items():
+            if extra_field_name not in catalog_2.extra_fields:
+                catalog_2.extra_fields[extra_field_name] = extra_field_value
+
+    if destination:
+        make_links_relative_to_path(destination, catalog_2)
+    else:
+        destination = dirname(catalog_2.get_self_href())
+
+    # Save the merged catalog
+    try:
+        print("Validating and saving...")
+        catalog_2.validate()
+        rmtree(destination) if not destination else None  # Remove the old catalog and replace it with the new one
+        catalog_2.normalize_and_save(root_href=destination,
+                                     catalog_type=catalog_type
+                                     )
+        print("Success!")
+    except pystac.STACValidationError:
+        # Return full callback
+        print_exc()
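As a quick illustration of the new catalog utilities, a hedged sketch of merging two local STAC catalogs with merge_stac_catalogs; the catalog paths are placeholders:

    from eotdl.tools.stac import merge_stac_catalogs

    merge_stac_catalogs(
        "source/catalog.json",    # catalog_1: collections/items to merge in
        "target/catalog.json",    # catalog_2: catalog that receives them
        destination="merged",     # rewrite links relative to this folder and save there
        keep_extensions=True,     # also copy stac_extensions and extra_fields from catalog_1
    )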
eotdl/tools/time_utils.py CHANGED
@@ -81,21 +81,21 @@ def expand_time_interval(time_interval: Union[list, tuple], format: str='%Y-%m-%
 def prepare_time_interval(date):
     if isinstance(date, str):
         date = datetime.strptime(date, "%Y-%m-%d")
-    elif isinstance(date, datetime):
-        date = date.strftime("%Y-%m-%d")
     elif isinstance(date, tuple):
         if not is_time_interval(date):
             raise ValueError('The time interval must be a range of two dates, with format YYYY-MM-DD or a datetime object')
         else:
             return date
-    else:
+    elif not isinstance(date, datetime):
         raise ValueError('The date must be a string with format YYYY-MM-DD or a datetime object')
+
     date_day_before = date - timedelta(days=1)
     date_next_day = date + timedelta(days=1)
-    date_day_before = date_day_before.strftime("%Y-%m-%d")
-    date_next_day = date_next_day.strftime("%Y-%m-%d")

-    return (date_day_before, date_next_day)
+    date_day_before_str = date_day_before.strftime("%Y-%m-%d")
+    date_next_day_str = date_next_day.strftime("%Y-%m-%d")
+
+    return (date_day_before_str, date_next_day_str)


 def get_day_between(from_date: Union[datetime, str],
@@ -112,3 +112,16 @@ def get_day_between(from_date: Union[datetime, str],
     date_between = date_between.strftime("%Y-%m-%d")

     return date_between
+
+
+def format_time_acquired(dt: Union[str, datetime]) -> str:
+    """
+    Format the date time to the required format for STAC
+
+    :param dt: date time to format
+    """
+    from dateutil import parser
+
+    dt_str = parser.parse(dt).strftime("%Y-%m-%dT%H:%M:%S.%f")
+
+    return dt_str
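A short sketch of the time_utils changes: prepare_time_interval now keeps datetime inputs as datetimes before the day arithmetic, and the new format_time_acquired normalizes a parseable date string to the STAC timestamp format. The example dates are arbitrary:

    from datetime import datetime
    from eotdl.tools.time_utils import prepare_time_interval, format_time_acquired

    print(prepare_time_interval("2023-11-02"))           # ('2023-11-01', '2023-11-03')
    print(prepare_time_interval(datetime(2023, 11, 2)))  # same result; previously the datetime was converted to str before the arithmetic
    print(format_time_acquired("2023-11-02"))            # '2023-11-02T00:00:00.000000'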
eotdl/tools/tools.py CHANGED
@@ -5,13 +5,12 @@ Module for data engineeringt
 import geopandas as gpd
 import pandas as pd
 import tarfile
-import rasterio
 import re
 import datetime
 import json

-from shapely.geometry import box, Polygon
-from pyproj import Transformer
+from .geo_utils import get_image_bbox
+from shapely.geometry import box
 from os.path import exists
 from typing import Union, Optional

{eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: eotdl
-Version: 2023.10.25.post10
+Version: 2023.11.2.post2
 Summary: Earth Observation Training Data Lab
 License: MIT
 Author: EarthPulse