eotdl 2023.11.2.post5__py3-none-any.whl → 2023.11.3.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. eotdl/__init__.py +1 -1
  2. eotdl/access/__init__.py +6 -3
  3. eotdl/access/airbus/__init__.py +5 -1
  4. eotdl/access/airbus/client.py +356 -338
  5. eotdl/access/airbus/parameters.py +19 -4
  6. eotdl/access/airbus/utils.py +26 -21
  7. eotdl/access/download.py +30 -14
  8. eotdl/access/search.py +17 -6
  9. eotdl/access/sentinelhub/__init__.py +5 -1
  10. eotdl/access/sentinelhub/client.py +57 -54
  11. eotdl/access/sentinelhub/evalscripts.py +38 -39
  12. eotdl/access/sentinelhub/parameters.py +43 -23
  13. eotdl/access/sentinelhub/utils.py +38 -28
  14. eotdl/auth/errors.py +2 -1
  15. eotdl/commands/auth.py +3 -3
  16. eotdl/curation/__init__.py +5 -1
  17. eotdl/curation/stac/__init__.py +5 -1
  18. eotdl/curation/stac/assets.py +55 -32
  19. eotdl/curation/stac/dataframe.py +20 -14
  20. eotdl/curation/stac/dataframe_bck.py +2 -2
  21. eotdl/curation/stac/dataframe_labeling.py +15 -12
  22. eotdl/curation/stac/extensions/__init__.py +6 -2
  23. eotdl/curation/stac/extensions/base.py +8 -4
  24. eotdl/curation/stac/extensions/dem.py +6 -3
  25. eotdl/curation/stac/extensions/eo.py +10 -6
  26. eotdl/curation/stac/extensions/label/__init__.py +5 -1
  27. eotdl/curation/stac/extensions/label/base.py +40 -26
  28. eotdl/curation/stac/extensions/label/image_name_labeler.py +64 -43
  29. eotdl/curation/stac/extensions/label/scaneo.py +59 -56
  30. eotdl/curation/stac/extensions/ml_dataset.py +154 -56
  31. eotdl/curation/stac/extensions/projection.py +11 -9
  32. eotdl/curation/stac/extensions/raster.py +22 -14
  33. eotdl/curation/stac/extensions/sar.py +12 -7
  34. eotdl/curation/stac/extent.py +67 -40
  35. eotdl/curation/stac/parsers.py +18 -10
  36. eotdl/curation/stac/stac.py +81 -62
  37. eotdl/datasets/__init__.py +1 -1
  38. eotdl/datasets/download.py +42 -55
  39. eotdl/datasets/ingest.py +68 -11
  40. eotdl/files/__init__.py +1 -1
  41. eotdl/files/ingest.py +3 -1
  42. eotdl/models/download.py +1 -1
  43. eotdl/repos/AuthAPIRepo.py +0 -1
  44. eotdl/repos/DatasetsAPIRepo.py +22 -146
  45. eotdl/repos/FilesAPIRepo.py +7 -92
  46. eotdl/repos/ModelsAPIRepo.py +0 -1
  47. eotdl/tools/__init__.py +5 -1
  48. eotdl/tools/geo_utils.py +78 -48
  49. eotdl/tools/metadata.py +13 -11
  50. eotdl/tools/paths.py +14 -14
  51. eotdl/tools/stac.py +36 -31
  52. eotdl/tools/time_utils.py +53 -26
  53. eotdl/tools/tools.py +84 -50
  54. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/METADATA +5 -3
  55. eotdl-2023.11.3.post2.dist-info/RECORD +84 -0
  56. eotdl-2023.11.2.post5.dist-info/RECORD +0 -84
  57. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/WHEEL +0 -0
  58. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/entry_points.txt +0 -0
@@ -1,16 +1,14 @@
1
- '''
1
+ """
2
2
  Module for STAC extent
3
- '''
3
+ """
4
4
 
5
- import pystac
6
5
  from datetime import datetime
7
- import rasterio
8
- import json
9
-
10
- from glob import glob
11
- from os.path import dirname
12
6
  from typing import List
13
7
 
8
+ import pystac
9
+
10
+ import rasterio
11
+
14
12
  from ...tools import get_item_metadata
15
13
 
16
14
 
@@ -18,33 +16,43 @@ def get_dem_temporal_interval() -> pystac.TemporalExtent:
18
16
  """
19
17
  Get a temporal interval for DEM data
20
18
  """
21
- min_date = datetime.strptime('2011-01-01', '%Y-%m-%d')
22
- max_date = datetime.strptime('2015-01-07', '%Y-%m-%d')
19
+ min_date = datetime.strptime("2011-01-01", "%Y-%m-%d")
20
+ max_date = datetime.strptime("2015-01-07", "%Y-%m-%d")
23
21
 
24
22
  return pystac.TemporalExtent([(min_date, max_date)])
25
-
23
+
24
+
26
25
  def get_unknow_temporal_interval() -> pystac.TemporalExtent:
27
26
  """
28
27
  Get an unknown temporal interval
29
28
  """
30
- min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
31
- max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
29
+ min_date = datetime.strptime("2000-01-01", "%Y-%m-%d")
30
+ max_date = datetime.strptime("2023-12-31", "%Y-%m-%d")
32
31
 
33
32
  return pystac.TemporalExtent([(min_date, max_date)])
34
-
33
+
34
+
35
35
  def get_unknow_extent() -> pystac.Extent:
36
36
  """
37
+ Get an unknown extent
37
38
  """
38
- return pystac.Extent(spatial=pystac.SpatialExtent([[0, 0, 0, 0]]),
39
- temporal=pystac.TemporalExtent([(datetime.strptime('2000-01-01', '%Y-%m-%d'),
40
- datetime.strptime('2023-12-31', '%Y-%m-%d')
41
- )]))
39
+ return pystac.Extent(
40
+ spatial=pystac.SpatialExtent([[0, 0, 0, 0]]),
41
+ temporal=pystac.TemporalExtent(
42
+ [
43
+ (
44
+ datetime.strptime("2000-01-01", "%Y-%m-%d"),
45
+ datetime.strptime("2023-12-31", "%Y-%m-%d"),
46
+ )
47
+ ]
48
+ ),
49
+ )
42
50
 
43
51
 
44
52
  def get_collection_extent(rasters: List[str]) -> pystac.Extent:
45
53
  """
46
54
  Get the extent of a collection
47
-
55
+
48
56
  :param rasters: list of rasters
49
57
  """
50
58
  # Get the spatial extent of the collection
@@ -55,7 +63,8 @@ def get_collection_extent(rasters: List[str]) -> pystac.Extent:
55
63
  extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_interval)
56
64
 
57
65
  return extent
58
-
66
+
67
+
59
68
  def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
60
69
  """
61
70
  Get the spatial extent of a collection
@@ -63,13 +72,15 @@ def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
63
72
  :param path: path to the directory
64
73
  """
65
74
  # Get the bounding boxes of all the given rasters
66
- bboxes = list()
75
+ bboxes = []
67
76
  for raster in rasters:
68
77
  with rasterio.open(raster) as ds:
69
78
  bounds = ds.bounds
70
- dst_crs = 'EPSG:4326'
79
+ dst_crs = "EPSG:4326"
71
80
  try:
72
- left, bottom, right, top = rasterio.warp.transform_bounds(ds.crs, dst_crs, *bounds)
81
+ left, bottom, right, top = rasterio.warp.transform_bounds(
82
+ ds.crs, dst_crs, *bounds
83
+ )
73
84
  bbox = [left, bottom, right, top]
74
85
  except rasterio.errors.CRSError:
75
86
  spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
@@ -77,16 +88,17 @@ def get_collection_spatial_extent(rasters: List[str]) -> pystac.SpatialExtent:
77
88
  bboxes.append(bbox)
78
89
  # Get the minimum and maximum values of the bounding boxes
79
90
  try:
80
- left = min([bbox[0] for bbox in bboxes])
81
- bottom = min([bbox[1] for bbox in bboxes])
82
- right = max([bbox[2] for bbox in bboxes])
83
- top = max([bbox[3] for bbox in bboxes])
91
+ left = min(bbox[0] for bbox in bboxes)
92
+ bottom = min(bbox[1] for bbox in bboxes)
93
+ right = max(bbox[2] for bbox in bboxes)
94
+ top = max(bbox[3] for bbox in bboxes)
84
95
  spatial_extent = pystac.SpatialExtent([[left, bottom, right, top]])
85
96
  except ValueError:
86
97
  spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
87
98
  finally:
88
99
  return spatial_extent
89
100
 
101
+
90
102
  def get_collection_temporal_interval(rasters: List[str]) -> pystac.TemporalExtent:
91
103
  """
92
104
  Get the temporal interval of a collection
@@ -94,38 +106,53 @@ def get_collection_temporal_interval(rasters: List[str]) -> pystac.TemporalExten
94
106
  :param path: path to the directory
95
107
  """
96
108
  # Get all the metadata.json files in the directory of all the given rasters
97
- metadata_jsons = list()
109
+ metadata_jsons = []
98
110
  for raster in rasters:
99
111
  metadata_json = get_item_metadata(raster)
100
112
  if metadata_json:
101
113
  metadata_jsons.append(metadata_json)
102
114
 
103
115
  if not metadata_jsons:
104
- return get_unknow_temporal_interval() # If there is no metadata, set a generic temporal interval
105
-
116
+ return (
117
+ get_unknow_temporal_interval()
118
+ ) # If there is no metadata, set a generic temporal interval
119
+
106
120
  # Get the temporal interval of every metadata.json file and the type of the data
107
- data_types = list()
108
- temporal_intervals = list()
121
+ data_types = []
122
+ temporal_intervals = []
109
123
  for metadata in metadata_jsons:
110
124
  # Append the temporal interval to the list as a datetime object
111
- temporal_intervals.append(metadata['acquisition-date']) if metadata['acquisition-date'] else None
125
+ temporal_intervals.append(metadata["acquisition-date"]) if metadata[
126
+ "acquisition-date"
127
+ ] else None
112
128
  # Append the data type to the list
113
- data_types.append(metadata['type']) if metadata['type'] else None
114
-
129
+ data_types.append(metadata["type"]) if metadata["type"] else None
130
+
115
131
  if temporal_intervals:
132
+ min_date, max_date = None, None
116
133
  try:
117
134
  # Get the minimum and maximum values of the temporal intervals
118
- min_date = min([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
119
- max_date = max([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
135
+ min_date = min(
136
+ datetime.strptime(interval, "%Y-%m-%d")
137
+ for interval in temporal_intervals
138
+ )
139
+ max_date = max(
140
+ datetime.strptime(interval, "%Y-%m-%d")
141
+ for interval in temporal_intervals
142
+ )
120
143
  except ValueError:
121
- min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
122
- max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
144
+ min_date = datetime.strptime("2000-01-01", "%Y-%m-%d")
145
+ max_date = datetime.strptime("2023-12-31", "%Y-%m-%d")
123
146
  finally:
124
147
  # Create the temporal interval
125
148
  return pystac.TemporalExtent([(min_date, max_date)])
126
149
  else:
127
150
  # Check if the collection is composed by DEM data. If not, set a generic temporal interval
128
- if set(data_types) == {'dem'} or set(data_types) == {'DEM'} or set(data_types) == {'dem', 'DEM'}:
151
+ if (
152
+ set(data_types) == {"dem"}
153
+ or set(data_types) == {"DEM"}
154
+ or set(data_types) == {"dem", "DEM"}
155
+ ):
129
156
  return get_dem_temporal_interval()
130
157
  else:
131
158
  return get_unknow_temporal_interval()
@@ -1,26 +1,31 @@
1
- '''
1
+ """
2
2
  Module for STAC parsers
3
- '''
3
+ """
4
4
 
5
5
  from os.path import dirname, basename
6
6
 
7
7
 
8
8
  class STACIdParser:
9
-
9
+ """
10
+ STAC ID parser base class
11
+ """
10
12
  def get_item_id(self, raster_path: str):
11
13
  """
12
14
  Get the ID of the STAC Item from the given raster path
13
15
 
14
16
  :param raster_path: path to the raster file
15
17
  """
16
- pass
18
+ return
17
19
 
18
20
 
19
21
  class StructuredParser(STACIdParser):
22
+ """
23
+ Structured STAC ID parser
24
+ """
20
25
 
21
26
  def __init__(self) -> None:
22
27
  super().__init__()
23
-
28
+
24
29
  def get_item_id(self, raster_path: str):
25
30
  """
26
31
  Get the ID of the STAC Item from the given raster path.
@@ -30,16 +35,19 @@ class StructuredParser(STACIdParser):
30
35
  :param raster_path: path to the raster file
31
36
  """
32
37
  tiff_dir_path = dirname(raster_path)
33
- id = tiff_dir_path.split('/')[-1]
38
+ item_id = tiff_dir_path.split("/")[-1]
34
39
 
35
- return id
40
+ return item_id
36
41
 
37
42
 
38
43
  class UnestructuredParser(STACIdParser):
44
+ """
45
+ Unstructured STAC ID parser
46
+ """
39
47
 
40
48
  def __init__(self) -> None:
41
49
  super().__init__()
42
-
50
+
43
51
  def get_item_id(self, raster_path: str):
44
52
  """
45
53
  Get the ID of the STAC Item from the given raster path.
@@ -48,6 +56,6 @@ class UnestructuredParser(STACIdParser):
48
56
 
49
57
  :param raster_path: path to the raster file
50
58
  """
51
- id = basename(raster_path).split('.')[0]
59
+ item_id = basename(raster_path).split(".")[0]
52
60
 
53
- return id
61
+ return item_id
@@ -1,41 +1,39 @@
1
1
  """
2
- Module for generating STAC metadata
2
+ Module for generating STAC metadata
3
3
  """
4
4
 
5
- import traceback
6
- from typing import Union
7
- import pandas as pd
8
- import pystac
9
- from tqdm import tqdm
10
-
5
+ import random
6
+ from datetime import datetime
7
+ from typing import Union, Optional
11
8
  from os.path import join, basename, dirname
12
- from shutil import rmtree
13
9
 
10
+ import pandas as pd
11
+ import pystac
14
12
  import rasterio
15
- import random
16
- from rasterio.warp import transform_bounds
17
- from typing import Union, List
18
-
19
- from datetime import datetime
13
+ from tqdm import tqdm
20
14
  from shapely.geometry import Polygon, mapping
21
- from glob import glob
22
- from typing import Union, Optional
23
15
 
24
16
  from .parsers import STACIdParser, StructuredParser
25
17
  from .assets import STACAssetGenerator
26
18
  from .dataframe_labeling import LabelingStrategy, UnlabeledStrategy
27
- from ...tools import (format_time_acquired,
28
- cut_images,
29
- get_item_metadata,
30
- get_all_images_in_path)
31
- from .extensions import (type_stac_extensions_dict,
32
- SUPPORTED_EXTENSIONS,
33
- LabelExtensionObject)
34
- from .extent import (get_unknow_extent,
35
- get_collection_extent)
19
+ from ...tools import (
20
+ format_time_acquired,
21
+ cut_images,
22
+ get_item_metadata,
23
+ get_all_images_in_path,
24
+ )
25
+ from .extensions import (
26
+ type_stac_extensions_dict,
27
+ SUPPORTED_EXTENSIONS,
28
+ )
29
+ from .extent import get_collection_extent
36
30
 
37
31
 
38
32
  class STACGenerator:
33
+ """
34
+ STAC generator class
35
+ """
36
+
39
37
  def __init__(
40
38
  self,
41
39
  image_format: str = "tiff",
@@ -63,7 +61,7 @@ class STACGenerator:
63
61
 
64
62
  def generate_stac_metadata(
65
63
  self,
66
- id: str,
64
+ stac_id: str,
67
65
  description: str,
68
66
  stac_dataframe: pd.DataFrame = None,
69
67
  output_folder: str = "stac",
@@ -84,8 +82,8 @@ class STACGenerator:
84
82
  raise ValueError("No STAC dataframe provided")
85
83
 
86
84
  # Create an empty catalog
87
- catalog = pystac.Catalog(id=id, description=description, **kwargs)
88
-
85
+ catalog = pystac.Catalog(id=stac_id, description=description, **kwargs)
86
+
89
87
  # Add the collections to the catalog
90
88
  collections = self._stac_dataframe.collection.unique()
91
89
  for collection_path in collections:
@@ -97,7 +95,9 @@ class STACGenerator:
97
95
  # Check there have been generate all the items from the images
98
96
  items_count = 0
99
97
  for collection in catalog.get_children():
100
- items = list(set([item.id for item in collection.get_items(recursive=True)]))
98
+ items = list(
99
+ set([item.id for item in collection.get_items(recursive=True)])
100
+ )
101
101
  items_count += len(items)
102
102
  if len(self._stac_dataframe) != items_count:
103
103
  raise pystac.STACError(
@@ -117,13 +117,14 @@ class STACGenerator:
117
117
  print(f"Catalog validation error: {e}")
118
118
  return
119
119
 
120
- def get_stac_dataframe(self,
121
- path: str,
122
- collections: Optional[Union[str, dict]]='source',
123
- bands: Optional[dict]=None,
124
- extensions: Optional[dict]=None,
125
- sample: Optional[int]=None
126
- ) -> pd.DataFrame:
120
+ def get_stac_dataframe(
121
+ self,
122
+ path: str,
123
+ collections: Optional[Union[str, dict]] = "source",
124
+ bands: Optional[dict] = None,
125
+ extensions: Optional[dict] = None,
126
+ sample: Optional[int] = None,
127
+ ) -> pd.DataFrame:
127
128
  """
128
129
  Get a dataframe with the STAC metadata of a given directory containing the assets to generate metadata
129
130
 
@@ -134,16 +135,20 @@ class STACGenerator:
134
135
  """
135
136
  images = get_all_images_in_path(path, self._image_format)
136
137
  if len(images) == 0:
137
- raise ValueError("No images found in the given path with the given extension. Please check the path and the extension")
138
-
139
- if self._assets_generator.type == 'Extracted':
138
+ raise ValueError(
139
+ "No images found in the given path with the given extension. Please check the path and the extension"
140
+ )
141
+
142
+ if self._assets_generator.type == "Extracted":
140
143
  images = cut_images(images)
141
144
 
142
145
  if sample:
143
146
  try:
144
147
  images = random.sample(images, sample)
145
148
  except ValueError:
146
- raise ValueError(f"Sample size must be smaller than the number of images ({len(images)}). May be there are no images found in the given path with the given extension")
149
+ raise ValueError(
150
+ f"Sample size must be smaller than the number of images ({len(images)}). May be there are no images found in the given path with the given extension"
151
+ )
147
152
 
148
153
  labels, ixs = self._labeling_strategy.get_images_labels(images)
149
154
  bands_values = self._get_items_list_from_dict(labels, bands)
@@ -152,22 +157,32 @@ class STACGenerator:
152
157
  if collections == "source":
153
158
  # List of path with the same value repeated as many times as the number of images
154
159
  collections_values = [join(path, "source") for i in range(len(images))]
155
- elif collections == '*':
156
- collections_values = [join(path, basename(dirname(image))) for image in images]
160
+ elif collections == "*":
161
+ collections_values = [
162
+ join(path, basename(dirname(image))) for image in images
163
+ ]
157
164
  else:
158
165
  try:
159
- collections_values = [join(path, value) for value in self._get_items_list_from_dict(labels, collections)]
166
+ collections_values = [
167
+ join(path, value)
168
+ for value in self._get_items_list_from_dict(labels, collections)
169
+ ]
160
170
  except TypeError:
161
- raise pystac.STACError('There is an error generating the collections. Please check the collections dictionary')
162
-
163
- df = pd.DataFrame({'image': images,
164
- 'label': labels,
165
- 'ix': ixs,
166
- 'collection': collections_values,
167
- 'extensions': extensions_values,
168
- 'bands': bands_values
169
- })
170
-
171
+ raise pystac.STACError(
172
+ "There is an error generating the collections. Please check the collections dictionary"
173
+ )
174
+
175
+ df = pd.DataFrame(
176
+ {
177
+ "image": images,
178
+ "label": labels,
179
+ "ix": ixs,
180
+ "collection": collections_values,
181
+ "extensions": extensions_values,
182
+ "bands": bands_values,
183
+ }
184
+ )
185
+
171
186
  self._stac_dataframe = df
172
187
 
173
188
  return df
@@ -182,7 +197,7 @@ class STACGenerator:
182
197
  if not items:
183
198
  # Create list of None with the same length as the labels list
184
199
  return [None for _ in labels]
185
- items_list = list()
200
+ items_list = []
186
201
  for label in labels:
187
202
  if label in items.keys():
188
203
  items_list.append(items[label])
@@ -219,7 +234,7 @@ class STACGenerator:
219
234
  # Return the collection
220
235
  return collection
221
236
 
222
- def create_stac_item(self, raster_path: str, kwargs: dict = {}) -> pystac.Item:
237
+ def create_stac_item(self, raster_path: str) -> pystac.Item:
223
238
  """
224
239
  Create a STAC item from a directory containing the raster files and the metadata.json file
225
240
 
@@ -250,12 +265,16 @@ class STACGenerator:
250
265
  )
251
266
 
252
267
  # Initialize pySTAC item parameters
253
- params = dict()
254
- params["properties"] = dict()
268
+ params = {}
269
+ params["properties"] = {}
255
270
 
256
271
  # Obtain the date acquired
257
272
  start_time, end_time = None, None
258
- if metadata and metadata["acquisition-date"] and metadata["type"] not in ('dem', 'DEM'):
273
+ if (
274
+ metadata
275
+ and metadata["acquisition-date"]
276
+ and metadata["type"] not in ("dem", "DEM")
277
+ ):
259
278
  time_acquired = format_time_acquired(metadata["acquisition-date"])
260
279
  else:
261
280
  # Check if the type of the data is DEM
@@ -270,15 +289,15 @@ class STACGenerator:
270
289
  time_acquired = datetime.strptime("2000-01-01", "%Y-%m-%d")
271
290
 
272
291
  # Obtain the item ID. The approach depends on the item parser
273
- id = self._item_parser.get_item_id(raster_path)
292
+ item_id = self._item_parser.get_item_id(raster_path)
274
293
  # Add the item ID to the dataframe, to be able to get it later
275
294
  self._stac_dataframe.loc[
276
295
  self._stac_dataframe["image"] == raster_path, "id"
277
- ] = id
278
-
296
+ ] = item_id
297
+
279
298
  # Instantiate pystac item
280
299
  item = pystac.Item(
281
- id=id, geometry=geom, bbox=bbox, datetime=time_acquired, **params
300
+ id=item_id, geometry=geom, bbox=bbox, datetime=time_acquired, **params
282
301
  )
283
302
 
284
303
  # Get the item info, from the raster path
@@ -319,6 +338,6 @@ class STACGenerator:
319
338
  else:
320
339
  extension_obj = self._extensions_dict[extension]
321
340
  extension_obj.add_extension_to_object(asset, item_info)
322
- item.set_self_href(join(dirname(raster_path), f"{id}.json"))
341
+ item.set_self_href(join(dirname(raster_path), f"{item_id}.json"))
323
342
  item.make_asset_hrefs_relative()
324
343
  return item
@@ -1,3 +1,3 @@
1
1
  from .retrieve import retrieve_datasets # , retrieve_dataset, list_datasets
2
2
  from .ingest import ingest_dataset
3
- from .download import download_dataset # , download_file_url
3
+ from .download import download_dataset, download_file_url
@@ -4,8 +4,8 @@ from tqdm import tqdm
4
4
 
5
5
  from ..auth import with_auth
6
6
  from .retrieve import retrieve_dataset, retrieve_dataset_files
7
- from ..shared import calculate_checksum
8
- from ..repos import FilesAPIRepo
7
+ from ..repos import FilesAPIRepo, DatasetsAPIRepo
8
+ from ..curation.stac import STACDataFrame
9
9
 
10
10
 
11
11
  @with_auth
@@ -13,7 +13,7 @@ def download_dataset(
13
13
  dataset_name,
14
14
  version=None,
15
15
  path=None,
16
- logger=None,
16
+ logger=print,
17
17
  assets=False,
18
18
  force=False,
19
19
  verbose=False,
@@ -45,20 +45,6 @@ def download_dataset(
45
45
  if dataset["quality"] == 0:
46
46
  if file:
47
47
  raise NotImplementedError("Downloading a specific file is not implemented")
48
- # files = [f for f in dataset["files"] if f["name"] == file]
49
- # if not files:
50
- # raise Exception(f"File {file} not found")
51
- # if len(files) > 1:
52
- # raise Exception(f"Multiple files with name {file} found")
53
- # dst_path = download(
54
- # dataset,
55
- # dataset["id"],
56
- # file,
57
- # files[0]["checksum"],
58
- # download_path,
59
- # user,
60
- # )
61
- # return Outputs(dst_path=dst_path)
62
48
  dataset_files = retrieve_dataset_files(dataset["id"], version)
63
49
  repo = FilesAPIRepo()
64
50
  for file in tqdm(dataset_files, disable=verbose, unit="file", position=0):
@@ -76,44 +62,45 @@ def download_dataset(
76
62
  # if calculate_checksum(dst_path) != checksum:
77
63
  # logger(f"Checksum for {file} does not match")
78
64
  if verbose:
79
- logger(f"Done")
80
- return download_path
65
+ logger("Done")
81
66
  else:
82
- raise NotImplementedError("Downloading a STAC dataset is not implemented")
83
- # logger("Downloading STAC metadata...")
84
- # gdf, error = repo.download_stac(
85
- # dataset["id"],
86
- # user["id_token"],
87
- # )
88
- # if error:
89
- # raise Exception(error)
90
- # df = STACDataFrame(gdf)
91
- # # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
92
- # path = path
93
- # if path is None:
94
- # path = download_base_path + "/" + dataset["name"]
95
- # df.to_stac(path)
96
- # # download assets
97
- # if assets:
98
- # logger("Downloading assets...")
99
- # df = df.dropna(subset=["assets"])
100
- # for row in tqdm(df.iterrows(), total=len(df)):
101
- # id = row[1]["stac_id"]
102
- # # print(row[1]["links"])
103
- # for k, v in row[1]["assets"].items():
104
- # href = v["href"]
105
- # repo.download_file_url(
106
- # href, f"{path}/assets/{id}", user["id_token"]
107
- # )
108
- # else:
109
- # logger("To download assets, set assets=True or -a in the CLI.")
110
- # return Outputs(dst_path=path)
67
+ # raise NotImplementedError("Downloading a STAC dataset is not implemented")
68
+ if verbose:
69
+ logger("Downloading STAC metadata...")
70
+ repo = DatasetsAPIRepo()
71
+ gdf, error = repo.download_stac(
72
+ dataset["id"],
73
+ user["id_token"],
74
+ )
75
+ if error:
76
+ raise Exception(error)
77
+ df = STACDataFrame(gdf)
78
+ # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
79
+ df.to_stac(download_path)
80
+ # download assets
81
+ if assets:
82
+ if verbose:
83
+ logger("Downloading assets...")
84
+ repo = FilesAPIRepo()
85
+ df = df.dropna(subset=["assets"])
86
+ for row in tqdm(df.iterrows(), total=len(df)):
87
+ for k, v in row[1]["assets"].items():
88
+ href = v["href"]
89
+ _, filename = href.split("/download/")
90
+ # will overwrite assets with same name :(
91
+ repo.download_file_url(
92
+ href, filename, f"{download_path}/assets", user["id_token"]
93
+ )
94
+ else:
95
+ if verbose:
96
+ logger("To download assets, set assets=True or -a in the CLI.")
97
+ return download_path
111
98
 
112
99
 
113
- # @with_auth
114
- # def download_file_url(url, path, progress=True, logger=None, user=None):
115
- # api_repo = APIRepo()
116
- # download = DownloadFileURL(api_repo, logger, progress)
117
- # inputs = DownloadFileURL.Inputs(url=url, path=path, user=user)
118
- # outputs = download(inputs)
119
- # return outputs.dst_path
100
+ @with_auth
101
+ def download_file_url(url, path, progress=True, logger=print, user=None):
102
+ repo = FilesAPIRepo()
103
+ _, filename = url.split("/download/")
104
+ return repo.download_file_url(
105
+ url, filename, f"{path}/assets", user["id_token"], progress
106
+ )