eotdl-2023.11.2.post5-py3-none-any.whl → eotdl-2023.11.3.post2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (58)
  1. eotdl/__init__.py +1 -1
  2. eotdl/access/__init__.py +6 -3
  3. eotdl/access/airbus/__init__.py +5 -1
  4. eotdl/access/airbus/client.py +356 -338
  5. eotdl/access/airbus/parameters.py +19 -4
  6. eotdl/access/airbus/utils.py +26 -21
  7. eotdl/access/download.py +30 -14
  8. eotdl/access/search.py +17 -6
  9. eotdl/access/sentinelhub/__init__.py +5 -1
  10. eotdl/access/sentinelhub/client.py +57 -54
  11. eotdl/access/sentinelhub/evalscripts.py +38 -39
  12. eotdl/access/sentinelhub/parameters.py +43 -23
  13. eotdl/access/sentinelhub/utils.py +38 -28
  14. eotdl/auth/errors.py +2 -1
  15. eotdl/commands/auth.py +3 -3
  16. eotdl/curation/__init__.py +5 -1
  17. eotdl/curation/stac/__init__.py +5 -1
  18. eotdl/curation/stac/assets.py +55 -32
  19. eotdl/curation/stac/dataframe.py +20 -14
  20. eotdl/curation/stac/dataframe_bck.py +2 -2
  21. eotdl/curation/stac/dataframe_labeling.py +15 -12
  22. eotdl/curation/stac/extensions/__init__.py +6 -2
  23. eotdl/curation/stac/extensions/base.py +8 -4
  24. eotdl/curation/stac/extensions/dem.py +6 -3
  25. eotdl/curation/stac/extensions/eo.py +10 -6
  26. eotdl/curation/stac/extensions/label/__init__.py +5 -1
  27. eotdl/curation/stac/extensions/label/base.py +40 -26
  28. eotdl/curation/stac/extensions/label/image_name_labeler.py +64 -43
  29. eotdl/curation/stac/extensions/label/scaneo.py +59 -56
  30. eotdl/curation/stac/extensions/ml_dataset.py +154 -56
  31. eotdl/curation/stac/extensions/projection.py +11 -9
  32. eotdl/curation/stac/extensions/raster.py +22 -14
  33. eotdl/curation/stac/extensions/sar.py +12 -7
  34. eotdl/curation/stac/extent.py +67 -40
  35. eotdl/curation/stac/parsers.py +18 -10
  36. eotdl/curation/stac/stac.py +81 -62
  37. eotdl/datasets/__init__.py +1 -1
  38. eotdl/datasets/download.py +42 -55
  39. eotdl/datasets/ingest.py +68 -11
  40. eotdl/files/__init__.py +1 -1
  41. eotdl/files/ingest.py +3 -1
  42. eotdl/models/download.py +1 -1
  43. eotdl/repos/AuthAPIRepo.py +0 -1
  44. eotdl/repos/DatasetsAPIRepo.py +22 -146
  45. eotdl/repos/FilesAPIRepo.py +7 -92
  46. eotdl/repos/ModelsAPIRepo.py +0 -1
  47. eotdl/tools/__init__.py +5 -1
  48. eotdl/tools/geo_utils.py +78 -48
  49. eotdl/tools/metadata.py +13 -11
  50. eotdl/tools/paths.py +14 -14
  51. eotdl/tools/stac.py +36 -31
  52. eotdl/tools/time_utils.py +53 -26
  53. eotdl/tools/tools.py +84 -50
  54. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/METADATA +5 -3
  55. eotdl-2023.11.3.post2.dist-info/RECORD +84 -0
  56. eotdl-2023.11.2.post5.dist-info/RECORD +0 -84
  57. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/WHEEL +0 -0
  58. {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/entry_points.txt +0 -0
eotdl/tools/geo_utils.py CHANGED
@@ -1,9 +1,15 @@
+"""
+Geo Utils
+"""
+
+import tarfile
+from typing import Union
+from statistics import mean
+
 import geopandas as gpd
 import rasterio
 import rasterio.warp
-import tarfile
 
-from typing import Union
 from shapely import geometry
 from shapely.geometry import box, Polygon, shape
 from pyproj import Transformer
@@ -12,9 +18,12 @@ from pandas import isna
 
 
 def is_bounding_box(bbox: list) -> bool:
+    """
+    Check if the given bounding box is a bounding box and is valid
+    """
     if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
         return False
-
+
     for value in bbox:
         if not isinstance(value, (int, float)):
             return False
@@ -27,13 +36,19 @@ def is_bounding_box(bbox: list) -> bool:
 
 
 def compute_image_size(bounding_box, parameters):
+    """
+    Compute the image size from the bounding box and the resolution
+    """
     bbox = BBox(bbox=bounding_box, crs=CRS.WGS84)
     bbox_size = bbox_to_dimensions(bbox, resolution=parameters.RESOLUTION)
 
-    return bbox, bbox_size
+    return bbox, bbox_size
 
 
 def get_image_bbox(raster: Union[tarfile.ExFileObject, str]):
+    """
+    Get the bounding box of a raster
+    """
     with rasterio.open(raster) as src:
         bounds = src.bounds
         dst_crs = "EPSG:4326"
@@ -45,6 +60,9 @@ def get_image_bbox(raster: Union[tarfile.ExFileObject, str]):
 
 
 def get_image_resolution(raster: Union[tarfile.ExFileObject, str]):
+    """
+    Get the resolution of a raster
+    """
     with rasterio.open(raster) as src:
         resolution = src.res
     return resolution
@@ -52,13 +70,14 @@ def get_image_resolution(raster: Union[tarfile.ExFileObject, str]):
 
 def bbox_to_coordinates(bounding_box: list) -> list:
     """
+    Convert a bounding box to a list of polygon coordinates
     """
     polygon_coordinates = [
         (bounding_box[0], bounding_box[1]),  # bottom left
         (bounding_box[0], bounding_box[3]),  # top left
         (bounding_box[2], bounding_box[3]),  # top right
         (bounding_box[2], bounding_box[1]),  # bottom right
-        (bounding_box[0], bounding_box[1])  # back to bottom left
+        (bounding_box[0], bounding_box[1]),  # back to bottom left
     ]
 
     return polygon_coordinates
@@ -66,22 +85,24 @@ def bbox_to_coordinates(bounding_box: list) -> list:
 
 def bbox_to_polygon(bounding_box: list) -> Polygon:
     """
+    Convert a bounding box to a shapely polygon
     """
     polygon = box(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3])
 
     return polygon
 
 
-from_4326_transformer = Transformer.from_crs('EPSG:4326', 'EPSG:3857')
-from_3857_transformer = Transformer.from_crs('EPSG:3857', 'EPSG:4326')
+from_4326_transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")
+from_3857_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326")
 
 
-def bbox_from_centroid(x: Union[int, float],
-                       y: Union[int, float],
-                       pixel_size: Union[int, float],
-                       width: Union[int, float],
-                       height: Union[int, float]
-                       ) -> list:
+def bbox_from_centroid(
+    x: Union[int, float],
+    y: Union[int, float],
+    pixel_size: Union[int, float],
+    width: Union[int, float],
+    height: Union[int, float],
+) -> list:
     """
     Generate a bounding box from a centroid, pixel size and image dimensions.
 
@@ -122,9 +143,7 @@ def bbox_from_centroid(x: Union[int, float],
     return [min_y, min_x, max_y, max_x]
 
 
-def generate_bounding_box(geom: geometry.point.Point,
-                          differences: list
-                          ) -> list:
+def generate_bounding_box(geom: geometry.point.Point, differences: list) -> list:
     """
     Generate the bounding box of a given point using the difference
     between the maximum and mininum coordinates of the bounding box
@@ -137,12 +156,14 @@ def generate_bounding_box(geom: geometry.point.Point,
     """
     long_diff, lat_diff = differences[0], differences[1]
     lon, lat = geom.x, geom.y
-
-    bbox = (lon - (long_diff/2),
-            lat - (lat_diff/2),
-            lon + (long_diff/2),
-            lat + (lat_diff/2))
-
+
+    bbox = (
+        lon - (long_diff / 2),
+        lat - (lat_diff / 2),
+        lon + (long_diff / 2),
+        lat + (lat_diff / 2),
+    )
+
     # Round the coordinates to 6 decimals
     bounding_box = [round(i, 6) for i in bbox]
 
@@ -151,17 +172,18 @@ def generate_bounding_box(geom: geometry.point.Point,
 
 def calculate_average_coordinates_distance(bounding_box_by_location: dict) -> list:
     """
-    Calculate the mean distance between maximum and minixum longitude and latitude of the bounding boxes
-    from the existing locations. This is intended to use these mean distance to generate the bounding
+    Calculate the mean distance between maximum and minixum longitude
+    and latitude of the bounding boxes from the existing locations.
+    This is intended to use these mean distance to generate the bounding
     boxes of the new locations given a centroid.
 
-    :param bounding_box_by_location: dictionary with format location_id : bounding_box for the existing
-    locations in the sen12floods dataset.
-    :return mean_long_diff, mean_lat_diff: mean longitude and latitude difference in the bounding boxes
+    :param bounding_box_by_location: dictionary with format
+    location_id : bounding_box for the existing locations in
+    the sen12floods dataset.
+    :return mean_long_diff, mean_lat_diff: mean longitude
+    and latitude difference in the bounding boxes
     """
-    from statistics import mean
-
-    long_diff_list, lat_diff_list = list(), list()
+    long_diff_list, lat_diff_list = [], []
 
     for bbox in bounding_box_by_location.values():
         long_diff = bbox[2] - bbox[0]
@@ -175,28 +197,36 @@ def calculate_average_coordinates_distance(bounding_box_by_location: dict) -> list:
     return mean_long_diff, mean_lat_diff
 
 
-def generate_new_locations_bounding_boxes(gdf: gpd.GeoDataFrame,
-                                          mean_differences: list,
-                                          latest_id: int
-                                          ) -> dict:
+def generate_new_locations_bounding_boxes(
+    gdf: gpd.GeoDataFrame, mean_differences: list, latest_id: int
+) -> dict:
     """
-    Generate the bounding box of every new location, using the mean difference between the maximum and
-    minimum calculated longitude and latitude. This function also returns the time interval which we
-    want to request from Sentinel Hub Services.
-
-    :param gdf: GeoDataFrame wiht the new locations that are going to be added to the dataset
-    :param mean_differences: list with the longitude and latitude mean differences, which are going to be used
-    to generate the bounding boxes.
-    :return: bbox_by_new_location: dict with format {<location_id>: {'bounding_box': list(), 'time_interval': list()}, ... }
-    that contains the bounding box and time interval of the imagery for each location
+    Generate the bounding box of every new location, using
+    the mean difference between the maximum and minimum calculated
+    longitude and latitude. This function also returns the time
+    interval which we want to request from Sentinel Hub Services.
+
+    :param gdf: GeoDataFrame wiht the new locations that
+    are going to be added to the dataset
+    :param mean_differences: list with the longitude
+    and latitude mean differences, which are going to be used to generate
+    the bounding boxes.
+    :return: bbox_by_new_location: dict with format {<location_id>:
+    {'bounding_box': list(), 'time_interval': list()}, ... }
+    that contains the bounding box and time interval of the imagery for each location
    """
-    bbox_by_new_location = dict()
+    bbox_by_new_location = {}
 
-    for i, row in gdf.iterrows():
+    for _, row in gdf.iterrows():
        new_location_id = str(latest_id + 1)
-        time_interval = row['Began'].strftime("%Y-%m-%d"), row['Ended'].strftime("%Y-%m-%d")
-        bbox = generate_bounding_box(row['geometry'], mean_differences)
-        bbox_by_new_location[new_location_id] = {'bounding_box': bbox, 'time_interval': time_interval}
+        time_interval = row["Began"].strftime("%Y-%m-%d"), row["Ended"].strftime(
+            "%Y-%m-%d"
+        )
+        bbox = generate_bounding_box(row["geometry"], mean_differences)
+        bbox_by_new_location[new_location_id] = {
+            "bounding_box": bbox,
+            "time_interval": time_interval,
+        }
        latest_id += 1
 
    return bbox_by_new_location
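The geo_utils changes are almost entirely Black-style reformatting plus new docstrings; the only structural cleanups are hoisting `tarfile`, `Union`, and `statistics.mean` to module-level imports and swapping `list()`/`dict()` for literals, so public signatures are unchanged. A minimal usage sketch of `bbox_from_centroid` (the centroid, pixel size, and window below are invented for illustration):

from eotdl.tools.geo_utils import bbox_from_centroid

# Hypothetical centroid with a 10 m pixel and a 512x512 pixel window.
# Per the diff, the helper returns [min_y, min_x, max_y, max_x].
bbox = bbox_from_centroid(x=41.39, y=2.17, pixel_size=10, width=512, height=512)
print(bbox)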
eotdl/tools/metadata.py CHANGED
@@ -1,6 +1,6 @@
-'''
+"""
 Metadata utilities for STAC
-'''
+"""
 
 import json
 
@@ -20,25 +20,27 @@ def get_item_metadata(raster_path: str) -> str:
     raster_dir_path = dirname(raster_path)
     # Get the metadata JSON file
     # Check if there is a metadata.json file in the directory
-    if 'metadata.json' in listdir(raster_dir_path):
-        metadata_json = join(raster_dir_path, 'metadata.json')
+    if "metadata.json" in listdir(raster_dir_path):
+        metadata_json = join(raster_dir_path, "metadata.json")
     else:
         # If there is no metadata.json file in the directory, check if there is
         # a json file with the same name as the raster file
         base = splitext(raster_path)[0]
-        metadata_json = base + '.json'
+        metadata_json = base + ".json"
         if not exists(metadata_json):
             # If there is no metadata file in the directory, return None
             return None
-
+
     # Open the metadata file and return it
-    with open(metadata_json, 'r') as f:
+    with open(metadata_json, "r", encoding="utf-8") as f:
         metadata = json.load(f)
-
+
     return metadata
 
 
-def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
+def remove_raster_metadata(
+    folder: str, metadata_file: Optional[str] = "metadata.json"
+) -> None:
     """
     Remove metadata.json file from a folder
 
@@ -48,5 +50,5 @@ def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
     # Search for all the metadata files in the folder
     metadata_files = glob(join(folder, "**", metadata_file), recursive=True)
     # Remove all the metadata files
-    for metadata_file in metadata_files:
-        remove(metadata_file)
+    for metadata in metadata_files:
+        remove(metadata)
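Beyond the quote-style churn, two real fixes land in metadata.py: the metadata file is now read with an explicit encoding="utf-8", and the cleanup loop no longer rebinds the `metadata_file` argument it iterates over. A usage sketch (the paths are hypothetical):

from eotdl.tools.metadata import get_item_metadata, remove_raster_metadata

# Looks for metadata.json next to the raster, then for <raster>.json;
# returns the parsed dict, or None if neither file exists.
metadata = get_item_metadata("data/scene_1/B04.tif")
if metadata:
    print(sorted(metadata))

# Recursively deletes every metadata.json found under the folder.
remove_raster_metadata("data")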
eotdl/tools/paths.py CHANGED
@@ -1,6 +1,6 @@
-'''
+"""
 Paths utils
-'''
+"""
 
 from os.path import dirname
 from typing import Union, Optional
@@ -28,24 +28,24 @@ def cut_images(images_list: Union[list, tuple]) -> list:
 
     :return: list of unique directories
     """
-    dirnames = list()
-    images = list()
+    dirnames = []
+    images = []
 
     for image in images_list:
-        dir = dirname(image)
-        if dir not in dirnames:
-            dirnames.append(dir)
+        directory = dirname(image)
+        if directory not in dirnames:
+            dirnames.append(directory)
             images.append(image)
 
     return images
 
 
-def get_all_images_in_path(path: str, image_format: Optional[str] = 'tif') -> list:
-    """
-    Get all the images in a directory
+def get_all_images_in_path(path: str, image_format: Optional[str] = "tif") -> list:
+    """
+    Get all the images in a directory
 
-    :param path: path to the directory
+    :param path: path to the directory
 
-    :return: list of images
-    """
-    return glob(str(path) + f'/**/*.{image_format}', recursive=True)
+    :return: list of images
+    """
+    return glob(str(path) + f"/**/*.{image_format}", recursive=True)
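In paths.py the loop variable `dir` is renamed `directory` so it no longer shadows the builtin, and `list()` calls become literals; behaviour is identical. A sketch of how the two helpers compose (the directory layout is hypothetical):

from eotdl.tools.paths import get_all_images_in_path, cut_images

# Collect every .tif under a root directory, recursively.
images = get_all_images_in_path("data/sen12floods", image_format="tif")

# Keep only the first image found in each directory.
unique_images = cut_images(images)
print(len(images), len(unique_images))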
eotdl/tools/stac.py CHANGED
@@ -2,21 +2,22 @@
 Module for data engineering with STAC elements
 """
 
-import geopandas as gpd
-import pystac
-
 from os.path import dirname, join, abspath
 from os import makedirs
 from json import dumps
 from typing import Union, Optional
-from tqdm import tqdm
-from traceback import print_exc
 from shutil import rmtree
+from traceback import print_exc
+
+import geopandas as gpd
+import pystac
+
+from tqdm import tqdm
 
 
 def stac_items_to_gdf(items: pystac.ItemCollection) -> gpd.GeoDataFrame:
     """
-    Get a GeoDataFrame from a given pystac.ItemCollection.
+    Get a GeoDataFrame from a given pystac.ItemCollection.
 
     :param: items: A pystac.ItemCollection
     :return: GeoDataframe from the given ItemCollection
@@ -29,12 +30,12 @@ def stac_items_to_gdf(items: pystac.ItemCollection) -> gpd.GeoDataFrame:
         if f not in features:
             # Add all the keys in the properties dict as columns in the GeoDataFrame
             for k, v in f.items():
-                if k not in f['properties'] and k != 'geometry':
-                    f['properties'][k] = v
-            if 'scene_id' in f['properties']:
-                f['properties']['scene_id'] = f['id'].split('_')[3]
-            features.append(f)
-
+                if k not in f["properties"] and k != "geometry":
+                    f["properties"][k] = v
+            if "scene_id" in f["properties"]:
+                f["properties"]["scene_id"] = f["id"].split("_")[3]
+            features.append(f)
+
     return gpd.GeoDataFrame.from_features(features)
 
 
@@ -65,9 +66,10 @@ def get_all_children(obj: pystac.STACObject) -> list:
     return children
 
 
-def make_links_relative_to_path(path: str,
-                                catalog: Union[pystac.Catalog, str],
-                                ) -> pystac.Catalog:
+def make_links_relative_to_path(
+    path: str,
+    catalog: Union[pystac.Catalog, str],
+) -> pystac.Catalog:
     """
     Makes all asset HREFs and links in the STAC catalog relative to a given path
     """
@@ -76,9 +78,9 @@ def make_links_relative_to_path(path: str,
     path = abspath(path)
 
     # Create a temporary catalog in the destination path to set as root
-    future_path = join(path, 'catalog.json')
+    future_path = join(path, "catalog.json")
     makedirs(path, exist_ok=True)
-    with open(future_path, 'w') as f:
+    with open(future_path, "w", encoding="utf-8") as f:
         f.write(dumps(catalog.to_dict(), indent=4))
     temp_catalog = pystac.Catalog.from_file(future_path)
 
@@ -88,7 +90,7 @@ def make_links_relative_to_path(path: str,
     for collection in catalog.get_children():
         # Create new collection
         new_collection = collection.clone()
-        new_collection.set_self_href(join(path, collection.id, f"collection.json"))
+        new_collection.set_self_href(join(path, collection.id, "collection.json"))
         new_collection.set_root(catalog)
         new_collection.set_parent(catalog)
         # Remove old collection and add new one to catalog
@@ -97,7 +99,9 @@ def make_links_relative_to_path(path: str,
         for item in collection.get_all_items():
             # Create new item from old collection and add it to the new collection
             new_item = item.clone()
-            new_item.set_self_href(join(path, collection.id, item.id, f"{item.id}.json"))
+            new_item.set_self_href(
+                join(path, collection.id, item.id, f"{item.id}.json")
+            )
             new_item.set_parent(collection)
             new_item.set_root(catalog)
             new_item.make_asset_hrefs_relative()
@@ -108,12 +112,13 @@ def make_links_relative_to_path(path: str,
     return catalog
 
 
-def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
-                        catalog_2: Union[pystac.Catalog, str],
-                        destination: Optional[str] = None,
-                        keep_extensions: Optional[bool] = False,
-                        catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED
-                        ) -> None:
+def merge_stac_catalogs(
+    catalog_1: Union[pystac.Catalog, str],
+    catalog_2: Union[pystac.Catalog, str],
+    destination: Optional[str] = None,
+    keep_extensions: Optional[bool] = False,
+    catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED,
+) -> None:
     """
     Merge two STAC catalogs, keeping the properties, collection and items of both catalogs
     """
@@ -122,10 +127,10 @@ def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
     if isinstance(catalog_2, str):
         catalog_2 = pystac.Catalog.from_file(catalog_2)
 
-    for col1 in tqdm(catalog_1.get_children(), desc='Merging catalogs...'):
+    for col1 in tqdm(catalog_1.get_children(), desc="Merging catalogs..."):
         # Check if the collection exists in catalog_2
         col2 = catalog_2.get_child(col1.id)
-        if col2 is None:
+        if not col2:
             # If it does not exist, add it
             col1_ = col1.clone()
             catalog_2.add_child(col1)
@@ -157,10 +162,10 @@ def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
     try:
         print("Validating and saving...")
         catalog_2.validate()
-        rmtree(destination) if not destination else None  # Remove the old catalog and replace it with the new one
-        catalog_2.normalize_and_save(root_href=destination,
-                                     catalog_type=catalog_type
-                                     )
+        rmtree(
+            destination
+        ) if not destination else None  # Remove the old catalog and replace it with the new one
+        catalog_2.normalize_and_save(root_href=destination, catalog_type=catalog_type)
         print("Success!")
     except pystac.STACValidationError:
         # Return full callback
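The stac.py changes regroup imports (standard library first, third-party after) and re-wrap long signatures, with one behavioural nuance: the existence check `if col2 is None:` becomes the looser `if not col2:`. The oddly conditioned `rmtree(destination) if not destination else None` is reformatted but otherwise preserved as-is. A hedged sketch of merging two catalogs (paths are hypothetical; both catalog arguments also accept pystac.Catalog objects, per the signature):

import pystac

from eotdl.tools.stac import merge_stac_catalogs

# Merge the children and items of catalog_a into catalog_b and
# write the normalized result under "merged".
merge_stac_catalogs(
    catalog_1="catalog_a/catalog.json",
    catalog_2="catalog_b/catalog.json",
    destination="merged",
    keep_extensions=True,
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)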
eotdl/tools/time_utils.py CHANGED
@@ -1,14 +1,22 @@
-import geopandas as gpd
-import pandas as pd
+"""
+Time utils
+"""
 
 from datetime import datetime, timedelta
-from typing import Union, Optional, List
+from typing import Union, Optional
+from dateutil import parser
+
+import geopandas as gpd
+import pandas as pd
 
 
 def is_time_interval(time_interval: list) -> bool:
+    """
+    Check if is time interval and is valid
+    """
     if not isinstance(time_interval, (list, tuple)) or len(time_interval) != 2:
         return False
-
+
     for value in time_interval:
         if not isinstance(value, str):
             return False
@@ -19,6 +27,9 @@ def is_time_interval(time_interval: list) -> bool:
 
 
 def is_valid_date(date_str: str) -> bool:
+    """
+    Check if a date is valid
+    """
     try:
         datetime.strptime(date_str, "%Y-%m-%d")
         return True
@@ -26,24 +37,29 @@ def is_valid_date(date_str: str) -> bool:
         return False
 
 
-def get_first_last_dates(dataframe: Union[pd.DataFrame, gpd.GeoDataFrame], dates_column: Optional[str] = 'datetime'):
+def get_first_last_dates(
+    dataframe: Union[pd.DataFrame, gpd.GeoDataFrame],
+    dates_column: Optional[str] = "datetime",
+):
     """
+    Get first and last dates from a dataframe
     """
     dataframe[dates_column] = dataframe[dates_column].apply(lambda x: sorted(x))
-    dataframe['first_date'] = dataframe['dates_list'].apply(lambda x: x[0])
-    dataframe['last_date'] = dataframe['dates_list'].apply(lambda x: x[-1])
-    dataframe = dataframe.sort_values(by=['first_date', 'last_date'])
+    dataframe["first_date"] = dataframe["dates_list"].apply(lambda x: x[0])
+    dataframe["last_date"] = dataframe["dates_list"].apply(lambda x: x[-1])
+    dataframe = dataframe.sort_values(by=["first_date", "last_date"])
     # Sort by sequence id
-    dataframe = dataframe.sort_values(by=['location_id'])
+    dataframe = dataframe.sort_values(by=["location_id"])
     # Convert first_date and last_date to datetime, in format YYYY-MM-DD
-    dataframe['first_date'] = pd.to_datetime(dataframe['first_date'], format='%Y-%m-%d')
-    dataframe['last_date'] = pd.to_datetime(dataframe['last_date'], format='%Y-%m-%d')
+    dataframe["first_date"] = pd.to_datetime(dataframe["first_date"], format="%Y-%m-%d")
+    dataframe["last_date"] = pd.to_datetime(dataframe["last_date"], format="%Y-%m-%d")
 
     return dataframe
 
 
 def create_time_slots(start_date: datetime, end_date: datetime, n_chunks: int):
     """
+    Create time slots from start date to end date, with n_chunks
     """
     if isinstance(start_date, str):
         start_date = datetime.strptime(start_date, "%Y-%m-%d")
@@ -56,16 +72,19 @@ def create_time_slots(start_date: datetime, end_date: datetime, n_chunks: int):
     return slots
 
 
-def expand_time_interval(time_interval: Union[list, tuple], format: str='%Y-%m-%dT%H:%M:%S.%fZ') -> list:
+def expand_time_interval(
+    time_interval: Union[list, tuple], time_format: str = "%Y-%m-%dT%H:%M:%S.%fZ"
+) -> list:
     """
+    Expand time interval to get more data
     """
     start_date = time_interval[0]
     end_date = time_interval[1]
 
     if isinstance(start_date, str):
-        start_date = datetime.datetime.strptime(start_date, format)
+        start_date = datetime.datetime.strptime(start_date, time_format)
     if isinstance(end_date, str):
-        end_date = datetime.datetime.strptime(end_date, format)
+        end_date = datetime.datetime.strptime(end_date, time_format)
 
     # Add one day to start date and remove one day to end date
     new_start_date = start_date - datetime.timedelta(days=1)
@@ -79,15 +98,22 @@ def expand_time_interval(time_interval: Union[list, tuple], format: str='%Y-%m-%dT%H:%M:%S.%fZ') -> list:
 
 
 def prepare_time_interval(date):
+    """
+    Prepare time interval to request data
+    """
     if isinstance(date, str):
         date = datetime.strptime(date, "%Y-%m-%d")
     elif isinstance(date, tuple):
-        if not is_time_interval(date):
-            raise ValueError('The time interval must be a range of two dates, with format YYYY-MM-DD or a datetime object')
-        else:
+        if is_time_interval(date):
             return date
+        else:
+            raise ValueError(
+                "The time interval must be a range of two dates, with format YYYY-MM-DD or a datetime object"
+            )
     elif not isinstance(date, datetime):
-        raise ValueError('The date must be a string with format YYYY-MM-DD or a datetime object')
+        raise ValueError(
+            "The date must be a string with format YYYY-MM-DD or a datetime object"
+        )
 
     date_day_before = date - timedelta(days=1)
     date_next_day = date + timedelta(days=1)
@@ -98,10 +124,11 @@ def prepare_time_interval(date):
     return (date_day_before_str, date_next_day_str)
 
 
-def get_day_between(from_date: Union[datetime, str],
-                    to_date: Union[datetime, str]
-                    ) -> str:
+def get_day_between(
+    from_date: Union[datetime, str], to_date: Union[datetime, str]
+) -> str:
     """
+    Get the day between two dates
     """
     if isinstance(from_date, str):
         from_date = datetime.strptime(from_date, "%Y-%m-%dT%H:%M:%SZ")
@@ -109,8 +136,8 @@ def get_day_between(from_date: Union[datetime, str],
         to_date = datetime.strptime(to_date, "%Y-%m-%dT%H:%M:%SZ")
 
     date_between = from_date + timedelta(days=1)
-    date_between = date_between.strftime("%Y-%m-%d")
-
+    date_between = date_between.strftime("%Y-%m-%d")
+
     return date_between
 
 
@@ -120,8 +147,8 @@ def format_time_acquired(dt: Union[str, datetime]) -> str:
 
     :param dt: date time to format
     """
-    from dateutil import parser
-
     dt_str = parser.parse(dt).strftime("%Y-%m-%dT%H:%M:%S.%f")
+    # convert to datetime object
+    dt = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f")
 
-    return dt_str
+    return dt
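time_utils.py carries the one API-visible change in this group: `format_time_acquired` now returns a `datetime` object instead of the formatted string it used to return (the `-> str` annotation is left over from the old behaviour), and `expand_time_interval`'s `format` parameter is renamed `time_format`. Callers that expected a string should be rechecked. A sketch (the dates are arbitrary):

from eotdl.tools.time_utils import format_time_acquired, prepare_time_interval

# As of 2023.11.3.post2 this returns a datetime, not a string.
dt = format_time_acquired("2023-11-03")
print(type(dt), dt)

# Brackets a single date with the day before and the day after,
# returning a (start, end) tuple of date strings.
print(prepare_time_interval("2023-11-03"))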