eotdl 2023.7.19.post3__py3-none-any.whl → 2023.9.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,7 @@ from pathlib import Path
4
4
  from ..datasets import (
5
5
  retrieve_datasets,
6
6
  download_dataset,
7
- ingest_folder,
8
- ingest_stac,
7
+ ingest_dataset,
9
8
  )
10
9
 
11
10
  app = typer.Typer()
@@ -14,31 +13,21 @@ app = typer.Typer()
@app.command()
def ingest(
    path: Path,
    f: bool = typer.Option(
        False, "--force", "-f", help="Force ingest even if file exists"
    ),
    d: bool = typer.Option(
        False, "--delete", "-d", help="Delete files not in the dataset"
    ),
):
    """
    Ingest a dataset from PATH.

    Errors raised during ingestion are printed instead of propagated.
    """
    # typer.echo doubles as the progress logger handed to ingest_dataset
    # and as the sink for any error message.
    logger = typer.echo
    try:
        ingest_dataset(path, f, d, logger)
    except Exception as error:
        logger(error)
35
27
 
36
28
 
@app.command()
def list():
    """
    List all datasets and files.
    """
    # NOTE: the function name is the CLI command name; it shadows the
    # builtin `list` for the rest of this module.
    typer.echo(retrieve_datasets())
44
33
 
@@ -46,18 +35,15 @@ def list():
@app.command()
def get(
    dataset: str,
    path: Path = typer.Option(None, "--path", "-p", help="Download to a specific path"),
    # `file` carries a file name, so it must be a string option: annotated as
    # `bool` it became a value-less flag and no name could be passed.
    # The "-f" short flag is left to --force only; it was declared on both
    # options. NOTE(review): presumably the later declaration (--force) was the
    # one that actually answered to "-f" - confirm against the installed click.
    file: str = typer.Option(None, "--file", help="Download a specific file"),
    assets: bool = typer.Option(False, "--assets", "-a", help="Download assets"),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force download even if file exists"
    ),
):
    """
    Download DATASET.

    Without --file the whole dataset is requested. Errors raised during the
    download are printed instead of propagated.
    """
    try:
        dst_path = download_dataset(dataset, file, path, typer.echo, assets, force)
        typer.echo(f"Data available at {dst_path}")
    except Exception as e:
        typer.echo(e)
@@ -1,6 +1,6 @@
1
- # from .stac.dataframe import STACDataFrame # , read_stac
1
+ from .stac.dataframe import STACDataFrame # , read_stac
2
2
 
3
- # from .stac.stac import STACGenerator
4
- # from .formatters import SHFolderFormatter
5
- # from .stac.utils import format_time_acquired
6
- # from .stac.parsers import STACIdParser, StructuredParser, UnestructuredParser
3
+ from .stac.stac import STACGenerator, merge_stac_catalogs
4
+ from .formatters import SHFolderFormatter
5
+ from .stac.utils import format_time_acquired
6
+ from .stac.parsers import STACIdParser, StructuredParser, UnestructuredParser
@@ -2,8 +2,6 @@
2
2
  Module for formatter classes
3
3
  """
4
4
 
5
- import rasterio
6
-
7
5
  from os.path import join, exists
8
6
  from os import listdir, mkdir
9
7
  from glob import glob
@@ -6,13 +6,16 @@ import datetime
6
6
  import json
7
7
  import rasterio
8
8
  from rasterio.warp import transform_bounds
9
+ from typing import Union, Optional
9
10
 
11
+ from os import remove
12
+ from glob import glob
10
13
  from os.path import dirname, join, exists
11
14
 
12
15
 
13
16
  def generate_raster_metadata(raster_path: str,
14
17
  output_folder: str,
15
- date_adquired: str|datetime.datetime
18
+ date_adquired: Union[str, datetime.datetime]
16
19
  ) -> None:
17
20
  """
18
21
  Generate metadata.json file for a raster file
@@ -23,8 +26,10 @@ def generate_raster_metadata(raster_path: str,
23
26
  """
24
27
  with rasterio.open(raster_path) as ds:
25
28
  bounds = ds.bounds
26
- dst_crs = 'EPSG:4326' # EPSG identifier for WGS84 coordinate system used by the geojson format
27
- left, bottom, right, top = rasterio.warp.transform_bounds(ds.crs, dst_crs, *bounds)
29
+ dst_crs = "EPSG:4326" # EPSG identifier for WGS84 coordinate system used by the geojson format
30
+ left, bottom, right, top = rasterio.warp.transform_bounds(
31
+ ds.crs, dst_crs, *bounds
32
+ )
28
33
  bbox = [left, bottom, right, top]
29
34
 
30
35
  # Get raster directory path to get the request.json file
@@ -32,12 +37,32 @@ def generate_raster_metadata(raster_path: str,
32
37
 
33
38
  # Read the request.json file and get the request data type
34
39
  if exists(raster_dir_path):
35
- with open(join(raster_dir_path, 'request.json'), 'r') as f:
40
+ with open(join(raster_dir_path, "request.json"), "r") as f:
36
41
  request = json.load(f)
37
- request_data_type = request['request']['payload']['input']['data'][0]['type']
42
+ request_data_type = request["request"]["payload"]["input"]["data"][0][
43
+ "type"
44
+ ]
38
45
 
39
- metadata_path = join(output_folder, 'metadata.json')
40
- metadata = {'date-adquired': date_adquired, 'bounding-box': bbox, 'type': request_data_type}
41
-
42
- with open(metadata_path, 'w') as f:
46
+ metadata_path = join(output_folder, "metadata.json")
47
+ metadata = {
48
+ "date-adquired": date_adquired,
49
+ "bounding-box": bbox,
50
+ "type": request_data_type,
51
+ }
52
+
53
+ with open(metadata_path, "w") as f:
43
54
  json.dump(metadata, f)
55
+
56
+
57
def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
    """
    Remove every metadata file found in a folder and its subfolders

    :param folder: folder path, searched recursively
    :param metadata_file: metadata file name (used as a glob pattern);
        None falls back to 'metadata.json'
    """
    # Local import: only `dirname`, `join` and `exists` are known to be
    # imported from os.path at module level.
    from os.path import isfile

    # The annotation allows None, which join() would reject with a TypeError
    if metadata_file is None:
        metadata_file = "metadata.json"

    # Search for all the metadata files in the folder tree
    pattern = join(folder, "**", metadata_file)
    for metadata_path in glob(pattern, recursive=True):
        # A directory can match the pattern too; remove() only handles files
        if isfile(metadata_path):
            remove(metadata_path)
@@ -0,0 +1,127 @@
1
+ '''
2
+ Module for STAC Asset Generators
3
+ '''
4
+
5
+ from os import remove, listdir
6
+ from os.path import dirname, join, basename
7
+ from ..metadata import remove_raster_metadata
8
+ from pathlib import Path
9
+
10
+ import pandas as pd
11
+ import rasterio
12
+ import pystac
13
+
14
+
15
# File extension (without the dot) -> pystac media type of the asset
MEDIA_TYPES_DICT = dict(
    tif=pystac.MediaType.GEOTIFF,
    tiff=pystac.MediaType.GEOTIFF,
    png=pystac.MediaType.PNG,
    jpg=pystac.MediaType.JPEG,
    jpeg=pystac.MediaType.JPEG,
)
22
+
23
+
24
class STACAssetGenerator:
    """
    Default asset generator: exposes the raster file itself as one asset
    """

    type = 'None'

    def __init__(self):
        pass

    @classmethod
    def extract_assets(cls, obj_info: pd.DataFrame):
        """
        Build a single 'data' asset from the raster file

        :param obj_info: dataframe whose "image" column holds the raster
            path in its first row
        :return: list with one pystac.Asset pointing at the raster file
        :raises KeyError: if the file extension is not in MEDIA_TYPES_DICT
        """
        # No band information is used here: the whole file becomes the asset
        raster_path = obj_info["image"].values[0]
        # Title is the file name up to its first dot
        title = basename(raster_path).split('.')[0]
        # Lower-case the extension so that e.g. "scene.TIF" is resolved too
        raster_format = raster_path.split('.')[-1].lower()
        asset = pystac.Asset(href=Path(raster_path).as_posix(),
                             title=title,
                             media_type=MEDIA_TYPES_DICT[raster_format],
                             roles=['data'])

        return [asset]
49
+
50
+
51
class BandsAssetGenerator(STACAssetGenerator):
    """
    Asset generator that writes each band of a raster to its own file
    """

    type = 'Bands'

    def __init__(self) -> None:
        super().__init__()

    def extract_assets(self, obj_info: pd.DataFrame):
        """
        Extract one asset per band listed in the "bands" column

        Each band is written next to the raster as '<band>.<extension>'.
        When at least one band is listed, the original raster file and the
        metadata files of its folder are removed afterwards.

        :param obj_info: dataframe whose first row holds the raster path in
            "image" and a band name (or a sequence of names) in "bands"
        :return: list of pystac.Asset, empty when no bands are listed
        :raises KeyError: if the file extension is not in MEDIA_TYPES_DICT
        """
        asset_list = []
        # File path
        raster_path = obj_info["image"].values[0]
        # Bands: normalise the cell to a plain list without relying on the
        # truth value of a numpy array (ambiguous with several elements)
        band_values = obj_info["bands"].values
        bands = band_values[0] if len(band_values) else None
        if bands is None or isinstance(bands, float):
            # None or a float placeholder such as NaN: no bands declared
            bands = []
        elif isinstance(bands, str):
            bands = [bands] if bands else []
        else:
            bands = list(bands)

        if not bands:
            return asset_list

        # Loop invariants; resolving the media type first also means an
        # unsupported extension fails before any band file is written
        raster_dir = dirname(raster_path)
        raster_format = raster_path.split('.')[-1]
        media_type = MEDIA_TYPES_DICT[raster_format.lower()]

        # NOTE(review): a band named like the raster itself would be written
        # over the open source file and then removed below - confirm that
        # callers never produce such names.
        with rasterio.open(raster_path, 'r') as raster:
            # enumerate keeps the position right even with repeated names
            for i, band in enumerate(bands):
                try:
                    single_band = raster.read(i + 1)
                except IndexError:
                    # More names than raster bands: reuse the first band
                    single_band = raster.read(1)
                output_band = join(raster_dir, f'{band}.{raster_format}')
                # Copy the metadata, adjusted to a single-band file
                metadata = raster.meta.copy()
                metadata.update({"count": 1})
                # Write the band to the output folder
                with rasterio.open(output_band, "w", **metadata) as dest:
                    dest.write(single_band, 1)
                # Instantiate pystac asset and append it to the list
                asset_list.append(pystac.Asset(href=output_band,
                                               title=band,
                                               media_type=media_type))

        # Remove the original raster file and its metadata
        remove(raster_path)
        remove_raster_metadata(raster_dir)

        return asset_list
100
+
101
+
102
class ExtractedAssets(STACAssetGenerator):
    """
    Asset generator that exposes already extracted files as assets
    """

    type = 'Extracted'

    def __init__(self) -> None:
        super().__init__()

    def extract_assets(self, obj_info: pd.DataFrame):
        """
        Get all the files with the same extension as the image file as assets

        :param obj_info: dataframe whose "image" column holds the raster
            path in its first row
        :return: list of pystac.Asset, one per matching file in the raster
            folder, sorted by file name
        :raises KeyError: if a file matches and the extension is not in
            MEDIA_TYPES_DICT
        """
        asset_list = []
        # File path
        raster_path = obj_info["image"].values[0]
        raster_dir = dirname(raster_path)
        raster_format = raster_path.split('.')[-1]
        # Match on ".ext" rather than "ext" so that a name merely ending in
        # the same letters (e.g. "motif" for "tif") is not picked up
        suffix = '.' + raster_format
        # dirname() is '' for a bare file name and listdir('') fails, so list
        # the current directory in that case; sorted for a stable order
        files = sorted(f for f in listdir(raster_dir or '.') if f.endswith(suffix))
        # Instantiate pystac asset and append it to the list
        for file in files:
            # Every match shares the raster extension; lower-case it so that
            # e.g. ".TIF" is resolved too
            asset_list.append(pystac.Asset(href=join(raster_dir, file),
                                           title=basename(file),
                                           media_type=MEDIA_TYPES_DICT[raster_format.lower()]))

        return asset_list
@@ -9,9 +9,11 @@ import json
9
9
  from geomet import wkt
10
10
  from os.path import join
11
11
  from os import makedirs
12
- from typing import Union
12
+ from typing import Union, Optional
13
+
13
14
  from math import isnan
14
15
  from .utils import convert_df_geom_to_shape, get_all_children
16
+ from pathlib import Path
15
17
 
16
18
 
17
19
  class STACDataFrame(gpd.GeoDataFrame):
@@ -19,9 +21,11 @@ class STACDataFrame(gpd.GeoDataFrame):
19
21
  super().__init__(*args, **kwargs)
20
22
 
21
23
  @classmethod
22
- def from_stac_file(self, stac_file):
24
+ def from_stac_file(self, stac_file: pystac.STACObject):
23
25
  """
24
26
  Create a STACDataFrame from a STAC file
27
+
28
+ :param stac_file: STAC file
25
29
  """
26
30
  return read_stac(stac_file)
27
31
 
@@ -120,7 +124,7 @@ class STACDataFrame(gpd.GeoDataFrame):
120
124
 
121
125
  def read_stac(
122
126
  stac_file: Union[pystac.Catalog, pystac.Collection, str],
123
- geometry_column: str = "geometry",
127
+ geometry_column: Optional[str] = "geometry",
124
128
  ) -> STACDataFrame:
125
129
  """
126
130
  Read a STAC file and return a STACDataFrame
@@ -128,7 +132,7 @@ def read_stac(
128
132
  :param stac_file: STAC file to read
129
133
  :param geometry_column: name of the geometry column
130
134
  """
131
- if isinstance(stac_file, str):
135
+ if isinstance(stac_file, str) or isinstance(stac_file, Path):
132
136
  stac_file = pystac.read_file(stac_file)
133
137
  children = get_all_children(stac_file)
134
138