eotdl 2025.3.25__py3-none-any.whl → 2025.4.2.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,15 +2,27 @@
  Utils
  """
 
- from sentinelhub import DataCollection, MosaickingOrder
-
+ from sentinelhub import DataCollection, MosaickingOrder, MimeType
  from .evalscripts import EvalScripts
 
 
+ class OUTPUT_FORMAT:
+     TIFF = MimeType.TIFF
+     JPG = MimeType.JPG
+     PNG = MimeType.PNG
+
+
  class SHParameters:
      """
      Sentinel Hub Parameters base class
      """
+
+     MAX_CLOUD_COVERAGE: float = None
+     FIELDS = None
+     MOSAICKING_ORDER = MosaickingOrder.MOST_RECENT
+     EVALSCRIPT = None
+     OUTPUT_FORMAT = MimeType.TIFF
+
      def __init__(self):
          pass
 
@@ -19,8 +31,8 @@ class SHS2L2AParameters(SHParameters):
      """
      Sentinel-2-L2A parameters
      """
+
      DATA_COLLECTION = DataCollection.SENTINEL2_L2A
-     RESOLUTION = 10
      MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
      EVALSCRIPT = EvalScripts.SENTINEL_2_L2A
      FIELDS = {
@@ -28,14 +40,17 @@ class SHS2L2AParameters(SHParameters):
          "exclude": [],
      }
      FILTER = None
+     RESOLUTION = 10
+     BASE_URL = "https://services.sentinel-hub.com"
+     CLOUD_COVERAGE = True
 
 
  class SHS2L1CParameters(SHParameters):
      """
      Sentinel-2-L1C parameters
      """
+
      DATA_COLLECTION = DataCollection.SENTINEL2_L1C
-     RESOLUTION = 10
      MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
      EVALSCRIPT = EvalScripts.SENTINEL_2_L1C
      FIELDS = {
@@ -43,47 +58,110 @@ class SHS2L1CParameters(SHParameters):
          "exclude": [],
      }
      FILTER = None
+     RESOLUTION = 10
+     BASE_URL = "https://services.sentinel-hub.com"
+     CLOUD_COVERAGE = True
+
 
  class SHS1Parameters(SHParameters):
      """
      Sentinel-1 parameters
      """
+
      DATA_COLLECTION = DataCollection.SENTINEL1
-     RESOLUTION = 3
      EVALSCRIPT = EvalScripts.SENTINEL_1
-     MOSAICKING_ORDER = None
+     MOSAICKING_ORDER = MosaickingOrder.MOST_RECENT
      FIELDS = {
-         "include": [
-             "id",
-             "properties.datetime",
-             "sar:instrument_mode",
-             "s1:polarization",
-             "sat:orbit_state",
-             "s1:resolution",
-             "s1:timeliness",
-         ],
-         "exclude": [],
-     }
+         "include": [
+             "id",
+             "properties.datetime",
+             "sar:instrument_mode",
+             "s1:polarization",
+             "sat:orbit_state",
+             "s1:resolution",
+             "s1:timeliness",
+         ],
+         "exclude": [],
+     }
      FILTER = None
+     RESOLUTION = 3
+     BASE_URL = "https://services.sentinel-hub.com"
+     CLOUD_COVERAGE = False
 
 
  class SHDEMParameters(SHParameters):
      """
      Copernicus DEM parameters
      """
+
      DATA_COLLECTION = DataCollection.DEM_COPERNICUS_30
-     RESOLUTION = 3
      MOSAICKING_ORDER = None
      EVALSCRIPT = EvalScripts.DEM
+     FIELDS = None
+     FILTER = None
+     RESOLUTION = 3
+     BASE_URL = "https://services.sentinel-hub.com"
+     CLOUD_COVERAGE = False
+
+
+ class SHHarmonizedLandsatSentinel(SHParameters):
+     """
+     Harmonized Landsat Sentinel parameters
+     """
+
+     DATA_COLLECTION = DataCollection.HARMONIZED_LANDSAT_SENTINEL
+     MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
+     EVALSCRIPT = EvalScripts.HLS_TRUE_COLOR
+     FIELDS = None
      FILTER = None
+     RESOLUTION = 10
+     BASE_URL = "https://services-uswest2.sentinel-hub.com"
+     CLOUD_COVERAGE = True
+
+
+ class SHLandsatOTL2(SHParameters):
+     """
+     Landsat 8-9 Collection 2 imagery processed to level 2
+     """
+
+     DATA_COLLECTION = DataCollection.LANDSAT_OT_L2
+     MOSAICKING_ORDER = MosaickingOrder.LEAST_CC
+     EVALSCRIPT = EvalScripts.LANDSAT_OT_L2_TRUE_COLOR
      FIELDS = None
+     FILTER = None
+     RESOLUTION = 10
+     BASE_URL = "https://services-uswest2.sentinel-hub.com"
+     CLOUD_COVERAGE = True
+
 
+ class DATA_COLLECTION_ID:
+     SENTINEL_1_GRD = DataCollection.SENTINEL1.api_id
+     SENTINEL_2_L1C = DataCollection.SENTINEL2_L1C.api_id
+     SENTINEL_2_L2A = DataCollection.SENTINEL2_L2A.api_id
+     DEM = DataCollection.DEM_COPERNICUS_30.api_id
+     HLS = DataCollection.HARMONIZED_LANDSAT_SENTINEL.api_id
+     LANDSAT_OT_L2 = DataCollection.LANDSAT_OT_L2.api_id
 
- SUPPORTED_SENSORS = ("sentinel-1-grd", "sentinel-2-l1c", "sentinel-2-l2a", "dem")
+
+ SUPPORTED_COLLECTION_IDS = [
+     value
+     for name, value in DATA_COLLECTION_ID.__dict__.items()
+     if not name.startswith("__")
+ ]
 
  SH_PARAMETERS_DICT = {
-     "sentinel-1-grd": SHS1Parameters,
-     "sentinel-2-l1c": SHS2L1CParameters,
-     "sentinel-2-l2a": SHS2L2AParameters,
-     "dem": SHDEMParameters,
+     DATA_COLLECTION_ID.SENTINEL_1_GRD: SHS1Parameters,
+     DATA_COLLECTION_ID.SENTINEL_2_L1C: SHS2L1CParameters,
+     DATA_COLLECTION_ID.SENTINEL_2_L2A: SHS2L2AParameters,
+     DATA_COLLECTION_ID.DEM: SHDEMParameters,
+     DATA_COLLECTION_ID.HLS: SHHarmonizedLandsatSentinel,
+     DATA_COLLECTION_ID.LANDSAT_OT_L2: SHLandsatOTL2,
  }
+
+
+ def get_default_parameters(collection_id: str) -> SHParameters:
+     return SH_PARAMETERS_DICT[collection_id]()
+
+
+ def supports_cloud_coverage(collection_id: str):
+     return SH_PARAMETERS_DICT[collection_id]().CLOUD_COVERAGE
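The parameters module now keys collection defaults by Sentinel Hub API ids rather than plain sensor strings. A minimal usage sketch, assuming the module keeps its previous import path under eotdl.access.sentinelhub (the 20% cloud-cover value is illustrative):

from eotdl.access.sentinelhub.parameters import (  # assumed import path
    DATA_COLLECTION_ID,
    get_default_parameters,
    supports_cloud_coverage,
)

# look up the defaults for a collection by its API id
params = get_default_parameters(DATA_COLLECTION_ID.SENTINEL_2_L2A)
print(params.RESOLUTION, params.BASE_URL)  # 10 https://services.sentinel-hub.com

# only collections with CLOUD_COVERAGE = True accept a cloud-cover filter
if supports_cloud_coverage(DATA_COLLECTION_ID.SENTINEL_2_L2A):
    params.MAX_CLOUD_COVERAGE = 20  # validated elsewhere to be between 0 and 100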
@@ -5,22 +5,23 @@ Utils for Sentinel Hub access
  import json
 
  from os import makedirs
- from datetime import datetime
- from typing import Union, Optional
+ from datetime import datetime, timedelta
+ from typing import Union, Optional, Iterable, List
  from glob import glob
  from shutil import copyfile, rmtree
 
- from .parameters import SUPPORTED_SENSORS
+ from .parameters import SUPPORTED_COLLECTION_IDS, SHParameters, OUTPUT_FORMAT
  from ...tools.geo_utils import is_bounding_box, get_image_bbox
  from ...tools.time_utils import is_time_interval, get_day_between
 
 
  def evaluate_sentinel_parameters(
-     sensor: str,
      time_interval: Union[str, datetime],
      bounding_box: list,
+     collection_id: Optional[str] = None,
      output: Optional[str] = None,
      output_needed: Optional[bool] = True,
+     parameters: Optional[SHParameters] = None,
  ) -> None:
      """
      Evaluate parameters for Sentinel Hub access
@@ -28,10 +29,20 @@ def evaluate_sentinel_parameters(
      if output_needed:
          if not output:
              raise ValueError("Output path must be specified.")
-     if sensor not in SUPPORTED_SENSORS:
-         raise ValueError(
-             f"Sensor {sensor} is not supported. Supported sensors are: {SUPPORTED_SENSORS}"
-         )
+     if parameters and not parameters.OUTPUT_FORMAT:
+         raise ValueError("Output format must be specified.")
+     if collection_id:
+         if collection_id not in SUPPORTED_COLLECTION_IDS:
+             raise ValueError(
+                 f"Collection id {collection_id} is not supported. Supported collections ids are: {SUPPORTED_COLLECTION_IDS}"
+             )
+     else:
+         if not (
+             parameters
+             and hasattr(parameters, "DATA_COLLECTION")
+             and hasattr(parameters.DATA_COLLECTION, "api_id")
+         ):
+             raise ValueError(f"Data collection is not defined properly.")
      if not time_interval:
          raise ValueError("Time interval must be specified.")
      else:
@@ -46,26 +57,34 @@ def evaluate_sentinel_parameters(
              raise ValueError(
                  "Bounding box must be a list or tuple with four elements in format (lon_min, lat_min, lon_max, lat_max)."
              )
+     if parameters and parameters.MAX_CLOUD_COVERAGE:
+         if not isinstance(parameters.MAX_CLOUD_COVERAGE, (int, float)) or (
+             parameters.MAX_CLOUD_COVERAGE < 0 or parameters.MAX_CLOUD_COVERAGE > 100
+         ):
+             raise ValueError("Max cloud coverage must be a number between 0 and 100.")
 
 
  def imagery_from_tmp_to_dir(
      output_dir: str,
-     tmp_dir: Optional[str] = "/tmp/sentinelhub",
+     bounding_box: List[Union[int, float]],
+     tmp_dir: Optional[str],
      name: Optional[str] = None,
      bulk: Optional[bool] = False,
+     output_format: Optional[str] = OUTPUT_FORMAT.TIFF,
  ) -> None:
      """
      Copy imagery from tmp to output dir
      """
-     downloaded_files = glob(f"{tmp_dir}/**/response.tiff")
+     format = output_format
+     downloaded_files = glob(f"{tmp_dir}/**/response." + format)
+
      if len(downloaded_files) == 0:
          return
-
      makedirs(output_dir, exist_ok=True)
-
      for downloaded_file in downloaded_files:
-         request_json = downloaded_file.replace("response.tiff", "request.json")
-         metadata = generate_raster_metadata(downloaded_file, request_json)
+         request_json = downloaded_file.replace("response." + format, "request.json")
+         metadata = generate_raster_metadata(request_json, bounding_box)
+
          if name and not bulk:
              output_filename = name
          elif name and bulk:
@@ -75,19 +94,16 @@ def imagery_from_tmp_to_dir(
              output_filename = f"{metadata['type']}_{metadata['acquisition-date']}"
          else:
              output_filename = metadata["type"]
-
-         copyfile(downloaded_file, f"{output_dir}/{output_filename}.tif")
+         copyfile(downloaded_file, f"{output_dir}/{output_filename}." + format)
          with open(f"{output_dir}/{output_filename}.json", "w", encoding="utf-8") as f:
              json.dump(metadata, f)
-
      rmtree(tmp_dir)
 
 
- def generate_raster_metadata(raster: str, request_json: str) -> None:
+ def generate_raster_metadata(request_json: str, bounding_box) -> None:
      """
      Generate metadata for raster
      """
-     bbox = get_image_bbox(raster)
      with open(request_json, "r", encoding="utf-8") as f:
          json_content = json.load(f)
 
@@ -102,8 +118,27 @@ def generate_raster_metadata(raster: str, request_json: str) -> None:
 
      metadata = {
          "acquisition-date": acquisition_date,
-         "bounding-box": bbox,
+         "bounding-box": bounding_box,
          "type": sensor_type,
      }
 
      return metadata
+
+
+ def filter_times(
+     timestamps: Iterable[datetime], time_difference: timedelta
+ ) -> list[datetime]:
+     """
+     Filters out timestamps within time_difference, preserving only the oldest timestamp.
+     """
+     timestamps = sorted(set(timestamps))
+
+     filtered_timestamps: list[datetime] = []
+     for current_timestamp in timestamps:
+         if (
+             not filtered_timestamps
+             or current_timestamp - filtered_timestamps[-1] > time_difference
+         ):
+             filtered_timestamps.append(current_timestamp)
+
+     return filtered_timestamps
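The new filter_times helper thins out acquisition timestamps that fall within time_difference of each other, keeping the earliest of each cluster. A small sketch of its behaviour, assuming filter_times is imported from the utils module above (the timestamps are made up):

from datetime import datetime, timedelta

acquisitions = [
    datetime(2024, 6, 1, 10, 0),
    datetime(2024, 6, 1, 10, 30),  # within 1 hour of the previous one, dropped
    datetime(2024, 6, 2, 10, 0),
]
print(filter_times(acquisitions, timedelta(hours=1)))
# [datetime(2024, 6, 1, 10, 0), datetime(2024, 6, 2, 10, 0)]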
@@ -11,7 +11,7 @@ def create_stac_catalog(parquet_catalog_path, stac_catalog = None):
      items = []
      for item in tqdm(stac_geoparquet.arrow.stac_table_to_items(table), total=len(table)):
          item = pystac.Item.from_dict(item)
-         item.validate()
+         # item.validate()
          # collection.add_item(item)
          if stac_catalog is not None:
              stac_catalog.add_item(item)
@@ -1,3 +1,3 @@
  from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files
- from .ingest import ingest_dataset
+ from .ingest import ingest_dataset, ingest_virtual_dataset
  from .stage import stage_dataset, stage_dataset_file
eotdl/datasets/ingest.py CHANGED
@@ -1,12 +1,11 @@
  from pathlib import Path
 
  from ..repos import DatasetsAPIRepo
- from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
+ from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual
 
  def retrieve_dataset(metadata, user):
      repo = DatasetsAPIRepo()
      data, error = repo.retrieve_dataset(metadata.name)
-     # print(data, error)
      if data and data["uid"] != user["uid"]:
          raise Exception("Dataset already exists.")
      if error and error == "Dataset doesn't exist":
@@ -34,3 +33,11 @@ def ingest_dataset(
      return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
 
 
+ def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
+     path,
+     links,
+     metadata = None,
+     logger=print,
+     user=None,
+ ):
+     return ingest_virtual(path, links, DatasetsAPIRepo(), retrieve_dataset, 'datasets', metadata, logger)
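ingest_virtual_dataset registers a dataset whose assets remain at external URLs instead of being uploaded; it builds a catalog.parquet from the links and then runs the normal ingest flow. A hedged usage sketch, assuming the re-export from eotdl.datasets shown above (the folder and URLs are placeholders):

from eotdl.datasets import ingest_virtual_dataset

ingest_virtual_dataset(
    "data/my-virtual-dataset",  # local folder used to build the catalog
    links=[
        "https://example.com/scene_1.tif",  # remote assets referenced, not uploaded
        "https://example.com/scene_2.tif",
    ],
)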
eotdl/datasets/stage.py CHANGED
@@ -6,6 +6,7 @@ import geopandas as gpd
  from ..auth import with_auth
  from .retrieve import retrieve_dataset
  from ..repos import FilesAPIRepo
+ from ..files.metadata import Metadata
 
  @with_auth
  def stage_dataset(
@@ -42,19 +43,18 @@ def stage_dataset(
          raise Exception(
              f"Dataset `{dataset['name']}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
          )
-
      # stage metadata
      repo = FilesAPIRepo()
      catalog_path = repo.stage_file(dataset["id"], f"catalog.v{version}.parquet", user, download_path)
-
-     # TODO: stage README.md
-
+     # stage README.md
+     metadata = Metadata(**dataset['metadata'], name=dataset['name'])
+     metadata.save_metadata(download_path)
+     # stage assets
      if assets:
          gdf = gpd.read_parquet(catalog_path)
          for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Staging assets"):
              for k, v in row["assets"].items():
                  stage_dataset_file(v["href"], download_path)
-
      return download_path
 
 
eotdl/files/ingest.py CHANGED
@@ -56,6 +56,15 @@ def prep_ingest_stac(path, logger=None): # in theory should work with a remote c
          # iterate over items
          for item in tqdm(collection.get_items(), desc=f"Ingesting items from collection {collection.id}"):
              assert isinstance(item, pystac.Item)
+             # Process each asset in the item
+             for asset in item.assets.values():
+                 if not asset.href.startswith(('http://', 'https://')):
+                     # Asset is a local file
+                     file_path = Path(asset.href)
+                     # Calculate and add file size
+                     asset.extra_fields['size'] = file_path.stat().st_size
+                     # Calculate and add checksum
+                     asset.extra_fields['checksum'] = calculate_checksum(str(file_path))
              items.append(item)
      # save parquet file
      record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(items)
@@ -63,13 +72,14 @@ def prep_ingest_stac(path, logger=None): # in theory should work with a remote c
      stac_geoparquet.arrow.to_parquet(record_batch_reader, output_path)
      return output_path
 
- @with_auth
- def ingest_virutal_dataset( # could work for a list of paths with minimal changes...
+ def ingest_virtual( # could work for a list of paths with minimal changes...
      path,
      links,
+     repo,
+     retrieve,
+     mode,
      metadata = None,
      logger=print,
-     user=None,
  ):
      path = Path(path)
      if metadata is None:
@@ -88,7 +98,7 @@ def ingest_virutal_dataset( # could work for a list of paths with minimal change
      data.append(create_stac_item('README.md', str(path / "README.md")))
      gdf = gpd.GeoDataFrame(data, geometry='geometry')
      gdf.to_parquet(path / "catalog.parquet")
-     return ingest(path)
+     return ingest(path, repo, retrieve, mode)
 
  @with_auth
  def ingest(path, repo, retrieve, mode, user):
@@ -104,8 +114,6 @@ def ingest(path, repo, retrieve, mode, user):
      # retrieve dataset (create if doesn't exist)
      dataset_or_model = retrieve(metadata, user)
      current_version = sorted([v['version_id'] for v in dataset_or_model["versions"]])[-1]
-     print("current version: ", current_version)
-
      # TODO: update README if metadata changed in UI (db)
      # update_metadata = True
      # if "description" in dataset:
@@ -118,12 +126,10 @@ def ingest(path, repo, retrieve, mode, user):
      # return ingest_files(
      #     repo, dataset["id"], folder, verbose, logger, user, endpoint="datasets"
      # )
-
      catalog_path = path.joinpath("catalog.parquet")
      gdf = gpd.read_parquet(catalog_path)
      files_repo = FilesAPIRepo()
-     catalog_url = files_repo.generate_presigned_url(f'catalog.v{current_version}.parquet', dataset_or_model['id'], user)
-
+     catalog_url = files_repo.generate_presigned_url(f'catalog.v{current_version}.parquet', dataset_or_model['id'], user, endpoint=mode)
      # first time ingesting
      if catalog_url is None:
          total_size = 0
@@ -149,7 +155,7 @@ def ingest(path, repo, retrieve, mode, user):
                  print(f"Error uploading asset {row[0]}: {e}")
                  break
          gdf.to_parquet(catalog_path)
-         files_repo.ingest_file(str(catalog_path), f'catalog.v{current_version}.parquet', dataset_or_model['id'], user, "datasets")
+         files_repo.ingest_file(str(catalog_path), f'catalog.v{current_version}.parquet', dataset_or_model['id'], user, mode)
          data, error = repo.complete_ingestion(dataset_or_model['id'], current_version, total_size, user)
          if error:
              raise Exception(error)
@@ -174,7 +180,7 @@ def ingest(path, repo, retrieve, mode, user):
              if len(df) > 0: # file exists in previous versions
                  if df.iloc[0]['assets'][k]["checksum"] == v["checksum"]: # file is the same
                      # still need to update the required fields
-                     file_url = f"{repo.url}datasets/{dataset_or_model['id']}/stage/{item_id}"
+                     file_url = f"{repo.url}{mode}/{dataset_or_model['id']}/stage/{item_id}"
                      gdf.loc[row[0], "assets"][k]["href"] = file_url
                      total_size += v["size"]
                      continue
@@ -0,0 +1,101 @@
+ import os
+ from pathlib import Path
+ from tqdm import tqdm
+
+ from ..auth import with_auth
+ from .retrieve import retrieve_model, retrieve_model_files
+ from ..shared import calculate_checksum
+ from ..repos import FilesAPIRepo, ModelsAPIRepo
+ from .metadata import generate_metadata
+ from ..curation.stac import STACDataFrame
+
+
+ @with_auth
+ def download_model(
+     model_name,
+     version=None,
+     path=None,
+     logger=None,
+     assets=False,
+     force=False,
+     verbose=False,
+     user=None,
+     file=None,
+ ):
+     model = retrieve_model(model_name)
+     if version is None:
+         version = sorted(model["versions"], key=lambda v: v["version_id"])[-1][
+             "version_id"
+         ]
+     else:
+         assert version in [
+             v["version_id"] for v in model["versions"]
+         ], f"Version {version} not found"
+     download_base_path = os.getenv(
+         "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/models"
+     )
+     if path is None:
+         download_path = download_base_path + "/" + model_name + "/v" + str(version)
+     else:
+         download_path = path + "/" + model_name + "/v" + str(version)
+     # check if model already exists
+     if os.path.exists(download_path) and not force:
+         os.makedirs(download_path, exist_ok=True)
+         raise Exception(
+             f"model `{model['name']} v{str(version)}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
+         )
+     if model["quality"] == 0:
+         if file:
+             raise NotImplementedError("Downloading a specific file is not implemented")
+         model_files = retrieve_model_files(model["id"], version)
+         repo = FilesAPIRepo()
+         for file in tqdm(model_files, disable=verbose, unit="file"):
+             filename, file_version = file["filename"], file["version"]
+             if verbose:
+                 logger(f"Downloading {file['filename']}...")
+             dst_path = repo.download_file(
+                 model["id"],
+                 filename,
+                 user,
+                 download_path,
+                 file_version,
+                 endpoint="models",
+             )
+         if verbose:
+             logger("Generating README.md ...")
+         generate_metadata(download_path, model)
+     else:
+         if verbose:
+             logger("Downloading STAC metadata...")
+         repo = ModelsAPIRepo()
+         gdf, error = repo.download_stac(
+             model["id"],
+             user,
+         )
+         if error:
+             raise Exception(error)
+         # print(gdf)
+         df = STACDataFrame(gdf)
+         # df.geometry = df.geometry.apply(lambda x: Polygon() if x is None else x)
+         df.to_stac(download_path)
+         # print("----")
+         # print(df)
+         # download assets
+         if assets:
+             if verbose:
+                 logger("Downloading assets...")
+             repo = FilesAPIRepo()
+             df = df.dropna(subset=["assets"])
+             for row in tqdm(df.iterrows(), total=len(df)):
+                 for k, v in row[1]["assets"].items():
+                     href = v["href"]
+                     _, filename = href.split("/download/")
+                     # will overwrite assets with same name :(
+                     repo.download_file_url(
+                         href, filename, f"{download_path}/assets", user
+                     )
+         else:
+             logger("To download assets, set assets=True or -a in the CLI.")
+     if verbose:
+         logger("Done")
+     return download_path
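This new module introduces download_model, which mirrors the dataset staging flow: resolve the requested version, copy the files or STAC metadata under ~/.cache/eotdl/models (or a user-supplied path), and optionally pull assets. A usage sketch with placeholder values; the diff does not name the module or show where it is re-exported, so the import path is an assumption:

from eotdl.models.download import download_model  # assumed module path

path = download_model("my-model", version=1, path="models", assets=True)
print("Model available at", path)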
eotdl/models/ingest.py CHANGED
@@ -1,7 +1,7 @@
  from pathlib import Path
 
  from ..repos import ModelsAPIRepo
- from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
+ from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual
 
  def retrieve_model(metadata, user):
      repo = ModelsAPIRepo()
@@ -12,7 +12,6 @@ def retrieve_model(metadata, user):
      if error and error == "Model doesn't exist":
          # create model
          data, error = repo.create_model(metadata.dict(), user)
-         # print(data, error)
      if error:
          raise Exception(error)
      return data
@@ -31,4 +30,13 @@ def ingest_model(
          prep_ingest_stac(path, logger)
      else:
          prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
-     return ingest(path, ModelsAPIRepo(), retrieve_model, 'models')
+     return ingest(path, ModelsAPIRepo(), retrieve_model, 'models')
+
+ def ingest_virtual_model( # could work for a list of paths with minimal changes...
+     path,
+     links,
+     metadata = None,
+     logger=print,
+     user=None,
+ ):
+     return ingest_virtual(path, links, ModelsAPIRepo(), retrieve_model, 'models', metadata, logger)
eotdl/models/stage.py CHANGED
@@ -6,6 +6,7 @@ import geopandas as gpd
  from ..auth import with_auth
  from .retrieve import retrieve_model
  from ..repos import FilesAPIRepo
+ from ..files.metadata import Metadata
 
  @with_auth
  def stage_model(
@@ -44,7 +45,9 @@ def stage_model(
      repo = FilesAPIRepo()
      catalog_path = repo.stage_file(model["id"], f"catalog.v{version}.parquet", user, download_path)
 
-     # TODO: stage README.md
+     # stage README.md
+     metadata = Metadata(**model['metadata'], name=model['name'])
+     metadata.save_metadata(download_path)
 
      if assets:
          gdf = gpd.read_parquet(catalog_path)