OceanDataStore 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- OceanDataStore/__init__.py +21 -0
- OceanDataStore/catalog/__init__.py +12 -0
- OceanDataStore/catalog/oceandatacatalog.py +1242 -0
- OceanDataStore/catalog/stac/README.md +34 -0
- OceanDataStore/catalog/stac/__init__.py +30 -0
- OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
- OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
- OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
- OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
- OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
- OceanDataStore/catalog/stac/template_collection.py +85 -0
- OceanDataStore/catalog/stac/utils.py +476 -0
- OceanDataStore/cli/__init__.py +34 -0
- OceanDataStore/cli/arg_parser.py +182 -0
- OceanDataStore/cli/cli.py +203 -0
- OceanDataStore/cli/exceptions.py +83 -0
- OceanDataStore/cli/icechunk.py +888 -0
- OceanDataStore/cli/logging.py +52 -0
- OceanDataStore/cli/object_store.py +293 -0
- OceanDataStore/cli/utils.py +275 -0
- OceanDataStore/cli/zarr.py +870 -0
- OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
- OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
- OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
- OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
- OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
- OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
- OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
- OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
- OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
- OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
- OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
- OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
- OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
- OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
- OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
- OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
- OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
- OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
- OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
- OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
- OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
- OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
- OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
- OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
- OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
- OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
- OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
- OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
- OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
- OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
- OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
- OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
- OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
- OceanDataStore/data/utils.py +506 -0
- OceanDataStore/zarr.py +993 -0
- oceandatastore-0.3.0.dist-info/METADATA +184 -0
- oceandatastore-0.3.0.dist-info/RECORD +104 -0
- oceandatastore-0.3.0.dist-info/WHEEL +5 -0
- oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
- oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
- oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
- oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
- oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
oceandatacatalog.py
|
|
3
|
+
|
|
4
|
+
Description:
|
|
5
|
+
This module defines the OceanDataCatalog() class which is a
|
|
6
|
+
container for the NOC STAC and a basic API for accessing data
|
|
7
|
+
using pystac, Zarr and Icechunk.
|
|
8
|
+
|
|
9
|
+
Authors:
|
|
10
|
+
- Ollie Tooth
|
|
11
|
+
"""
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import icechunk
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pystac
|
|
17
|
+
import xarray as xr
|
|
18
|
+
|
|
19
|
+
# -- NOC brand CSS -- #
# Inline stylesheet for the HTML cards rendered by this module.
# It is interpolated at the start of the HTML strings built by
# OceanDataCatalog._repr_html_() and OceanDataCatalog.summary(), so each
# rendered card carries its own copy of the `.ods-*` rules.
_NOC_CSS = """
<style>
.ods-card {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
font-size: 13px;
border: 1px solid #0087c1;
border-radius: 6px;
overflow: hidden;
max-width: 950px;
margin: 6px 0;
box-shadow: 0 1px 4px rgba(0,63,112,0.12);
}
.ods-header {
background: #003f70;
color: #ffffff;
padding: 8px 14px;
display: flex;
align-items: center;
gap: 10px;
font-size: 14px;
font-weight: 600;
letter-spacing: 0.3px;
}
.ods-badge {
background: #0087c1;
color: #ffffff;
border-radius: 12px;
padding: 1px 9px;
font-size: 11px;
font-weight: 500;
white-space: nowrap;
}
.ods-badge-neutral {
background: #5a9cbf;
color: #ffffff;
border-radius: 12px;
padding: 1px 9px;
font-size: 11px;
font-weight: 500;
white-space: nowrap;
}
.ods-body {
background: #eef6fb;
padding: 10px 14px;
}
.ods-stats {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 8px;
}
.ods-stat {
background: #ffffff;
border: 1px solid #b3d7ea;
border-radius: 5px;
padding: 4px 10px;
font-size: 12px;
color: #003f70;
}
.ods-stat span {
font-weight: 600;
}
.ods-url {
font-size: 12px;
font-weight: 500;
color: #555;
word-break: break-all;
}
.ods-url a { color: #0087c1; text-decoration: none; }
.ods-url a:hover { text-decoration: underline; }
.ods-table {
width: 100%;
border-collapse: collapse;
font-size: 12px;
margin-top: 2px;
}
.ods-table thead tr {
background: #003f70;
color: #ffffff;
}
.ods-table thead th {
padding: 6px 10px;
text-align: left;
font-weight: 600;
white-space: nowrap;
}
.ods-table tbody tr:nth-child(even) { background: #d6ecf5; }
.ods-table tbody tr:nth-child(odd) { background: #ffffff; }
.ods-table tbody tr:hover { background: #b3d7ea; }
.ods-table td {
padding: 5px 10px;
vertical-align: top;
text-align: left;
border-bottom: 1px solid #cce4f0;
}
.ods-id {
font-family: monospace;
font-size: 11px;
color: #003f70;
white-space: nowrap;
}
details.ods-details > summary {
cursor: pointer;
color: #0087c1;
font-size: 11px;
list-style: none;
user-select: none;
}
details.ods-details > summary::-webkit-details-marker { display: none; }
details.ods-details > summary::before { content: "▶ "; font-size: 9px; }
details.ods-details[open] > summary::before { content: "▼ "; font-size: 9px; }
details.ods-details .ods-detail-body {
margin-top: 4px;
color: #333;
font-size: 11px;
line-height: 1.5;
}
.ods-section-title {
font-weight: 600;
color: #003f70;
margin-bottom: 6px;
font-size: 12px;
}
.ods-code {
background: #ffffff;
color: #003f70;
font-family: monospace;
font-size: 12px;
padding: 8px 12px;
border-radius: 4px;
border: 1px solid #cce4f0;
display: flex;
align-items: center;
justify-content: space-between;
gap: 8px;
margin-top: 4px;
}
.ods-copy-btn {
background: #0087c1;
color: #ffffff;
border: none;
border-radius: 4px;
padding: 3px 8px;
font-size: 11px;
cursor: pointer;
white-space: nowrap;
flex-shrink: 0;
}
.ods-copy-btn:hover { background: #006fa0; }
.ods-none { color: #999; font-style: italic; }
</style>
"""
|
|
172
|
+
|
|
173
|
+
# -- Utility Functions -- #
|
|
174
|
+
def apply_bbox(ds: xr.Dataset,
               bbox: tuple
               ) -> xr.Dataset:
    """
    Apply a geographical bounding box to subset an xarray Dataset.

    Longitude and latitude coordinates are identified from their
    ``standard_name`` attributes ('longitude' / 'latitude'), falling back
    to the names ``nav_lon`` / ``nav_lat`` when no such attribute is found.

    * When both coordinates are multi-dimensional, grid points outside the
      bounding box are masked and the Dataset is cropped to the smallest
      index rectangle containing every grid point inside the box.
    * Otherwise, label-based selection is applied along the longitude and
      latitude coordinates.

    Parameters
    ----------
    ds : xr.Dataset
        Input xarray Dataset.
    bbox : tuple
        Geographical bounding box in the format (min_lon, max_lon, min_lat, max_lat).

    Returns
    -------
    xr.Dataset
        Geographically subsetted xarray Dataset.

    Raises
    ------
    ValueError
        If 'ds' is not an xarray Dataset, if 'bbox' is not a 4-tuple, if
        multi-dimensional longitude and latitude coordinates do not share
        the same dimensions, or if no grid points lie inside the bounding box.
    """
    # -- Validate Inputs -- #
    if not isinstance(ds, xr.Dataset):
        raise ValueError("'ds' must be an xarray Dataset.")
    if not (isinstance(bbox, tuple) and len(bbox) == 4):
        raise ValueError("'bbox' must be a tuple of the form (min_lon, max_lon, min_lat, max_lat).")

    # Unpack once so both branches below use the documented element order:
    min_lon, max_lon, min_lat, max_lat = bbox

    # -- Identify geographical coordinate names & dimensions -- #
    # Default lat/lon coord names:
    lon_name, lat_name = "nav_lon", "nav_lat"
    # Update lat/lon coord names via standard_name attributes:
    for coord in ds.coords:
        standard_name = ds[coord].attrs.get('standard_name', '').lower()
        if standard_name == 'longitude':
            lon_name = coord
        elif standard_name == 'latitude':
            lat_name = coord

    # -- Apply Bounding Box -- #
    if (ds[lon_name].ndim > 1) and (ds[lat_name].ndim > 1):
        # -- Case 1: 2D lat/lon coordinates -- #
        # Identify lat/lon coordinate dimensions:
        if ds[lon_name].dims != ds[lat_name].dims:
            raise ValueError("Longitude and latitude coordinates must have the same dimensions.")
        y_name, x_name = ds[lon_name].dims

        # Define bbox mask (True for grid points inside the bounding box):
        mask = (
            (ds[lon_name] >= min_lon)
            & (ds[lon_name] <= max_lon)
            & (ds[lat_name] >= min_lat)
            & (ds[lat_name] <= max_lat)
        )

        # Find rows/columns containing at least one valid grid point:
        rows = mask.any(dim=x_name)
        cols = mask.any(dim=y_name)
        y_idx = np.where(rows.compute())[0]
        x_idx = np.where(cols.compute())[0]

        if len(y_idx) == 0 or len(x_idx) == 0:
            raise ValueError("No grid points found inside bbox")

        # Subset dataset to bounding box: mask points outside the box, then
        # crop to the index rectangle spanning all points inside it.
        ds_subset = (ds
                     .where(mask, drop=False)
                     .isel({y_name: slice(y_idx.min(), y_idx.max() + 1),
                            x_name: slice(x_idx.min(), x_idx.max() + 1),
                            })
                     )
    else:
        # -- Case 2: 1D lat/lon coordinates -- #
        # NOTE(review): label-based slices follow the ordering of the
        # coordinate index; a descending latitude axis would presumably
        # need reversed bounds -- confirm against the datasets in use.
        ds_subset = ds.sel({lon_name: slice(min_lon, max_lon),
                            lat_name: slice(min_lat, max_lat)
                            })

    return ds_subset
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def apply_time_bounds(ds: xr.Dataset,
                      start_datetime: str | None = None,
                      end_datetime: str | None = None
                      ) -> xr.Dataset:
    """
    Apply temporal subsetting to an xarray Dataset.

    The time dimension is taken to be the first dimension of 'ds' whose
    name contains 'time' (case-insensitive). The Dataset is then selected
    along that dimension using a label slice from 'start_datetime' to
    'end_datetime'; a bound left as None leaves that side of the slice open.

    Parameters
    ----------
    ds : xr.Dataset
        Input xarray Dataset.
    start_datetime : str, optional
        Start datetime in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').
    end_datetime : str, optional
        End datetime in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').

    Returns
    -------
    xr.Dataset
        Temporally subsetted xarray Dataset.

    Raises
    ------
    ValueError
        If 'ds' is not an xarray Dataset, if a datetime bound is given but
        is not a string, or if 'ds' has no dimension whose name contains 'time'.
    """
    # -- Validate Inputs -- #
    if not isinstance(ds, xr.Dataset):
        raise ValueError("'ds' must be an xarray Dataset.")
    if start_datetime is not None and not isinstance(start_datetime, str):
        raise ValueError("'start_datetime' must be a string in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').")
    if end_datetime is not None and not isinstance(end_datetime, str):
        raise ValueError("'end_datetime' must be a string in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').")

    # -- Identify time dimension -- #
    # Dimension names may be any hashable, so only string names are inspected.
    time_name = next(
        (dim for dim in ds.dims if isinstance(dim, str) and 'time' in dim.lower()),
        None,
    )
    if time_name is None:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError("No time dimension found in 'ds' (expected a dimension name containing 'time').")

    # -- Apply temporal subsetting -- #
    ds_subset = ds.sel({time_name: slice(start_datetime, end_datetime)})

    return ds_subset
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# -- Define CatalogSummary() class -- #
|
|
294
|
+
class CatalogSummary:
|
|
295
|
+
"""
|
|
296
|
+
Container for OceanDataCatalog summary.
|
|
297
|
+
|
|
298
|
+
Parameters
|
|
299
|
+
----------
|
|
300
|
+
num_collections : int
|
|
301
|
+
The number of collections in the catalog.
|
|
302
|
+
num_items : int
|
|
303
|
+
The number of items in the catalog.
|
|
304
|
+
other_info : dict
|
|
305
|
+
Any other relevant summary information about the catalog.
|
|
306
|
+
"""
|
|
307
|
+
def __init__(self,
|
|
308
|
+
display_text: str | None = None,
|
|
309
|
+
display_html: str | None = None,
|
|
310
|
+
):
|
|
311
|
+
self.display_text = display_text
|
|
312
|
+
self.display_html = display_html
|
|
313
|
+
|
|
314
|
+
def __repr__(self):
|
|
315
|
+
"""
|
|
316
|
+
Plain text representation of the CatalogSummary.
|
|
317
|
+
"""
|
|
318
|
+
return self.display_text
|
|
319
|
+
|
|
320
|
+
def _repr_html_(self):
|
|
321
|
+
"""
|
|
322
|
+
HTML representation of the CatalogSummary.
|
|
323
|
+
"""
|
|
324
|
+
return self.display_html
|
|
325
|
+
|
|
326
|
+
# -- Define OceanDataCatalog() class -- #
|
|
327
|
+
class OceanDataCatalog:
|
|
328
|
+
"""
|
|
329
|
+
A class to interact with the National Oceanography Centre (NOC)
|
|
330
|
+
SpatioTemporal Asset Catalogs (STAC).
|
|
331
|
+
|
|
332
|
+
The catalog provides metadata and access to oceanographic
|
|
333
|
+
datasets stored in cloud object storage. Users can search the
|
|
334
|
+
catalog, inspect available Items, and open datasets as familiar
|
|
335
|
+
xarray data structures.
|
|
336
|
+
|
|
337
|
+
Parameters
|
|
338
|
+
----------
|
|
339
|
+
catalog_name : str, optional
|
|
340
|
+
Name of the NOC STAC catalog to use.
|
|
341
|
+
catalog_url : str, optional
|
|
342
|
+
Path or URL to the root STAC catalog. If not provided,
|
|
343
|
+
a default path to the NOC STAC catalog is used.
|
|
344
|
+
|
|
345
|
+
Attributes
|
|
346
|
+
----------
|
|
347
|
+
Catalog : pystac.Catalog
|
|
348
|
+
The root NOC STAC catalog.
|
|
349
|
+
Collection : pystac.Collection or None
|
|
350
|
+
The current STAC Collection being viewed.
|
|
351
|
+
Items : list of pystac.Item or None
|
|
352
|
+
The list of STAC Items returned from the most recent query.
|
|
353
|
+
"""
|
|
354
|
+
def __init__(self,
             catalog_name: str = "noc-stac",
             catalog_url: str = None
             ):
    """
    Read the root STAC catalog and initialise an empty selection state.

    Parameters
    ----------
    catalog_name : str, optional
        Name of the NOC STAC catalog; used to build the default URL and
        kept for display.
    catalog_url : str, optional
        Path or URL to the root STAC catalog. When not provided (or
        empty), the default NOC location for 'catalog_name' is used.
    """
    # Resolve the root catalog location, defaulting to the NOC object store:
    if not catalog_url:
        catalog_url = f"https://noc-msm-o.s3-ext.jc.rl.ac.uk/oceandatastore/{catalog_name}/catalog.json"
    self._stac_url = catalog_url

    # Read the root catalog from the resolved location:
    self.Catalog = pystac.read_file(self._stac_url)

    # No Collection is selected and no search has been run yet:
    self.Collection = None
    self.Items = None

    # Keep the catalog name for the text / HTML representations:
    self._catalog_name = catalog_name
|
|
368
|
+
|
|
369
|
+
def __repr__(self) -> str:
|
|
370
|
+
"""
|
|
371
|
+
Plain text representation of the OceanDataCatalog.
|
|
372
|
+
"""
|
|
373
|
+
n_collections = len(self.available_collections)
|
|
374
|
+
col_name = self.Collection.id if self.Collection else "—"
|
|
375
|
+
n_items = len(self.Items) if self.Items is not None else "—"
|
|
376
|
+
return (
|
|
377
|
+
f"OceanDataCatalog\n"
|
|
378
|
+
f" Catalog: {self._catalog_name}\n"
|
|
379
|
+
f" URL: {self._stac_url}\n"
|
|
380
|
+
f" Collections: {n_collections} available\n"
|
|
381
|
+
f" Collection: {col_name}\n"
|
|
382
|
+
f" Search: {n_items} items"
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _repr_html_(self) -> str:
    """
    HTML representation of the OceanDataCatalog.

    Builds a styled card containing headline statistics (catalog version,
    number of collections, active Collection, size of the last search), a
    table with one row per Collection (ID, title, collapsible description,
    temporal extent) and a link to the source catalog URL.

    All values taken from the catalog metadata are HTML-escaped before
    being interpolated into the markup.

    Returns
    -------
    str
        HTML markup for the catalog card, prefixed by the shared stylesheet.
    """
    # Imported locally so this method does not rely on a module-level import:
    from html import escape

    none_html = "<span class='ods-none'>—</span>"

    def _extent_dates(col):
        """Return (start, end) of the first temporal interval as 'YYYY-MM-DD' strings."""
        try:
            ext = col.extent.temporal.intervals
            start = ext[0][0].strftime("%Y-%m-%d") if ext[0][0] else "—"
            end = ext[0][1].strftime("%Y-%m-%d") if ext[0][1] else "present"
        except Exception:
            # Deliberately best-effort: a Collection with a missing or
            # malformed extent must not prevent the card from rendering.
            start, end = "—", "—"
        return start, end

    # Traverse the collection tree once and reuse it for both the count
    # and the table rows.
    collections = list(self.Catalog.get_all_collections())
    n_collections = len(collections)

    active_id = self.Collection.id if self.Collection else None
    col_name = (
        escape(str(active_id))
        if self.Collection
        else "<span class='ods-none'>none selected</span>"
    )
    n_items = (
        f"{len(self.Items)} items"
        if self.Items is not None
        else "<span class='ods-none'>no search yet</span>"
    )

    rows = []
    for col in collections:
        start, end = _extent_dates(col)
        desc = col.description or ""
        if desc:
            # Strip Markdown bold markers, then escape the remaining text:
            desc_cell = (
                f"<details class='ods-details'>"
                f"<summary>Summary</summary>"
                f"<div class='ods-detail-body'>{escape(desc.replace('**', ''))}</div>"
                f"</details>"
            )
        else:
            desc_cell = none_html
        active = (
            " <span class='ods-badge' style='font-size:10px'>active</span>"
            if self.Collection and col.id == active_id
            else ""
        )
        col_title_cell = escape(str(col.title)) if col.title else none_html
        rows.append(
            f"<tr>"
            f"<td><span class='ods-id'>{escape(str(col.id))}</span>{active}</td>"
            f"<td>{col_title_cell}</td>"
            f"<td>{desc_cell}</td>"
            f"<td>{start}</td>"
            f"<td>{end}</td>"
            f"</tr>"
        )
    rows_html = "".join(rows)

    catalog_name = escape(str(self._catalog_name))
    version = escape(str(self.Catalog.extra_fields.get('catalog_version', 'None')))
    # The URL is used inside a single-quoted attribute, so quotes are escaped too:
    stac_url = escape(str(self._stac_url), quote=True)

    return (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f" <div class='ods-header'>"
        f" OceanDataCatalog"
        f" <span class='ods-badge'>{catalog_name}</span>"
        f" </div>"
        f" <div class='ods-body'>"
        f" <div class='ods-stats'>"
        f" <div class='ods-stat'>Version <span>{version}</span></div>"
        f" <div class='ods-stat'>Collections <span>{n_collections}</span></div>"
        f" <div class='ods-stat'>Active collection <span>{col_name}</span></div>"
        f" <div class='ods-stat'>Last search <span>{n_items}</span></div>"
        f" </div>"
        f" <table class='ods-table'>"
        f" <thead><tr>"
        f" <th>Collection ID</th><th>Title</th><th>Description</th>"
        f" <th>From</th><th>To</th>"
        f" </tr></thead>"
        f" <tbody>{rows_html}</tbody>"
        f" </table>"
        f" <div class='ods-section-title' style='margin-top:10px'>Source URL</div>"
        f" <div class='ods-url'> <a href='{stac_url}' target='_blank'>{stac_url}</a></div>"
        f" </div>"
        f"</div>"
    )
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
@property
|
|
461
|
+
def available_collections(self) -> list[str]:
|
|
462
|
+
"""
|
|
463
|
+
List available collection IDs in the NOC STAC catalog.
|
|
464
|
+
"""
|
|
465
|
+
return [col.id for col in self.Catalog.get_all_collections()]
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
@property
|
|
469
|
+
def available_items(self) -> list[str]:
|
|
470
|
+
"""
|
|
471
|
+
List available Item IDs in the current Collection or the root Catalog.
|
|
472
|
+
"""
|
|
473
|
+
if self.Items is not None:
|
|
474
|
+
# Return all Item IDs from the most recent search:
|
|
475
|
+
return [item.id for item in self.Items]
|
|
476
|
+
else:
|
|
477
|
+
# Return all Item IDs from the current Collection or root Catalog:
|
|
478
|
+
scope = self.Collection if self.Collection else self.Catalog
|
|
479
|
+
return list(item.id for item in scope.get_items(recursive=True))
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def summary(self) -> CatalogSummary:
    """
    Summary of the most recent OceanDataCatalog search.

    * In Jupyter / Marimo environments a styled HTML table is displayed.
    * In plain Python / CLI environments a formatted text table is printed instead.

    Each row describes one Item: ID, title, platform, start / end
    datetime and variables. Values taken from Item metadata are
    HTML-escaped in the HTML rendering; missing, null or non-string
    property values are rendered as blanks / placeholders instead of
    raising.

    Returns
    -------
    CatalogSummary
        Container holding the plain-text and HTML renderings.

    Raises
    ------
    ValueError
        If no Items are available from the most recent query.
    """
    # Imported locally so this method does not rely on a module-level import:
    from html import escape

    # -- Validate STAC Items -- #
    if not self.Items:
        raise ValueError("No Items returned in most recent query. Use 'search()' to query Catalog.")

    n = len(self.Items)
    found = f"{n} Item{'s' if n != 1 else ''} found"
    none_html = "<span class='ods-none'>—</span>"

    def _html_cell(value) -> str:
        """Escape a property value for HTML; show a placeholder when it is missing."""
        if value is None or value == "":
            return none_html
        return escape(str(value))

    def _text_cell(value, width: int) -> str:
        """Render a property value as text truncated / padded to exactly 'width' characters."""
        text = "" if value is None else str(value)
        return text[:width].ljust(width)

    # Column layout shared by the text table:
    col_w = [46, 28, 10, 12, 12, 30]
    headers = ["Item ID", "Title", "Platform", "Start Date", "End Date", "Variables"]
    prop_keys = ("title", "platform", "start_datetime", "end_datetime")

    html_rows = []
    text_rows = []
    for item in self.Items:
        props = item.properties
        # Tolerate a null 'variables' property and non-string entries:
        variables = [str(v) for v in (props.get("variables") or [])]

        # ----- HTML row ----- #
        if variables:
            var_list = "<br>".join(escape(v) for v in variables)
            vars_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(variables)} variable{'s' if len(variables) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{var_list}</div>"
                f"</details>"
            )
        else:
            vars_cell = none_html

        cells = [f"<span class='ods-id'>{escape(str(item.id))}</span>"]
        cells.extend(_html_cell(props.get(key)) for key in prop_keys)
        cells.append(vars_cell)
        html_rows.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")

        # ----- Plain-text row ----- #
        values = [item.id, *(props.get(key) for key in prop_keys), ", ".join(variables)]
        text_rows.append(
            "| " + " | ".join(_text_cell(v, w) for v, w in zip(values, col_w)) + " |"
        )

    # ----- HTML Output ----- #
    rows_html = "".join(html_rows)
    col_badge = (
        f"<span class='ods-badge-neutral'>{escape(str(self.Collection.id))}</span>"
        if self.Collection else ""
    )
    html_out = (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f" <div class='ods-header'>"
        f" Search Results"
        f" <span class='ods-badge'>{found}</span>"
        f" {col_badge}"
        f" </div>"
        f" <div class='ods-body'>"
        f" <table class='ods-table'>"
        f" <thead><tr>"
        f" <th>Item ID</th><th>Title</th><th>Platform</th>"
        f" <th>Start Date</th><th>End Date</th><th>Variables</th>"
        f" </tr></thead>"
        f" <tbody>{rows_html}</tbody>"
        f" </table>"
        f" </div>"
        f"</div>"
    )

    # ----- Plain-Text Output ----- #
    sep = "+" + "+".join("-" * (w + 2) for w in col_w) + "+"
    header_row = "| " + " | ".join(h.ljust(w) for h, w in zip(headers, col_w)) + " |"
    text_lines = [f"Search Results — {found}", sep, header_row, sep, *text_rows, sep]
    text = "\n".join(text_lines)

    return CatalogSummary(display_text=text, display_html=html_out)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def item_summary(self, id: str) -> CatalogSummary:
    """
    Build the detailed summary for a single OceanDataStore Item.

    The Items held from the latest search are checked first; if the Item
    is not among them it is looked up in the Catalog via ``_open_item``.

    The returned summary carries two renderings of the same metadata:

    * ``display_html`` - a styled HTML card with core statistics, copyable
      access snippets, collapsible property values and an assets table.
      Every metadata value is HTML-escaped before being embedded.
    * ``display_text`` - a formatted plain-text summary.

    Parameters
    ----------
    id : str
        Item ID to display metadata for.

    Returns
    -------
    CatalogSummary
        Summary object built from the plain-text and HTML renderings.

    Raises
    ------
    TypeError
        If *id* is not a string.
    ValueError
        If the Item ID is not found in the Catalog.
    """
    # Imported locally: this method binds the name ``html`` to the
    # generated markup further down, so the module itself is not imported.
    from html import escape

    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")

    def _esc(value) -> str:
        """Return *value* as text safe for HTML content and attributes."""
        return escape(str(value), quote=True)

    # Collect STAC Item properties metadata:
    item = next((it for it in (self.Items or []) if it.id == id), None)
    if item is None:
        try:
            item = self._open_item(id=id)
        except Exception as err:
            raise ValueError(f"Item '{id}' not found in Catalog.") from err
    # Guard against a lookup that yields no Item without raising:
    if item is None:
        raise ValueError(f"Item '{id}' not found in Catalog.")

    props = item.properties
    title = props.get("title", "")
    desc_raw = props.get("description", "")
    # Keep only the text preceding the embedded "OceanDataCatalog Access" section:
    desc = desc_raw.split("OceanDataCatalog Access")[0].strip() if desc_raw else ""
    platform = props.get("platform", "")
    start = props.get("start_datetime", "")
    end = props.get("end_datetime", "")
    bbox = item.bbox
    bbox_str = (
        f"{bbox[0]:.2f}, {bbox[1]:.2f}, {bbox[2]:.2f}, {bbox[3]:.2f}"
        if bbox else "—"
    )

    # Properties already shown in the header rows of both renderings:
    _shown = {"title", "description", "platform", "start_datetime", "end_datetime", "datetime"}

    # ---- HTML Output (Jupyter) ---- #
    none_span = "<span class='ods-none'>—</span>"
    coll_badge = (
        f"<span class='ods-badge-neutral'>{_esc(item.collection_id)}</span>"
        if item.collection_id else ""
    )

    platform_html = _esc(platform) if platform else "—"
    start_html = _esc(start) if start else "—"
    end_html = _esc(end) if end else "—"
    core_stats = (
        f"<div class='ods-stats'>"
        f"  <div class='ods-stat'>Platform <span>{platform_html}</span></div>"
        f"  <div class='ods-stat'>Start <span>{start_html}</span></div>"
        f"  <div class='ods-stat'>End <span>{end_html}</span></div>"
        f"  <div class='ods-stat'>BBox <span>({bbox_str})</span></div>"
        f"</div>"
    )

    if title or desc:
        title_val = _esc(title) if title else none_span
        # Markdown bold markers are dropped rather than rendered:
        desc_val = _esc(desc.replace('**', '')) if desc else none_span
        title_row = (
            f"<table class='ods-table' style='margin-bottom:8px'>"
            f"  <thead><tr><th>Title</th><th>Description</th></tr></thead>"
            f"  <tbody><tr><td>{title_val}</td><td>{desc_val}</td></tr></tbody>"
            f"</table>"
        )
    else:
        title_row = ""

    # Properties:
    prop_rows = []
    for key, val in props.items():
        if key in _shown:
            continue
        if isinstance(val, list):
            items_html = "<br>".join(_esc(v) for v in val)
            val_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(val)} item{'s' if len(val) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{items_html}</div>"
                f"</details>"
            )
        elif isinstance(val, dict):
            dict_html = "<br>".join(f"<b>{_esc(k)}</b>: {_esc(v)}" for k, v in val.items())
            val_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(val)} field{'s' if len(val) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{dict_html}</div>"
                f"</details>"
            )
        else:
            val_cell = _esc(val) if val is not None else none_span
        prop_rows.append(f"<tr><td class='ods-id'>{_esc(key)}</td><td>{val_cell}</td></tr>")

    props_section = ""
    if prop_rows:
        props_section = (
            f"<div class='ods-section-title' style='margin-top:10px'>Properties</div>"
            f"<table class='ods-table'>"
            f"  <thead><tr><th>Property</th><th>Value</th></tr></thead>"
            f"  <tbody>{''.join(prop_rows)}</tbody>"
            f"</table>"
        )

    # Assets:
    asset_rows = []
    for asset_key, asset in item.assets.items():
        af = asset.extra_fields
        asset_rows.append(
            f"<tr>"
            f"<td class='ods-id'>{_esc(asset_key)}</td>"
            f"<td>{_esc(asset.media_type or '')}</td>"
            f"<td>{_esc(af.get('endpoint_url', ''))}</td>"
            f"<td>{_esc(af.get('bucket', ''))}</td>"
            f"<td class='ods-id'>{_esc(af.get('prefix', ''))}</td>"
            f"</tr>"
        )

    assets_section = ""
    if asset_rows:
        assets_section = (
            f"<div class='ods-section-title' style='margin-top:10px'>Assets</div>"
            f"<table class='ods-table'>"
            f"  <thead><tr>"
            f"    <th>Key</th><th>Media Type</th><th>Endpoint</th><th>Bucket</th><th>Prefix</th>"
            f"  </tr></thead>"
            f"  <tbody>{''.join(asset_rows)}</tbody>"
            f"</table>"
        )

    # Access snippets (raw for the text rendering, escaped for the markup):
    access_ds_str = f"catalog.open_dataset(id='{id}')"
    access_repo_str = f"catalog.open_repo(id='{id}')"
    access_ds_html = _esc(access_ds_str)
    access_repo_html = _esc(access_repo_str)
    # Inline handler: copies the button's data-copy value via a temporary
    # textarea, then flips the button label for 1.5 seconds.
    _copy_js = (
        "(function(b){"
        "var t=document.createElement('textarea');"
        "t.value=b.dataset.copy;"
        "document.body.appendChild(t);"
        "t.select();"
        "document.execCommand('copy');"
        "document.body.removeChild(t);"
        "b.textContent='Copied!';"
        "setTimeout(function(){b.textContent='Copy'},1500)"
        "})(this)"
    )
    access_section = (
        f"<div class='ods-section-title' style='margin-top:10px'>Access</div>"
        f"<div class='ods-code'>"
        f"  <code>{access_ds_html}</code>"
        f"  <button class='ods-copy-btn' data-copy=\"{access_ds_html}\" onclick=\"{_copy_js}\">Copy</button>"
        f"</div>"
        f"<div class='ods-code'>"
        f"  <code>{access_repo_html}</code>"
        f"  <button class='ods-copy-btn' data-copy=\"{access_repo_html}\" onclick=\"{_copy_js}\">Copy</button>"
        f"</div>"
    )

    html = (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f"  <div class='ods-header'>"
        f"    {_esc(id)}"
        f"    {coll_badge}"
        f"  </div>"
        f"  <div class='ods-body'>"
        f"    {core_stats}"
        f"    {title_row}"
        f"    {access_section}"
        f"    {props_section}"
        f"    {assets_section}"
        f"  </div>"
        f"</div>"
    )

    # ---- Plain-Text Output ---- #
    text_lines = [
        f"Item: {id}",
        f"  Title:    {title or '—'}",
        f"  Platform: {platform or '—'}",
        f"  Start:    {start or '—'}",
        f"  End:      {end or '—'}",
        f"  BBox:     {bbox_str}",
        "",
        "  Properties:",
    ]
    for key, val in props.items():
        if key in _shown:
            continue
        if isinstance(val, list):
            # Long lists are previewed by their first five entries:
            preview = ", ".join(str(v) for v in val[:5])
            suffix = ", ..." if len(val) > 5 else ""
            text_lines.append(f"    {key}: [{preview}{suffix}]")
        else:
            text_lines.append(f"    {key}: {val}")
    if item.assets:
        text_lines.append("")
        text_lines.append("  Assets:")
        for asset_key, asset in item.assets.items():
            af = asset.extra_fields
            loc = f"{af.get('endpoint_url', '')}/{af.get('bucket', '')}/{af.get('prefix', '')}"
            text_lines.append(f"    {asset_key}: {asset.media_type or ''} — {loc}")
    text_lines += ["", f"  Access: {access_ds_str}"]
    text = "\n".join(text_lines)

    return CatalogSummary(display_text=text, display_html=html)
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def _filter_items(self,
                  items: list[pystac.Item],
                  dataset_type: Optional[str] = None,
                  product_type: Optional[str] = None,
                  variable_name: Optional[str] = None,
                  standard_name: Optional[str] = None,
                  item_name: Optional[str] = None
                  ):
    """
    Narrow a list of Items using substring matches on their metadata.

    Each supplied (truthy) criterion is applied in turn to the Items that
    survived the previous one, so the result satisfies all of them.
    Criteria left as None (or empty) are ignored.

    Parameters
    ----------
    items : list[pystac.Item]
        List of STAC Items to filter.
    dataset_type : str, optional
        Substring matched against the 'dataset_type' property.
    product_type : str, optional
        Substring matched against the 'product_type' property.
    variable_name : str, optional
        Substring matched against each entry of the 'variables' property.
    standard_name : str, optional
        Substring matched against each entry of the
        'variable_standard_names' property.
    item_name : str, optional
        Substring matched against the Item ID.

    Returns
    -------
    list[pystac.Item]
        Items matching every supplied criterion. The input list itself is
        returned when no criterion is supplied.
    """
    # (criterion value, predicate) pairs in the order they are applied:
    criteria = (
        (dataset_type,
         lambda it: dataset_type in str(it.properties.get('dataset_type', ''))),
        (product_type,
         lambda it: product_type in str(it.properties.get('product_type', ''))),
        (variable_name,
         lambda it: any(variable_name in var for var in it.properties.get('variables', []))),
        (standard_name,
         lambda it: any(standard_name in var for var in it.properties.get('variable_standard_names', []))),
        (item_name,
         lambda it: item_name in it.id),
    )

    selected = items
    for wanted, keep in criteria:
        if wanted:
            selected = [candidate for candidate in selected if keep(candidate)]

    return selected
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def clear(self) -> None:
    """
    Reset the search state of the OceanDataCatalog.

    Both the active Collection and the Items stored by the latest
    search are set back to None.
    """
    for attribute in ("Collection", "Items"):
        setattr(self, attribute, None)
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
def search(self,
           collection: Optional[str] = None,
           dataset_type: Optional[str] = None,
           product_type: Optional[str] = None,
           variable_name: Optional[str] = None,
           standard_name: Optional[str] = None,
           item_name: Optional[str] = None
           ) -> CatalogSummary:
    """
    Search the OceanDataCatalog for Items matching the specified criteria.

    When several criteria are provided, only Items matching all of them
    are kept. The matching Items are stored on ``self.Items``; when
    *collection* is given it becomes the active ``self.Collection``.

    All arguments are validated before any state is modified, so a
    rejected call leaves the active Collection and Items untouched.

    Parameters
    ----------
    collection : str, optional
        ID of the Collection to search (it may be nested at any depth
        below the root Catalog). Default is None, which searches the
        active Collection if one is set and the entire root Catalog
        otherwise.
    dataset_type : str, optional
        Dataset type to search for (e.g., 'model', 'observation').
        Default is None, which retrieves Items from all dataset types.
    product_type : str, optional
        Product type to search for (e.g., 'timeseries', 'climatology').
        Default is None, which retrieves Items from all product types.
    variable_name : str, optional
        Variable name to search for. Default is None,
        which retrieves all Items.
    standard_name : str, optional
        Standard variable name to search for. Default is None,
        which retrieves all Items.
    item_name : str, optional
        Substring to filter Item IDs by. Default is None,
        which retrieves all Items.

    Returns
    -------
    CatalogSummary
        The result of ``self.summary()`` for the refreshed search state.
        NOTE(review): the return type assumes ``summary()`` yields a
        CatalogSummary - confirm against its definition.

    Raises
    ------
    ValueError
        If the specified collection is not found in the Catalog.
    ValueError
        If both variable_name and standard_name are specified.
    TypeError
        If any of the input parameters are of incorrect type.
    """
    # -- Validate Inputs -- #
    string_args = {
        "collection": collection,
        "dataset_type": dataset_type,
        "product_type": product_type,
        "variable_name": variable_name,
        "standard_name": standard_name,
        "item_name": item_name,
    }
    for arg_name, arg_value in string_args.items():
        if arg_value is not None and not isinstance(arg_value, str):
            raise TypeError(f"'{arg_name}' must be a string or None.")

    # Checked up-front so an invalid call neither changes the active
    # Collection nor pays for fetching every Item first:
    if (variable_name is not None) and (standard_name is not None):
        raise ValueError("Only one of 'variable_name' or 'standard_name' can be specified.")

    # -- Define Search Scope -- #
    if collection:
        collections = {col.id: col for col in self.Catalog.get_all_collections()}
        if collection not in collections:
            raise ValueError(f"Collection '{collection}' not found. Available: {list(collections)}")
        # Use the Collection found by the recursive listing: a direct-child
        # lookup on the root Catalog would miss nested Collections.
        self.Collection = collections[collection]
        scope = self.Collection
    else:
        scope = self.Collection if self.Collection else self.Catalog

    # -- Collect & Filter Items -- #
    items = list(scope.get_items(recursive=True))
    self.Items = self._filter_items(items=items,
                                    dataset_type=dataset_type,
                                    product_type=product_type,
                                    variable_name=variable_name,
                                    standard_name=standard_name,
                                    item_name=item_name
                                    )
    return self.summary()
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
def _open_item(
    self,
    id: str,
) -> pystac.Item:
    """
    Open a STAC Item directly from the Item ID.

    The ID is treated as a "/"-separated path. Starting at the root
    Catalog, each component that names a child of the current node is
    descended into; for a component that does not, the current node is
    queried for an Item whose ID equals the full *id*.

    Parameters
    ----------
    id : str
        Item ID to open directly from URL.

    Returns
    -------
    pystac.Item
        STAC Item object.

    Raises
    ------
    ValueError
        If no Item with this ID is found, including the case where every
        component of *id* resolves to a child node rather than an Item.
    """
    # Initialise node to root Catalog:
    node = self.Catalog
    # Stays None when every component resolves to a child node:
    item = None

    # Iterate over ID components:
    for part in id.split("/"):
        # Traverse Catalog to child node containing Item:
        child = node.get_child(part)
        if child is not None:
            node = child
            continue
        # Collect STAC Item from the deepest node reached so far:
        item = next(node.get_items(id), None)

    if item is None:
        raise ValueError(f"Item '{id}' not found in Catalog.")

    return item
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
def _open_icechunk_repo(
    self,
    fields: dict,
) -> icechunk.Repository:
    """
    Open the Icechunk Repository described by a STAC Item asset.

    Parameters
    ----------
    fields : dict
        Asset fields describing the S3 storage. The keys 'bucket',
        'prefix', 'anonymous' and 'endpoint_url' are read.

    Returns
    -------
    icechunk.Repository
        Icechunk Repository object for the Item asset.
    """
    # S3 storage configuration; region and path-style addressing are fixed:
    s3_options = {
        "bucket": fields['bucket'],
        "prefix": fields['prefix'],
        "region": "us-east-1",
        "anonymous": fields['anonymous'],
        "endpoint_url": fields['endpoint_url'],
        "force_path_style": True,
    }

    # Open Icechunk Repository from S3 storage:
    return icechunk.Repository.open(storage=icechunk.s3_storage(**s3_options))
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def _open_icechunk_store(
    self,
    fields: dict,
    branch: str,
    group: str | None = None
) -> xr.Dataset:
    """
    Read a STAC Item asset stored as an Icechunk repository into xarray.

    Parameters
    ----------
    fields : dict
        Asset fields forwarded to ``_open_icechunk_repo`` to locate the
        Icechunk repository.
    branch : str
        Branch of the Icechunk repository to read.
    group : str, optional
        Group within the Icechunk repository to read. Default is None,
        which is passed through to ``xr.open_zarr`` unchanged.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.
    """
    # Read-only session on the requested branch of the repository:
    session = self._open_icechunk_repo(fields).readonly_session(branch=branch)

    # Open the session's store without consolidated metadata:
    return xr.open_zarr(session.store, consolidated=False, group=group)
|
|
1022
|
+
|
|
1023
|
+
|
|
1024
|
+
def _open_zarr_store(
    self,
    fields: dict,
    consolidated: bool = True,
    group: str | None = None
) -> xr.Dataset:
    """
    Read a STAC Item asset stored as a Zarr store into xarray.

    Parameters
    ----------
    fields : dict
        Asset fields locating the store. 'endpoint_url', 'bucket' and
        'prefix' are joined with "/" to form the store URL, and
        'zarr_format' is converted to an integer Zarr format.
    consolidated : bool, optional
        Passed to ``xr.open_zarr`` as ``consolidated``. Default is True.
    group : str, optional
        Group within the Zarr store to read. Default is None,
        which is passed through to ``xr.open_zarr`` unchanged.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.
    """
    # Assemble the store URL from its three location components:
    endpoint, bucket, prefix = (fields[key] for key in ('endpoint_url', 'bucket', 'prefix'))
    store_url = f"{endpoint}/{bucket}/{prefix}"
    zarr_version = int(fields['zarr_format'])

    return xr.open_zarr(store_url, zarr_format=zarr_version, consolidated=consolidated, group=group)
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
def open_repo(self,
              id: str,
              asset_key: Optional[str] = None
              ) -> icechunk.Repository:
    """
    Open STAC Item asset as an Icechunk Repository.

    Parameters
    ----------
    id : str
        Item ID to open asset.
    asset_key : str, optional
        Key of the asset to open. Default is None, which selects the
        first asset of the Item.

    Returns
    -------
    icechunk.Repository
        Icechunk Repository for STAC Item asset.

    Raises
    ------
    TypeError
        If *id* is not a string.
    RuntimeError
        If the Item ID is not found in the Catalog.
    ValueError
        If the Item has no assets, the asset key is not found in the
        Item, the asset is not an Icechunk repository, or a required
        asset field is missing.
    """
    # -- Validate Inputs -- #
    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")

    # -- Collect Item Asset -- #
    try:
        item = self._open_item(id=id)
    except Exception as err:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.") from err
    # Guard against a lookup that yields no Item without raising:
    if item is None:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.")

    # Default to the first asset of the Item if no key is provided:
    if asset_key is None:
        asset_key = next(iter(item.assets), None)
        if asset_key is None:
            raise ValueError(f"Item ID '{id}' has no assets.")
    asset = item.assets.get(asset_key)
    if asset is None:
        raise ValueError(f"Asset key '{asset_key}' not found in Item ID '{id}'.")

    # -- Open Icechunk Repository -- #
    # media_type is None (rather than a missing key) for untyped assets:
    if asset.media_type != "application/vnd.zarr+icechunk":
        raise ValueError(f"Item ID '{id}' asset is not an Icechunk repository.")

    fields = asset.extra_fields
    for field in ('bucket', 'prefix', 'anonymous', 'endpoint_url'):
        if field not in fields:
            raise ValueError(f"Missing asset field '{field}' in item '{id}'.")

    return self._open_icechunk_repo(fields=fields)
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
def open_dataset(self,
                 id: str,
                 group: Optional[str] = None,
                 variable_names: Optional[list[str]] = None,
                 start_datetime: Optional[str] = None,
                 end_datetime: Optional[str] = None,
                 bbox: Optional[tuple[float | int, float | int, float | int, float | int]] = None,
                 branch: str = "main",
                 consolidated: bool = True,
                 asset_key: Optional[str] = None
                 ) -> xr.Dataset:
    """
    Open STAC Item asset as an xarray Dataset.

    Parameters
    ----------
    id : str
        Item ID to open asset.
    group : str, optional
        Group within the Zarr or Icechunk repository to read. Default is None,
        which reads from the root of the repository.
    variable_names : list[str], optional
        List of variable names to be parsed from the dataset.
        Default is to return all variables.
    start_datetime : str, optional
        Start datetime used to subset the dataset. Should be a string
        in ISO format (e.g., "1976-01-01T00:00:00Z"). Default is None,
        which applies no lower time bound here.
    end_datetime : str, optional
        End datetime used to subset the dataset. Should be a string
        in ISO format (e.g., "2024-12-31T00:00:00Z"). Default is None,
        which applies no upper time bound here.
    bbox : tuple[float | int, float | int, float | int, float | int], optional
        Spatial bounding box used to subset the dataset. Should be a tuple
        of four numbers in the format: (min_lon, min_lat, max_lon, max_lat).
        Default is None, which applies no spatial subsetting.
    branch : str, optional
        Branch of the Icechunk repository to use. Default is to use the "main" branch.
        Only used for Icechunk assets.
    consolidated : bool, optional
        Whether to open Zarr stores using consolidated metadata. Default is True.
        Only used for plain Zarr assets.
    asset_key : str, optional
        Key of the asset to open. Default is None, which selects the
        first asset of the Item.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.

    Raises
    ------
    TypeError
        If any of the input parameters are of incorrect type.
    RuntimeError
        If the Item ID is not found in the Catalog.
    ValueError
        If the Item has no assets, the asset key is not found in the
        Item, a required asset field is missing, or the asset media
        type is not supported.
    KeyError
        If the specified variable(s) are not found in the dataset.
    """
    # -- Validate Inputs -- #
    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")
    if group is not None and not isinstance(group, str):
        raise TypeError("'group' must be a string or None.")
    if not isinstance(variable_names, (type(None), list)):
        raise TypeError("'variable_names' must be a list of strings.")
    if variable_names is not None and not all(isinstance(var, str) for var in variable_names):
        raise TypeError("'variable_names' must be a list of strings.")
    if not isinstance(start_datetime, (type(None), str)):
        raise TypeError("'start_datetime' must be a string or None.")
    if not isinstance(end_datetime, (type(None), str)):
        raise TypeError("'end_datetime' must be a string or None.")
    if not isinstance(bbox, (type(None), tuple)):
        raise TypeError("'bbox' must be a tuple or None.")
    if bbox is not None and (len(bbox) != 4 or not all(isinstance(coord, (float, int)) for coord in bbox)):
        raise TypeError("'bbox' must be a tuple of the form (min_lon, min_lat, max_lon, max_lat) with float or int values.")
    if not isinstance(branch, str):
        raise TypeError("'branch' must be a string.")
    if not isinstance(consolidated, bool):
        raise TypeError("'consolidated' must be a boolean.")

    # -- Collect Item Asset -- #
    try:
        item = self._open_item(id=id)
    except Exception as err:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.") from err
    # Guard against a lookup that yields no Item without raising:
    if item is None:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.")

    # Default to the first asset of the Item if no key is provided:
    if asset_key is None:
        asset_key = next(iter(item.assets), None)
        if asset_key is None:
            raise ValueError(f"Item ID '{id}' has no assets.")
    asset = item.assets.get(asset_key)
    if asset is None:
        raise ValueError(f"Asset key '{asset_key}' not found in Item ID '{id}'.")

    fields = asset.extra_fields
    # media_type is None (rather than a missing key) for untyped assets:
    media_type = asset.media_type

    # Open Icechunk Repository as xarray Dataset:
    if media_type == "application/vnd.zarr+icechunk":
        for field in ('bucket', 'prefix', 'anonymous', 'endpoint_url'):
            if field not in fields:
                raise ValueError(f"Missing asset field '{field}' in item '{id}'.")
        ds = self._open_icechunk_store(fields=fields, branch=branch, group=group)

    # Open Zarr store as xarray Dataset:
    elif media_type == 'application/vnd.zarr':
        for field in ('bucket', 'prefix', 'endpoint_url', 'zarr_format'):
            if field not in fields:
                raise ValueError(f"Missing asset field '{field}' in item '{id}'.")
        # Forward the caller's consolidated-metadata choice to the store:
        ds = self._open_zarr_store(fields=fields, consolidated=consolidated, group=group)

    else:
        raise ValueError(f"Unsupported media type {media_type} for Item asset.")

    # Selecting variables:
    if variable_names is not None:
        try:
            ds = ds[variable_names]
        except KeyError as err:
            raise KeyError("One or more variables not found in dataset.") from err

    # Spatio-temporal subsetting:
    if bbox:
        ds = apply_bbox(ds=ds, bbox=bbox)

    if start_datetime or end_datetime:
        ds = apply_time_bounds(ds=ds, start_datetime=start_datetime, end_datetime=end_datetime)

    return ds
|