OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1242 @@
1
+ """
2
+ oceandatacatalog.py
3
+
4
+ Description:
5
+ This module defines the OceanDataCatalog() class which is a
6
+ container for the NOC STAC and a basic API for accessing data
7
+ using pystac, Zarr and Icechunk.
8
+
9
+ Authors:
10
+ - Ollie Tooth
11
+ """
12
+ from typing import Optional
13
+
14
+ import icechunk
15
+ import numpy as np
16
+ import pystac
17
+ import xarray as xr
18
+
19
+ # -- NOC brand CSS -- #
20
+ _NOC_CSS = """
21
+ <style>
22
+ .ods-card {
23
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
24
+ font-size: 13px;
25
+ border: 1px solid #0087c1;
26
+ border-radius: 6px;
27
+ overflow: hidden;
28
+ max-width: 950px;
29
+ margin: 6px 0;
30
+ box-shadow: 0 1px 4px rgba(0,63,112,0.12);
31
+ }
32
+ .ods-header {
33
+ background: #003f70;
34
+ color: #ffffff;
35
+ padding: 8px 14px;
36
+ display: flex;
37
+ align-items: center;
38
+ gap: 10px;
39
+ font-size: 14px;
40
+ font-weight: 600;
41
+ letter-spacing: 0.3px;
42
+ }
43
+ .ods-badge {
44
+ background: #0087c1;
45
+ color: #ffffff;
46
+ border-radius: 12px;
47
+ padding: 1px 9px;
48
+ font-size: 11px;
49
+ font-weight: 500;
50
+ white-space: nowrap;
51
+ }
52
+ .ods-badge-neutral {
53
+ background: #5a9cbf;
54
+ color: #ffffff;
55
+ border-radius: 12px;
56
+ padding: 1px 9px;
57
+ font-size: 11px;
58
+ font-weight: 500;
59
+ white-space: nowrap;
60
+ }
61
+ .ods-body {
62
+ background: #eef6fb;
63
+ padding: 10px 14px;
64
+ }
65
+ .ods-stats {
66
+ display: flex;
67
+ flex-wrap: wrap;
68
+ gap: 8px;
69
+ margin-bottom: 8px;
70
+ }
71
+ .ods-stat {
72
+ background: #ffffff;
73
+ border: 1px solid #b3d7ea;
74
+ border-radius: 5px;
75
+ padding: 4px 10px;
76
+ font-size: 12px;
77
+ color: #003f70;
78
+ }
79
+ .ods-stat span {
80
+ font-weight: 600;
81
+ }
82
+ .ods-url {
83
+ font-size: 12px;
84
+ font-weight: 500;
85
+ color: #555;
86
+ word-break: break-all;
87
+ }
88
+ .ods-url a { color: #0087c1; text-decoration: none; }
89
+ .ods-url a:hover { text-decoration: underline; }
90
+ .ods-table {
91
+ width: 100%;
92
+ border-collapse: collapse;
93
+ font-size: 12px;
94
+ margin-top: 2px;
95
+ }
96
+ .ods-table thead tr {
97
+ background: #003f70;
98
+ color: #ffffff;
99
+ }
100
+ .ods-table thead th {
101
+ padding: 6px 10px;
102
+ text-align: left;
103
+ font-weight: 600;
104
+ white-space: nowrap;
105
+ }
106
+ .ods-table tbody tr:nth-child(even) { background: #d6ecf5; }
107
+ .ods-table tbody tr:nth-child(odd) { background: #ffffff; }
108
+ .ods-table tbody tr:hover { background: #b3d7ea; }
109
+ .ods-table td {
110
+ padding: 5px 10px;
111
+ vertical-align: top;
112
+ text-align: left;
113
+ border-bottom: 1px solid #cce4f0;
114
+ }
115
+ .ods-id {
116
+ font-family: monospace;
117
+ font-size: 11px;
118
+ color: #003f70;
119
+ white-space: nowrap;
120
+ }
121
+ details.ods-details > summary {
122
+ cursor: pointer;
123
+ color: #0087c1;
124
+ font-size: 11px;
125
+ list-style: none;
126
+ user-select: none;
127
+ }
128
+ details.ods-details > summary::-webkit-details-marker { display: none; }
129
+ details.ods-details > summary::before { content: "▶ "; font-size: 9px; }
130
+ details.ods-details[open] > summary::before { content: "▼ "; font-size: 9px; }
131
+ details.ods-details .ods-detail-body {
132
+ margin-top: 4px;
133
+ color: #333;
134
+ font-size: 11px;
135
+ line-height: 1.5;
136
+ }
137
+ .ods-section-title {
138
+ font-weight: 600;
139
+ color: #003f70;
140
+ margin-bottom: 6px;
141
+ font-size: 12px;
142
+ }
143
+ .ods-code {
144
+ background: #ffffff;
145
+ color: #003f70;
146
+ font-family: monospace;
147
+ font-size: 12px;
148
+ padding: 8px 12px;
149
+ border-radius: 4px;
150
+ border: 1px solid #cce4f0;
151
+ display: flex;
152
+ align-items: center;
153
+ justify-content: space-between;
154
+ gap: 8px;
155
+ margin-top: 4px;
156
+ }
157
+ .ods-copy-btn {
158
+ background: #0087c1;
159
+ color: #ffffff;
160
+ border: none;
161
+ border-radius: 4px;
162
+ padding: 3px 8px;
163
+ font-size: 11px;
164
+ cursor: pointer;
165
+ white-space: nowrap;
166
+ flex-shrink: 0;
167
+ }
168
+ .ods-copy-btn:hover { background: #006fa0; }
169
+ .ods-none { color: #999; font-style: italic; }
170
+ </style>
171
+ """
172
+
173
+ # -- Utility Functions -- #
174
def apply_bbox(ds: xr.Dataset,
               bbox: tuple
               ) -> xr.Dataset:
    """
    Apply a geographical bounding box to subset an xarray Dataset.

    Longitude / latitude coordinates are identified via their
    ``standard_name`` attributes ('longitude' / 'latitude'), falling
    back to the names 'nav_lon' / 'nav_lat' when no such attribute exists.

    Parameters
    ----------
    ds : xr.Dataset
        Input xarray Dataset.
    bbox : tuple
        Geographical bounding box in the format (min_lon, max_lon, min_lat, max_lat).

    Returns
    -------
    xr.Dataset
        Geographically subsetted xarray Dataset. For 2D longitude / latitude
        coordinates the Dataset is cropped to the smallest index rectangle
        containing the bounding box and values outside the box are masked.

    Raises
    ------
    ValueError
        If ``ds`` is not an xarray Dataset, ``bbox`` is not a 4-tuple, the
        2D longitude / latitude coordinates do not share dimensions, or no
        grid points are found inside the bounding box.
    """
    # -- Validate Inputs -- #
    if not isinstance(ds, xr.Dataset):
        raise ValueError("'ds' must be an xarray Dataset.")
    if not (isinstance(bbox, tuple) and len(bbox) == 4):
        raise ValueError("'bbox' must be a tuple of the form (min_lon, max_lon, min_lat, max_lat).")

    # Unpack once so both branches below interpret the bbox identically
    # (previously the 2D branch read it as (min_lon, min_lat, max_lon, max_lat)).
    min_lon, max_lon, min_lat, max_lat = bbox

    # -- Identify geographical coordinate names & dimensions -- #
    # Default lat/lon coord names:
    lon_name, lat_name = "nav_lon", "nav_lat"
    # Update lat/lon coord names via standard_name attributes:
    for coord in ds.coords:
        standard_name = str(ds[coord].attrs.get('standard_name', '')).lower()
        if standard_name == 'longitude':
            lon_name = coord
        if standard_name == 'latitude':
            lat_name = coord

    # -- Apply Bounding Box -- #
    if (ds[lon_name].ndim > 1) and (ds[lat_name].ndim > 1):
        # -- Case 1: 2D lat/lon coordinates -- #
        # Identify lat/lon coordinate dimensions:
        if ds[lon_name].dims != ds[lat_name].dims:
            raise ValueError("Longitude and latitude coordinates must have the same dimensions.")
        y_name, x_name = ds[lon_name].dims

        # Define bbox mask:
        mask = (
            (ds[lon_name] >= min_lon)
            & (ds[lon_name] <= max_lon)
            & (ds[lat_name] >= min_lat)
            & (ds[lat_name] <= max_lat)
        )

        # Find rows/columns containing at least one valid grid point:
        rows = mask.any(dim=x_name)
        cols = mask.any(dim=y_name)
        y_idx = np.where(rows.compute())[0]
        x_idx = np.where(cols.compute())[0]

        if len(y_idx) == 0 or len(x_idx) == 0:
            raise ValueError("No grid points found inside bbox")

        # Mask points outside the bbox, then crop to the enclosing index rectangle:
        ds_subset = (ds
                     .where(mask, drop=False)
                     .isel({y_name: slice(y_idx.min(), y_idx.max() + 1),
                            x_name: slice(x_idx.min(), x_idx.max() + 1),
                            })
                     )
    else:
        # -- Case 2: 1D lat/lon coordinates -- #
        # NOTE(review): label-based slicing presumably assumes ascending
        # coordinate values - confirm for datasets with descending latitude.
        ds_subset = ds.sel({lon_name: slice(min_lon, max_lon),
                            lat_name: slice(min_lat, max_lat)
                            })

    return ds_subset
248
+
249
+
250
def apply_time_bounds(ds: xr.Dataset,
                      start_datetime: str | None = None,
                      end_datetime: str | None = None
                      ) -> xr.Dataset:
    """
    Apply temporal subsetting to an xarray Dataset.

    The time dimension is taken to be the first dimension of ``ds`` whose
    name contains 'time' (case-insensitive).

    Parameters
    ----------
    ds : xr.Dataset
        Input xarray Dataset.
    start_datetime : str, optional
        Start datetime in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').
        If None, the selection is unbounded at the start.
    end_datetime : str, optional
        End datetime in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').
        If None, the selection is unbounded at the end.

    Returns
    -------
    xr.Dataset
        Temporally subsetted xarray Dataset.

    Raises
    ------
    ValueError
        If ``ds`` is not an xarray Dataset, a datetime bound is not a
        string, or ``ds`` has no dimension whose name contains 'time'.
    """
    # -- Validate Inputs -- #
    if not isinstance(ds, xr.Dataset):
        raise ValueError("'ds' must be an xarray Dataset.")
    if start_datetime is not None and not isinstance(start_datetime, str):
        raise ValueError("'start_datetime' must be a string in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').")
    if end_datetime is not None and not isinstance(end_datetime, str):
        raise ValueError("'end_datetime' must be a string in ISO format (e.g., 'YYYY-MM-DDTHH:MM:SS').")

    # -- Identify time dimension -- #
    # str() guards against non-string (hashable) dimension names.
    time_name = next(
        (dim for dim in ds.dims if 'time' in str(dim).lower()),
        None,
    )
    if time_name is None:
        # Previously this fell through to an UnboundLocalError below.
        raise ValueError("No time dimension found in 'ds' (expected a dimension name containing 'time').")

    # -- Apply temporal subsetting -- #
    return ds.sel({time_name: slice(start_datetime, end_datetime)})
291
+
292
+
293
+ # -- Define CatalogSummary() class -- #
294
+ class CatalogSummary:
295
+ """
296
+ Container for OceanDataCatalog summary.
297
+
298
+ Parameters
299
+ ----------
300
+ num_collections : int
301
+ The number of collections in the catalog.
302
+ num_items : int
303
+ The number of items in the catalog.
304
+ other_info : dict
305
+ Any other relevant summary information about the catalog.
306
+ """
307
+ def __init__(self,
308
+ display_text: str | None = None,
309
+ display_html: str | None = None,
310
+ ):
311
+ self.display_text = display_text
312
+ self.display_html = display_html
313
+
314
+ def __repr__(self):
315
+ """
316
+ Plain text representation of the CatalogSummary.
317
+ """
318
+ return self.display_text
319
+
320
+ def _repr_html_(self):
321
+ """
322
+ HTML representation of the CatalogSummary.
323
+ """
324
+ return self.display_html
325
+
326
+ # -- Define OceanDataCatalog() class -- #
327
+ class OceanDataCatalog:
328
+ """
329
+ A class to interact with the National Oceanography Centre (NOC)
330
+ SpatioTemporal Asset Catalogs (STAC).
331
+
332
+ The catalog provides metadata and access to oceanographic
333
+ datasets stored in cloud object storage. Users can search the
334
+ catalog, inspect available Items, and open datasets as familiar
335
+ xarray data structures.
336
+
337
+ Parameters
338
+ ----------
339
+ catalog_name : str, optional
340
+ Name of the NOC STAC catalog to use.
341
+ catalog_url : str, optional
342
+ Path or URL to the root STAC catalog. If not provided,
343
+ a default path to the NOC STAC catalog is used.
344
+
345
+ Attributes
346
+ ----------
347
+ Catalog : pystac.Catalog
348
+ The root NOC STAC catalog.
349
+ Collection : pystac.Collection or None
350
+ The current STAC Collection being viewed.
351
+ Items : list of pystac.Item or None
352
+ The list of STAC Items returned from the most recent query (None before any query).
353
+ """
354
def __init__(self,
             catalog_name: str = "noc-stac",
             catalog_url: str | None = None
             ):
    """
    Open the root STAC catalog and initialise the query state.

    Parameters
    ----------
    catalog_name : str, optional
        Name of the NOC STAC catalog; used to build the default
        catalog URL and shown in the object's representations.
    catalog_url : str, optional
        Path or URL to the root STAC catalog. If not provided (or empty),
        the default NOC object-store URL for ``catalog_name`` is used.
    """
    # Define the URL to the NOC STAC root catalog:
    self._stac_url = catalog_url or f"https://noc-msm-o.s3-ext.jc.rl.ac.uk/oceandatastore/{catalog_name}/catalog.json"
    # Store the root catalog as a class attribute:
    self.Catalog = pystac.read_file(self._stac_url)

    # No Collection selected and no search performed yet:
    self.Collection = None
    self.Items = None
    # Cache the catalog name for display:
    self._catalog_name = catalog_name
368
+
369
def __repr__(self) -> str:
    """
    Plain text representation of the OceanDataCatalog.

    Lists the catalog name and URL, the number of available Collections,
    the active Collection ID and the size of the most recent search.
    """
    placeholder = "—"

    if self.Collection:
        active_collection = self.Collection.id
    else:
        active_collection = placeholder

    if self.Items is None:
        search_count = placeholder
    else:
        search_count = len(self.Items)

    lines = [
        "OceanDataCatalog",
        f" Catalog: {self._catalog_name}",
        f" URL: {self._stac_url}",
        f" Collections: {len(self.available_collections)} available",
        f" Collection: {active_collection}",
        f" Search: {search_count} items",
    ]
    return "\n".join(lines)
384
+
385
+
386
def _repr_html_(self) -> str:
    """
    HTML representation of the OceanDataCatalog.

    Renders a styled card with the catalog version, the number of
    Collections, the active Collection, the size of the last search and
    a table of every Collection (ID, title, description, temporal extent).

    Text taken from the catalog (IDs, titles, descriptions, URL) is
    HTML-escaped before being embedded in the markup.
    """
    # Local stdlib import so this method does not rely on module-level changes.
    from html import escape

    none_dash = "<span class='ods-none'>—</span>"

    # Resolve the Collections once and reuse them for both the count and the table.
    collections = list(self.Catalog.get_all_collections())
    n_collections = len(collections)

    col_name = (
        escape(str(self.Collection.id))
        if self.Collection
        else "<span class='ods-none'>none selected</span>"
    )
    n_items = (
        f"{len(self.Items)} items"
        if self.Items is not None
        else "<span class='ods-none'>no search yet</span>"
    )

    def _extent_dates(col):
        # Best-effort: any malformed / missing temporal extent is shown as dashes.
        try:
            ext = col.extent.temporal.intervals
            start = ext[0][0].strftime("%Y-%m-%d") if ext[0][0] else "—"
            end = ext[0][1].strftime("%Y-%m-%d") if ext[0][1] else "present"
        except Exception:
            start, end = "—", "—"
        return start, end

    rows = []
    for col in collections:
        start, end = _extent_dates(col)
        desc = col.description or ""
        if desc:
            # Strip Markdown bold markers, then escape for safe embedding.
            desc_cell = (
                f"<details class='ods-details'>"
                f"<summary>Summary</summary>"
                f"<div class='ods-detail-body'>{escape(desc.replace('**', ''))}</div>"
                f"</details>"
            )
        else:
            desc_cell = none_dash
        is_active = bool(self.Collection) and col.id == self.Collection.id
        active = " <span class='ods-badge' style='font-size:10px'>active</span>" if is_active else ""
        col_title_cell = escape(str(col.title)) if col.title else none_dash
        rows.append(
            f"<tr>"
            f"<td><span class='ods-id'>{escape(str(col.id))}</span>{active}</td>"
            f"<td>{col_title_cell}</td>"
            f"<td>{desc_cell}</td>"
            f"<td>{start}</td>"
            f"<td>{end}</td>"
            f"</tr>"
        )
    rows_html = "".join(rows)

    catalog_name = escape(str(self._catalog_name))
    version = escape(str(self.Catalog.extra_fields.get('catalog_version', 'None')))
    # quote=True (the default) also escapes quotes, keeping the href attribute intact.
    stac_url = escape(str(self._stac_url))

    return (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f" <div class='ods-header'>"
        f" OceanDataCatalog"
        f" <span class='ods-badge'>{catalog_name}</span>"
        f" </div>"
        f" <div class='ods-body'>"
        f" <div class='ods-stats'>"
        f" <div class='ods-stat'>Version&nbsp;<span>{version}</span></div>"
        f" <div class='ods-stat'>Collections&nbsp;<span>{n_collections}</span></div>"
        f" <div class='ods-stat'>Active collection&nbsp;<span>{col_name}</span></div>"
        f" <div class='ods-stat'>Last search&nbsp;<span>{n_items}</span></div>"
        f" </div>"
        f" <table class='ods-table'>"
        f" <thead><tr>"
        f" <th>Collection ID</th><th>Title</th><th>Description</th>"
        f" <th>From</th><th>To</th>"
        f" </tr></thead>"
        f" <tbody>{rows_html}</tbody>"
        f" </table>"
        f" <div class='ods-section-title' style='margin-top:10px'>Source URL</div>"
        f" <div class='ods-url'> <a href='{stac_url}' target='_blank'>{stac_url}</a></div>"
        f" </div>"
        f"</div>"
    )
458
+
459
+
460
@property
def available_collections(self) -> list[str]:
    """
    IDs of every Collection returned by the root Catalog's
    ``get_all_collections()``.
    """
    collections = self.Catalog.get_all_collections()
    return [collection.id for collection in collections]
466
+
467
+
468
@property
def available_items(self) -> list[str]:
    """
    Item IDs from the most recent search, or - when no search has been
    made - from the current Collection (else the root Catalog).
    """
    if self.Items is None:
        # No search yet: walk the active Collection, or the whole Catalog.
        scope = self.Collection if self.Collection else self.Catalog
        return [entry.id for entry in scope.get_items(recursive=True)]

    # Otherwise report the IDs held from the latest search:
    return [entry.id for entry in self.Items]
480
+
481
+
482
def summary(self) -> CatalogSummary:
    """
    Summary of the most recent OceanDataCatalog search.

    Builds both an HTML table and a fixed-width plain-text table of the
    Items held in ``self.Items`` (ID, title, platform, start / end
    datetime and variables).

    Returns
    -------
    CatalogSummary
        Container holding the plain-text and HTML renderings.

    Raises
    ------
    ValueError
        If there are no Items from a previous search.
    """
    # Local stdlib import so this method does not rely on module-level changes.
    from html import escape

    # -- Validate STAC Items -- #
    if not self.Items:
        raise ValueError("No Items returned in most recent query. Use 'search()' to query Catalog.")

    n = len(self.Items)
    plural = 's' if n != 1 else ''
    none_dash = "<span class='ods-none'>—</span>"

    def _html_cell(value) -> str:
        # Missing / empty values render as a dash; everything else is escaped.
        return none_dash if value in (None, "") else escape(str(value))

    def _text_cell(value, width: int) -> str:
        # Coerce to str so None / non-string properties cannot break slicing.
        return ("" if value is None else str(value))[:width].ljust(width)

    # Column layout of the plain-text table:
    col_w = [46, 28, 10, 12, 12, 30]
    headers = ["Item ID", "Title", "Platform", "Start Date", "End Date", "Variables"]
    sep = "+" + "+".join("-" * (w + 2) for w in col_w) + "+"
    header_row = "| " + " | ".join(h.ljust(w) for h, w in zip(headers, col_w)) + " |"
    text_lines = [f"Search Results — {n} Item{plural} found", sep, header_row, sep]

    html_rows = []
    for item in self.Items:
        props = item.properties
        title = props.get("title")
        platform = props.get("platform")
        start = props.get("start_datetime")
        end = props.get("end_datetime")
        variables = [str(v) for v in (props.get("variables") or [])]

        # ----- HTML row ----- #
        if variables:
            var_list = "<br>".join(escape(v) for v in variables)
            vars_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(variables)} variable{'s' if len(variables) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{var_list}</div>"
                f"</details>"
            )
        else:
            vars_cell = none_dash
        html_rows.append(
            f"<tr>"
            f"<td><span class='ods-id'>{escape(str(item.id))}</span></td>"
            f"<td>{_html_cell(title)}</td>"
            f"<td>{_html_cell(platform)}</td>"
            f"<td>{_html_cell(start)}</td>"
            f"<td>{_html_cell(end)}</td>"
            f"<td>{vars_cell}</td>"
            f"</tr>"
        )

        # ----- Plain-text row (each value truncated to its column width) ----- #
        values = [item.id, title, platform, start, end, ", ".join(variables)]
        cells = [_text_cell(v, w) for v, w in zip(values, col_w)]
        text_lines.append("| " + " | ".join(cells) + " |")

    text_lines.append(sep)
    text = "\n".join(text_lines)

    rows_html = "".join(html_rows)
    col_badge = (
        f"<span class='ods-badge-neutral'>{escape(str(self.Collection.id))}</span>"
        if self.Collection else ""
    )
    html = (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f" <div class='ods-header'>"
        f" Search Results"
        f" <span class='ods-badge'>{n} Item{plural} found</span>"
        f" {col_badge}"
        f" </div>"
        f" <div class='ods-body'>"
        f" <table class='ods-table'>"
        f" <thead><tr>"
        f" <th>Item ID</th><th>Title</th><th>Platform</th>"
        f" <th>Start Date</th><th>End Date</th><th>Variables</th>"
        f" </tr></thead>"
        f" <tbody>{rows_html}</tbody>"
        f" </table>"
        f" </div>"
        f"</div>"
    )

    return CatalogSummary(display_text=text, display_html=html)
571
+
572
+
573
def item_summary(self, id: str) -> CatalogSummary:
    """
    Build the detailed summary for a single OceanDataStore Item.

    Searches the current Items list first; if the Item is not found
    there it is looked up via ``self._open_item(id=id)``.

    The returned object carries two renderings: an HTML card with
    collapsible property / asset sections and a plain-text summary.
    Text taken from the Item metadata is HTML-escaped in the HTML card.

    Parameters
    ----------
    id : str
        Item ID to display metadata for.

    Returns
    -------
    CatalogSummary
        Container holding the plain-text and HTML renderings.

    Raises
    ------
    TypeError
        If *id* is not a string.
    ValueError
        If the Item ID is not found in the Catalog.
    """
    # Local stdlib import so this method does not rely on module-level changes.
    from html import escape

    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")

    def _esc(value) -> str:
        return escape(str(value))

    # Collect STAC Item properties metadata:
    item = next((it for it in (self.Items or []) if it.id == id), None)
    if item is None:
        try:
            item = self._open_item(id=id)
        except Exception as err:
            # Keep the original failure attached as the cause for debugging.
            raise ValueError(f"Item '{id}' not found in Catalog.") from err

    props = item.properties
    title = props.get("title", "")
    desc_raw = props.get("description", "")
    # Drop the trailing "OceanDataCatalog Access" part of the description:
    desc = desc_raw.split("OceanDataCatalog Access")[0].strip() if desc_raw else ""
    platform = props.get("platform", "")
    start = props.get("start_datetime", "")
    end = props.get("end_datetime", "")
    bbox = item.bbox
    bbox_str = (
        f"{bbox[0]:.2f}, {bbox[1]:.2f}, {bbox[2]:.2f}, {bbox[3]:.2f}"
        if bbox else "—"
    )

    # Properties already shown in the header fields (skipped in the tables):
    _shown = {"title", "description", "platform", "start_datetime", "end_datetime", "datetime"}

    # ---- HTML Output (Jupyter) ---- #
    none_span = "<span class='ods-none'>—</span>"
    coll_badge = (
        f"<span class='ods-badge-neutral'>{_esc(item.collection_id)}</span>"
        if item.collection_id else ""
    )

    core_stats = (
        f"<div class='ods-stats'>"
        f" <div class='ods-stat'>Platform&nbsp;<span>{_esc(platform or '—')}</span></div>"
        f" <div class='ods-stat'>Start&nbsp;<span>{_esc(start or '—')}</span></div>"
        f" <div class='ods-stat'>End&nbsp;<span>{_esc(end or '—')}</span></div>"
        f" <div class='ods-stat'>BBox&nbsp;<span>({bbox_str})</span></div>"
        f"</div>"
    )

    if title or desc:
        title_val = _esc(title) if title else none_span
        # Strip Markdown bold markers before escaping the description:
        desc_val = _esc(desc.replace('**', '')) if desc else none_span
        title_row = (
            f"<table class='ods-table' style='margin-bottom:8px'>"
            f" <thead><tr><th>Title</th><th>Description</th></tr></thead>"
            f" <tbody><tr><td>{title_val}</td><td>{desc_val}</td></tr></tbody>"
            f"</table>"
        )
    else:
        title_row = ""

    # Properties:
    prop_row_list = []
    for key, val in props.items():
        if key in _shown:
            continue
        if isinstance(val, list):
            items_html = "<br>".join(_esc(v) for v in val)
            val_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(val)} item{'s' if len(val) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{items_html}</div>"
                f"</details>"
            )
        elif isinstance(val, dict):
            dict_html = "<br>".join(f"<b>{_esc(k)}</b>: {_esc(v)}" for k, v in val.items())
            val_cell = (
                f"<details class='ods-details'>"
                f"<summary>{len(val)} field{'s' if len(val) != 1 else ''}</summary>"
                f"<div class='ods-detail-body'>{dict_html}</div>"
                f"</details>"
            )
        else:
            val_cell = _esc(val) if val is not None else none_span
        prop_row_list.append(f"<tr><td class='ods-id'>{_esc(key)}</td><td>{val_cell}</td></tr>")
    prop_rows = "".join(prop_row_list)

    props_section = ""
    if prop_rows:
        props_section = (
            f"<div class='ods-section-title' style='margin-top:10px'>Properties</div>"
            f"<table class='ods-table'>"
            f" <thead><tr><th>Property</th><th>Value</th></tr></thead>"
            f" <tbody>{prop_rows}</tbody>"
            f"</table>"
        )

    # Assets:
    asset_row_list = []
    for asset_key, asset in item.assets.items():
        af = asset.extra_fields
        asset_row_list.append(
            f"<tr>"
            f"<td class='ods-id'>{_esc(asset_key)}</td>"
            f"<td>{_esc(asset.media_type or '')}</td>"
            f"<td>{_esc(af.get('endpoint_url', ''))}</td>"
            f"<td>{_esc(af.get('bucket', ''))}</td>"
            f"<td class='ods-id'>{_esc(af.get('prefix', ''))}</td>"
            f"</tr>"
        )
    asset_rows = "".join(asset_row_list)

    assets_section = ""
    if asset_rows:
        assets_section = (
            f"<div class='ods-section-title' style='margin-top:10px'>Assets</div>"
            f"<table class='ods-table'>"
            f" <thead><tr>"
            f" <th>Key</th><th>Media Type</th><th>Endpoint</th><th>Bucket</th><th>Prefix</th>"
            f" </tr></thead>"
            f" <tbody>{asset_rows}</tbody>"
            f"</table>"
        )

    # Access snippets (raw for plain text, escaped for HTML text / attributes):
    access_ds_str = f"catalog.open_dataset(id='{id}')"
    access_repo_str = f"catalog.open_repo(id='{id}')"
    access_ds_html = _esc(access_ds_str)
    access_repo_html = _esc(access_repo_str)
    # Inline handler: copies the button's data-copy value to the clipboard.
    _copy_js = (
        "(function(b){"
        "var t=document.createElement('textarea');"
        "t.value=b.dataset.copy;"
        "document.body.appendChild(t);"
        "t.select();"
        "document.execCommand('copy');"
        "document.body.removeChild(t);"
        "b.textContent='Copied!';"
        "setTimeout(function(){b.textContent='Copy'},1500)"
        "})(this)"
    )
    access_section = (
        f"<div class='ods-section-title' style='margin-top:10px'>Access</div>"
        f"<div class='ods-code'>"
        f" <code>{access_ds_html}</code>"
        f" <button class='ods-copy-btn' data-copy=\"{access_ds_html}\" onclick=\"{_copy_js}\">Copy</button>"
        f"</div>"
        f"<div class='ods-code'>"
        f" <code>{access_repo_html}</code>"
        f" <button class='ods-copy-btn' data-copy=\"{access_repo_html}\" onclick=\"{_copy_js}\">Copy</button>"
        f"</div>"
    )

    html = (
        f"{_NOC_CSS}"
        f"<div class='ods-card'>"
        f" <div class='ods-header'>"
        f" {_esc(id)}"
        f" {coll_badge}"
        f" </div>"
        f" <div class='ods-body'>"
        f" {core_stats}"
        f" {title_row}"
        f" {access_section}"
        f" {props_section}"
        f" {assets_section}"
        f" </div>"
        f"</div>"
    )

    # ---- Plain-Text Output ---- #
    text_lines = [
        f"Item: {id}",
        f" Title: {title or '—'}",
        f" Platform: {platform or '—'}",
        f" Start: {start or '—'}",
        f" End: {end or '—'}",
        f" BBox: {bbox_str}",
        "",
        " Properties:",
    ]
    for key, val in props.items():
        if key in _shown:
            continue
        if isinstance(val, list):
            # Preview at most five entries of list-valued properties:
            preview = ", ".join(str(v) for v in val[:5])
            suffix = ", ..." if len(val) > 5 else ""
            text_lines.append(f" {key}: [{preview}{suffix}]")
        else:
            text_lines.append(f" {key}: {val}")
    if item.assets:
        text_lines.append("")
        text_lines.append(" Assets:")
        for asset_key, asset in item.assets.items():
            af = asset.extra_fields
            loc = f"{af.get('endpoint_url', '')}/{af.get('bucket', '')}/{af.get('prefix', '')}"
            text_lines.append(f" {asset_key}: {asset.media_type or ''} — {loc}")
    text_lines += ["", f" Access: {access_ds_str}"]
    text = "\n".join(text_lines)

    return CatalogSummary(display_text=text, display_html=html)
790
+
791
+
792
+ def _filter_items(self,
793
+ items: list[pystac.Item],
794
+ dataset_type: Optional[str] = None,
795
+ product_type: Optional[str] = None,
796
+ variable_name: Optional[str] = None,
797
+ standard_name: Optional[str] = None,
798
+ item_name: Optional[str] = None
799
+ ):
800
+ """
801
+ Filter Items based on specified dataset type, product type,
802
+ variable name, and standard name.
803
+
804
+ Parameters
805
+ ----------
806
+ items : list[pystac.Item]
807
+ List of STAC Items to filter.
808
+ dataset_type : str, optional
809
+ Dataset type to filter Items by.
810
+ product_type : str, optional
811
+ Product type to filter Items by.
812
+ variable_name : str, optional
813
+ Variable name to filter Items by.
814
+ standard_name : str, optional
815
+ Standard variable name to filter Items by.
816
+ item_name : str, optional
817
+ Substring to filter Item IDs by.
818
+ """
819
+ if dataset_type:
820
+ items = [item for item in items if dataset_type in str(item.properties.get('dataset_type', ''))]
821
+ if product_type:
822
+ items = [item for item in items if product_type in str(item.properties.get('product_type', ''))]
823
+ if variable_name:
824
+ items = [item for item in items if any(variable_name in var for var in item.properties.get('variables', []))]
825
+ if standard_name:
826
+ items = [item for item in items if any(standard_name in var for var in item.properties.get('variable_standard_names', []))]
827
+ if item_name:
828
+ items = [item for item in items if item_name in item.id]
829
+
830
+ return items
831
+
832
+
833
def clear(self) -> None:
    """
    Reset the search state of the OceanDataCatalog.

    Both the active Collection and the Items stored by the latest
    search are set back to None.
    """
    self.Collection = self.Items = None
840
+
841
+
842
def search(
    self,
    collection: Optional[str] = None,
    dataset_type: Optional[str] = None,
    product_type: Optional[str] = None,
    variable_name: Optional[str] = None,
    standard_name: Optional[str] = None,
    item_name: Optional[str] = None
):
    """
    Search the OceanDataCatalog for Items matching the specified criteria.

    When both dataset_type / product_type and variable / standard names are
    provided, the search returns all Items which match both criteria.

    All arguments are validated before the search state is modified, so a
    rejected call leaves the active Collection and Items untouched.

    Parameters
    ----------
    collection : str, optional
        Collection name to search for; it becomes the active Collection.
        Default is None, which searches the active Collection if one is
        set and otherwise the entire root Catalog.
    dataset_type : str, optional
        Dataset type to search for (e.g., 'model', 'observation').
        Default is None, which retrieves Items from all dataset types.
    product_type : str, optional
        Product type to search for (e.g., 'timeseries', 'climatology').
        Default is None, which retrieves Items from all product types.
    variable_name : str, optional
        Variable name to search for. Default is None,
        which retrieves all Items.
    standard_name : str, optional
        Standard variable name to search for. Default is None,
        which retrieves all Items.
    item_name : str, optional
        Substring to filter Item IDs by. Default is None,
        which retrieves all Items.

    Returns
    -------
    object
        The value returned by ``self.summary()`` once ``self.Items`` has
        been updated with the search results.

    Raises
    ------
    ValueError
        If the specified collection is not found in the Catalog.
    ValueError
        If both variable_name and standard_name are specified.
    TypeError
        If any of the input parameters are of incorrect type.
    """
    # -- Validate Inputs -- #
    arguments = {
        'collection': collection,
        'dataset_type': dataset_type,
        'product_type': product_type,
        'variable_name': variable_name,
        'standard_name': standard_name,
        'item_name': item_name,
    }
    for name, value in arguments.items():
        if not isinstance(value, (type(None), str)):
            raise TypeError(f"'{name}' must be a string or None.")

    # Reject conflicting criteria before the active Collection is changed:
    if (variable_name is not None) and (standard_name is not None):
        raise ValueError("Only one of 'variable_name' or 'standard_name' can be specified.")

    # -- Define Search Scope -- #
    if collection:
        collections = {col.id: col for col in self.Catalog.get_all_collections()}
        if collection not in collections:
            raise ValueError(f"Collection '{collection}' not found. Available: {list(collections)}")
        # get_all_collections() also yields nested Collections, so take the
        # match from the mapping rather than looking it up again among the
        # root Catalog's direct children only.
        self.Collection = collections[collection]
        scope = self.Collection
    else:
        scope = self.Collection if self.Collection else self.Catalog

    items = list(scope.get_items(recursive=True))

    # -- Filter Items -- #
    self.Items = self._filter_items(
        items=items,
        dataset_type=dataset_type,
        product_type=product_type,
        variable_name=variable_name,
        standard_name=standard_name,
        item_name=item_name
    )
    return self.summary()
921
+
922
+
923
+ def _open_item(
924
+ self,
925
+ id: str,
926
+ ) -> pystac.Item:
927
+ """
928
+ Open a STAC Item directly from the Item ID.
929
+
930
+ Parameters
931
+ ----------
932
+ id : str
933
+ Item ID to open directly from URL.
934
+
935
+ Returns
936
+ -------
937
+ pystac.Item
938
+ STAC Item object.
939
+ """
940
+ # Define components of Item ID path:
941
+ parts = id.split("/")
942
+ # Initialise node to root Catalog:
943
+ node = self.Catalog
944
+
945
+ # Iterate over ID components:
946
+ for _, part in enumerate(parts):
947
+ # Traverse Catalog to child node containing Item:
948
+ child = node.get_child(part)
949
+ if child is not None:
950
+ node = child
951
+ continue
952
+ else:
953
+ # Collect STAC Item from child node:
954
+ item = next(node.get_items(id), None)
955
+
956
+ return item
957
+
958
+
959
def _open_icechunk_repo(
    self,
    fields: dict,
) -> icechunk.Repository:
    """
    Open the Icechunk Repository described by a STAC Item asset.

    Parameters
    ----------
    fields : dict
        Asset fields defining the Icechunk S3 storage instance. The keys
        'bucket', 'prefix', 'anonymous' and 'endpoint_url' are read.

    Returns
    -------
    icechunk.Repository
        Icechunk Repository object for the Item asset.
    """
    # Gather the S3 storage settings for the asset.
    # NOTE(review): the region is fixed to "us-east-1" and path-style
    # addressing is always enabled — presumably suited to the S3-compatible
    # endpoint given by 'endpoint_url'; confirm.
    s3_settings = dict(
        bucket=fields['bucket'],
        prefix=fields['prefix'],
        region="us-east-1",
        anonymous=fields['anonymous'],
        endpoint_url=fields['endpoint_url'],
        force_path_style=True,
    )

    # Build the storage and open the Repository on top of it:
    return icechunk.Repository.open(storage=icechunk.s3_storage(**s3_settings))
989
+
990
+
991
def _open_icechunk_store(
    self,
    fields: dict,
    branch: str,
    group: str | None = None
) -> xr.Dataset:
    """
    Read a STAC Item asset Icechunk store into an xarray Dataset.

    Parameters
    ----------
    fields : dict
        Asset fields passed on to _open_icechunk_repo() to define the
        Icechunk S3 storage instance.
    branch : str
        Branch of the Icechunk repository to read.
    group : str, optional
        Group within the Icechunk repository to read. Default is None,
        which reads from the root of the repository.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.
    """
    # Start a read-only session on the requested branch of the repository:
    session = self._open_icechunk_repo(fields).readonly_session(branch=branch)

    # Read the session's Zarr store without consolidated metadata:
    return xr.open_zarr(session.store, consolidated=False, group=group)
1022
+
1023
+
1024
def _open_zarr_store(
    self,
    fields: dict,
    consolidated: bool = True,
    group: str | None = None
) -> xr.Dataset:
    """
    Read a STAC Item Zarr store asset into an xarray Dataset.

    Parameters
    ----------
    fields : dict
        Asset fields locating the Zarr store. The keys 'endpoint_url',
        'bucket' and 'prefix' form the store URL and 'zarr_format'
        gives the Zarr format version.
    consolidated : bool, optional
        Whether to open Zarr store using consolidated metadata capability.
        Default is True, meaning that consolidated metadata is expected.
    group : str, optional
        Group within the Zarr store to read. Default is None,
        which reads from the root of the store.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.
    """
    # Assemble the store URL as <endpoint_url>/<bucket>/<prefix>:
    endpoint, bucket, prefix = fields['endpoint_url'], fields['bucket'], fields['prefix']
    store_url = f"{endpoint}/{bucket}/{prefix}"

    # The Zarr format version is converted to an integer before use:
    format_version = int(fields['zarr_format'])

    return xr.open_zarr(
        store_url,
        zarr_format=format_version,
        consolidated=consolidated,
        group=group,
    )
1055
+
1056
+
1057
def open_repo(
    self,
    id: str,
    asset_key: Optional[str] = None
) -> icechunk.Repository:
    """
    Open STAC Item asset as an Icechunk Repository.

    Parameters
    ----------
    id : str
        Item ID to open asset.
    asset_key : str, optional
        Key of the asset to open. Default is None, which selects the
        first asset of the Item.

    Returns
    -------
    icechunk.Repository
        Icechunk Repository for STAC Item asset.

    Raises
    ------
    TypeError
        If the Item ID is not a string.
    RuntimeError
        If the Item ID is not found in the Catalog.
    ValueError
        If the Item has no assets or the asset key is not found in the Item.
    ValueError
        If the asset is not an Icechunk repository or is missing a
        required field.
    """
    # -- Validate Inputs -- #
    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")

    # -- Collect Item Asset -- #
    try:
        item = self._open_item(id=id)
    except Exception as err:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.") from err
    # The lookup can also report a missing Item by returning None:
    if item is None:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.")

    # Default to the first asset of the Item if no key is provided:
    if asset_key is None:
        if not item.assets:
            raise ValueError(f"Item ID '{id}' has no assets.")
        asset_key = next(iter(item.assets))
    asset = item.assets.get(asset_key)
    if asset is None:
        raise ValueError(f"Asset key '{asset_key}' not found in Item ID '{id}'.")

    fields = asset.extra_fields

    # -- Open Icechunk Repository -- #
    # Compare the media type attribute directly so that an asset without
    # a media type is rejected here rather than failing on a missing key.
    if asset.media_type != "application/vnd.zarr+icechunk":
        raise ValueError(f"Item ID '{id}' asset is not an Icechunk repository.")

    for field in ('bucket', 'prefix', 'anonymous', 'endpoint_url'):
        if field not in fields:
            raise ValueError(f"Missing asset field '{field}' in item '{id}'.")

    return self._open_icechunk_repo(fields=fields)
1113
+
1114
+
1115
def open_dataset(
    self,
    id: str,
    group: Optional[str] = None,
    variable_names: Optional[list[str]] = None,
    start_datetime: Optional[str] = None,
    end_datetime: Optional[str] = None,
    bbox: Optional[tuple[float | int, float | int, float | int, float | int]] = None,
    branch: str = "main",
    consolidated: bool = True,
    asset_key: Optional[str] = None
) -> xr.Dataset:
    """
    Open STAC Item asset as an xarray Dataset.

    Parameters
    ----------
    id : str
        Item ID to open asset.
    group : str, optional
        Group within the Zarr or Icechunk repository to read. Default is None,
        which reads from the root of the repository.
    variable_names : list[str], optional
        List of variable names to be parsed from the dataset.
        Default is to return all variables.
    start_datetime : str, optional
        Start datetime used to subset the dataset. Should be a string
        in ISO format (e.g., "1976-01-01T00:00:00Z"). Default is None;
        no temporal subsetting is applied when both datetimes are None.
    end_datetime : str, optional
        End datetime used to subset the dataset. Should be a string
        in ISO format (e.g., "2024-12-31T00:00:00Z"). Default is None;
        no temporal subsetting is applied when both datetimes are None.
    bbox : tuple[float | int, float | int, float | int, float | int], optional
        Spatial bounding box used to subset the dataset. Should be a tuple of four
        numbers in the format: (min_lon, min_lat, max_lon, max_lat).
        Default is None, which applies no spatial subsetting.
    branch : str, optional
        Branch of the Icechunk repository to use. Default is to use the "main" branch.
        Only used for Icechunk assets.
    consolidated : bool, optional
        Whether to open Zarr stores using consolidated metadata. Default is True.
        Only used for Zarr store assets.
    asset_key : str, optional
        Key of the asset to open. Default is None, which selects the
        first asset of the Item.

    Returns
    -------
    xarray.Dataset
        Dataset read from Item asset.

    Raises
    ------
    TypeError
        If any of the input parameters are of incorrect type.
    RuntimeError
        If the Item ID is not found in the Catalog.
    ValueError
        If the Item has no assets or the asset key is not found in the Item.
    ValueError
        If the asset media type is unsupported or the asset is missing a
        required field.
    KeyError
        If the specified variable(s) are not found in the dataset.
    """
    # -- Validate Inputs -- #
    if not isinstance(id, str):
        raise TypeError("'id' must be a string.")
    if group is not None and not isinstance(group, str):
        raise TypeError("'group' must be a string or None.")
    if not isinstance(variable_names, (type(None), list)):
        raise TypeError("'variable_names' must be a list of strings.")
    if variable_names is not None and not all(isinstance(var, str) for var in variable_names):
        raise TypeError("'variable_names' must be a list of strings.")
    if not isinstance(start_datetime, (type(None), str)):
        raise TypeError("'start_datetime' must be a string or None.")
    if not isinstance(end_datetime, (type(None), str)):
        raise TypeError("'end_datetime' must be a string or None.")
    if not isinstance(bbox, (type(None), tuple)):
        raise TypeError("'bbox' must be a tuple or None.")
    if bbox is not None and (len(bbox) != 4 or not all(isinstance(coord, (float, int)) for coord in bbox)):
        raise TypeError("'bbox' must be a tuple of the form (min_lon, min_lat, max_lon, max_lat) with float or int values.")
    if not isinstance(branch, str):
        raise TypeError("'branch' must be a string.")
    if not isinstance(consolidated, bool):
        raise TypeError("'consolidated' must be a boolean.")

    # -- Collect Item Asset -- #
    try:
        item = self._open_item(id=id)
    except Exception as err:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.") from err
    # The lookup can also report a missing Item by returning None:
    if item is None:
        raise RuntimeError(f"Item ID '{id}' not found in Catalog.")

    # Default to the first asset of the Item if no key is provided:
    if asset_key is None:
        if not item.assets:
            raise ValueError(f"Item ID '{id}' has no assets.")
        asset_key = next(iter(item.assets))
    asset = item.assets.get(asset_key)
    if asset is None:
        raise ValueError(f"Asset key '{asset_key}' not found in Item ID '{id}'.")

    fields = asset.extra_fields
    # Read the media type attribute directly so that an asset without a
    # media type is reported as unsupported rather than failing on a key.
    media_type = asset.media_type

    # -- Open Asset as xarray Dataset -- #
    # Open Icechunk Repository as xarray Dataset:
    if media_type == "application/vnd.zarr+icechunk":
        for field in ('bucket', 'prefix', 'anonymous', 'endpoint_url'):
            if field not in fields:
                raise ValueError(f"Missing asset field '{field}' in item '{id}'.")
        ds = self._open_icechunk_store(fields=fields, branch=branch, group=group)

    # Open Zarr store as xarray Dataset:
    elif media_type == 'application/vnd.zarr':
        for field in ('bucket', 'prefix', 'endpoint_url', 'zarr_format'):
            if field not in fields:
                raise ValueError(f"Missing asset field '{field}' in item '{id}'.")
        # Forward the caller's choice of consolidated metadata:
        ds = self._open_zarr_store(fields=fields, consolidated=consolidated, group=group)

    else:
        raise ValueError(f"Unsupported media type {media_type} for Item asset.")

    # -- Subset Dataset -- #
    # Selecting variables:
    if variable_names is not None:
        try:
            ds = ds[variable_names]
        except KeyError as err:
            raise KeyError("One or more variables not found in dataset.") from err

    # Spatio-temporal subsetting:
    if bbox:
        ds = apply_bbox(ds=ds, bbox=bbox)

    if start_datetime or end_datetime:
        ds = apply_time_bounds(ds=ds, start_datetime=start_datetime, end_datetime=end_datetime)

    return ds