OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,476 @@
1
+ """
2
+ utils.py
3
+
4
+ Description:
5
+ Utility functions to create the National Oceanography Centre
6
+ (NOC) SpatioTemporal Asset Catalog (STAC) and write to JSON files.
7
+
8
+ Authors:
9
+ - Ollie Tooth (oliver.tooth@noc.ac.uk)
10
+ """
11
+ # -- Import Python Modules -- #
12
+ import pystac
13
+ import datetime
14
+ import icechunk
15
+ import xarray as xr
16
+ from shapely.geometry import Polygon, mapping
17
+
18
+ # -- I/O Functions -- #
19
def open_icechunk_store(
    bucket: str,
    prefix: str,
    branch: str = "main",
    group: str | None = None,
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
) -> xr.Dataset:
    """
    Open one branch of an S3-hosted Icechunk repository as an xarray.Dataset.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket holding the Icechunk repository.
    prefix : str
        Prefix of the Icechunk repository inside the bucket.
    branch : str, optional
        Repository branch used for the read-only session (default is "main").
    group : str, optional
        Group inside the store to open (default is None).
    endpoint_url : str, optional
        S3 endpoint URL (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").

    Returns
    -------
    xr.Dataset
        Dataset opened with ``xr.open_zarr`` from the store of a read-only
        session on ``branch``, without consolidated metadata.
    """
    # Storage is always configured for anonymous access with path-style
    # addressing; the region is fixed to "us-east-1".
    # NOTE(review): presumably the region is a placeholder required by the
    # S3 client for this custom endpoint -- confirm.
    storage_options = dict(
        bucket=bucket,
        prefix=prefix,
        region="us-east-1",
        anonymous=True,
        endpoint_url=endpoint_url,
        force_path_style=True,
    )
    s3_store = icechunk.s3_storage(**storage_options)

    # Open the repository and take a read-only session on the requested branch:
    repository = icechunk.Repository.open(storage=s3_store)
    session = repository.readonly_session(branch=branch)

    # Hand the session store to xarray:
    return xr.open_zarr(session.store, group=group, consolidated=False)
57
+
58
+
59
+ # -- STAC Functions -- #
60
def create_item_with_zarr_asset(
    id : str,
    ds: xr.Dataset,
    bucket: str,
    prefix: str,
    title: str,
    dataset_type: str = "model",
    product_type: str = "timeseries",
    product_version: str = "1.0",
    institution: str = "National Oceanography Centre, UK",
    platform: str = "gn",
    horizontal_grid_type: str = "curvilinear",
    horizontal_grid_resolution: str = "1 degree",
    vertical_grid_type: str = "zps",
    vertical_grid_coordinate: str = "depth with partial steps",
    vertical_grid_levels: int = 75,
    operation: str ="annual-mean",
    status: str = "completed",
    update_frequency: str = "None",
    variant: str = "r1i1c1f1",
    ocean_component: str = "NEMO v4.2.2",
    sea_ice_component: str = "SI3 v4.0",
    biogeochemistry_component: str = "None",
    atmosphere_component: str = "None",
    atmospheric_forcing: str = "JRA55-do",
    start_date: str = "1976-01-01",
    end_date: str = "2024-02-01",
    bbox: tuple = (-180.0, -90.0, 180.0, 90.0),
    collection : str = "noc-npd-jra55",
    variable_stores: bool = True,
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
    zarr_format: int = 2,
) -> pystac.Item:
    """
    Create a STAC Item from a Zarr Store asset.

    The Item description is selected from the prefix by substring tests, in
    this order: 'domain', 'I' (sea-ice), 'S' (scalar), 'M' (transect), and
    otherwise ocean physics at "<first character of prefix>-points".

    Parameters
    ----------
    id : str
        Unique identifier for the STAC Item.
    ds : xr.Dataset
        Dataset whose dimensions, variables and variable standard names are
        recorded in the STAC Item properties.
    bucket : str
        S3 bucket name where the data is stored.
    prefix : str
        Prefix for the data in the S3 bucket (e.g., "U1y", "U1m", etc.).
        Its final "/"-separated component is used as the asset key.
    title : str
        Title of the dataset.
    dataset_type : str
        Type of dataset (e.g., "model", "observation", etc.).
    product_type : str
        Type of product (e.g., "climatology", "timeseries", etc.).
    product_version : str
        Version of the product.
    institution : str
        Institution responsible for producing the dataset.
    platform : str
        Platform name (e.g., "gn_global", "gr_global", etc.).
    horizontal_grid_type : str
        Type of horizontal grid used in the dataset (e.g., "regular rectilinear", "irregular rectilinear", "curvilinear", etc.).
    horizontal_grid_resolution : str
        Horizontal grid resolution of the dataset (e.g., "1 degree", "0.25 degree", etc.).
    vertical_grid_type : str
        Type of vertical grid used in the dataset (e.g., "z", "sigma", "hybrid", etc.).
    vertical_grid_coordinate : str
        Type of vertical coordinate used in the dataset (e.g., "depth", "sigma", etc.).
    vertical_grid_levels : int
        Number of vertical levels in the dataset.
    operation : str, optional
        Operation string of the form "<frequency>-<aggregation>" or
        "<frequency> <aggregation>" (default is "annual-mean"). The two parts
        populate the "aggregation_frequency" and "aggregation" properties.
    status : str, optional
        Status of the dataset (e.g., "ongoing", "completed", etc.) (default is "completed").
    update_frequency : str, optional
        Frequency at which the dataset is updated (e.g., "monthly", "biannually", etc.) (default is "None").
    variant : str, optional
        Simulation variant string for the dataset (default is "r1i1c1f1").
    ocean_component : str, optional
        Ocean model component used to produce the dataset (e.g., "NEMO v4.2.2", etc.) (default is "NEMO v4.2.2").
    sea_ice_component : str, optional
        Sea ice model component used to produce the dataset (e.g., "CICE v6.1", etc.) (default is "SI3 v4.0").
    biogeochemistry_component : str, optional
        Biogeochemistry model component used to produce the dataset (e.g., "PISCES v2", etc.) (default is "None").
    atmosphere_component : str, optional
        Atmospheric model component used to produce the dataset (e.g., "UKMO UM Global Atmosphere 7.1", etc.) (default is "None").
    atmospheric_forcing : str, optional
        Atmospheric forcing used to produce the dataset (e.g., "ERA5", "JRA55-do", etc.) (default is "JRA55-do").
    start_date : str, optional
        Start date of the dataset in "YYYY-MM-DD" format (default is "1976-01-01").
    end_date : str, optional
        End date of the dataset in "YYYY-MM-DD" format (default is "2024-02-01").
    bbox : tuple, optional
        Bounding box for the dataset in the format (min_lon, min_lat, max_lon, max_lat).
        (default is global coverage).
    collection : str, optional
        STAC Collection to which this Item belongs (default is "noc-npd-jra55").
    variable_stores : bool, optional
        Whether each variable is stored in a separate Zarr store (default is True).
    endpoint_url : str, optional
        S3 endpoint URL, used for both the asset href and the asset's
        "endpoint_url" field (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").
    zarr_format: int, optional
        Zarr format version (default is 2).

    Returns
    -------
    pystac.Item
        A STAC Item containing the dataset information and an asset pointing to the data.

    Raises
    ------
    ValueError
        If ``operation`` cannot be split into a frequency and an aggregation.
    """
    # The final path component of the prefix names the store and the asset key:
    store_name = prefix.split('/')[-1]

    # Define the item description based on the prefix:
    var = f"{store_name} output" if variable_stores else "outputs"

    # NOTE(review): the 'I' / 'S' / 'M' tests match anywhere in the full
    # prefix, and the fallback uses the first character of the full prefix
    # (not of its final component) -- confirm this suits nested prefixes.
    if 'domain' in prefix:
        description = "**Global ocean model domain and mesh mask variables.**"
    elif 'I' in prefix:
        description = f"**{operation.capitalize()} global sea-ice {var} defined at NEMO model T-points.**"
    elif 'S' in prefix:
        description = f"**{operation.capitalize()} global ocean scalar {var}.**"
    elif 'M' in prefix:
        description = f"**{operation.capitalize()} ocean physics transect {var} defined at {store_name}.**"
    else:
        description = f"**{operation.capitalize()} global ocean physics {var} defined at {prefix[0]}-points.**"

    # Add OceanDataCatalog Access Information to the description:
    description += f"\n\n**OceanDataCatalog Access:**\n`catalog.open_dataset(id='{id}')`"

    # Split the operation into (frequency, aggregation). Whitespace-separated
    # operations keep their original interpretation; a single hyphenated
    # token such as the default "annual-mean" is split on its last hyphen
    # (previously this raised IndexError).
    operation_tokens = operation.split()
    if len(operation_tokens) >= 2:
        frequency, aggregation = operation_tokens[0], operation_tokens[1]
    else:
        frequency, _, aggregation = operation.strip().rpartition("-")
        if not frequency or not aggregation:
            raise ValueError(
                "'operation' must have the form '<frequency>-<aggregation>' "
                f"or '<frequency> <aggregation>', got {operation!r}."
            )

    # Parse the "YYYY-MM-DD" dates into integer [year, month, day] parts:
    start_parts = [int(part) for part in start_date.split("-")[:3]]
    end_parts = [int(part) for part in end_date.split("-")[:3]]

    # Define Polygon geometry for the item:
    polygon = Polygon([
        (bbox[0], bbox[1]),  # SW corner
        (bbox[2], bbox[1]),  # SE corner
        (bbox[2], bbox[3]),  # NE corner
        (bbox[0], bbox[3]),  # NW corner
        (bbox[0], bbox[1])   # Closing the polygon back to SW corner
    ])

    # Convert the Polygon to GeoJSON format:
    geometry = mapping(polygon)

    # Create a STAC Item with Asset:
    item = pystac.Item(
        id=id,
        geometry=geometry,
        bbox=list(polygon.bounds),  # [min_lon, min_lat, max_lon, max_lat]
        # Nominal datetime is 1st January of the year midway between start and end:
        datetime=datetime.datetime(year=(start_parts[0] + end_parts[0]) // 2, month=1, day=1),
        start_datetime=datetime.datetime(year=start_parts[0], month=start_parts[1], day=start_parts[2]),
        end_datetime=datetime.datetime(year=end_parts[0], month=end_parts[1], day=end_parts[2]),
        properties={
            "title": title,
            "description": description,
            "dataset_type": dataset_type,
            "product_type": product_type,
            "product_version": product_version,
            "institution": institution,
            "platform": platform,
            "horizontal_grid_type": horizontal_grid_type,
            "horizontal_grid_resolution": horizontal_grid_resolution,
            "vertical_grid_type": vertical_grid_type,
            "vertical_grid_coordinate": vertical_grid_coordinate,
            "vertical_grid_levels": vertical_grid_levels,
            "dimensions": list(ds.dims),
            "variables": list(ds.data_vars),
            # Fall back to the variable name where no standard_name attribute exists:
            "variable_standard_names": [ds[name].attrs.get('standard_name', name) for name in ds.data_vars],
            "aggregation": aggregation.lower(),
            "aggregation_frequency": frequency.lower(),
            "status": status,
            "update_frequency": update_frequency,
            "latest_data_update": datetime.datetime.now().isoformat(),
            "variant": variant,
            "ocean_component": ocean_component,
            "sea_ice_component": sea_ice_component,
            "biogeochemistry_component": biogeochemistry_component,
            "atmosphere_component": atmosphere_component,
            "atmospheric_forcing": atmospheric_forcing,
        },
        collection=collection,
    )

    item.add_asset(key=store_name, asset=pystac.Asset(
        # Build the href from endpoint_url so it agrees with the
        # "endpoint_url" extra field (previously the NOC URL was hard-coded):
        href=f"{endpoint_url}/{bucket}/{prefix}",
        title=title,
        description=description,
        media_type="application/vnd.zarr",
        extra_fields=dict(
            endpoint_url=endpoint_url,
            bucket=bucket,
            prefix=prefix,
            zarr_format=zarr_format,
            anonymous=True
        )
    ))

    return item
250
+
251
+
252
def create_item_with_icechunk_asset(
    ds: xr.Dataset,
    id: str,
    bucket: str,
    prefix: str,
    title: str | None = None,
    description: str | None = None,
    dataset_type: str | None = None,
    product_type: str | None = None,
    product_version: str | None = None,
    institution: str | None = None,
    citation: str | None = None,
    acknowledgement: str | None = None,
    license: str | None = None,
    doi: str | None = None,
    platform: str | None = None,
    horizontal_grid_type: str | None = None,
    horizontal_grid_resolution: str | None = None,
    vertical_grid_type: str | None = None,
    vertical_grid_coordinate: str | None = None,
    vertical_grid_levels: int | None = None,
    aggregation: str | None = None,
    aggregation_frequency: str | None = None,
    status: str | None = None,
    update_frequency: str | None = None,
    ocean_component: str | None = None,
    sea_ice_component: str | None = None,
    biogeochemistry_component: str | None = None,
    atmosphere_component: str | None = None,
    atmospheric_forcing: str | None = None,
    variant: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    bbox: tuple | None = None,
    collection: str = "noc-npd-era5",
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
    group: str | None = None,
    anonymous: bool = True,
) -> pystac.Item:
    """
    Create a STAC Item from an Icechunk Store.

    Every optional metadata argument left as None falls back to the global
    dataset attribute of the same name (``ds.attrs``), or to None when that
    attribute is missing.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to be included in the STAC Item.
    id : str
        Unique identifier for the STAC Item.
    bucket : str
        S3 bucket name where the dataset is stored.
    prefix : str
        Prefix for the dataset in the S3 bucket (e.g., "U1y", "U1m", etc.).
        Its final "/"-separated component is used as the asset key.
    title : str, optional
        Title of the dataset (default is None, which will use the "title" attribute from the dataset if available).
    description : str, optional
        Description of the dataset (default is None, which will use the "description" attribute from the dataset if available).
        OceanDataCatalog access information is always appended.
    dataset_type : str, optional
        Type of dataset (e.g., "model", "observation", etc.) (default is None, which will use the "dataset_type" attribute from the dataset if available).
    product_type : str, optional
        Type of product (e.g., "climatology", "timeseries", etc.) (default is None, which will use the "product_type" attribute from the dataset if available).
    product_version : str, optional
        Version of the product (default is None, which will use the "product_version" attribute from the dataset if available).
    institution : str, optional
        Institution responsible for producing the dataset (default is None, which will use the "institution" attribute from the dataset if available).
    citation : str, optional
        Citation for the dataset (default is None, which will use the "citation" attribute from the dataset if available).
    acknowledgement : str, optional
        Acknowledgement for the dataset (default is None, which will use the "acknowledgement" attribute from the dataset if available).
    license : str, optional
        License for the dataset (default is None, which will use the "license" attribute from the dataset if available).
    doi : str, optional
        Digital Object Identifier (DOI) for the dataset (default is None, which will use the "doi" attribute from the dataset if available).
    platform : str, optional
        Platform string (e.g., "gn", "gr", "tn", etc.) (default is None, which will use the "platform" attribute from the dataset if available).
    horizontal_grid_type : str, optional
        Type of horizontal grid used in the dataset (e.g., "regular rectilinear", "irregular rectilinear", "curvilinear", etc.) (default is None, which will use the "horizontal_grid_type" attribute from the dataset if available).
    horizontal_grid_resolution : str, optional
        Horizontal resolution of the dataset (e.g., "1 degree", "0.25 degree", etc.) (default is None, which will use the "horizontal_grid_resolution" attribute from the dataset if available).
    vertical_grid_type : str, optional
        Type of vertical grid used in the dataset (e.g., "z", "sigma", "hybrid", etc.) (default is None, which will use the "vertical_grid_type" attribute from the dataset if available).
    vertical_grid_coordinate : str, optional
        Type of vertical coordinate used in the dataset (e.g., "depth", "sigma", etc.) (default is None, which will use the "vertical_grid_coordinate" attribute from the dataset if available).
    vertical_grid_levels : int, optional
        Number of vertical levels in the dataset (default is None, which will use the "vertical_grid_levels" attribute from the dataset if available).
    aggregation : str, optional
        Type of aggregation used to produce the dataset (e.g., "mean", "max", etc.) (default is None, which will use the "aggregation" attribute from the dataset if available).
    aggregation_frequency : str, optional
        Frequency at which the aggregation is applied (e.g., "monthly", "biannually", etc.) (default is None, which will use the "aggregation_frequency" attribute from the dataset if available).
    status : str, optional
        Status of the dataset (e.g., "ongoing", "completed", etc.) (default is None, which will use the "status" attribute from the dataset if available).
    update_frequency : str, optional
        Frequency at which the dataset is updated (e.g., "monthly", "biannually", etc.) (default is None, which will use the "update_frequency" attribute from the dataset if available).
    ocean_component : str, optional
        Ocean model component used to produce the dataset (e.g., "NEMO v4.2.2", etc.) (default is None, which will use the "ocean_component" attribute from the dataset if available).
    sea_ice_component : str, optional
        Sea ice model component used to produce the dataset (e.g., "CICE v6.1", etc.) (default is None, which will use the "sea_ice_component" attribute from the dataset if available).
    biogeochemistry_component : str, optional
        Biogeochemistry model component used to produce the dataset (e.g., "PISCES v2", etc.) (default is None, which will use the "biogeochemistry_component" attribute from the dataset if available).
    atmosphere_component : str, optional
        Atmospheric model component used to produce the dataset (e.g., "UKMO UM Global Atmosphere 7.1", etc.) (default is None, which will use the "atmosphere_component" attribute from the dataset if available).
    atmospheric_forcing : str, optional
        Atmospheric forcing used to produce the dataset (e.g., "ERA5", "JRA-55", etc.) (default is None, which will use the "atmospheric_forcing" attribute from the dataset if available).
    variant : str, optional
        Configuration variant string for the dataset (default is None, which will use the "variant" attribute from the dataset if available).
    start_date : str, optional
        Start date of the dataset in "YYYY-MM-DD" format (default is None, which will use the "start_date" attribute from the dataset).
    end_date : str, optional
        End date of the dataset in "YYYY-MM-DD" format (default is None, which will use the "end_date" attribute from the dataset).
    bbox : tuple, optional
        Bounding box for the dataset in the format (min_lon, min_lat, max_lon, max_lat),
        given either as a sequence of numbers or as a string such as
        "[-180.0, -90.0, 180.0, 90.0]" (default is None, which will use the
        "bbox" attribute from the dataset, or global coverage if absent).
    collection : str, optional
        Collection to which this Item belongs (default is "noc-npd-era5").
    endpoint_url : str, optional
        The S3 endpoint URL (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").
    group : str, optional
        Group within the Icechunk repository to open (default is None).
    anonymous : bool, optional
        Whether anonymous access is supported for the S3 asset (default is True).

    Returns
    -------
    pystac.Item
        STAC Item containing the dataset metadata and associated dataset asset.

    Raises
    ------
    ValueError
        If ``start_date`` or ``end_date`` is given neither as an argument nor
        as a global dataset attribute.
    """
    def _from_attrs(name, value):
        # An explicit argument wins; otherwise use the dataset's global
        # attribute of the same name (None when it is missing).
        return ds.attrs.get(name, None) if value is None else value

    # === Geometry === #
    # Collect bounding box from dataset attributes if not provided:
    if bbox is None:
        bbox = ds.attrs.get("bbox", "[-180.0, -90.0, 180.0, 90.0]")
    # A bounding box may arrive as a bracketed, comma-separated string or as
    # a sequence of numbers (as the `tuple` annotation advertises); only the
    # string form needs splitting (previously a tuple raised AttributeError).
    if isinstance(bbox, str):
        bbox = bbox.replace("[", "").replace("]", "").split(",")
    bbox = [float(bound) for bound in bbox]

    # Define Polygon geometry for the item:
    polygon = Polygon([
        (bbox[0], bbox[1]),  # SW corner
        (bbox[2], bbox[1]),  # SE corner
        (bbox[2], bbox[3]),  # NE corner
        (bbox[0], bbox[3]),  # NW corner
        (bbox[0], bbox[1])   # Closing the polygon back to SW corner
    ])

    # Convert the Polygon to GeoJSON format:
    geometry = mapping(polygon)

    # === Properties === #
    # Add OceanDataCatalog Access Information to description:
    if description is None:
        description = ds.attrs.get("description", "")
    description += f"\n\n**OceanDataCatalog Access:**\n`catalog.open_dataset(id='{id}')`"

    # Define start and end datetimes for the Item:
    start_date = _from_attrs("start_date", start_date)
    if start_date is None:
        raise ValueError("'start_date' must be provided either as a parameter or as a global dataset attribute.")
    end_date = _from_attrs("end_date", end_date)
    if end_date is None:
        raise ValueError("'end_date' must be provided either as a parameter or as a global dataset attribute.")

    # Parse the "YYYY-MM-DD" dates into integer [year, month, day] parts:
    start_parts = [int(part) for part in start_date.split("-")[:3]]
    end_parts = [int(part) for part in end_date.split("-")[:3]]

    # Title and variant are shared by the Item properties and the Asset:
    resolved_title = _from_attrs("title", title)
    resolved_variant = _from_attrs("variant", variant)

    # Define standard properties dictionary:
    properties = {
        "title": resolved_title,
        "description": description,
        "dataset_type": _from_attrs("dataset_type", dataset_type),
        "product_type": _from_attrs("product_type", product_type),
        "product_version": _from_attrs("product_version", product_version),
        "institution": _from_attrs("institution", institution),
        "citation": _from_attrs("citation", citation),
        "acknowledgement": _from_attrs("acknowledgement", acknowledgement),
        "license": _from_attrs("license", license),
        "doi": _from_attrs("doi", doi),
        "platform": _from_attrs("platform", platform),
        "horizontal_grid_type": _from_attrs("horizontal_grid_type", horizontal_grid_type),
        "horizontal_grid_resolution": _from_attrs("horizontal_grid_resolution", horizontal_grid_resolution),
        "vertical_grid_type": _from_attrs("vertical_grid_type", vertical_grid_type),
        "vertical_grid_coordinate": _from_attrs("vertical_grid_coordinate", vertical_grid_coordinate),
        "vertical_grid_levels": _from_attrs("vertical_grid_levels", vertical_grid_levels),
        "dimensions": list(ds.dims),
        "variables": list(ds.data_vars),
        # Fall back to the variable name where no standard_name attribute exists:
        "variable_standard_names": [ds[name].attrs.get('standard_name', name) for name in ds.data_vars],
        "aggregation": _from_attrs("aggregation", aggregation),
        "aggregation_frequency": _from_attrs("aggregation_frequency", aggregation_frequency),
        "status": _from_attrs("status", status),
        "update_frequency": _from_attrs("update_frequency", update_frequency),
        "latest_data_update": datetime.datetime.now().isoformat(),
    }

    if properties["dataset_type"] == "model":
        # Append numerical model specific properties:
        properties.update({
            "variant": resolved_variant,
            "ocean_component": _from_attrs("ocean_component", ocean_component),
            "sea_ice_component": _from_attrs("sea_ice_component", sea_ice_component),
            "biogeochemistry_component": _from_attrs("biogeochemistry_component", biogeochemistry_component),
            "atmosphere_component": _from_attrs("atmosphere_component", atmosphere_component),
            "atmospheric_forcing": _from_attrs("atmospheric_forcing", atmospheric_forcing),
        })

    # === Create a STAC Item with Asset === #
    item = pystac.Item(
        id=id,
        geometry=geometry,
        bbox=list(polygon.bounds),
        # Nominal datetime is 1st January of the year midway between start and end:
        datetime=datetime.datetime(year=(start_parts[0] + end_parts[0]) // 2, month=1, day=1),
        start_datetime=datetime.datetime(year=start_parts[0], month=start_parts[1], day=start_parts[2]),
        end_datetime=datetime.datetime(year=end_parts[0], month=end_parts[1], day=end_parts[2]),
        properties=properties,
        collection=collection,
    )

    item.add_asset(key=prefix.split('/')[-1], asset=pystac.Asset(
        href=f"{endpoint_url}/{bucket}/{prefix}",
        title=resolved_title,
        description=description,
        media_type="application/vnd.zarr+icechunk",
        extra_fields=dict(
            endpoint_url=endpoint_url,
            bucket=bucket,
            prefix=prefix,
            variant=resolved_variant,
            group=group,
            anonymous=anonymous
        )
    ))

    return item
@@ -0,0 +1,34 @@
1
+ # ===================================================================
2
+ # Copyright 2026 National Oceanography Centre
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0.
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
11
+ # implied. See the License for the specific language governing
12
+ # permissions and limitations under the License.
13
+ # ===================================================================
14
+ """
15
+ OceanDataStore: CLI Sub-package
16
+
17
+ An open-source Python library to streamline writing, updating and accessing
18
+ ocean data stored in cloud object storage.
19
+ """
20
+ __author__ = "Ollie Tooth, Joao Morado, Tobias Ferreira"
21
+ __credits__ = "National Oceanography Centre (NOC), Southampton, UK"
22
+
23
+ from OceanDataStore.cli.icechunk import (
24
+ send_to_icechunk,
25
+ update_icechunk,
26
+ )
27
+ from OceanDataStore.cli.logging import initialise_logging
28
+ from OceanDataStore.cli.utils import ObjectStoreS3, list_objects
29
+ from OceanDataStore.cli.zarr import (
30
+ send_to_zarr,
31
+ update_zarr,
32
+ )
33
+
34
+ __all__ = ("initialise_logging", "send_to_zarr", "send_to_icechunk", "update_zarr", "update_icechunk", "list_objects", "ObjectStoreS3")