OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,506 @@
1
+ """
2
+ utils.py
3
+
4
+ Description: Utility functions for processing gridded ocean data.
5
+
6
+ Contact: Ollie Tooth (oliver.tooth@noc.ac.uk)
7
+ """
8
+ # == Import Python packages == #
9
+ import json
10
+
11
+ import icechunk
12
+ import numpy as np
13
+ import xarray as xr
14
+ import zarr
15
+
16
+
17
+ # == Utility Functions == #
18
def compute_gc_distance(
    lat1: xr.DataArray,
    lon1: xr.DataArray,
    lat2: xr.DataArray,
    lon2: xr.DataArray
) -> xr.DataArray:
    """
    Calculate the Great-Circle distance between two sets of
    geographical points on the Earth's surface using the
    haversine formula.

    Parameters:
    -----------
    lat1 : xr.DataArray
        Latitude of the first set of points (degrees).
    lon1 : xr.DataArray
        Longitude of the first set of points (degrees).
    lat2 : xr.DataArray
        Latitude of the second set of points (degrees).
    lon2 : xr.DataArray
        Longitude of the second set of points (degrees).

    Returns:
    --------
    dist : xr.DataArray
        Great-circle distance between the two sets
        of points (meters).

    """
    # Define the radius of the Earth in meters:
    re = 6371000

    # Convert latitudes and longitudes from degrees to radians:
    lon1, lat1, lon2, lat2 = map(np.deg2rad, [lon1, lat1, lon2, lat2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine term (mathematically bounded to [0, 1]):
    hav = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push the term marginally above 1 for
    # (near-)antipodal points, which would make arcsin return NaN.
    # np.minimum is a ufunc and propagates NaN inputs unchanged.
    hav = np.minimum(hav, 1.0)

    # Calculate the great-circle distance between points:
    dist = 2 * re * np.arcsin(np.sqrt(hav))

    return dist
63
+
64
+
65
def compute_dx(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the zonal (east-west) length of every grid cell in meters.

    The longitude spacing is taken as the mean difference between
    consecutive 'longitude' values, so a uniform regular grid is assumed.
    Each cell length is the great-circle distance between the points half
    a spacing west and half a spacing east of the cell center.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        DataArray named 'dx' with dimensions ('latitude', 'longitude')
        holding the zonal length of each grid cell.

    Raises:
    -------
    TypeError
        If ``ds`` is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is not a coordinate of ``ds``.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    # -- Calculate Grid Cell Length -- #
    lon_coord = ds['longitude']
    lat_coord = ds['latitude']

    # Half of the mean longitude spacing (uniform grid assumed):
    half_dlon = lon_coord.diff(dim="longitude").mean().values / 2

    if (lon_coord.ndim == 1) and (lat_coord.ndim == 1):
        # Expand the 1-dimensional axes to (latitude, longitude) shaped
        # arrays of grid cell centers:
        lon, lat = np.meshgrid(lon_coord.values, lat_coord.values)
    else:
        # Coordinates are not both 1-dimensional: use their arrays as given.
        lon = lon_coord.values
        lat = lat_coord.values

    # Distance between the western and eastern edge of each cell:
    zonal_length = compute_gc_distance(
        lon1=lon - half_dlon,
        lat1=lat,
        lon2=lon + half_dlon,
        lat2=lat,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    metadata = {
        'long_name': 'Grid-Cell Zonal Length',
        'standard_name': 'cell_x_length',
        'units': 'm',
    }
    return xr.DataArray(
        data=zonal_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': lat_coord, 'longitude': lon_coord},
        name='dx',
        attrs=metadata,
    )
120
+
121
+
122
def compute_dy(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the meridional (north-south) length of every grid cell in meters.

    The latitude spacing is taken as the mean difference between
    consecutive 'latitude' values, so a uniform regular grid is assumed.
    Each cell length is the great-circle distance between the points half
    a spacing either side of the cell center in latitude.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        DataArray named 'dy' with dimensions ('latitude', 'longitude')
        holding the meridional length of each grid cell.

    Raises:
    -------
    TypeError
        If ``ds`` is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is not a coordinate of ``ds``.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    # -- Calculate Grid Cell Length -- #
    lon_coord = ds['longitude']
    lat_coord = ds['latitude']

    # Half of the mean latitude spacing (uniform grid assumed):
    half_dlat = lat_coord.diff(dim="latitude").mean().values / 2

    if (lon_coord.ndim == 1) and (lat_coord.ndim == 1):
        # Expand the 1-dimensional axes to (latitude, longitude) shaped
        # arrays of grid cell centers:
        lon, lat = np.meshgrid(lon_coord.values, lat_coord.values)
    else:
        # Coordinates are not both 1-dimensional: use their arrays as given.
        lon = lon_coord.values
        lat = lat_coord.values

    # Distance between the two latitude edges of each cell:
    meridional_length = compute_gc_distance(
        lon1=lon,
        lat1=lat - half_dlat,
        lon2=lon,
        lat2=lat + half_dlat,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    metadata = {
        'long_name': 'Grid-Cell Meridional Length',
        'standard_name': 'cell_y_length',
        'units': 'm',
    }
    return xr.DataArray(
        data=meridional_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': lat_coord, 'longitude': lon_coord},
        name='dy',
        attrs=metadata,
    )
177
+
178
+
179
def compute_cell_area(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the horizontal area of every grid cell in square meters.

    The longitude and latitude spacings are taken as the mean difference
    between consecutive coordinate values, so a uniform regular grid is
    assumed. The area is the product of each cell's zonal and meridional
    great-circle lengths.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        DataArray named 'cell_area' with dimensions
        ('latitude', 'longitude') holding the area of each grid cell.

    Raises:
    -------
    TypeError
        If ``ds`` is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is not a coordinate of ``ds``.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    # -- Calculate Grid Cell Area -- #
    lon_coord = ds['longitude']
    lat_coord = ds['latitude']

    # Half of the mean coordinate spacings (uniform grid assumed):
    half_dlon = lon_coord.diff(dim="longitude").mean().values / 2
    half_dlat = lat_coord.diff(dim="latitude").mean().values / 2

    if (lon_coord.ndim == 1) and (lat_coord.ndim == 1):
        # Expand the 1-dimensional axes to (latitude, longitude) shaped
        # arrays of grid cell centers:
        lon, lat = np.meshgrid(lon_coord.values, lat_coord.values)
    else:
        # Coordinates are not both 1-dimensional: use their arrays as given.
        lon = lon_coord.values
        lat = lat_coord.values

    # Zonal and meridional extent of each cell:
    zonal_length = compute_gc_distance(
        lon1=lon - half_dlon,
        lat1=lat,
        lon2=lon + half_dlon,
        lat2=lat,
    )
    meridional_length = compute_gc_distance(
        lon1=lon,
        lat1=lat - half_dlat,
        lon2=lon,
        lat2=lat + half_dlat,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    metadata = {
        'long_name': 'Grid-Cell Area',
        'standard_name': 'cell_area',
        'units': 'm2',
    }
    return xr.DataArray(
        data=zonal_length*meridional_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': lat_coord, 'longitude': lon_coord},
        name='cell_area',
        attrs=metadata,
    )
236
+
237
+
238
def compute_cell_thickness(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Calculate vertical thickness of each grid cell in meters.

    Cell edges are placed at the sea surface (0.0), at the midpoints
    between consecutive depth levels, and at a bottom boundary obtained by
    mirroring the last level about the edge directly above it. The
    thickness is the difference between consecutive edges. Depth levels do
    not need to be evenly spaced.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'depth' coordinates.

    Returns:
    --------
    xr.DataArray
        Vertical thickness of each grid cell, with dimension 'depth'.

    Raises:
    -------
    TypeError
        If ``ds`` is not an xarray Dataset.
    ValueError
        If 'depth' is not a coordinate of ``ds``, is not 1-dimensional,
        or contains no levels.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if 'depth' not in ds.coords:
        raise ValueError("Input dataset must contain 'depth' coordinates.")
    depth = ds['depth'].data

    # Check that depth is 1-dimensional:
    if depth.ndim != 1:
        raise ValueError("Input depth DataArray must be 1-dimensional.")
    # An empty coordinate has no last level to extrapolate from:
    if depth.size == 0:
        raise ValueError("Input depth DataArray must contain at least one level.")

    # Find interfaces between vertical levels:
    interfaces = 0.5 * (depth[:-1] + depth[1:])
    # Use sea surface as top boundary:
    top = 0.0
    # Upper edge of the deepest cell: the last interface, or the sea
    # surface when there is only a single level (no interfaces exist).
    upper_edge = interfaces[-1] if interfaces.size else top
    # Extrapolate bottom boundary by mirroring about the deepest level:
    bottom = depth[-1] + (depth[-1] - upper_edge)
    edges = np.concatenate([[top], interfaces, [bottom]])

    # Define cell_thickness DataArray with descriptive metadata:
    cell_thickness = xr.DataArray(
        data=np.diff(edges),
        dims=('depth',),
        coords={'depth': depth},
        name='cell_thickness',
        attrs={
            'long_name': 'Grid-Cell Thickness',
            'standard_name': 'cell_thickness',
            'units': 'm',
        },
    )

    return cell_thickness
289
+
290
def compute_land_sea_mask(
    da: xr.DataArray,
) -> xr.DataArray:
    """
    Derive a binary land-sea mask from a 2-dimensional variable.

    Grid points where the input is NaN are treated as land; all other
    grid points are treated as ocean:
    * 1 -> ocean grid point
    * 0 -> land grid point

    Parameters:
    -----------
    da : xr.DataArray
        Input variable DataArray containing NaN values on land points.

    Returns:
    --------
    xr.DataArray
        Land-sea mask.

    Raises:
    -------
    TypeError
        If ``da`` is not an xarray DataArray.
    ValueError
        If ``da`` is not 2-dimensional.
    """
    # -- Validate Input -- #
    if not isinstance(da, xr.DataArray):
        raise TypeError("Input must be an xarray DataArray.")
    if da.ndim != 2:
        raise ValueError("Input DataArray must be 2-dimensional.")

    # -- Calculate Land-Sea Mask -- #
    # NaN marks land (0); everything else is ocean (1):
    is_land = np.isnan(da)
    mask = xr.where(is_land, 0, 1)

    # Attach descriptive metadata to the mask:
    mask.attrs.update({
        'long_name': "Land-Sea Binary Mask",
        'standard_name': "sea_binary_mask",
        'comment': " 1 = sea, 0 = land",
    })

    return mask
326
+
327
+
328
def update_icechunk_global_attrs(
    credentials_filepath: str,
    bucket: str,
    prefix: str,
    attrs: dict,
    commit_message: str,
    branch: str='main',
    region: str='us-east-1',
    force_path_style: bool=True,
) -> str:
    """
    Update global attributes of existing Icechunk store via a new
    commit.

    Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS).

    Parameters:
    -----------
    credentials_filepath : str
        Filepath to JSON file containing Icechunk S3 storage credentials.
        The file must provide the keys 'token', 'secret' and 'endpoint_url'.
    bucket : str
        Name of the S3 bucket where the Icechunk store is located.
    prefix : str
        Prefix (path) within the S3 bucket where the Icechunk store is located.
    attrs : dict
        Dictionary of global attributes to update in the root group of the Icechunk store.
    commit_message : str
        Commit message describing the update to the Icechunk store.
    branch : str, optional
        Branch of the Icechunk repository to update (default: 'main').
    region : str, optional
        AWS region where the S3 bucket is located (default: 'us-east-1').
    force_path_style : bool, optional
        Whether to force path-style access for S3 (default: True).

    Returns:
    --------
    str
        Snapshot ID of new commit.

    Raises:
    -------
    TypeError
        If any argument is not of the documented type.
    """
    # -- Validate Input -- #
    if not isinstance(credentials_filepath, str):
        raise TypeError("credentials_filepath must be a string.")
    if not isinstance(bucket, str):
        raise TypeError("bucket must be a string.")
    if not isinstance(prefix, str):
        raise TypeError("prefix must be a string.")
    if not isinstance(attrs, dict):
        raise TypeError("attributes must be a dictionary.")
    if not isinstance(commit_message, str):
        raise TypeError("commit_message must be a string.")
    if not isinstance(branch, str):
        raise TypeError("branch must be a string.")
    if not isinstance(region, str):
        raise TypeError("region must be a string.")
    if not isinstance(force_path_style, bool):
        raise TypeError("force_path_style must be a boolean.")

    # -- Update Icechunk Global Attributes -- #
    # Load Icechunk S3 storage credentials from JSON file; the context
    # manager guarantees the file handle is closed:
    with open(credentials_filepath, 'r', encoding='utf-8') as credentials_file:
        store_credentials = json.load(credentials_file)

    # Define Icechunk storage:
    storage = icechunk.s3_storage(
        bucket=bucket,
        prefix=prefix,
        region=region,
        access_key_id=store_credentials['token'],
        secret_access_key=store_credentials['secret'],
        endpoint_url=store_credentials['endpoint_url'],
        force_path_style=force_path_style,
    )

    # Open Icechunk repository:
    repo = icechunk.Repository.open(storage=storage)
    print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'")

    # Open a writable session on the requested branch & access root group:
    session = repo.writable_session(branch=branch)
    root = zarr.open_group(session.store)

    # Update global attributes & commit changes to repo:
    root.attrs.update(attrs)
    snapshot_id = session.commit(message=commit_message)
    # Report success only once the commit has actually been made:
    print(f"Updated global attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'")

    return snapshot_id
413
+
414
+
415
def update_icechunk_variable_attrs(
    credentials_filepath: str,
    bucket: str,
    prefix: str,
    vars: list[str],
    attrs: list[dict],
    commit_message: str,
    branch: str='main',
    region: str='us-east-1',
    force_path_style: bool=True,
) -> str:
    """
    Update variable attributes of existing Icechunk store via a new
    commit.

    Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS).

    Parameters:
    -----------
    credentials_filepath : str
        Filepath to JSON file containing Icechunk S3 storage credentials.
        The file must provide the keys 'token', 'secret' and 'endpoint_url'.
    bucket : str
        Name of the S3 bucket where the Icechunk store is located.
    prefix : str
        Prefix (path) within the S3 bucket where the Icechunk store is located.
    vars : list[str]
        List of variable names whose attributes are to be updated.
    attrs : list[dict]
        List of dictionaries containing attributes to update for each variable.
        Must have the same length as ``vars``; entries are paired by position.
    commit_message : str
        Commit message describing the update to the Icechunk store.
    branch : str, optional
        Branch of the Icechunk repository to update (default: 'main').
    region : str, optional
        AWS region where the S3 bucket is located (default: 'us-east-1').
    force_path_style : bool, optional
        Whether to force path-style access for S3 (default: True).

    Returns:
    --------
    str
        Snapshot ID of new commit.

    Raises:
    -------
    TypeError
        If any argument is not of the documented type.
    ValueError
        If ``vars`` and ``attrs`` differ in length.
    """
    # -- Validate Input -- #
    if not isinstance(credentials_filepath, str):
        raise TypeError("credentials_filepath must be a string.")
    if not isinstance(bucket, str):
        raise TypeError("bucket must be a string.")
    if not isinstance(prefix, str):
        raise TypeError("prefix must be a string.")
    if not isinstance(vars, list):
        raise TypeError("vars must be a list.")
    if not isinstance(attrs, list):
        raise TypeError("attributes must be a list.")
    if not isinstance(commit_message, str):
        raise TypeError("commit_message must be a string.")
    if not isinstance(branch, str):
        raise TypeError("branch must be a string.")
    if not isinstance(region, str):
        raise TypeError("region must be a string.")
    if not isinstance(force_path_style, bool):
        raise TypeError("force_path_style must be a boolean.")
    # zip() would silently drop unmatched trailing entries, so reject
    # mismatched lists before anything is written:
    if len(vars) != len(attrs):
        raise ValueError("vars and attrs must have the same length.")

    # -- Update Icechunk Variable Attributes -- #
    # Load Icechunk S3 storage credentials from JSON file; the context
    # manager guarantees the file handle is closed:
    with open(credentials_filepath, 'r', encoding='utf-8') as credentials_file:
        store_credentials = json.load(credentials_file)

    # Define Icechunk storage:
    storage = icechunk.s3_storage(
        bucket=bucket,
        prefix=prefix,
        region=region,
        access_key_id=store_credentials['token'],
        secret_access_key=store_credentials['secret'],
        endpoint_url=store_credentials['endpoint_url'],
        force_path_style=force_path_style,
    )

    # Open Icechunk repository:
    repo = icechunk.Repository.open(storage=storage)
    print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'")

    # Open a writable session on the requested branch & access root group:
    session = repo.writable_session(branch=branch)
    root = zarr.open_group(session.store)

    # Update variable attributes & commit changes to repo:
    for var, attr in zip(vars, attrs):
        root[var].attrs.update(attr)

    snapshot_id = session.commit(message=commit_message)
    # Report success only once the commit has actually been made:
    print(f"Updated variable attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'")

    return snapshot_id