OceanDataStore 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- OceanDataStore/__init__.py +21 -0
- OceanDataStore/catalog/__init__.py +12 -0
- OceanDataStore/catalog/oceandatacatalog.py +1242 -0
- OceanDataStore/catalog/stac/README.md +34 -0
- OceanDataStore/catalog/stac/__init__.py +30 -0
- OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
- OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
- OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
- OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
- OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
- OceanDataStore/catalog/stac/template_collection.py +85 -0
- OceanDataStore/catalog/stac/utils.py +476 -0
- OceanDataStore/cli/__init__.py +34 -0
- OceanDataStore/cli/arg_parser.py +182 -0
- OceanDataStore/cli/cli.py +203 -0
- OceanDataStore/cli/exceptions.py +83 -0
- OceanDataStore/cli/icechunk.py +888 -0
- OceanDataStore/cli/logging.py +52 -0
- OceanDataStore/cli/object_store.py +293 -0
- OceanDataStore/cli/utils.py +275 -0
- OceanDataStore/cli/zarr.py +870 -0
- OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
- OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
- OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
- OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
- OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
- OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
- OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
- OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
- OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
- OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
- OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
- OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
- OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
- OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
- OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
- OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
- OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
- OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
- OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
- OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
- OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
- OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
- OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
- OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
- OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
- OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
- OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
- OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
- OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
- OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
- OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
- OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
- OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
- OceanDataStore/data/utils.py +506 -0
- OceanDataStore/zarr.py +993 -0
- oceandatastore-0.3.0.dist-info/METADATA +184 -0
- oceandatastore-0.3.0.dist-info/RECORD +104 -0
- oceandatastore-0.3.0.dist-info/WHEEL +5 -0
- oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
- oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
- oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
- oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
- oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""
|
|
2
|
+
utils.py
|
|
3
|
+
|
|
4
|
+
Description: Utility functions for processing gridded ocean data.
|
|
5
|
+
|
|
6
|
+
Contact: Ollie Tooth (oliver.tooth@noc.ac.uk)
|
|
7
|
+
"""
|
|
8
|
+
# == Import Python packages == #
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
import icechunk
|
|
12
|
+
import numpy as np
|
|
13
|
+
import xarray as xr
|
|
14
|
+
import zarr
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# == Utility Functions == #
|
|
18
|
+
def compute_gc_distance(
    lat1: xr.DataArray,
    lon1: xr.DataArray,
    lat2: xr.DataArray,
    lon2: xr.DataArray
) -> xr.DataArray:
    """
    Calculate the Great-Circle distance between two sets of
    geographical points on the Earth's surface.

    The distance is evaluated with the haversine formula on a sphere
    of radius 6371000 m. Only NumPy universal functions and arithmetic
    are applied to the inputs, so the points are paired element-wise.

    Parameters:
    -----------
    lat1 : xr.DataArray
        Latitude of the first set of points (degrees).
    lon1 : xr.DataArray
        Longitude of the first set of points (degrees).
    lat2 : xr.DataArray
        Latitude of the second set of points (degrees).
    lon2 : xr.DataArray
        Longitude of the second set of points (degrees).

    Returns:
    --------
    dist : xr.DataArray
        Great-circle distance between the two sets
        of points (meters).

    """
    # Define the radius of the Earth in meters:
    re = 6371000

    # Convert latitudes and longitudes from degrees to radians:
    lon1, lat1, lon2, lat2 = map(np.deg2rad, [lon1, lat1, lon2, lat2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine of the central angle between each pair of points:
    hav = (
        np.sin(dlat / 2)**2
        + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    )

    # Floating-point rounding can push the haversine marginally above 1
    # for (near-)antipodal points, which would make arcsin return NaN.
    # np.minimum caps the value at 1 while still propagating NaN inputs.
    hav = np.minimum(hav, 1.0)

    # Calculate the great-circle distance between points:
    dist = 2 * re * np.arcsin(np.sqrt(hav))

    return dist
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def compute_dx(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the zonal (east-west) length of every grid cell in meters.

    The mean spacing of the 'longitude' coordinate is used as the cell
    width. Each cell length is the great-circle distance between the
    points half a spacing to either side of the cell center, evaluated
    at the latitude of the cell center.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        Zonal grid-cell length named 'dx' with dimensions
        ('latitude', 'longitude') and units of 'm'.

    Raises:
    -------
    TypeError
        If ds is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is missing from the dataset coordinates.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    longitude = ds['longitude']
    latitude = ds['latitude']

    # -- Calculate Grid Cell Length -- #
    # Mean spacing between consecutive longitudes (uniform grid assumed):
    lon_step = longitude.diff(dim="longitude").mean().values

    if (longitude.ndim != 1) or (latitude.ndim != 1):
        # Coordinates are not both 1-dimensional; use their values as given:
        lon_centers = longitude.values
        lat_centers = latitude.values
    else:
        # Expand the 1-dimensional axes to (latitude, longitude) cell centers:
        lon_centers, lat_centers = np.meshgrid(longitude.values, latitude.values)

    # Distance between the two longitude edges of each cell:
    half_step = lon_step / 2
    zonal_length = compute_gc_distance(
        lat1=lat_centers,
        lon1=lon_centers - half_step,
        lat2=lat_centers,
        lon2=lon_centers + half_step,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    return xr.DataArray(
        data=zonal_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': latitude, 'longitude': longitude},
        name='dx',
        attrs={
            'long_name': 'Grid-Cell Zonal Length',
            'standard_name': 'cell_x_length',
            'units': 'm',
        },
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def compute_dy(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the meridional (north-south) length of every grid cell in meters.

    The mean spacing of the 'latitude' coordinate is used as the cell
    height. Each cell length is the great-circle distance between the
    points half a spacing to either side of the cell center, evaluated
    at the longitude of the cell center.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        Meridional grid-cell length named 'dy' with dimensions
        ('latitude', 'longitude') and units of 'm'.

    Raises:
    -------
    TypeError
        If ds is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is missing from the dataset coordinates.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    longitude = ds['longitude']
    latitude = ds['latitude']

    # -- Calculate Grid Cell Length -- #
    # Mean spacing between consecutive latitudes (uniform grid assumed):
    lat_step = latitude.diff(dim="latitude").mean().values

    if (longitude.ndim != 1) or (latitude.ndim != 1):
        # Coordinates are not both 1-dimensional; use their values as given:
        lon_centers = longitude.values
        lat_centers = latitude.values
    else:
        # Expand the 1-dimensional axes to (latitude, longitude) cell centers:
        lon_centers, lat_centers = np.meshgrid(longitude.values, latitude.values)

    # Distance between the two latitude edges of each cell:
    half_step = lat_step / 2
    meridional_length = compute_gc_distance(
        lat1=lat_centers - half_step,
        lon1=lon_centers,
        lat2=lat_centers + half_step,
        lon2=lon_centers,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    return xr.DataArray(
        data=meridional_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': latitude, 'longitude': longitude},
        name='dy',
        attrs={
            'long_name': 'Grid-Cell Meridional Length',
            'standard_name': 'cell_y_length',
            'units': 'm',
        },
    )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def compute_cell_area(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Compute the horizontal area of every grid cell in square meters.

    The area is the product of a zonal and a meridional cell length.
    Both lengths are great-circle distances across the cell center,
    spanning the mean 'longitude' spacing and the mean 'latitude'
    spacing of the dataset respectively.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'latitude' and 'longitude' coordinates.

    Returns:
    --------
    xr.DataArray
        Grid-cell area named 'cell_area' with dimensions
        ('latitude', 'longitude') and units of 'm2'.

    Raises:
    -------
    TypeError
        If ds is not an xarray Dataset.
    ValueError
        If 'latitude' or 'longitude' is missing from the dataset coordinates.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if not ('latitude' in ds.coords and 'longitude' in ds.coords):
        raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.")

    longitude = ds['longitude']
    latitude = ds['latitude']

    # -- Calculate Grid Cell Area -- #
    # Mean coordinate spacing in each direction (uniform grid assumed):
    lon_step = longitude.diff(dim="longitude").mean().values
    lat_step = latitude.diff(dim="latitude").mean().values

    if (longitude.ndim != 1) or (latitude.ndim != 1):
        # Coordinates are not both 1-dimensional; use their values as given:
        lon_centers = longitude.values
        lat_centers = latitude.values
    else:
        # Expand the 1-dimensional axes to (latitude, longitude) cell centers:
        lon_centers, lat_centers = np.meshgrid(longitude.values, latitude.values)

    # Zonal length: distance between the two longitude edges of each cell.
    half_lon = lon_step / 2
    zonal_length = compute_gc_distance(
        lat1=lat_centers,
        lon1=lon_centers - half_lon,
        lat2=lat_centers,
        lon2=lon_centers + half_lon,
    )

    # Meridional length: distance between the two latitude edges of each cell.
    half_lat = lat_step / 2
    meridional_length = compute_gc_distance(
        lat1=lat_centers - half_lat,
        lon1=lon_centers,
        lat2=lat_centers + half_lat,
        lon2=lon_centers,
    )

    # Wrap the result in a DataArray carrying descriptive metadata:
    return xr.DataArray(
        data=zonal_length*meridional_length,
        dims=('latitude', 'longitude'),
        coords={'latitude': latitude, 'longitude': longitude},
        name='cell_area',
        attrs={
            'long_name': 'Grid-Cell Area',
            'standard_name': 'cell_area',
            'units': 'm2',
        },
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def compute_cell_thickness(
    ds: xr.Dataset,
) -> xr.DataArray:
    """
    Calculate vertical thickness of each grid cell in meters.

    Cell edges are placed at the sea surface (0.0), at the midpoints
    between consecutive depth levels, and at an extrapolated bottom edge
    chosen so that the deepest level lies midway between its upper and
    lower edges. Because midpoints are used, the depth levels do not
    need to be evenly spaced. A dataset with a single depth level yields
    one cell spanning from the surface to twice that depth.

    NOTE: depth values are assumed to be positive and ordered from
    shallowest to deepest; this is not checked here.

    Parameters:
    -----------
    ds : xr.Dataset
        Input dataset containing 'depth' coordinates.

    Returns:
    --------
    xr.DataArray
        Vertical thickness of each grid cell, named 'cell_thickness',
        with dimension 'depth' and units of 'm'.

    Raises:
    -------
    TypeError
        If ds is not an xarray Dataset.
    ValueError
        If 'depth' is missing from the dataset coordinates, is not
        1-dimensional, or contains no levels.
    """
    # -- Validate Input -- #
    if not isinstance(ds, xr.Dataset):
        raise TypeError("Input must be an xarray Dataset.")
    if 'depth' not in ds.coords:
        raise ValueError("Input dataset must contain 'depth' coordinates.")
    depth = ds['depth'].data

    # Check that depth is 1-dimensional:
    if depth.ndim != 1:
        raise ValueError("Input depth DataArray must be 1-dimensional.")

    # An empty depth axis has no deepest level to extrapolate from:
    n_levels = depth.shape[0]
    if n_levels == 0:
        raise ValueError("Input depth coordinate must contain at least one level.")

    # Find interfaces between vertical levels (empty for a single level):
    interfaces = 0.5 * (depth[:-1] + depth[1:])
    # Use sea surface as top boundary:
    top = 0.0
    # Upper edge of the deepest cell: the last interface, or the sea
    # surface when there is only one level and hence no interface.
    upper_edge = interfaces[-1] if n_levels > 1 else top
    # Extrapolate bottom boundary by mirroring the upper edge about
    # the deepest level:
    bottom = depth[-1] + (depth[-1] - upper_edge)
    edges = np.concatenate([[top], interfaces, [bottom]])

    # Define cell_thickness DataArray with CF-compliant metadata:
    cell_thickness = xr.DataArray(
        data=np.diff(edges),
        dims=('depth',),
        coords={'depth': depth},
        name='cell_thickness',
        attrs={
            'long_name': 'Grid-Cell Thickness',
            'standard_name': 'cell_thickness',
            'units': 'm',
        },
    )

    return cell_thickness
|
|
289
|
+
|
|
290
|
+
def compute_land_sea_mask(
    da: xr.DataArray,
) -> xr.DataArray:
    """
    Derive a binary land-sea mask from the NaN pattern of a 2-D variable.

    The resulting mask is defined as follows:
    * 1 -> ocean grid point (input value is not NaN)
    * 0 -> land grid point (input value is NaN)

    Parameters:
    -----------
    da : xr.DataArray
        Input variable DataArray containing NaN values on land points.

    Returns:
    --------
    xr.DataArray
        Land-sea mask with 'long_name', 'standard_name' and 'comment'
        attributes set.

    Raises:
    -------
    TypeError
        If da is not an xarray DataArray.
    ValueError
        If da is not 2-dimensional.
    """
    # -- Validate Input -- #
    if not isinstance(da, xr.DataArray):
        raise TypeError("Input must be an xarray DataArray.")
    if da.ndim != 2:
        raise ValueError("Input DataArray must be 2-dimensional.")

    # -- Calculate Land-Sea Mask -- #
    # Points holding NaN are treated as land, all others as sea:
    is_land = np.isnan(da)
    land_sea = xr.where(is_land, 0, 1)

    # Attach descriptive metadata to the mask:
    land_sea.attrs.update({
        'long_name': "Land-Sea Binary Mask",
        'standard_name': "sea_binary_mask",
        'comment': " 1 = sea, 0 = land",
    })

    return land_sea
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def update_icechunk_global_attrs(
    credentials_filepath: str,
    bucket: str,
    prefix: str,
    attrs: dict,
    commit_message: str,
    branch: str='main',
    region: str='us-east-1',
    force_path_style: bool=True,
) -> str:
    """
    Update global attributes of existing Icechunk store via a new
    commit.

    Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS).
    The credentials file is read for the keys 'token', 'secret' and
    'endpoint_url'.

    Parameters:
    -----------
    credentials_filepath : str
        Filepath to JSON file containing Icechunk S3 storage credentials.
    bucket : str
        Name of the S3 bucket where the Icechunk store is located.
    prefix : str
        Prefix (path) within the S3 bucket where the Icechunk store is located.
    attrs : dict
        Dictionary of global attributes to update in the root group of the Icechunk store.
    commit_message : str
        Commit message describing the update to the Icechunk store.
    branch : str, optional
        Branch of the Icechunk repository to update (default: 'main').
    region : str, optional
        AWS region where the S3 bucket is located (default: 'us-east-1').
    force_path_style : bool, optional
        Whether to force path-style access for S3 (default: True).

    Returns:
    --------
    str
        Snapshot ID of new commit.

    Raises:
    -------
    TypeError
        If any argument does not have the documented type.
    """
    # -- Validate Input -- #
    if not isinstance(credentials_filepath, str):
        raise TypeError("credentials_filepath must be a string.")
    if not isinstance(bucket, str):
        raise TypeError("bucket must be a string.")
    if not isinstance(prefix, str):
        raise TypeError("prefix must be a string.")
    if not isinstance(attrs, dict):
        raise TypeError("attributes must be a dictionary.")
    if not isinstance(commit_message, str):
        raise TypeError("commit_message must be a string.")
    if not isinstance(branch, str):
        raise TypeError("branch must be a string.")
    if not isinstance(region, str):
        raise TypeError("region must be a string.")
    if not isinstance(force_path_style, bool):
        raise TypeError("force_path_style must be a boolean.")

    # -- Update Icechunk Global Attributes -- #
    # Load Icechunk S3 storage credentials from JSON file. The context
    # manager closes the file handle even if parsing fails:
    with open(credentials_filepath, 'r', encoding='utf-8') as credentials_file:
        store_credentials = json.load(credentials_file)

    # Define Icechunk storage:
    storage = icechunk.s3_storage(
        bucket=bucket,
        prefix=prefix,
        region=region,
        access_key_id=store_credentials['token'],
        secret_access_key=store_credentials['secret'],
        endpoint_url=store_credentials['endpoint_url'],
        force_path_style=force_path_style,
    )

    # Open Icechunk repository:
    repo = icechunk.Repository.open(storage=storage)
    print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'")

    # Open a writable session on the requested branch & access root group:
    session = repo.writable_session(branch=branch)
    root = zarr.open_group(session.store)

    # Update global attributes & commit changes to repo:
    root.attrs.update(attrs)
    snapshot_id = session.commit(message=commit_message)

    # Report success only once the commit call has returned:
    print(f"Updated global attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'")

    return snapshot_id
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def update_icechunk_variable_attrs(
    credentials_filepath: str,
    bucket: str,
    prefix: str,
    vars: list[str],
    attrs: list[dict],
    commit_message: str,
    branch: str='main',
    region: str='us-east-1',
    force_path_style: bool=True,
) -> str:
    """
    Update variable attributes of existing Icechunk store via a new
    commit.

    Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS).
    The credentials file is read for the keys 'token', 'secret' and
    'endpoint_url'. All variables are updated within one session and
    written with a single commit.

    Parameters:
    -----------
    credentials_filepath : str
        Filepath to JSON file containing Icechunk S3 storage credentials.
    bucket : str
        Name of the S3 bucket where the Icechunk store is located.
    prefix : str
        Prefix (path) within the S3 bucket where the Icechunk store is located.
    vars : list[str]
        List of variable names whose attributes are to be updated.
    attrs : list[dict]
        List of dictionaries containing attributes to update for each variable.
        Entries are paired with vars by position, so both lists must have
        the same length.
    commit_message : str
        Commit message describing the update to the Icechunk store.
    branch : str, optional
        Branch of the Icechunk repository to update (default: 'main').
    region : str, optional
        AWS region where the S3 bucket is located (default: 'us-east-1').
    force_path_style : bool, optional
        Whether to force path-style access for S3 (default: True).

    Returns:
    --------
    str
        Snapshot ID of new commit.

    Raises:
    -------
    TypeError
        If any argument does not have the documented type.
    ValueError
        If vars and attrs do not have the same length.
    """
    # -- Validate Input -- #
    if not isinstance(credentials_filepath, str):
        raise TypeError("credentials_filepath must be a string.")
    if not isinstance(bucket, str):
        raise TypeError("bucket must be a string.")
    if not isinstance(prefix, str):
        raise TypeError("prefix must be a string.")
    if not isinstance(vars, list):
        raise TypeError("vars must be a list.")
    if not isinstance(attrs, list):
        raise TypeError("attributes must be a list.")
    if not isinstance(commit_message, str):
        raise TypeError("commit_message must be a string.")
    if not isinstance(branch, str):
        raise TypeError("branch must be a string.")
    if not isinstance(region, str):
        raise TypeError("region must be a string.")
    if not isinstance(force_path_style, bool):
        raise TypeError("force_path_style must be a boolean.")
    # zip() stops at the shorter list, which would silently skip variables
    # or attribute dictionaries; reject mismatched inputs up front:
    if len(vars) != len(attrs):
        raise ValueError("vars and attrs must have the same length.")

    # -- Update Icechunk Variable Attributes -- #
    # Load Icechunk S3 storage credentials from JSON file. The context
    # manager closes the file handle even if parsing fails:
    with open(credentials_filepath, 'r', encoding='utf-8') as credentials_file:
        store_credentials = json.load(credentials_file)

    # Define Icechunk storage:
    storage = icechunk.s3_storage(
        bucket=bucket,
        prefix=prefix,
        region=region,
        access_key_id=store_credentials['token'],
        secret_access_key=store_credentials['secret'],
        endpoint_url=store_credentials['endpoint_url'],
        force_path_style=force_path_style,
    )

    # Open Icechunk repository:
    repo = icechunk.Repository.open(storage=storage)
    print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'")

    # Open a writable session on the requested branch & access root group:
    session = repo.writable_session(branch=branch)
    root = zarr.open_group(session.store)

    # Update variable attributes & commit changes to repo:
    for var, attr in zip(vars, attrs):
        root[var].attrs.update(attr)
    snapshot_id = session.commit(message=commit_message)

    # Report success only once the commit call has returned:
    print(f"Updated variable attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'")

    return snapshot_id
|