OceanDataStore 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- OceanDataStore/__init__.py +21 -0
- OceanDataStore/catalog/__init__.py +12 -0
- OceanDataStore/catalog/oceandatacatalog.py +1242 -0
- OceanDataStore/catalog/stac/README.md +34 -0
- OceanDataStore/catalog/stac/__init__.py +30 -0
- OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
- OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
- OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
- OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
- OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
- OceanDataStore/catalog/stac/template_collection.py +85 -0
- OceanDataStore/catalog/stac/utils.py +476 -0
- OceanDataStore/cli/__init__.py +34 -0
- OceanDataStore/cli/arg_parser.py +182 -0
- OceanDataStore/cli/cli.py +203 -0
- OceanDataStore/cli/exceptions.py +83 -0
- OceanDataStore/cli/icechunk.py +888 -0
- OceanDataStore/cli/logging.py +52 -0
- OceanDataStore/cli/object_store.py +293 -0
- OceanDataStore/cli/utils.py +275 -0
- OceanDataStore/cli/zarr.py +870 -0
- OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
- OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
- OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
- OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
- OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
- OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
- OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
- OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
- OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
- OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
- OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
- OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
- OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
- OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
- OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
- OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
- OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
- OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
- OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
- OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
- OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
- OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
- OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
- OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
- OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
- OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
- OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
- OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
- OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
- OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
- OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
- OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
- OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
- OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
- OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
- OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
- OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
- OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
- OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
- OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
- OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
- OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
- OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
- OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
- OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
- OceanDataStore/data/utils.py +506 -0
- OceanDataStore/zarr.py +993 -0
- oceandatastore-0.3.0.dist-info/METADATA +184 -0
- oceandatastore-0.3.0.dist-info/RECORD +104 -0
- oceandatastore-0.3.0.dist-info/WHEEL +5 -0
- oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
- oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
- oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
- oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
- oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""
|
|
2
|
+
utils.py
|
|
3
|
+
|
|
4
|
+
Description:
|
|
5
|
+
Utility functions to create the National Oceanography Centre
|
|
6
|
+
(NOC) SpatioTemporal Asset Catalog (STAC) and write it to JSON files.
|
|
7
|
+
|
|
8
|
+
Authors:
|
|
9
|
+
- Ollie Tooth (oliver.tooth@noc.ac.uk)
|
|
10
|
+
"""
|
|
11
|
+
# -- Import Python Modules -- #
|
|
12
|
+
import pystac
|
|
13
|
+
import datetime
|
|
14
|
+
import icechunk
|
|
15
|
+
import xarray as xr
|
|
16
|
+
from shapely.geometry import Polygon, mapping
|
|
17
|
+
|
|
18
|
+
# -- I/O Functions -- #
|
|
19
|
+
def open_icechunk_store(
    bucket: str,
    prefix: str,
    branch: str = "main",
    group: str | None = None,
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
) -> xr.Dataset:
    """
    Open a branch of an S3-hosted Icechunk repository as an xarray.Dataset.

    The repository is accessed anonymously through a read-only session.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket holding the Icechunk repository.
    prefix : str
        Prefix of the Icechunk repository inside the bucket.
    branch : str, optional
        Repository branch to read from (default is "main").
    group : str, optional
        Group within the store to open (default is None).
    endpoint_url : str, optional
        S3 endpoint URL (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").

    Returns
    -------
    xr.Dataset
        Dataset opened from the read-only session's store with
        ``consolidated=False``.
    """
    # Anonymous, path-style S3 access settings for the repository.
    # NOTE(review): region is fixed to "us-east-1" – presumably a placeholder
    # for the S3-compatible endpoint; confirm before reusing elsewhere.
    s3_options = dict(
        bucket=bucket,
        prefix=prefix,
        region="us-east-1",
        anonymous=True,
        endpoint_url=endpoint_url,
        force_path_style=True,
    )

    # Open the repository and take a read-only session on the chosen branch:
    repository = icechunk.Repository.open(storage=icechunk.s3_storage(**s3_options))
    session = repository.readonly_session(branch=branch)

    # Hand the session's store to xarray:
    return xr.open_zarr(session.store, group=group, consolidated=False)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# -- STAC Functions -- #
|
|
60
|
+
def create_item_with_zarr_asset(
    id: str,
    ds: xr.Dataset,
    bucket: str,
    prefix: str,
    title: str,
    dataset_type: str = "model",
    product_type: str = "timeseries",
    product_version: str = "1.0",
    institution: str = "National Oceanography Centre, UK",
    platform: str = "gn",
    horizontal_grid_type: str = "curvilinear",
    horizontal_grid_resolution: str = "1 degree",
    vertical_grid_type: str = "zps",
    vertical_grid_coordinate: str = "depth with partial steps",
    vertical_grid_levels: int = 75,
    operation: str = "annual-mean",
    status: str = "completed",
    update_frequency: str = "None",
    variant: str = "r1i1c1f1",
    ocean_component: str = "NEMO v4.2.2",
    sea_ice_component: str = "SI3 v4.0",
    biogeochemistry_component: str = "None",
    atmosphere_component: str = "None",
    atmospheric_forcing: str = "JRA55-do",
    start_date: str = "1976-01-01",
    end_date: str = "2024-02-01",
    bbox: tuple = (-180.0, -90.0, 180.0, 90.0),
    collection: str = "noc-npd-jra55",
    variable_stores: bool = True,
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
    zarr_format: int = 2,
) -> pystac.Item:
    """
    Create a STAC Item from a Zarr Store asset.

    The Item description is generated from ``prefix`` and ``operation``, the
    geometry is the rectangle described by ``bbox``, and a single asset
    (keyed by the last path component of ``prefix``) points at the Zarr store.

    Parameters
    ----------
    id : str
        Unique identifier for the STAC Item.
    ds : xr.Dataset
        Dataset whose dimensions, data variables and variable
        ``standard_name`` attributes are recorded in the Item properties.
    bucket : str
        S3 bucket name where the data is stored.
    prefix : str
        Prefix for the data in the S3 bucket (e.g., "U1y", "U1m", etc.).
    title : str
        Title of the dataset.
    dataset_type : str, optional
        Type of dataset (e.g., "model", "observation", etc.) (default is "model").
    product_type : str, optional
        Type of product (e.g., "climatology", "timeseries", etc.) (default is "timeseries").
    product_version : str, optional
        Version of the product (default is "1.0").
    institution : str, optional
        Institution responsible for producing the dataset
        (default is "National Oceanography Centre, UK").
    platform : str, optional
        Platform name (e.g., "gn_global", "gr_global", etc.) (default is "gn").
    horizontal_grid_type : str, optional
        Type of horizontal grid used in the dataset (e.g., "regular rectilinear",
        "irregular rectilinear", "curvilinear", etc.) (default is "curvilinear").
    horizontal_grid_resolution : str, optional
        Horizontal grid resolution of the dataset (e.g., "1 degree",
        "0.25 degree", etc.) (default is "1 degree").
    vertical_grid_type : str, optional
        Type of vertical grid used in the dataset (e.g., "z", "sigma",
        "hybrid", etc.) (default is "zps").
    vertical_grid_coordinate : str, optional
        Type of vertical coordinate used in the dataset (e.g., "depth",
        "sigma", etc.) (default is "depth with partial steps").
    vertical_grid_levels : int, optional
        Number of vertical levels in the dataset (default is 75).
    operation : str, optional
        Operation string of the form "<frequency> <aggregation>" or
        "<frequency>-<aggregation>" (default is "annual-mean"). It is split
        into the "aggregation_frequency" and "aggregation" properties.
    status : str, optional
        Status of the dataset (e.g., "ongoing", "completed", etc.) (default is "completed").
    update_frequency : str, optional
        Frequency at which the dataset is updated (e.g., "monthly",
        "biannually", etc.) (default is "None").
    variant : str, optional
        Simulation variant string for the dataset (default is "r1i1c1f1").
    ocean_component : str, optional
        Ocean model component used to produce the dataset (default is "NEMO v4.2.2").
    sea_ice_component : str, optional
        Sea ice model component used to produce the dataset (default is "SI3 v4.0").
    biogeochemistry_component : str, optional
        Biogeochemistry model component used to produce the dataset (default is "None").
    atmosphere_component : str, optional
        Atmospheric model component used to produce the dataset (default is "None").
    atmospheric_forcing : str, optional
        Atmospheric forcing used to produce the dataset (default is "JRA55-do").
    start_date : str, optional
        Start date of the dataset in "YYYY-MM-DD" format (default is "1976-01-01").
    end_date : str, optional
        End date of the dataset in "YYYY-MM-DD" format (default is "2024-02-01").
    bbox : tuple, optional
        Bounding box for the dataset in the format (min_lon, min_lat, max_lon, max_lat).
        (default is global coverage).
    collection : str, optional
        STAC Collection to which this Item belongs (default is "noc-npd-jra55").
    variable_stores : bool, optional
        Whether each variable is stored in a separate Zarr store (default is True).
        Only affects the wording of the generated description.
    endpoint_url : str, optional
        S3 endpoint URL, used for the asset href and recorded in the asset's
        extra fields (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").
    zarr_format : int, optional
        Zarr format version (default is 2).

    Returns
    -------
    pystac.Item
        A STAC Item containing the dataset information and an asset pointing to the data.

    Raises
    ------
    IndexError
        If ``operation`` cannot be split into a frequency and an aggregation.
    """
    # Last path component of the prefix names the store / asset:
    store_name = prefix.split('/')[-1]

    # Define the item description based on the prefix:
    output_label = f"{store_name} output" if variable_stores else "outputs"
    operation_label = operation.capitalize()

    # NOTE(review): the branches below are substring tests on the *whole*
    # prefix, and the final branch reads the first character of the whole
    # prefix (not of its last component) – confirm this matches the prefixes
    # used by callers.
    if 'domain' in prefix:
        description = "**Global ocean model domain and mesh mask variables.**"
    elif 'I' in prefix:
        description = f"**{operation_label} global sea-ice {output_label} defined at NEMO model T-points.**"
    elif 'S' in prefix:
        description = f"**{operation_label} global ocean scalar {output_label}.**"
    elif 'M' in prefix:
        description = f"**{operation_label} ocean physics transect {output_label} defined at {store_name}.**"
    else:
        description = f"**{operation_label} global ocean physics {output_label} defined at {prefix[0]}-points.**"

    # Add OceanDataCatalog Access Information to the description:
    description += f"\n\n**OceanDataCatalog Access:**\n`catalog.open_dataset(id='{id}')`"

    # Split the operation into [frequency, aggregation]. Whitespace-separated
    # strings are handled as before; a hyphenated string with no whitespace
    # (such as the default "annual-mean") is split on its last hyphen, which
    # previously raised IndexError.
    operation_parts = operation.split()
    if len(operation_parts) < 2 and "-" in operation:
        frequency, _, aggregation = operation.strip().rpartition("-")
        operation_parts = [frequency, aggregation]

    # Parse the "YYYY-MM-DD" date strings into integer components:
    start_year, start_month, start_day = (int(part) for part in start_date.split("-")[:3])
    end_year, end_month, end_day = (int(part) for part in end_date.split("-")[:3])

    # Define Polygon geometry for the item:
    min_lon, min_lat, max_lon, max_lat = bbox[0], bbox[1], bbox[2], bbox[3]
    polygon = Polygon([
        (min_lon, min_lat),  # SW corner
        (max_lon, min_lat),  # SE corner
        (max_lon, max_lat),  # NE corner
        (min_lon, max_lat),  # NW corner
        (min_lon, min_lat),  # Closing the polygon back to SW corner
    ])

    # Convert the Polygon to GeoJSON format:
    geometry = mapping(polygon)

    # Create a STAC Item with Asset:
    item = pystac.Item(
        id=id,
        geometry=geometry,
        bbox=list(polygon.bounds),  # [min_lon, min_lat, max_lon, max_lat]
        # Nominal datetime: 1 January of the year midway between start and end.
        datetime=datetime.datetime(year=(start_year + end_year) // 2, month=1, day=1),
        start_datetime=datetime.datetime(year=start_year, month=start_month, day=start_day),
        end_datetime=datetime.datetime(year=end_year, month=end_month, day=end_day),
        properties={
            "title": title,
            "description": description,
            "dataset_type": dataset_type,
            "product_type": product_type,
            "product_version": product_version,
            "institution": institution,
            "platform": platform,
            "horizontal_grid_type": horizontal_grid_type,
            "horizontal_grid_resolution": horizontal_grid_resolution,
            "vertical_grid_type": vertical_grid_type,
            "vertical_grid_coordinate": vertical_grid_coordinate,
            "vertical_grid_levels": vertical_grid_levels,
            "dimensions": list(ds.dims),
            "variables": list(ds.data_vars),
            # Fall back to the variable name when no standard_name is set.
            "variable_standard_names": [ds[name].attrs.get('standard_name', name) for name in ds.data_vars],
            "aggregation": operation_parts[1].lower(),
            "aggregation_frequency": operation_parts[0].lower(),
            "status": status,
            "update_frequency": update_frequency,
            "latest_data_update": datetime.datetime.now().isoformat(),
            "variant": variant,
            "ocean_component": ocean_component,
            "sea_ice_component": sea_ice_component,
            "biogeochemistry_component": biogeochemistry_component,
            "atmosphere_component": atmosphere_component,
            "atmospheric_forcing": atmospheric_forcing,
        },
        collection=collection,
    )

    item.add_asset(key=store_name, asset=pystac.Asset(
        # Build the href from endpoint_url so it agrees with the endpoint
        # recorded in extra_fields (identical to before for the default URL).
        href=f"{endpoint_url}/{bucket}/{prefix}",
        title=title,
        description=description,
        media_type="application/vnd.zarr",
        extra_fields=dict(
            endpoint_url=endpoint_url,
            bucket=bucket,
            prefix=prefix,
            zarr_format=zarr_format,
            anonymous=True,
        ),
    ))

    return item
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def create_item_with_icechunk_asset(
    ds: xr.Dataset,
    id: str,
    bucket: str,
    prefix: str,
    title: str | None = None,
    description: str | None = None,
    dataset_type: str | None = None,
    product_type: str | None = None,
    product_version: str | None = None,
    institution: str | None = None,
    citation: str | None = None,
    acknowledgement: str | None = None,
    license: str | None = None,
    doi: str | None = None,
    platform: str | None = None,
    horizontal_grid_type: str | None = None,
    horizontal_grid_resolution: str | None = None,
    vertical_grid_type: str | None = None,
    vertical_grid_coordinate: str | None = None,
    vertical_grid_levels: int | None = None,
    aggregation: str | None = None,
    aggregation_frequency: str | None = None,
    status: str | None = None,
    update_frequency: str | None = None,
    ocean_component: str | None = None,
    sea_ice_component: str | None = None,
    biogeochemistry_component: str | None = None,
    atmosphere_component: str | None = None,
    atmospheric_forcing: str | None = None,
    variant: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    bbox: tuple | None = None,
    collection: str = "noc-npd-era5",
    endpoint_url: str = "https://noc-msm-o.s3-ext.jc.rl.ac.uk",
    group: str | None = None,
    anonymous: bool = True,
) -> pystac.Item:
    """
    Create a STAC Item from an Icechunk Store.

    Every optional metadata argument left as None is read from the
    same-named global attribute in ``ds.attrs`` (and stays None if that
    attribute is absent), unless stated otherwise below.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to be included in the STAC Item. Its global attributes supply
        metadata defaults; its dimensions, data variables and variable
        ``standard_name`` attributes are recorded in the Item properties.
    id : str
        Unique identifier for the STAC Item.
    bucket : str
        S3 bucket name where the dataset is stored.
    prefix : str
        Prefix for the dataset in the S3 bucket (e.g., "U1y", "U1m", etc.).
        Its last path component is used as the asset key.
    title : str, optional
        Title of the dataset.
    description : str, optional
        Description of the dataset (falls back to the "description" attribute,
        then to an empty string). OceanDataCatalog access instructions are
        always appended.
    dataset_type : str, optional
        Type of dataset (e.g., "model", "observation", etc.). When the resolved
        value is "model", the model-specific properties (variant and the
        component/forcing fields) are added to the Item.
    product_type : str, optional
        Type of product (e.g., "climatology", "timeseries", etc.).
    product_version : str, optional
        Version of the product.
    institution : str, optional
        Institution responsible for producing the dataset.
    citation : str, optional
        Citation for the dataset.
    acknowledgement : str, optional
        Acknowledgement for the dataset.
    license : str, optional
        License for the dataset.
    doi : str, optional
        Digital Object Identifier (DOI) for the dataset.
    platform : str, optional
        Platform string (e.g., "gn", "gr", "tn", etc.).
    horizontal_grid_type : str, optional
        Type of horizontal grid used in the dataset (e.g., "regular rectilinear",
        "irregular rectilinear", "curvilinear", etc.).
    horizontal_grid_resolution : str, optional
        Horizontal resolution of the dataset (e.g., "1 degree", "0.25 degree", etc.).
    vertical_grid_type : str, optional
        Type of vertical grid used in the dataset (e.g., "z", "sigma", "hybrid", etc.).
    vertical_grid_coordinate : str, optional
        Type of vertical coordinate used in the dataset (e.g., "depth", "sigma", etc.).
    vertical_grid_levels : int, optional
        Number of vertical levels in the dataset.
    aggregation : str, optional
        Type of aggregation used to produce the dataset (e.g., "mean", "max", etc.).
    aggregation_frequency : str, optional
        Frequency at which the aggregation is applied (e.g., "monthly", "biannually", etc.).
    status : str, optional
        Status of the dataset (e.g., "ongoing", "completed", etc.).
    update_frequency : str, optional
        Frequency at which the dataset is updated (e.g., "monthly", "biannually", etc.).
    ocean_component : str, optional
        Ocean model component used to produce the dataset (e.g., "NEMO v4.2.2", etc.).
    sea_ice_component : str, optional
        Sea ice model component used to produce the dataset (e.g., "CICE v6.1", etc.).
    biogeochemistry_component : str, optional
        Biogeochemistry model component used to produce the dataset (e.g., "PISCES v2", etc.).
    atmosphere_component : str, optional
        Atmospheric model component used to produce the dataset
        (e.g., "UKMO UM Global Atmosphere 7.1", etc.).
    atmospheric_forcing : str, optional
        Atmospheric forcing used to produce the dataset (e.g., "ERA5", "JRA-55", etc.).
    variant : str, optional
        Configuration variant string for the dataset. Always recorded in the
        asset's extra fields.
    start_date : str, optional
        Start date of the dataset in "YYYY-MM-DD" format. Required either as
        an argument or as the "start_date" global attribute.
    end_date : str, optional
        End date of the dataset in "YYYY-MM-DD" format. Required either as
        an argument or as the "end_date" global attribute.
    bbox : tuple, optional
        Bounding box in the format (min_lon, min_lat, max_lon, max_lat), given
        either as a sequence of numbers or as a string such as
        "[-180.0, -90.0, 180.0, 90.0]". Falls back to the "bbox" global
        attribute, then to global coverage.
    collection : str, optional
        Collection to which this Item belongs (default is "noc-npd-era5").
    endpoint_url : str, optional
        The S3 endpoint URL (default is "https://noc-msm-o.s3-ext.jc.rl.ac.uk").
    group : str, optional
        Group within the Icechunk repository, recorded in the asset's extra
        fields (default is None).
    anonymous : bool, optional
        Whether anonymous access is supported for the S3 asset (default is True).

    Returns
    -------
    pystac.Item
        STAC Item containing the dataset metadata and associated dataset asset.

    Raises
    ------
    ValueError
        If ``start_date`` or ``end_date`` is neither passed as an argument nor
        present as a global dataset attribute.
    """
    def _arg_or_attr(value, key):
        """Return ``value``, or ``ds.attrs[key]`` (None if absent) when it is None."""
        return ds.attrs.get(key, None) if value is None else value

    # === Geometry === #
    # Collect bounding box from dataset attributes if not provided:
    if bbox is None:
        bbox = ds.attrs.get("bbox", "[-180.0, -90.0, 180.0, 90.0]")

    # Accept both the string form stored in dataset attributes and a plain
    # sequence of numbers (the annotated tuple previously raised
    # AttributeError because ``.replace`` was called on it unconditionally).
    if isinstance(bbox, str):
        bbox_values = bbox.translate(str.maketrans("", "", "[]()")).split(",")
    else:
        bbox_values = bbox
    bounds = [float(bound) for bound in bbox_values]

    # Define Polygon geometry for the item:
    min_lon, min_lat, max_lon, max_lat = bounds[0], bounds[1], bounds[2], bounds[3]
    polygon = Polygon([
        (min_lon, min_lat),  # SW corner
        (max_lon, min_lat),  # SE corner
        (max_lon, max_lat),  # NE corner
        (min_lon, max_lat),  # NW corner
        (min_lon, min_lat),  # Closing the polygon back to SW corner
    ])

    # Convert the Polygon to GeoJSON format:
    geometry = mapping(polygon)

    # === Properties === #
    # Add OceanDataCatalog Access Information to description:
    if description is None:
        description = ds.attrs.get("description", "")
    description += f"\n\n**OceanDataCatalog Access:**\n`catalog.open_dataset(id='{id}')`"

    # Define start and end datetimes for the Item:
    start_date = _arg_or_attr(start_date, "start_date")
    if start_date is None:
        raise ValueError("'start_date' must be provided either as a parameter or as a global dataset attribute.")
    end_date = _arg_or_attr(end_date, "end_date")
    if end_date is None:
        raise ValueError("'end_date' must be provided either as a parameter or as a global dataset attribute.")

    start_year, start_month, start_day = (int(part) for part in start_date.split("-")[:3])
    end_year, end_month, end_day = (int(part) for part in end_date.split("-")[:3])

    # Values used by both the Item properties and the asset:
    resolved_title = _arg_or_attr(title, "title")
    resolved_variant = _arg_or_attr(variant, "variant")

    # Define standard properties dictionary:
    properties = {
        "title": resolved_title,
        "description": description,
        "dataset_type": _arg_or_attr(dataset_type, "dataset_type"),
        "product_type": _arg_or_attr(product_type, "product_type"),
        "product_version": _arg_or_attr(product_version, "product_version"),
        "institution": _arg_or_attr(institution, "institution"),
        "citation": _arg_or_attr(citation, "citation"),
        "acknowledgement": _arg_or_attr(acknowledgement, "acknowledgement"),
        "license": _arg_or_attr(license, "license"),
        "doi": _arg_or_attr(doi, "doi"),
        "platform": _arg_or_attr(platform, "platform"),
        "horizontal_grid_type": _arg_or_attr(horizontal_grid_type, "horizontal_grid_type"),
        "horizontal_grid_resolution": _arg_or_attr(horizontal_grid_resolution, "horizontal_grid_resolution"),
        "vertical_grid_type": _arg_or_attr(vertical_grid_type, "vertical_grid_type"),
        "vertical_grid_coordinate": _arg_or_attr(vertical_grid_coordinate, "vertical_grid_coordinate"),
        "vertical_grid_levels": _arg_or_attr(vertical_grid_levels, "vertical_grid_levels"),
        "dimensions": list(ds.dims),
        "variables": list(ds.data_vars),
        # Fall back to the variable name when no standard_name is set.
        "variable_standard_names": [ds[name].attrs.get('standard_name', name) for name in ds.data_vars],
        "aggregation": _arg_or_attr(aggregation, "aggregation"),
        "aggregation_frequency": _arg_or_attr(aggregation_frequency, "aggregation_frequency"),
        "status": _arg_or_attr(status, "status"),
        "update_frequency": _arg_or_attr(update_frequency, "update_frequency"),
        "latest_data_update": datetime.datetime.now().isoformat(),
    }

    if properties["dataset_type"] == "model":
        # Append numerical model specific properties:
        properties.update({
            "variant": resolved_variant,
            "ocean_component": _arg_or_attr(ocean_component, "ocean_component"),
            "sea_ice_component": _arg_or_attr(sea_ice_component, "sea_ice_component"),
            "biogeochemistry_component": _arg_or_attr(biogeochemistry_component, "biogeochemistry_component"),
            "atmosphere_component": _arg_or_attr(atmosphere_component, "atmosphere_component"),
            "atmospheric_forcing": _arg_or_attr(atmospheric_forcing, "atmospheric_forcing"),
        })

    # === Create a STAC Item with Asset === #
    item = pystac.Item(
        id=id,
        geometry=geometry,
        bbox=list(polygon.bounds),
        # Nominal datetime: 1 January of the year midway between start and end.
        datetime=datetime.datetime(year=(start_year + end_year) // 2, month=1, day=1),
        start_datetime=datetime.datetime(year=start_year, month=start_month, day=start_day),
        end_datetime=datetime.datetime(year=end_year, month=end_month, day=end_day),
        properties=properties,
        collection=collection,
    )

    item.add_asset(key=prefix.split('/')[-1], asset=pystac.Asset(
        href=f"{endpoint_url}/{bucket}/{prefix}",
        title=resolved_title,
        description=description,
        media_type="application/vnd.zarr+icechunk",
        extra_fields=dict(
            endpoint_url=endpoint_url,
            bucket=bucket,
            prefix=prefix,
            variant=resolved_variant,
            group=group,
            anonymous=anonymous,
        ),
    ))

    return item
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# ===================================================================
|
|
2
|
+
# Copyright 2026 National Oceanography Centre
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0.
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
11
|
+
# implied. See the License for the specific language governing
|
|
12
|
+
# permissions and limitations under the License.
|
|
13
|
+
# ===================================================================
|
|
14
|
+
"""
|
|
15
|
+
OceanDataStore: CLI Sub-package
|
|
16
|
+
|
|
17
|
+
An open-source Python library to streamline writing, updating and accessing
|
|
18
|
+
ocean data stored in cloud object storage.
|
|
19
|
+
"""
|
|
20
|
+
__author__ = "Ollie Tooth, Joao Morado, Tobias Ferreira"
|
|
21
|
+
__credits__ = "National Oceanography Centre (NOC), Southampton, UK"
|
|
22
|
+
|
|
23
|
+
from OceanDataStore.cli.icechunk import (
|
|
24
|
+
send_to_icechunk,
|
|
25
|
+
update_icechunk,
|
|
26
|
+
)
|
|
27
|
+
from OceanDataStore.cli.logging import initialise_logging
|
|
28
|
+
from OceanDataStore.cli.utils import ObjectStoreS3, list_objects
|
|
29
|
+
from OceanDataStore.cli.zarr import (
|
|
30
|
+
send_to_zarr,
|
|
31
|
+
update_zarr,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Public API of the CLI sub-package: the names imported above that are
# re-exported by `from OceanDataStore.cli import *`.
__all__ = (
    "initialise_logging",
    "send_to_zarr",
    "send_to_icechunk",
    "update_zarr",
    "update_icechunk",
    "list_objects",
    "ObjectStoreS3",
)
|