OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=send_era5_daily
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_ERA5_daily_to_os.slurm
#
# Description: SLURM script to send the ERA5 daily
# time-series dataset to Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipe stage.
set -euo pipefail

# -- Python Environment -- #
# Activate the miniforge (conda) environment. Activation hooks may read
# variables that are unset in a non-interactive batch shell, so `nounset`
# is relaxed while they run and restored straight afterwards.
set +u
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods
set -u

# -- Send ERA5 daily time-series datasets to JASMIN OS -- #
echo "In Progress: Sending ERA5 daily time-series to Icechunk..."

# NOTE: the relative path assumes the job is submitted from the directory
# that contains send_ERA5_daily_to_os.py.
python3 send_ERA5_daily_to_os.py

echo "Completed: Sent ERA5 daily time-series to Icechunk."
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=send_era5_monthly_to_os
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_ERA5_monthly_to_os.slurm
#
# Description: SLURM script to send the ERA5 monthly
# time-series dataset to Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipe stage.
set -euo pipefail

# -- Python Environment -- #
# Activate the miniforge (conda) environment. Activation hooks may read
# variables that are unset in a non-interactive batch shell, so `nounset`
# is relaxed while they run and restored straight afterwards.
set +u
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods
set -u

# -- Send ERA5 monthly time-series datasets to JASMIN OS -- #
echo "In Progress: Sending ERA5 monthly time-series to Icechunk..."

# NOTE: the relative path assumes the job is submitted from the directory
# that contains send_ERA5_monthly_to_os.py.
python3 send_ERA5_monthly_to_os.py

echo "Completed: Sent ERA5 monthly time-series to Icechunk."
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=update_era5_daily
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_update_ERA5_daily_to_os.slurm
#
# Description: SLURM script to update the ERA5 daily
# time-series dataset in the Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipe stage.
set -euo pipefail

# -- Python Environment -- #
# Activate the miniforge (conda) environment. Activation hooks may read
# variables that are unset in a non-interactive batch shell, so `nounset`
# is relaxed while they run and restored straight afterwards.
set +u
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods
set -u

# -- Update ERA5 daily time-series datasets in JASMIN OS -- #
echo "In Progress: Updating ERA5 daily time-series in Icechunk..."

# NOTE: the relative path assumes the job is submitted from the directory
# that contains update_ERA5_daily_to_os.py.
python3 update_ERA5_daily_to_os.py

echo "Completed: Updated ERA5 daily time-series in Icechunk."
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=update_era5_monthly_to_os
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_update_ERA5_monthly_to_os.slurm
#
# Description: SLURM script to update the ERA5 monthly
# time-series dataset in the Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipe stage.
set -euo pipefail

# -- Python Environment -- #
# Activate the miniforge (conda) environment. Activation hooks may read
# variables that are unset in a non-interactive batch shell, so `nounset`
# is relaxed while they run and restored straight afterwards.
set +u
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods
set -u

# -- Update ERA5 monthly time-series datasets in JASMIN OS -- #
echo "In Progress: Updating ERA5 monthly time-series in Icechunk..."

# NOTE: the relative path assumes the job is submitted from the directory
# that contains update_ERA5_monthly_to_os.py.
python3 update_ERA5_monthly_to_os.py

echo "Completed: Updated ERA5 monthly time-series in Icechunk."
@@ -0,0 +1,159 @@
1
+ # =========================================================
2
+ # send_ERA5_daily_climatology_to_os.py
3
+ #
4
+ # Script to write ERA5 long-term daily climatologies
5
+ # to Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import numpy as np
12
+ import xarray as xr
13
+ import zarr
14
+
15
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
16
+ from OceanDataStore.data.utils import (
17
+ compute_land_sea_mask,
18
+ compute_cell_area,
19
+ compute_dx,
20
+ compute_dy,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Write the ERA5 daily SST climatology to an Icechunk repository.

    Opens the climatology NetCDF file for ``start_yr``-``end_yr``, renames
    ``dayofyear`` to ``day``, wraps longitudes into [-180, 180), converts
    every ``sst*`` variable from kelvin to degrees Celsius (renaming it to
    ``tos*``), adds the ancillary grid variables and climatological time
    bounds, replaces the global attributes, re-chunks the dataset and hands
    it to ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Define climatology period:
    start_yr = 1996
    end_yr = 2025

    # Use the module-level logger (not the root logger) so records carry
    # this module's name.
    logger.info(f"In Progress: Sending ERA5 daily climatology for {start_yr}-{end_yr} to Icechunk...")
    # Open ERA5 dataset:
    filepath = f"/dssgfs01/scratch/otooth/npd_data/observations/ERA5/climatology/ERA5_sst_climatology_{start_yr}-{end_yr}.nc"
    ds = xr.open_dataset(filepath)

    # Standardise coordinate dimension names:
    ds = ds.rename({"dayofyear": "day"})

    # Update longitude coordinates to be in the range [-180, 180):
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Add day of year coordinate (1-366):
    ds = ds.assign_coords(
        day=np.arange(1, 367)
    )

    # Update variable names, units, and attributes:
    ds = ds.drop_vars(["quantile"])
    # Snapshot the SST variable names up front: the dataset is modified and
    # renamed below, so it must not be iterated while that happens.
    sst_vars = [var for var in ds.data_vars if "sst" in var]
    for var in sst_vars:
        # Transform units degK -> degC:
        ds[var] = ds[var] - 273.15
        # Add standard names and units:
        ds[var].attrs["standard_name"] = "sea_surface_temperature"
        ds[var].attrs["units"] = "degC"
    # Rename variables to standard names (sst* -> tos*):
    ds = ds.rename({var: var.replace("sst", "tos") for var in sst_vars})

    # Update variable long names:
    ds["tos_mean"].attrs["long_name"] = "Daily Mean Sea Surface Temperature Climatology"
    ds["tos_variance"].attrs["long_name"] = "Daily Variance Sea Surface Temperature Climatology"
    ds["tos_p10"].attrs["long_name"] = "Daily 10th Percentile Sea Surface Temperature Climatology"
    ds["tos_p90"].attrs["long_name"] = "Daily 90th Percentile Sea Surface Temperature Climatology"
    ds["tos_minimum"].attrs["long_name"] = "Daily Minimum Sea Surface Temperature Climatology"
    ds["tos_maximum"].attrs["long_name"] = "Daily Maximum Sea Surface Temperature Climatology"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos_mean'].isel(day=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update time bounds to reflect climatological period. The array is
    # filled before it is wrapped in a DataArray, so the stored values never
    # depend on in-place writes through ``ds[...].data``.
    n_days = ds['day'].size
    day_offsets = np.timedelta64(1, 'D') * np.arange(n_days)
    time_bnds = np.zeros((n_days, 2), dtype='datetime64[ns]')
    # Column 0: the day counted from 1 January of the first year;
    # column 1: the same day counted from 1 January of the last year.
    time_bnds[:, 0] = (np.datetime64(f'{start_yr}-01-01', 'D') + day_offsets).astype('datetime64[ns]')
    time_bnds[:, 1] = (np.datetime64(f'{end_yr}-01-01', 'D') + day_offsets).astype('datetime64[ns]')
    ds['time_bnds'] = xr.DataArray(
        time_bnds,
        dims=('day', 'bnds'),
        coords={'day': ds['day']},
    )

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": f"ERA-5 Daily Climatology ({start_yr}-{end_yr})",
        "description": f"ERA-5 Sea Surface Temperature Daily Climatology ({start_yr}-{end_yr}).",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "climatology",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'day': 5, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings (same Blosc/zstd compressor everywhere):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message based on climatology period:
    prefix = f"era5_{start_yr}_{end_yr}_daily_climatology"
    commit_message = f"Added ERA5 SST Daily Climatology ({start_yr}-{end_yr})."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 20,
        "threads_per_worker": 1,
        "memory_limit": "2GB",
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='day',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,141 @@
1
+ # =========================================================
2
+ # send_ERA5_daily_to_os.py
3
+ #
4
+ # Script to write ERA5 daily data to Icechunk repositories
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_land_sea_mask,
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def main():
    """Write the ERA5 daily SST time series to an Icechunk repository.

    Opens the daily ``sst_y198?m??_daily.nc`` files as one dataset, wraps
    longitudes into [-180, 180), converts every ``sst*`` variable from
    kelvin to degrees Celsius (renaming it to ``tos*``), adds the ancillary
    grid variables, replaces the global attributes, re-chunks the dataset
    and hands it to ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Use the module-level logger (not the root logger) so records carry
    # this module's name.
    logger.info("In Progress: Sending ERA5 daily data to Icechunk...")
    # Open ERA5 dataset:
    filepath = "/dssgfs01/scratch/atb299/ERA5_daily/ERA5_daily_fields/sst_y198?m??_daily.nc"
    ds = xr.open_mfdataset(
        filepath,
        combine="by_coords",
        data_vars="all",
        engine="h5netcdf",
        chunks={"time": -1, "latitude": -1, "longitude": -1},
    )

    # Update longitude coordinates to be in the range [-180, 180):
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Update variable names, units, and attributes:
    if "number" in ds.data_vars:
        ds = ds.drop_vars(["number"])
    # Snapshot the SST variable names up front: the dataset is modified and
    # renamed below, so it must not be iterated while that happens.
    sst_vars = [var for var in ds.data_vars if "sst" in var]
    for var in sst_vars:
        # Transform units degK -> degC:
        ds[var] = ds[var] - 273.15
        # Add standard names and units:
        ds[var].attrs["standard_name"] = "sea_surface_temperature"
        ds[var].attrs["units"] = "degC"
    # Rename variables to standard names (sst* -> tos*):
    ds = ds.rename({var: var.replace("sst", "tos") for var in sst_vars})

    # Update variable long names:
    ds["tos"].attrs["long_name"] = "Daily Mean Sea Surface Temperature"
    ds["tos_var"].attrs["long_name"] = "Daily Variance Sea Surface Temperature"
    ds["tos_min"].attrs["long_name"] = "Daily Minimum Sea Surface Temperature"
    ds["tos_max"].attrs["long_name"] = "Daily Maximum Sea Surface Temperature"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Daily Timeseries",
        "description": "ERA5 daily sea surface temperature timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for time-series analysis (whole time axis per
    # chunk, small spatial tiles):
    ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50})

    # Update variable encodings (same Blosc/zstd compressor everywhere):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the time-series repository:
    prefix = "era5_daily_timeseries"
    commit_message = "Added ERA5 Sea Surface Daily Timeseries (1980-01-1989-12)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 25,
        "threads_per_worker": 1,
        "memory_limit": "4GB",
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,173 @@
1
+ # =========================================================
2
+ # send_ERA5_monthly_to_os.py
3
+ #
4
+ # Script to write ERA5 monthly data to Icechunk repositories
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_land_sea_mask,
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def _wrap_longitude(ds):
    """Return ``ds`` with longitudes wrapped into [-180, 180) and sorted."""
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    return ds.sortby("longitude")


def main():
    """Write the ERA5 monthly SST and sea-ice time series to Icechunk.

    Opens the monthly ``sst_*`` and ``siconc_*`` files as two datasets,
    wraps their longitudes into [-180, 180), converts every ``sst*``
    variable from kelvin to degrees Celsius (renaming it to ``tos*``), tags
    the ``siconc*`` variables, merges both datasets, adds the ancillary grid
    variables, replaces the global attributes, re-chunks the result and
    hands it to ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Use the module-level logger (not the root logger) so records carry
    # this module's name.
    logger.info("In Progress: Sending ERA5 monthly data to Icechunk...")
    # Open ERA5 datasets (identical options for SST and sea ice):
    open_kwargs = {
        "combine": "by_coords",
        "data_vars": "all",
        "engine": "h5netcdf",
        "chunks": {"time": -1, "latitude": -1, "longitude": -1},
    }
    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/sst_y198?m??_monthly.nc"
    ds_sst = xr.open_mfdataset(filepath, **open_kwargs)

    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/siconc_y198?m??_monthly.nc"
    ds_si = xr.open_mfdataset(filepath, **open_kwargs)

    # SST / SIC: Update longitude coordinates to be in the range [-180, 180):
    ds_sst = _wrap_longitude(ds_sst)
    ds_si = _wrap_longitude(ds_si)

    # SST: Update variable names, units, and attributes:
    if "number" in ds_sst.data_vars:
        ds_sst = ds_sst.drop_vars(["number"])
    # Snapshot the SST variable names up front: the dataset is modified and
    # renamed below, so it must not be iterated while that happens.
    sst_vars = [var for var in ds_sst.data_vars if "sst" in var]
    for var in sst_vars:
        # Transform units degK -> degC:
        ds_sst[var] = ds_sst[var] - 273.15
        # Add standard names and units:
        ds_sst[var].attrs["standard_name"] = "sea_surface_temperature"
        ds_sst[var].attrs["units"] = "degC"
    # Rename variables to standard names (sst* -> tos*):
    ds_sst = ds_sst.rename({var: var.replace("sst", "tos") for var in sst_vars})

    # SIC: Update variable names, units, and attributes:
    if "number" in ds_si.data_vars:
        ds_si = ds_si.drop_vars(["number"])
    for var in ds_si.data_vars:
        if "siconc" in var:
            # Add standard names and units:
            ds_si[var].attrs["standard_name"] = "sea_ice_area_fraction"
            ds_si[var].attrs["units"] = "1"

    # SST: Update variable long names. This is the monthly product (see
    # "aggregation_frequency" below), so the long names say "Monthly"; the
    # previous "Daily ..." wording was carried over from the daily script.
    ds_sst["tos"].attrs["long_name"] = "Monthly Mean Sea Surface Temperature"
    ds_sst["tos_var"].attrs["long_name"] = "Monthly Variance Sea Surface Temperature"
    ds_sst["tos_min"].attrs["long_name"] = "Monthly Minimum Sea Surface Temperature"
    ds_sst["tos_max"].attrs["long_name"] = "Monthly Maximum Sea Surface Temperature"

    # SIC: Update variable long names:
    ds_si["siconc"].attrs["long_name"] = "Monthly Mean Sea Ice Area Fraction"
    ds_si["siconc_var"].attrs["long_name"] = "Monthly Variance Sea Ice Area Fraction"
    ds_si["siconc_min"].attrs["long_name"] = "Monthly Minimum Sea Ice Area Fraction"
    ds_si["siconc_max"].attrs["long_name"] = "Monthly Maximum Sea Ice Area Fraction"

    # Merge SST and SIC datasets:
    ds = xr.merge([ds_sst, ds_si], compat="override", join="override")

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Monthly Timeseries",
        "description": "ERA5 monthly sea surface temperature and sea ice area fraction timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "monthly",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis (one full field per chunk):
    ds = ds.chunk({'time': 1, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings (same Blosc/zstd compressor everywhere):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the time-series repository:
    prefix = "era5_monthly_timeseries"
    commit_message = "Added ERA5 Sea Surface Monthly Timeseries (1980-01-1989-12)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 25,
        "threads_per_worker": 1,
        "memory_limit": "4GB",
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )


if __name__ == "__main__":
    main()