OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,165 @@
1
+ # =========================================================
2
+ # send_EN4.2.2_analysis_g10_monthly_to_os.py
3
+ #
4
+ # Script to write EN.4.2.2 analyses to Icechunk repository
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_cell_area,
17
+ compute_dx,
18
+ compute_dy,
19
+ compute_land_sea_mask,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def main():
    """Prepare the EN.4.2.2 g10 monthly analyses and send them to Icechunk.

    Opens every netCDF file matching the hard-coded glob, standardises the
    coordinate and variable names, replaces the global attributes, attaches
    grid ancillary variables, rechunks, sets a Blosc/zstd compressor on
    every variable and finally calls ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Source files for the EN.4.2.2 analyses:
    source_glob = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.19*.nc"
    ds = xr.open_mfdataset(
        source_glob,
        combine="by_coords",
        data_vars="all",
        engine="netcdf4",
    )

    # Coordinate dimensions use their full names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude"})

    # Wrap longitudes into [-180, 180] and restore ascending order:
    wrapped_longitude = ((ds["longitude"] + 180) % 360) - 180
    ds = ds.assign_coords(longitude=wrapped_longitude).sortby("longitude")

    # Map the source variable names onto the standard names:
    standard_names = {
        "temperature": "thetao",
        "salinity": "so",
        "temperature_uncertainty": "thetao_uncertainty",
        "salinity_uncertainty": "so_uncertainty",
        "temperature_observation_weights": "thetao_obs_weights",
        "salinity_observation_weights": "so_obs_weights",
    }
    ds = ds.rename(standard_names)

    # Long names, keyed by (renamed) variable:
    long_names = {
        "thetao": "Potential Temperature",
        "so": "Practical Salinity",
        "thetao_uncertainty": "Potential Temperature Error Standard Deviation",
        "so_uncertainty": "Practical Salinity Error Standard Deviation",
        "thetao_obs_weights": "Potential Temperature Observation Weights",
        "so_obs_weights": "Practical Salinity Observation Weights",
    }
    for name, long_name in long_names.items():
        ds[name].attrs.update({"long_name": long_name})

    # Replace the global attributes wholesale:
    ds.attrs.clear()
    global_attrs = {
        "Conventions": "CF-1.0",
        "title": "EN.4.2.2 ocean temperature and salinity monthly timeseries.",
        "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly timeseries from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.",
        "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "Met Office, UK",
        "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.",
        "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.",
        "acknowledgement": "None",
        "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.",
        "doi": "None",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "1 degree",
        "vertical_grid_type": "z",
        "vertical_grid_coordinate": "depth",
        "vertical_grid_levels": 42,
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    }
    ds = ds.assign_attrs(global_attrs)

    # Ancillary variables. The order is kept as-is: each helper receives the
    # dataset including whatever was attached before it.
    surface_thetao = ds["thetao"].isel(time=0, depth=0)
    ds["mask"] = compute_land_sea_mask(surface_thetao)
    ds["dx"] = compute_dx(ds)
    ds["dy"] = compute_dy(ds)
    ds["cell_area"] = compute_cell_area(ds)

    # Layer thickness from the depth bounds of the first time step, and the
    # cell volume derived from it:
    upper_bound = ds["depth_bnds"].isel(bnds=1)
    lower_bound = ds["depth_bnds"].isel(bnds=0)
    ds["cell_thickness"] = (upper_bound - lower_bound).isel(time=0)
    ds["cell_volume"] = ds["cell_thickness"] * ds["cell_area"]

    ds["cell_thickness"].attrs.update(
        long_name="Grid-Cell Thickness",
        standard_name="cell_thickness",
        units="m",
    )
    ds["cell_volume"].attrs.update(
        long_name="Grid-Cell Volume",
        standard_name="cell_volume",
        units="m3",
    )

    # ========== Send to Icechunk Repository ========== #
    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({"time": 1, "depth": 20, "latitude": 173, "longitude": 360})

    # Same Blosc (zstd, level 3, byte shuffle) compressor for every data
    # variable and coordinate:
    compressor = zarr.codecs.BloscCodec(
        cname="zstd",
        clevel=3,
        shuffle=zarr.codecs.BloscShuffle.shuffle,
    )
    for name in [*ds.data_vars, *ds.coords]:
        ds[name].encoding["compressors"] = [compressor]

    work_directory = ".../OceanDataStore/OceanDataStore/data/EN.4.2.2/"
    send_to_icechunk(
        file=ds,
        bucket="en4.2.2",
        object_prefix="en4.2.2_analysis_g10_monthly",
        store_credentials_json=".../credentials/jasmin_os_credentials.json",
        exists=False,
        append_dim="time",
        branch="main",
        commit_message="Added EN.4.2.2.analysis.g10 monthly (1950-01-1999-12).",
        variable_commits=True,
        dask_config_kwargs={
            "temporary_directory": work_directory,
            "local_directory": work_directory,
        },
        dask_cluster_kwargs={
            "n_workers": 25,
            "threads_per_worker": 1,
            "memory_limit": "3GB",
        },
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,161 @@
1
+ # =========================================================
2
+ # update_EN4.2.2_analysis_g10_monthly_to_os.py
3
+ #
4
+ # Script to update EN.4.2.2 analyses in Icechunk repository
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, update_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_cell_area,
17
+ compute_dx,
18
+ compute_dy,
19
+ compute_land_sea_mask,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def main():
    """Prepare the post-2000 EN.4.2.2 g10 analyses and update Icechunk.

    Opens every netCDF file matching the hard-coded glob, standardises the
    coordinate and variable names, replaces the global attributes, attaches
    grid ancillary variables, rechunks, sets a Blosc/zstd compressor on
    every variable and finally calls ``update_icechunk`` with ``time`` as
    the append dimension.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Source files for the EN.4.2.2 analyses:
    source_glob = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.20*.nc"
    ds = xr.open_mfdataset(
        source_glob,
        combine="by_coords",
        data_vars="all",
        engine="netcdf4",
    )

    # Coordinate dimensions use their full names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude"})

    # Wrap longitudes into [-180, 180] and restore ascending order:
    wrapped_longitude = ((ds["longitude"] + 180) % 360) - 180
    ds = ds.assign_coords(longitude=wrapped_longitude).sortby("longitude")

    # Map the source variable names onto the standard names:
    standard_names = {
        "temperature": "thetao",
        "salinity": "so",
        "temperature_uncertainty": "thetao_uncertainty",
        "salinity_uncertainty": "so_uncertainty",
        "temperature_observation_weights": "thetao_obs_weights",
        "salinity_observation_weights": "so_obs_weights",
    }
    ds = ds.rename(standard_names)

    # Long names, keyed by (renamed) variable:
    long_names = {
        "thetao": "Potential Temperature",
        "so": "Practical Salinity",
        "thetao_uncertainty": "Potential Temperature Error Standard Deviation",
        "so_uncertainty": "Practical Salinity Error Standard Deviation",
        "thetao_obs_weights": "Potential Temperature Observation Weights",
        "so_obs_weights": "Practical Salinity Observation Weights",
    }
    for name, long_name in long_names.items():
        ds[name].attrs.update({"long_name": long_name})

    # Replace the global attributes wholesale:
    ds.attrs.clear()
    global_attrs = {
        "Conventions": "CF-1.0",
        "title": "EN.4.2.2 ocean temperature and salinity monthly timeseries.",
        "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly timeseries from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.",
        "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "Met Office, UK",
        "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.",
        "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.",
        "acknowledgement": "None",
        "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.",
        "doi": "None",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "1 degree",
        "vertical_grid_type": "z",
        "vertical_grid_coordinate": "depth",
        "vertical_grid_levels": 42,
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    }
    ds = ds.assign_attrs(global_attrs)

    # Ancillary variables. The order is kept as-is: each helper receives the
    # dataset including whatever was attached before it.
    surface_thetao = ds["thetao"].isel(time=0, depth=0)
    ds["mask"] = compute_land_sea_mask(surface_thetao)
    ds["dx"] = compute_dx(ds)
    ds["dy"] = compute_dy(ds)
    ds["cell_area"] = compute_cell_area(ds)

    # Layer thickness from the depth bounds of the first time step, and the
    # cell volume derived from it:
    upper_bound = ds["depth_bnds"].isel(bnds=1)
    lower_bound = ds["depth_bnds"].isel(bnds=0)
    ds["cell_thickness"] = (upper_bound - lower_bound).isel(time=0)
    ds["cell_volume"] = ds["cell_thickness"] * ds["cell_area"]

    ds["cell_thickness"].attrs.update(
        long_name="Grid-Cell Thickness",
        standard_name="cell_thickness",
        units="m",
    )
    ds["cell_volume"].attrs.update(
        long_name="Grid-Cell Volume",
        standard_name="cell_volume",
        units="m3",
    )

    # ========== Send to Icechunk Repository ========== #
    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({"time": 1, "depth": 20, "latitude": 173, "longitude": 360})

    # Same Blosc (zstd, level 3, byte shuffle) compressor for every data
    # variable and coordinate:
    compressor = zarr.codecs.BloscCodec(
        cname="zstd",
        clevel=3,
        shuffle=zarr.codecs.BloscShuffle.shuffle,
    )
    for name in [*ds.data_vars, *ds.coords]:
        ds[name].encoding["compressors"] = [compressor]

    work_directory = ".../OceanDataStore/OceanDataStore/data/EN.4.2.2/"
    update_icechunk(
        file=ds,
        bucket="en4.2.2",
        object_prefix="en4.2.2_analysis_g10_monthly",
        store_credentials_json=".../credentials/jasmin_os_credentials.json",
        append_dim="time",
        branch="main",
        commit_message="Added EN.4.2.2.analysis.g10 monthly (2000-01-2026-03).",
        dask_config_kwargs={
            "temporary_directory": work_directory,
            "local_directory": work_directory,
        },
        dask_cluster_kwargs={
            "n_workers": 25,
            "threads_per_worker": 1,
            "memory_limit": "3GB",
        },
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,110 @@
1
+ # =========================================================
2
+ # create_ERA5_daily_climatology.py
3
+ #
4
+ # Script to calculate the day-of-year climatology (mean, variance,
5
+ # 10th/90th percentiles, minimum, maximum) of ERA5 daily SST data.
6
+ #
7
+ # Created By: Adam Blaker (atb299@noc.ac.uk)
8
+ # =========================================================
9
+ import numpy as np
10
+ import xarray as xr
11
+ import glob
12
+ import argparse
13
+ import re
14
+ from dask.distributed import Client
15
+
16
def extract_year(filename):
    """Extract the year from a file name like ``sst_y2011m07.nc``.

    Only the final path component is searched, so a directory whose name
    happens to contain a ``yYYYYmMM`` token cannot shadow the year encoded
    in the file name itself.

    Parameters
    ----------
    filename : str
        File name or full path.

    Returns
    -------
    int or None
        The four-digit year, or ``None`` when the file name holds no
        ``yYYYYmMM`` token.
    """
    # Local import keeps this function self-contained.
    import os

    match = re.search(r"y(\d{4})m\d{2}", os.path.basename(filename))
    return int(match.group(1)) if match else None
20
+
21
def preprocess(ds):
    """Return ``ds`` with a ``valid_time`` dimension renamed to ``time``.

    Objects without a ``valid_time`` dimension are returned unchanged.
    """
    if "valid_time" not in ds.dims:
        return ds
    return ds.rename({"valid_time": "time"})
25
+
26
def main(start_year, end_year, data_path="./", output="sst_climatology.nc"):
    """Compute a day-of-year SST climatology and write it to netCDF.

    Daily files named ``sst_yYYYYmMM_daily.nc`` under ``data_path`` are
    filtered to the inclusive year range, opened as one dataset, and the
    ``sst`` variable is grouped by day of year to produce the mean,
    variance, 10th/90th percentiles, minimum and maximum.

    Parameters
    ----------
    start_year : int
        First year to include.
    end_year : int
        Last year to include.
    data_path : str, optional
        Directory containing the daily SST files.
    output : str, optional
        Path of the netCDF file to write.

    Raises
    ------
    ValueError
        If no file falls inside the requested year range.
    """
    # The context manager shuts the local Dask cluster down on exit, including
    # when an exception (e.g. the ValueError below) propagates; previously the
    # client was never closed.
    with Client(n_workers=16, threads_per_worker=1) as client:
        print(client, flush=True)

        # Find all SST files
        files = sorted(glob.glob(f"{data_path}/sst_y????m??_daily.nc"))

        # Filter files by year, parsing each file name once
        selected_files = []
        for path in files:
            year = extract_year(path)
            if year is not None and start_year <= year <= end_year:
                selected_files.append(path)

        if not selected_files:
            raise ValueError("No files found in the specified year range.")

        print(f"Using {len(selected_files)} files from {start_year} to {end_year}")

        # Open multiple files
        ds = xr.open_mfdataset(
            selected_files,
            preprocess=preprocess,
            combine="by_coords",
            parallel=True,
            chunks={"time": 31, "latitude": 721, "longitude": 360},
        )

        # Whole time axis in one chunk per spatial tile for the grouped
        # reductions below.
        ds = ds.chunk({
            "time": -1,
            "latitude": 100,
            "longitude": 100,
        })

        # Compute daily climatology (day of year)
        g_sst = ds["sst"].groupby("time.dayofyear")  # Group once for readability

        mean = g_sst.mean("time").persist()
        var = g_sst.var("time").persist()
        p10 = g_sst.quantile(0.10, dim="time").persist()
        p90 = g_sst.quantile(0.90, dim="time").persist()
        minimum = g_sst.min("time").persist()
        maximum = g_sst.max("time").persist()

        # Build output dataset
        clim = xr.Dataset()
        clim["sst_mean"] = mean
        clim["sst_variance"] = var
        clim["sst_p10"] = p10.astype(np.float32)
        clim["sst_p90"] = p90.astype(np.float32)
        clim["sst_minimum"] = minimum
        clim["sst_maximum"] = maximum

        clim = clim.chunk({
            "dayofyear": 30,
            "latitude": 721,
            "longitude": 1440,
        })

        # Save output while the cluster is still alive
        clim.to_netcdf(output)

    print(f"Climatology saved to {output}")
99
+
100
+
101
if __name__ == "__main__":
    # Command-line entry point: two positional years plus optional paths.
    cli = argparse.ArgumentParser(description="Compute SST daily climatology")
    cli.add_argument("start_year", type=int, help="Start year (e.g. 2000)")
    cli.add_argument("end_year", type=int, help="End year (e.g. 2010)")
    cli.add_argument("--data_path", default=".", help="Directory containing SST files")
    cli.add_argument("--output", default="sst_climatology.nc", help="Output file")

    opts = cli.parse_args()

    main(
        start_year=opts.start_year,
        end_year=opts.end_year,
        data_path=opts.data_path,
        output=opts.output,
    )
@@ -0,0 +1,69 @@
1
+ # =========================================================
2
+ # create_ERA5_daily_mean.py
3
+ #
4
+ # Script to calculate daily mean, minimum, maximum, and
5
+ # variance for ERA5 sea surface temperature data.
6
+ #
7
+ # Created By: Adam Blaker (atb299@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+ import xarray as xr
11
+ import numpy as np
12
+
13
+ from OceanDataStore.cli import initialise_logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def main(filepath: str, outpath: str) -> None:
    """Compute daily mean, min, max and variance from an hourly ERA5 SST file.

    Parameters
    ----------
    filepath : str
        Path of the hourly ERA5 netCDF file to read.
    outpath : str
        Path of the daily-statistics netCDF file to write.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Calculate Daily Mean, Min, Max, and Variance ========== #
    # The input path is logged instead of module-level ``year``/``month``:
    # those names exist only when this file runs as a script, so referencing
    # them raised NameError for any caller that imported ``main``.
    logging.info(f"In Progress: Calculating ERA5 SST daily mean, min, max and variance from {filepath}...")
    ds = xr.open_dataset(filepath, chunks={"time": -1, "latitude": -1, "longitude": -1})
    logging.info(f"Completed: Read ERA5 Hourly SST data from {filepath}.")

    # Experimental: see https://confluence.ecmwf.int/pages/viewpage.action?pageId=173385064
    if "expver" in ds.dims:
        print(f"Dimension 'expver' present in {filepath}")
        ds = ds.reduce(np.nansum, dim="expver")

    # Catch and rename the time dimension for consistency
    if "valid_time" in ds.dims:
        ds = ds.rename({"valid_time": "time"})

    ds2 = ds.resample(time="1D").mean()

    for var in ds.data_vars:
        ds2[var + "_min"] = ds[var].resample(time="1D").min()
        ds2[var + "_max"] = ds[var].resample(time="1D").max()
        ds2[var + "_var"] = ds[var].resample(time="1D").var()

    # Compress every output variable rather than a hard-coded first four.
    # The (1, 24, 24) chunk shape is only attached to three-dimensional
    # variables, since it cannot describe any other rank.
    encoding = {}
    for name, data in ds2.data_vars.items():
        settings = {"zlib": True, "complevel": 1}
        if data.ndim == 3:
            settings["chunksizes"] = (1, 24, 24)
        encoding[name] = settings

    logging.info(f"In Progress: Writing ERA5 Daily SST data to {outpath}...")
    ds2.to_netcdf(outpath, encoding=encoding)
    logging.info(f"Completed: ERA5 Daily SST data saved to {outpath}.")
53
+
54
+
55
if __name__ == "__main__":
    # ====== Inputs ====== #
    # Year and month of the hourly file to process:
    year, month = 2026, 6

    # ERA5[T] source directory - one of [original, original_latest]:
    source = "original_latest"

    # Hourly ERA5 SST input and daily-statistics output locations:
    filepath = f"/dssgfs01/scratch/npd/forcing/ERA5/{source}/{year}/sea_surface_temperature/sea_surface_temperature_{year}-{month:02d}.nc"
    outpath = f"/dssgfs01/scratch/otooth/npd_data/observations/ERA5/daily/sst_y{year}m{month:02d}_daily.nc"

    # ====== Calculate ERA5 Daily Mean ====== #
    main(filepath=filepath, outpath=outpath)
@@ -0,0 +1,74 @@
1
+ # =========================================================
2
+ # create_ERA5_monthly_mean.py
3
+ #
4
+ # Script to calculate monthly mean, minimum, maximum, and
5
+ # variance for an hourly ERA5 variable (e.g. sea ice cover).
6
+ #
7
+ # Created By: Adam Blaker (atb299@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+ import xarray as xr
11
+ import numpy as np
12
+
13
+ from OceanDataStore.cli import initialise_logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def main(filepath: str, outpath: str, var_out: str) -> None:
    """Compute monthly mean, min, max and variance from an hourly ERA5 file.

    Parameters
    ----------
    filepath : str
        Path of the hourly ERA5 netCDF file to read.
    outpath : str
        Path of the monthly-statistics netCDF file to write.
    var_out : str
        Variable label used in the log messages only.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Calculate Monthly Mean, Min, Max, and Variance ========== #
    # The input path is logged instead of module-level ``year``/``month``:
    # those names exist only when this file runs as a script, so referencing
    # them raised NameError for any caller that imported ``main``.
    logging.info(f"In Progress: Calculating ERA5 {var_out} monthly mean, min, max and variance from {filepath}...")
    ds = xr.open_dataset(filepath, chunks={"time": -1, "latitude": -1, "longitude": -1})
    logging.info(f"Completed: Read ERA5 Hourly {var_out} data from {filepath}.")

    # Experimental: see https://confluence.ecmwf.int/pages/viewpage.action?pageId=173385064
    if "expver" in ds.dims:
        print(f"Dimension 'expver' present in {filepath}")
        ds = ds.reduce(np.nansum, dim="expver")

    # Catch and rename the time dimension for consistency
    if "valid_time" in ds.dims:
        ds = ds.rename({"valid_time": "time"})

    ds2 = ds.resample(time="1ME").mean()

    for var in ds.data_vars:
        ds2[var + "_min"] = ds[var].resample(time="1ME").min()
        ds2[var + "_max"] = ds[var].resample(time="1ME").max()
        ds2[var + "_var"] = ds[var].resample(time="1ME").var()

    # Compress every output variable rather than a hard-coded first four.
    # The (1, 24, 24) chunk shape is only attached to three-dimensional
    # variables, since it cannot describe any other rank.
    encoding = {}
    for name, data in ds2.data_vars.items():
        settings = {"zlib": True, "complevel": 1}
        if data.ndim == 3:
            settings["chunksizes"] = (1, 24, 24)
        encoding[name] = settings

    logging.info(f"In Progress: Writing ERA5 Monthly {var_out} data to {outpath}...")
    ds2.to_netcdf(outpath, encoding=encoding)
    logging.info(f"Completed: ERA5 Monthly {var_out} data saved to {outpath}.")
53
+
54
+
55
if __name__ == "__main__":
    # ====== Inputs ====== #
    # Year and month of the hourly file to process:
    year, month = 2026, 5

    # ERA5 variable: directory/file name on input, short name on output:
    var_in, var_out = "sea_ice_cover", "siconc"

    # ERA5[T] source directory - one of [original, original_latest]:
    source = "original_latest"

    # Hourly ERA5 input and monthly-statistics output locations:
    filepath = f"/dssgfs01/scratch/npd/forcing/ERA5/{source}/{year}/{var_in}/{var_in}_{year}-{month:02d}.nc"
    outpath = f"/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/{var_out}_y{year}m{month:02d}_monthly.nc"

    # ====== Calculate ERA5 Monthly Mean ====== #
    main(filepath=filepath, outpath=outpath, var_out=var_out)
@@ -0,0 +1,54 @@
1
#!/bin/bash
#SBATCH --job-name=era5_daily_climatology
#SBATCH --time=12:00:00
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --mem=0
#SBATCH --exclusive
##SBATCH --mem-per-cpu=4G
##SBATCH --ntasks-per-node=64
##SBATCH --ntasks-per-socket=32
##SBATCH --ntasks-per-core=1

module load NEMO/prg-env
#============================
export I_MPI_SHM=icx

source /dssgfs01/working/atb299/miniforge3/bin/activate
conda activate Sci

# ==============================================================
# run_create_ERA5_daily_climatology.slurm
#
# Description: SLURM script to create the ERA5 daily
# climatology datasets.
#
# Created By: Adam Blaker (atb299@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================

#============================
TIME1=$(date +%s)

dpath="/dssgfs01/scratch/atb299/ERA5_daily/ERA5_daily_fields/"

# 30-year climatology window: Ystart .. Ystart+29 inclusive.
Ystart=1996
Yend=$((Ystart+29))

ofile="/dssgfs01/scratch/otooth/npd_data/observations/ERA5/climatology/ERA5_sst_climatology_${Ystart}-${Yend}.nc"

echo "Start year : " "${Ystart}"
echo "End year : " "${Yend}"
echo "Output file : " "${ofile}"

# Remember the Python exit status so the timing report below still runs and
# the job does not report success when the climatology step failed.
status=0
python create_ERA5_daily_climatology.py "${Ystart}" "${Yend}" --data_path "${dpath}" --output "${ofile}" || status=$?

#============================
# Job timing

TIME2=$(date +%s)
DIFFSEC=$((TIME2 - TIME1))
echo "Took ${DIFFSEC} seconds."
echo "Took $(date +%H:%M:%S -ud "@${DIFFSEC}")"

exit "${status}"
54
+
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=era5_daily_climatology
#SBATCH --partition=compute
#SBATCH --time=02:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_ERA5_daily_climatology_to_os.slurm
#
# Description: SLURM script to send the ERA5 daily
# climatology datasets to Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipeline stage.
set -euo pipefail

# -- Python Environment -- #
# Load the miniforge base, then switch to the OceanDataStore environment:
. /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send ERA5 daily climatology datasets to JASMIN OS -- #
printf '%s\n' "In Progress: Sending ERA5 daily climatology to Icechunk..."

python3 send_ERA5_daily_climatology_to_os.py

printf '%s\n' "Completed: Sent ERA5 daily climatology to Icechunk."