OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,143 @@
1
+ # =========================================================
2
+ # update_ARMOR3D_P1m-m_monthly_to_os.py
3
+ #
4
+ # Script to update ARMOR3D monthly data in Icechunk repository
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, update_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_dx,
17
+ compute_dy,
18
+ compute_cell_area,
19
+ compute_cell_thickness,
20
+ compute_land_sea_mask,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Append the ARMOR3D multi-year monthly fields to the Icechunk repository.

    The script opens the local ARMOR3D Zarr stores, renames the temperature
    variable, replaces the global attributes, attaches ancillary grid
    variables, assigns chunk layouts and compressors, and finally hands the
    2000-2024 time slice to ``update_icechunk`` with ``time`` as the append
    dimension.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Open every ARMOR3D REP monthly Zarr store matching the glob:
    zarr_glob = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/armor-3d_rep_monthly_NA_*.zarr"
    ds = xr.open_mfdataset(zarr_glob, compat="no_conflicts", data_vars="all", engine="zarr")
    logging.info("-> Completed: Opened ARMOR-3D REP monthly climatology dataset from Zarr stores.")

    # Map source variable names onto the standard names:
    standard_names = {
        "to": "thetao",
        "so": "so",
    }
    ds = ds.rename(standard_names)

    # Replace the global attributes wholesale:
    global_attrs = {
        "Conventions": "CF-1.0",
        "title": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD.",
        "description": "Multi Observation Global Ocean ARMOR3D multi-year reprocessed temperature salinity, sea surface height, geostrophic current and mixed layer depth monthly timeseries on 1/8 degree regular grid and 50 depth levels.",
        "source": "Numerical models: Multiple Linear Regression, Optimal Interpolation. In-situ observations: Copernicus In Situ TAC (including Argo, XBT, CTD and moorings) Copernicus Sea Level TAC, CNES-CLS22 Mean Dynamic Topography, OSTIA Sea Surface Temperature Analysis, Copernicus MOB TAC (Sea Surface Salinity), and World Ocean Atlas 2018 (WOA18).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "2.0",
        "institution": "Copernicus Marine Service, Mercator Ocean International, France",
        "citation": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD. E.U. Copernicus Marine Service Information (CMEMS). Marine Data Store (MDS). DOI: 10.48670/moi-00052 (Accessed on 21 04 2026).",
        "references": "Guinehut S., A.-L. Dhomps, G. Larnicol and P.-Y. Le Traon, 2012: High resolution 3D temperature and salinity fields derived from in situ and satellite observations. Ocean Sci., 8(5):845-857. Mulet, S., M.-H. Rio, A. Mignot, S. Guinehut and R. Morrow, 2012: A new estimate of the global 3D geostrophic ocean circulation based on satellite data and in-situ measurements. Deep Sea Research Part II : Topical Studies in Oceanography, 77-80(0):70-81.",
        "acknowledgement": "Generated using E.U. Copernicus Marine Service Information; https://doi.org/10.48670/moi-00052.",
        "license": "ARMOR3D data were obtained from https://doi.org/10.48670/moi-00052, and are provided under the Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA) https://marine.copernicus.eu/user-corner/service-commitments-and-licence?pk_vid=42ac3e352be888641780994034c3bb6e",
        "doi": "10.48670/moi-00052",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "0.125 degree",
        "vertical_grid_type": "z",
        "vertical_grid_coordinate": "depth",
        "vertical_grid_levels": 50,
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    }
    ds.attrs.clear()
    ds = ds.assign_attrs(global_attrs)

    # Ancillary variables supplied by the shared helpers:
    surface_thetao = ds['thetao'].isel(time=0, depth=0)
    ds['mask'] = compute_land_sea_mask(surface_thetao)
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)
    # Custom ancillary variables (volume = thickness x area):
    ds['cell_thickness'] = compute_cell_thickness(ds)
    ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area']
    ds['cell_volume'].attrs.update({
        'long_name': "Grid-Cell Volume",
        'standard_name': "cell_volume",
        'units': "m3",
    })

    # ========== Chunking and Encoding ========== #
    # Pick one chunk layout per variable from its rank and dimensions;
    # variables matching none of the cases are left untouched.
    for name in ds.data_vars:
        rank = ds[name].ndim
        dims = ds[name].dims
        if rank == 4:
            layout = {'time': 1, 'depth': 3, 'latitude': 689, 'longitude': 1440}
        elif rank == 3 and "time" in dims:
            layout = {'time': 1, 'latitude': 1378, 'longitude': 2880}
        elif rank == 3 and "depth" in dims:
            layout = {'depth': 10, 'latitude': 1378, 'longitude': 2880}
        elif rank == 2 and "latitude" in dims and "longitude" in dims:
            layout = {'latitude': 1378, 'longitude': 2880}
        elif rank == 1:
            layout = {'depth': 50}
        else:
            continue
        ds[name] = ds[name].chunk(layout)
        # The encoded chunk shape lists the layout sizes in declaration order.
        ds[name].encoding['chunks'] = tuple(layout.values())

    # Same Blosc/zstd compressor for every data variable and coordinate:
    codec = zarr.codecs.BloscCodec(cname="zstd", clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for name in [*ds.data_vars, *ds.coords]:
        ds[name].encoding['compressors'] = [codec]

    # ========== Send to Icechunk Repository ========== #
    dask_config = {
        "temporary_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/",
        "local_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/"
    }
    dask_cluster = {
        "n_workers" : 25,
        "threads_per_worker" : 1,
        "memory_limit":"4GB"
    }

    update_icechunk(
        file=ds.sel(time=slice("2000-01-01", "2024-12-31")),
        bucket="armor3d",
        object_prefix="armor3d_global_my_monthly",
        store_credentials_json=".../credentials/jasmin_os_credentials.json",
        append_dim='time',
        branch="main",
        commit_message="Added ARMOR3D Global Monthly (2000-2024).",
        dask_config_kwargs=dask_config,
        dask_cluster_kwargs=dask_cluster,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,162 @@
1
+ # =========================================================
2
+ # create_EN4.2.2_analysis_g10_climatology.py
3
+ #
4
+ # Script to create EN.4.2.2 analyses climatologies for
5
+ # climate normals (1971-2000, 1981-2010, 1991-2020) and
6
+ # write to local netCDF files.
7
+ #
8
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
9
+ # =========================================================
10
+ import logging
11
+
12
+ import numpy as np
13
+ import xarray as xr
14
+ import zarr
15
+
16
+ from OceanDataStore.cli import initialise_logging
17
+ from OceanDataStore.data.utils import (
18
+ compute_cell_area,
19
+ compute_dx,
20
+ compute_dy,
21
+ compute_land_sea_mask,
22
+ )
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
def main():
    """Create EN.4.2.2 monthly climatologies for three climate-normal periods.

    Opens the EN.4.2.2 analysis netCDF files, standardises coordinate and
    variable names, rewrites the metadata, and then, for each of the periods
    1971-2000, 1981-2010 and 1991-2020, computes the monthly mean, attaches
    ancillary grid variables and writes the result to a netCDF file in the
    climatology output directory.
    """
    # ========== Initialize Logging and Print Banner ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Open complete EN.4.2.2 analysis dataset:
    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.*.nc"
    ds = xr.open_mfdataset(filepath, combine="by_coords", data_vars="all", engine="netcdf4")
    logging.info("-> Completed: Opened EN.4.2.2 analysis dataset from netCDF files.")

    # Standardise coordinate dimension names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude"})

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Rename variables to standard names:
    ds = ds.rename({"temperature": "thetao",
                    "salinity": "so",
                    "temperature_uncertainty": "thetao_uncertainty",
                    "salinity_uncertainty": "so_uncertainty",
                    "temperature_observation_weights": "thetao_obs_weights",
                    "salinity_observation_weights": "so_obs_weights"
                    })

    # Update variable attributes (one long_name per renamed variable):
    long_names = {
        "thetao": "Potential Temperature",
        "so": "Practical Salinity",
        "thetao_uncertainty": "Potential Temperature Error Standard Deviation",
        "so_uncertainty": "Practical Salinity Error Standard Deviation",
        "thetao_obs_weights": "Potential Temperature Observation Weights",
        "so_obs_weights": "Practical Salinity Observation Weights",
    }
    for name, long_name in long_names.items():
        ds[name].attrs.update({"long_name": long_name})

    # Update global attributes:
    ds.attrs.clear()

    ds = ds.assign_attrs({
        "Conventions": "CF-1.0",
        "title": "EN.4.2.2 ocean temperature and salinity monthly climatology.",
        "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly climatology from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.",
        "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).",
        "dataset_type": "observation",
        "product_type": "climatology",
        "product_version": "1.0",
        "institution": "Met Office, UK",
        "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.",
        "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.",
        "acknowledgement": "None",
        "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.",
        "doi": "None",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "1 degree",
        "vertical_grid_type": "z",
        "vertical_grid_coordinate": "depth",
        "vertical_grid_levels": 42,
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    logging.info("-> Completed: Updated EN.4.2.2 analysis CF-metadata.")

    # -- Ancillary grid variables -- #
    # These depend only on the full dataset, not on the climatology period,
    # so they are computed once here instead of once per loop iteration.
    land_sea_mask = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0))
    dx = compute_dx(ds)
    dy = compute_dy(ds)
    cell_area = compute_cell_area(ds)

    # -- Calculate climate normal monthly climatologies -- #
    output_dir = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/"
    start_years = [1971, 1981, 1991]
    end_years = [2000, 2010, 2020]

    for start_year, end_year in zip(start_years, end_years):
        logging.info(f"In Progress: Calculating monthly climatology for {start_year}-{end_year} climate normal period...")
        # Calculate monthly climatology for the specified period:
        ds_climatology = ds.sel(time=slice(f'{start_year}-01', f'{end_year}-12')).groupby('time.month').mean()

        # Add ancillary variables:
        ds_climatology['mask'] = land_sea_mask
        ds_climatology['dx'] = dx
        ds_climatology['dy'] = dy
        ds_climatology['cell_area'] = cell_area
        # Custom ancillary variables:
        # Thickness is the difference of the depth bounds; the first month is
        # selected to drop the month dimension from the result.
        ds_climatology['cell_thickness'] = (ds_climatology['depth_bnds'].isel(bnds=1) - ds_climatology['depth_bnds'].isel(bnds=0)).isel(month=0)
        ds_climatology['cell_volume'] = ds_climatology['cell_thickness'] * ds_climatology['cell_area']

        # Update attributes for custom ancillary variables:
        ds_climatology['cell_thickness'].attrs.update({
            'long_name': "Grid-Cell Thickness",
            'standard_name': "cell_thickness",
            'units': "m",
        })
        ds_climatology['cell_volume'].attrs.update({
            'long_name': "Grid-Cell Volume",
            'standard_name': "cell_volume",
            'units': "m3",
        })

        # Update time bounds to reflect climatological period:
        # lower bound = each month of the first year, upper bound = the same
        # month of the last year.
        ds_climatology['time_bnds'][:, 0] = np.arange(f'{start_year}-01', f'{start_year+1}-01', dtype='datetime64[M]')
        ds_climatology['time_bnds'][:, 1] = np.arange(f'{end_year}-01', f'{end_year+1}-01', dtype='datetime64[M]')
        logging.info(f"-> Completed: Calculated monthly climatology for {start_year}-{end_year} climate normal period.")

        # Update title attribute to reflect climatological period:
        ds_climatology.attrs['title'] = f"EN.4.2.2 ocean temperature and salinity monthly climatology ({start_year}-{end_year})."

        ds_climatology.attrs['description'] = f"EN.4.2.2: quality controlled ocean temperature and salinity monthly climatology ({start_year}-{end_year}) from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections."

        # Update variable encodings:
        # NOTE(review): these are Zarr codec objects but the dataset is written
        # with to_netcdf() below - presumably the netCDF writer does not use
        # the 'compressors' encoding key; confirm whether this is still needed.
        blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
        for var in list(ds_climatology.data_vars) + list(ds_climatology.coords):
            ds_climatology[var].encoding['compressors'] = [blosccodec]

        # Write monthly climatology to netCDF:
        output_filepath = f"{output_dir}EN.4.2.2.f.analysis.g10.{start_year}_{end_year}_monthly_climatology.nc"
        ds_climatology.to_netcdf(output_filepath)
        logging.info(f"-> Completed: Saved monthly climatology for {start_year}-{end_year} climate normal period to {output_filepath}.")

        # Close this period's dataset before moving on, so every climatology
        # is closed rather than only the last one.
        ds_climatology.close()

    # -- Close EN.4.2.2 analysis datasets -- #
    ds.close()


if __name__ == "__main__":
    main()
@@ -0,0 +1,51 @@
1
+ #!/bin/bash
2
+
3
+ # ----------------------------------------------------------------
4
+ # download_EN4.2.2_analysis_g10_data.sh
5
+ #
6
+ # Description: Download the EN.4.2.2 analyses.g10 dataset from the
7
+ # Met Office Hadley Centre EN.4.2.2 website:
8
+ # http://www.metoffice.gov.uk/hadobs/en4
9
+ #
10
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
11
+ # Created On: 2026-05-27
12
+ # ----------------------------------------------------------------
13
set -euo pipefail

# --- Inputs --- #
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/"

# -- Defaults -- #
base_url="http://www.metoffice.gov.uk/hadobs/en4/data/en4-2-1"

# --- Main Script --- #
echo "==================================================="
echo "        Downloading EN.4.2.2 Analyses"
echo "                  v0.1.0"
echo "           Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading EN.4.2.2 analyses dataset..."
# Iterate over years:
for yr in {1990..2026}; do
    # Construct URL for current year (archives from 2021 onwards sit directly
    # under the base URL, earlier years under the EN.4.2.2/ sub-directory):
    if [ "$yr" -ge 2021 ]; then
        url="${base_url}/EN.4.2.2.analyses.g10.${yr}.zip"
    else
        url="${base_url}/EN.4.2.2/EN.4.2.2.analyses.g10.${yr}.zip"
    fi

    # Download and unzip file if not in output directory.
    # Without nullglob an unmatched pattern stays as one literal element, so
    # the count is 1 (not 0) when no files exist - still different from 12.
    nc_files=("${output_dir}/EN.4.2.2.f.analysis.g10.${yr}"*.nc)
    filepath="${output_dir}/$(basename "$url")"
    if [ "${#nc_files[@]}" -ne 12 ]; then
        # Write to the exact path that is unzipped below. With `wget -P` a
        # re-run would save the new archive as "<name>.zip.1" and the stale
        # "<name>.zip" would be extracted instead.
        wget -O "$filepath" "$url"
        echo "-> Completed: Downloaded $filepath."

        # -o: overwrite existing files without prompting, so a re-run for a
        # partially downloaded year does not stall a non-interactive job.
        unzip -o "$filepath" -d "$output_dir"
        echo "-> Completed: Unzipped $filepath."
    else
        echo "-> Skipping Download: NetCDF files for ${yr} already exist in $output_dir."
    fi
done

echo "======================================="
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=en4.2.2_analysis_g10_climatology
3
+ #SBATCH --partition=test
4
+ #SBATCH --time=00:20:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm
12
+ #
13
+ # Description: SLURM script to send the EN.4.2.2 analysis g10
14
+ # climatology datasets to Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-05-29
18
+ #
19
+ # ==============================================================
20
set -euo pipefail

# -- Job Messages -- #
start_msg="In Progress: Sending EN.4.2.2.analysis.g10 climatology to Icechunk..."
done_msg="Completed: Sent EN.4.2.2.analysis.g10 climatology to Icechunk."

# -- Python Environment -- #
# Load the miniforge base installation, then switch to the project environment:
source .../miniforge3/bin/activate
conda activate env_ods

# -- Send EN.4.2.2.analysis.g10 climatology datasets to JASMIN OS -- #
printf '%s\n' "$start_msg"

python3 send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py

printf '%s\n' "$done_msg"
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=en4.2.2_analysis_g10_monthly
3
+ #SBATCH --partition=compute
4
+ #SBATCH --time=01:00:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm
12
+ #
13
+ # Description: SLURM script to send the EN.4.2.2 analysis g10
14
+ # monthly datasets to Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-05-29
18
+ #
19
+ # ==============================================================
20
set -euo pipefail

# -- Job Messages -- #
start_msg="In Progress: Sending EN.4.2.2.analysis.g10 monthly to Icechunk..."
done_msg="Completed: Sent EN.4.2.2.analysis.g10 monthly to Icechunk."

# -- Python Environment -- #
# Load the miniforge base installation, then switch to the project environment:
source .../miniforge3/bin/activate
conda activate env_ods

# -- Send EN.4.2.2.analysis.g10 monthly to JASMIN OS -- #
printf '%s\n' "$start_msg"

python3 send_EN4.2.2_analysis_g10_monthly_to_os.py

printf '%s\n' "$done_msg"
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=en4.2.2_analysis_g10_monthly
3
+ #SBATCH --partition=compute
4
+ #SBATCH --time=01:00:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm
12
+ #
13
+ # Description: SLURM script to update the EN.4.2.2 analysis g10
14
+ # monthly datasets in Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-05-29
18
+ #
19
+ # ==============================================================
20
set -euo pipefail

# -- Job Messages -- #
start_msg="In Progress: Updating EN.4.2.2.analysis.g10 monthly in Icechunk..."
done_msg="Completed: Updated EN.4.2.2.analysis.g10 monthly in Icechunk."

# -- Python Environment -- #
# Load the miniforge base installation, then switch to the project environment:
source .../miniforge3/bin/activate
conda activate env_ods

# -- Update EN.4.2.2.analysis.g10 monthly in JASMIN OS -- #
printf '%s\n' "$start_msg"

python3 update_EN4.2.2_analysis_g10_monthly_to_os.py

printf '%s\n' "$done_msg"
@@ -0,0 +1,76 @@
1
+ # =========================================================
2
+ # send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py
3
+ #
4
+ # Script to write EN.4.2.2 analysis climatologies to
5
+ # Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+
13
+ from OceanDataStore.cli import send_to_icechunk, initialise_logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def main():
    """Send the three EN.4.2.2 monthly climatologies to Icechunk.

    Each climatology netCDF file (1971-2000, 1981-2010, 1991-2020) is opened,
    rechunked and passed to ``send_to_icechunk`` under its own object prefix
    in the ``en4.2.2`` bucket, with ``month`` as the append dimension.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # One (file, first year, last year) triple per climatology period:
    climatologies = [
        (
            "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1971_2000_monthly_climatology.nc",
            1971,
            2000,
        ),
        (
            "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1981_2010_monthly_climatology.nc",
            1981,
            2010,
        ),
        (
            "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1991_2020_monthly_climatology.nc",
            1991,
            2020,
        ),
    ]

    # ========== Send to Icechunk Repository ========== #
    # Settings that are the same for every period:
    shared_kwargs = {
        "bucket": "en4.2.2",
        "store_credentials_json": ".../credentials/jasmin_os_credentials.json",
        "exists": False,
        "append_dim": 'month',
        "branch": "main",
        "variable_commits": True,
        "dask_config_kwargs": {
            "temporary_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/",
            "local_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/"
        },
        "dask_cluster_kwargs": {
            "n_workers" : 10,
            "threads_per_worker" : 1,
            "memory_limit":"3GB"
        },
    }

    for nc_path, first_yr, last_yr in climatologies:
        # Open the climatology and rechunk it for spatial analysis:
        climatology = xr.open_dataset(nc_path, engine='netcdf4')
        climatology = climatology.chunk({'month': 1, 'depth': 20, 'latitude': 173, 'longitude': 360})

        # Prefix and commit message carry the climatology period:
        send_to_icechunk(
            file=climatology,
            object_prefix=f"en4.2.2_analysis_g10_{first_yr}_{last_yr}_monthly_climatology",
            commit_message=f"Added EN.4.2.2.analysis.g10 climatology ({first_yr}-{last_yr}).",
            **shared_kwargs,
        )


if __name__ == "__main__":
    main()