OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,141 @@
1
+ # =========================================================
2
+ # update_ERA5_daily_to_os.py
3
+ #
4
+ # Script to update ERA5 daily data in Icechunk repositories
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ import xarray as xr
13
+ import zarr
14
+
15
+ from OceanDataStore.cli import initialise_logging, update_icechunk
16
+ from OceanDataStore.data.utils import (
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ compute_land_sea_mask,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Prepare ERA5 daily sea surface temperature data and pass it to Icechunk.

    Steps, in order:

    1. Open the daily ERA5 SST netCDF files for the configured year range.
    2. Map longitudes into [-180, 180) and sort by longitude.
    3. Convert every ``sst*`` variable from Kelvin to Celsius, attach
       standard name / units and rename it to ``tos*``.
    4. Add the ancillary ``mask``, ``dx``, ``dy`` and ``cell_area`` variables.
    5. Replace the global attributes, re-chunk and set the compressor.
    6. Call ``update_icechunk`` with ``append_dim='time'``.

    Raises
    ------
    FileNotFoundError
        If no input file matches the expected name pattern.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"

    # Use the module-level logger rather than the root ``logging`` functions.
    logger.info("In Progress: Sending ERA5 daily data to Icechunk...")

    # Open ERA5 dataset:
    base = Path("/dssgfs01/scratch/otooth/npd_data/observations/ERA5/daily")
    filepath = []
    for year in range(2026, 2027):
        filepath.extend(sorted(base.glob(f"sst_y{year}m??_daily.nc")))
    if not filepath:
        # Fail early with an explicit message instead of relying on the
        # error raised by xarray for an empty file list.
        raise FileNotFoundError(
            f"No ERA5 daily SST files matching 'sst_y<year>m??_daily.nc' found in {base}"
        )
    ds = xr.open_mfdataset(
        filepath,
        combine="by_coords",
        data_vars="all",
        engine="h5netcdf",
        chunks={"time": -1, "latitude": -1, "longitude": -1},
    )

    # Update longitude coordinates to be in the range [-180, 180):
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Update variable names, units, and attributes:
    if "number" in ds.data_vars:
        ds = ds.drop_vars(["number"])
    # Iterate over a snapshot of the names: the loop body reassigns
    # variables and rebinds ``ds`` through ``rename``.
    for var in list(ds.data_vars):
        if "sst" not in var:
            continue
        # Transform units degK -> degC:
        ds[var] = ds[var] - 273.15
        # Add standard names and units:
        ds[var].attrs["standard_name"] = "sea_surface_temperature"
        ds[var].attrs["units"] = "degC"
        # Rename variables to standard names:
        ds = ds.rename({var: var.replace("sst", "tos")})

    # Update variable long names:
    ds["tos"].attrs["long_name"] = "Daily Mean Sea Surface Temperature"
    ds["tos_var"].attrs["long_name"] = "Daily Variance Sea Surface Temperature"
    ds["tos_min"].attrs["long_name"] = "Daily Minimum Sea Surface Temperature"
    ds["tos_max"].attrs["long_name"] = "Daily Maximum Sea Surface Temperature"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Daily Timeseries",
        "description": "ERA5 daily sea surface temperature timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for time-series analysis (one chunk spans the
    # full time axis of the data being written):
    ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50})

    # Update variable encodings (same compressor for data variables and coordinates):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define object prefix and commit message for this update:
    prefix = "era5_daily_timeseries"
    commit_message = "Added ERA5 Sea Surface Daily Timeseries (2026-01-2026-06)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 20,
        "threads_per_worker": 1,
        "memory_limit": "5GB",
    }

    update_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
139
+
140
+ if __name__ == "__main__":
141
+ main()
@@ -0,0 +1,169 @@
1
+ # =========================================================
2
+ # update_ERA5_monthly_to_os.py
3
+ #
4
+ # Script to update ERA5 monthly data in Icechunk repositories
5
+ # in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, update_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_land_sea_mask,
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def _open_monthly_files(pattern):
    """Open every netCDF file matching the glob ``pattern`` as one dataset."""
    return xr.open_mfdataset(
        pattern,
        combine="by_coords",
        data_vars="all",
        engine="h5netcdf",
        chunks={"time": -1, "latitude": -1, "longitude": -1},
    )


def _wrap_longitude(ds):
    """Return ``ds`` with longitudes mapped into [-180, 180) and sorted ascending."""
    ds = ds.assign_coords(longitude=((ds["longitude"] + 180) % 360) - 180)
    return ds.sortby("longitude")


def main():
    """Prepare ERA5 monthly SST and sea ice data and pass it to Icechunk.

    Opens the monthly ``sst`` and ``siconc`` netCDF files, wraps the
    longitudes, converts SST from Kelvin to Celsius and renames ``sst*`` to
    ``tos*``, annotates the sea ice variables, merges both datasets, adds the
    ancillary grid variables and global attributes, re-chunks, sets the
    compressor and finally calls ``update_icechunk`` with
    ``append_dim='time'``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Update Icechunk Repository ========== #
    bucket = "era5"
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"

    # Use the module-level logger rather than the root ``logging`` functions.
    logger.info("In Progress: Updating ERA5 monthly data in Icechunk...")

    # Open ERA5 datasets (SST and sea ice concentration):
    ds_sst = _open_monthly_files(
        "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/sst_y20??m??_monthly.nc"
    )
    ds_si = _open_monthly_files(
        "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/siconc_y20??m??_monthly.nc"
    )

    # SST & SIC: Update longitude coordinates to be in the range [-180, 180):
    ds_sst = _wrap_longitude(ds_sst)
    ds_si = _wrap_longitude(ds_si)

    # SST: Update variable names, units, and attributes:
    if "number" in ds_sst.data_vars:
        ds_sst = ds_sst.drop_vars(["number"])
    # Iterate over a snapshot of the names: the loop body reassigns
    # variables and rebinds ``ds_sst`` through ``rename``.
    for var in list(ds_sst.data_vars):
        if "sst" not in var:
            continue
        # Transform units degK -> degC:
        ds_sst[var] = ds_sst[var] - 273.15
        # Add standard names and units:
        ds_sst[var].attrs["standard_name"] = "sea_surface_temperature"
        ds_sst[var].attrs["units"] = "degC"
        # Rename variables to standard names:
        ds_sst = ds_sst.rename({var: var.replace("sst", "tos")})

    # SIC: Update variable names, units, and attributes:
    if "number" in ds_si.data_vars:
        ds_si = ds_si.drop_vars(["number"])
    for var in ds_si.data_vars:
        if "siconc" in var:
            # Add standard names and units:
            ds_si[var].attrs["standard_name"] = "sea_ice_area_fraction"
            ds_si[var].attrs["units"] = "1"

    # SST: Update variable long names. These previously read "Daily ...",
    # which contradicted the monthly title / aggregation_frequency below.
    ds_sst["tos"].attrs["long_name"] = "Monthly Mean Sea Surface Temperature"
    ds_sst["tos_var"].attrs["long_name"] = "Monthly Variance Sea Surface Temperature"
    ds_sst["tos_min"].attrs["long_name"] = "Monthly Minimum Sea Surface Temperature"
    ds_sst["tos_max"].attrs["long_name"] = "Monthly Maximum Sea Surface Temperature"

    # SIC: Update variable long names:
    ds_si["siconc"].attrs["long_name"] = "Monthly Mean Sea Ice Area Fraction"
    ds_si["siconc_var"].attrs["long_name"] = "Monthly Variance Sea Ice Area Fraction"
    ds_si["siconc_min"].attrs["long_name"] = "Monthly Minimum Sea Ice Area Fraction"
    ds_si["siconc_max"].attrs["long_name"] = "Monthly Maximum Sea Ice Area Fraction"

    # Merge SST and SIC datasets.
    # NOTE(review): join="override" takes the indexes from ds_sst without
    # aligning - this presumes both file sets cover identical times and
    # grids; confirm the two globs always match the same months.
    ds = xr.merge([ds_sst, ds_si], compat="override", join="override")

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Monthly Timeseries",
        "description": "ERA5 monthly sea surface temperature and sea ice area fraction timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "monthly",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis (one time step per chunk):
    ds = ds.chunk({'time': 1, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings (same compressor for data variables and coordinates):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define object prefix and commit message for this update:
    prefix = "era5_monthly_timeseries"
    commit_message = "Add ERA5 Sea Surface Monthly Timeseries (2000-01-2026-05)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 25,
        "threads_per_worker": 1,
        "memory_limit": "4GB",
    }

    update_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
167
+
168
+ if __name__ == "__main__":
169
+ main()
@@ -0,0 +1,43 @@
1
#!/bin/bash

# ----------------------------------------------------------------
# download_HadISST1_data.sh
#
# This script downloads the HadISST1 dataset from the Met Office
# Hadley Centre HadISST website. The files to be downloaded are
# HadISST_sst.nc.gz & HadISST_ice.nc.gz.
#
# Files are downloaded into, and unzipped in, the directory given
# by output_dir below.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-05-27
# ----------------------------------------------------------------
set -euo pipefail

# --- Inputs --- #
# Output directory for downloaded files:
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/HadISST"

# Define base URL to HadISST1 dataset:
url="https://www.metoffice.gov.uk/hadobs/hadisst/data"

# Compressed files to fetch from the base URL:
files=(HadISST_sst.nc.gz HadISST_ice.nc.gz)

# --- Main Script --- #
echo "==================================================="
echo " Downloading HadISST1 Dataset"
echo " v0.1.0"
echo " Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading HadISST1 dataset..."

# Make sure the target directory exists before writing into it:
mkdir -p "${output_dir}"

# Download the HadISST1 dataset (expansions quoted to avoid word splitting):
echo "-> Downloading HadISST_sst.nc.gz & HadISST_ice.nc.gz..."
for file in "${files[@]}"; do
    wget -P "${output_dir}" "${url}/${file}"
done

# Unzip the files. -f overwrites an existing .nc from a previous run;
# without it gunzip refuses to overwrite and the script aborts under set -e.
echo "-> Unzipping HadISST1 dataset..."
for file in "${files[@]}"; do
    gunzip -f "${output_dir}/${file}"
done

# Update users via stdout:
echo "...Completed: HadISST1 dataset downloaded and unzipped."
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=hadisst1_monthly
#SBATCH --partition=test
#SBATCH --time=00:20:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_HadISST1_monthly_to_os.slurm
#
# Description: SLURM script to send the HadISST1 monthly
# time-series dataset to Icechunk repository.
#
# The Python script is referenced by a relative path, so the job
# must start in the directory containing
# send_HadISST1_monthly_to_os.py.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-24
#
# ==============================================================
set -euo pipefail

# -- Python Environment -- #
# Activate miniforge environment.
# Conda activation scripts may reference unset variables, which aborts
# the job under `set -u`; relax nounset only while activating.
set +u
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods
set -u

# -- Send HadISST1 monthly time-series datasets to JASMIN OS -- #
echo "In Progress: Sending HadISST1 monthly time-series to Icechunk..."

python3 send_HadISST1_monthly_to_os.py

echo "Completed: Sent HadISST1 monthly time-series to Icechunk."
@@ -0,0 +1,133 @@
1
+ # =========================================================
2
+ # send_HadISST1_monthly_to_os.py
3
+ #
4
+ # Script to write HadISST1 monthly data to Icechunk
5
+ # repository in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import numpy as np
12
+ import xarray as xr
13
+ import zarr
14
+
15
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
16
+ from OceanDataStore.data.utils import (
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ compute_land_sea_mask,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Prepare the HadISST1 monthly dataset and send it to Icechunk.

    Opens the SST and sea ice netCDF files, combines them into one dataset
    with ``tos`` and ``siconc`` variables, masks the -1000 values in ``tos``
    with NaN, sets variable and global attributes, adds the ancillary grid
    variables and hemispheric sea ice area timeseries, re-chunks, sets the
    compressor and calls ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Open HadISST1 monthly dataset:
    filedir = "/dssgfs01/scratch/otooth/npd_data/observations/HadISST"
    ds = xr.open_dataset(f"{filedir}/HadISST_sst.nc", engine="netcdf4")
    ds_si = xr.open_dataset(f"{filedir}/HadISST_ice.nc", engine="netcdf4")

    # Add sea ice concentration to single dataset:
    ds['sic'] = ds_si['sic']

    # Rename variables to standard names:
    ds = ds.rename({"sst": "tos", "sic": "siconc"})

    # Fill missing sea surface temperature values with NaNs.
    # DataArray.where keeps the attributes of ``tos``; the previous
    # xr.where(...) call (default keep_attrs) returned a variable without
    # them, leaving ``tos`` with only the long_name set below while
    # ``siconc`` kept its source attributes.
    ds['tos'] = ds['tos'].where(ds['tos'] != -1000, other=np.nan)

    # Update variable attributes:
    ds["tos"].attrs.update({
        "long_name": "Sea Surface Temperature",
    })
    ds["siconc"].attrs.update({
        "long_name": "Sea Ice Area Fraction",
    })

    # Update global attributes:
    ds.attrs.clear()

    # The same publication is used for both "citation" and "references".
    rayner_et_al = "Rayner, N. A., Parker, D. E., Horton, E. B., Folland, C. K., Alexander, L. V., Rowell, D. P., Kent, E. C., Kaplan, A. Global analyses of sea surface temperature, sea ice, and night marine air temperature since the late nineteenth century J. Geophys. Res.Vol. 108, No. D14, 4407 10.1029/2002JD002670."

    ds = ds.assign_attrs({
        "Conventions": "CF-1.0",
        "title": "Hadley Centre Sea Ice and Sea Surface Temperature (HadISST) monthly timeseries.",
        "description": "HadISST v1.1 monthly averages of sea surface temperature and sea ice concentration.",
        "source": "Numerical models: Reduced Space Optimal Interpolation. In-situ observations: Met Office Marine Data Bank (MDB), Comprehensive Ocean-Atmosphere Data Set (COADS). Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "1.1",
        "institution": "Met Office, UK",
        "citation": rayner_et_al,
        "references": rayner_et_al,
        "acknowledgement": "None",
        "license": "HadISST1.1 data were obtained from https://www.metoffice.gov.uk/hadobs/hadisst/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.",
        "doi": "None",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "1 degree",
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Add ancillary variables.
    # NOTE(review): the mask is derived from the first time step of ``tos``
    # *after* -1000 was replaced by NaN; confirm compute_land_sea_mask
    # handles those cells as intended.
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Add Northern and Southern Hemisphere sea ice area timeseries
    # (area-weighted concentration summed over each hemisphere):
    ds['siarea_NH'] = (ds['siconc'].where(ds['latitude'] > 0) * ds['cell_area']).sum(dim=['latitude', 'longitude'])
    ds['siarea_NH'].attrs = {'long_name': 'Total Northern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'}

    ds['siarea_SH'] = (ds['siconc'].where(ds['latitude'] < 0) * ds['cell_area']).sum(dim=['latitude', 'longitude'])
    ds['siarea_SH'].attrs = {'long_name': 'Total Southern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'}

    # ========== Send to Icechunk Repository ========== #
    bucket = "hadisst"
    prefix = "hadisst_v1.1_monthly"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    commit_message = "Added HadISST1 sea surface temperature and sea ice concentration monthly (1870-01-2026-04)."
    variable_commits = True

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/HadISST/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/HadISST/",
    }
    cluster_kwargs = {
        "n_workers": 15,
        "threads_per_worker": 1,
        "memory_limit": "3GB",
    }

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'time': 30, 'latitude': 180, 'longitude': 360})

    # Update variable encodings (same compressor for data variables and coordinates):
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
131
+
132
+ if __name__ == "__main__":
133
+ main()
@@ -0,0 +1,54 @@
1
#!/bin/bash

# ----------------------------------------------------------------
# download_NSIDC_monthly_1979_2025_data.sh
#
# Description: Download the National Snow & Ice Data Centre (NSIDC)
# Sea Ice Index version 4 sea ice extent & concentration GeoTiff
# files from 1979 to 2025, or for a single year when single_year
# is set to True.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-05-27
# ----------------------------------------------------------------
set -euo pipefail

# --- Inputs --- #
# Define hemisphere to download data for (options: "north" or "south"):
hemisphere="north"

# Define output directory for downloaded files:
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/${hemisphere}/"

# Single year download:
single_year=True
# Define year to download if single_year is True:
year=2025

# -- Defaults -- #
# Default URL prefix:
url_prefix="https://noaadata.apps.nsidc.org/NOAA/G02135/${hemisphere}/monthly/geotiff"

# Options shared by every wget call.
# NOTE(review): --no-check-certificate disables TLS certificate
# verification; kept to preserve existing behaviour - remove it if the
# host's CA bundle can validate the NSIDC server.
wget_opts=(-r -nd --no-check-certificate --reject "index.html*" -np -e robots=off)

# Reject anything other than the two documented hemisphere options, since
# the value is used to build both the output path and the download URL:
case "${hemisphere}" in
    north|south) ;;
    *)
        echo "Error: hemisphere must be \"north\" or \"south\" (got \"${hemisphere}\")." >&2
        exit 1
        ;;
esac

# -- Main Script -- #
echo "==================================================="
echo " Downloading NSIDC Sea Ice Index"
echo " v4.0"
echo " Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading NSIDC Sea Ice Index dataset..."

mkdir -p "${output_dir}"
cd "${output_dir}"

# Download monthly sea ice extent & concentration files, one month
# directory at a time (only files for $year when single_year is True):
for month in 01_Jan 02_Feb 03_Mar 04_Apr 05_May 06_Jun 07_Jul 08_Aug 09_Sep 10_Oct 11_Nov 12_Dec
do
    if [ "${single_year}" = True ]; then
        echo "Downloading NSIDC $year Sea Ice Conc. GeoTiffs for: $month"
        wget "${wget_opts[@]}" "${url_prefix}/${month}/" -A "*_${year}*_v4.0.tif"
    else
        echo "Downloading NSIDC 1979-2025 Sea Ice Conc. GeoTiffs for: $month"
        wget "${wget_opts[@]}" "${url_prefix}/${month}/"
    fi
done

echo "-> Completed: Downloaded NSIDC ${hemisphere} Sea Ice Extent & Concentration GeoTiffs"