OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,130 @@
1
+ """
2
+ process_NSIDC_SSI_Antarctic_data.py
3
+
4
+ Description: Python script to post-process NSIDC Sea Ice Index
5
+ data for the Antarctic 1978-2025, including sea ice concentration,
6
+ extent and total area.
7
+
8
+ Created by: Ollie Tooth (oliver.tooth@noc.ac.uk)
9
+ Created on: 2025-02-21
10
+ """
11
+ # -- Import Python packages -- #
12
+ import logging
13
+ from datetime import datetime
14
+ from glob import glob
15
+
16
+ import numpy as np
17
+ import xarray as xr
18
+
19
+ from OceanDataStore.cli import initialise_logging
20
+
21
+
22
+ # -- Define Utility Functions -- #
23
def get_datetimes_from_filenames(file_list):
    """
    Build mid-month timestamps from NSIDC Sea Ice Index GeoTIFF filenames.

    The date is read from fixed positions in each filename: characters 2-5
    are the year and characters 6-7 are the month (presumably names such as
    ``S_197811_extent_v4.0.tif`` -- confirm against the download script).
    Every timestamp is placed on day 15 of its month.

    Parameters
    ----------
    file_list : iterable of str
        Paths (or bare filenames) of the GeoTIFF files, '/'-separated.

    Returns
    -------
    numpy.ndarray
        Object array of ``datetime.datetime``, one per input file, in input order.

    Raises
    ------
    ValueError
        If a filename does not hold a valid ``YYYYMM`` date at the expected position.
    """
    datetimes = []
    for path in file_list:
        # Keep only the filename component of the path:
        filename = path.split('/')[-1]
        try:
            stamp = datetime(year=int(filename[2:6]), month=int(filename[6:8]), day=15)
        except ValueError as err:
            # Name the offending file; the bare int()/datetime() message does not.
            raise ValueError(f"Cannot parse a YYYYMM date from filename '{filename}' (path: {path}).") from err
        datetimes.append(stamp)

    # Explicit object dtype keeps the return type stable when file_list is empty:
    return np.array(datetimes, dtype=object)
31
+
32
+
33
+ def main():
34
+ # ========== Initialize Logging and Print Banner ========== #
35
+ initialise_logging()
36
+
37
+ logging.info('In Progress: Post-Processing NSIDC Sea Ice Index Antarctic Observations...')
38
+
39
+ # ========== Load NSIDC Ancillary Data ========== #
40
+ # Define filepath to ancillary data:
41
+ anc_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_LatLon_PS_S25km_v1.0.nc"
42
+ # Open NSIDC ancillary data as dataset:
43
+ ds_si = xr.open_dataset(anc_fpath)
44
+
45
+ # Define filepath to NSIDC ancillary file - grid cell area:
46
+ area_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_CellArea_PS_S25km_v1.0.nc"
47
+ # Open NSIDC grid cell area:
48
+ ds_area = xr.open_dataset(area_fpath)
49
+ logging.info("-> Completed: Loaded NSIDC ancillary data and grid cell area.")
50
+
51
+ # ========== Load NSIDC Monthly Data ========== #
52
+ # Define directory path:
53
+ dir_path = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/antarctic/"
54
+
55
+ # Get the list of files in the directory:
56
+ file_paths = glob(f"{dir_path}*.tif")
57
+ file_paths.sort()
58
+
59
+ # Retrieve sea ice mask & concentration files:
60
+ version_str = "v4.0" # Options: "v3.0" or "v4.0"
61
+ mask_files = [f for f in file_paths if f"extent_{version_str}.tif" in f]
62
+ conc_files = [f for f in file_paths if f"concentration_{version_str}.tif" in f]
63
+
64
+ # ========== Post-Process Sea Ice Mask Data ========== #
65
+ # Define the time dimension:
66
+ time_simask = xr.DataArray(data=get_datetimes_from_filenames(file_list=mask_files), dims='time', name='time')
67
+ # Load and concatenate all sea ice mask GeoTIFFs:
68
+ simask = xr.concat([xr.open_dataset(i) for i in mask_files], dim=time_simask)
69
+ logging.info("-> Completed: Loaded NSIDC sea ice mask and concentration GeoTIFF files.")
70
+
71
+ # Sea Ice Mask is defined by [1: sea ice, 0: ocean]:
72
+ # Values greater than 1 (missing or land) are set to NaN:
73
+ ds_si['simask'] = xr.where(simask['band_data'] > 1, np.nan, simask['band_data']).squeeze(drop=True)
74
+ ds_si["simask"].attrs = {'units': '1', "long_name": "Sea Ice Mask", "standard_name": "sea_ice_mask", "comment": "1 = sea ice, 0 = ocean"}
75
+
76
+ # ========== Post-Process Sea Ice Concentration Data ========== #
77
+ # Define the time dimension:
78
+ time_siconc = xr.DataArray(data=get_datetimes_from_filenames(file_list=conc_files), dims='time', name='time')
79
+ # Load and concatenate all sea ice concentration GeoTIFFs:
80
+ siconc = xr.concat([xr.open_dataset(i) for i in conc_files], dim=time_siconc)
81
+ logging.info("-> Completed: Loaded NSIDC sea ice concentration GeoTIFF files.")
82
+
83
+ # Sea Ice Area Fraction:
84
+ # Note concentration percentage is scaled by 10 -> requires division by 1000.
85
+ # Raw values greater than 1000 (missing or land) are set to NaN before scaling:
86
+ ds_si['siconc'] = xr.where(siconc['band_data'] > 1000, np.nan, siconc['band_data']).squeeze(drop=True) / 1000
87
+ ds_si['siconc'].attrs = {'units': '1', 'long_name': 'Sea Ice Area Fraction', 'standard_name': 'sea_ice_area_fraction', "comment": "0 = ocean, 0.01-0.15 = statistically insignificant, > 0.15 = sea ice"}
88
+
89
+ # ========== Add grid-cell area and calculate sea ice extent (m2) ========== #
90
+ ds_si['cell_area'] = ds_area['cell_area']
91
+ ds_si['cell_area'].attrs = {'units': 'm2', 'long_name': 'Grid-Cell Area for Sea Ice Variables', "standard_name": "cell_area"}
92
+
93
+ ds_si['siextent'] = (ds_si['cell_area']*ds_si['simask']).sum(dim=['x', 'y'])
94
+ ds_si['siextent'].attrs = {'units': 'm2', 'long_name': 'Total Area Where Sea Ice Area Fraction Exceeds 15%', 'standard_name': 'sea_ice_extent'}
95
+
96
+ # ========== Update Coordinates ========== #
97
+ ds_si.coords['lon'] = ds_si['longitude']
98
+ ds_si.coords['lat'] = ds_si['latitude']
99
+ # Drop auxiliary variables:
100
+ ds_si = ds_si.drop_vars(["spatial_ref", "crs", "longitude", "latitude"])
101
+ # Rename coordinates:
102
+ ds_si = ds_si.rename({'lon': 'longitude', 'lat': 'latitude'})
103
+
104
+ # ========== Update attributes to ensure CF-compliance: ========== #
105
+ ds_si['x'].attrs = {'standard_name': 'projection_x_coordinate', 'long_name': 'x coordinate of projection', 'units': 'meters'}
106
+ ds_si['y'].attrs = {'standard_name': 'projection_y_coordinate', 'long_name': 'y coordinate of projection', 'units': 'meters'}
107
+ ds_si['longitude'].attrs = {'standard_name': 'longitude', 'long_name': 'Longitude', 'units': 'degrees_east'}
108
+ ds_si['latitude'].attrs = {'standard_name': 'latitude', 'long_name': 'Latitude', 'units': 'degrees_north'}
109
+ ds_si['simask'].attrs.pop("valid_range", None)
110
+
111
+ # ========== Save NSIDC Sea Ice Index Dataset ========== #
112
+ # Update variable encodings:
113
+ for var in ds_si.variables:
114
+ if ds_si[var].dtype == 'float64':
115
+ ds_si[var].encoding['missing_value'] = None
116
+ ds_si[var].encoding['_FillValue'] = None
117
+
118
+ # Define output filepath:
119
+ out_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Antarctic_combined_1978_2025.nc"
120
+ # Save dataset to netCDF file:
121
+ logging.info(f'In Progress: Saving NSIDC Sea Ice Index Antarctic Observations to netCDF file: {out_fpath}...')
122
+ ds_si.to_netcdf(out_fpath, unlimited_dims='time')
123
+ # Close files associated with datasets:
124
+ ds_si.close()
125
+ ds_area.close()
126
+
127
+ logging.info(f'Completed: Saved NSIDC Sea Ice Index Antarctic Observations to netCDF file: {out_fpath}.')
128
+
129
+ if __name__ == "__main__":
130
+ main()
@@ -0,0 +1,129 @@
1
+ """
2
+ process_NSIDC_SSI_Arctic_data.py
3
+
4
+ Description: Python script to post-process NSIDC Sea Ice Index
5
+ data for the Arctic 1978-2025, including sea ice concentration,
6
+ extent and total area.
7
+
8
+ Created by: Ollie Tooth (oliver.tooth@noc.ac.uk)
9
+ Created on: 2025-02-21
10
+ """
11
+ # -- Import Python packages -- #
12
+ import logging
13
+ from datetime import datetime
14
+ from glob import glob
15
+
16
+ import numpy as np
17
+ import xarray as xr
18
+
19
+ from OceanDataStore.cli import initialise_logging
20
+
21
+
22
+ # -- Define Utility Functions -- #
23
def get_datetimes_from_filenames(file_list):
    """
    Build mid-month timestamps from NSIDC Sea Ice Index GeoTIFF filenames.

    The date is read from fixed positions in each filename: characters 2-5
    are the year and characters 6-7 are the month (presumably names such as
    ``N_197811_extent_v4.0.tif`` -- confirm against the download script).
    Every timestamp is placed on day 15 of its month.

    Parameters
    ----------
    file_list : iterable of str
        Paths (or bare filenames) of the GeoTIFF files, '/'-separated.

    Returns
    -------
    numpy.ndarray
        Object array of ``datetime.datetime``, one per input file, in input order.

    Raises
    ------
    ValueError
        If a filename does not hold a valid ``YYYYMM`` date at the expected position.
    """
    datetimes = []
    for path in file_list:
        # Keep only the filename component of the path:
        filename = path.split('/')[-1]
        try:
            stamp = datetime(year=int(filename[2:6]), month=int(filename[6:8]), day=15)
        except ValueError as err:
            # Name the offending file; the bare int()/datetime() message does not.
            raise ValueError(f"Cannot parse a YYYYMM date from filename '{filename}' (path: {path}).") from err
        datetimes.append(stamp)

    # Explicit object dtype keeps the return type stable when file_list is empty:
    return np.array(datetimes, dtype=object)
31
+
32
+ def main():
33
+ # ========== Initialize Logging and Print Banner ========== #
34
+ initialise_logging()
35
+
36
+ logging.info('In Progress: Post-Processing NSIDC Sea Ice Index Arctic Observations...')
37
+
38
+ # ========== Load NSIDC Ancillary Data ========== #
39
+ # Define filepath to ancillary data:
40
+ anc_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_LatLon_PS_N25km_v1.0.nc"
41
+ # Open NSIDC ancillary data as dataset:
42
+ ds_si = xr.open_dataset(anc_fpath)
43
+
44
+ # Define filepath to NSIDC ancillary file - grid cell area:
45
+ area_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_CellArea_PS_N25km_v1.0.nc"
46
+ # Open NSIDC grid cell area:
47
+ ds_area = xr.open_dataset(area_fpath)
48
+ logging.info("-> Completed: Loaded NSIDC ancillary data and grid cell area.")
49
+
50
+ # ========== Load NSIDC Monthly Data ========== #
51
+ # Define directory path:
52
+ dir_path = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/arctic/"
53
+
54
+ # Get the list of files in the directory:
55
+ file_paths = glob(f"{dir_path}*.tif")
56
+ file_paths.sort()
57
+
58
+ # Retrieve sea ice mask & concentration files:
59
+ version_str = "v4.0" # Options: "v3.0" or "v4.0"
60
+ mask_files = [f for f in file_paths if f"extent_{version_str}.tif" in f]
61
+ conc_files = [f for f in file_paths if f"concentration_{version_str}.tif" in f]
62
+
63
+ # ========== Post-Process Sea Ice Mask Data ========== #
64
+ # Define the time dimension:
65
+ time_simask = xr.DataArray(data=get_datetimes_from_filenames(file_list=mask_files), dims='time', name='time')
66
+ # Load and concatenate all sea ice mask GeoTIFFs:
67
+ simask = xr.concat([xr.open_dataset(i) for i in mask_files], dim=time_simask)
68
+ logging.info("-> Completed: Loaded NSIDC sea ice mask and concentration GeoTIFF files.")
69
+
70
+ # Sea Ice Mask is defined by [1: sea ice, 0: ocean]:
71
+ # Values greater than 1 (missing or land) are set to NaN:
72
+ ds_si['simask'] = xr.where(simask['band_data'] > 1, np.nan, simask['band_data']).squeeze(drop=True)
73
+ ds_si["simask"].attrs = {'units': '1', "long_name": "Sea Ice Mask", "standard_name": "sea_ice_mask", "comment": "1 = sea ice, 0 = ocean"}
74
+
75
+ # ========== Post-Process Sea Ice Concentration Data ========== #
76
+ # Define the time dimension:
77
+ time_siconc = xr.DataArray(data=get_datetimes_from_filenames(file_list=conc_files), dims='time', name='time')
78
+ # Load and concatenate all sea ice concentration GeoTIFFs:
79
+ siconc = xr.concat([xr.open_dataset(i) for i in conc_files], dim=time_siconc)
80
+ logging.info("-> Completed: Loaded NSIDC sea ice concentration GeoTIFF files.")
81
+
82
+ # Sea Ice Area Fraction:
83
+ # Note concentration percentage is scaled by 10 -> requires division by 1000.
84
+ # Raw values greater than 1000 (missing or land) are set to NaN before scaling:
85
+ ds_si['siconc'] = xr.where(siconc['band_data'] > 1000, np.nan, siconc['band_data']).squeeze(drop=True) / 1000
86
+ ds_si['siconc'].attrs = {'units': '1', 'long_name': 'Sea Ice Area Fraction', 'standard_name': 'sea_ice_area_fraction', "comment": "0 = ocean, 0.01-0.15 = statistically insignificant, > 0.15 = sea ice"}
87
+
88
+ # ========== Add grid-cell area and calculate sea ice extent (m2) ========== #
89
+ ds_si['cell_area'] = ds_area['cell_area']
90
+ ds_si['cell_area'].attrs = {'units': 'm2', 'long_name': 'Grid-Cell Area for Sea Ice Variables', "standard_name": "cell_area"}
91
+
92
+ ds_si['siextent'] = (ds_si['cell_area']*ds_si['simask']).sum(dim=['x', 'y'])
93
+ ds_si['siextent'].attrs = {'units': 'm2', 'long_name': 'Total Area Where Sea Ice Area Fraction Exceeds 15%', 'standard_name': 'sea_ice_extent'}
94
+
95
+ # ========== Update Coordinates ========== #
96
+ ds_si.coords['lon'] = ds_si['longitude']
97
+ ds_si.coords['lat'] = ds_si['latitude']
98
+ # Drop auxiliary variables:
99
+ ds_si = ds_si.drop_vars(["spatial_ref", "crs", "longitude", "latitude"])
100
+ # Rename coordinates:
101
+ ds_si = ds_si.rename({'lon': 'longitude', 'lat': 'latitude'})
102
+
103
+ # ========== Update attributes to ensure CF-compliance: ========== #
104
+ ds_si['x'].attrs = {'standard_name': 'projection_x_coordinate', 'long_name': 'x coordinate of projection', 'units': 'meters'}
105
+ ds_si['y'].attrs = {'standard_name': 'projection_y_coordinate', 'long_name': 'y coordinate of projection', 'units': 'meters'}
106
+ ds_si['longitude'].attrs = {'standard_name': 'longitude', 'long_name': 'Longitude', 'units': 'degrees_east'}
107
+ ds_si['latitude'].attrs = {'standard_name': 'latitude', 'long_name': 'Latitude', 'units': 'degrees_north'}
108
+ ds_si['simask'].attrs.pop("valid_range", None)
109
+
110
+ # ========== Save NSIDC Sea Ice Index Dataset ========== #
111
+ # Update variable encodings:
112
+ for var in ds_si.variables:
113
+ if ds_si[var].dtype == 'float64':
114
+ ds_si[var].encoding['missing_value'] = None
115
+ ds_si[var].encoding['_FillValue'] = None
116
+
117
+ # Define output filepath:
118
+ out_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Arctic_combined_1978_2025.nc"
119
+ # Save dataset to netCDF file:
120
+ logging.info(f"-> Saving NSIDC Sea Ice Index Arctic Observations to netCDF file: {out_fpath}...")
121
+ ds_si.to_netcdf(out_fpath, unlimited_dims='time')
122
+ # Close files associated with datasets:
123
+ ds_si.close()
124
+ ds_area.close()
125
+
126
+ logging.info(f'Completed: Saved NSIDC Sea Ice Index Arctic Observations to netCDF file: {out_fpath}.')
127
+
128
+ if __name__ == "__main__":
129
+ main()
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=xfer_NSIDC_sea_ice_index_v4.0
3
+ #SBATCH --partition=test
4
+ #SBATCH --time=00:20:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ================================================================
11
+ # run_send_NSIDC_v4.0_to_OS.slurm
12
+ #
13
+ # Description: SLURM script to send the NSIDC sea ice observations
14
+ # data to the JASMIN Object Store.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-06-05
18
+ #
19
+ # ================================================================
20
+ set -euo pipefail
21
+
22
+ # -- Python Environment -- #
23
+ # Activate miniforge (conda) environment:
24
+ source .../miniforge3/bin/activate
25
+ conda activate env_ods
26
+
27
+ # -- Send NSIDC Sea Ice Index v4.0 to JASMIN OS -- #
28
+ echo "In Progress: Sending NSIDC Sea Ice Index v4.0 to Icechunk..."
29
+
30
+ python3 send_NSIDC_SII_v4.0_to_os.py
31
+
32
+ echo "Completed: Sent NSIDC Sea Ice Index v4.0 to Icechunk."
@@ -0,0 +1,140 @@
1
+ # =========================================================
2
+ # send_NSIDC_SII_v4.0_to_os.py
3
+ #
4
+ # Script to write NSIDC Sea Ice Index version 4.0 to
5
+ # Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import send_to_icechunk, initialise_logging
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def main():
    """
    Write the Arctic and Antarctic NSIDC Sea Ice Index v4.0 datasets to Icechunk.

    Opens the two combined netCDF files, rechunks them, attaches a Blosc (zstd)
    compressor to the encoding of every data variable and coordinate, replaces
    the global attributes, then calls ``send_to_icechunk`` once per hemisphere.
    """
    def _global_attrs(title, description, bbox):
        # Global attributes shared by both hemispheres; only title,
        # description and bbox differ. Key order matches the stored order.
        return {
            "Conventions": "CF-1.6",
            "title": title,
            "description": description,
            "source": "Satellite observations: Sea Ice Concentrations from Nimbus-7 SMMR and DMSP SSM/I-SSMIS Passive Microwave Data (GSFC). AMSR2 Daily Polar Gridded Sea Ice Concentrations (AMSR2).",
            "dataset_type": "observation",
            "product_type": "timeseries",
            "product_version": "1.0",
            "institution": "National Snow and Ice Data Center; Cooperative Institute for Research in Environmental Sciences; University of Colorado at Boulder; Boulder, CO",
            "citation": "Fetterer, F., Knowles, K., Meier, W. N., Savoie, M., Windnagel, A. K. & Stafford, T. (2025). Sea Ice Index. (G02135, Version 4). [Data Set]. Boulder, Colorado USA. National Snow and Ice Data Center. https://doi.org/10.7265/a98x-0f50. Date Accessed 05-29-2026.",
            "references": "Windnagel, A., Stafford, T., Fetterer, F., Meier, W. (2025). Sea Ice Index Version 4 Analysis. NSIDC Special Report 28. Boulder CO, USA: National Snow and Ice Data Center.",
            "acknowledgement": "These data are produced and supported by the NASA National Snow and Ice Data Center Distributed Active Archive Center.",
            "license": "NSIDC Sea Ice Index, Version 4 data were obtained from https://nsidc.org/data/g02135/versions/4 and are provided under a U.S. Government Works License https://www.usa.gov/government-works",
            "doi": "10.7265/a98x-0f50",
            "platform": "gn",
            "horizontal_grid_type": "curvilinear",
            "horizontal_grid_resolution": "25 km",
            "aggregation": "mean",
            "aggregation_frequency": "monthly",
            "status": "ongoing",
            "update_frequency": "quarterly",
            "bbox": bbox,
        }

    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Open the combined Sea Ice Index v4.0 netCDF file for each hemisphere:
    ds_si_arctic = xr.open_dataset("/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Arctic_combined_1978_2025.nc")
    ds_si_antarctic = xr.open_dataset("/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Antarctic_combined_1978_2025.nc")

    # Rechunk: 12 time steps per chunk, with the y/x sizes given below:
    ds_si_arctic = ds_si_arctic.chunk({'time': 12, 'y': 448, 'x': 304})
    ds_si_antarctic = ds_si_antarctic.chunk({'time': 12, 'y': 332, 'x': 316})

    # Attach the same Blosc (zstd) compressor to every data variable and coordinate:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for dataset in (ds_si_arctic, ds_si_antarctic):
        for name in [*dataset.data_vars, *dataset.coords]:
            dataset[name].encoding['compressors'] = [blosccodec]

    # Replace the global attributes with the CF metadata for each hemisphere:
    ds_si_arctic.attrs.clear()
    ds_si_arctic = ds_si_arctic.assign_attrs(_global_attrs(
        title="NSIDC Sea Ice Index, Version 4 - Arctic",
        description="NSIDC Sea Ice Index version 4.0 - Arctic sea ice area fraction, sea ice extent and total sea ice area timeseries.",
        bbox="[-180.0, 180.0, 30.98, 90.0]",
    ))

    ds_si_antarctic.attrs.clear()
    ds_si_antarctic = ds_si_antarctic.assign_attrs(_global_attrs(
        title="NSIDC Sea Ice Index, Version 4 - Antarctic",
        description="NSIDC Sea Ice Index, Version 4 - Antarctic sea ice area fraction, sea ice extent and total sea ice area timeseries.",
        bbox="[-180.0, 180.0, -90.0, -39.23089]",
    ))

    # ========== Send to Icechunk Repository ========== #
    # Arguments common to both send_to_icechunk calls:
    shared_kwargs = {
        "bucket": "nsidc",
        "store_credentials_json": ".../credentials/jasmin_os_credentials.json",
        "exists": False,
        "append_dim": 'time',
        "branch": "main",
        "variable_commits": True,
        "dask_config_kwargs": {
            "temporary_directory": ".../OceanDataStore/OceanDataStore/data/NSIDC/",
            "local_directory": ".../OceanDataStore/OceanDataStore/data/NSIDC/",
        },
        "dask_cluster_kwargs": {
            "n_workers": 10,
            "threads_per_worker": 1,
            "memory_limit": "3GB",
        },
    }

    # -- Sea Ice Index v4.0 - Arctic -- #
    send_to_icechunk(
        file=ds_si_arctic,
        object_prefix="nsidc_sea_ice_index_v4_arctic_monthly",
        attrs=ds_si_arctic.attrs,
        commit_message="Added NSIDC Sea Ice Index version 4 - Arctic (1978-01-2025-12).",
        **shared_kwargs,
    )

    # -- Sea Ice Index v4.0 - Antarctic -- #
    send_to_icechunk(
        file=ds_si_antarctic,
        object_prefix="nsidc_sea_ice_index_v4_antarctic_monthly",
        attrs=ds_si_antarctic.attrs,
        commit_message="Added NSIDC Sea Ice Index version 4 - Antarctic (1978-01-2025-12).",
        **shared_kwargs,
    )
138
+
139
+ if __name__ == "__main__":
140
+ main()
@@ -0,0 +1,83 @@
1
+ import argparse
2
+ import glob
3
+
4
+ import dask
5
+ import numpy as np
6
+ import xarray as xr
7
+ from dask.distributed import Client, LocalCluster
8
+
9
+
10
def main(
    year_start=1991,
    year_end=2020,
    data_path="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/",
    output="./sst.daily.climatology.nc",
    dask_cluster_kwargs={
        "n_workers" : 8,
        "threads_per_worker" : 1,
        "memory_limit":"10GB"
        },
    dask_config_kwargs={
        "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/",
        "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/"
        }
    ):
    """
    Compute a day-of-year SST climatology from yearly OISST daily-mean files.

    Selects ``sst.day.mean.YYYY.nc`` files in ``data_path`` whose year lies in
    ``[year_start, year_end]``, groups ``sst`` by day of year, and writes the
    mean, 10th and 90th percentiles to ``output`` as ``sst_mean``, ``sst_p10``
    and ``sst_p90``.

    Parameters
    ----------
    year_start, year_end : int
        Inclusive range of years, read from the four digits before ``.nc``.
    data_path : str
        Directory containing the yearly files.
    output : str
        Path of the netCDF file to write (overwritten if present).
    dask_cluster_kwargs : dict
        Keyword arguments for ``dask.distributed.LocalCluster``; must contain
        ``n_workers`` (used in the start-up message). Not modified here.
    dask_config_kwargs : dict or None
        Passed to ``dask.config.set``; ``None`` skips the call. Not modified here.

    Raises
    ------
    FileNotFoundError
        If no file in ``data_path`` matches the requested year range.
    """
    xr.set_options(use_flox=True)

    if dask_config_kwargs is not None:
        dask.config.set(dask_config_kwargs)

    with LocalCluster(**dask_cluster_kwargs) as cluster, Client(cluster) as client:
        print(f"Created LocalCluster with {dask_cluster_kwargs['n_workers']} workers @ Client: {client.dashboard_link}")

        # Find all SST files and keep those inside the requested year range:
        files = sorted(glob.glob(f"{data_path}/sst.day.mean.????.nc"))
        selected_files = [file for file in files if year_start <= int(file[-7:-3]) <= year_end]
        print(f"Selected files for climatology computation: {selected_files}", flush=True)

        # Fail early with the search parameters instead of a generic error
        # from open_mfdataset (FileNotFoundError is an OSError subclass):
        if not selected_files:
            raise FileNotFoundError(
                f"No sst.day.mean.YYYY.nc files found in {data_path} for years {year_start}-{year_end}."
            )

        # Open multiple files. Chunk keys use the same dimension names
        # (lat/lon) as the .chunk() call below:
        ds = xr.open_mfdataset(
            selected_files,
            combine="by_coords",
            parallel=True,
            engine='h5netcdf',
            chunks={"time": 31, "lat": 720, "lon": 360},
            preprocess=lambda ds: ds['sst'],
        )

        # Compute daily climatology (day of year). Time is held in a single
        # chunk so each grid point's full series is available to the quantiles:
        ds = ds.chunk({
            "time": -1,
            "lat": 100,
            "lon": 100,
        })
        g_sst = ds["sst"].groupby("time.dayofyear")  # Group once for readability

        mean = g_sst.mean("time")
        mean = mean.persist()

        p10 = g_sst.quantile(0.10, dim="time")
        p10 = p10.persist()

        p90 = g_sst.quantile(0.90, dim="time")
        p90 = p90.persist()

        # Build output dataset (percentiles are stored as float32):
        clim = xr.Dataset()
        clim["sst_mean"] = mean
        clim["sst_p10"] = p10.astype(np.float32)
        clim["sst_p90"] = p90.astype(np.float32)

        # Save output
        print(f"In Progress: Saving Climatology to {output}")
        clim.to_netcdf(output, engine='h5netcdf', mode='w')
        print(f"Completed: Climatology saved to {output}", flush=True)
72
+
73
+
74
if __name__ == "__main__":
    # Command-line entry point: parse the options and pass them positionally
    # to main(). The dask keyword arguments keep main()'s own defaults.
    cli = argparse.ArgumentParser(description="Compute OISST v2.1 SST Daily Climatology")
    cli_options = (
        ("--year_start", {"type": int, "default": 1996, "help": "Start year for climatology"}),
        ("--year_end", {"type": int, "default": 2025, "help": "End year for climatology"}),
        ("--data_path", {"default": "/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily", "help": "Directory containing SST files"}),
        ("--output", {"default": "./sst.daily.climatology.nc", "help": "Output file"}),
    )
    for flag, settings in cli_options:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()

    main(options.year_start, options.year_end, options.data_path, options.output)
@@ -0,0 +1,43 @@
1
#!/bin/bash

# ----------------------------------------------------------------
# download_OISSTv2_data.sh
#
# Description: Download the OISSTv2 dataset from the
# NOAA website:
# https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html
#
# Usage: download_OISSTv2_data.sh [first_year [last_year]]
#   first_year  First year to download (default: 2012).
#   last_year   Last year to download (default: 2026).
#
# One file (sst.day.mean.<year>.nc) is fetched per year. Years whose
# file already exists in the output directory are skipped. The script
# stops at the first failed download.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-24
# ----------------------------------------------------------------
set -euo pipefail

# --- Inputs --- #
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/"
first_year="${1:-2012}"
last_year="${2:-2026}"

# -- Defaults -- #
base_url="https://downloads.psl.noaa.gov//Datasets/noaa.oisst.v2.highres"

# --- Main Script --- #
echo "==================================================="
echo " Downloading OISSTv2 Data"
echo " v0.1.0"
echo " Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading OISSTv2 dataset..."

# wget -O does not create missing directories, so make sure it exists:
mkdir -p "$output_dir"

# Iterate over years:
for (( yr = first_year; yr <= last_year; yr++ )); do
    # Construct URL for current year:
    url="$base_url/sst.day.mean.${yr}.nc"

    # Download file if not in output directory:
    filepath="$output_dir/$(basename "$url")"
    if [ ! -f "$filepath" ]; then
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer never leaves a truncated file that a later
        # run would mistake for a complete download and skip.
        partial="${filepath}.part"
        if ! wget -O "$partial" "$url"; then
            rm -f "$partial"
            echo "-> Failed: Could not download $url." >&2
            exit 1
        fi
        mv "$partial" "$filepath"
        echo "-> Completed: Downloaded $filepath."
    else
        echo "-> Skipping Download: NetCDF file for ${yr} already exists in $output_dir."
    fi
done

echo "==================================================="
@@ -0,0 +1,44 @@
1
#!/bin/bash
#SBATCH --job-name=oisstv2_daily_climatology
#SBATCH --partition=compute
#SBATCH --time=05:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_create_OISSTv2_daily_climatology.slurm
#
# Description: SLURM script to create the OISSTv2.1 daily
# climatology datasets.
#
# Runs create_OISSTv2_daily_climatology.py for the configured
# year range and reports the wall-clock time taken.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
set -euo pipefail
TIME1=$(date +%s)

# -- Python Environment -- #
# Activate miniforge environment:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Create OISSTv2.1 daily climatology datasets -- #
dpath="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/"
opath="/dssgfs01/scratch/otooth/npd_data/observations/OISST/climatology"
year_start=1996
year_end=2025
# Single definition of the output file, so the path reported below is
# exactly the path handed to the Python script.
ofile="${opath}/oisst_climatology_${year_start}-${year_end}.nc"

echo "Start year : " "$year_start"
echo "End year : " "$year_end"
echo "Output file : " "$ofile"

# Create the output directory up front so a missing directory cannot
# fail the job at the final write, after the computation has run.
mkdir -p "$opath"

python3 create_OISSTv2_daily_climatology.py \
    --year_start "$year_start" \
    --year_end "$year_end" \
    --data_path "$dpath" \
    --output "$ofile"

# -- Report Job Timing -- #
TIME2=$(date +%s)
# Shell arithmetic instead of expr: expr exits with status 1 when the
# result is 0, which would abort the script under `set -e`.
DIFFSEC=$(( TIME2 - TIME1 ))
echo "Job Completed in: ${DIFFSEC} seconds."
echo "Job Took: $(date -u -d "@${DIFFSEC}" +%H:%M:%S)"
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=send_oisstv2_daily_climatology
#SBATCH --partition=test
#SBATCH --time=00:20:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_OISSTv2_daily_climatology_to_os.slurm
#
# Description: SLURM script to send the OISSTv2.1 daily
# climatology datasets to Icechunk repository.
#
# The job name carries a "send_" prefix so this job can be told
# apart from the job that creates the climatology in the queue
# and in accounting output.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-09
#
# ==============================================================
set -euo pipefail

# -- Python Environment -- #
# Activate miniforge environment:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send OISSTv2.1 daily climatology datasets to JASMIN OS -- #
echo "In Progress: Sending OISSTv2.1 daily climatology to Icechunk..."

python3 send_OISSTv2_daily_climatology_to_os.py

# Only reached if the Python script succeeded (set -e aborts otherwise).
echo "Completed: Sent OISSTv2.1 daily climatology to Icechunk."
@@ -0,0 +1,32 @@
1
#!/bin/bash
#SBATCH --job-name=send_oisstv2_daily
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_OISSTv2_daily_to_os.slurm
#
# Description: SLURM batch job that runs
# send_OISSTv2_daily_to_os.py to send the OISSTv2.1 daily
# time-series dataset to the Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-09
#
# ==============================================================
# Abort on the first failing command, unset variable or failed pipe stage.
set -euo pipefail

# -- Python Environment -- #
# Load the conda installation, then switch to the env_ods environment:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send OISSTv2.1 daily time-series datasets to JASMIN OS -- #
printf '%s\n' "In Progress: Sending OISSTv2.1 Daily time-series to Icechunk..."

python3 send_OISSTv2_daily_to_os.py

# Printed only when the Python script exited successfully.
printf '%s\n' "Completed: Sent OISSTv2.1 Daily time-series to Icechunk."