OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=oisstv2_monthly_climatology
3
+ #SBATCH --partition=test
4
+ #SBATCH --time=00:20:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_send_OISSTv2_monthly_climatology_to_os.slurm
12
+ #
13
+ # Description: SLURM script to send the OISSTv2.1 monthly
14
+ # climatology datasets to Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-06-09
18
+ #
19
+ # ==============================================================
20
+ set -euo pipefail
21
+
22
+ # -- Python Environment -- #
23
+ # Activate the miniforge base installation, then the project environment:
24
+ source .../miniforge3/bin/activate
25
+ conda activate env_ods
26
+
27
+ # -- Send OISSTv2.1 monthly climatology datasets to JASMIN OS -- #
28
+ echo "In Progress: Sending OISSTv2.1 monthly climatology to Icechunk..."
29
+
30
+ python3 send_OISSTv2_monthly_climatology_to_os.py
31
+
32
+ echo "Completed: Sent OISSTv2.1 monthly climatology to Icechunk."
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=oisstv2_monthly
3
+ #SBATCH --partition=test
4
+ #SBATCH --time=00:20:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_send_OISSTv2_monthly_to_os.slurm
12
+ #
13
+ # Description: SLURM script to send the OISSTv2.1 monthly
14
+ # time-series dataset to Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-06-09
18
+ #
19
+ # ==============================================================
20
+ set -euo pipefail
21
+
22
+ # -- Python Environment -- #
23
+ # Activate the miniforge base installation, then the project environment:
24
+ source .../miniforge3/bin/activate
25
+ conda activate env_ods
26
+
27
+ # -- Send OISSTv2.1 monthly time-series datasets to JASMIN OS -- #
28
+ echo "In Progress: Sending OISSTv2.1 monthly time-series to Icechunk..."
29
+
30
+ python3 send_OISSTv2_monthly_to_os.py
31
+
32
+ echo "Completed: Sent OISSTv2.1 monthly time-series to Icechunk."
@@ -0,0 +1,32 @@
1
+ #!/bin/bash
2
+ #SBATCH --job-name=update_oisstv2_daily
3
+ #SBATCH --partition=compute
4
+ #SBATCH --time=03:00:00
5
+ #SBATCH --ntasks-per-core=1
6
+ #SBATCH --ntasks-per-node=64
7
+ #SBATCH --ntasks-per-socket=32
8
+ #SBATCH --nodes=1
9
+
10
+ # ==============================================================
11
+ # run_update_OISSTv2_daily_to_os.slurm
12
+ #
13
+ # Description: SLURM script to update the OISSTv2.1 daily
14
+ # time-series dataset in Icechunk repository.
15
+ #
16
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
17
+ # Created On: 2026-06-09
18
+ #
19
+ # ==============================================================
20
+ set -euo pipefail
21
+
22
+ # -- Python Environment -- #
23
+ # Activate the miniforge base installation, then the project environment:
24
+ source /dssgfs01/working/otooth/miniforge3/bin/activate
25
+ conda activate env_ods
26
+
27
+ # -- Update OISSTv2.1 daily time-series datasets in JASMIN OS -- #
28
+ echo "In Progress: Updating OISSTv2.1 Daily time-series in Icechunk..."
29
+
30
+ python3 update_OISSTv2_daily_to_os.py
31
+
32
+ echo "Completed: Updated OISSTv2.1 Daily time-series in Icechunk."
@@ -0,0 +1,154 @@
1
+ # =========================================================
2
+ # send_OISSTv2_daily_climatology_to_os.py
3
+ #
4
+ # Script to write OISST v2.1 long-term daily climatologies
5
+ # to Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import numpy as np
12
+ import xarray as xr
13
+ import zarr
14
+
15
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
16
+ from OceanDataStore.data.utils import (
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ )
21
+
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Write the OISSTv2.1 daily climatology to an Icechunk repository.

    Opens the locally stored daily climatology NetCDF file for the
    ``start_yr``-``end_yr`` period, standardises coordinate and variable
    names, attaches the NOAA PSL land-sea mask and the horizontal grid
    metrics (``dx``, ``dy``, ``cell_area``), rebuilds the climatological
    time bounds and global attributes, and passes the resulting dataset to
    ``send_to_icechunk``.

    All inputs (bucket, credentials path, climatology period, file paths)
    are hard-coded below; the function takes no arguments and returns None.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "oisst"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Define climatology period:
    start_yr = 1991
    end_yr = 2020

    # Log through the module-level logger rather than the root logger so the
    # record carries this module's name.
    logger.info(f"In Progress: Sending OISSTv2.1 daily climatology for {start_yr}-{end_yr} to Icechunk...")

    # Open OISSTv2 dataset:
    filepath = f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/climatology/oisst_climatology_{start_yr}-{end_yr}.nc"
    ds = xr.open_dataset(filepath, engine="h5netcdf")

    # Open OISSTv2 land-sea mask dataset (remote NOAA PSL THREDDS endpoint):
    ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False)
    ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"})
    # Wrap mask longitudes exactly as for the data below so the coordinate
    # labels of both datasets agree when the mask is assigned.
    ds_mask = ds_mask.assign_coords(
        longitude=((ds_mask["longitude"] + 180) % 360) - 180
    )

    # Standardise coordinate dimension names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude", "dayofyear": "day"})

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Add day of year coordinate (1-366):
    ds = ds.assign_coords(
        day=np.arange(1, 367)
    )

    # Rename variables to standard names:
    ds = ds.rename({"sst_mean": "tos_mean",
                    "sst_p10": "tos_p10",
                    "sst_p90": "tos_p90",
                    })

    # Add descriptive long names:
    ds["tos_mean"].attrs["long_name"] = "Daily Mean Sea Surface Temperature Climatology"
    ds["tos_p10"].attrs["long_name"] = "Daily 10th Percentile Sea Surface Temperature Climatology"
    ds["tos_p90"].attrs["long_name"] = "Daily 90th Percentile Sea Surface Temperature Climatology"

    # Add OISSTv2 land mask:
    ds["mask"] = ds_mask["mask"]
    ds["mask"].attrs.clear()
    ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask",
                                          "standard_name": "sea_binary_mask",
                                          "comment": "1 = sea, 0 = land"
                                          })

    # Add horizontal grid cell area:
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update time bounds to reflect climatological period:
    ds['time_bnds'] = xr.DataArray(
        np.zeros((ds['day'].size, 2), dtype='datetime64[ns]'),
        dims=('day', 'bnds'),
        coords={'day': ds['day']},
    )
    # Zero-based day offsets, shared by both bounds: the lower bound counts
    # from January of start_yr, the upper bound from January of end_yr.
    day_offsets = np.timedelta64(1, 'D') * np.arange(ds['day'].size)
    ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + day_offsets).astype('datetime64[ns]')
    ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + day_offsets).astype('datetime64[ns]')
    ds.time_bnds.attrs.clear()

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.5",
        "title": f"NOAA OISSTv2.1 Daily Climatology ({start_yr}-{end_yr})",
        "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature climatology ({start_yr}-{end_yr}).",
        "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).",
        "dataset_type": "observation",
        "product_type": "climatology",
        "product_version": "2.1",
        "institution": "NOAA National Centers for Environmental Information (NCEI)",
        "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1",
        "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1",
        "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.",
        "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/",
        "doi": "10.1175/JCLI-D-20-0166.1",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "0.25 degree",
        "aggregation": "mean",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'day': 5, 'latitude': 720, 'longitude': 1440})

    # Update variable encodings (Blosc/zstd, level 3, byte shuffle) for every
    # data variable and coordinate:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message based on climatology period:
    prefix = f"oisst_v2.1_{start_yr}_{end_yr}_daily_climatology"
    commit_message = f"Added OISSTv2.1 Sea Surface Temperature Climatology ({start_yr}-{end_yr})."

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='day',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=None,
        dask_cluster_kwargs=None,
    )
152
+
153
+ if __name__ == "__main__":
154
+ main()
@@ -0,0 +1,151 @@
1
+ # =========================================================
2
+ # send_OISSTv2_daily_ltm_climatology_to_os.py
3
+ #
4
+ # Script to write OISST v2.1 long-term daily climatologies
5
+ # to Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import numpy as np
12
+ import xarray as xr
13
+ import zarr
14
+
15
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
16
+ from OceanDataStore.data.utils import (
17
+ compute_cell_area,
18
+ compute_dx,
19
+ compute_dy,
20
+ )
21
+
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def main():
    """Write the OISSTv2.1 long-term-mean daily climatology to Icechunk.

    Merges the locally stored sea-ice (``icec``) and sea surface temperature
    (``sst``) long-term-mean NetCDF files for the ``start_yr``-``end_yr``
    period, standardises coordinate and variable names, attaches the NOAA
    PSL land-sea mask and the horizontal grid metrics (``dx``, ``dy``,
    ``cell_area``), rewrites the climatological time bounds and global
    attributes, and passes the resulting dataset to ``send_to_icechunk``.

    All inputs (bucket, credentials path, climatology period, file paths)
    are hard-coded below; the function takes no arguments and returns None.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "oisst"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Define climatology period:
    start_yr = 1991
    end_yr = 2020

    # Log through the module-level logger rather than the root logger so the
    # record carries this module's name.
    logger.info(f"In Progress: Sending OISSTv2.1 daily climatology for {start_yr}-{end_yr} to Icechunk...")

    # Open OISSTv2 datasets (sea-ice and SST long-term means) and merge them;
    # the per-file "valid_yr_count" variable is dropped before merging:
    filepaths = [f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.day.mean.ltm.{start_yr}-{end_yr}.nc",
                 f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.day.mean.ltm.{start_yr}-{end_yr}.nc"
                 ]
    ds = xr.merge([xr.open_dataset(filepath, decode_times=False).drop_vars("valid_yr_count") for filepath in filepaths], compat="no_conflicts")

    # Open OISSTv2 land-sea mask dataset (remote NOAA PSL THREDDS endpoint):
    ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False)
    ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"})
    # Wrap mask longitudes exactly as for the data below so the coordinate
    # labels of both datasets agree when the mask is assigned.
    ds_mask = ds_mask.assign_coords(
        longitude=((ds_mask["longitude"] + 180) % 360) - 180
    )

    # Standardise coordinate dimension names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude", "time": "day"})

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Add day of year coordinate (1-365):
    ds = ds.assign_coords(
        day=np.arange(1, 366)
    )

    # Rename variables to standard names:
    ds = ds.rename({"sst": "tos",
                    "icec": "siconc",
                    "climatology_bounds": "time_bnds",
                    })

    # Add standard names and units:
    ds["tos"].attrs["standard_name"] = "sea_surface_temperature"
    ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction"
    ds["siconc"].attrs["units"] = "1"

    # Add OISSTv2 land mask:
    ds["mask"] = ds_mask["mask"]
    ds["mask"].attrs.clear()
    ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask",
                                          "standard_name": "sea_binary_mask",
                                          "comment": "1 = sea, 0 = land"
                                          })

    # Add horizontal grid cell area:
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update time bounds to reflect climatological period. The undecoded
    # bounds are first cast to datetime64[ns] and then overwritten in full.
    ds['time_bnds'] = ds['time_bnds'].astype('datetime64[ns]')
    # Zero-based day offsets, shared by both bounds: the lower bound counts
    # from January of start_yr, the upper bound from January of end_yr.
    day_offsets = np.timedelta64(1, 'D') * np.arange(ds['day'].size)
    ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + day_offsets).astype('datetime64[ns]')
    ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + day_offsets).astype('datetime64[ns]')
    ds.time_bnds.attrs.clear()

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.5",
        "title": f"NOAA OISSTv2.1 Daily Climatology ({start_yr}-{end_yr})",
        "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature and sea ice fraction climatology ({start_yr}-{end_yr}).",
        "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).",
        "dataset_type": "observation",
        "product_type": "climatology",
        "product_version": "2.1",
        "institution": "NOAA National Centers for Environmental Information (NCEI)",
        "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1",
        "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1",
        "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.",
        "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/",
        "doi": "10.1175/JCLI-D-20-0166.1",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "0.25 degree",
        "aggregation": "mean",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'day': 5, 'latitude': 720, 'longitude': 1440})

    # Update variable encodings (Blosc/zstd, level 3, byte shuffle) for every
    # data variable and coordinate:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message based on climatology period.
    # NOTE(review): this bucket/prefix pair with exists=False matches the one
    # used by send_OISSTv2_daily_climatology_to_os.py -- confirm the two
    # scripts are not meant to create the same repository.
    prefix = f"oisst_v2.1_{start_yr}_{end_yr}_daily_climatology"
    commit_message = f"Added OISSTv2.1 Sea Surface Temperature Climatology ({start_yr}-{end_yr})."

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='day',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=None,
        dask_cluster_kwargs=None,
    )
149
+
150
+ if __name__ == "__main__":
151
+ main()
@@ -0,0 +1,142 @@
1
+ # =========================================================
2
+ # send_OISSTv2_daily_to_os.py
3
+ #
4
+ # Script to write OISST v2.1 daily mean time series
5
+ # to Icechunk repositories in JASMIN cloud object storage.
6
+ #
7
+ # Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
8
+ # =========================================================
9
+ import logging
10
+
11
+ import xarray as xr
12
+ import zarr
13
+
14
+ from OceanDataStore.cli import initialise_logging, send_to_icechunk
15
+ from OceanDataStore.data.utils import (
16
+ compute_cell_area,
17
+ compute_dx,
18
+ compute_dy,
19
+ )
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
def main():
    """Write the OISSTv2.1 daily mean SST time series to an Icechunk repository.

    Opens the locally stored yearly NetCDF files matching
    ``sst.day.mean.198?.nc`` as a single dataset, standardises coordinate
    and variable names, attaches the NOAA PSL land-sea mask and the
    horizontal grid metrics (``dx``, ``dy``, ``cell_area``), replaces the
    global attributes, rechunks for time-series access and passes the
    resulting dataset to ``send_to_icechunk`` together with the Dask
    configuration and cluster settings defined below.

    All inputs (bucket, credentials path, file paths, Dask settings) are
    hard-coded below; the function takes no arguments and returns None.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "oisst"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True
    config_kwargs = {
        "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/",
        "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/"
    }
    cluster_kwargs = {
        "n_workers" : 15,
        "threads_per_worker" : 1,
        "memory_limit":"6GB"
    }

    # Log through the module-level logger rather than the root logger so the
    # record carries this module's name.
    logger.info("In Progress: Sending OISSTv2.1 daily mean time series to Icechunk...")

    # Open OISSTv2 dataset:
    filepaths = "/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/sst.day.mean.198?.nc"
    ds = xr.open_mfdataset(filepaths,
                           combine="by_coords",
                           data_vars="all",
                           engine="h5netcdf",
                           )

    # Open OISSTv2 land-sea mask dataset (remote NOAA PSL THREDDS endpoint):
    ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False)
    ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"})
    # Wrap mask longitudes exactly as for the data below so the coordinate
    # labels of both datasets agree when the mask is assigned.
    ds_mask = ds_mask.assign_coords(
        longitude=((ds_mask["longitude"] + 180) % 360) - 180
    )

    # Standardise coordinate dimension names:
    ds = ds.rename({"lon": "longitude", "lat": "latitude"})

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Rename variables to standard names:
    ds = ds.rename({"sst": "tos"})

    # Add standard names:
    ds["tos"].attrs["standard_name"] = "sea_surface_temperature"

    # Add OISSTv2 land mask:
    ds["mask"] = ds_mask["mask"]
    ds["mask"].attrs.clear()
    ds["mask"] = ds["mask"].assign_attrs({"long_name": "Land-Sea Binary Mask",
                                          "standard_name": "sea_binary_mask",
                                          "comment": "1 = sea, 0 = land"
                                          })

    # Add horizontal grid cell area:
    ds["dx"] = compute_dx(ds)
    ds["dy"] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.5",
        "title": "NOAA OISSTv2.1 Daily Timeseries",
        "description": "NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature timeseries.",
        "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "2.1",
        "institution": "NOAA National Centers for Environmental Information (NCEI)",
        "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1",
        "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1",
        "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.",
        "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/",
        "doi": "10.1175/JCLI-D-20-0166.1",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "0.25 degree",
        "aggregation": "mean",
        "aggregation_frequency": "daily",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for time-series analysis (one chunk spans the
    # whole time axis for each 50 x 50 horizontal tile):
    ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50})

    # Update variable encodings: discard the encoding inherited from the
    # source files, then apply Blosc (zstd, level 3, byte shuffle) to every
    # data variable and coordinate:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding.clear()
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the daily time series:
    prefix = "oisst_v2.1_daily"
    commit_message = "Added OISSTv2.1 Sea Surface Temperature Daily Timeseries (1981-09-1989-12)."

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
140
+
141
+ if __name__ == "__main__":
142
+ main()