OceanDataStore 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. OceanDataStore/__init__.py +21 -0
  2. OceanDataStore/catalog/__init__.py +12 -0
  3. OceanDataStore/catalog/oceandatacatalog.py +1242 -0
  4. OceanDataStore/catalog/stac/README.md +34 -0
  5. OceanDataStore/catalog/stac/__init__.py +30 -0
  6. OceanDataStore/catalog/stac/create_noc_stac.py +109 -0
  7. OceanDataStore/catalog/stac/npd_era5_collection.py +364 -0
  8. OceanDataStore/catalog/stac/npd_jra55_collection.py +196 -0
  9. OceanDataStore/catalog/stac/ods_obs_collection.py +534 -0
  10. OceanDataStore/catalog/stac/rapid_evo_collection.py +309 -0
  11. OceanDataStore/catalog/stac/template_collection.py +85 -0
  12. OceanDataStore/catalog/stac/utils.py +476 -0
  13. OceanDataStore/cli/__init__.py +34 -0
  14. OceanDataStore/cli/arg_parser.py +182 -0
  15. OceanDataStore/cli/cli.py +203 -0
  16. OceanDataStore/cli/exceptions.py +83 -0
  17. OceanDataStore/cli/icechunk.py +888 -0
  18. OceanDataStore/cli/logging.py +52 -0
  19. OceanDataStore/cli/object_store.py +293 -0
  20. OceanDataStore/cli/utils.py +275 -0
  21. OceanDataStore/cli/zarr.py +870 -0
  22. OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py +135 -0
  23. OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py +33 -0
  24. OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +32 -0
  25. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +32 -0
  26. OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +32 -0
  27. OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +32 -0
  28. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +99 -0
  29. OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py +147 -0
  30. OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py +143 -0
  31. OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py +162 -0
  32. OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh +51 -0
  33. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +32 -0
  34. OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  35. OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +32 -0
  36. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py +76 -0
  37. OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py +165 -0
  38. OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py +161 -0
  39. OceanDataStore/data/ERA5/create_ERA5_daily_climatology.py +110 -0
  40. OceanDataStore/data/ERA5/create_ERA5_daily_mean.py +69 -0
  41. OceanDataStore/data/ERA5/create_ERA5_monthly_mean.py +74 -0
  42. OceanDataStore/data/ERA5/run_create_ERA5_daily_climatology.slurm +54 -0
  43. OceanDataStore/data/ERA5/run_send_ERA5_daily_climatology_to_os.slurm +32 -0
  44. OceanDataStore/data/ERA5/run_send_ERA5_daily_to_os.slurm +32 -0
  45. OceanDataStore/data/ERA5/run_send_ERA5_monthly_to_os.slurm +32 -0
  46. OceanDataStore/data/ERA5/run_update_ERA5_daily_to_os.slurm +32 -0
  47. OceanDataStore/data/ERA5/run_update_ERA5_monthly_to_os.slurm +32 -0
  48. OceanDataStore/data/ERA5/send_ERA5_daily_climatology_to_os.py +159 -0
  49. OceanDataStore/data/ERA5/send_ERA5_daily_to_os.py +141 -0
  50. OceanDataStore/data/ERA5/send_ERA5_monthly_to_os.py +173 -0
  51. OceanDataStore/data/ERA5/update_ERA5_daily_to_os.py +141 -0
  52. OceanDataStore/data/ERA5/update_ERA5_monthly_to_os.py +169 -0
  53. OceanDataStore/data/HadISST/download_HadISST1_data.sh +43 -0
  54. OceanDataStore/data/HadISST/run_send_HadISST1_monthly_to_os.slurm +32 -0
  55. OceanDataStore/data/HadISST/send_HadISST1_monthly_to_os.py +133 -0
  56. OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh +54 -0
  57. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py +130 -0
  58. OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py +129 -0
  59. OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm +32 -0
  60. OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py +140 -0
  61. OceanDataStore/data/OISST/create_OISSTv2_daily_climatology.py +83 -0
  62. OceanDataStore/data/OISST/download_oisstv2_data.sh +43 -0
  63. OceanDataStore/data/OISST/run_create_OISSTv2_daily_climatology.slurm +44 -0
  64. OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +32 -0
  65. OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm +32 -0
  66. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm +32 -0
  67. OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm +32 -0
  68. OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm +32 -0
  69. OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +154 -0
  70. OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py +151 -0
  71. OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py +142 -0
  72. OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py +150 -0
  73. OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py +145 -0
  74. OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py +142 -0
  75. OceanDataStore/data/OSTIA/create_OSTIA_daily_climatology.py +120 -0
  76. OceanDataStore/data/OSTIA/download_OSTIA_NRT.py +42 -0
  77. OceanDataStore/data/OSTIA/download_OSTIA_REP_1981_2025.py +42 -0
  78. OceanDataStore/data/OSTIA/run_create_OSTIA_daily_climatology.slurm +54 -0
  79. OceanDataStore/data/OSTIA/run_send_OSTIA_daily_climatology_to_os.slurm +32 -0
  80. OceanDataStore/data/OSTIA/run_send_OSTIA_nrt_daily_to_os.slurm +32 -0
  81. OceanDataStore/data/OSTIA/run_send_OSTIA_rep_daily_to_os.slurm +32 -0
  82. OceanDataStore/data/OSTIA/run_update_OSTIA_daily_to_os.slurm +33 -0
  83. OceanDataStore/data/OSTIA/send_OSTIA_daily_climatology_to_os.py +194 -0
  84. OceanDataStore/data/OSTIA/send_OSTIA_nrt_daily_to_os.py +141 -0
  85. OceanDataStore/data/OSTIA/send_OSTIA_rep_daily_to_os.py +145 -0
  86. OceanDataStore/data/OSTIA/update_OSTIA_copernicus_nrt_daily_to_os.py +144 -0
  87. OceanDataStore/data/OSTIA/update_OSTIA_nrt_daily_to_os.py +137 -0
  88. OceanDataStore/data/WOA23/download_WOA23_climatology.sh +41 -0
  89. OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm +32 -0
  90. OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm +32 -0
  91. OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py +263 -0
  92. OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py +292 -0
  93. OceanDataStore/data/update_icechunk_repo_attrs.py +76 -0
  94. OceanDataStore/data/update_noc_npd_era5v1_attrs.py +172 -0
  95. OceanDataStore/data/utils.py +506 -0
  96. OceanDataStore/zarr.py +993 -0
  97. oceandatastore-0.3.0.dist-info/METADATA +184 -0
  98. oceandatastore-0.3.0.dist-info/RECORD +104 -0
  99. oceandatastore-0.3.0.dist-info/WHEEL +5 -0
  100. oceandatastore-0.3.0.dist-info/entry_points.txt +2 -0
  101. oceandatastore-0.3.0.dist-info/licenses/LICENSE +201 -0
  102. oceandatastore-0.3.0.dist-info/scm_file_list.json +154 -0
  103. oceandatastore-0.3.0.dist-info/scm_version.json +8 -0
  104. oceandatastore-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,534 @@
1
+ """
2
+ ods_obs_collection.py
3
+
4
+ Description:
5
+ Functions to create SpatioTemporal Asset Catalog (STAC) Collections
6
+ for ocean observation datasets.
7
+
8
+ Authors:
9
+ - Ollie Tooth (oliver.tooth@noc.ac.uk)
10
+ """
11
+ # -- Import Python Modules -- #
12
+ import logging
13
+ import pystac
14
+ import datetime
15
+
16
+ from OceanDataStore.catalog.stac.utils import open_icechunk_store, create_item_with_icechunk_asset
17
+
18
+
19
def create_nsidc_collection() -> pystac.Collection:
    """
    Build the STAC Collection for the NSIDC Sea Ice Index, Version 4.

    The Collection is defined with a global bounding box and a fixed
    temporal interval. One Item is then added for each of the Antarctic
    and Arctic monthly Icechunk repositories in the ``nsidc`` bucket.

    Returns:
    -------
    nsidc_collection : pystac.Collection
        NSIDC Sea Ice Index, Version 4 STAC Collection, including its Items.
    """
    bucket = "nsidc"

    # ==== Define NSIDC Sea Ice Index, Version 4 Collection ==== #
    # Global bounding box plus the current [start, end] interval:
    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent(
            intervals=[[datetime.datetime(1978, 11, 15), datetime.datetime(2025, 12, 15)]]
        ),
    )

    # Organisation producing / licensing the data:
    producer = pystac.Provider(
        name="National Snow and Ice Data Center (NSIDC)",
        description="National Snow and Ice Data Center (NSIDC), Cooperative Institute for Research in Environmental Sciences, University of Colorado, United States.",
        roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
        url="https://nsidc.org/data/g02135/versions/4",
    )
    # Facility hosting the object store:
    host = pystac.Provider(
        name="JASMIN",
        description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
        roles=[pystac.ProviderRole.HOST],
        url="https://jasmin.ac.uk",
    )

    nsidc_collection = pystac.Collection(
        id=bucket,
        title="NSIDC Sea Ice Index, Version 4 Collection",
        description="**About:**\n\nCollection of National Snow and Ice Data Center (NSIDC) Sea Ice Index, Version 4 datasets.\n\n**More Information:**\n - [NSIDC](https://nsidc.org/home)",
        extent=extent,
        license="U.S. Government Works License",
        extra_fields={
            "contact": "Ollie Tooth (oliver.tooth@noc.ac.uk)",
            "project": "OceanDataStore",
            "status": "ongoing",
            "update_frequency": "quarterly",
            "last_data_update": "2025-12-15",
        },
        keywords=["NSIDC", "arctic", "antarctic", "observation", "sea-ice"],
        providers=[producer, host],
    )
    logging.info(f"Completed: Created STAC Collection: {nsidc_collection}")

    # -- Add Items to NSIDC Sea Ice Index Collection -- #
    for region in ("antarctic", "arctic"):
        prefix = f"nsidc_sea_ice_index_v4_{region}_monthly"

        # Open the dataset on the "main" branch of its Icechunk repository:
        dataset = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        nsidc_collection.add_item(
            create_item_with_icechunk_asset(
                ds=dataset,
                id=f"{bucket}/{prefix}",
                bucket=bucket,
                prefix=prefix,
                start_date="1978-11-15",
                end_date="2025-12-15",
                collection=bucket,
            )
        )

    logging.info(f"Completed: Added Items to STAC Collection with ID: {nsidc_collection.id}")

    return nsidc_collection
84
+
85
+
86
def create_woa23_collection() -> pystac.Collection:
    """
    Build the STAC Collection for the World Ocean Atlas 2023 climatologies.

    The Collection is defined with a global bounding box and a fixed
    temporal interval. One Item is then added for every combination of
    reference period (1971-2000, 1981-2010, 1991-2020) and averaging
    frequency (annual, monthly) stored in the ``woa23`` bucket.

    Returns:
    -------
    woa23_collection : pystac.Collection
        World Ocean Atlas 2023 STAC Collection, including its Items.
    """
    bucket = "woa23"

    # ==== Define World Ocean Atlas 2023 Collection ==== #
    # Global bounding box plus the current [start, end] interval:
    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent(
            intervals=[[datetime.datetime(1971, 1, 1), datetime.datetime(2020, 12, 31)]]
        ),
    )

    # Organisation producing / licensing the data:
    producer = pystac.Provider(
        name="NOAA National Centers for Environmental Information (NCEI)",
        description="National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI), United States.",
        roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
        url="https://www.ncei.noaa.gov",
    )
    # Facility hosting the object store:
    host = pystac.Provider(
        name="JASMIN",
        description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
        roles=[pystac.ProviderRole.HOST],
        url="https://jasmin.ac.uk",
    )

    woa23_collection = pystac.Collection(
        id=bucket,
        title="World Ocean Atlas 2023 Collection",
        description="**About:**\n\nCollection of World Ocean Atlas 2023 climatology datasets.\n\n**More Information:**\n - [World Ocean Atlas](https://www.ncei.noaa.gov/access/world-ocean-atlas-2023/)",
        extent=extent,
        license="Creative Commons CC0 1.0 Universal License",
        extra_fields={
            "contact": "Ollie Tooth (oliver.tooth@noc.ac.uk)",
            "project": "OceanDataStore",
            "status": "ongoing",
            "update_frequency": "None",
            "last_data_update": "2024-02-01",
        },
        keywords=["WOA23", "global", "observation", "temperature", "salinity"],
        providers=[producer, host],
    )
    logging.info(f"Completed: Created STAC Collection: {woa23_collection}")

    # -- Add Items to World Ocean Atlas 2023 Collection -- #
    # Each repository prefix encodes its reference period and frequency, so
    # the Item date range follows directly from the period being iterated.
    reference_periods = (("1971", "2000"), ("1981", "2010"), ("1991", "2020"))
    for first_year, last_year in reference_periods:
        for frequency in ("annual", "monthly"):
            prefix = f"woa23_{first_year}_{last_year}_{frequency}_climatology"

            # Open the dataset on the "main" branch of its Icechunk repository:
            dataset = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

            woa23_collection.add_item(
                create_item_with_icechunk_asset(
                    ds=dataset,
                    id=f"{bucket}/{prefix}",
                    bucket=bucket,
                    prefix=prefix,
                    start_date=f"{first_year}-01-01",
                    end_date=f"{last_year}-12-31",
                    collection=bucket,
                )
            )

    logging.info(f"Completed: Added Items to STAC Collection with ID: {woa23_collection.id}")

    return woa23_collection
157
+
158
+
159
def create_oisst_collection() -> pystac.Collection:
    """
    Build the STAC Collection for the OISST Version 2.1 datasets.

    The Collection is defined with a global bounding box and a fixed
    temporal interval. Items are then added for the monthly time series
    and for the 1991-2020 daily and monthly climatologies stored in the
    ``oisst`` bucket.

    Returns:
    -------
    oisst_collection : pystac.Collection
        OISST Version 2.1 STAC Collection, including its Items.
    """
    bucket = "oisst"

    # ==== Define OISST Version 2.1 Collection ==== #
    # Global bounding box plus the current [start, end] interval:
    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent(
            intervals=[[datetime.datetime(1981, 9, 1), datetime.datetime(2026, 5, 1)]]
        ),
    )

    # Organisation producing / licensing the data:
    producer = pystac.Provider(
        name="NOAA National Centers for Environmental Information (NCEI)",
        description="National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI), United States.",
        roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
        url="https://www.ncei.noaa.gov",
    )
    # Facility hosting the object store:
    host = pystac.Provider(
        name="JASMIN",
        description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
        roles=[pystac.ProviderRole.HOST],
        url="https://jasmin.ac.uk",
    )

    oisst_collection = pystac.Collection(
        id=bucket,
        title="OISST Version 2.1 Collection",
        description="**About:**\n\nCollection of OISST Version 2.1 datasets.\n\n**More Information:**\n - [OISST Version 2.1](https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html)",
        extent=extent,
        license="Creative Commons CC0 1.0 Universal License",
        extra_fields={
            "contact": "Ollie Tooth (oliver.tooth@noc.ac.uk)",
            "project": "OceanDataStore",
            "status": "ongoing",
            "update_frequency": "quarterly",
            "last_data_update": "2026-05-01",
        },
        keywords=["OISSTv2.1", "global", "observation", "sea surface temperature", "sea ice concentration"],
        providers=[producer, host],
    )
    logging.info(f"Completed: Created STAC Collection: {oisst_collection}")

    # -- Add Items to OISST Version 2.1 Collection -- #
    climatology_dates = ("1991-01-01", "2020-12-31")
    timeseries_dates = ("1981-09-01", "2026-05-01")
    repository_prefixes = (
        "oisst_v2.1_monthly",
        "oisst_v2.1_1991_2020_daily_climatology",
        "oisst_v2.1_1991_2020_monthly_climatology",
    )
    for prefix in repository_prefixes:
        # Open the dataset on the "main" branch of its Icechunk repository:
        dataset = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        # Climatologies are tagged with their reference period; anything
        # else is treated as the full-length time series.
        start_date, end_date = climatology_dates if "1991_2020" in prefix else timeseries_dates

        oisst_collection.add_item(
            create_item_with_icechunk_asset(
                ds=dataset,
                id=f"{bucket}/{prefix}",
                bucket=bucket,
                prefix=prefix,
                start_date=start_date,
                end_date=end_date,
                collection=bucket,
            )
        )

    logging.info(f"Completed: Added Items to STAC Collection with ID: {oisst_collection.id}")

    return oisst_collection
234
+
235
+
236
def create_en4_collection() -> pystac.Collection:
    """
    Create the EN4.2.2 STAC Collection.

    The Collection is defined with a global bounding box. Its temporal
    extent is computed as the envelope (earliest start, latest end) of the
    date ranges assigned to the Items below, so the Collection extent always
    encloses every Item it contains. Items are added for the monthly
    analysis and for the 1971-2000, 1981-2010 and 1991-2020 monthly
    climatologies stored in the ``en4.2.2`` bucket.

    Returns:
    -------
    en4_collection : pystac.Collection
        EN4.2.2 STAC Collection, including its Items.
    """
    bucket = "en4.2.2"
    prefixes = [
        "en4.2.2_analysis_g10_monthly",
        "en4.2.2_analysis_g10_1971_2000_monthly_climatology",
        "en4.2.2_analysis_g10_1981_2010_monthly_climatology",
        "en4.2.2_analysis_g10_1991_2020_monthly_climatology",
    ]

    # -- Resolve the (start_date, end_date) advertised by each Item -- #
    item_dates = {}
    for prefix in prefixes:
        if "climatology" in prefix:
            # Climatology prefixes embed their reference period as
            # "en4.2.2_analysis_g10_<first year>_<last year>_monthly_climatology".
            # Keying on "climatology" rather than the substring "19" keeps
            # this working for reference periods that do not start in 19xx.
            first_year, last_year = prefix.split("_")[3:5]
            item_dates[prefix] = (f"{first_year}-01-01", f"{last_year}-12-31")
        else:
            # Full-length monthly analysis.
            # NOTE(review): the end date should be confirmed against the
            # final time step of the dataset whenever the store is updated.
            item_dates[prefix] = ("1950-01-01", "2026-03-12")

    # ==== Define EN4.2.2 Collection ==== #
    # Define the spatial extent for the collection:
    spatial_extent = pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]])

    # Define the temporal extent as the envelope of all Item date ranges.
    # ISO-8601 date strings order chronologically, so min/max on the strings
    # is sufficient. Previously the interval was hard-coded to end on
    # 2026-03-01, which left the monthly Item (ending 2026-03-12) outside
    # the extent of its own Collection.
    extent_start = min(start for start, _ in item_dates.values())
    extent_end = max(end for _, end in item_dates.values())
    collection_interval = [
        datetime.datetime.fromisoformat(extent_start),
        datetime.datetime.fromisoformat(extent_end),
    ]
    temporal_extent = pystac.TemporalExtent(intervals=[collection_interval])

    # Define the EN4.2.2 Collection:
    en4_collection = pystac.Collection(
        id="en4.2.2",
        title="EN4.2.2 Collection",
        description="**About:**\n\nCollection of EN4.2.2 quality Controlled Ocean datasets.\n\n**More Information:**\n - [EN4.2.2](https://www.metoffice.gov.uk/hadobs/en4/)",
        extent=pystac.Extent(spatial=spatial_extent, temporal=temporal_extent),
        license="Non-Commercial Government Licence",
        extra_fields=dict(
            contact="Ollie Tooth (oliver.tooth@noc.ac.uk)",
            project="OceanDataStore",
            status="ongoing",
            update_frequency="quarterly",
            last_data_update="2026-05-01",
        ),
        keywords=["EN4.2.2", "global", "observation", "temperature", "salinity"],
        providers=[
            pystac.Provider(
                name="Met Office",
                description="Met Office, United Kingdom.",
                roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
                url="https://www.metoffice.gov.uk",
            ),
            pystac.Provider(
                name="JASMIN",
                description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
                roles=[pystac.ProviderRole.HOST],
                url="https://jasmin.ac.uk",
            ),
        ],
    )

    logging.info(f"Completed: Created STAC Collection: {en4_collection}")

    # -- Add Items to EN4.2.2 Collection -- #
    for prefix, (start_date, end_date) in item_dates.items():
        # Open dataset from Icechunk repository:
        ds = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        item = create_item_with_icechunk_asset(
            ds=ds,
            id=f"{bucket}/{prefix}",
            bucket=bucket,
            prefix=prefix,
            start_date=start_date,
            end_date=end_date,
            collection=bucket,
        )
        # Add item to the EN4.2.2 Collection:
        en4_collection.add_item(item)

    logging.info(f"Completed: Added Items to STAC Collection with ID: {en4_collection.id}")

    return en4_collection
312
+
313
+
314
def create_armor3d_collection() -> pystac.Collection:
    """
    Build the STAC Collection for the ARMOR3D multi-observation datasets.

    The Collection is defined with a global bounding box and a fixed
    temporal interval. Items are then added for the monthly time series
    and for the monthly climatologies stored in the ``armor3d`` bucket.

    Returns:
    -------
    armor3d_collection : pystac.Collection
        ARMOR3D STAC Collection, including its Items.
    """
    bucket = "armor3d"

    # ==== Define ARMOR3D Collection ==== #
    # Global bounding box plus the current [start, end] interval:
    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent(
            intervals=[[datetime.datetime(1993, 1, 1), datetime.datetime(2024, 12, 31)]]
        ),
    )

    # Organisation producing / licensing the data:
    producer = pystac.Provider(
        name="Copernicus Marine Service",
        description="Copernicus Marine Service, Mercator Ocean International, France.",
        roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
        url="https://marine.copernicus.eu",
    )
    # Facility hosting the object store:
    host = pystac.Provider(
        name="JASMIN",
        description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
        roles=[pystac.ProviderRole.HOST],
        url="https://jasmin.ac.uk",
    )

    armor3d_collection = pystac.Collection(
        id=bucket,
        title="ARMOR3D Collection",
        description="**About:**\n\nCollection of Multi Observation Global Ocean ARMOR3D Temperature Salinity Height Geostrophic Current and MLD.\n\n**More Information:**\n - [ARMOR3D](https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_TSUV_3D_MYNRT_015_012/description)",
        extent=extent,
        license="Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA)",
        extra_fields={
            "contact": "Ollie Tooth (oliver.tooth@noc.ac.uk)",
            "project": "OceanDataStore",
            "status": "ongoing",
            "update_frequency": "quarterly",
            "last_data_update": "2025-11-01",
        },
        keywords=["ARMOR3D", "global", "observation", "temperature", "salinity", "dynamic height", "geostrophic current", "mixed layer depth"],
        providers=[producer, host],
    )
    logging.info(f"Completed: Created STAC Collection: {armor3d_collection}")

    # -- Add Items to ARMOR3D Collection -- #
    # NOTE(review): the 1971_2000 and 1981_2010 climatology periods begin
    # before the 1993 start of the Collection's temporal extent declared
    # above — confirm these repositories exist and that the periods are
    # intended.
    repository_prefixes = (
        "armor3d_global_my_monthly",
        "armor3d_global_my_1971_2000_monthly_climatology",
        "armor3d_global_my_1981_2010_monthly_climatology",
        "armor3d_global_my_1991_2020_monthly_climatology",
    )
    for prefix in repository_prefixes:
        # Open the dataset on the "main" branch of its Icechunk repository:
        dataset = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        if "19" in prefix:
            # Climatology: the reference period is embedded in the prefix
            # as "armor3d_global_my_<first year>_<last year>_...".
            fields = prefix.split("_")
            start_date, end_date = f"{fields[3]}-01-01", f"{fields[4]}-12-31"
        else:
            # Full-length monthly time series:
            start_date, end_date = "1993-01-01", "2024-12-31"

        armor3d_collection.add_item(
            create_item_with_icechunk_asset(
                ds=dataset,
                id=f"{bucket}/{prefix}",
                bucket=bucket,
                prefix=prefix,
                start_date=start_date,
                end_date=end_date,
                collection=bucket,
            )
        )

    logging.info(f"Completed: Added Items to STAC Collection with ID: {armor3d_collection.id}")

    return armor3d_collection
390
+
391
+
392
def create_hadisst_collection() -> pystac.Collection:
    """
    Create the HadISST Version 1.1 STAC Collection.

    The Collection is defined with a global bounding box and a temporal
    interval taken from the same start / end dates that are passed to the
    monthly Item, so the Collection extent always encloses the Item. The
    single monthly Icechunk repository in the ``hadisst`` bucket is then
    added as an Item.

    Returns:
    -------
    hadisst_collection : pystac.Collection
        HadISST Version 1.1 STAC Collection, including its Item.
    """
    bucket = "hadisst"

    # Single source of truth for the dataset's date range. Previously the
    # Collection interval was hard-coded to end on 2026-04-16 while the
    # Item was given end_date="2026-05-01", leaving the Item outside the
    # extent of its own Collection.
    # NOTE(review): confirm the end date against the final time step of
    # the dataset whenever the store is updated.
    start_date = "1870-01-16"
    end_date = "2026-05-01"

    # ==== Define HadISST Version 1.1 Collection ==== #
    # Define the spatial extent for the collection:
    spatial_extent = pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]])

    # Define the current temporal extent for the collection:
    collection_interval = [
        datetime.datetime.fromisoformat(start_date),
        datetime.datetime.fromisoformat(end_date),
    ]
    temporal_extent = pystac.TemporalExtent(intervals=[collection_interval])

    # Define the HadISST Version 1.1 Collection:
    hadisst_collection = pystac.Collection(
        id="hadisst",
        title="HadISST Version 1.1 Collection",
        description="**About:**\n\nCollection of HadISST Version 1.1 datasets.\n\n**More Information:**\n - [HadISST Version 1.1](https://www.metoffice.gov.uk/hadobs/hadisst/)",
        extent=pystac.Extent(spatial=spatial_extent, temporal=temporal_extent),
        license="Non-Commercial Government Licence",
        extra_fields=dict(
            contact="Ollie Tooth (oliver.tooth@noc.ac.uk)",
            project="OceanDataStore",
            status="ongoing",
            update_frequency="quarterly",
            last_data_update="2026-05-01",
        ),
        keywords=["HadISSTv1.1", "global", "observation", "sea surface temperature", "sea ice concentration"],
        providers=[
            pystac.Provider(
                name="Met Office",
                description="Met Office, United Kingdom.",
                roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
                url="https://www.metoffice.gov.uk",
            ),
            pystac.Provider(
                name="JASMIN",
                description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
                roles=[pystac.ProviderRole.HOST],
                url="https://jasmin.ac.uk",
            ),
        ],
    )

    logging.info(f"Completed: Created STAC Collection: {hadisst_collection}")

    # -- Add Items to HadISST Version 1.1 Collection -- #
    for prefix in ["hadisst_v1.1_monthly"]:
        # Open dataset from Icechunk repository:
        ds = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        item = create_item_with_icechunk_asset(
            ds=ds,
            id=f"{bucket}/{prefix}",
            bucket=bucket,
            prefix=prefix,
            start_date=start_date,
            end_date=end_date,
            collection=bucket,
        )
        # Add item to the HadISST Version 1.1 Collection:
        hadisst_collection.add_item(item)

    logging.info(f"Completed: Added Items to STAC Collection with ID: {hadisst_collection.id}")

    return hadisst_collection
457
+
458
+
459
def create_era5_collection() -> pystac.Collection:
    """
    Build the STAC Collection for the ERA5 datasets.

    The Collection is defined with a global bounding box and a fixed
    temporal interval. Items are then added for the two daily
    climatologies and for the daily and monthly time series stored in the
    ``era5`` bucket.

    Returns:
    -------
    era5_collection : pystac.Collection
        ERA5 STAC Collection, including its Items.
    """
    bucket = "era5"

    # ==== Define ERA5 Collection ==== #
    # Global bounding box plus the current [start, end] interval:
    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
        temporal=pystac.TemporalExtent(
            intervals=[[datetime.datetime(1980, 1, 1), datetime.datetime(2026, 6, 20)]]
        ),
    )

    # Organisation producing / licensing the data:
    producer = pystac.Provider(
        name="ECMWF",
        description="European Centre for Medium-Range Weather Forecasts (ECMWF), EU.",
        roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.LICENSOR],
        url="https://www.ecmwf.int",
    )
    # Facility hosting the object store:
    host = pystac.Provider(
        name="JASMIN",
        description="JASMIN Environmental Data Analysis Facility (United Kingdom).",
        roles=[pystac.ProviderRole.HOST],
        url="https://jasmin.ac.uk",
    )

    era5_collection = pystac.Collection(
        id=bucket,
        title="ERA5 Collection",
        description="**About:**\n\nCollection of ERA5 datasets.\n\n**More Information:**\n - [ERA5](https://www.ecmwf.int/en/forecasts/datasets/reanalysis-datasets/era5)",
        extent=extent,
        license="Creative Commons CC-BY-4.0 License",
        extra_fields={
            "contact": "Ollie Tooth (oliver.tooth@noc.ac.uk)",
            "project": "OceanDataStore",
            "status": "ongoing",
            "update_frequency": "quarterly",
            "last_data_update": "2026-06-20",
        },
        keywords=["ERA5", "global", "reanalysis", "sea surface temperature", "sea ice concentration"],
        providers=[producer, host],
    )
    logging.info(f"Completed: Created STAC Collection: {era5_collection.id}")

    # -- Add Items to ERA5 Collection -- #
    # One row per Icechunk repository: (prefix, start_date, end_date).
    repositories = (
        ("era5_1991_2020_daily_climatology", "1991-01-01", "2020-12-31"),
        ("era5_1996_2025_daily_climatology", "1996-01-01", "2025-12-31"),
        ("era5_daily_timeseries", "1980-01-01", "2026-06-20"),
        ("era5_monthly_timeseries", "1980-01-01", "2026-06-20"),
    )
    for prefix, start_date, end_date in repositories:
        # Open the dataset on the "main" branch of its Icechunk repository:
        dataset = open_icechunk_store(bucket=bucket, prefix=prefix, branch="main")

        era5_collection.add_item(
            create_item_with_icechunk_asset(
                ds=dataset,
                id=f"{bucket}/{prefix}",
                bucket=bucket,
                prefix=prefix,
                start_date=start_date,
                end_date=end_date,
                collection=bucket,
            )
        )

    logging.info(f"Completed: Added Items to STAC Collection with ID: {era5_collection.id}")

    return era5_collection