deep-code 0.1.3__tar.gz → 0.1.4.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/PKG-INFO +1 -1
- deep_code-0.1.4.dev1/deep_code/tests/utils/test_custom_xrlint_rules.py +73 -0
- deep_code-0.1.4.dev1/deep_code/tests/utils/test_dataset_stac_generator.py +233 -0
- deep_code-0.1.4.dev1/deep_code/tests/utils/test_helper.py +158 -0
- deep_code-0.1.4.dev1/deep_code/tools/lint.py +37 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/new.py +17 -16
- deep_code-0.1.4.dev1/deep_code/utils/custom_xrlint_rules.py +78 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/dataset_stac_generator.py +25 -85
- deep_code-0.1.4.dev1/deep_code/utils/helper.py +108 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/version.py +1 -1
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/PKG-INFO +1 -1
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/SOURCES.txt +4 -1
- deep_code-0.1.3/deep_code/tests/utils/test_dataset_stac_generator.py +0 -277
- deep_code-0.1.3/deep_code/tools/check.py +0 -4
- deep_code-0.1.3/deep_code/utils/helper.py +0 -14
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/LICENSE +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/README.md +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/generate_config.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/main.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/publish.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/constants.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/tools/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/tools/test_publish.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_github_automation.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_ogc_api_record.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_ogc_record_generator.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_osc_extension.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/publish.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/register.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/setup_ci.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/test.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/__init__.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/github_automation.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/ogc_api_record.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/ogc_record_generator.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/osc_extension.py +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/dependency_links.txt +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/entry_points.txt +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/requires.txt +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/top_level.txt +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/pyproject.toml +0 -0
- {deep_code-0.1.3 → deep_code-0.1.4.dev1}/setup.cfg +0 -0
|
# Copyright © 2025 Brockmann Consult GmbH.
# This software is distributed under the terms and conditions of the
# MIT license (https://mit-license.org/).

import unittest

import xarray as xr
from xrlint.testing import RuleTest, RuleTester

from deep_code.utils.custom_xrlint_rules import (
    DatasetDescriptionRule,
    VariableGcmdKeywordUrlRule,
)


class TestDeepCodePlugin(unittest.TestCase):
    """Tests for the custom deepcode XRLint rules."""

    def setUp(self):
        """Set up one compliant and one non-compliant test dataset."""
        shared_coords = {"time": [1], "lat": [0, 1], "lon": [0, 1]}

        # Valid dataset with all required metadata
        self.valid_dataset = xr.Dataset(
            data_vars={
                "temperature": (("time", "lat", "lon"), [[[300, 301], [302, 303]]]),
                "precipitation": (("time", "lat", "lon"), [[[10, 20], [30, 40]]]),
            },
            coords=shared_coords,
            attrs={
                "description": "Test climate dataset",
                "title": "Climate Dataset 2025",
            },
        )
        temperature = self.valid_dataset["temperature"]
        temperature.attrs["gcmd_keyword_url"] = (
            "https://gcmd.nasa.gov/KeywordViewer/temperature"
        )
        temperature.attrs["units"] = "K"
        precipitation = self.valid_dataset["precipitation"]
        precipitation.attrs["gcmd_keyword_url"] = (
            "https://gcmd.nasa.gov/KeywordViewer/precipitation"
        )
        precipitation.attrs["units"] = "mm"

        # Invalid dataset missing required metadata
        self.invalid_dataset = xr.Dataset(
            data_vars={
                "temperature": (("time", "lat", "lon"), [[[300, 301], [302, 303]]]),
                "precipitation": (("time", "lat", "lon"), [[[10, 20], [30, 40]]]),
            },
            coords=shared_coords,
            attrs={},
        )
        bad_temperature = self.invalid_dataset["temperature"]
        bad_temperature.attrs["gcmd_keyword_url"] = (
            "https://gcmd.nasa.gov/KeywordViewer/temperature"
        )
        bad_temperature.attrs["units"] = "K"
        # Intentionally omit gcmd_keyword_url and units for precipitation

        self.tester = RuleTester()

    def test_dataset_description(self):
        """DatasetDescriptionRule: valid dataset passes, invalid one is flagged."""
        self.tester.run(
            "dataset-description",
            DatasetDescriptionRule,
            valid=[RuleTest(dataset=self.valid_dataset)],
            invalid=[RuleTest(dataset=self.invalid_dataset, expected=1)],
        )

    def test_variable_gcmd_keyword_url(self):
        """VariableGcmdKeywordUrlRule: valid dataset passes, invalid one is flagged."""
        self.tester.run(
            "variable-gcmd-keyword-url",
            VariableGcmdKeywordUrlRule,
            valid=[RuleTest(dataset=self.valid_dataset)],
            invalid=[RuleTest(dataset=self.invalid_dataset, expected=1)],
        )
#!/usr/bin/env python3
# Copyright (c) 2025 by Brockmann Consult GmbH
# Permissions are hereby granted under the terms of the MIT License:
# https://opensource.org/licenses/MIT.

import unittest
from datetime import datetime
from unittest.mock import MagicMock, patch

import numpy as np
from pystac import Catalog, Collection
from xarray import DataArray, Dataset

from deep_code.constants import (
    DEEPESDL_COLLECTION_SELF_HREF,
    OSC_THEME_SCHEME,
    PRODUCT_BASE_CATALOG_SELF_HREF,
    VARIABLE_BASE_CATALOG_SELF_HREF,
)
from deep_code.utils.dataset_stac_generator import OscDatasetStacGenerator, Theme


class TestOSCProductSTACGenerator(unittest.TestCase):
    """Tests for OscDatasetStacGenerator against a small in-memory dataset."""

    @patch("deep_code.utils.dataset_stac_generator.open_dataset")
    def setUp(self, mock_data_store):
        """Set up a mock dataset and a generator wired to it."""

        def make_variable(std_name):
            # Each data variable shares shape and attrs except standard_name.
            return (
                ("time", "lat", "lon"),
                np.random.rand(2, 5, 10),
                {
                    "description": "dummy",
                    "standard_name": std_name,
                    "gcmd_keyword_url": "https://dummy",
                },
            )

        self.mock_dataset = Dataset(
            coords={
                "lon": ("lon", np.linspace(-180, 180, 10)),
                "lat": ("lat", np.linspace(-90, 90, 5)),
                "time": (
                    "time",
                    [
                        np.datetime64(datetime(2023, 1, 1), "ns"),
                        np.datetime64(datetime(2023, 1, 2), "ns"),
                    ],
                ),
            },
            attrs={"description": "Mock dataset for testing.", "title": "Mock Dataset"},
            data_vars={
                "var1": make_variable("var1"),
                "var2": make_variable("var2"),
            },
        )
        mock_store = MagicMock()
        mock_store.open_data.return_value = self.mock_dataset
        mock_data_store.return_value = self.mock_dataset

        self.generator = OscDatasetStacGenerator(
            dataset_id="mock-dataset-id",
            collection_id="mock-collection-id",
            access_link="s3://mock-bucket/mock-dataset",
            documentation_link="https://example.com/docs",
            osc_status="ongoing",
            osc_region="Global",
            osc_themes=["climate", "environment"],
        )

    def test_open_dataset(self):
        """The generator holds an opened xarray Dataset with all coordinates."""
        ds = self.generator.dataset
        self.assertIsInstance(ds, Dataset)
        for coord in ("lon", "lat", "time"):
            self.assertIn(coord, ds.coords)

    def test_get_spatial_extent(self):
        """Spatial extent spans the full lon/lat range."""
        extent = self.generator._get_spatial_extent()
        self.assertEqual(extent.bboxes[0], [-180.0, -90.0, 180.0, 90.0])

    def test_get_temporal_extent(self):
        """Temporal extent matches the first/last time stamps."""
        extent = self.generator._get_temporal_extent()
        # TemporalExtent.intervals is a list of [start, end]
        start, end = extent.intervals[0]
        self.assertEqual(start, datetime(2023, 1, 1, 0, 0))
        self.assertEqual(end, datetime(2023, 1, 2, 0, 0))

    def test_get_variables(self):
        """Variable IDs cover both mock variables."""
        self.assertCountEqual(self.generator.get_variable_ids(), ["var1", "var2"])

    def test_get_general_metadata(self):
        """General metadata includes the dataset description."""
        metadata = self.generator._get_general_metadata()
        self.assertEqual(metadata.get("description"), "Mock dataset for testing.")

    def test_extract_metadata_for_variable(self):
        """Metadata extracted for a single variable echoes its attrs."""
        array: DataArray = self.mock_dataset.data_vars["var1"]
        meta = self.generator.extract_metadata_for_variable(array)
        self.assertEqual(meta["variable_id"], "var1")
        self.assertEqual(meta["description"], "dummy")
        self.assertEqual(meta["gcmd_keyword_url"], "https://dummy")

    def test_get_variables_metadata(self):
        """Per-variable metadata dict contains both variables."""
        all_meta = self.generator.get_variables_metadata()
        for var_id in ("var1", "var2"):
            self.assertIn(var_id, all_meta)
        self.assertIsInstance(all_meta["var1"], dict)

    def test_build_theme(self):
        """Theme builder preserves scheme and concept ids."""
        theme: Theme = OscDatasetStacGenerator.build_theme(["a", "b"])
        self.assertEqual(theme.scheme, OSC_THEME_SCHEME)
        self.assertListEqual([concept.id for concept in theme.concepts], ["a", "b"])

    @patch.object(OscDatasetStacGenerator, "_add_gcmd_link_to_var_catalog")
    @patch.object(OscDatasetStacGenerator, "add_themes_as_related_links_var_catalog")
    def test_build_variable_catalog(self, mock_add_themes, mock_add_gcmd):
        """Variable-level STAC catalog carries id, title and self href."""
        var_meta = self.generator.variables_metadata["var1"]
        catalog = self.generator.build_variable_catalog(var_meta)
        self.assertIsInstance(catalog, Catalog)
        self.assertEqual(catalog.id, "var1")
        # Title should be capitalized
        self.assertEqual(catalog.title, "Var1")
        # Self href ends with var1/catalog.json
        self.assertTrue(catalog.self_href.endswith("/var1/catalog.json"))

    @patch("pystac.Catalog.from_file")
    def test_update_product_base_catalog(self, mock_from_file):
        """Product base catalog gains one link and its canonical self href."""
        base_catalog = MagicMock(spec=Catalog)
        mock_from_file.return_value = base_catalog

        updated = self.generator.update_product_base_catalog("path.json")
        self.assertIs(updated, base_catalog)
        base_catalog.add_link.assert_called_once()
        base_catalog.set_self_href.assert_called_once_with(
            PRODUCT_BASE_CATALOG_SELF_HREF
        )

    @patch("pystac.Catalog.from_file")
    def test_update_variable_base_catalog(self, mock_from_file):
        """Variable base catalog gains one link per variable."""
        base_catalog = MagicMock(spec=Catalog)
        mock_from_file.return_value = base_catalog

        variable_ids = ["v1", "v2"]
        updated = self.generator.update_variable_base_catalog(
            "vars.json", variable_ids
        )
        self.assertIs(updated, base_catalog)
        # Expect one add_link per variable
        self.assertEqual(base_catalog.add_link.call_count, len(variable_ids))
        base_catalog.set_self_href.assert_called_once_with(
            VARIABLE_BASE_CATALOG_SELF_HREF
        )

    @patch("pystac.Collection.from_file")
    def test_update_deepesdl_collection(self, mock_from_file):
        """DeepESDL collection gains a child link plus one per theme."""
        collection = MagicMock(spec=Collection)
        mock_from_file.return_value = collection

        updated = self.generator.update_deepesdl_collection("deep.json")
        self.assertIs(updated, collection)
        # Expect child and theme related links for each theme
        self.assertGreaterEqual(
            collection.add_link.call_count, 1 + len(self.generator.osc_themes)
        )
        collection.set_self_href.assert_called_once_with(
            DEEPESDL_COLLECTION_SELF_HREF
        )


class TestFormatString(unittest.TestCase):
    """Tests for OscDatasetStacGenerator.format_string."""

    def _check(self, raw: str, expected: str) -> None:
        """Assert that format_string(raw) equals expected."""
        self.assertEqual(OscDatasetStacGenerator.format_string(raw), expected)

    def test_single_word(self):
        self._check("temperature", "Temperature")
        self._check("temp", "Temp")
        self._check("hello", "Hello")

    def test_multiple_words_with_spaces(self):
        self._check("surface temp", "Surface Temp")
        self._check("this is a test", "This Is A Test")

    def test_multiple_words_with_underscores(self):
        self._check("surface_temp", "Surface Temp")
        self._check("this_is_a_test", "This Is A Test")

    def test_mixed_spaces_and_underscores(self):
        self._check("surface_temp and_more", "Surface Temp And More")
        self._check(
            "mixed_case_with_underscores_and spaces",
            "Mixed Case With Underscores And Spaces",
        )

    def test_edge_cases(self):
        # Empty string
        self._check("", "")
        # Single word with trailing underscore
        self._check("temperature_", "Temperature")
        # Single word with leading underscore
        self._check("_temp", "Temp")
        # Single word with leading/trailing spaces
        self._check(" hello ", "Hello")
        # Multiple spaces or underscores
        self._check("too___many___underscores", "Too Many Underscores")
        self._check("too many spaces", "Too Many Spaces")
#!/usr/bin/env python3
# Copyright (c) 2025 by Brockmann Consult GmbH
# Permissions are hereby granted under the terms of the MIT License:
# https://opensource.org/licenses/MIT.

import os
import unittest
from unittest.mock import MagicMock, call, patch

import xarray
import xarray as xr

from deep_code.utils.helper import open_dataset


def make_dummy_dataset():
    """Create a simple xarray.Dataset for testing."""
    return xr.Dataset(
        coords={"time": [0, 1, 2]}, data_vars={"x": (("time",), [10, 20, 30])}
    )


class TestOpenDataset(unittest.TestCase):
    """Tests for deep_code.utils.helper.open_dataset.

    Environment variables are injected with ``patch.dict`` so each test
    restores ``os.environ`` on exit; the previous version mutated the
    process environment in place, leaking values into later tests and
    making the suite order-dependent.
    """

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_success_public_store(self, mock_new_store, mock_get_logger):
        """Should open dataset with the public store on first try."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        result = open_dataset("test-id")

        self.assertIs(result, dummy)
        mock_new_store.assert_called_once_with(
            "s3", root="deep-esdl-public", storage_options={"anon": True}
        )
        mock_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Public store"
        )
        mock_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Public store"
        )

    @patch("deep_code.utils.helper.new_data_store")
    @patch("deep_code.utils.helper.logging.getLogger")
    def test_open_dataset_success_authenticated_store(
        self, mock_get_logger, mock_new_store
    ):
        """Test fallback to authenticated store when public store fails."""
        mock_store = MagicMock()
        mock_new_store.side_effect = [Exception("Public store failure"), mock_store]
        mock_store.open_data.return_value = make_dummy_dataset()

        # patch.dict restores os.environ when the block exits, so the
        # credentials-shaped values cannot leak into other tests.
        auth_env = {
            "S3_USER_STORAGE_BUCKET": "mock-bucket",
            "S3_USER_STORAGE_KEY": "mock-key",
            "S3_USER_STORAGE_SECRET": "mock-secret",
        }
        with patch.dict(os.environ, auth_env):
            ds = open_dataset("my-id", logger=mock_get_logger())

        self.assertIsInstance(ds, xarray.Dataset)

        # And new_data_store should have been called twice with exactly these params
        expected_calls = [
            call("s3", root="deep-esdl-public", storage_options={"anon": True}),
            call(
                "s3",
                root="mock-bucket",
                storage_options={
                    "anon": False,
                    "key": "mock-key",
                    "secret": "mock-secret",
                },
            ),
        ]
        mock_new_store.assert_has_calls(expected_calls, any_order=False)

        # And the logger should have info about both attempts
        logger = mock_get_logger()
        logger.info.assert_any_call(
            "Attempting to open dataset 'my-id' with configuration: Public store"
        )
        logger.info.assert_any_call(
            "Attempting to open dataset 'my-id' with configuration: Authenticated store"
        )
        logger.info.assert_any_call(
            "Successfully opened dataset 'my-id' with configuration: Authenticated store"
        )

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_all_stores_fail_raises(self, mock_new_store, mock_get_logger):
        """Should raise ValueError if all stores fail."""
        mock_new_store.side_effect = Exception("fail")
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        # Scoped environment: removed automatically after the with-block.
        auth_env = {
            "S3_USER_STORAGE_BUCKET": "user-bucket",
            "S3_USER_STORAGE_KEY": "key",
            "S3_USER_STORAGE_SECRET": "secret",
        }
        with patch.dict(os.environ, auth_env):
            with self.assertRaises(ValueError) as ctx:
                open_dataset("test-id")
        msg = str(ctx.exception)
        self.assertIn("Tried configurations: Public store, Authenticated store", msg)
        self.assertIn("Last error: fail", msg)

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_with_custom_configs(self, mock_new_store, mock_get_logger):
        """Should use provided storage_configs instead of defaults."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        custom_cfgs = [
            {
                "description": "Local store",
                "params": {"storage_type": "file", "root": ".", "storage_options": {}},
            }
        ]

        result = open_dataset("test-id", storage_configs=custom_cfgs)

        self.assertIs(result, dummy)
        mock_new_store.assert_called_once_with("file", root=".", storage_options={})
        mock_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Local store"
        )
        mock_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Local store"
        )

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_uses_provided_logger(self, mock_new_store, mock_get_logger):
        """Should use the logger provided by the caller."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        custom_logger = MagicMock()
        mock_get_logger.side_effect = AssertionError("getLogger should not be used")

        result = open_dataset("test-id", logger=custom_logger)

        self.assertIs(result, dummy)
        custom_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Public store"
        )
        custom_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Public store"
        )
import xarray as xr
from xrlint.linter import new_linter
from xrlint.result import Result

from deep_code.utils.custom_xrlint_rules import export_config
from deep_code.utils.helper import open_dataset


class LintDataset:
    """Lints xarray dataset using xrlint library.

    Args:
        dataset_id (str | None): ID of a Zarr dataset in the DeepESDL public or team bucket.
        dataset (xr.Dataset | None): In-memory xarray.Dataset instance.

    Note:
        One of `dataset_id` or `dataset` must be provided.

    Raises:
        ValueError: If neither `dataset_id` nor `dataset` is provided.
    """

    def __init__(
        self, dataset_id: str | None = None, dataset: xr.Dataset | None = None
    ):
        # Compare against None explicitly: truth-testing an xarray.Dataset
        # reflects whether it has data variables, so the previous
        # `not dataset_id and not dataset` check wrongly rejected a
        # legitimately supplied (but empty) Dataset.
        if dataset_id is None and dataset is None:
            raise ValueError("You must provide either `dataset_id` or `dataset`.")
        self.dataset_id = dataset_id
        self.dataset = dataset

    def lint_dataset(self) -> Result:
        """Run the configured XRLint rules against the dataset.

        An in-memory dataset takes precedence; otherwise the dataset is
        opened by ID via `open_dataset`.

        Returns:
            Result: The XRLint validation result.
        """
        if self.dataset is not None:
            ds = self.dataset
        elif self.dataset_id is not None:
            ds = open_dataset(self.dataset_id)
        else:
            # Unreachable while __init__ enforces its precondition; kept as a
            # defensive guard against direct attribute mutation.
            raise RuntimeError("No dataset to lint.")

        linter = new_linter(*export_config())
        return linter.validate(ds)
@@ -15,24 +15,24 @@ class TemplateGenerator:
|
|
|
15
15
|
"""Generate a complete template with all possible keys and placeholder values"""
|
|
16
16
|
|
|
17
17
|
template = {
|
|
18
|
-
"workflow_id": "[
|
|
18
|
+
"workflow_id": "[A unique identifier for your workflow]",
|
|
19
19
|
"properties": {
|
|
20
|
-
"title": "[
|
|
21
|
-
"description": "[
|
|
20
|
+
"title": "[Human-readable title of the workflow]",
|
|
21
|
+
"description": "[A concise summary of what the workflow does]",
|
|
22
22
|
"keywords": ["[KEYWORD1]", "[KEYWORD2]"],
|
|
23
|
-
"themes": ["[THEME1]", "[THEME2]"],
|
|
24
|
-
"license": "[
|
|
23
|
+
"themes": ["[Thematic area(s) of focus (e.g. land, ocean, atmosphere)]","[THEME1]", "[THEME2]"],
|
|
24
|
+
"license": "[License type (e.g. MIT, Apache-2.0, CC-BY-4.0, proprietary)]",
|
|
25
25
|
"jupyter_kernel_info": {
|
|
26
|
-
"name": "[
|
|
26
|
+
"name": "[Name of the execution environment or notebook kernel]",
|
|
27
27
|
"python_version": "[PYTHON_VERSION]",
|
|
28
|
-
"env_file": "[
|
|
28
|
+
"env_file": "[Link to the environment file (YAML) used to create the notebook environment]",
|
|
29
29
|
},
|
|
30
30
|
},
|
|
31
|
-
"jupyter_notebook_url": "[
|
|
31
|
+
"jupyter_notebook_url": "[Link to the source notebook (e.g. on GitHub)]",
|
|
32
32
|
"contact": [
|
|
33
33
|
{
|
|
34
|
-
"name": "[
|
|
35
|
-
"organization": "[
|
|
34
|
+
"name": "[Contact person's full name]",
|
|
35
|
+
"organization": "[Affiliated institution or company]",
|
|
36
36
|
"links": [
|
|
37
37
|
{
|
|
38
38
|
"rel": "about",
|
|
@@ -59,12 +59,13 @@ class TemplateGenerator:
|
|
|
59
59
|
"""Generate a complete dataset template with all possible keys and placeholder values"""
|
|
60
60
|
|
|
61
61
|
template = {
|
|
62
|
-
"dataset_id": "[
|
|
63
|
-
"collection_id": "[
|
|
64
|
-
"osc_themes": ["[
|
|
65
|
-
|
|
66
|
-
"
|
|
67
|
-
"
|
|
62
|
+
"dataset_id": "[The name of the dataset object within your S3 bucket].zarr",
|
|
63
|
+
"collection_id": "[A unique identifier for the dataset collection]",
|
|
64
|
+
"osc_themes": ["[Oceans]", "[Open Science theme (choose from "
|
|
65
|
+
"https://opensciencedata.esa.int/themes/catalog)"],
|
|
66
|
+
"osc_region": "[Geographical coverage, e.g. 'global']",
|
|
67
|
+
"dataset_status": "[Status of the dataset: 'ongoing', 'completed', or 'planned']",
|
|
68
|
+
"documentation_link": "[Link to relevant documentation, publication, or handbook]",
|
|
68
69
|
}
|
|
69
70
|
|
|
70
71
|
yaml_str = yaml.dump(
|
|
# Copyright © 2025 Brockmann Consult GmbH.
# This software is distributed under the terms and conditions of the
# MIT license (https://mit-license.org/).

"""
This module defines the deepcode plugin for XRLint, which validates
metadata required for dataset publication to a catalog. It checks for:
- A 'description' attribute in dataset.attrs
- A 'gcmd_keyword_url' attribute in each variable's attrs
"""

from xrlint.node import DatasetNode, VariableNode
from xrlint.plugin import new_plugin
from xrlint.rule import RuleContext, RuleOp

plugin = new_plugin(name="deepcode", version="1.0.0")


@plugin.define_rule("dataset-description")
class DatasetDescriptionRule(RuleOp):
    """Ensures the dataset has a 'description' attribute."""

    def validate_dataset(self, ctx: RuleContext, node: DatasetNode):
        # Guard clause: nothing to report when the attribute is present.
        if "description" in node.dataset.attrs:
            return
        ctx.report(
            "Dataset missing required 'description' attribute.",
            suggestions=["Add a 'description' attribute to dataset.attrs."],
        )


@plugin.define_rule("variable-gcmd-keyword-url")
class VariableGcmdKeywordUrlRule(RuleOp):
    """Ensures all variables have a 'gcmd_keyword_url' attribute."""

    def validate_variable(self, ctx: RuleContext, node: VariableNode):
        # Only data variables are checked; coordinate variables are skipped.
        if node.name not in ctx.dataset.data_vars:
            return
        if "gcmd_keyword_url" not in node.array.attrs:
            ctx.report(f"Variable '{node.name}' missing 'gcmd_keyword_url' attribute.")


# Rules making up this plugin's "recommended" preset.
_RECOMMENDED_RULES = {
    "deepcode/variable-gcmd-keyword-url": "error",
    "deepcode/dataset-description": "error",
}

# Define the recommended ruleset for this plugin
plugin.define_config("recommended", [{"rules": _RECOMMENDED_RULES}])


def export_config() -> list:
    """
    Export the plugin configuration to be consumed by the XRLint Linter.

    Returns
    -------
    list
        A list of plugin config dictionaries and rule presets.
    """
    # Switch off generic core rules that are irrelevant for deepcode checks.
    core_rule_overrides = {
        "rules": {
            "content-desc": "off",
            "no-empty-attrs": "off",
            "conventions": "off",
            "time-coordinate": "off",
        }
    }
    return [
        {"plugins": {"deepcode": plugin}},
        "recommended",
        core_rule_overrides,
        "deepcode/recommended",
    ]