deep-code 0.1.3__tar.gz → 0.1.4.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/PKG-INFO +1 -1
  2. deep_code-0.1.4.dev1/deep_code/tests/utils/test_custom_xrlint_rules.py +73 -0
  3. deep_code-0.1.4.dev1/deep_code/tests/utils/test_dataset_stac_generator.py +233 -0
  4. deep_code-0.1.4.dev1/deep_code/tests/utils/test_helper.py +158 -0
  5. deep_code-0.1.4.dev1/deep_code/tools/lint.py +37 -0
  6. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/new.py +17 -16
  7. deep_code-0.1.4.dev1/deep_code/utils/custom_xrlint_rules.py +78 -0
  8. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/dataset_stac_generator.py +25 -85
  9. deep_code-0.1.4.dev1/deep_code/utils/helper.py +108 -0
  10. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/version.py +1 -1
  11. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/PKG-INFO +1 -1
  12. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/SOURCES.txt +4 -1
  13. deep_code-0.1.3/deep_code/tests/utils/test_dataset_stac_generator.py +0 -277
  14. deep_code-0.1.3/deep_code/tools/check.py +0 -4
  15. deep_code-0.1.3/deep_code/utils/helper.py +0 -14
  16. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/LICENSE +0 -0
  17. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/README.md +0 -0
  18. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/__init__.py +0 -0
  19. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/__init__.py +0 -0
  20. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/generate_config.py +0 -0
  21. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/main.py +0 -0
  22. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/cli/publish.py +0 -0
  23. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/constants.py +0 -0
  24. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/tools/__init__.py +0 -0
  25. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/tools/test_publish.py +0 -0
  26. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/__init__.py +0 -0
  27. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_github_automation.py +0 -0
  28. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_ogc_api_record.py +0 -0
  29. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_ogc_record_generator.py +0 -0
  30. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tests/utils/test_osc_extension.py +0 -0
  31. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/__init__.py +0 -0
  32. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/publish.py +0 -0
  33. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/register.py +0 -0
  34. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/setup_ci.py +0 -0
  35. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/tools/test.py +0 -0
  36. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/__init__.py +0 -0
  37. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/github_automation.py +0 -0
  38. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/ogc_api_record.py +0 -0
  39. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/ogc_record_generator.py +0 -0
  40. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code/utils/osc_extension.py +0 -0
  41. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/dependency_links.txt +0 -0
  42. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/entry_points.txt +0 -0
  43. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/requires.txt +0 -0
  44. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/deep_code.egg-info/top_level.txt +0 -0
  45. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/pyproject.toml +0 -0
  46. {deep_code-0.1.3 → deep_code-0.1.4.dev1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deep_code
3
- Version: 0.1.3
3
+ Version: 0.1.4.dev1
4
4
  Summary: deepesdl earthcode integration utility tool
5
5
  Author-email: Tejas Morbagal Harish <tejas.morbagalharish@brockmann-consult.de>
6
6
  License: MIT
@@ -0,0 +1,73 @@
1
+ # Copyright © 2025 Brockmann Consult GmbH.
2
+ # This software is distributed under the terms and conditions of the
3
+ # MIT license (https://mit-license.org/).
4
+
5
+ import unittest
6
+
7
+ import xarray as xr
8
+ from xrlint.testing import RuleTest, RuleTester
9
+
10
+ from deep_code.utils.custom_xrlint_rules import (
11
+ DatasetDescriptionRule,
12
+ VariableGcmdKeywordUrlRule,
13
+ )
14
+
15
+
16
class TestDeepCodePlugin(unittest.TestCase):
    """Exercise the custom deepcode XRLint rules against small datasets."""

    @staticmethod
    def _base_dataset(global_attrs: dict) -> xr.Dataset:
        """Build a tiny dataset with temperature/precipitation variables."""
        return xr.Dataset(
            data_vars={
                "temperature": (("time", "lat", "lon"), [[[300, 301], [302, 303]]]),
                "precipitation": (("time", "lat", "lon"), [[[10, 20], [30, 40]]]),
            },
            coords={"time": [1], "lat": [0, 1], "lon": [0, 1]},
            attrs=global_attrs,
        )

    def setUp(self):
        """Set up one compliant and one non-compliant test dataset."""
        # Valid dataset: global description/title plus gcmd_keyword_url and
        # units on every data variable.
        self.valid_dataset = self._base_dataset(
            {
                "description": "Test climate dataset",
                "title": "Climate Dataset 2025",
            }
        )
        self.valid_dataset["temperature"].attrs.update(
            gcmd_keyword_url="https://gcmd.nasa.gov/KeywordViewer/temperature",
            units="K",
        )
        self.valid_dataset["precipitation"].attrs.update(
            gcmd_keyword_url="https://gcmd.nasa.gov/KeywordViewer/precipitation",
            units="mm",
        )

        # Invalid dataset: no global attrs, and 'precipitation' deliberately
        # lacks gcmd_keyword_url/units so each rule reports exactly one issue.
        self.invalid_dataset = self._base_dataset({})
        self.invalid_dataset["temperature"].attrs.update(
            gcmd_keyword_url="https://gcmd.nasa.gov/KeywordViewer/temperature",
            units="K",
        )

        self.tester = RuleTester()

    def test_dataset_description(self):
        """DatasetDescriptionRule: valid dataset passes, invalid reports once."""
        self.tester.run(
            "dataset-description",
            DatasetDescriptionRule,
            valid=[RuleTest(dataset=self.valid_dataset)],
            invalid=[RuleTest(dataset=self.invalid_dataset, expected=1)],
        )

    def test_variable_gcmd_keyword_url(self):
        """VariableGcmdKeywordUrlRule: valid dataset passes, invalid reports once."""
        self.tester.run(
            "variable-gcmd-keyword-url",
            VariableGcmdKeywordUrlRule,
            valid=[RuleTest(dataset=self.valid_dataset)],
            invalid=[RuleTest(dataset=self.invalid_dataset, expected=1)],
        )
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) 2025 by Brockmann Consult GmbH
3
+ # Permissions are hereby granted under the terms of the MIT License:
4
+ # https://opensource.org/licenses/MIT.
5
+
6
+ import unittest
7
+ from datetime import datetime
8
+ from unittest.mock import MagicMock, patch
9
+
10
+ import numpy as np
11
+ from pystac import Catalog, Collection
12
+ from xarray import DataArray, Dataset
13
+
14
+ from deep_code.constants import (
15
+ DEEPESDL_COLLECTION_SELF_HREF,
16
+ OSC_THEME_SCHEME,
17
+ PRODUCT_BASE_CATALOG_SELF_HREF,
18
+ VARIABLE_BASE_CATALOG_SELF_HREF,
19
+ )
20
+ from deep_code.utils.dataset_stac_generator import OscDatasetStacGenerator, Theme
21
+
22
+
23
class TestOSCProductSTACGenerator(unittest.TestCase):
    """Unit tests for ``OscDatasetStacGenerator``.

    A small in-memory ``xarray.Dataset`` stands in for a real Zarr dataset;
    ``open_dataset`` is patched during ``setUp`` so no object storage is
    ever touched.
    """

    @patch("deep_code.utils.dataset_stac_generator.open_dataset")
    def setUp(self, mock_open_dataset):
        """Set up a mock dataset and a generator that opens it."""
        self.mock_dataset = Dataset(
            coords={
                "lon": ("lon", np.linspace(-180, 180, 10)),
                "lat": ("lat", np.linspace(-90, 90, 5)),
                "time": (
                    "time",
                    [
                        np.datetime64(datetime(2023, 1, 1), "ns"),
                        np.datetime64(datetime(2023, 1, 2), "ns"),
                    ],
                ),
            },
            attrs={"description": "Mock dataset for testing.", "title": "Mock Dataset"},
            data_vars={
                "var1": (
                    ("time", "lat", "lon"),
                    np.random.rand(2, 5, 10),
                    {
                        "description": "dummy",
                        "standard_name": "var1",
                        "gcmd_keyword_url": "https://dummy",
                    },
                ),
                "var2": (
                    ("time", "lat", "lon"),
                    np.random.rand(2, 5, 10),
                    {
                        "description": "dummy",
                        "standard_name": "var2",
                        "gcmd_keyword_url": "https://dummy",
                    },
                ),
            },
        )
        # The generator resolves its dataset via open_dataset(); returning
        # the in-memory dataset directly avoids any S3 access.  (The unused
        # MagicMock store previously configured here has been removed.)
        mock_open_dataset.return_value = self.mock_dataset

        self.generator = OscDatasetStacGenerator(
            dataset_id="mock-dataset-id",
            collection_id="mock-collection-id",
            access_link="s3://mock-bucket/mock-dataset",
            documentation_link="https://example.com/docs",
            osc_status="ongoing",
            osc_region="Global",
            osc_themes=["climate", "environment"],
        )

    def test_open_dataset(self):
        """Test if the dataset is opened correctly."""
        self.assertIsInstance(self.generator.dataset, Dataset)
        for coord in ("lon", "lat", "time"):
            self.assertIn(coord, self.generator.dataset.coords)

    def test_get_spatial_extent(self):
        """Test spatial extent extraction."""
        extent = self.generator._get_spatial_extent()
        self.assertEqual(extent.bboxes[0], [-180.0, -90.0, 180.0, 90.0])

    def test_get_temporal_extent(self):
        """Test temporal extent extraction."""
        extent = self.generator._get_temporal_extent()
        # TemporalExtent.intervals is a list of [start, end]
        interval = extent.intervals[0]
        self.assertEqual(interval[0], datetime(2023, 1, 1, 0, 0))
        self.assertEqual(interval[1], datetime(2023, 1, 2, 0, 0))

    def test_get_variables(self):
        """Test variable ID extraction."""
        vars_ = self.generator.get_variable_ids()
        self.assertCountEqual(vars_, ["var1", "var2"])

    def test_get_general_metadata(self):
        """Test general metadata extraction."""
        meta = self.generator._get_general_metadata()
        self.assertEqual(meta.get("description"), "Mock dataset for testing.")

    def test_extract_metadata_for_variable(self):
        """Test single variable metadata extraction."""
        da: DataArray = self.mock_dataset.data_vars["var1"]
        var_meta = self.generator.extract_metadata_for_variable(da)
        self.assertEqual(var_meta["variable_id"], "var1")
        self.assertEqual(var_meta["description"], "dummy")
        self.assertEqual(var_meta["gcmd_keyword_url"], "https://dummy")

    def test_get_variables_metadata(self):
        """Test metadata dict for all variables."""
        meta_dict = self.generator.get_variables_metadata()
        self.assertIn("var1", meta_dict)
        self.assertIn("var2", meta_dict)
        self.assertIsInstance(meta_dict["var1"], dict)

    def test_build_theme(self):
        """Test Theme builder static method."""
        themes = ["a", "b"]
        theme_obj: Theme = OscDatasetStacGenerator.build_theme(themes)
        self.assertEqual(theme_obj.scheme, OSC_THEME_SCHEME)
        ids = [tc.id for tc in theme_obj.concepts]
        self.assertListEqual(ids, ["a", "b"])

    @patch.object(OscDatasetStacGenerator, "_add_gcmd_link_to_var_catalog")
    @patch.object(OscDatasetStacGenerator, "add_themes_as_related_links_var_catalog")
    def test_build_variable_catalog(self, mock_add_themes, mock_add_gcmd):
        """Test building of variable-level STAC catalog."""
        var_meta = self.generator.variables_metadata["var1"]
        catalog = self.generator.build_variable_catalog(var_meta)
        self.assertIsInstance(catalog, Catalog)
        self.assertEqual(catalog.id, "var1")
        # Title should be capitalized
        self.assertEqual(catalog.title, "Var1")
        # Self href ends with var1/catalog.json
        self.assertTrue(catalog.self_href.endswith("/var1/catalog.json"))

    @patch("pystac.Catalog.from_file")
    def test_update_product_base_catalog(self, mock_from_file):
        """Test linking product catalog."""
        mock_cat = MagicMock(spec=Catalog)
        mock_from_file.return_value = mock_cat

        result = self.generator.update_product_base_catalog("path.json")
        self.assertIs(result, mock_cat)
        mock_cat.add_link.assert_called_once()
        mock_cat.set_self_href.assert_called_once_with(PRODUCT_BASE_CATALOG_SELF_HREF)

    @patch("pystac.Catalog.from_file")
    def test_update_variable_base_catalog(self, mock_from_file):
        """Test linking variable base catalog."""
        mock_cat = MagicMock(spec=Catalog)
        mock_from_file.return_value = mock_cat

        vars_ = ["v1", "v2"]
        result = self.generator.update_variable_base_catalog("vars.json", vars_)
        self.assertIs(result, mock_cat)
        # Expect one add_link per variable
        self.assertEqual(mock_cat.add_link.call_count, len(vars_))
        mock_cat.set_self_href.assert_called_once_with(VARIABLE_BASE_CATALOG_SELF_HREF)

    @patch("pystac.Collection.from_file")
    def test_update_deepesdl_collection(self, mock_from_file):
        """Test updating DeepESDL collection."""
        mock_coll = MagicMock(spec=Collection)
        mock_from_file.return_value = mock_coll

        result = self.generator.update_deepesdl_collection("deep.json")
        self.assertIs(result, mock_coll)
        # Expect child and theme related links for each theme
        calls = mock_coll.add_link.call_count
        self.assertGreaterEqual(calls, 1 + len(self.generator.osc_themes))
        mock_coll.set_self_href.assert_called_once_with(DEEPESDL_COLLECTION_SELF_HREF)
176
+
177
+
178
class TestFormatString(unittest.TestCase):
    """Tests for ``OscDatasetStacGenerator.format_string``."""

    def _check_all(self, cases):
        """Assert format_string(raw) == expected for every (raw, expected) pair."""
        for raw, expected in cases:
            self.assertEqual(OscDatasetStacGenerator.format_string(raw), expected)

    def test_single_word(self):
        self._check_all(
            [
                ("temperature", "Temperature"),
                ("temp", "Temp"),
                ("hello", "Hello"),
            ]
        )

    def test_multiple_words_with_spaces(self):
        self._check_all(
            [
                ("surface temp", "Surface Temp"),
                ("this is a test", "This Is A Test"),
            ]
        )

    def test_multiple_words_with_underscores(self):
        self._check_all(
            [
                ("surface_temp", "Surface Temp"),
                ("this_is_a_test", "This Is A Test"),
            ]
        )

    def test_mixed_spaces_and_underscores(self):
        self._check_all(
            [
                ("surface_temp and_more", "Surface Temp And More"),
                (
                    "mixed_case_with_underscores_and spaces",
                    "Mixed Case With Underscores And Spaces",
                ),
            ]
        )

    def test_edge_cases(self):
        self._check_all(
            [
                # Empty string
                ("", ""),
                # Single word with trailing underscore
                ("temperature_", "Temperature"),
                # Single word with leading underscore
                ("_temp", "Temp"),
                # Single word with leading/trailing spaces
                (" hello ", "Hello"),
                # Multiple spaces or underscores
                ("too___many___underscores", "Too Many Underscores"),
                ("too   many   spaces", "Too Many Spaces"),
            ]
        )
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) 2025 by Brockmann Consult GmbH
3
+ # Permissions are hereby granted under the terms of the MIT License:
4
+ # https://opensource.org/licenses/MIT.
5
+
6
+ import os
7
+ import unittest
8
+ from unittest.mock import MagicMock, call, patch
9
+
10
+ import xarray
11
+ import xarray as xr
12
+
13
+ from deep_code.utils.helper import open_dataset
14
+
15
+
16
def make_dummy_dataset():
    """Create a simple xarray.Dataset for testing."""
    times = [0, 1, 2]
    values = [10, 20, 30]
    return xr.Dataset(
        coords={"time": times},
        data_vars={"x": (("time",), values)},
    )
21
+
22
+
23
class TestOpenDataset(unittest.TestCase):
    """Tests for ``deep_code.utils.helper.open_dataset``.

    ``new_data_store`` and the module logger are patched throughout, so no
    real S3 access happens.  Credentials for the authenticated-store tests
    are injected with ``patch.dict`` so they are restored afterwards —
    previously they were written straight into ``os.environ`` and leaked
    into every subsequently run test.
    """

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_success_public_store(self, mock_new_store, mock_get_logger):
        """Should open dataset with the public store on first try."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        result = open_dataset("test-id")

        self.assertIs(result, dummy)
        mock_new_store.assert_called_once_with(
            "s3", root="deep-esdl-public", storage_options={"anon": True}
        )
        mock_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Public store"
        )
        mock_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Public store"
        )

    @patch("deep_code.utils.helper.new_data_store")
    @patch("deep_code.utils.helper.logging.getLogger")
    def test_open_dataset_success_authenticated_store(
        self, mock_get_logger, mock_new_store
    ):
        """Test fallback to authenticated store when public store fails."""
        mock_store = MagicMock()
        # First store construction fails, second (authenticated) succeeds.
        mock_new_store.side_effect = [Exception("Public store failure"), mock_store]
        mock_store.open_data.return_value = make_dummy_dataset()

        env = {
            "S3_USER_STORAGE_BUCKET": "mock-bucket",
            "S3_USER_STORAGE_KEY": "mock-key",
            "S3_USER_STORAGE_SECRET": "mock-secret",
        }
        # patch.dict restores os.environ on exit, so the credentials do not
        # leak into other tests.
        with patch.dict(os.environ, env):
            ds = open_dataset("my-id", logger=mock_get_logger())

        self.assertIsInstance(ds, xarray.Dataset)

        # And new_data_store should have been called twice with exactly these params
        expected_calls = [
            call("s3", root="deep-esdl-public", storage_options={"anon": True}),
            call(
                "s3",
                root="mock-bucket",
                storage_options={
                    "anon": False,
                    "key": "mock-key",
                    "secret": "mock-secret",
                },
            ),
        ]
        mock_new_store.assert_has_calls(expected_calls, any_order=False)

        # And the logger should have info about both attempts
        logger = mock_get_logger()
        logger.info.assert_any_call(
            "Attempting to open dataset 'my-id' with configuration: Public store"
        )
        logger.info.assert_any_call(
            "Attempting to open dataset 'my-id' with configuration: Authenticated store"
        )
        logger.info.assert_any_call(
            "Successfully opened dataset 'my-id' with configuration: Authenticated store"
        )

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_all_stores_fail_raises(self, mock_new_store, mock_get_logger):
        """Should raise ValueError if all stores fail."""
        mock_new_store.side_effect = Exception("fail")
        env = {
            "S3_USER_STORAGE_BUCKET": "user-bucket",
            "S3_USER_STORAGE_KEY": "key",
            "S3_USER_STORAGE_SECRET": "secret",
        }
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        # Environment is injected only for the duration of the call.
        with patch.dict(os.environ, env):
            with self.assertRaises(ValueError) as ctx:
                open_dataset("test-id")
        msg = str(ctx.exception)
        self.assertIn("Tried configurations: Public store, Authenticated store", msg)
        self.assertIn("Last error: fail", msg)

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_with_custom_configs(self, mock_new_store, mock_get_logger):
        """Should use provided storage_configs instead of defaults."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        mock_logger = MagicMock()
        mock_get_logger.return_value = mock_logger

        custom_cfgs = [
            {
                "description": "Local store",
                "params": {"storage_type": "file", "root": ".", "storage_options": {}},
            }
        ]

        result = open_dataset("test-id", storage_configs=custom_cfgs)

        self.assertIs(result, dummy)
        mock_new_store.assert_called_once_with("file", root=".", storage_options={})
        mock_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Local store"
        )
        mock_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Local store"
        )

    @patch("deep_code.utils.helper.logging.getLogger")
    @patch("deep_code.utils.helper.new_data_store")
    def test_uses_provided_logger(self, mock_new_store, mock_get_logger):
        """Should use the logger provided by the caller."""
        dummy = make_dummy_dataset()
        mock_store = MagicMock()
        mock_store.open_data.return_value = dummy
        mock_new_store.return_value = mock_store
        custom_logger = MagicMock()
        # Fail loudly if open_dataset falls back to logging.getLogger.
        mock_get_logger.side_effect = AssertionError("getLogger should not be used")

        result = open_dataset("test-id", logger=custom_logger)

        self.assertIs(result, dummy)
        custom_logger.info.assert_any_call(
            "Attempting to open dataset 'test-id' with configuration: Public store"
        )
        custom_logger.info.assert_any_call(
            "Successfully opened dataset 'test-id' with configuration: Public store"
        )
@@ -0,0 +1,37 @@
1
+ import xarray as xr
2
+ from xrlint.linter import new_linter
3
+ from xrlint.result import Result
4
+
5
+ from deep_code.utils.custom_xrlint_rules import export_config
6
+ from deep_code.utils.helper import open_dataset
7
+
8
+
9
class LintDataset:
    """Lints xarray dataset using xrlint library.

    Args:
        dataset_id (str | None): ID of a Zarr dataset in the DeepESDL public or team bucket.
        dataset (xr.Dataset | None): In-memory xarray.Dataset instance.

    Note:
        One of `dataset_id` or `dataset` must be provided.
    """

    def __init__(
        self, dataset_id: str | None = None, dataset: xr.Dataset | None = None
    ):
        # Compare `dataset` against None explicitly: an xarray.Dataset with
        # no data variables has len() == 0 and is therefore falsy, so the
        # previous `not dataset` test wrongly rejected valid (but empty)
        # in-memory datasets.  `not dataset_id` still rejects None and "".
        if not dataset_id and dataset is None:
            raise ValueError("You must provide either `dataset_id` or `dataset`.")
        self.dataset_id = dataset_id
        self.dataset = dataset

    def lint_dataset(self) -> Result:
        """Run the configured XRLint rules and return the validation result.

        An in-memory dataset takes precedence; otherwise the dataset is
        opened from storage by its ID.
        """
        if self.dataset is not None:
            ds = self.dataset
        elif self.dataset_id is not None:
            ds = open_dataset(self.dataset_id)
        else:
            raise RuntimeError("No dataset to lint.")

        linter = new_linter(*export_config())
        return linter.validate(ds)
@@ -15,24 +15,24 @@ class TemplateGenerator:
15
15
  """Generate a complete template with all possible keys and placeholder values"""
16
16
 
17
17
  template = {
18
- "workflow_id": "[WORKFLOW_ID]",
18
+ "workflow_id": "[A unique identifier for your workflow]",
19
19
  "properties": {
20
- "title": "[TITLE]",
21
- "description": "[DESCRIPTION]",
20
+ "title": "[Human-readable title of the workflow]",
21
+ "description": "[A concise summary of what the workflow does]",
22
22
  "keywords": ["[KEYWORD1]", "[KEYWORD2]"],
23
- "themes": ["[THEME1]", "[THEME2]"],
24
- "license": "[LICENSE_TYPE]",
23
+ "themes": ["[Thematic area(s) of focus (e.g. land, ocean, atmosphere)]","[THEME1]", "[THEME2]"],
24
+ "license": "[License type (e.g. MIT, Apache-2.0, CC-BY-4.0, proprietary)]",
25
25
  "jupyter_kernel_info": {
26
- "name": "[DEEPESDL_KERNEL_NAME]",
26
+ "name": "[Name of the execution environment or notebook kernel]",
27
27
  "python_version": "[PYTHON_VERSION]",
28
- "env_file": "[ENV_FILE_URL_IN_GIT]",
28
+ "env_file": "[Link to the environment file (YAML) used to create the notebook environment]",
29
29
  },
30
30
  },
31
- "jupyter_notebook_url": "[NOTEBOOK_URL]",
31
+ "jupyter_notebook_url": "[Link to the source notebook (e.g. on GitHub)]",
32
32
  "contact": [
33
33
  {
34
- "name": "[CONTACT_NAME]",
35
- "organization": "[ORGANIZATION]",
34
+ "name": "[Contact person's full name]",
35
+ "organization": "[Affiliated institution or company]",
36
36
  "links": [
37
37
  {
38
38
  "rel": "about",
@@ -59,12 +59,13 @@ class TemplateGenerator:
59
59
  """Generate a complete dataset template with all possible keys and placeholder values"""
60
60
 
61
61
  template = {
62
- "dataset_id": "[DATASET_ID].zarr",
63
- "collection_id": "[COLLECTION_ID]",
64
- "osc_themes": ["[THEME1]", "[THEME2]"],
65
- "osc_region": "[REGION]",
66
- "dataset_status": "[STATUS]",
67
- "documentation_link": "[DOCS_URL]",
62
+ "dataset_id": "[The name of the dataset object within your S3 bucket].zarr",
63
+ "collection_id": "[A unique identifier for the dataset collection]",
64
+ "osc_themes": ["[Oceans]", "[Open Science theme (choose from "
65
+ "https://opensciencedata.esa.int/themes/catalog)"],
66
+ "osc_region": "[Geographical coverage, e.g. 'global']",
67
+ "dataset_status": "[Status of the dataset: 'ongoing', 'completed', or 'planned']",
68
+ "documentation_link": "[Link to relevant documentation, publication, or handbook]",
68
69
  }
69
70
 
70
71
  yaml_str = yaml.dump(
@@ -0,0 +1,78 @@
1
+ # Copyright © 2025 Brockmann Consult GmbH.
2
+ # This software is distributed under the terms and conditions of the
3
+ # MIT license (https://mit-license.org/).
4
+
5
+ """
6
+ This module defines the deepcode plugin for XRLint, which validates
7
+ metadata required for dataset publication to a catalog. It checks for:
8
+ - A 'description' attribute in dataset.attrs
9
+ - A 'gcmd_keyword_url' attribute in each variable's attrs
10
+ """
11
+
12
+ from xrlint.node import DatasetNode, VariableNode
13
+ from xrlint.plugin import new_plugin
14
+ from xrlint.rule import RuleContext, RuleOp
15
+
16
+ plugin = new_plugin(name="deepcode", version="1.0.0")
17
+
18
+
19
@plugin.define_rule("dataset-description")
class DatasetDescriptionRule(RuleOp):
    """Ensures the dataset has a 'description' attribute."""

    def validate_dataset(self, ctx: RuleContext, node: DatasetNode):
        # Only the presence of the attribute is checked, not its content.
        has_description = "description" in node.dataset.attrs
        if not has_description:
            ctx.report(
                "Dataset missing required 'description' attribute.",
                suggestions=["Add a 'description' attribute to dataset.attrs."],
            )
29
+
30
+
31
@plugin.define_rule("variable-gcmd-keyword-url")
class VariableGcmdKeywordUrlRule(RuleOp):
    """Ensures all variables have a 'gcmd_keyword_url' attribute."""

    def validate_variable(self, ctx: RuleContext, node: VariableNode):
        # Skip nodes that are not data variables (e.g. coordinates).
        if node.name not in ctx.dataset.data_vars:
            return

        # Data variables must carry the GCMD keyword link.
        if "gcmd_keyword_url" in node.array.attrs:
            return
        ctx.report(f"Variable '{node.name}' missing 'gcmd_keyword_url' attribute.")
41
+
42
+
43
# Define the recommended ruleset for this plugin
_RECOMMENDED_RULES = {
    "deepcode/variable-gcmd-keyword-url": "error",
    "deepcode/dataset-description": "error",
}

plugin.define_config("recommended", [{"rules": _RECOMMENDED_RULES}])
55
+
56
+
57
def export_config() -> list:
    """
    Export the plugin configuration to be consumed by the XRLint Linter.

    Returns
    -------
    list
        A list of plugin config dictionaries and rule presets.
    """
    # Selected built-in rules are switched off for this configuration.
    disabled_core_rules = {
        "content-desc": "off",
        "no-empty-attrs": "off",
        "conventions": "off",
        "time-coordinate": "off",
    }
    return [
        {"plugins": {"deepcode": plugin}},
        "recommended",
        {"rules": disabled_core_rules},
        "deepcode/recommended",
    ]