deep-code 0.1.4.dev1__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/PKG-INFO +20 -2
  2. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/README.md +18 -1
  3. deep_code-0.1.6/deep_code/cli/publish.py +264 -0
  4. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/constants.py +10 -0
  5. deep_code-0.1.6/deep_code/tests/tools/test_publish.py +230 -0
  6. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_dataset_stac_generator.py +3 -0
  7. deep_code-0.1.6/deep_code/tests/utils/test_github_automation.py +362 -0
  8. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_ogc_api_record.py +20 -7
  9. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/new.py +10 -3
  10. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/publish.py +235 -96
  11. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/custom_xrlint_rules.py +1 -1
  12. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/dataset_stac_generator.py +19 -1
  13. deep_code-0.1.6/deep_code/utils/github_automation.py +272 -0
  14. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/ogc_api_record.py +156 -23
  15. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/version.py +1 -1
  16. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/PKG-INFO +20 -2
  17. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/requires.txt +1 -0
  18. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/top_level.txt +1 -0
  19. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/pyproject.toml +1 -0
  20. deep_code-0.1.4.dev1/deep_code/cli/publish.py +0 -31
  21. deep_code-0.1.4.dev1/deep_code/tests/tools/test_publish.py +0 -109
  22. deep_code-0.1.4.dev1/deep_code/tests/utils/test_github_automation.py +0 -171
  23. deep_code-0.1.4.dev1/deep_code/utils/github_automation.py +0 -145
  24. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/LICENSE +0 -0
  25. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/__init__.py +0 -0
  26. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/__init__.py +0 -0
  27. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/generate_config.py +0 -0
  28. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/main.py +0 -0
  29. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/tools/__init__.py +0 -0
  30. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/__init__.py +0 -0
  31. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_custom_xrlint_rules.py +0 -0
  32. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_helper.py +0 -0
  33. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_ogc_record_generator.py +0 -0
  34. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_osc_extension.py +0 -0
  35. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/__init__.py +0 -0
  36. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/lint.py +0 -0
  37. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/register.py +0 -0
  38. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/setup_ci.py +0 -0
  39. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/test.py +0 -0
  40. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/__init__.py +0 -0
  41. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/helper.py +0 -0
  42. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/ogc_record_generator.py +0 -0
  43. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/osc_extension.py +0 -0
  44. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/SOURCES.txt +0 -0
  45. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/dependency_links.txt +0 -0
  46. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/entry_points.txt +0 -0
  47. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deep_code
3
- Version: 0.1.4.dev1
3
+ Version: 0.1.6
4
4
  Summary: deepesdl earthcode integration utility tool
5
5
  Author-email: Tejas Morbagal Harish <tejas.morbagalharish@brockmann-consult.de>
6
6
  License: MIT
@@ -14,6 +14,7 @@ License-File: LICENSE
14
14
  Requires-Dist: click
15
15
  Requires-Dist: fsspec
16
16
  Requires-Dist: jsonschema
17
+ Requires-Dist: jsonpickle
17
18
  Requires-Dist: requests
18
19
  Requires-Dist: pandas
19
20
  Requires-Dist: pystac
@@ -125,7 +126,8 @@ catalog
125
126
 
126
127
  ### Usage
127
128
  ```
128
- deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
129
+ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
130
+ all|dataset|workflow]
129
131
  ```
130
132
 
131
133
  #### Arguments
@@ -136,8 +138,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
136
138
  (e.g., workflow-config.yaml)
137
139
 
138
140
  #### Options
141
+ --dataset-config, - Explicit path to dataset config
142
+ --workflow-config, - Explicit path to workflow config
139
143
  --environment, -e - Target catalog environment:
140
144
  production (default) | staging | testing
145
+ --mode, -m Publishing mode:
146
+ all (default) | dataset | workflow
141
147
 
142
148
  #### Examples:
143
149
  1. Publish to staging catalog
@@ -152,6 +158,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
152
158
  ```
153
159
  deep-code publish dataset-config.yaml workflow-config.yaml
154
160
  ```
161
+ 4. Publish Dataset only
162
+ ```
163
+ deep-code publish dataset-config.yaml -m dataset
164
+
165
+ deep-code publish --dataset-config dataset.yaml -m dataset
166
+ ```
167
+ 5. Publish Workflow only
168
+ ```
169
+ deep-code publish workflow-config.yaml -m workflow
170
+
171
+ deep-code publish --workflow-config workflow.yaml -m workflow
172
+ ```
155
173
  #### dataset-config.yaml example
156
174
 
157
175
  ```
@@ -93,7 +93,8 @@ catalog
93
93
 
94
94
  ### Usage
95
95
  ```
96
- deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
96
+ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
97
+ all|dataset|workflow]
97
98
  ```
98
99
 
99
100
  #### Arguments
@@ -104,8 +105,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
104
105
  (e.g., workflow-config.yaml)
105
106
 
106
107
  #### Options
108
+ --dataset-config, - Explicit path to dataset config
109
+ --workflow-config, - Explicit path to workflow config
107
110
  --environment, -e - Target catalog environment:
108
111
  production (default) | staging | testing
112
+ --mode, -m Publishing mode:
113
+ all (default) | dataset | workflow
109
114
 
110
115
  #### Examples:
111
116
  1. Publish to staging catalog
@@ -120,6 +125,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
120
125
  ```
121
126
  deep-code publish dataset-config.yaml workflow-config.yaml
122
127
  ```
128
+ 4. Publish Dataset only
129
+ ```
130
+ deep-code publish dataset-config.yaml -m dataset
131
+
132
+ deep-code publish --dataset-config dataset.yaml -m dataset
133
+ ```
134
+ 5. Publish Workflow only
135
+ ```
136
+ deep-code publish workflow-config.yaml -m workflow
137
+
138
+ deep-code publish --workflow-config workflow.yaml -m workflow
139
+ ```
123
140
  #### dataset-config.yaml example
124
141
 
125
142
  ```
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright (c) 2025 by Brockmann Consult GmbH
4
+ # Permissions are hereby granted under the terms of the MIT License:
5
+ # https://opensource.org/licenses/MIT.
6
+
7
+ from pathlib import Path
8
+ from typing import Literal
9
+
10
+ import click
11
+ import yaml
12
+
13
+ from deep_code.tools.publish import Publisher
14
+
15
+ Mode = Literal["all", "dataset", "workflow"]
16
+
17
+ DATASET_MARKERS = {
18
+ "stac_version",
19
+ "extent",
20
+ "license",
21
+ "summaries",
22
+ "assets",
23
+ "providers",
24
+ "collection",
25
+ "collection_id",
26
+ "id",
27
+ }
28
+ WORKFLOW_MARKERS = {
29
+ "workflow",
30
+ "workflow_id",
31
+ "workflow_title",
32
+ "experiment",
33
+ "jupyter_notebook_url",
34
+ "notebook",
35
+ "parameters",
36
+ "input_datasets",
37
+ }
38
+
39
+
40
+ def _validate_inputs(
41
+ dataset_config: str | None, workflow_config: str | None, mode: str
42
+ ):
43
+ mode = mode.lower()
44
+
45
+ def ensure_file(path: str | None, label: str):
46
+ if path is None:
47
+ raise click.UsageError(f"{label} is required but was not provided.")
48
+ if not Path(path).is_file():
49
+ raise click.UsageError(f"{label} not found: {path} is not a file")
50
+
51
+ if mode == "dataset":
52
+ ensure_file(dataset_config, "DATASET_CONFIG")
53
+ if workflow_config is not None:
54
+ click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
55
+
56
+ elif mode == "workflow":
57
+ ensure_file(workflow_config, "WORKFLOW_CONFIG")
58
+
59
+ elif mode == "all":
60
+ ensure_file(dataset_config, "DATASET_CONFIG")
61
+ ensure_file(workflow_config, "WORKFLOW_CONFIG")
62
+
63
+ else:
64
+ raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.")
65
+
66
+
67
+ def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]:
68
+ """Detect config type via filename hints and YAML top-level keys."""
69
+ name = path.name.lower()
70
+ if "workflow" in name or "experiment" in name:
71
+ return "workflow"
72
+ if "dataset" in name or "collection" in name:
73
+ return "dataset"
74
+
75
+ try:
76
+ data = yaml.safe_load(path.read_text(encoding="utf-8"))
77
+ except Exception as e:
78
+ raise ValueError(f"Cannot read YAML from {path}: {e}")
79
+
80
+ if not isinstance(data, dict):
81
+ raise ValueError(f"YAML in {path} must be a mapping/object at the top level.")
82
+
83
+ keys = set(data.keys())
84
+ ds_score = len(keys & DATASET_MARKERS)
85
+ wf_score = len(keys & WORKFLOW_MARKERS)
86
+
87
+ if ds_score > wf_score:
88
+ return "dataset"
89
+ if wf_score > ds_score:
90
+ return "workflow"
91
+
92
+ raise ValueError(
93
+ f"Ambiguous config type for {path}. "
94
+ "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly."
95
+ )
96
+
97
+
98
+ def _assign_configs(
99
+ pos_first: str | None,
100
+ pos_second: str | None,
101
+ mode: Mode,
102
+ explicit_dataset: str | None,
103
+ explicit_workflow: str | None,
104
+ ) -> tuple[str | None, str | None]:
105
+ """
106
+ Decide which file is dataset vs workflow.
107
+ Precedence: explicit flags > positional + detection.
108
+ Returns (dataset_config, workflow_config).
109
+ """
110
+ ds = explicit_dataset
111
+ wf = explicit_workflow
112
+
113
+ # If both explicit provided, we're done; warn if extra positionals are passed.
114
+ pos_args = [p for p in (pos_first, pos_second) if p]
115
+ if ds and wf:
116
+ if pos_args:
117
+ click.echo(
118
+ "Positional config paths ignored because explicit flags were provided.",
119
+ err=True,
120
+ )
121
+ return ds, wf
122
+
123
+ # Helper to assign a single positional file to the missing slot
124
+ def _assign_single(p: str) -> tuple[str | None, str | None]:
125
+ nonlocal ds, wf
126
+ if ds and wf:
127
+ raise click.UsageError(
128
+ "Both dataset and workflow configs already provided; remove extra positional files."
129
+ )
130
+ # Use mode as a strong hint when only one is missing
131
+ if not ds and mode == "dataset":
132
+ ds = p
133
+ return
134
+ if not wf and mode == "workflow":
135
+ wf = p
136
+ return
137
+ # Otherwise detect
138
+ kind = _detect_config_type(Path(p))
139
+ if kind == "dataset":
140
+ if ds and Path(ds).resolve() != Path(p).resolve():
141
+ raise click.UsageError(
142
+ f"Multiple dataset configs supplied: {ds} and {p}"
143
+ )
144
+ ds = p
145
+ else:
146
+ if wf and Path(wf).resolve() != Path(p).resolve():
147
+ raise click.UsageError(
148
+ f"Multiple workflow configs supplied: {wf} and {p}"
149
+ )
150
+ wf = p
151
+
152
+ # If exactly one explicit provided, try to fill the other via positionals
153
+ if ds and not wf:
154
+ if len(pos_args) > 1:
155
+ raise click.UsageError(
156
+ "Provide at most one positional file when using --dataset-config."
157
+ )
158
+ if pos_args:
159
+ _assign_single(pos_args[0])
160
+ return ds, wf
161
+
162
+ if wf and not ds:
163
+ if len(pos_args) > 1:
164
+ raise click.UsageError(
165
+ "Provide at most one positional file when using --workflow-config."
166
+ )
167
+ if pos_args:
168
+ _assign_single(pos_args[0])
169
+ return ds, wf
170
+
171
+ # No explicit flags: rely on positionals + detection
172
+ if not pos_args:
173
+ return None, None
174
+ if len(pos_args) == 1:
175
+ p = pos_args[0]
176
+ if mode == "dataset":
177
+ return p, None
178
+ if mode == "workflow":
179
+ return None, p
180
+ # mode == "all": detect and require the other later in validation
181
+ kind = _detect_config_type(Path(p))
182
+ return (p, None) if kind == "dataset" else (None, p)
183
+
184
+ # Two positionals: detect both and assign
185
+ p1, p2 = pos_args[0], pos_args[1]
186
+ k1 = _detect_config_type(Path(p1))
187
+ k2 = _detect_config_type(Path(p2))
188
+ if k1 == k2:
189
+ raise click.UsageError(
190
+ f"Both files look like '{k1}' configs: {p1} and {p2}. "
191
+ "Please rename one or use --dataset-config/--workflow-config."
192
+ )
193
+ ds = p1 if k1 == "dataset" else p2
194
+ wf = p1 if k1 == "workflow" else p2
195
+ return ds, wf
196
+
197
+
198
+ @click.command(name="publish")
199
+ @click.argument("dataset_config", type=click.Path(exists=True), required=False)
200
+ @click.argument("workflow_config", type=click.Path(exists=True), required=False)
201
+ @click.option(
202
+ "--dataset-config",
203
+ "dataset_config_opt",
204
+ type=click.Path(exists=True),
205
+ help="Explicit path to dataset config (overrides positional detection).",
206
+ )
207
+ @click.option(
208
+ "--workflow-config",
209
+ "workflow_config_opt",
210
+ type=click.Path(exists=True),
211
+ help="Explicit path to workflow config (overrides positional detection).",
212
+ )
213
+ @click.option(
214
+ "--environment",
215
+ "-e",
216
+ type=click.Choice(["production", "staging", "testing"], case_sensitive=False),
217
+ default="production",
218
+ help="Target environment for publishing (production, staging, testing)",
219
+ )
220
+ @click.option(
221
+ "--mode",
222
+ "-m",
223
+ type=click.Choice(["all", "dataset", "workflow"], case_sensitive=False),
224
+ default="all",
225
+ help="Publishing mode: dataset only, workflow only, or both",
226
+ )
227
+ def publish(
228
+ dataset_config,
229
+ workflow_config,
230
+ dataset_config_opt,
231
+ workflow_config_opt,
232
+ environment,
233
+ mode,
234
+ ):
235
+ """
236
+ Publish dataset and/or workflow/experiment metadata.
237
+
238
+ Examples:
239
+ deep-code publish workflow.yaml -e staging -m workflow
240
+ deep-code publish dataset.yaml -e staging -m dataset
241
+ deep-code publish dataset.yaml workflow.yaml -m all
242
+ deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all
243
+ deep-code publish --dataset-config dataset.yaml -m dataset
244
+ deep-code publish --workflow-config wf.yaml -m workflow
245
+ """
246
+ mode = mode.lower()
247
+ ds_path, wf_path = _assign_configs(
248
+ dataset_config,
249
+ workflow_config,
250
+ mode, # type: ignore[arg-type]
251
+ dataset_config_opt,
252
+ workflow_config_opt,
253
+ )
254
+
255
+ _validate_inputs(ds_path, wf_path, mode)
256
+
257
+ publisher = Publisher(
258
+ dataset_config_path=ds_path,
259
+ workflow_config_path=wf_path,
260
+ environment=environment.lower(),
261
+ )
262
+ result = publisher.publish(mode=mode)
263
+
264
+ click.echo(result if isinstance(result, str) else "Wrote files locally.")
@@ -29,3 +29,13 @@ WORKFLOW_BASE_CATALOG_SELF_HREF = (
29
29
  ".json"
30
30
  )
31
31
  PROJECT_COLLECTION_NAME = "deep-earth-system-data-lab"
32
+ DEEPESDL_GIT_PULL_BASE = (
33
+ "https://deep.earthsystemdatalab.net/hub/user-redirect/git-pull"
34
+ )
35
+ APPLICATION_TYPE_JUPYTER_SPEC = (
36
+ "https://raw.githubusercontent.com/EOEPCA/metadata"
37
+ "-profile/refs/heads/1.0/schemas/application-type-jupyter-notebook"
38
+ )
39
+ APPLICATION_STAC_EXTENSION_SPEC = (
40
+ "https://stac-extensions.github.io/application/v0.1.0/schema.json"
41
+ )
@@ -0,0 +1,230 @@
1
+ import json
2
+ import tempfile
3
+ import unittest
4
+ from pathlib import Path
5
+ from unittest.mock import MagicMock, mock_open, patch
6
+
7
+ import pytest
8
+ import yaml
9
+ from pystac import Catalog
10
+
11
+ from deep_code.tools.publish import Publisher
12
+ from deep_code.utils.ogc_api_record import LinksBuilder
13
+
14
+
15
+ class TestPublisher(unittest.TestCase):
16
+ @patch("fsspec.open")
17
+ @patch("deep_code.tools.publish.GitHubPublisher")
18
+ def setUp(self, mock_github_publisher, mock_fsspec_open):
19
+ # Mock GitHubPublisher to avoid reading .gitaccess
20
+ self.mock_github_publisher_instance = MagicMock()
21
+ mock_github_publisher.return_value = self.mock_github_publisher_instance
22
+
23
+ # Mock dataset and workflow config files
24
+ self.dataset_config = {
25
+ "collection_id": "test-collection",
26
+ "dataset_id": "test-dataset",
27
+ }
28
+ self.workflow_config = {
29
+ "properties": {"title": "Test Workflow"},
30
+ "workflow_id": "test-workflow",
31
+ }
32
+
33
+ # Mock fsspec.open for config files
34
+ self.mock_fsspec_open = mock_fsspec_open
35
+ self.mock_fsspec_open.side_effect = [
36
+ mock_open(read_data=yaml.dump(self.dataset_config)).return_value,
37
+ mock_open(read_data=yaml.dump(self.workflow_config)).return_value,
38
+ ]
39
+
40
+ # Initialize Publisher
41
+ self.publisher = Publisher(
42
+ dataset_config_path="test-dataset-config.yaml",
43
+ workflow_config_path="test-workflow-config.yaml",
44
+ )
45
+
46
+ def test_normalize_name(self):
47
+ self.assertEqual(Publisher._normalize_name("Test Name"), "test-name")
48
+ self.assertEqual(Publisher._normalize_name("Test Name"), "test---name")
49
+ self.assertIsNone(Publisher._normalize_name(""))
50
+ self.assertIsNone(Publisher._normalize_name(None))
51
+
52
+ def test_write_to_file(self):
53
+ # Create a temporary file
54
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
55
+ file_path = temp_file.name
56
+
57
+ # Test data
58
+ data = {"key": "value"}
59
+
60
+ # Call the method
61
+ Publisher._write_to_file(file_path, data)
62
+
63
+ # Read the file and verify its content
64
+ with open(file_path, "r") as f:
65
+ content = json.load(f)
66
+ self.assertEqual(content, data)
67
+
68
+ # Clean up
69
+ Path(file_path).unlink()
70
+
71
+ def test_update_base_catalog(self):
72
+ # Create a mock Catalog
73
+ catalog = Catalog(id="test-catalog", description="Test Catalog")
74
+
75
+ # Mock file path and item ID
76
+ catalog_path = "test-catalog.json"
77
+ item_id = "test-item"
78
+ self_href = "https://example.com/catalog.json"
79
+
80
+ self.publisher.workflow_title = "Test Workflow"
81
+
82
+ # Mock the Catalog.from_file method
83
+ with patch("pystac.Catalog.from_file", return_value=catalog):
84
+ updated_catalog = self.publisher._update_base_catalog(
85
+ catalog_path, item_id, self_href
86
+ )
87
+
88
+ # Assertions
89
+ self.assertEqual(updated_catalog.get_self_href(), self_href)
90
+ self.assertIsInstance(updated_catalog, Catalog)
91
+
92
+ def test_read_config_files(self):
93
+ # Mock dataset and workflow config files
94
+ dataset_config = {
95
+ "collection_id": "test-collection",
96
+ "dataset_id": "test-dataset",
97
+ }
98
+ workflow_config = {
99
+ "properties": {"title": "Test Workflow"},
100
+ "workflow_id": "test-workflow",
101
+ }
102
+
103
+ # Mock fsspec.open for config files
104
+ self.mock_fsspec_open.side_effect = [
105
+ mock_open(read_data=yaml.dump(dataset_config)).return_value,
106
+ mock_open(read_data=yaml.dump(workflow_config)).return_value,
107
+ ]
108
+
109
+ # Assertions
110
+ self.assertEqual(self.publisher.dataset_config, dataset_config)
111
+ self.assertEqual(self.publisher.workflow_config, workflow_config)
112
+
113
+ @patch("deep_code.tools.publish.GitHubPublisher")
114
+ def test_environment_repo_selection(self, mock_gp):
115
+ Publisher(environment="production")
116
+ assert mock_gp.call_args.kwargs["repo_name"] == "open-science-catalog-metadata"
117
+ Publisher(environment="staging")
118
+ assert (
119
+ mock_gp.call_args.kwargs["repo_name"]
120
+ == "open-science-catalog-metadata-staging"
121
+ )
122
+ Publisher(environment="testing")
123
+ assert (
124
+ mock_gp.call_args.kwargs["repo_name"]
125
+ == "open-science-catalog-metadata-testing"
126
+ )
127
+
128
+ @patch.object(Publisher, "publish_dataset", return_value={"a": {}})
129
+ @patch.object(
130
+ Publisher, "generate_workflow_experiment_records", return_value={"b": {}}
131
+ )
132
+ def test_publish_mode_routing(self, mock_wf, mock_ds):
133
+ # dataset only
134
+ self.publisher.publish(write_to_file=True, mode="dataset")
135
+ mock_ds.assert_called()
136
+ mock_wf.assert_not_called()
137
+
138
+ mock_ds.reset_mock()
139
+ mock_wf.reset_mock()
140
+ self.publisher.publish(write_to_file=True, mode="workflow")
141
+ mock_ds.assert_not_called()
142
+ mock_wf.assert_called()
143
+
144
+ @patch.object(Publisher, "generate_workflow_experiment_records", return_value={})
145
+ @patch.object(Publisher, "publish_dataset", return_value={})
146
+ def test_publish_nothing_to_publish_raises(
147
+ self, mock_publish_dataset, mock_generate_workflow_experiment_records
148
+ ):
149
+ with pytest.raises(ValueError):
150
+ self.publisher.publish(write_to_file=False, mode="dataset")
151
+ mock_publish_dataset.assert_called_once()
152
+ mock_generate_workflow_experiment_records.assert_not_called()
153
+
154
+ @patch.object(Publisher, "publish_dataset", return_value={"x": {}})
155
+ @patch.object(
156
+ Publisher, "generate_workflow_experiment_records", return_value={"y": {}}
157
+ )
158
+ def test_publish_builds_pr_params(self, mock_wf, mock_ds):
159
+ # Make PR creation return a fixed URL
160
+ self.publisher.gh_publisher.publish_files.return_value = "PR_URL"
161
+
162
+ # Provide IDs for commit/PR labels
163
+ self.publisher.collection_id = "col"
164
+ self.publisher.workflow_id = "wf"
165
+
166
+ url = self.publisher.publish(write_to_file=False, mode="all")
167
+ assert url == "PR_URL"
168
+
169
+ # Inspect the call arguments to publish_files
170
+ _, kwargs = self.publisher.gh_publisher.publish_files.call_args
171
+ assert "dataset: col" in kwargs["commit_message"]
172
+ assert "workflow/experiment: wf" in kwargs["commit_message"]
173
+ assert "dataset: col" in kwargs["pr_title"]
174
+ assert "workflow/experiment: wf" in kwargs["pr_title"]
175
+
176
+
177
+ class TestParseGithubNotebookUrl:
178
+ @pytest.mark.parametrize(
179
+ "url,repo_url,repo_name,branch,file_path",
180
+ [
181
+ (
182
+ "https://github.com/deepesdl/cube-gen/blob/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
183
+ "https://github.com/deepesdl/cube-gen",
184
+ "cube-gen",
185
+ "main",
186
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
187
+ ),
188
+ (
189
+ "https://github.com/deepesdl/cube-gen/tree/release-1.0/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
190
+ "https://github.com/deepesdl/cube-gen",
191
+ "cube-gen",
192
+ "release-1.0",
193
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
194
+ ),
195
+ (
196
+ "https://raw.githubusercontent.com/deepesdl/cube-gen/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
197
+ "https://github.com/deepesdl/cube-gen",
198
+ "cube-gen",
199
+ "main",
200
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
201
+ ),
202
+ ],
203
+ )
204
+ def test_valid_urls(self, url, repo_url, repo_name, branch, file_path):
205
+ got_repo_url, got_repo_name, got_branch, got_file_path = LinksBuilder._parse_github_notebook_url(
206
+ url
207
+ )
208
+ assert got_repo_url == repo_url
209
+ assert got_repo_name == repo_name
210
+ assert got_branch == branch
211
+ assert got_file_path == file_path
212
+
213
+ def test_invalid_domain(self):
214
+ url = "https://gitlab.com/deepesdl/cube-gen/-/blob/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb"
215
+ with pytest.raises(ValueError) as e:
216
+ LinksBuilder._parse_github_notebook_url(url)
217
+ assert "Only GitHub URLs are supported" in str(e.value)
218
+
219
+ def test_unexpected_github_format_missing_blob_or_tree(self):
220
+ # Missing the "blob" or "tree" segment
221
+ url = "https://github.com/deepesdl/cube-gen/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb"
222
+ with pytest.raises(ValueError) as e:
223
+ LinksBuilder._parse_github_notebook_url(url)
224
+ assert "Unexpected GitHub URL format" in str(e.value)
225
+
226
+ def test_unexpected_raw_format_too_short(self):
227
+ url = "https://raw.githubusercontent.com/deepesdl/cube-gen/main"
228
+ with pytest.raises(ValueError) as e:
229
+ LinksBuilder._parse_github_notebook_url(url)
230
+ assert "Unexpected raw.githubusercontent URL format" in str(e.value)
@@ -65,8 +65,11 @@ class TestOSCProductSTACGenerator(unittest.TestCase):
65
65
  self.generator = OscDatasetStacGenerator(
66
66
  dataset_id="mock-dataset-id",
67
67
  collection_id="mock-collection-id",
68
+ workflow_id="dummy",
69
+ workflow_title="test",
68
70
  access_link="s3://mock-bucket/mock-dataset",
69
71
  documentation_link="https://example.com/docs",
72
+ license_type="proprietary",
70
73
  osc_status="ongoing",
71
74
  osc_region="Global",
72
75
  osc_themes=["climate", "environment"],