deep-code 0.1.4.dev1__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/PKG-INFO +20 -2
  2. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/README.md +18 -1
  3. deep_code-0.1.6/deep_code/cli/publish.py +264 -0
  4. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/constants.py +10 -0
  5. deep_code-0.1.6/deep_code/tests/tools/test_publish.py +230 -0
  6. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_dataset_stac_generator.py +3 -0
  7. deep_code-0.1.6/deep_code/tests/utils/test_github_automation.py +362 -0
  8. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_ogc_api_record.py +20 -7
  9. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/new.py +10 -3
  10. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/publish.py +235 -96
  11. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/custom_xrlint_rules.py +1 -1
  12. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/dataset_stac_generator.py +19 -1
  13. deep_code-0.1.6/deep_code/utils/github_automation.py +272 -0
  14. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/ogc_api_record.py +156 -23
  15. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/version.py +1 -1
  16. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/PKG-INFO +20 -2
  17. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/requires.txt +1 -0
  18. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/top_level.txt +1 -0
  19. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/pyproject.toml +1 -0
  20. deep_code-0.1.4.dev1/deep_code/cli/publish.py +0 -31
  21. deep_code-0.1.4.dev1/deep_code/tests/tools/test_publish.py +0 -109
  22. deep_code-0.1.4.dev1/deep_code/tests/utils/test_github_automation.py +0 -171
  23. deep_code-0.1.4.dev1/deep_code/utils/github_automation.py +0 -145
  24. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/LICENSE +0 -0
  25. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/__init__.py +0 -0
  26. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/__init__.py +0 -0
  27. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/generate_config.py +0 -0
  28. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/cli/main.py +0 -0
  29. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/tools/__init__.py +0 -0
  30. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/__init__.py +0 -0
  31. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_custom_xrlint_rules.py +0 -0
  32. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_helper.py +0 -0
  33. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_ogc_record_generator.py +0 -0
  34. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tests/utils/test_osc_extension.py +0 -0
  35. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/__init__.py +0 -0
  36. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/lint.py +0 -0
  37. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/register.py +0 -0
  38. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/setup_ci.py +0 -0
  39. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/tools/test.py +0 -0
  40. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/__init__.py +0 -0
  41. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/helper.py +0 -0
  42. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/ogc_record_generator.py +0 -0
  43. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code/utils/osc_extension.py +0 -0
  44. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/SOURCES.txt +0 -0
  45. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/dependency_links.txt +0 -0
  46. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/deep_code.egg-info/entry_points.txt +0 -0
  47. {deep_code-0.1.4.dev1 → deep_code-0.1.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deep_code
3
- Version: 0.1.4.dev1
3
+ Version: 0.1.6
4
4
  Summary: deepesdl earthcode integration utility tool
5
5
  Author-email: Tejas Morbagal Harish <tejas.morbagalharish@brockmann-consult.de>
6
6
  License: MIT
@@ -14,6 +14,7 @@ License-File: LICENSE
14
14
  Requires-Dist: click
15
15
  Requires-Dist: fsspec
16
16
  Requires-Dist: jsonschema
17
+ Requires-Dist: jsonpickle
17
18
  Requires-Dist: requests
18
19
  Requires-Dist: pandas
19
20
  Requires-Dist: pystac
@@ -125,7 +126,8 @@ catalog
125
126
 
126
127
  ### Usage
127
128
  ```
128
- deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
129
+ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
130
+ all|dataset|workflow]
129
131
  ```
130
132
 
131
133
  #### Arguments
@@ -136,8 +138,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
136
138
  (e.g., workflow-config.yaml)
137
139
 
138
140
  #### Options
141
+ --dataset-config, - Explicit path to dataset config
142
+ --workflow-config, - Explicit path to workflow config
139
143
  --environment, -e - Target catalog environment:
140
144
  production (default) | staging | testing
145
+ --mode, -m Publishing mode:
146
+ all (default) | dataset | workflow
141
147
 
142
148
  #### Examples:
143
149
  1. Publish to staging catalog
@@ -152,6 +158,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
152
158
  ```
153
159
  deep-code publish dataset-config.yaml workflow-config.yaml
154
160
  ```
161
+ 4. Publish Dataset only
162
+ ```
163
+ deep-code publish dataset-config.yaml -m dataset
164
+
165
+ deep-code publish --dataset-config dataset.yaml -m dataset
166
+ ```
167
+ 5. Publish Workflow only
168
+ ```
169
+ deep-code publish workflow-config.yaml -m workflow
170
+
171
+ deep-code publish --workflow-config workflow.yaml -m workflow
172
+ ```
155
173
  #### dataset-config.yaml example
156
174
 
157
175
  ```
@@ -93,7 +93,8 @@ catalog
93
93
 
94
94
  ### Usage
95
95
  ```
96
- deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
96
+ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
97
+ all|dataset|workflow]
97
98
  ```
98
99
 
99
100
  #### Arguments
@@ -104,8 +105,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
104
105
  (e.g., workflow-config.yaml)
105
106
 
106
107
  #### Options
108
+ --dataset-config, - Explicit path to dataset config
109
+ --workflow-config, - Explicit path to workflow config
107
110
  --environment, -e - Target catalog environment:
108
111
  production (default) | staging | testing
112
+ --mode, -m Publishing mode:
113
+ all (default) | dataset | workflow
109
114
 
110
115
  #### Examples:
111
116
  1. Publish to staging catalog
@@ -120,6 +125,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
120
125
  ```
121
126
  deep-code publish dataset-config.yaml workflow-config.yaml
122
127
  ```
128
+ 4. Publish Dataset only
129
+ ```
130
+ deep-code publish dataset-config.yaml -m dataset
131
+
132
+ deep-code publish --dataset-config dataset.yaml -m dataset
133
+ ```
134
+ 5. Publish Workflow only
135
+ ```
136
+ deep-code publish workflow-config.yaml -m workflow
137
+
138
+ deep-code publish --workflow-config workflow.yaml -m workflow
139
+ ```
123
140
  #### dataset-config.yaml example
124
141
 
125
142
  ```
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright (c) 2025 by Brockmann Consult GmbH
4
+ # Permissions are hereby granted under the terms of the MIT License:
5
+ # https://opensource.org/licenses/MIT.
6
+
7
+ from pathlib import Path
8
+ from typing import Literal
9
+
10
+ import click
11
+ import yaml
12
+
13
+ from deep_code.tools.publish import Publisher
14
+
15
+ Mode = Literal["all", "dataset", "workflow"]
16
+
17
+ DATASET_MARKERS = {
18
+ "stac_version",
19
+ "extent",
20
+ "license",
21
+ "summaries",
22
+ "assets",
23
+ "providers",
24
+ "collection",
25
+ "collection_id",
26
+ "id",
27
+ }
28
+ WORKFLOW_MARKERS = {
29
+ "workflow",
30
+ "workflow_id",
31
+ "workflow_title",
32
+ "experiment",
33
+ "jupyter_notebook_url",
34
+ "notebook",
35
+ "parameters",
36
+ "input_datasets",
37
+ }
38
+
39
+
40
+ def _validate_inputs(
41
+ dataset_config: str | None, workflow_config: str | None, mode: str
42
+ ):
43
+ mode = mode.lower()
44
+
45
+ def ensure_file(path: str | None, label: str):
46
+ if path is None:
47
+ raise click.UsageError(f"{label} is required but was not provided.")
48
+ if not Path(path).is_file():
49
+ raise click.UsageError(f"{label} not found: {path} is not a file")
50
+
51
+ if mode == "dataset":
52
+ ensure_file(dataset_config, "DATASET_CONFIG")
53
+ if workflow_config is not None:
54
+ click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
55
+
56
+ elif mode == "workflow":
57
+ ensure_file(workflow_config, "WORKFLOW_CONFIG")
58
+
59
+ elif mode == "all":
60
+ ensure_file(dataset_config, "DATASET_CONFIG")
61
+ ensure_file(workflow_config, "WORKFLOW_CONFIG")
62
+
63
+ else:
64
+ raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.")
65
+
66
+
67
+ def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]:
68
+ """Detect config type via filename hints and YAML top-level keys."""
69
+ name = path.name.lower()
70
+ if "workflow" in name or "experiment" in name:
71
+ return "workflow"
72
+ if "dataset" in name or "collection" in name:
73
+ return "dataset"
74
+
75
+ try:
76
+ data = yaml.safe_load(path.read_text(encoding="utf-8"))
77
+ except Exception as e:
78
+ raise ValueError(f"Cannot read YAML from {path}: {e}")
79
+
80
+ if not isinstance(data, dict):
81
+ raise ValueError(f"YAML in {path} must be a mapping/object at the top level.")
82
+
83
+ keys = set(data.keys())
84
+ ds_score = len(keys & DATASET_MARKERS)
85
+ wf_score = len(keys & WORKFLOW_MARKERS)
86
+
87
+ if ds_score > wf_score:
88
+ return "dataset"
89
+ if wf_score > ds_score:
90
+ return "workflow"
91
+
92
+ raise ValueError(
93
+ f"Ambiguous config type for {path}. "
94
+ "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly."
95
+ )
96
+
97
+
98
+ def _assign_configs(
99
+ pos_first: str | None,
100
+ pos_second: str | None,
101
+ mode: Mode,
102
+ explicit_dataset: str | None,
103
+ explicit_workflow: str | None,
104
+ ) -> tuple[str | None, str | None]:
105
+ """
106
+ Decide which file is dataset vs workflow.
107
+ Precedence: explicit flags > positional + detection.
108
+ Returns (dataset_config, workflow_config).
109
+ """
110
+ ds = explicit_dataset
111
+ wf = explicit_workflow
112
+
113
+ # If both explicit provided, we're done; warn if extra positionals are passed.
114
+ pos_args = [p for p in (pos_first, pos_second) if p]
115
+ if ds and wf:
116
+ if pos_args:
117
+ click.echo(
118
+ "Positional config paths ignored because explicit flags were provided.",
119
+ err=True,
120
+ )
121
+ return ds, wf
122
+
123
+ # Helper to assign a single positional file to the missing slot
124
+ def _assign_single(p: str) -> tuple[str | None, str | None]:
125
+ nonlocal ds, wf
126
+ if ds and wf:
127
+ raise click.UsageError(
128
+ "Both dataset and workflow configs already provided; remove extra positional files."
129
+ )
130
+ # Use mode as a strong hint when only one is missing
131
+ if not ds and mode == "dataset":
132
+ ds = p
133
+ return
134
+ if not wf and mode == "workflow":
135
+ wf = p
136
+ return
137
+ # Otherwise detect
138
+ kind = _detect_config_type(Path(p))
139
+ if kind == "dataset":
140
+ if ds and Path(ds).resolve() != Path(p).resolve():
141
+ raise click.UsageError(
142
+ f"Multiple dataset configs supplied: {ds} and {p}"
143
+ )
144
+ ds = p
145
+ else:
146
+ if wf and Path(wf).resolve() != Path(p).resolve():
147
+ raise click.UsageError(
148
+ f"Multiple workflow configs supplied: {wf} and {p}"
149
+ )
150
+ wf = p
151
+
152
+ # If exactly one explicit provided, try to fill the other via positionals
153
+ if ds and not wf:
154
+ if len(pos_args) > 1:
155
+ raise click.UsageError(
156
+ "Provide at most one positional file when using --dataset-config."
157
+ )
158
+ if pos_args:
159
+ _assign_single(pos_args[0])
160
+ return ds, wf
161
+
162
+ if wf and not ds:
163
+ if len(pos_args) > 1:
164
+ raise click.UsageError(
165
+ "Provide at most one positional file when using --workflow-config."
166
+ )
167
+ if pos_args:
168
+ _assign_single(pos_args[0])
169
+ return ds, wf
170
+
171
+ # No explicit flags: rely on positionals + detection
172
+ if not pos_args:
173
+ return None, None
174
+ if len(pos_args) == 1:
175
+ p = pos_args[0]
176
+ if mode == "dataset":
177
+ return p, None
178
+ if mode == "workflow":
179
+ return None, p
180
+ # mode == "all": detect and require the other later in validation
181
+ kind = _detect_config_type(Path(p))
182
+ return (p, None) if kind == "dataset" else (None, p)
183
+
184
+ # Two positionals: detect both and assign
185
+ p1, p2 = pos_args[0], pos_args[1]
186
+ k1 = _detect_config_type(Path(p1))
187
+ k2 = _detect_config_type(Path(p2))
188
+ if k1 == k2:
189
+ raise click.UsageError(
190
+ f"Both files look like '{k1}' configs: {p1} and {p2}. "
191
+ "Please rename one or use --dataset-config/--workflow-config."
192
+ )
193
+ ds = p1 if k1 == "dataset" else p2
194
+ wf = p1 if k1 == "workflow" else p2
195
+ return ds, wf
196
+
197
+
198
+ @click.command(name="publish")
199
+ @click.argument("dataset_config", type=click.Path(exists=True), required=False)
200
+ @click.argument("workflow_config", type=click.Path(exists=True), required=False)
201
+ @click.option(
202
+ "--dataset-config",
203
+ "dataset_config_opt",
204
+ type=click.Path(exists=True),
205
+ help="Explicit path to dataset config (overrides positional detection).",
206
+ )
207
+ @click.option(
208
+ "--workflow-config",
209
+ "workflow_config_opt",
210
+ type=click.Path(exists=True),
211
+ help="Explicit path to workflow config (overrides positional detection).",
212
+ )
213
+ @click.option(
214
+ "--environment",
215
+ "-e",
216
+ type=click.Choice(["production", "staging", "testing"], case_sensitive=False),
217
+ default="production",
218
+ help="Target environment for publishing (production, staging, testing)",
219
+ )
220
+ @click.option(
221
+ "--mode",
222
+ "-m",
223
+ type=click.Choice(["all", "dataset", "workflow"], case_sensitive=False),
224
+ default="all",
225
+ help="Publishing mode: dataset only, workflow only, or both",
226
+ )
227
+ def publish(
228
+ dataset_config,
229
+ workflow_config,
230
+ dataset_config_opt,
231
+ workflow_config_opt,
232
+ environment,
233
+ mode,
234
+ ):
235
+ """
236
+ Publish dataset and/or workflow/experiment metadata.
237
+
238
+ Examples:
239
+ deep-code publish workflow.yaml -e staging -m workflow
240
+ deep-code publish dataset.yaml -e staging -m dataset
241
+ deep-code publish dataset.yaml workflow.yaml -m all
242
+ deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all
243
+ deep-code publish --dataset-config dataset.yaml -m dataset
244
+ deep-code publish --workflow-config wf.yaml -m workflow
245
+ """
246
+ mode = mode.lower()
247
+ ds_path, wf_path = _assign_configs(
248
+ dataset_config,
249
+ workflow_config,
250
+ mode, # type: ignore[arg-type]
251
+ dataset_config_opt,
252
+ workflow_config_opt,
253
+ )
254
+
255
+ _validate_inputs(ds_path, wf_path, mode)
256
+
257
+ publisher = Publisher(
258
+ dataset_config_path=ds_path,
259
+ workflow_config_path=wf_path,
260
+ environment=environment.lower(),
261
+ )
262
+ result = publisher.publish(mode=mode)
263
+
264
+ click.echo(result if isinstance(result, str) else "Wrote files locally.")
@@ -29,3 +29,13 @@ WORKFLOW_BASE_CATALOG_SELF_HREF = (
29
29
  ".json"
30
30
  )
31
31
  PROJECT_COLLECTION_NAME = "deep-earth-system-data-lab"
32
+ DEEPESDL_GIT_PULL_BASE = (
33
+ "https://deep.earthsystemdatalab.net/hub/user-redirect/git-pull"
34
+ )
35
+ APPLICATION_TYPE_JUPYTER_SPEC = (
36
+ "https://raw.githubusercontent.com/EOEPCA/metadata"
37
+ "-profile/refs/heads/1.0/schemas/application-type-jupyter-notebook"
38
+ )
39
+ APPLICATION_STAC_EXTENSION_SPEC = (
40
+ "https://stac-extensions.github.io/application/v0.1.0/schema.json"
41
+ )
@@ -0,0 +1,230 @@
1
+ import json
2
+ import tempfile
3
+ import unittest
4
+ from pathlib import Path
5
+ from unittest.mock import MagicMock, mock_open, patch
6
+
7
+ import pytest
8
+ import yaml
9
+ from pystac import Catalog
10
+
11
+ from deep_code.tools.publish import Publisher
12
+ from deep_code.utils.ogc_api_record import LinksBuilder
13
+
14
+
15
+ class TestPublisher(unittest.TestCase):
16
+ @patch("fsspec.open")
17
+ @patch("deep_code.tools.publish.GitHubPublisher")
18
+ def setUp(self, mock_github_publisher, mock_fsspec_open):
19
+ # Mock GitHubPublisher to avoid reading .gitaccess
20
+ self.mock_github_publisher_instance = MagicMock()
21
+ mock_github_publisher.return_value = self.mock_github_publisher_instance
22
+
23
+ # Mock dataset and workflow config files
24
+ self.dataset_config = {
25
+ "collection_id": "test-collection",
26
+ "dataset_id": "test-dataset",
27
+ }
28
+ self.workflow_config = {
29
+ "properties": {"title": "Test Workflow"},
30
+ "workflow_id": "test-workflow",
31
+ }
32
+
33
+ # Mock fsspec.open for config files
34
+ self.mock_fsspec_open = mock_fsspec_open
35
+ self.mock_fsspec_open.side_effect = [
36
+ mock_open(read_data=yaml.dump(self.dataset_config)).return_value,
37
+ mock_open(read_data=yaml.dump(self.workflow_config)).return_value,
38
+ ]
39
+
40
+ # Initialize Publisher
41
+ self.publisher = Publisher(
42
+ dataset_config_path="test-dataset-config.yaml",
43
+ workflow_config_path="test-workflow-config.yaml",
44
+ )
45
+
46
+ def test_normalize_name(self):
47
+ self.assertEqual(Publisher._normalize_name("Test Name"), "test-name")
48
+ self.assertEqual(Publisher._normalize_name("Test Name"), "test---name")
49
+ self.assertIsNone(Publisher._normalize_name(""))
50
+ self.assertIsNone(Publisher._normalize_name(None))
51
+
52
+ def test_write_to_file(self):
53
+ # Create a temporary file
54
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
55
+ file_path = temp_file.name
56
+
57
+ # Test data
58
+ data = {"key": "value"}
59
+
60
+ # Call the method
61
+ Publisher._write_to_file(file_path, data)
62
+
63
+ # Read the file and verify its content
64
+ with open(file_path, "r") as f:
65
+ content = json.load(f)
66
+ self.assertEqual(content, data)
67
+
68
+ # Clean up
69
+ Path(file_path).unlink()
70
+
71
+ def test_update_base_catalog(self):
72
+ # Create a mock Catalog
73
+ catalog = Catalog(id="test-catalog", description="Test Catalog")
74
+
75
+ # Mock file path and item ID
76
+ catalog_path = "test-catalog.json"
77
+ item_id = "test-item"
78
+ self_href = "https://example.com/catalog.json"
79
+
80
+ self.publisher.workflow_title = "Test Workflow"
81
+
82
+ # Mock the Catalog.from_file method
83
+ with patch("pystac.Catalog.from_file", return_value=catalog):
84
+ updated_catalog = self.publisher._update_base_catalog(
85
+ catalog_path, item_id, self_href
86
+ )
87
+
88
+ # Assertions
89
+ self.assertEqual(updated_catalog.get_self_href(), self_href)
90
+ self.assertIsInstance(updated_catalog, Catalog)
91
+
92
+ def test_read_config_files(self):
93
+ # Mock dataset and workflow config files
94
+ dataset_config = {
95
+ "collection_id": "test-collection",
96
+ "dataset_id": "test-dataset",
97
+ }
98
+ workflow_config = {
99
+ "properties": {"title": "Test Workflow"},
100
+ "workflow_id": "test-workflow",
101
+ }
102
+
103
+ # Mock fsspec.open for config files
104
+ self.mock_fsspec_open.side_effect = [
105
+ mock_open(read_data=yaml.dump(dataset_config)).return_value,
106
+ mock_open(read_data=yaml.dump(workflow_config)).return_value,
107
+ ]
108
+
109
+ # Assertions
110
+ self.assertEqual(self.publisher.dataset_config, dataset_config)
111
+ self.assertEqual(self.publisher.workflow_config, workflow_config)
112
+
113
+ @patch("deep_code.tools.publish.GitHubPublisher")
114
+ def test_environment_repo_selection(self, mock_gp):
115
+ Publisher(environment="production")
116
+ assert mock_gp.call_args.kwargs["repo_name"] == "open-science-catalog-metadata"
117
+ Publisher(environment="staging")
118
+ assert (
119
+ mock_gp.call_args.kwargs["repo_name"]
120
+ == "open-science-catalog-metadata-staging"
121
+ )
122
+ Publisher(environment="testing")
123
+ assert (
124
+ mock_gp.call_args.kwargs["repo_name"]
125
+ == "open-science-catalog-metadata-testing"
126
+ )
127
+
128
+ @patch.object(Publisher, "publish_dataset", return_value={"a": {}})
129
+ @patch.object(
130
+ Publisher, "generate_workflow_experiment_records", return_value={"b": {}}
131
+ )
132
+ def test_publish_mode_routing(self, mock_wf, mock_ds):
133
+ # dataset only
134
+ self.publisher.publish(write_to_file=True, mode="dataset")
135
+ mock_ds.assert_called()
136
+ mock_wf.assert_not_called()
137
+
138
+ mock_ds.reset_mock()
139
+ mock_wf.reset_mock()
140
+ self.publisher.publish(write_to_file=True, mode="workflow")
141
+ mock_ds.assert_not_called()
142
+ mock_wf.assert_called()
143
+
144
+ @patch.object(Publisher, "generate_workflow_experiment_records", return_value={})
145
+ @patch.object(Publisher, "publish_dataset", return_value={})
146
+ def test_publish_nothing_to_publish_raises(
147
+ self, mock_publish_dataset, mock_generate_workflow_experiment_records
148
+ ):
149
+ with pytest.raises(ValueError):
150
+ self.publisher.publish(write_to_file=False, mode="dataset")
151
+ mock_publish_dataset.assert_called_once()
152
+ mock_generate_workflow_experiment_records.assert_not_called()
153
+
154
+ @patch.object(Publisher, "publish_dataset", return_value={"x": {}})
155
+ @patch.object(
156
+ Publisher, "generate_workflow_experiment_records", return_value={"y": {}}
157
+ )
158
+ def test_publish_builds_pr_params(self, mock_wf, mock_ds):
159
+ # Make PR creation return a fixed URL
160
+ self.publisher.gh_publisher.publish_files.return_value = "PR_URL"
161
+
162
+ # Provide IDs for commit/PR labels
163
+ self.publisher.collection_id = "col"
164
+ self.publisher.workflow_id = "wf"
165
+
166
+ url = self.publisher.publish(write_to_file=False, mode="all")
167
+ assert url == "PR_URL"
168
+
169
+ # Inspect the call arguments to publish_files
170
+ _, kwargs = self.publisher.gh_publisher.publish_files.call_args
171
+ assert "dataset: col" in kwargs["commit_message"]
172
+ assert "workflow/experiment: wf" in kwargs["commit_message"]
173
+ assert "dataset: col" in kwargs["pr_title"]
174
+ assert "workflow/experiment: wf" in kwargs["pr_title"]
175
+
176
+
177
+ class TestParseGithubNotebookUrl:
178
+ @pytest.mark.parametrize(
179
+ "url,repo_url,repo_name,branch,file_path",
180
+ [
181
+ (
182
+ "https://github.com/deepesdl/cube-gen/blob/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
183
+ "https://github.com/deepesdl/cube-gen",
184
+ "cube-gen",
185
+ "main",
186
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
187
+ ),
188
+ (
189
+ "https://github.com/deepesdl/cube-gen/tree/release-1.0/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
190
+ "https://github.com/deepesdl/cube-gen",
191
+ "cube-gen",
192
+ "release-1.0",
193
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
194
+ ),
195
+ (
196
+ "https://raw.githubusercontent.com/deepesdl/cube-gen/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
197
+ "https://github.com/deepesdl/cube-gen",
198
+ "cube-gen",
199
+ "main",
200
+ "Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb",
201
+ ),
202
+ ],
203
+ )
204
+ def test_valid_urls(self, url, repo_url, repo_name, branch, file_path):
205
+ got_repo_url, got_repo_name, got_branch, got_file_path = LinksBuilder._parse_github_notebook_url(
206
+ url
207
+ )
208
+ assert got_repo_url == repo_url
209
+ assert got_repo_name == repo_name
210
+ assert got_branch == branch
211
+ assert got_file_path == file_path
212
+
213
+ def test_invalid_domain(self):
214
+ url = "https://gitlab.com/deepesdl/cube-gen/-/blob/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb"
215
+ with pytest.raises(ValueError) as e:
216
+ LinksBuilder._parse_github_notebook_url(url)
217
+ assert "Only GitHub URLs are supported" in str(e.value)
218
+
219
+ def test_unexpected_github_format_missing_blob_or_tree(self):
220
+ # Missing the "blob" or "tree" segment
221
+ url = "https://github.com/deepesdl/cube-gen/main/Permafrost/Create-CCI-Permafrost-cube-EarthCODE.ipynb"
222
+ with pytest.raises(ValueError) as e:
223
+ LinksBuilder._parse_github_notebook_url(url)
224
+ assert "Unexpected GitHub URL format" in str(e.value)
225
+
226
+ def test_unexpected_raw_format_too_short(self):
227
+ url = "https://raw.githubusercontent.com/deepesdl/cube-gen/main"
228
+ with pytest.raises(ValueError) as e:
229
+ LinksBuilder._parse_github_notebook_url(url)
230
+ assert "Unexpected raw.githubusercontent URL format" in str(e.value)
@@ -65,8 +65,11 @@ class TestOSCProductSTACGenerator(unittest.TestCase):
65
65
  self.generator = OscDatasetStacGenerator(
66
66
  dataset_id="mock-dataset-id",
67
67
  collection_id="mock-collection-id",
68
+ workflow_id="dummy",
69
+ workflow_title="test",
68
70
  access_link="s3://mock-bucket/mock-dataset",
69
71
  documentation_link="https://example.com/docs",
72
+ license_type="proprietary",
70
73
  osc_status="ongoing",
71
74
  osc_region="Global",
72
75
  osc_themes=["climate", "environment"],