deep-code 0.1.7__tar.gz → 0.1.9.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. deep_code-0.1.9.dev0/PKG-INFO +64 -0
  2. deep_code-0.1.9.dev0/README.md +25 -0
  3. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/constants.py +1 -0
  4. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/tools/test_publish.py +201 -0
  5. deep_code-0.1.9.dev0/deep_code/tests/utils/test_dataset_stac_generator.py +601 -0
  6. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/new.py +14 -0
  7. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/publish.py +90 -18
  8. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/dataset_stac_generator.py +297 -75
  9. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/ogc_api_record.py +1 -1
  10. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/version.py +1 -1
  11. deep_code-0.1.9.dev0/deep_code.egg-info/PKG-INFO +64 -0
  12. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code.egg-info/requires.txt +7 -0
  13. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/pyproject.toml +7 -0
  14. deep_code-0.1.7/PKG-INFO +0 -214
  15. deep_code-0.1.7/README.md +0 -181
  16. deep_code-0.1.7/deep_code/tests/utils/test_dataset_stac_generator.py +0 -236
  17. deep_code-0.1.7/deep_code.egg-info/PKG-INFO +0 -214
  18. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/LICENSE +0 -0
  19. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/__init__.py +0 -0
  20. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/cli/__init__.py +0 -0
  21. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/cli/generate_config.py +0 -0
  22. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/cli/main.py +0 -0
  23. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/cli/publish.py +0 -0
  24. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/tools/__init__.py +0 -0
  25. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/__init__.py +0 -0
  26. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_custom_xrlint_rules.py +0 -0
  27. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_github_automation.py +0 -0
  28. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_helper.py +0 -0
  29. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_ogc_api_record.py +0 -0
  30. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_ogc_record_generator.py +0 -0
  31. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tests/utils/test_osc_extension.py +0 -0
  32. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/__init__.py +0 -0
  33. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/lint.py +0 -0
  34. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/register.py +0 -0
  35. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/setup_ci.py +0 -0
  36. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/tools/test.py +0 -0
  37. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/__init__.py +0 -0
  38. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/custom_xrlint_rules.py +0 -0
  39. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/github_automation.py +0 -0
  40. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/helper.py +0 -0
  41. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/ogc_record_generator.py +0 -0
  42. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code/utils/osc_extension.py +0 -0
  43. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code.egg-info/SOURCES.txt +0 -0
  44. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code.egg-info/dependency_links.txt +0 -0
  45. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code.egg-info/entry_points.txt +0 -0
  46. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/deep_code.egg-info/top_level.txt +0 -0
  47. {deep_code-0.1.7 → deep_code-0.1.9.dev0}/setup.cfg +0 -0
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: deep_code
3
+ Version: 0.1.9.dev0
4
+ Summary: deepesdl earthcode integration utility tool
5
+ Author-email: Tejas Morbagal Harish <tejas.morbagalharish@brockmann-consult.de>
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/deepesdl/deep-code
8
+ Project-URL: Issues, https://github.com/deepesdl/deep-code/issues
9
+ Project-URL: Changelog, https://github.com/deepesdl/deep-code/blob/main/CHANGES.md
10
+ Keywords: analysis ready data,data science,datacube,xarray,zarr,xcube,stac,FAIR,reproducible workflow,DeepESDL
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: click
15
+ Requires-Dist: fsspec
16
+ Requires-Dist: jsonschema
17
+ Requires-Dist: jsonpickle
18
+ Requires-Dist: requests
19
+ Requires-Dist: pandas
20
+ Requires-Dist: pystac
21
+ Requires-Dist: pyyaml
22
+ Requires-Dist: xcube-core
23
+ Requires-Dist: xrlint
24
+ Provides-Extra: dev
25
+ Requires-Dist: black; extra == "dev"
26
+ Requires-Dist: flake8; extra == "dev"
27
+ Requires-Dist: numpy; extra == "dev"
28
+ Requires-Dist: ruff; extra == "dev"
29
+ Requires-Dist: pytest; extra == "dev"
30
+ Requires-Dist: pytest-cov; extra == "dev"
31
+ Requires-Dist: pytest-recording; extra == "dev"
32
+ Provides-Extra: docs
33
+ Requires-Dist: mkdocs>=1.5; extra == "docs"
34
+ Requires-Dist: mkdocs-autorefs; extra == "docs"
35
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
36
+ Requires-Dist: mkdocstrings; extra == "docs"
37
+ Requires-Dist: mkdocstrings-python; extra == "docs"
38
+ Dynamic: license-file
39
+
40
+ # deep-code
41
+
42
+ [![Build Status](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml/badge.svg)](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml)
43
+ [![codecov](https://codecov.io/gh/deepesdl/deep-code/graph/badge.svg?token=47MQXOXWOK)](https://codecov.io/gh/deepesdl/deep-code)
44
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
45
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
46
+ [![PyPI version](https://img.shields.io/pypi/v/deep-code)](https://pypi.org/project/deep-code/)
47
+ [![License](https://img.shields.io/github/license/dcs4cop/xcube-smos)](https://github.com/deepesdl/deep-code/blob/main/LICENSE)
48
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://deepesdl.github.io/deep-code/)
49
+
50
+ `deep-code` is a lightweight Python CLI and API that turns DeepESDL datasets and
51
+ workflows into EarthCODE Open Science Catalog metadata. It can generate starter configs,
52
+ build STAC collections and OGC API records, and open pull requests to the target
53
+ EarthCODE metadata repository (production, staging, or testing).
54
+
55
+ ## Features
56
+ - Generate starter dataset and workflow YAML templates.
57
+ - Publish dataset collections, workflows, and experiments via a single command.
58
+ - Build STAC collections and catalogs for Datasets and their corresponding variables
59
+ automatically from the dataset metadata.
60
+ - Build OGC API records for Workflows and Experiments from your configs.
61
+ - Flexible publishing targets, i.e. production/staging/testing EarthCODE metadata
62
+ repositories with GitHub automation.
63
+
64
+ More about deep-code can be found in its [documentation](https://deepesdl.github.io/deep-code/).
@@ -0,0 +1,25 @@
1
+ # deep-code
2
+
3
+ [![Build Status](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml/badge.svg)](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml)
4
+ [![codecov](https://codecov.io/gh/deepesdl/deep-code/graph/badge.svg?token=47MQXOXWOK)](https://codecov.io/gh/deepesdl/deep-code)
5
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
6
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
7
+ [![PyPI version](https://img.shields.io/pypi/v/deep-code)](https://pypi.org/project/deep-code/)
8
+ [![License](https://img.shields.io/github/license/dcs4cop/xcube-smos)](https://github.com/deepesdl/deep-code/blob/main/LICENSE)
9
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://deepesdl.github.io/deep-code/)
10
+
11
+ `deep-code` is a lightweight Python CLI and API that turns DeepESDL datasets and
12
+ workflows into EarthCODE Open Science Catalog metadata. It can generate starter configs,
13
+ build STAC collections and OGC API records, and open pull requests to the target
14
+ EarthCODE metadata repository (production, staging, or testing).
15
+
16
+ ## Features
17
+ - Generate starter dataset and workflow YAML templates.
18
+ - Publish dataset collections, workflows, and experiments via a single command.
19
+ - Build STAC collections and catalogs for Datasets and their corresponding variables
20
+ automatically from the dataset metadata.
21
+ - Build OGC API records for Workflows and Experiments from your configs.
22
+ - Flexible publishing targets, i.e. production/staging/testing EarthCODE metadata
23
+ repositories with GitHub automation.
24
+
25
+ More about deep-code can be found in its [documentation](https://deepesdl.github.io/deep-code/).
@@ -39,3 +39,4 @@ APPLICATION_TYPE_JUPYTER_SPEC = (
39
39
  APPLICATION_STAC_EXTENSION_SPEC = (
40
40
  "https://stac-extensions.github.io/application/v0.1.0/schema.json"
41
41
  )
42
+ ZARR_MEDIA_TYPE = "application/vnd+zarr"
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import os
2
3
  import tempfile
3
4
  import unittest
4
5
  from pathlib import Path
@@ -174,6 +175,206 @@ class TestPublisher(unittest.TestCase):
174
175
  assert "workflow/experiment: wf" in kwargs["pr_title"]
175
176
 
176
177
 
178
+ # ------------------------------------------------------------------
179
+ # S3 credential resolution
180
+ # ------------------------------------------------------------------
181
+
182
+ def test_get_stac_s3_storage_options_prefers_stac_env_vars(self):
183
+ env = {
184
+ "STAC_S3_KEY": "stac-key",
185
+ "STAC_S3_SECRET": "stac-secret",
186
+ "AWS_ACCESS_KEY_ID": "aws-key",
187
+ "AWS_SECRET_ACCESS_KEY": "aws-secret",
188
+ }
189
+ with patch.dict(os.environ, env):
190
+ opts = self.publisher._get_stac_s3_storage_options()
191
+ self.assertEqual(opts["key"], "stac-key")
192
+ self.assertEqual(opts["secret"], "stac-secret")
193
+ self.assertEqual(opts["s3_additional_kwargs"], {"ACL": ""})
194
+
195
+ def test_get_stac_s3_storage_options_falls_back_to_aws_env_vars(self):
196
+ env = {"AWS_ACCESS_KEY_ID": "aws-key", "AWS_SECRET_ACCESS_KEY": "aws-secret"}
197
+ patched_env = {
198
+ k: v
199
+ for k, v in os.environ.items()
200
+ if k not in ("STAC_S3_KEY", "STAC_S3_SECRET")
201
+ }
202
+ patched_env.update(env)
203
+ with patch.dict(os.environ, patched_env, clear=True):
204
+ opts = self.publisher._get_stac_s3_storage_options()
205
+ self.assertEqual(opts["key"], "aws-key")
206
+ self.assertEqual(opts["secret"], "aws-secret")
207
+ self.assertEqual(opts["s3_additional_kwargs"], {"ACL": ""})
208
+
209
+ def test_get_stac_s3_storage_options_returns_acl_suppression_for_boto3_chain(self):
210
+ no_cred_env = {
211
+ k: v
212
+ for k, v in os.environ.items()
213
+ if k
214
+ not in (
215
+ "STAC_S3_KEY",
216
+ "STAC_S3_SECRET",
217
+ "AWS_ACCESS_KEY_ID",
218
+ "AWS_SECRET_ACCESS_KEY",
219
+ )
220
+ }
221
+ with patch.dict(os.environ, no_cred_env, clear=True):
222
+ opts = self.publisher._get_stac_s3_storage_options()
223
+ self.assertEqual(opts, {"s3_additional_kwargs": {"ACL": ""}})
224
+
225
+ # ------------------------------------------------------------------
226
+ # S3 write helper
227
+ # ------------------------------------------------------------------
228
+
229
+ @patch("deep_code.tools.publish.fsspec.open")
230
+ def test_write_stac_catalog_to_s3(self, mock_fsspec_open):
231
+ mock_file = MagicMock()
232
+ mock_ctx = MagicMock()
233
+ mock_ctx.__enter__ = MagicMock(return_value=mock_file)
234
+ mock_ctx.__exit__ = MagicMock(return_value=False)
235
+ mock_fsspec_open.return_value = mock_ctx
236
+
237
+ file_dict = {
238
+ "s3://bucket/catalog.json": {"type": "Catalog", "id": "test"},
239
+ "s3://bucket/col/item.json": {"type": "Feature", "id": "item"},
240
+ }
241
+ self.publisher._write_stac_catalog_to_s3(
242
+ file_dict, {"key": "k", "secret": "s"}
243
+ )
244
+
245
+ self.assertEqual(mock_fsspec_open.call_count, 2)
246
+ mock_fsspec_open.assert_any_call(
247
+ "s3://bucket/catalog.json", "w", key="k", secret="s"
248
+ )
249
+ mock_fsspec_open.assert_any_call(
250
+ "s3://bucket/col/item.json", "w", key="k", secret="s"
251
+ )
252
+
253
+ # ------------------------------------------------------------------
254
+ # End-to-end zarr STAC publishing wired into publish()
255
+ # ------------------------------------------------------------------
256
+
257
+ @patch("deep_code.tools.publish.fsspec.open")
258
+ @patch.object(Publisher, "publish_dataset", return_value={"github_file.json": {}})
259
+ def test_publish_writes_zarr_stac_to_s3_when_configured(
260
+ self, mock_publish_ds, mock_fsspec_open
261
+ ):
262
+ self.publisher.dataset_config["stac_catalog_s3_root"] = (
263
+ "s3://test-bucket/stac/"
264
+ )
265
+
266
+ mock_ctx = MagicMock()
267
+ mock_ctx.__enter__ = MagicMock(return_value=MagicMock())
268
+ mock_ctx.__exit__ = MagicMock(return_value=False)
269
+ mock_fsspec_open.return_value = mock_ctx
270
+
271
+ mock_generator = MagicMock()
272
+ mock_generator.build_zarr_stac_catalog_file_dict.return_value = {
273
+ "s3://test-bucket/stac/catalog.json": {"type": "Catalog"},
274
+ "s3://test-bucket/stac/test-collection/item.json": {"type": "Feature"},
275
+ }
276
+ # Simulate what publish_dataset() normally does: store the generator
277
+ self.publisher._last_generator = mock_generator
278
+ self.publisher.gh_publisher.publish_files.return_value = "PR_URL"
279
+
280
+ self.publisher.publish(mode="dataset")
281
+
282
+ mock_generator.build_zarr_stac_catalog_file_dict.assert_called_once_with(
283
+ "s3://test-bucket/stac/"
284
+ )
285
+ # Two S3 files written: catalog.json + item.json
286
+ self.assertEqual(mock_fsspec_open.call_count, 2)
287
+
288
+ # ------------------------------------------------------------------
289
+ # Project collection create-vs-update branching
290
+ # ------------------------------------------------------------------
291
+
292
+ @patch("deep_code.tools.publish.OscDatasetStacGenerator")
293
+ def test_publish_dataset_creates_project_collection_when_missing(
294
+ self, MockGenerator
295
+ ):
296
+ """When the project collection does not exist, build_project_collection is
297
+ called and projects/catalog.json is updated via _update_and_add_to_file_dict."""
298
+ mock_gen = MagicMock()
299
+ mock_gen.osc_project = "test-project"
300
+ mock_gen.get_variable_ids.return_value = []
301
+ mock_gen.build_dataset_stac_collection.return_value.to_dict.return_value = {}
302
+ mock_gen.build_project_collection.return_value = {
303
+ "type": "Collection",
304
+ "id": "test-project",
305
+ }
306
+ MockGenerator.return_value = mock_gen
307
+
308
+ self.publisher.dataset_config = {
309
+ "dataset_id": "test-dataset",
310
+ "collection_id": "test-collection",
311
+ "license_type": "CC-BY-4.0",
312
+ }
313
+ self.publisher.collection_id = "test-collection"
314
+
315
+ # Project collection is missing; all other file_exists calls return True
316
+ self.publisher.gh_publisher.github_automation.file_exists.return_value = False
317
+
318
+ with patch.object(self.publisher, "_update_and_add_to_file_dict") as mock_update, \
319
+ patch.object(self.publisher, "_update_variable_catalogs"):
320
+ file_dict = self.publisher.publish_dataset(write_to_file=False)
321
+
322
+ mock_gen.build_project_collection.assert_called_once()
323
+ self.assertIn("projects/test-project/collection.json", file_dict)
324
+ mock_gen.update_deepesdl_collection.assert_not_called()
325
+
326
+ # projects/catalog.json must be updated
327
+ updated_paths = [call.args[1] for call in mock_update.call_args_list]
328
+ self.assertIn("projects/catalog.json", updated_paths)
329
+
330
+ @patch("deep_code.tools.publish.OscDatasetStacGenerator")
331
+ def test_publish_dataset_updates_project_collection_when_exists(
332
+ self, MockGenerator
333
+ ):
334
+ """When the project collection exists, update_deepesdl_collection is called
335
+ via _update_and_add_to_file_dict and build_project_collection is not called."""
336
+ mock_gen = MagicMock()
337
+ mock_gen.osc_project = "test-project"
338
+ mock_gen.get_variable_ids.return_value = []
339
+ mock_gen.build_dataset_stac_collection.return_value.to_dict.return_value = {}
340
+ MockGenerator.return_value = mock_gen
341
+
342
+ self.publisher.dataset_config = {
343
+ "dataset_id": "test-dataset",
344
+ "collection_id": "test-collection",
345
+ "license_type": "CC-BY-4.0",
346
+ }
347
+ self.publisher.collection_id = "test-collection"
348
+
349
+ # Project collection already exists
350
+ self.publisher.gh_publisher.github_automation.file_exists.return_value = True
351
+
352
+ with patch.object(self.publisher, "_update_and_add_to_file_dict") as mock_update, \
353
+ patch.object(self.publisher, "_update_variable_catalogs"):
354
+ self.publisher.publish_dataset(write_to_file=False)
355
+
356
+ mock_gen.build_project_collection.assert_not_called()
357
+
358
+ # update_deepesdl_collection passed to _update_and_add_to_file_dict
359
+ update_methods = [call.args[2] for call in mock_update.call_args_list]
360
+ self.assertIn(mock_gen.update_deepesdl_collection, update_methods)
361
+
362
+ @patch.object(Publisher, "publish_dataset", return_value={"github_file.json": {}})
363
+ def test_publish_skips_zarr_stac_when_not_configured(self, mock_publish_ds):
364
+ # No stac_catalog_s3_root in config
365
+ self.publisher.dataset_config = {
366
+ "collection_id": "test-collection",
367
+ "dataset_id": "test-dataset",
368
+ }
369
+ self.publisher.gh_publisher.publish_files.return_value = "PR_URL"
370
+
371
+ with patch.object(
372
+ self.publisher, "_write_stac_catalog_to_s3"
373
+ ) as mock_write:
374
+ self.publisher.publish(mode="dataset")
375
+ mock_write.assert_not_called()
376
+
377
+
177
378
  class TestParseGithubNotebookUrl:
178
379
  @pytest.mark.parametrize(
179
380
  "url,repo_url,repo_name,branch,file_path",