deep-code 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deep_code/__init__.py +24 -0
- deep_code/cli/__init__.py +3 -0
- deep_code/cli/main.py +22 -0
- deep_code/cli/publish.py +26 -0
- deep_code/constants.py +16 -0
- deep_code/tests/tools/__init__.py +3 -0
- deep_code/tests/tools/test_publish.py +120 -0
- deep_code/tests/utils/__init__.py +3 -0
- deep_code/tests/utils/test_dataset_stac_generator.py +219 -0
- deep_code/tests/utils/test_github_automation.py +120 -0
- deep_code/tests/utils/test_ogc_api_record.py +113 -0
- deep_code/tests/utils/test_ogc_record_generator.py +63 -0
- deep_code/tests/utils/test_osc_extension.py +117 -0
- deep_code/tools/__init__.py +3 -0
- deep_code/tools/check.py +4 -0
- deep_code/tools/new.py +5 -0
- deep_code/tools/publish.py +233 -0
- deep_code/tools/register.py +0 -0
- deep_code/tools/setup_ci.py +1 -0
- deep_code/tools/test.py +2 -0
- deep_code/utils/__init__.py +3 -0
- deep_code/utils/dataset_stac_generator.py +426 -0
- deep_code/utils/github_automation.py +122 -0
- deep_code/utils/ogc_api_record.py +94 -0
- deep_code/utils/ogc_record_generator.py +54 -0
- deep_code/utils/osc_extension.py +201 -0
- deep_code/version.py +22 -0
- deep_code-0.0.1.dev0.dist-info/LICENSE +21 -0
- deep_code-0.0.1.dev0.dist-info/METADATA +166 -0
- deep_code-0.0.1.dev0.dist-info/RECORD +33 -0
- deep_code-0.0.1.dev0.dist-info/WHEEL +5 -0
- deep_code-0.0.1.dev0.dist-info/entry_points.txt +2 -0
- deep_code-0.0.1.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright (c) 2025 by Brockmann Consult GmbH
|
|
4
|
+
# Permissions are hereby granted under the terms of the MIT License:
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from pystac import Catalog, Collection, Extent, Link, SpatialExtent, TemporalExtent
|
|
13
|
+
from xcube.core.store import new_data_store
|
|
14
|
+
|
|
15
|
+
from deep_code.utils.osc_extension import OscExtension
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class OscDatasetStacGenerator:
    """Generates OSC STAC Collections for a product from Zarr datasets.

    Args:
        dataset_id: ID of the Zarr dataset.
        collection_id: Unique identifier for the STAC collection.
        access_link: Public access link to the dataset. Defaults to
            ``s3://deep-esdl-public/<dataset_id>`` when not given.
        documentation_link: Link to dataset documentation.
        osc_status: Status of the dataset (e.g., "ongoing").
        osc_region: Geographical region associated with the dataset.
        osc_themes: List of themes related to the dataset (e.g., ["climate"]).
        osc_missions: List of satellite missions associated with the dataset.
        cf_params: CF metadata parameters for the dataset.
    """

    def __init__(
        self,
        dataset_id: str,
        collection_id: str,
        access_link: str | None = None,
        documentation_link: str | None = None,
        osc_status: str = "ongoing",
        osc_region: str = "Global",
        osc_themes: list[str] | None = None,
        osc_missions: list[str] | None = None,
        # Fix: annotation was list[dict[str]], which is not a valid dict
        # parameterization; the values used downstream are plain dicts.
        cf_params: list[dict] | None = None,
    ):
        self.dataset_id = dataset_id
        self.collection_id = collection_id
        self.access_link = access_link or f"s3://deep-esdl-public/{dataset_id}"
        self.documentation_link = documentation_link
        self.osc_status = osc_status
        self.osc_region = osc_region
        self.osc_themes = osc_themes or []
        self.osc_missions = osc_missions or []
        # Fix: default used to be {} although cf_params is declared (and
        # consumed) as a list; an empty list keeps the attribute type
        # consistent and is equally falsy for the downstream truth test.
        self.cf_params = cf_params or []
        self.logger = logging.getLogger(__name__)
        # May perform S3 I/O (see _open_dataset).
        self.dataset = self._open_dataset()
        self.variables_metadata = self.get_variables_metadata()
|
|
57
|
+
|
|
58
|
+
def _open_dataset(self):
    """Open the dataset using a S3 store as a xarray Dataset.

    Method of OscDatasetStacGenerator. Tries the public anonymous store
    first, then an authenticated store configured via environment
    variables; the first configuration that opens the dataset wins.

    Raises:
        ValueError: If no configuration could open the dataset.
    """
    store_configs = [
        {
            "description": "Public store",
            "params": {
                "storage_type": "s3",
                "root": "deep-esdl-public",
                "storage_options": {"anon": True},
            },
        },
        {
            "description": "Authenticated store",
            "params": {
                "storage_type": "s3",
                "root": os.environ.get("S3_USER_STORAGE_BUCKET"),
                "storage_options": {
                    "anon": False,
                    "key": os.environ.get("S3_USER_STORAGE_KEY"),
                    "secret": os.environ.get("S3_USER_STORAGE_SECRET"),
                },
            },
        },
    ]

    failure = None
    attempted: list[str] = []
    for cfg in store_configs:
        label = cfg["description"]
        params = cfg["params"]
        attempted.append(label)
        try:
            self.logger.info(
                f"Attempting to open dataset with configuration: "
                f"{label}"
            )
            store = new_data_store(
                params["storage_type"],
                root=params["root"],
                storage_options=params["storage_options"],
            )
            opened = store.open_data(self.dataset_id)
        except Exception as err:
            self.logger.error(
                f"Failed to open dataset with configuration: "
                f"{label}. Error: {err}"
            )
            failure = err
        else:
            self.logger.info(
                f"Successfully opened dataset with configuration: "
                f"{label}"
            )
            return opened

    raise ValueError(
        f"Failed to open Zarr dataset with ID {self.dataset_id}. "
        f"Tried configurations: {', '.join(attempted)}. "
        f"Last error: {failure}"
    )
|
|
117
|
+
|
|
118
|
+
def _get_spatial_extent(self) -> SpatialExtent:
    """Extract spatial extent from the dataset.

    Method of OscDatasetStacGenerator. Supports regular grids named
    ('lon', 'lat') or ('longitude', 'latitude') and irregular grids
    named ('x', 'y').

    Returns:
        A pystac.SpatialExtent with one [west, south, east, north] bbox.

    Raises:
        ValueError: If no recognized coordinate pair is present.
    """
    # The three previously duplicated branches collapse into one loop
    # over the recognized coordinate-name pairs, tried in order.
    for x_name, y_name in (
        ("lon", "lat"),
        ("longitude", "latitude"),
        ("x", "y"),
    ):
        if {x_name, y_name}.issubset(self.dataset.coords):
            x_coord = self.dataset[x_name]
            y_coord = self.dataset[y_name]
            return SpatialExtent(
                [
                    [
                        float(x_coord.min()),
                        float(y_coord.min()),
                        float(x_coord.max()),
                        float(y_coord.max()),
                    ]
                ]
            )
    # Fix: the original message omitted the supported
    # 'longitude'/'latitude' pair, which the code does handle.
    raise ValueError(
        "Dataset does not have recognized spatial coordinates "
        "('lon'/'lat', 'longitude'/'latitude' or 'x'/'y')."
    )
|
|
152
|
+
|
|
153
|
+
def _get_temporal_extent(self) -> TemporalExtent:
    """Extract temporal extent from the dataset. (Method of OscDatasetStacGenerator.)

    Raises:
        ValueError: If 'time' is absent or its bounds cannot be parsed.
    """
    # Guard clause instead of the original if/else nesting.
    if "time" not in self.dataset.coords:
        raise ValueError("Dataset does not have a 'time' coordinate.")
    try:
        time_coord = self.dataset.time
        start = pd.to_datetime(time_coord.min().values).to_pydatetime()
        end = pd.to_datetime(time_coord.max().values).to_pydatetime()
        return TemporalExtent([[start, end]])
    except Exception as err:
        raise ValueError(f"Failed to parse temporal extent: {err}")
|
|
169
|
+
|
|
170
|
+
@staticmethod
def _normalize_name(name: str | None) -> str | None:
    """Lower-case *name* and replace spaces with hyphens; falsy input maps to None."""
    if not name:
        return None
    return name.lower().replace(" ", "-")
|
|
173
|
+
|
|
174
|
+
def _get_general_metadata(self) -> dict:
    """Return general collection metadata taken from dataset attributes.

    (Method of OscDatasetStacGenerator.) Falls back to a placeholder
    when the dataset has no 'description' attribute.
    """
    description = self.dataset.attrs.get(
        "description", "No description available."
    )
    return {"description": description}
|
|
180
|
+
|
|
181
|
+
def extract_metadata_for_variable(self, variable_data) -> dict:
    """Extract metadata for a single variable. (Method of OscDatasetStacGenerator.)

    The variable id prefers the CF 'standard_name' attribute, falling
    back to the variable's own name; the description falls back to
    'long_name' when no explicit description exists.
    """
    attrs = variable_data.attrs
    raw_id = attrs.get("standard_name") or variable_data.name
    return {
        "variable_id": self._normalize_name(raw_id),
        "description": attrs.get("description", attrs.get("long_name")),
        "gcmd_keyword_url": attrs.get("gcmd_keyword_url"),
    }
|
|
193
|
+
|
|
194
|
+
def get_variable_ids(self) -> list[str]:
    """Return the IDs of all variables known to this generator. (Method of OscDatasetStacGenerator.)"""
    return [*self.variables_metadata]
|
|
197
|
+
|
|
198
|
+
def get_variables_metadata(self) -> dict[str, dict]:
    """Extract metadata for all dataset variables, keyed by variable id.

    (Method of OscDatasetStacGenerator.)
    """
    per_variable = (
        self.extract_metadata_for_variable(var)
        for var in self.dataset.data_vars.values()
    )
    return {meta.get("variable_id"): meta for meta in per_variable}
|
|
205
|
+
|
|
206
|
+
def _add_gcmd_link_to_var_catalog(
    self, var_catalog: Catalog, var_metadata: dict
) -> None:
    """Add a 'via' link to the GCMD Keyword Viewer when a URL is known.

    (Method of OscDatasetStacGenerator.) Quietly skips catalogs whose
    metadata carries no 'gcmd_keyword_url'.

    Args:
        var_catalog: The PySTAC Catalog to which we want to add the link.
        var_metadata: Dictionary containing metadata about the variable,
            including 'gcmd_keyword_url'.
    """
    keyword_url = var_metadata.get("gcmd_keyword_url")
    if not keyword_url:
        self.logger.debug(
            f"No gcmd_keyword_url in var_metadata. Skipping adding GCMD link in "
            f'the {var_metadata.get("variable_id")} catalog'
        )
        return
    gcmd_link = Link(
        rel="via",
        target=keyword_url,
        title="Description",
        media_type="text/html",
    )
    var_catalog.add_link(gcmd_link)
    self.logger.info(
        f'Added GCMD link for {var_metadata.get("variable_id")} '
        f"catalog {keyword_url}."
    )
|
|
237
|
+
|
|
238
|
+
def build_variable_catalog(self, var_metadata) -> Catalog:
    """Build an OSC STAC Catalog for one variable of the dataset.

    (Method of OscDatasetStacGenerator.)

    Returns:
        A pystac.Catalog object.
    """
    var_id = var_metadata.get("variable_id")
    # Themes block following the OSC theme scheme convention.
    theme_block = [
        {
            "scheme": "https://github.com/stac-extensions/osc#theme",
            "concepts": [{"id": theme} for theme in self.osc_themes],
        }
    ]

    var_catalog = Catalog(
        id=var_id,
        description=var_metadata.get("description"),
        title=var_id,
        stac_extensions=[
            "https://stac-extensions.github.io/themes/v1.0.0/schema.json"
        ],
    )
    var_catalog.stac_version = "1.0.0"
    var_catalog.extra_fields["updated"] = datetime.now(timezone.utc).isoformat()
    var_catalog.keywords = []
    var_catalog.extra_fields["themes"] = theme_block

    # Recreate the root link, then wire up child/parent navigation.
    var_catalog.remove_links("root")
    navigation = (
        ("root", "../../catalog.json", "Open Science Catalog"),
        (
            "child",
            f"../../products/{self.collection_id}/collection.json",
            self.collection_id,
        ),
        ("parent", "../catalog.json", "Variables"),
    )
    for rel, target, title in navigation:
        var_catalog.add_link(
            Link(
                rel=rel,
                target=target,
                media_type="application/json",
                title=title,
            )
        )
    # Link out to the GCMD keyword definition, if present.
    self._add_gcmd_link_to_var_catalog(var_catalog, var_metadata)

    # 'self' link: the direct URL where this JSON is hosted.
    var_catalog.set_self_href(
        f"https://esa-earthcode.github.io/open-science-catalog-metadata/variables"
        f"/{var_id}/catalog.json"
    )
    return var_catalog
|
|
314
|
+
|
|
315
|
+
def update_existing_variable_catalog(self, var_file_path, var_id) -> Catalog:
    """Refresh an already-published variable catalog for this product.

    (Method of OscDatasetStacGenerator.) Loads the catalog from disk,
    bumps its 'updated' timestamp, appends a child link to this
    product's collection, and re-sets the hosted self href.
    """
    catalog = Catalog.from_file(var_file_path)
    catalog.extra_fields["updated"] = datetime.now(timezone.utc).isoformat()

    # 'child' link: this product's collection now references the variable.
    product_link = Link(
        rel="child",
        target=f"../../products/{self.collection_id}/collection.json",
        media_type="application/json",
        title=self.collection_id,
    )
    catalog.add_link(product_link)

    # 'self' link: the direct URL where this JSON is hosted.
    catalog.set_self_href(
        f"https://esa-earthcode.github.io/open-science-catalog-metadata/variables"
        f"/{var_id}/catalog.json"
    )
    return catalog
|
|
337
|
+
|
|
338
|
+
def build_dataset_stac_collection(self) -> Collection:
    """Build an OSC STAC Collection for the dataset.

    (Method of OscDatasetStacGenerator.)

    Returns:
        A pystac.Collection carrying the OSC extension fields and the
        navigation links expected by the Open Science Catalog layout.

    Raises:
        ValueError: If metadata extraction or OSC extension validation fails.
    """
    try:
        spatial_extent = self._get_spatial_extent()
        temporal_extent = self._get_temporal_extent()
        variables = self.get_variable_ids()
        general_metadata = self._get_general_metadata()
    except ValueError as e:
        raise ValueError(f"Metadata extraction failed: {e}")

    # Build base STAC Collection
    collection = Collection(
        id=self.collection_id,
        description=general_metadata.get("description", "No description provided."),
        extent=Extent(spatial=spatial_extent, temporal=temporal_extent),
    )

    # Add OSC extension metadata
    osc_extension = OscExtension.add_to(collection)
    # osc_project and osc_type are fixed constant values
    osc_extension.osc_project = "deep-earth-system-data-lab"
    osc_extension.osc_type = "product"
    osc_extension.osc_status = self.osc_status
    osc_extension.osc_region = self.osc_region
    osc_extension.osc_themes = self.osc_themes
    osc_extension.osc_variables = variables
    osc_extension.osc_missions = self.osc_missions
    # Fall back to a single CF parameter named after the collection.
    if self.cf_params:
        osc_extension.cf_parameter = self.cf_params
    else:
        osc_extension.cf_parameter = [{"name": self.collection_id}]

    # Add creation and update timestamps for the collection
    now_iso = datetime.now(timezone.utc).isoformat()
    collection.extra_fields["created"] = now_iso
    collection.extra_fields["updated"] = now_iso
    collection.title = self.collection_id

    # Remove any existing root link and re-add it properly
    collection.remove_links("root")
    collection.add_link(
        Link(
            rel="root",
            target="../../catalog.json",
            media_type="application/json",
            title="Open Science Catalog",
        )
    )
    collection.add_link(Link(rel="via", target=self.access_link, title="Access"))
    if self.documentation_link:
        collection.add_link(
            Link(rel="via", target=self.documentation_link, title="Documentation")
        )
    collection.add_link(
        Link(
            rel="parent",
            target="../catalog.json",
            media_type="application/json",
            title="Products",
        )
    )
    # Cross-reference each variable catalog.
    for var in variables:
        collection.add_link(
            Link(
                rel="related",
                # Fix: the path segment was misspelled "varibales",
                # producing dead links to the variable catalogs.
                target=f"../../variables/{var}/catalog.json",
                media_type="application/json",
                title="Variable: " + var,
            )
        )

    # NOTE(review): the self href is hard-coded to the "deepesdl" product
    # slug rather than self.collection_id — confirm this is intentional.
    self_href = (
        "https://esa-earthcode.github.io/"
        "open-science-catalog-metadata/products/deepesdl/collection.json"
    )
    collection.set_self_href(self_href)

    # Validate OSC extension fields
    try:
        osc_extension.validate_extension()
    except ValueError as e:
        raise ValueError(f"OSC Extension validation failed: {e}")

    return collection
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright (c) 2025 by Brockmann Consult GmbH
|
|
4
|
+
# Permissions are hereby granted under the terms of the MIT License:
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import subprocess
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GitHubAutomation:
    """Automates GitHub operations needed to create a Pull Request.

    Args:
        username: GitHub username.
        token: Personal access token for GitHub.
        repo_owner: Owner of the repository to fork.
        repo_name: Name of the repository to fork.
    """

    def __init__(self, username: str, token: str, repo_owner: str, repo_name: str):
        self.username = username
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        # Working copy location inside the user's home directory.
        self.local_clone_dir = os.path.join(os.path.expanduser("~"), "temp_repo")
        # HTTPS remote of the upstream repository.
        self.base_repo_url = f"https://github.com/{repo_owner}/{repo_name}.git"
        # Token-authenticated remote of the user's fork.
        self.fork_repo_url = (
            f"https://{username}:{token}@github.com/{username}/{repo_name}.git"
        )
|
|
36
|
+
|
|
37
|
+
def fork_repository(self):
    """Fork the repository to the user's GitHub account. (Method of GitHubAutomation.)"""
    logging.info("Forking repository...")
    endpoint = (
        f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/forks"
    )
    auth_headers = {"Authorization": f"token {self.token}"}
    response = requests.post(endpoint, headers=auth_headers)
    # Raises on any non-2xx response.
    response.raise_for_status()
    logging.info(f"Repository forked to {self.username}/{self.repo_name}")
|
|
45
|
+
|
|
46
|
+
def clone_repository(self):
    """Clone the forked repository locally and change into the clone.

    (Method of GitHubAutomation.) Note: changes the process CWD.

    Raises:
        RuntimeError: If `git clone` fails.
    """
    logging.info("Cloning forked repository...")
    clone_cmd = ["git", "clone", self.fork_repo_url, self.local_clone_dir]
    try:
        subprocess.run(clone_cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to clone repository: {e}")
    os.chdir(self.local_clone_dir)
|
|
56
|
+
|
|
57
|
+
@staticmethod
def create_branch(branch_name: str):
    """Create and switch to a new branch in the local repository.

    (Method of GitHubAutomation; operates on the current working directory.)

    Raises:
        RuntimeError: If `git checkout -b` fails.
    """
    logging.info(f"Creating new branch: {branch_name}...")
    checkout_cmd = ["git", "checkout", "-b", branch_name]
    try:
        subprocess.run(checkout_cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed Creating branch: '{branch_name}': {e}")
|
|
65
|
+
|
|
66
|
+
def add_file(self, file_path: str, content):
    """Write *content* as JSON into the local clone and stage it with git.

    (Method of GitHubAutomation.)

    Args:
        file_path: Path of the file relative to the repository root.
        content: JSON-serializable object, or an object exposing
            ``to_dict()`` (e.g. a PySTAC catalog/collection).

    Raises:
        RuntimeError: If `git add` fails.
    """
    logging.info(f"Adding new file: {file_path}...")
    full_path = Path(self.local_clone_dir) / file_path
    full_path.parent.mkdir(parents=True, exist_ok=True)
    # Convert content to dictionary if it's a PySTAC object.
    if hasattr(content, "to_dict"):
        content = content.to_dict()
    # Fix: write with an explicit encoding so the output does not depend
    # on the platform's default locale encoding.
    with open(full_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(content, indent=2))
    try:
        subprocess.run(["git", "add", str(full_path)], check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to add file '{file_path}': {e}")
|
|
80
|
+
|
|
81
|
+
@staticmethod
def commit_and_push(branch_name: str, commit_message: str):
    """Commit staged changes and push them to the fork.

    (Method of GitHubAutomation; operates on the current working directory.)

    Raises:
        RuntimeError: If either git command fails.
    """
    logging.info("Committing and pushing changes...")
    git_commands = (
        ["git", "commit", "-m", commit_message],
        ["git", "push", "-u", "origin", branch_name],
    )
    try:
        for cmd in git_commands:
            subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to commit and push: {e}")
|
|
90
|
+
|
|
91
|
+
def create_pull_request(
    self, branch_name: str, pr_title: str, pr_body: str, base_branch: str = "main"
):
    """Create a pull request from the forked repository to the base repository.

    (Method of GitHubAutomation.)

    Args:
        branch_name: Branch on the fork holding the changes.
        pr_title: Title of the pull request.
        pr_body: Description body of the pull request.
        base_branch: Target branch on the upstream repository.
    """
    logging.info("Creating a pull request...")
    endpoint = (
        f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/pulls"
    )
    auth_headers = {"Authorization": f"token {self.token}"}
    payload = {
        "title": pr_title,
        "head": f"{self.username}:{branch_name}",
        "base": base_branch,
        "body": pr_body,
    }
    response = requests.post(endpoint, headers=auth_headers, json=payload)
    response.raise_for_status()
    pr_url = response.json()["html_url"]
    logging.info(f"Pull request created: {pr_url}")
|
|
108
|
+
|
|
109
|
+
def clean_up(self):
    """Remove the local cloned repository.

    (Method of GitHubAutomation.) Steps out of the clone first so the
    directory can be deleted.

    Raises:
        RuntimeError: If the directory cannot be removed.
    """
    import shutil

    logging.info("Cleaning up local repository...")
    os.chdir("..")
    # Fix: the original ran `subprocess.run(["rm", "-rf", ...])` without
    # check=True, so the CalledProcessError handler was dead code and
    # failures went unnoticed; it was also Unix-only. shutil.rmtree is
    # portable and raises on error.
    try:
        shutil.rmtree(self.local_clone_dir)
    except OSError as e:
        raise RuntimeError(f"Failed to clean-up local repository: {e}")
|
|
117
|
+
|
|
118
|
+
def file_exists(self, file_path) -> bool:
    """Return True if *file_path* exists as a regular file inside the local clone.

    (Method of GitHubAutomation.)
    """
    full_path = Path(self.local_clone_dir) / file_path
    exists = full_path.is_file()
    logging.debug(f"Checking existence of {full_path}: {exists}")
    return exists
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
from xrlint.util.constructible import MappingConstructible
|
|
4
|
+
from xrlint.util.serializable import JsonSerializable
|
|
5
|
+
|
|
6
|
+
from deep_code.constants import OGC_API_RECORD_SPEC
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Contact(MappingConstructible["Contact"], JsonSerializable):
    """A contact entry of an OGC API record.

    Field names (including the camelCase ``contactInstructions``) follow
    the record JSON schema and must not be renamed.
    """

    def __init__(
        self,
        name: str,
        organization: str,
        position: str | None = "",
        links: list[dict[str, Any]] | None = None,
        contactInstructions: str | None = "",
        # Fix: annotation was `list[str] = None`, missing the `| None`.
        roles: list[str] | None = None,
    ):
        self.name = name
        self.organization = organization
        self.position = position
        self.links = links or []
        self.contactInstructions = contactInstructions
        # Default role mirrors the most common record usage.
        self.roles = roles or ["principal investigator"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ThemeConcept(MappingConstructible["ThemeConcept"], JsonSerializable):
    """A single concept entry (one keyword identifier) inside a Theme."""

    def __init__(self, id: str):
        # `id` shadows the builtin, but the name is fixed by the record schema.
        self.id = id
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Theme(MappingConstructible["Theme"], JsonSerializable):
    """A themes block: a list of concepts grouped under a common scheme URI."""

    def __init__(self, concepts: list[ThemeConcept], scheme: str):
        self.concepts = concepts
        self.scheme = scheme
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Fix: the MappingConstructible type argument was copy-pasted as
# "RecordProperties"; it should name this class.
class JupyterKernelInfo(MappingConstructible["JupyterKernelInfo"], JsonSerializable):
    """Describes the Jupyter kernel a workflow runs on.

    Args:
        name: Kernel name.
        python_version: Python version of the kernel.
            NOTE(review): a float drops trailing zeros (3.10 -> 3.1);
            a string may be safer — TODO confirm with callers.
        env_file: Path or URL of the environment definition file.
    """

    def __init__(self, name: str, python_version: float, env_file: str):
        self.name = name
        self.python_version = python_version
        self.env_file = env_file
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RecordProperties(MappingConstructible["RecordProperties"], JsonSerializable):
    """The properties block of an OGC API record describing a workflow.

    Parameter names `type` and `license` shadow builtins but are fixed by
    the record JSON schema.
    """

    def __init__(
        self,
        created: str,
        type: str,
        title: str,
        description: str,
        jupyter_kernel_info: JupyterKernelInfo,
        # Fix: the optional parameters below were annotated without `| None`
        # although their defaults are None.
        updated: str | None = None,
        contacts: list[Contact] | None = None,
        themes: list[Theme] | None = None,
        keywords: list[str] | None = None,
        formats: list[dict] | None = None,
        license: str | None = None,
    ):
        self.created = created
        self.updated = updated
        self.type = type
        self.title = title
        self.description = description
        self.jupyter_kernel_info = jupyter_kernel_info
        self.keywords = keywords or []
        # NOTE: contacts/themes deliberately stay None when omitted
        # (unlike keywords/formats, which default to empty lists).
        self.contacts = contacts
        self.themes = themes
        self.formats = formats or []
        self.license = license
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class OgcRecord(MappingConstructible["OgcRecord"], JsonSerializable):
    """A top-level OGC API record (id, time, properties, links)."""

    def __init__(
        self,
        id: str,
        type: str,
        time: dict,
        properties: RecordProperties,
        links: list[dict],
        # Fix: `linkTemplates: list = []` was a shared mutable default —
        # every instance constructed without the argument would alias the
        # same list. Use a None sentinel instead (backward-compatible).
        linkTemplates: list | None = None,
        conformsTo: list[str] | None = None,
        geometry: Optional[Any] = None,
    ):
        if linkTemplates is None:
            linkTemplates = []
        if conformsTo is None:
            conformsTo = [OGC_API_RECORD_SPEC]
        self.id = id
        self.type = type
        self.conformsTo = conformsTo
        self.time = time
        self.geometry = geometry
        self.properties = properties
        self.linkTemplates = linkTemplates
        self.links = links
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright (c) 2025 by Brockmann Consult GmbH
|
|
4
|
+
# Permissions are hereby granted under the terms of the MIT License:
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
|
|
9
|
+
from deep_code.constants import DEFAULT_THEME_SCHEME
|
|
10
|
+
from deep_code.utils.ogc_api_record import (
|
|
11
|
+
Contact,
|
|
12
|
+
RecordProperties,
|
|
13
|
+
Theme,
|
|
14
|
+
ThemeConcept,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class OSCWorkflowOGCApiRecordGenerator:
    """Generates OGC API records for a workflow."""

    @staticmethod
    def build_contact_objects(contacts_list: list[dict]) -> list[Contact]:
        """Build a list of Contact objects from contact dictionaries.

        Uses the inherited MappingConstructible logic to parse each dict.

        Args:
            contacts_list: A list of dictionaries, each containing contact
                information.

        Returns:
            A list of Contact instances.
        """
        contacts = []
        for contact_dict in contacts_list:
            contacts.append(Contact.from_value(contact_dict))
        return contacts

    @staticmethod
    def build_theme(osc_themes: list[str]) -> Theme:
        """Wrap each theme string in a ThemeConcept under the default scheme."""
        concepts = [ThemeConcept(id=name) for name in osc_themes]
        return Theme(concepts=concepts, scheme=DEFAULT_THEME_SCHEME)

    def build_record_properties(self, properties, contacts) -> RecordProperties:
        """Build a RecordProperties object from a properties dict.

        NOTE: mutates *properties* in place (timestamps, contacts, themes,
        default type), exactly as the callers expect.
        """
        timestamp = datetime.now(timezone.utc).isoformat()
        properties["created"] = timestamp
        properties["updated"] = timestamp
        theme_names = properties.get("themes", [])
        properties["contacts"] = self.build_contact_objects(contacts)
        if theme_names:
            properties["themes"] = [self.build_theme(theme_names)]
        properties.setdefault("type", "workflow")
        return RecordProperties.from_value(properties)