deep-code 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,547 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) 2025 by Brockmann Consult GmbH
3
+ # Permissions are hereby granted under the terms of the MIT License:
4
+ # https://opensource.org/licenses/MIT.
5
+
6
+ import logging
7
+ import os
8
+ from datetime import datetime, timezone
9
+
10
+ import pandas as pd
11
+ from pystac import Catalog, Collection, Extent, Link, SpatialExtent, TemporalExtent
12
+ from xcube.core.store import new_data_store
13
+
14
+ from deep_code.constants import (
15
+ DEEPESDL_COLLECTION_SELF_HREF,
16
+ OSC_THEME_SCHEME,
17
+ PRODUCT_BASE_CATALOG_SELF_HREF,
18
+ VARIABLE_BASE_CATALOG_SELF_HREF,
19
+ )
20
+ from deep_code.utils.ogc_api_record import Theme, ThemeConcept
21
+ from deep_code.utils.osc_extension import OscExtension
22
+
23
+
24
+ class OscDatasetStacGenerator:
25
+ """Generates OSC STAC Collections for a product from Zarr datasets.
26
+
27
+ Args:
28
+ dataset_id: ID of the Zarr dataset.
29
+ collection_id: Unique identifier for the STAC collection.
30
+ access_link: Public access link to the dataset.
31
+ documentation_link: Link to dataset documentation.
32
+ osc_status: Status of the dataset (e.g., "ongoing").
33
+ osc_region: Geographical region associated with the dataset.
34
+ osc_themes: List of themes related to the dataset (e.g., ["climate"]).
35
+ osc_missions: List of satellite missions associated with the dataset.
36
+ cf_params: CF metadata parameters for the dataset.
37
+ """
38
+
39
+ def __init__(
40
+ self,
41
+ dataset_id: str,
42
+ collection_id: str,
43
+ access_link: str | None = None,
44
+ documentation_link: str | None = None,
45
+ osc_status: str = "ongoing",
46
+ osc_region: str = "Global",
47
+ osc_themes: list[str] | None = None,
48
+ osc_missions: list[str] | None = None,
49
+ cf_params: list[dict[str]] | None = None,
50
+ ):
51
+ self.dataset_id = dataset_id
52
+ self.collection_id = collection_id
53
+ self.access_link = access_link or f"s3://deep-esdl-public/{dataset_id}"
54
+ self.documentation_link = documentation_link
55
+ self.osc_status = osc_status
56
+ self.osc_region = osc_region
57
+ self.osc_themes = osc_themes or []
58
+ self.osc_missions = osc_missions or []
59
+ self.cf_params = cf_params or {}
60
+ self.logger = logging.getLogger(__name__)
61
+ self.dataset = self._open_dataset()
62
+ self.variables_metadata = self.get_variables_metadata()
63
+
64
+ def _open_dataset(self):
65
+ """Open the dataset using a S3 store as a xarray Dataset."""
66
+
67
+ store_configs = [
68
+ {
69
+ "description": "Public store",
70
+ "params": {
71
+ "storage_type": "s3",
72
+ "root": "deep-esdl-public",
73
+ "storage_options": {"anon": True},
74
+ },
75
+ },
76
+ {
77
+ "description": "Authenticated store",
78
+ "params": {
79
+ "storage_type": "s3",
80
+ "root": os.environ.get("S3_USER_STORAGE_BUCKET"),
81
+ "storage_options": {
82
+ "anon": False,
83
+ "key": os.environ.get("S3_USER_STORAGE_KEY"),
84
+ "secret": os.environ.get("S3_USER_STORAGE_SECRET"),
85
+ },
86
+ },
87
+ },
88
+ ]
89
+
90
+ # Iterate through configurations and attempt to open the dataset
91
+ last_exception = None
92
+ tried_configurations = []
93
+ for config in store_configs:
94
+ tried_configurations.append(config["description"])
95
+ try:
96
+ self.logger.info(
97
+ f"Attempting to open dataset with configuration: "
98
+ f"{config['description']}"
99
+ )
100
+ store = new_data_store(
101
+ config["params"]["storage_type"],
102
+ root=config["params"]["root"],
103
+ storage_options=config["params"]["storage_options"],
104
+ )
105
+ dataset = store.open_data(self.dataset_id)
106
+ self.logger.info(
107
+ f"Successfully opened dataset with configuration: "
108
+ f"{config['description']}"
109
+ )
110
+ return dataset
111
+ except Exception as e:
112
+ self.logger.error(
113
+ f"Failed to open dataset with configuration: "
114
+ f"{config['description']}. Error: {e}"
115
+ )
116
+ last_exception = e
117
+
118
+ raise ValueError(
119
+ f"Failed to open Zarr dataset with ID {self.dataset_id}. "
120
+ f"Tried configurations: {', '.join(tried_configurations)}. "
121
+ f"Last error: {last_exception}"
122
+ )
123
+
124
+ def _get_spatial_extent(self) -> SpatialExtent:
125
+ """Extract spatial extent from the dataset."""
126
+ if {"lon", "lat"}.issubset(self.dataset.coords):
127
+ # For regular gridding
128
+ lon_min, lon_max = (
129
+ float(self.dataset.lon.min()),
130
+ float(self.dataset.lon.max()),
131
+ )
132
+ lat_min, lat_max = (
133
+ float(self.dataset.lat.min()),
134
+ float(self.dataset.lat.max()),
135
+ )
136
+ return SpatialExtent([[lon_min, lat_min, lon_max, lat_max]])
137
+ elif {"longitude", "latitude"}.issubset(self.dataset.coords):
138
+ # For regular gridding with 'longitude' and 'latitude'
139
+ lon_min, lon_max = (
140
+ float(self.dataset.longitude.min()),
141
+ float(self.dataset.longitude.max()),
142
+ )
143
+ lat_min, lat_max = (
144
+ float(self.dataset.latitude.min()),
145
+ float(self.dataset.latitude.max()),
146
+ )
147
+ return SpatialExtent([[lon_min, lat_min, lon_max, lat_max]])
148
+ elif {"x", "y"}.issubset(self.dataset.coords):
149
+ # For irregular gridding
150
+ x_min, x_max = (float(self.dataset.x.min()), float(self.dataset.x.max()))
151
+ y_min, y_max = (float(self.dataset.y.min()), float(self.dataset.y.max()))
152
+ return SpatialExtent([[x_min, y_min, x_max, y_max]])
153
+ else:
154
+ raise ValueError(
155
+ "Dataset does not have recognized spatial coordinates "
156
+ "('lon', 'lat' or 'x', 'y')."
157
+ )
158
+
159
+ def _get_temporal_extent(self) -> TemporalExtent:
160
+ """Extract temporal extent from the dataset."""
161
+ if "time" in self.dataset.coords:
162
+ try:
163
+ # Convert the time bounds to datetime objects
164
+ time_min = pd.to_datetime(
165
+ self.dataset.time.min().values
166
+ ).to_pydatetime()
167
+ time_max = pd.to_datetime(
168
+ self.dataset.time.max().values
169
+ ).to_pydatetime()
170
+ return TemporalExtent([[time_min, time_max]])
171
+ except Exception as e:
172
+ raise ValueError(f"Failed to parse temporal extent: {e}")
173
+ else:
174
+ raise ValueError("Dataset does not have a 'time' coordinate.")
175
+
176
+ @staticmethod
177
+ def _normalize_name(name: str | None) -> str | None:
178
+ if name:
179
+ return (name.replace(" ", "-").
180
+ replace("_", "-").lower())
181
+ return None
182
+
183
+ def _get_general_metadata(self) -> dict:
184
+ return {
185
+ "description": self.dataset.attrs.get(
186
+ "description", "No description available."
187
+ )
188
+ }
189
+
190
+ def extract_metadata_for_variable(self, variable_data) -> dict:
191
+ """Extract metadata for a single variable."""
192
+ long_name = variable_data.attrs.get("long_name")
193
+ standard_name = variable_data.attrs.get("standard_name")
194
+ variable_id = standard_name or variable_data.name
195
+ description = variable_data.attrs.get("description", long_name)
196
+ gcmd_keyword_url = variable_data.attrs.get("gcmd_keyword_url")
197
+ return {
198
+ "variable_id": self._normalize_name(variable_id),
199
+ "description": description,
200
+ "gcmd_keyword_url": gcmd_keyword_url,
201
+ }
202
+
203
+ def get_variable_ids(self) -> list[str]:
204
+ """Get variable IDs for all variables in the dataset."""
205
+ variable_ids = list(self.variables_metadata.keys())
206
+ # Remove 'crs' and 'spatial_ref' from the list if they exist, note that
207
+ # spatial_ref will be normalized to spatial-ref in variable_ids and skipped.
208
+ return [var_id for var_id in variable_ids if var_id not in ["crs",
209
+ "spatial-ref"]]
210
+
211
+ def get_variables_metadata(self) -> dict[str, dict]:
212
+ """Extract metadata for all variables in the dataset."""
213
+ variables_metadata = {}
214
+ for var_name, variable in self.dataset.data_vars.items():
215
+ var_metadata = self.extract_metadata_for_variable(variable)
216
+ variables_metadata[var_metadata.get("variable_id")] = var_metadata
217
+ return variables_metadata
218
+
219
+ def _add_gcmd_link_to_var_catalog(
220
+ self, var_catalog: Catalog, var_metadata: dict
221
+ ) -> None:
222
+ """
223
+ Checks for a GCMD keyword URL in var_metadata, adds a 'via' link to the catalog
224
+ pointing to the GCMD Keyword Viewer.
225
+
226
+ Args:
227
+ var_catalog: The PySTAC Catalog to which we want to add the link.
228
+ var_metadata: Dictionary containing metadata about the variable,
229
+ including 'gcmd_keyword_url'.
230
+ """
231
+ gcmd_keyword_url = var_metadata.get("gcmd_keyword_url")
232
+ if not gcmd_keyword_url:
233
+ gcmd_keyword_url = input(
234
+ f"Enter GCMD keyword URL or a similar url for"
235
+ f" {var_metadata.get("variable_id")}: ").strip()
236
+ var_catalog.add_link(
237
+ Link(
238
+ rel="via",
239
+ target=gcmd_keyword_url,
240
+ title="Description",
241
+ media_type="text/html",
242
+ )
243
+ )
244
+ self.logger.info(
245
+ f'Added GCMD link for {var_metadata.get("variable_id")} '
246
+ f"catalog {gcmd_keyword_url}."
247
+ )
248
+
249
    def build_variable_catalog(self, var_metadata) -> Catalog:
        """Build an OSC STAC Catalog for one variable of the dataset.

        Args:
            var_metadata: Metadata dict for the variable, as produced by
                ``extract_metadata_for_variable`` ('variable_id',
                'description', 'gcmd_keyword_url').

        Returns:
            A pystac.Catalog object with root/child/parent/via/related
            links set and a 'self' href under the Open Science Catalog
            metadata site.
        """
        var_id = var_metadata.get("variable_id")
        concepts = [{"id": theme} for theme in self.osc_themes]

        # 'themes' block in the form expected by the STAC themes extension.
        # NOTE(review): scheme URL is hard-coded here although the module
        # imports OSC_THEME_SCHEME — confirm whether they should agree.
        themes = [
            {
                "scheme": "https://github.com/stac-extensions/osc#theme",
                "concepts": concepts,
            }
        ]

        now_iso = datetime.now(timezone.utc).isoformat()

        # Create the PySTAC Catalog carrying the themes extension.
        var_catalog = Catalog(
            id=var_id,
            description=var_metadata.get("description"),
            title=self.format_string(var_id),
            stac_extensions=[
                "https://stac-extensions.github.io/themes/v1.0.0/schema.json"
            ],
        )

        # Pin STAC version and record the update timestamp.
        var_catalog.stac_version = "1.0.0"
        var_catalog.extra_fields["updated"] = now_iso
        var_catalog.keywords = []

        # Attach the themes block defined above.
        var_catalog.extra_fields["themes"] = themes

        # Replace pystac's auto-generated root link with the OSC root.
        var_catalog.remove_links("root")
        var_catalog.add_link(
            Link(
                rel="root",
                target="../../catalog.json",
                media_type="application/json",
                title="Open Science Catalog",
            )
        )

        # 'child' link: the product collection that uses this variable.
        var_catalog.add_link(
            Link(
                rel="child",
                target=f"../../products/{self.collection_id}/collection.json",
                media_type="application/json",
                title=self.collection_id,
            )
        )

        # 'parent' link: back up to the variables overview catalog.
        var_catalog.add_link(
            Link(
                rel="parent",
                target="../catalog.json",
                media_type="application/json",
                title="Variables",
            )
        )
        # 'via' link to the GCMD keyword definition (may prompt on stdin).
        self._add_gcmd_link_to_var_catalog(var_catalog, var_metadata)

        # 'related' links to each OSC theme catalog.
        self.add_themes_as_related_links_var_catalog(var_catalog)

        self_href = (
            f"https://esa-earthcode.github.io/open-science-catalog-metadata/variables"
            f"/{var_id}/catalog.json"
        )
        # 'self' link: the direct URL where this JSON is hosted.
        var_catalog.set_self_href(self_href)

        return var_catalog
327
+
328
+ def update_product_base_catalog(self, product_catalog_path) -> Catalog:
329
+ """Link product to base product catalog"""
330
+ product_base_catalog = Catalog.from_file(product_catalog_path)
331
+ product_base_catalog.add_link(
332
+ Link(
333
+ rel="child",
334
+ target=f"./{self.collection_id}/collection.json",
335
+ media_type="application/json",
336
+ title=self.collection_id,
337
+ )
338
+ )
339
+ # 'self' link: the direct URL where this JSON is hosted
340
+ product_base_catalog.set_self_href(PRODUCT_BASE_CATALOG_SELF_HREF)
341
+ return product_base_catalog
342
+
343
+ def update_variable_base_catalog(self, variable_base_catalog_path, variable_ids) \
344
+ -> (
345
+ Catalog):
346
+ """Link product to base product catalog"""
347
+ variable_base_catalog = Catalog.from_file(variable_base_catalog_path)
348
+ for var_id in variable_ids:
349
+ variable_base_catalog.add_link(
350
+ Link(
351
+ rel="child",
352
+ target=f"./{var_id}/catalog.json",
353
+ media_type="application/json",
354
+ title=self.format_string(var_id),
355
+ )
356
+ )
357
+ # 'self' link: the direct URL where this JSON is hosted
358
+ variable_base_catalog.set_self_href(VARIABLE_BASE_CATALOG_SELF_HREF)
359
+ return variable_base_catalog
360
+
361
+ def add_themes_as_related_links_var_catalog(self, var_catalog):
362
+ """Add themes as related links to variable catalog"""
363
+ for theme in self.osc_themes:
364
+ var_catalog.add_link(
365
+ Link(
366
+ rel="related",
367
+ target=f"../../themes/{theme}/catalog.json",
368
+ media_type="application/json",
369
+ title=f"Theme: {self.format_string(theme)}",
370
+ )
371
+ )
372
+
373
+ def update_deepesdl_collection(self, deepesdl_collection_full_path):
374
+ deepesdl_collection = Collection.from_file(deepesdl_collection_full_path)
375
+ deepesdl_collection.add_link(
376
+ Link(
377
+ rel="child",
378
+ target=f"../../products/{self.collection_id}/collection.json",
379
+ media_type="application/json",
380
+ title=self.collection_id,
381
+ )
382
+ )
383
+ # add themes to deepesdl
384
+ for theme in self.osc_themes:
385
+ deepesdl_collection.add_link(
386
+ Link(
387
+ rel="related",
388
+ target=f"../../themes/{theme}/catalog.json",
389
+ media_type="application/json",
390
+ title=f"Theme: {self.format_string(theme)}"
391
+ )
392
+ )
393
+ deepesdl_collection.set_self_href(DEEPESDL_COLLECTION_SELF_HREF)
394
+ return deepesdl_collection
395
+
396
+ def update_existing_variable_catalog(self, var_file_path, var_id) -> Catalog:
397
+ existing_catalog = Catalog.from_file(var_file_path)
398
+ now_iso = datetime.now(timezone.utc).isoformat()
399
+ existing_catalog.extra_fields["updated"] = now_iso
400
+
401
+ # add 'child' link as the product
402
+ existing_catalog.add_link(
403
+ Link(
404
+ rel="child",
405
+ target=f"../../products/{self.collection_id}/collection.json",
406
+ media_type="application/json",
407
+ title=self.collection_id,
408
+ )
409
+ )
410
+ self.add_themes_as_related_links_var_catalog(existing_catalog)
411
+ self_href = (
412
+ f"https://esa-earthcode.github.io/open-science-catalog-metadata/variables"
413
+ f"/{var_id}/catalog.json"
414
+ )
415
+ # 'self' link: the direct URL where this JSON is hosted
416
+ existing_catalog.set_self_href(self_href)
417
+
418
+ return existing_catalog
419
+
420
+ @staticmethod
421
+ def format_string(s: str) -> str:
422
+ # Strip leading/trailing spaces/underscores and replace underscores with spaces
423
+ words = s.strip(" _").replace("_", " ").replace("-", " ").split()
424
+ # Capitalize each word and join them with a space
425
+ return " ".join(word.capitalize() for word in words)
426
+
427
+ @staticmethod
428
+ def build_theme(osc_themes: list[str]) -> Theme:
429
+ """Convert each string into a ThemeConcept
430
+ """
431
+ concepts = [ThemeConcept(id=theme_str) for theme_str in osc_themes]
432
+ return Theme(concepts=concepts, scheme=OSC_THEME_SCHEME)
433
+
434
    def build_dataset_stac_collection(self) -> Collection:
        """Build an OSC STAC Collection for the dataset.

        Gathers spatial/temporal extents, variable IDs, and the dataset
        description, then assembles a pystac Collection with the OSC
        extension fields, timestamps, and all required OSC links.

        Returns:
            A pystac.Collection object.

        Raises:
            ValueError: If metadata extraction or OSC extension validation
                fails.
        """
        try:
            spatial_extent = self._get_spatial_extent()
            temporal_extent = self._get_temporal_extent()
            variables = self.get_variable_ids()
            general_metadata = self._get_general_metadata()
        except ValueError as e:
            raise ValueError(f"Metadata extraction failed: {e}")

        # Build the base STAC Collection.
        collection = Collection(
            id=self.collection_id,
            description=general_metadata.get("description", "No description provided."),
            extent=Extent(spatial=spatial_extent, temporal=temporal_extent),
        )

        # Add OSC extension metadata.
        osc_extension = OscExtension.add_to(collection)
        # osc_project and osc_type are fixed constant values for DeepESDL.
        osc_extension.osc_project = "deep-earth-system-data-lab"
        osc_extension.osc_type = "product"
        osc_extension.osc_status = self.osc_status
        osc_extension.osc_region = self.osc_region
        osc_extension.osc_variables = variables
        osc_extension.osc_missions = self.osc_missions
        # Default cf:parameter to the collection ID when none was supplied.
        if self.cf_params:
            osc_extension.cf_parameter = self.cf_params
        else:
            osc_extension.cf_parameter = [{"name": self.collection_id}]

        # Creation and update timestamps for the collection.
        now_iso = datetime.now(timezone.utc).isoformat()
        collection.extra_fields["created"] = now_iso
        collection.extra_fields["updated"] = now_iso
        collection.title = self.collection_id

        # Replace pystac's auto-generated root link with the OSC root.
        collection.remove_links("root")
        collection.add_link(
            Link(
                rel="root",
                target="../../catalog.json",
                media_type="application/json",
                title="Open Science Catalog",
            )
        )
        collection.add_link(Link(rel="via", target=self.access_link, title="Access"))
        if self.documentation_link:
            collection.add_link(
                Link(rel="via", target=self.documentation_link, title="Documentation")
            )
        collection.add_link(
            Link(
                rel="parent",
                target="../catalog.json",
                media_type="application/json",
                title="Products",
            )
        )

        # 'related' link to each variable's catalog.
        for var in variables:
            collection.add_link(
                Link(
                    rel="related",
                    target=f"../../variables/{var}/catalog.json",
                    media_type="application/json",
                    title="Variable: " + self.format_string(var),
                )
            )

        self_href = (
            "https://esa-earthcode.github.io/"
            f"open-science-catalog-metadata/products/{self.collection_id}/collection.json"
        )
        collection.set_self_href(self_href)

        # Use the top-level 'themes' field rather than osc:themes.
        # NOTE(review): this stores Theme objects, not plain dicts —
        # presumably serialized later via a custom encoder; verify.
        if self.osc_themes:
            theme_obj = self.build_theme(self.osc_themes)
            collection.extra_fields["themes"] = [theme_obj]

        # 'related' link to each theme's catalog.
        for theme in self.osc_themes:
            formatted_theme = self.format_string(theme)
            collection.add_link(
                Link(
                    rel="related",
                    target=f"../../themes/{theme}/catalog.json",
                    media_type="application/json",
                    title=f"Theme: {formatted_theme}",
                )
            )

        # 'related' link to the DeepESDL project collection.
        collection.add_link(
            Link(
                rel="related",
                target="../../projects/deep-earth-system-data-lab/collection.json",
                media_type="application/json",
                title="Project: DeepESDL"
            )
        )

        # Validate required OSC extension fields before returning.
        try:
            osc_extension.validate_extension()
        except ValueError as e:
            raise ValueError(f"OSC Extension validation failed: {e}")

        return collection
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright (c) 2025 by Brockmann Consult GmbH
4
+ # Permissions are hereby granted under the terms of the MIT License:
5
+ # https://opensource.org/licenses/MIT.
6
+
7
import json
import logging
import os
import shutil
import subprocess
from pathlib import Path

import requests

from deep_code.utils.helper import serialize
16
+
17
+
18
+ class GitHubAutomation:
19
+ """Automates GitHub operations needed to create a Pull Request.
20
+
21
+ Args:
22
+ username: GitHub username.
23
+ token: Personal access token for GitHub.
24
+ repo_owner: Owner of the repository to fork.
25
+ repo_name: Name of the repository to fork.
26
+ """
27
+
28
    def __init__(self, username: str, token: str, repo_owner: str, repo_name: str):
        """Store credentials and derive repository URLs and the clone path."""
        self.username = username
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        # Upstream repository the fork is created from.
        self.base_repo_url = f"https://github.com/{repo_owner}/{repo_name}.git"
        # Fork URL with embedded credentials so git can push non-interactively.
        # NOTE(review): the token ends up in the clone's .git/config.
        self.fork_repo_url = (
            f"https://{username}:{token}@github.com/{username}/{repo_name}.git"
        )
        # Fixed clone location under the user's home directory.
        self.local_clone_dir = os.path.join(os.path.expanduser("~"), "temp_repo")
38
+
39
+ def fork_repository(self):
40
+ """Fork the repository to the user's GitHub account."""
41
+ logging.info("Forking repository...")
42
+ url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/forks"
43
+ headers = {"Authorization": f"token {self.token}"}
44
+ response = requests.post(url, headers=headers)
45
+ response.raise_for_status()
46
+ logging.info(f"Repository forked to {self.username}/{self.repo_name}")
47
+
48
+ def clone_sync_repository(self):
49
+ """Clone the forked repository locally if it doesn't exist, or pull updates if it does."""
50
+ logging.info("Checking local repository...")
51
+ if not os.path.exists(self.local_clone_dir):
52
+ logging.info("Cloning forked repository...")
53
+ try:
54
+ subprocess.run(
55
+ ["git", "clone", self.fork_repo_url, self.local_clone_dir],
56
+ check=True,
57
+ )
58
+ logging.info(f"Repository cloned to {self.local_clone_dir}")
59
+ except subprocess.CalledProcessError as e:
60
+ raise RuntimeError(f"Failed to clone repository: {e}")
61
+ else:
62
+ logging.info("Local repository already exists. Pulling latest changes...")
63
+ try:
64
+ os.chdir(self.local_clone_dir)
65
+ subprocess.run(["git", "pull"], check=True)
66
+ logging.info("Repository updated with latest changes.")
67
+ except subprocess.CalledProcessError as e:
68
+ raise RuntimeError(f"Failed to pull latest changes: {e}")
69
+
70
+ def create_branch(self, branch_name: str):
71
+ """Create a new branch in the local repository."""
72
+ logging.info(f"Creating new branch: {branch_name}...")
73
+ try:
74
+ os.chdir(self.local_clone_dir)
75
+ subprocess.run(["git", "checkout", "-b", branch_name], check=True)
76
+ except subprocess.CalledProcessError as e:
77
+ raise RuntimeError(f"Failed Creating branch: '{branch_name}': {e}")
78
+
79
+ def add_file(self, file_path: str, content):
80
+ """Add a new file to the local repository."""
81
+ logging.info(f"Adding new file: {file_path}...")
82
+ os.chdir(self.local_clone_dir)
83
+ full_path = Path(self.local_clone_dir) / file_path
84
+ full_path.parent.mkdir(parents=True, exist_ok=True)
85
+ # Ensure content is serializable
86
+ if hasattr(content, "to_dict"):
87
+ content = content.to_dict()
88
+ if not isinstance(content, (dict, list, str, int, float, bool, type(None))):
89
+ raise TypeError(f"Cannot serialize content of type {type(content)}")
90
+ try:
91
+ json_content = json.dumps(
92
+ content, indent=2, ensure_ascii=False, default=serialize
93
+ )
94
+ except TypeError as e:
95
+ raise RuntimeError(f"JSON serialization failed: {e}")
96
+ with open(full_path, "w", encoding="utf-8") as f:
97
+ f.write(json_content)
98
+ try:
99
+ subprocess.run(["git", "add", str(full_path)], check=True)
100
+ except subprocess.CalledProcessError as e:
101
+ raise RuntimeError(f"Failed to add file '{file_path}': {e}")
102
+
103
+ def commit_and_push(self, branch_name: str, commit_message: str):
104
+ """Commit changes and push to the forked repository."""
105
+ logging.info("Committing and pushing changes...")
106
+ os.chdir(self.local_clone_dir)
107
+ try:
108
+ subprocess.run(["git", "commit", "-m", commit_message], check=True)
109
+ subprocess.run(["git", "push", "-u", "origin", branch_name], check=True)
110
+ except subprocess.CalledProcessError as e:
111
+ raise RuntimeError(f"Failed to commit and push: {e}")
112
+
113
+ def create_pull_request(
114
+ self, branch_name: str, pr_title: str, pr_body: str, base_branch: str = "main"
115
+ ):
116
+ """Create a pull request from the forked repository to the base repository."""
117
+ logging.info("Creating a pull request...")
118
+ os.chdir(self.local_clone_dir)
119
+ url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/pulls"
120
+ headers = {"Authorization": f"token {self.token}"}
121
+ data = {
122
+ "title": pr_title,
123
+ "head": f"{self.username}:{branch_name}",
124
+ "base": base_branch,
125
+ "body": pr_body,
126
+ }
127
+ response = requests.post(url, headers=headers, json=data)
128
+ response.raise_for_status()
129
+ pr_url = response.json()["html_url"]
130
+ logging.info(f"Pull request created: {pr_url}")
131
+
132
+ def clean_up(self):
133
+ """Clean up the local cloned repository."""
134
+ logging.info("Cleaning up local repository...")
135
+ os.chdir("..")
136
+ try:
137
+ subprocess.run(["rm", "-rf", self.local_clone_dir])
138
+ except subprocess.CalledProcessError as e:
139
+ raise RuntimeError(f"Failed to clean-up local repository: {e}")
140
+
141
+ def file_exists(self, file_path) -> bool:
142
+ full_path = Path(self.local_clone_dir) / file_path
143
+ exists = os.path.isfile(full_path)
144
+ logging.debug(f"Checking existence of {full_path}: {exists}")
145
+ return exists