gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from .constants import GLUE_SET_FILE
|
|
9
|
+
from .helpers import (
|
|
10
|
+
_apply_saved_params_to_config_path,
|
|
11
|
+
_clear_checked_out_jobs,
|
|
12
|
+
_examples_epilog,
|
|
13
|
+
_load_config_index,
|
|
14
|
+
_resolve_single_job_name,
|
|
15
|
+
_resolve_checkout_job_name,
|
|
16
|
+
_save_checked_out_jobs,
|
|
17
|
+
_set_checked_out_local_setup,
|
|
18
|
+
)
|
|
19
|
+
from ..cli import app
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@app.command(
    "checkout",
    epilog=_examples_epilog(
        "gluekit checkout my-job",
        "gluekit checkout --local",
        "gluekit checkout --clear",
    ),
)
def checkout_job(
    job_name: Optional[str] = typer.Argument(
        None,
        help="Glue job name to check out for later commands.",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help=(
            "AWS CLI credential profile name to store as the active gluekit profile "
            "scope for saved params."
        ),
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show gluekit profile-scoped config updates without writing files.",
    ),
    clear: bool = typer.Option(
        False,
        "--clear",
        help="Remove the active checkout selection without deleting saved params or local files.",
    ),
    local: bool = typer.Option(
        False,
        "--local",
        help="Check out the single local development settings file in .gluekit/local.json.",
    ),
) -> None:
    """Replace the active local checkout selection."""
    # NOTE(review): the docstring is left untouched on purpose; this command
    # passes no explicit help=, so the docstring is presumably what the CLI
    # shows as help text.

    # Mode 1: --clear. Mutually exclusive with a job name and with --local.
    if clear:
        if job_name or local:
            raise typer.BadParameter("Use either <job-name>, --local, or --clear.")
        if not dry_run:
            _clear_checked_out_jobs()
            typer.echo(f"Cleared active checkout selection in {GLUE_SET_FILE}.")
        else:
            typer.echo(f"Would clear active checkout selection in {GLUE_SET_FILE}.")
        return

    # Mode 2: --local. Mutually exclusive with a job name.
    if local:
        if job_name:
            raise typer.BadParameter("Use either <job-name> or --local, not both.")
        if not dry_run:
            _set_checked_out_local_setup("local")
            typer.echo(f"Checked out local settings in {GLUE_SET_FILE}.")
        else:
            typer.echo(f"Would check out local settings in {GLUE_SET_FILE}.")
        return

    # Mode 3: a named job. With only --profile given, the job name is taken
    # from _resolve_single_job_name instead of the command line.
    if not job_name:
        if profile:
            job_name = _resolve_single_job_name(None, "checkout --profile")
        if not job_name:
            raise typer.BadParameter(
                "Provide <job-name>, use --local, or use --clear."
            )

    resolved_job_name, source = _resolve_checkout_job_name(
        job_name, config_dir=config_dir
    )
    if dry_run:
        action = "Would check out"
    else:
        _save_checked_out_jobs(
            [resolved_job_name], resolved_job_name, source=source, profile=profile
        )
        action = "Checked out"

    typer.echo(f"{action} 1 job(s) from {source} in {GLUE_SET_FILE}:")
    typer.echo(f"- {resolved_job_name}")
    if profile:
        typer.echo(f"Active gluekit profile scope: {profile}")

    # Saved params are applied only when the resolved job has a config entry
    # in the index; dry_run is forwarded so the helper decides what to write.
    config_entry = _load_config_index(config_dir).get(resolved_job_name)
    if not config_entry:
        return
    _apply_saved_params_to_config_path(
        config_path=config_entry["config_path"],
        config_data=config_entry["config"],
        job_name=resolved_job_name,
        profile=profile,
        dry_run=dry_run,
    )
|
|
@@ -0,0 +1,516 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import copy
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tarfile
|
|
10
|
+
import uuid
|
|
11
|
+
import zipfile
|
|
12
|
+
from collections.abc import Mapping
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from email.parser import Parser
|
|
15
|
+
from fnmatch import fnmatch
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from tempfile import TemporaryDirectory
|
|
18
|
+
from typing import Any, Optional
|
|
19
|
+
|
|
20
|
+
import typer
|
|
21
|
+
from slugify import slugify
|
|
22
|
+
|
|
23
|
+
from ..job_mgmt.glue_jobs import (
|
|
24
|
+
download_glue_job_files,
|
|
25
|
+
list_glue_jobs,
|
|
26
|
+
normalize_glue_config_data,
|
|
27
|
+
convert_script_to_notebook,
|
|
28
|
+
convert_notebook_to_script,
|
|
29
|
+
_resolve_notebook_path,
|
|
30
|
+
upload_glue_job_files_from_config,
|
|
31
|
+
)
|
|
32
|
+
from ..job_mgmt.magics import build_magic_cell_sources as _build_magic_cell_sources
|
|
33
|
+
|
|
34
|
+
from .constants import *
|
|
35
|
+
from .helpers import *
|
|
36
|
+
from .edit import _parse_csv_list
|
|
37
|
+
from ..cli import app, glue_config_app
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _rewrite_clone_path_value(
    value: str,
    *,
    source_job: str,
    target_job: str,
    fallback_suffixes: tuple[str, ...] = (),
) -> str:
    """Rewrite a path-like string so it refers to the target job.

    The first of these that applies wins:

    1. ``source_job`` occurs in the text: every occurrence is replaced with
       ``target_job``.
    2. The slug of ``source_job`` occurs in the text: every occurrence is
       replaced with the slug of ``target_job``.
    3. The text ends with one of ``fallback_suffixes``: the final
       ``/``-separated component is replaced with ``<target-slug><suffix>``.

    Args:
        value: The path or URI text to rewrite.
        source_job: Name of the job being cloned.
        target_job: Name of the clone.
        fallback_suffixes: Suffixes (e.g. ``".py"``) that enable rule 3.

    Returns:
        ``value`` unchanged when it is empty or whitespace-only; otherwise the
        rewritten text, based on ``value`` with surrounding whitespace
        stripped.
    """
    text = value.strip()
    if not text:
        return value

    target_slug = slugify(target_job)
    replacements = (
        (source_job, target_job),
        (slugify(source_job), target_slug),
    )
    for old_value, new_value in replacements:
        if old_value and old_value in text:
            return text.replace(old_value, new_value)

    # Nothing matched by name: fall back to renaming the last path component.
    for suffix in fallback_suffixes:
        if not text.endswith(suffix):
            continue
        # rpartition keeps the separator, so a root-level path such as
        # "/job.py" keeps its leading "/" instead of turning relative.
        head, separator, _tail = text.rpartition("/")
        return f"{head}{separator}{target_slug}{suffix}"

    return text
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _should_rewrite_cloned_dependency(local_path: Path, source_job: str) -> bool:
    """Tell whether a dependency path looks like it belongs to ``source_job``.

    The comparison is case-insensitive. A path qualifies when one of its
    components equals the job name or its slug, or when the file stem
    contains either of them as a substring.
    """
    needles = (slugify(source_job).lower(), source_job.lower())

    lowered_parts = {component.lower() for component in local_path.parts}
    if any(needle in lowered_parts for needle in needles):
        return True

    lowered_stem = local_path.stem.lower()
    return any(needle in lowered_stem for needle in needles)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _prepare_clone_destination(path: Path, *, force: bool) -> None:
    """Make sure nothing occupies ``path`` before a clone artifact is written.

    Args:
        path: Destination that is about to be written.
        force: When true, an existing file, directory tree or symlink at
            ``path`` is removed. When false, an occupied ``path`` is an error.

    Raises:
        typer.BadParameter: ``path`` is occupied and ``force`` is false.
    """
    # exists() follows symlinks and reports False for a dangling link, so the
    # link itself has to be checked separately.
    is_link = path.is_symlink()
    if not is_link and not path.exists():
        return
    if not force:
        raise typer.BadParameter(
            f"Refusing to overwrite existing path without --force: {path}"
        )
    # shutil.rmtree refuses to operate on a symlink to a directory; remove
    # only the link and leave whatever it points at alone.
    if path.is_dir() and not is_link:
        shutil.rmtree(path)
    else:
        path.unlink()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _copy_cloned_artifact(source_path: Path, target_path: Path, *, force: bool) -> None:
    """Copy one clone artifact (file or directory tree) to ``target_path``.

    Args:
        source_path: Existing file or directory to copy.
        target_path: Destination; missing parent directories are created.
        force: Forwarded to ``_prepare_clone_destination`` to decide whether
            an occupied destination may be removed first.

    Raises:
        typer.BadParameter: Raised by ``_prepare_clone_destination`` when the
            destination is occupied and ``force`` is false.
    """
    # With force, preparing the destination deletes whatever is there. If the
    # destination *is* the source, that would destroy the only copy and the
    # copy below would then fail, so a forced self-copy is a no-op (the same
    # guard _move_local_artifact applies).
    if force and (
        source_path == target_path
        or source_path.resolve() == target_path.resolve()
    ):
        return

    _prepare_clone_destination(target_path, force=force)
    target_path.parent.mkdir(parents=True, exist_ok=True)
    if source_path.is_dir():
        shutil.copytree(source_path, target_path)
    else:
        shutil.copy2(source_path, target_path)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _rewrite_extra_py_files(
    existing_value: Optional[str],
    s3_rewrites: dict[str, str],
) -> Optional[str]:
    """Apply S3 path rewrites to a comma-separated list value.

    Args:
        existing_value: The current comma-separated value, or ``None``.
        s3_rewrites: Mapping of old item -> new item. Items without a mapping
            are kept as they are.

    Returns:
        ``None`` when ``existing_value`` is ``None``; otherwise the items
        (parsed with ``_parse_csv_list``, formatted with ``_format_csv_item``)
        joined with commas.
    """
    if existing_value is None:
        return None

    swapped: list[str] = []
    for item in _parse_csv_list(existing_value):
        swapped.append(s3_rewrites.get(item, item))
    return ",".join(map(_format_csv_item, swapped))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _rewrite_source_control_file_mappings(
    entries: Any,
    *,
    field_name: str,
    source_job: str,
    target_job: str,
    rewrite_local_paths: bool,
    target_script_location: Any,
    local_script_path_for_s3: Path,
) -> dict[str, str]:
    """Retarget ``LocalPath``/``S3Path`` pairs of one SourceControlDetails list.

    Each entry dict is updated in place.

    Args:
        entries: The list stored under ``field_name``, or ``None``.
        field_name: Name of the field, used in error messages.
        source_job: Name of the job being cloned.
        target_job: Name of the clone.
        rewrite_local_paths: When true, local paths that look job-specific
            are renamed and S3 paths are derived from the target script
            location if one is available.
        target_script_location: The clone's script location; only used when
            it is a non-blank string.
        local_script_path_for_s3: Local script path forwarded to
            ``_derive_s3_target``.

    Returns:
        Mapping of each original ``S3Path`` to its new value (empty when
        ``entries`` is ``None``).

    Raises:
        typer.BadParameter: ``entries`` is not a list, an entry is not a
            dict, or an entry lacks string ``LocalPath``/``S3Path`` values.
    """
    if entries is None:
        return {}
    if not isinstance(entries, list):
        raise typer.BadParameter(f"SourceControlDetails.{field_name} must be a list.")

    # S3 targets are derived from the script location only in local-rewrite
    # mode and only when that location is a usable string; every other case
    # falls back to a plain name substitution on the old S3 path.
    derive_from_script = (
        rewrite_local_paths
        and isinstance(target_script_location, str)
        and bool(target_script_location.strip())
    )

    rewritten_by_old_s3: dict[str, str] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            raise typer.BadParameter(f"{field_name} entries must be objects.")
        local_value = entry.get("LocalPath")
        s3_value = entry.get("S3Path")
        if not (isinstance(local_value, str) and isinstance(s3_value, str)):
            raise typer.BadParameter(
                f"{field_name} entries must include LocalPath and S3Path."
            )

        target_local_path = Path(local_value)
        if rewrite_local_paths and _should_rewrite_cloned_dependency(
            target_local_path, source_job
        ):
            target_local_path = Path(
                _rewrite_clone_path_value(
                    local_value,
                    source_job=source_job,
                    target_job=target_job,
                )
            )
            entry["LocalPath"] = target_local_path.as_posix()

        if derive_from_script:
            new_s3_path = _derive_s3_target(
                target_local_path,
                target_job,
                target_script_location,
                local_script_path_for_s3,
            )
            # A local directory gets a ".zip" S3 target unless it has one.
            if target_local_path.is_dir() and not new_s3_path.endswith(".zip"):
                new_s3_path = f"{new_s3_path}.zip"
        else:
            new_s3_path = _rewrite_clone_path_value(
                s3_value,
                source_job=source_job,
                target_job=target_job,
            )

        entry["S3Path"] = new_s3_path
        rewritten_by_old_s3[s3_value] = new_s3_path

    return rewritten_by_old_s3
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _rewrite_glue_job_config(
    source_config: dict[str, Any],
    *,
    source_job: str,
    target_job: str,
    rewrite_local_paths: bool,
) -> dict[str, Any]:
    """Build a renamed copy of a Glue job config.

    The input is deep-copied and never modified. In the copy, ``Name`` is set
    to ``target_job``, ``LastModifiedOn`` is dropped, script/notebook paths
    and dependency mappings are retargeted, and ``--extra-py-files`` in
    ``DefaultArguments`` is updated to the rewritten S3 paths.

    Args:
        source_config: Config of the job being cloned.
        source_job: Name of the job being cloned.
        target_job: Name of the clone.
        rewrite_local_paths: Forwarded to the dependency-mapping rewrite and
            used to pick the local script path handed to it.

    Returns:
        The rewritten config copy.

    Raises:
        typer.BadParameter: ``SourceControlDetails`` or ``Command`` in the
            copy is present but not a dict.
    """
    renamed_config = copy.deepcopy(source_config)
    renamed_config["Name"] = target_job
    renamed_config.pop("LastModifiedOn", None)

    # Source sections are read leniently (anything unusable becomes {});
    # the sections of the copy are validated strictly.
    source_sc = source_config.get("SourceControlDetails", {}) or {}
    if not isinstance(source_sc, dict):
        source_sc = {}
    target_sc = renamed_config.setdefault("SourceControlDetails", {})
    if not isinstance(target_sc, dict):
        raise typer.BadParameter("SourceControlDetails must be an object.")

    source_command = source_config.get("Command", {}) or {}
    if not isinstance(source_command, dict):
        source_command = {}
    target_command = renamed_config.setdefault("Command", {})
    if not isinstance(target_command, dict):
        raise typer.BadParameter("Command must be an object.")

    def _retarget(raw: str, suffixes: tuple[str, ...]) -> str:
        # Shared call shape for every path field rewritten below.
        return _rewrite_clone_path_value(
            raw,
            source_job=source_job,
            target_job=target_job,
            fallback_suffixes=suffixes,
        )

    declared_script = source_sc.get("ScriptLocation") or source_sc.get("LocalPath")
    local_script_path_for_s3 = Path(
        declared_script or f"glue/scripts/{slugify(source_job)}.py"
    )
    if rewrite_local_paths:
        local_script_path_for_s3 = Path(
            _retarget(
                declared_script or f"glue/scripts/{slugify(target_job)}.py",
                (".py",),
            )
        )

    # Only fields that hold a string in the source are rewritten.
    source_control_fields = (
        ("ScriptLocation", (".py",)),
        ("LocalPath", (".py",)),
        ("NotebookLocation", (".ipynb",)),
        ("NotebookPath", (".ipynb",)),
    )
    for key, suffixes in source_control_fields:
        raw_value = source_sc.get(key)
        if isinstance(raw_value, str):
            target_sc[key] = _retarget(raw_value, suffixes)

    command_script = source_command.get("ScriptLocation")
    if isinstance(command_script, str):
        target_command["ScriptLocation"] = _retarget(command_script, (".py",))

    target_script_location = target_command.get("ScriptLocation")
    s3_rewrites: dict[str, str] = {}
    for field_name in ("AdditionalPythonFiles", "ExtraPyFiles"):
        field_rewrites = _rewrite_source_control_file_mappings(
            target_sc.get(field_name),
            field_name=field_name,
            source_job=source_job,
            target_job=target_job,
            rewrite_local_paths=rewrite_local_paths,
            target_script_location=target_script_location,
            local_script_path_for_s3=local_script_path_for_s3,
        )
        s3_rewrites.update(field_rewrites)

    default_args = renamed_config.get("DefaultArguments")
    if s3_rewrites and isinstance(default_args, dict):
        rewritten_extra = _rewrite_extra_py_files(
            default_args.get("--extra-py-files"),
            s3_rewrites,
        )
        if rewritten_extra is not None:
            default_args["--extra-py-files"] = rewritten_extra

    return renamed_config
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _collect_source_control_file_local_renames(
    source_config: dict[str, Any],
    target_config: dict[str, Any],
) -> list[tuple[Path, Path]]:
    """List dependency ``LocalPath`` values that differ between two configs.

    Entries of ``AdditionalPythonFiles`` and ``ExtraPyFiles`` under
    ``SourceControlDetails`` are paired by position. Pairs are skipped when
    either side is not a dict or lacks a string ``LocalPath``.

    Args:
        source_config: Config of the job being cloned.
        target_config: Rewritten config of the clone.

    Returns:
        ``(source_path, target_path)`` tuples, in field then entry order, for
        every pair whose ``LocalPath`` strings differ.
    """
    source_sc = source_config.get("SourceControlDetails") or {}
    target_sc = target_config.get("SourceControlDetails") or {}
    # A malformed (non-dict) section has nothing to pair up; return early
    # instead of failing on .get() below.
    if not isinstance(source_sc, dict) or not isinstance(target_sc, dict):
        return []

    renames: list[tuple[Path, Path]] = []
    for field_name in ("AdditionalPythonFiles", "ExtraPyFiles"):
        source_entries = source_sc.get(field_name) or []
        target_entries = target_sc.get(field_name) or []
        if not isinstance(source_entries, list) or not isinstance(target_entries, list):
            continue
        for source_entry, target_entry in zip(source_entries, target_entries):
            if not isinstance(source_entry, dict) or not isinstance(target_entry, dict):
                continue
            source_local = source_entry.get("LocalPath")
            target_local = target_entry.get("LocalPath")
            if not isinstance(source_local, str) or not isinstance(target_local, str):
                continue
            if source_local != target_local:
                renames.append((Path(source_local), Path(target_local)))
    return renames
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _move_local_artifact(source_path: Path, target_path: Path) -> None:
    """Move a local artifact onto ``target_path``, replacing what is there.

    Does nothing when ``source_path`` does not exist or when both paths are
    equal. Otherwise the destination is cleared (forced), its parent
    directories are created, and the source is moved there.
    """
    if source_path.exists() and source_path != target_path:
        _prepare_clone_destination(target_path, force=True)
        target_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(source_path), str(target_path))
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _build_glue_clone_plan(
    source_config: dict[str, Any],
    *,
    source_job: str,
    target_job: str,
) -> tuple[dict[str, Any], list[tuple[Path, Path, str]], list[str]]:
    """Work out everything a clone needs to do, without touching the disk.

    Args:
        source_config: Config of the job being cloned.
        source_job: Name of the job being cloned.
        target_job: Name of the clone.

    Returns:
        A 3-tuple of:

        * the rewritten config for the clone,
        * ``(source_path, target_path, kind)`` copies to perform, where
          ``kind`` is ``"script"``, ``"notebook"`` or ``"dependency"``,
        * human-readable warnings about sources that were not found.

    Raises:
        typer.BadParameter: ``SourceControlDetails`` or ``Command`` in the
            rewritten config is not a dict.
    """
    cloned_config = _rewrite_glue_job_config(
        source_config,
        source_job=source_job,
        target_job=target_job,
        rewrite_local_paths=True,
    )

    source_sc = source_config.get("SourceControlDetails", {}) or {}
    if not isinstance(source_sc, dict):
        source_sc = {}
    target_sc = cloned_config.setdefault("SourceControlDetails", {})
    if not isinstance(target_sc, dict):
        raise typer.BadParameter("SourceControlDetails must be an object.")
    # Only the shape of Command matters here; its contents are not read.
    if not isinstance(cloned_config.setdefault("Command", {}), dict):
        raise typer.BadParameter("Command must be an object.")

    source_local_script_path = Path(
        source_sc.get("ScriptLocation")
        or source_sc.get("LocalPath")
        or f"glue/scripts/{slugify(source_job)}.py"
    )
    target_local_script_path = Path(
        target_sc.get("ScriptLocation")
        or target_sc.get("LocalPath")
        or f"glue/scripts/{slugify(target_job)}.py"
    )

    declared_source_notebook = source_sc.get("NotebookLocation") or source_sc.get(
        "NotebookPath"
    )
    source_notebook_path = Path(
        declared_source_notebook or _resolve_notebook_path(source_local_script_path)
    )

    target_notebook_value = target_sc.get("NotebookLocation") or target_sc.get(
        "NotebookPath"
    )
    if not target_notebook_value:
        derived_notebook = _resolve_notebook_path(source_local_script_path)
        # NOTE(review): _resolve_notebook_path's return type is not visible
        # from this module. _rewrite_clone_path_value calls .strip() on its
        # argument, so anything that is not already a str is turned into
        # path text first.
        if not isinstance(derived_notebook, str):
            derived_notebook = Path(derived_notebook).as_posix()
        target_notebook_value = _rewrite_clone_path_value(
            derived_notebook,
            source_job=source_job,
            target_job=target_job,
            fallback_suffixes=(".ipynb",),
        )
    target_notebook_path = Path(target_notebook_value)

    artifact_copies: list[tuple[Path, Path, str]] = []
    warnings: list[str] = []

    if source_local_script_path.exists():
        artifact_copies.append(
            (source_local_script_path, target_local_script_path, "script")
        )
    else:
        warnings.append(
            f"Source script not found, skipping copy: {source_local_script_path}"
        )

    if source_notebook_path.exists():
        artifact_copies.append((source_notebook_path, target_notebook_path, "notebook"))
    elif declared_source_notebook:
        # A missing notebook is only worth a warning when the config named one.
        warnings.append(
            f"Source notebook not found, skipping copy: {source_notebook_path}"
        )

    for source_local_path, target_local_path in _collect_source_control_file_local_renames(
        source_config,
        cloned_config,
    ):
        if source_local_path.exists():
            artifact_copies.append((source_local_path, target_local_path, "dependency"))
        else:
            warnings.append(
                f"Dependency source not found, keeping cloned config reference only: {source_local_path}"
            )

    return cloned_config, artifact_copies, warnings
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
@app.command(
    "clone",
    no_args_is_help=True,
    help="Clone local Glue job configurations and artifacts.",
    epilog=_examples_epilog(
        "gluekit clone job source-job target-job",
        "gluekit clone source-job target-job",
        "gluekit clone source-job target-job --dry-run",
        "gluekit clone source-job target-job --force",
    ),
)
def clone_job(
    args: list[str] = typer.Argument(
        ...,
        help="Use either <source-job> <target-job> or job <source-job> <target-job>.",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be cloned without writing files.",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        help="Overwrite existing target config and cloned local artifacts.",
    ),
) -> None:
    """Clone one local Glue job config and its local artifacts."""
    usage_hint = (
        "Use 'gluekit clone <source-job> <target-job>' or 'gluekit clone job <source-job> <target-job>'."
    )
    # Accepted shapes: "<source> <target>" or "job <source> <target>".
    if len(args) == 2:
        raw_source, raw_target = args
    elif len(args) == 3 and args[0] == "job":
        _, raw_source, raw_target = args
    else:
        raise typer.BadParameter(usage_hint)

    normalized_source = raw_source.strip()
    normalized_target = raw_target.strip()
    if not (normalized_source and normalized_target):
        raise typer.BadParameter(
            "Source and target job names must be non-empty strings."
        )
    if normalized_source == normalized_target:
        raise typer.BadParameter("Source and target job names must differ.")

    config_index = _load_config_index(config_dir)
    source_entry = config_index.get(normalized_source)
    if not source_entry:
        raise typer.BadParameter(f'No config files matched "{normalized_source}".')
    if not force and normalized_target in config_index:
        raise typer.BadParameter(
            f'Glue job "{normalized_target}" already exists. Use --force to overwrite it.'
        )

    source_config_path: Path = source_entry["config_path"]
    target_config_path = config_dir / f"{slugify(normalized_target)}.json"
    # Different job names can still map to the same config file name.
    if target_config_path == source_config_path:
        raise typer.BadParameter(
            "Target job resolves to the same config path as the source job. Use a distinct target name."
        )
    if not force and target_config_path.exists():
        raise typer.BadParameter(
            f"Refusing to overwrite existing config without --force: {target_config_path}"
        )

    cloned_config, artifact_copies, warnings = _build_glue_clone_plan(
        source_entry["config"],
        source_job=normalized_source,
        target_job=normalized_target,
    )

    if dry_run:
        typer.echo(f"Would clone config: {source_config_path} -> {target_config_path}")
        copy_verb = "Would copy"
    else:
        # Artifacts are copied before the config is written.
        config_dir.mkdir(parents=True, exist_ok=True)
        for artifact_source, artifact_target, _kind in artifact_copies:
            _copy_cloned_artifact(artifact_source, artifact_target, force=force)
        target_config_path.write_text(json.dumps(cloned_config, indent=4))

        typer.echo(f"Cloned Glue job '{normalized_source}' to '{normalized_target}'.")
        typer.echo(f"Wrote config: {target_config_path}")
        copy_verb = "Copied"

    for source_path, target_path, kind in artifact_copies:
        typer.echo(f"{copy_verb} {kind}: {source_path} -> {target_path}")
    for message in warnings:
        typer.echo(message)
|