gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1068 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import subprocess
|
|
7
|
+
from collections.abc import Mapping
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
import typer
|
|
13
|
+
from slugify import slugify
|
|
14
|
+
|
|
15
|
+
from ..job_mgmt.glue_jobs import (
|
|
16
|
+
normalize_glue_config_data,
|
|
17
|
+
_resolve_notebook_path,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .constants import GLUE_SET_FILE
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _examples_epilog(*lines: str) -> str:
|
|
24
|
+
examples = "\n\n".join(f"- `{line.strip()}`" for line in lines)
|
|
25
|
+
return f"\n\n**Examples**\n\n{examples}"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _find_workspace_root(start: Optional[Path] = None) -> Path:
|
|
29
|
+
current = (start or Path.cwd()).resolve()
|
|
30
|
+
for candidate in [current, *current.parents]:
|
|
31
|
+
if (candidate / "pyproject.toml").exists():
|
|
32
|
+
return candidate
|
|
33
|
+
return current
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def run_command(
    command: list[str],
    cwd: Optional[Path] = None,
    dry_run: bool = False,
    verbose: bool = False,
) -> None:
    """Run an external command, optionally echoing it or only previewing it.

    With ``dry_run`` the command is echoed as "Would run: ..." and not
    executed. With ``verbose`` it is echoed as "Running: ..." before being
    executed. A missing executable is reported as ``typer.BadParameter``; a
    non-zero exit status is propagated through ``typer.Exit`` with the same
    return code.
    """
    rendered = " ".join(command)
    if dry_run:
        typer.echo("Would run: " + rendered)
        return
    if verbose:
        typer.echo("Running: " + rendered)

    working_dir = str(cwd) if cwd else None
    try:
        subprocess.run(command, cwd=working_dir, check=True, text=True)
    except FileNotFoundError as exc:
        raise typer.BadParameter(f"Command not found: {command[0]}") from exc
    except subprocess.CalledProcessError as exc:
        raise typer.Exit(code=exc.returncode) from exc
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _emit_compatibility_notice(command_name: str, replacement_hint: str) -> None:
    """Echo a note that ``gluekit <command_name>`` is a compatibility alias.

    The note points the user at ``gluekit edit`` followed by
    ``replacement_hint``.
    """
    notice = (
        f"Note: 'gluekit {command_name}' is kept for compatibility. "
        f"Prefer 'gluekit edit' {replacement_hint}."
    )
    typer.echo(notice)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _raise_missing_local_config(
    job_name: str, config_dir: Path, command_label: str
) -> None:
    """Raise ``typer.BadParameter`` because no config matched ``job_name``.

    When the job is part of the current checkout selection the message also
    explains that a local config has to be created or cloned before running
    ``command_label``. This function never returns normally.
    """
    if job_name not in _get_checked_out_jobs():
        raise typer.BadParameter(
            f'No config files matched "{job_name}" in {config_dir}.'
        )

    # Checked-out jobs get the longer, actionable explanation.
    explanation = "".join(
        (
            f'No local config files matched "{job_name}" in {config_dir}. ',
            "This checked-out job is selected for local work, but it does not have a local config yet. ",
            f"Create or clone a config before running {command_label}.",
        )
    )
    raise typer.BadParameter(explanation)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _looks_like_remote_module_spec(item: str) -> bool:
|
|
80
|
+
normalized = item.strip()
|
|
81
|
+
if not normalized:
|
|
82
|
+
return False
|
|
83
|
+
return bool(re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", normalized))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _routes_to_additional_python_modules(path: Path) -> bool:
|
|
87
|
+
return path.suffix.lower() == ".whl"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _collect_config_local_artifact_paths(
|
|
91
|
+
config_data: dict[str, Any],
|
|
92
|
+
*,
|
|
93
|
+
job_name: str,
|
|
94
|
+
include_additional_python_files: bool,
|
|
95
|
+
include_extra_files: bool,
|
|
96
|
+
) -> list[tuple[str, Path]]:
|
|
97
|
+
sc = config_data.get("SourceControlDetails", {}) or {}
|
|
98
|
+
if not isinstance(sc, dict):
|
|
99
|
+
sc = {}
|
|
100
|
+
|
|
101
|
+
script_path = Path(
|
|
102
|
+
sc.get("ScriptLocation")
|
|
103
|
+
or sc.get("LocalPath")
|
|
104
|
+
or f"glue/scripts/{slugify(job_name)}.py"
|
|
105
|
+
)
|
|
106
|
+
notebook_value = sc.get("NotebookLocation") or sc.get("NotebookPath")
|
|
107
|
+
notebook_path = (
|
|
108
|
+
Path(notebook_value)
|
|
109
|
+
if isinstance(notebook_value, str)
|
|
110
|
+
else Path(_resolve_notebook_path(script_path))
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
artifacts: list[tuple[str, Path]] = [
|
|
114
|
+
("script", script_path),
|
|
115
|
+
("notebook", notebook_path),
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
def append_entries(label: str, value: Any, kind: str) -> None:
|
|
119
|
+
entries = value
|
|
120
|
+
if entries is None:
|
|
121
|
+
return
|
|
122
|
+
if isinstance(entries, dict):
|
|
123
|
+
entries = [entries]
|
|
124
|
+
if not isinstance(entries, list):
|
|
125
|
+
return
|
|
126
|
+
for entry in entries:
|
|
127
|
+
if not isinstance(entry, dict):
|
|
128
|
+
continue
|
|
129
|
+
local_path = (
|
|
130
|
+
entry.get("LocalPath")
|
|
131
|
+
or entry.get("local_path")
|
|
132
|
+
or entry.get("localPath")
|
|
133
|
+
)
|
|
134
|
+
if isinstance(local_path, str) and local_path.strip():
|
|
135
|
+
artifacts.append((kind, Path(local_path)))
|
|
136
|
+
|
|
137
|
+
if include_additional_python_files:
|
|
138
|
+
append_entries(
|
|
139
|
+
"AdditionalPythonFiles",
|
|
140
|
+
sc.get("AdditionalPythonFiles"),
|
|
141
|
+
"additional-python-file",
|
|
142
|
+
)
|
|
143
|
+
append_entries("ExtraPyFiles", sc.get("ExtraPyFiles"), "extra-py-file")
|
|
144
|
+
if include_extra_files:
|
|
145
|
+
append_entries("AdditionalFiles", sc.get("AdditionalFiles"), "extra-file")
|
|
146
|
+
append_entries("ExtraFiles", sc.get("ExtraFiles"), "extra-file")
|
|
147
|
+
|
|
148
|
+
return artifacts
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _set_saved_scope(
    parsed: dict[str, Any],
    job_name: Optional[str],
    global_scope: bool,
    profile: Optional[str] = None,
) -> str:
    """Merge ``parsed`` into one scope of the glue-set store and persist it.

    The scope is either the global section or the section of ``job_name``,
    optionally nested under a named profile. Returns a label for the scope
    that was written: ``global``, the job name, or the ``profile:<name>:...``
    form of either.

    Raises ``typer.BadParameter`` when both or neither of ``job_name`` and
    ``global_scope`` are given.
    """
    if global_scope and job_name:
        raise typer.BadParameter("Use either <job-name> or --global, not both.")
    store = _load_glue_set_store()

    profile_name = None
    if isinstance(profile, str):
        profile_name = profile.strip()

    # ``container`` is the mapping holding the "global" and "jobs" sections.
    container = store
    if profile_name:
        all_profiles = store.setdefault("profiles", {})
        container = all_profiles.setdefault(profile_name, {})
        if not isinstance(container, dict):
            container = all_profiles[profile_name] = {}
        for section in ("global", "jobs"):
            if not isinstance(container.get(section), dict):
                container[section] = {}

    if global_scope:
        target = f"profile:{profile_name}:global" if profile_name else "global"
        holder, slot = container, "global"
    else:
        if not job_name:
            raise typer.BadParameter("Provide <job-name> or use --global.")
        target = f"profile:{profile_name}:{job_name}" if profile_name else job_name
        holder, slot = container.setdefault("jobs", {}), job_name

    scope = holder.setdefault(slot, {})
    if not isinstance(scope, dict):
        # Replace a malformed section with a fresh mapping.
        scope = holder[slot] = {}

    scope.update(parsed)
    _save_glue_set_store(store)
    return target
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _load_config_index(config_dir: Path) -> dict[str, dict[str, Any]]:
|
|
199
|
+
config_index: dict[str, dict[str, Any]] = {}
|
|
200
|
+
for config_path in config_dir.glob("*.json"):
|
|
201
|
+
try:
|
|
202
|
+
config_data = normalize_glue_config_data(
|
|
203
|
+
json.loads(config_path.read_text())
|
|
204
|
+
)
|
|
205
|
+
except json.JSONDecodeError:
|
|
206
|
+
continue
|
|
207
|
+
job_name = config_data.get("Name")
|
|
208
|
+
if job_name:
|
|
209
|
+
config_index[job_name] = {
|
|
210
|
+
"config_path": config_path,
|
|
211
|
+
"config": config_data,
|
|
212
|
+
}
|
|
213
|
+
return config_index
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _parse_s3_url(s3_url: str) -> tuple[str, str]:
|
|
217
|
+
if not s3_url.startswith("s3://"):
|
|
218
|
+
raise typer.BadParameter(f"Invalid S3 URL: {s3_url}")
|
|
219
|
+
bucket_key = s3_url[5:]
|
|
220
|
+
if "/" not in bucket_key:
|
|
221
|
+
raise typer.BadParameter(f"Invalid S3 URL (missing key): {s3_url}")
|
|
222
|
+
bucket, key = bucket_key.split("/", 1)
|
|
223
|
+
return bucket, key
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _find_sequence(parts: tuple[str, ...], sequence: tuple[str, ...]) -> Optional[int]:
|
|
227
|
+
if not sequence:
|
|
228
|
+
return None
|
|
229
|
+
for idx in range(len(parts) - len(sequence) + 1):
|
|
230
|
+
if parts[idx : idx + len(sequence)] == sequence:
|
|
231
|
+
return idx
|
|
232
|
+
return None
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _derive_s3_target(
    local_path: Path,
    job_name: str,
    script_location: str,
    local_script_path: Path,
) -> str:
    """Work out the ``s3://`` URL that a local artifact maps to.

    The bucket is always taken from ``script_location``. The key is chosen
    by the first rule that matches:

    1. If the S3 script directory also appears as a run of components inside
       the local script's directory, and ``local_path`` lives under that
       local root, the path relative to that root is appended to the S3
       script directory.
    2. If ``local_path`` starts with a ``glue`` component, that component is
       dropped and the remainder becomes the key.
    3. If the job name (raw first, then slugified) is one of the path
       components, the key starts at that component.
    4. A relative path with more than one component is used unchanged.
    5. Otherwise only the file name is used.

    Raises ``typer.BadParameter`` (via ``_parse_s3_url``) when
    ``script_location`` is not a valid S3 URL.
    """
    bucket, script_key = _parse_s3_url(script_location)
    script_dir_parts = Path(script_key).parent.parts
    local_script_dir_parts = local_script_path.parent.parts
    local_root = None

    if script_dir_parts:
        idx = _find_sequence(local_script_dir_parts, script_dir_parts)
        if idx is not None:
            local_root = Path(*local_script_dir_parts[: idx + len(script_dir_parts)])

    if local_root and local_path.is_relative_to(local_root):
        rel = local_path.relative_to(local_root)
        s3_key = Path(*script_dir_parts) / rel
        return f"s3://{bucket}/{s3_key.as_posix()}"

    if local_path.parts and local_path.parts[0] == "glue":
        rel = (
            Path(*local_path.parts[1:])
            if len(local_path.parts) > 1
            else Path(local_path.name)
        )
        return f"s3://{bucket}/{rel.as_posix()}"

    # Ordered, de-duplicated candidates. Iterating a set here made the chosen
    # component depend on hash ordering whenever both the raw and the
    # slugified job name occurred in the path.
    for candidate in dict.fromkeys((job_name, slugify(job_name))):
        if candidate in local_path.parts:
            idx = local_path.parts.index(candidate)
            s3_key = Path(*local_path.parts[idx:])
            return f"s3://{bucket}/{s3_key.as_posix()}"

    if not local_path.is_absolute() and len(local_path.parts) > 1:
        return f"s3://{bucket}/{local_path.as_posix()}"

    return f"s3://{bucket}/{local_path.name}"
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _load_glue_set_store() -> dict[str, Any]:
    """Read the glue-set store from ``GLUE_SET_FILE`` and normalize its shape.

    A missing file, invalid JSON, or a non-object top level all yield a
    fresh empty store. Otherwise the ``global``, ``jobs``, ``profiles`` and
    ``checkout`` sections are coerced to mappings, malformed profiles are
    dropped, the ``local_checkouts`` key is removed, and the checkout's
    ``local`` entry is normalized.
    """

    def fresh_store() -> dict[str, Any]:
        return {"global": {}, "jobs": {}, "profiles": {}, "checkout": {}}

    if not GLUE_SET_FILE.exists():
        return fresh_store()
    try:
        data = json.loads(GLUE_SET_FILE.read_text())
    except json.JSONDecodeError:
        return fresh_store()
    if not isinstance(data, dict):
        return fresh_store()

    for section in ("global", "jobs", "profiles"):
        if not isinstance(data.get(section), dict):
            data[section] = {}

    profiles = data["profiles"]
    # Iterate over a snapshot because malformed profiles are removed in place.
    for profile_name, profile_data in list(profiles.items()):
        if not (isinstance(profile_name, str) and isinstance(profile_data, dict)):
            profiles.pop(profile_name, None)
            continue
        for section in ("global", "jobs"):
            if not isinstance(profile_data.get(section), dict):
                profile_data[section] = {}

    if not isinstance(data.get("checkout"), dict):
        data["checkout"] = {}
    data.pop("local_checkouts", None)
    _normalize_checkout_local(data["checkout"])
    return data
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _normalize_checkout_local(checkout: dict[str, Any]) -> None:
|
|
323
|
+
local_value = checkout.get("local")
|
|
324
|
+
if isinstance(local_value, dict):
|
|
325
|
+
local_data = {
|
|
326
|
+
key: value.strip()
|
|
327
|
+
for key, value in local_value.items()
|
|
328
|
+
if isinstance(key, str) and isinstance(value, str) and value.strip()
|
|
329
|
+
}
|
|
330
|
+
if isinstance(local_data.get("name"), str):
|
|
331
|
+
checkout["local"] = local_data
|
|
332
|
+
return
|
|
333
|
+
|
|
334
|
+
checkout.pop("local", None)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _save_glue_set_store(data: dict[str, Any]) -> None:
    """Write ``data`` to ``GLUE_SET_FILE`` as JSON indented by four spaces.

    The parent directory is created first when it does not exist yet.
    """
    destination = GLUE_SET_FILE
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=4)
    destination.write_text(serialized)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _resolve_single_job_name(job_name: Optional[str], context_label: str) -> str:
|
|
343
|
+
if job_name:
|
|
344
|
+
return job_name
|
|
345
|
+
|
|
346
|
+
checked_out_jobs = _get_checked_out_jobs()
|
|
347
|
+
if not checked_out_jobs:
|
|
348
|
+
raise typer.BadParameter(
|
|
349
|
+
f"Provide <job-name> or run 'gluekit checkout <job-name>' before {context_label}."
|
|
350
|
+
)
|
|
351
|
+
if len(checked_out_jobs) != 1:
|
|
352
|
+
raise typer.BadParameter(
|
|
353
|
+
f"{context_label} requires a single active checkout selection; found {len(checked_out_jobs)} jobs in {GLUE_SET_FILE}."
|
|
354
|
+
)
|
|
355
|
+
return checked_out_jobs[0]
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _get_saved_params_for_job(
    job_name: str, profile: Optional[str] = None
) -> dict[str, Any]:
    """Merge the saved parameters that apply to ``job_name``.

    Layers are applied in increasing precedence: store-wide global, per-job,
    profile global, profile per-job. Layers that are not mappings are
    ignored.
    """
    store = _load_glue_set_store()
    layers = [
        store.get("global", {}),
        store.get("jobs", {}).get(job_name, {}),
    ]

    profile_name = profile.strip() if isinstance(profile, str) else None
    profile_store = store.get("profiles", {}).get(profile_name, {})
    if isinstance(profile_store, dict):
        layers.append(profile_store.get("global", {}))
        layers.append(profile_store.get("jobs", {}).get(job_name, {}))

    merged: dict[str, Any] = {}
    for layer in layers:
        if isinstance(layer, dict):
            merged.update(layer)
    return merged
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _get_checked_out_profile() -> Optional[str]:
    """Return the stripped profile name stored in the checkout, if any.

    Returns ``None`` when the checkout has no ``profile`` or it is not a
    non-blank string.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    if isinstance(checkout, dict):
        profile = checkout.get("profile")
        if isinstance(profile, str):
            return profile.strip() or None
    return None
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _get_local_checkouts() -> dict[str, dict[str, Any]]:
    """Return the checked-out local setup as a one-entry mapping keyed by name.

    Returns an empty mapping when no local setup is checked out or its name
    is not a non-blank string.
    """
    setup = _get_checked_out_local_setup()
    name = setup.get("name") if setup else None
    if isinstance(name, str) and name.strip():
        return {name: setup}
    return {}
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _save_local_checkout(name: str, data: dict[str, Any]) -> None:
    """Store a local setup under the checkout's ``local`` entry and persist it.

    The entry starts with the stripped ``name`` and is then updated with
    every string-keyed item of ``data`` whose value is a non-blank string
    (stripped). A ``name`` item in ``data`` therefore overrides the ``name``
    argument. Raises ``typer.BadParameter`` when ``name`` is blank.
    """
    label = name.strip()
    if not label:
        raise typer.BadParameter("Local setup name must be a non-empty string.")

    store = _load_glue_set_store()
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = {}
        store["checkout"] = checkout

    entry: dict[str, str] = {"name": label}
    for key, value in data.items():
        if isinstance(key, str) and isinstance(value, str) and value.strip():
            entry[key] = value.strip()

    checkout["local"] = entry
    _save_glue_set_store(store)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _get_checked_out_local_setup_name() -> Optional[str]:
    """Return the stripped name of the checked-out local setup, if any.

    Returns ``None`` when the checkout has no ``local`` mapping or its
    ``name`` is not a non-blank string.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    local_entry = checkout.get("local") if isinstance(checkout, dict) else None
    if not isinstance(local_entry, dict):
        return None
    name = local_entry.get("name")
    if not isinstance(name, str):
        return None
    return name.strip() or None
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _get_checked_out_local_setup() -> Optional[dict[str, Any]]:
    """Return a shallow copy of the checkout's ``local`` entry.

    Returns ``None`` unless ``local`` is a mapping whose ``name`` is a
    non-blank string.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    if isinstance(checkout, dict):
        setup = checkout.get("local")
        if isinstance(setup, dict):
            name = setup.get("name")
            if isinstance(name, str) and name.strip():
                return dict(setup)
    return None
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _set_checked_out_local_setup(name: str) -> None:
    """Point the checkout's ``local`` entry at ``name`` and persist the store.

    The non-blank ``profile``, ``job`` and ``mode`` strings of the previous
    ``local`` entry are carried over (stripped); everything else is dropped.
    Raises ``typer.BadParameter`` when ``name`` is blank.
    """
    label = name.strip()
    if not label:
        raise typer.BadParameter("Local setup name must be a non-empty string.")

    store = _load_glue_set_store()
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = store["checkout"] = {}

    previous = checkout.get("local")
    carried: dict[str, str] = {}
    if isinstance(previous, dict):
        for field in ("profile", "job", "mode"):
            kept = previous.get(field)
            if isinstance(kept, str) and kept.strip():
                carried[field] = kept.strip()

    checkout["local"] = {"name": label, **carried}
    _save_glue_set_store(store)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _parse_config_field_path(field_path: str) -> list[str | int]:
|
|
473
|
+
parts: list[str | int] = []
|
|
474
|
+
for raw_part in field_path.split("."):
|
|
475
|
+
if not raw_part:
|
|
476
|
+
return []
|
|
477
|
+
name, bracket, remainder = raw_part.partition("[")
|
|
478
|
+
if name:
|
|
479
|
+
parts.append(name)
|
|
480
|
+
while bracket:
|
|
481
|
+
index_text, closing, remainder = remainder.partition("]")
|
|
482
|
+
if closing != "]" or not index_text.isdigit():
|
|
483
|
+
return []
|
|
484
|
+
parts.append(int(index_text))
|
|
485
|
+
bracket = ""
|
|
486
|
+
if remainder:
|
|
487
|
+
if not remainder.startswith("["):
|
|
488
|
+
return []
|
|
489
|
+
bracket = "["
|
|
490
|
+
remainder = remainder[1:]
|
|
491
|
+
return parts
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _set_config_field_path(
    config_data: dict[str, Any],
    field_path: str,
    value: Any,
    changes: list[str],
) -> bool:
    """Assign ``value`` at ``field_path`` inside ``config_data``.

    Missing intermediate mappings are created, except where the following
    path component is a list index. List indexes must already exist. A
    changed list element is recorded in ``changes``; mapping assignments are
    delegated to ``_set_if_changed``.

    Returns False when the path is malformed or cannot be resolved, True
    otherwise (whether or not the stored value actually changed).
    """
    parts = _parse_config_field_path(field_path)
    if not parts:
        return False

    *walk, leaf = parts
    node: Any = config_data
    for step, upcoming in zip(walk, parts[1:]):
        if isinstance(step, int):
            if not (isinstance(node, list) and step < len(node)):
                return False
        else:
            if not isinstance(node, dict):
                return False
            if step not in node:
                # A missing list cannot be created from an index alone.
                if isinstance(upcoming, int):
                    return False
                node[step] = {}
        node = node[step]

    if isinstance(leaf, int):
        if not (isinstance(node, list) and leaf < len(node)):
            return False
        if node[leaf] != value:
            node[leaf] = value
            changes.append(field_path)
        return True

    if not isinstance(node, dict):
        return False
    _set_if_changed(node, leaf, value, changes)
    return True
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _saved_param_config_value(value: Any) -> Any:
|
|
537
|
+
if (
|
|
538
|
+
isinstance(value, dict)
|
|
539
|
+
and isinstance(value.get("remote"), str)
|
|
540
|
+
and "local" in value
|
|
541
|
+
):
|
|
542
|
+
return value["remote"]
|
|
543
|
+
return _to_csv_if_list(value)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _get_config_field_path(config_data: dict[str, Any], field_path: str) -> Any:
    """Look up the value stored at ``field_path`` inside ``config_data``.

    Returns ``None`` when the path is malformed, a list index is out of
    range, a key is missing, or a container has the wrong type for the
    path component.
    """
    parts = _parse_config_field_path(field_path)
    if not parts:
        return None

    node: Any = config_data
    for part in parts:
        if isinstance(part, int):
            reachable = isinstance(node, list) and part < len(node)
        else:
            reachable = isinstance(node, dict) and part in node
        if not reachable:
            return None
        node = node[part]
    return node
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _profile_param_expected_fields(key: str, value: Any) -> list[tuple[str, Any]]:
|
|
565
|
+
normalized_key = key.strip()
|
|
566
|
+
if normalized_key.startswith(
|
|
567
|
+
(
|
|
568
|
+
"Command.",
|
|
569
|
+
"DefaultArguments.",
|
|
570
|
+
"NonOverridableArguments.",
|
|
571
|
+
"SourceControlDetails.",
|
|
572
|
+
)
|
|
573
|
+
):
|
|
574
|
+
expected = (
|
|
575
|
+
_saved_param_config_value(value)
|
|
576
|
+
if normalized_key.startswith(
|
|
577
|
+
("DefaultArguments.", "NonOverridableArguments.")
|
|
578
|
+
)
|
|
579
|
+
else value
|
|
580
|
+
)
|
|
581
|
+
return [(normalized_key, expected)]
|
|
582
|
+
if normalized_key in {"script_location", "local_script_path", "local_path"}:
|
|
583
|
+
return [("SourceControlDetails.ScriptLocation", str(value))]
|
|
584
|
+
if normalized_key in {"notebook_location", "notebook_path"}:
|
|
585
|
+
expected = str(value)
|
|
586
|
+
return [
|
|
587
|
+
("SourceControlDetails.NotebookLocation", expected),
|
|
588
|
+
("SourceControlDetails.NotebookPath", expected),
|
|
589
|
+
]
|
|
590
|
+
if normalized_key in {"command_script_location", "remote_script_location"}:
|
|
591
|
+
return [("Command.ScriptLocation", str(value))]
|
|
592
|
+
if normalized_key == "extra_py_files":
|
|
593
|
+
return [("DefaultArguments.--extra-py-files", _to_csv_if_list(value))]
|
|
594
|
+
if normalized_key == "extra_files":
|
|
595
|
+
return [("DefaultArguments.--extra-files", _to_csv_if_list(value))]
|
|
596
|
+
if normalized_key == "additional_python_modules":
|
|
597
|
+
return [
|
|
598
|
+
(
|
|
599
|
+
"DefaultArguments.--additional-python-modules",
|
|
600
|
+
_saved_param_config_value(value),
|
|
601
|
+
)
|
|
602
|
+
]
|
|
603
|
+
if normalized_key == "default_arguments" and isinstance(value, dict):
|
|
604
|
+
fields: list[tuple[str, Any]] = []
|
|
605
|
+
for arg_key, arg_value in value.items():
|
|
606
|
+
final_key = arg_key if arg_key.startswith("--") else f"--{arg_key}"
|
|
607
|
+
fields.append(
|
|
608
|
+
(
|
|
609
|
+
f"DefaultArguments.{final_key}",
|
|
610
|
+
_saved_param_config_value(arg_value),
|
|
611
|
+
)
|
|
612
|
+
)
|
|
613
|
+
return fields
|
|
614
|
+
if normalized_key.startswith("default_arguments."):
|
|
615
|
+
arg_key = normalized_key.split(".", 1)[1]
|
|
616
|
+
final_key = arg_key if arg_key.startswith("--") else f"--{arg_key}"
|
|
617
|
+
return [
|
|
618
|
+
(
|
|
619
|
+
f"DefaultArguments.{final_key}",
|
|
620
|
+
_saved_param_config_value(value),
|
|
621
|
+
)
|
|
622
|
+
]
|
|
623
|
+
if normalized_key.startswith("source_control."):
|
|
624
|
+
sc_key = normalized_key.split(".", 1)[1]
|
|
625
|
+
return [(f"SourceControlDetails.{sc_key}", value)]
|
|
626
|
+
if normalized_key.startswith("command."):
|
|
627
|
+
cmd_key = normalized_key.split(".", 1)[1]
|
|
628
|
+
return [(f"Command.{cmd_key}", value)]
|
|
629
|
+
if normalized_key.startswith("--"):
|
|
630
|
+
return [(f"DefaultArguments.{normalized_key}", _to_csv_if_list(value))]
|
|
631
|
+
return [(normalized_key, value)]
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _collect_profile_mapping_mismatches(
|
|
635
|
+
config_data: dict[str, Any],
|
|
636
|
+
params: dict[str, Any],
|
|
637
|
+
) -> list[str]:
|
|
638
|
+
mismatches: list[str] = []
|
|
639
|
+
for key, value in params.items():
|
|
640
|
+
for field_path, expected in _profile_param_expected_fields(key, value):
|
|
641
|
+
actual = _get_config_field_path(config_data, field_path)
|
|
642
|
+
if actual != expected:
|
|
643
|
+
mismatches.append(
|
|
644
|
+
f"{field_path}: expected {expected!r}, found {actual!r}"
|
|
645
|
+
)
|
|
646
|
+
return mismatches
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def _validate_profile_mappings_align_with_config(
    *,
    job_name: str,
    config_data: dict[str, Any],
    profile: Optional[str],
    success_message: Optional[str] = None,
) -> None:
    """Check that the saved parameters of a job match its config.

    Does nothing when no parameters are saved for the job. Raises
    ``typer.BadParameter`` listing every mismatch, one bullet per line.
    When everything matches and ``success_message`` is given, it is echoed.
    """
    saved_params = _get_saved_params_for_job(job_name, profile=profile)
    if not saved_params:
        return

    mismatches = _collect_profile_mapping_mismatches(config_data, saved_params)
    if not mismatches:
        if success_message:
            typer.echo(success_message)
        return

    profile_label = f" for profile {profile}" if profile else ""
    report = [f"Profile mappings do not align with config{profile_label}: {job_name}"]
    report.extend(f"- {mismatch}" for mismatch in mismatches)
    raise typer.BadParameter("\n".join(report))
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _apply_saved_params_to_config(
|
|
674
|
+
config_data: dict[str, Any], params: dict[str, Any]
|
|
675
|
+
) -> list[str]:
|
|
676
|
+
changes: list[str] = []
|
|
677
|
+
default_args = config_data.setdefault("DefaultArguments", {})
|
|
678
|
+
source_control = config_data.setdefault("SourceControlDetails", {})
|
|
679
|
+
command = config_data.setdefault("Command", {})
|
|
680
|
+
|
|
681
|
+
for key, value in params.items():
|
|
682
|
+
normalized_key = key.strip()
|
|
683
|
+
if normalized_key.startswith(
|
|
684
|
+
(
|
|
685
|
+
"Command.",
|
|
686
|
+
"DefaultArguments.",
|
|
687
|
+
"NonOverridableArguments.",
|
|
688
|
+
"SourceControlDetails.",
|
|
689
|
+
)
|
|
690
|
+
):
|
|
691
|
+
config_value = (
|
|
692
|
+
_saved_param_config_value(value)
|
|
693
|
+
if normalized_key.startswith(
|
|
694
|
+
("DefaultArguments.", "NonOverridableArguments.")
|
|
695
|
+
)
|
|
696
|
+
else value
|
|
697
|
+
)
|
|
698
|
+
if not _set_config_field_path(
|
|
699
|
+
config_data, normalized_key, config_value, changes
|
|
700
|
+
):
|
|
701
|
+
changes.append(f"Skipped unresolved saved param path: {normalized_key}")
|
|
702
|
+
elif normalized_key in {"script_location", "local_script_path", "local_path"}:
|
|
703
|
+
_set_if_changed(source_control, "ScriptLocation", str(value), changes)
|
|
704
|
+
elif normalized_key in {"notebook_location", "notebook_path"}:
|
|
705
|
+
_set_if_changed(source_control, "NotebookLocation", str(value), changes)
|
|
706
|
+
_set_if_changed(source_control, "NotebookPath", str(value), changes)
|
|
707
|
+
elif normalized_key in {"command_script_location", "remote_script_location"}:
|
|
708
|
+
_set_if_changed(command, "ScriptLocation", str(value), changes)
|
|
709
|
+
elif normalized_key == "extra_py_files":
|
|
710
|
+
_set_if_changed(
|
|
711
|
+
default_args, "--extra-py-files", _to_csv_if_list(value), changes
|
|
712
|
+
)
|
|
713
|
+
elif normalized_key == "extra_files":
|
|
714
|
+
_set_if_changed(
|
|
715
|
+
default_args, "--extra-files", _to_csv_if_list(value), changes
|
|
716
|
+
)
|
|
717
|
+
elif normalized_key == "additional_python_modules":
|
|
718
|
+
_set_if_changed(
|
|
719
|
+
default_args,
|
|
720
|
+
"--additional-python-modules",
|
|
721
|
+
_saved_param_config_value(value),
|
|
722
|
+
changes,
|
|
723
|
+
)
|
|
724
|
+
elif normalized_key == "default_arguments" and isinstance(value, dict):
|
|
725
|
+
for arg_key, arg_value in value.items():
|
|
726
|
+
final_key = arg_key if arg_key.startswith("--") else f"--{arg_key}"
|
|
727
|
+
_set_if_changed(
|
|
728
|
+
default_args,
|
|
729
|
+
final_key,
|
|
730
|
+
_saved_param_config_value(arg_value),
|
|
731
|
+
changes,
|
|
732
|
+
)
|
|
733
|
+
elif normalized_key.startswith("default_arguments."):
|
|
734
|
+
arg_key = normalized_key.split(".", 1)[1]
|
|
735
|
+
final_key = arg_key if arg_key.startswith("--") else f"--{arg_key}"
|
|
736
|
+
_set_if_changed(
|
|
737
|
+
default_args, final_key, _saved_param_config_value(value), changes
|
|
738
|
+
)
|
|
739
|
+
elif normalized_key.startswith("source_control."):
|
|
740
|
+
sc_key = normalized_key.split(".", 1)[1]
|
|
741
|
+
_set_if_changed(source_control, sc_key, value, changes)
|
|
742
|
+
elif normalized_key.startswith("command."):
|
|
743
|
+
cmd_key = normalized_key.split(".", 1)[1]
|
|
744
|
+
_set_if_changed(command, cmd_key, value, changes)
|
|
745
|
+
elif normalized_key.startswith("--"):
|
|
746
|
+
_set_if_changed(
|
|
747
|
+
default_args, normalized_key, _to_csv_if_list(value), changes
|
|
748
|
+
)
|
|
749
|
+
else:
|
|
750
|
+
_set_if_changed(config_data, normalized_key, value, changes)
|
|
751
|
+
|
|
752
|
+
return changes
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def _get_checked_out_jobs() -> list[str]:
    """Return the checked-out job names recorded in the glue-set store.

    Names are stripped of surrounding whitespace; blank and non-string
    entries are dropped and duplicates keep their first position. An empty
    list is returned when the ``checkout`` entry or its ``jobs`` field does
    not have the expected shape.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    jobs = checkout.get("jobs") if isinstance(checkout, dict) else None
    if not isinstance(jobs, list):
        return []

    # A dict preserves insertion order, giving ordered de-duplication.
    unique_names: dict[str, None] = {}
    for entry in jobs:
        if isinstance(entry, str):
            name = entry.strip()
            if name:
                unique_names.setdefault(name, None)
    return list(unique_names)
|
|
771
|
+
|
|
772
|
+
|
|
773
|
+
def _save_checked_out_jobs(
    job_names: list[str],
    selector: str,
    source: str = "local",
    profile: Optional[str] = None,
) -> None:
    """Replace the store's ``checkout`` entry with a job-mode record.

    The new record holds the selector, source and a copy of ``job_names``.
    An existing ``local`` mapping on the previous checkout is carried over,
    and ``profile`` is stored only when it is a non-blank string.
    """
    store = _load_glue_set_store()
    record = {
        "mode": "job",
        "selector": selector,
        "source": source,
        "jobs": list(job_names),
    }

    previous = store.get("checkout", {})
    if isinstance(previous, dict):
        carried_local = previous.get("local")
        if isinstance(carried_local, dict):
            record["local"] = carried_local

    if isinstance(profile, str):
        cleaned_profile = profile.strip()
        if cleaned_profile:
            record["profile"] = cleaned_profile

    store["checkout"] = record
    _save_glue_set_store(store)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def _clear_checked_out_jobs() -> None:
    """Reset the store's ``checkout`` entry to an empty mapping and save it."""
    state = _load_glue_set_store()
    state["checkout"] = dict()
    _save_glue_set_store(state)
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def _save_checkout_local_paths(
    *,
    job_name: str,
    config_path: Path,
    config_data: dict[str, Any],
    dry_run: bool = False,
) -> dict[str, str]:
    """Record the local config/script/notebook paths for a checked-out job.

    The script path comes from ``SourceControlDetails.ScriptLocation``, then
    ``LocalPath``, then a default under ``glue/scripts``. The notebook path
    comes from ``NotebookLocation``/``NotebookPath`` when that is a string,
    otherwise it is derived from the script path. With ``dry_run`` the paths
    are only echoed; otherwise they are stored under
    ``checkout.local_paths[job_name]``. The POSIX-style path mapping is
    returned in both cases.
    """
    source_control = config_data.get("SourceControlDetails", {})
    if not isinstance(source_control, dict):
        source_control = {}

    script_value = source_control.get("ScriptLocation") or source_control.get(
        "LocalPath"
    )
    if not script_value:
        script_value = f"glue/scripts/{slugify(job_name)}.py"
    script_path = Path(script_value)

    notebook_value = source_control.get("NotebookLocation") or source_control.get(
        "NotebookPath"
    )
    if isinstance(notebook_value, str):
        notebook_path = Path(notebook_value)
    else:
        notebook_path = Path(_resolve_notebook_path(script_path))

    paths = {
        "config": config_path.as_posix(),
        "script": script_path.as_posix(),
        "notebook": notebook_path.as_posix(),
    }
    if dry_run:
        typer.echo(f"Would save checkout local paths for {job_name}: {paths}")
        return paths

    store = _load_glue_set_store()
    # Replace malformed (non-dict) entries instead of failing on them.
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = store["checkout"] = {}
    per_job_paths = checkout.setdefault("local_paths", {})
    if not isinstance(per_job_paths, dict):
        per_job_paths = checkout["local_paths"] = {}

    per_job_paths[job_name] = paths
    _save_glue_set_store(store)
    return paths
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
def _apply_saved_params_to_config_path(
    *,
    config_path: Path,
    config_data: dict[str, Any],
    job_name: str,
    profile: Optional[str],
    dry_run: bool,
) -> list[str]:
    """Apply a job's saved params to ``config_data`` and persist the result.

    Returns the list of changes reported by ``_apply_saved_params_to_config``,
    or an empty list when there are no saved params or nothing changed. The
    config file is rewritten only when ``dry_run`` is false; either way a
    one-line summary is echoed when there are changes.
    """
    saved = _get_saved_params_for_job(job_name, profile=profile)
    changes = _apply_saved_params_to_config(config_data, saved) if saved else []
    if not changes:
        return []

    profile_label = f" for profile {profile}" if profile else ""
    if not dry_run:
        config_path.write_text(json.dumps(config_data, indent=4))
        typer.echo(f"Applied saved params{profile_label}: {job_name}")
    else:
        typer.echo(f"Would apply saved params{profile_label}: {job_name}")
    return changes
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def _resolve_checkout_job_name(
    job_name: str,
    *,
    config_dir: Path,
) -> tuple[str, str]:
    """Return the stripped job name with where it was resolved from.

    The second element is ``"local"`` when the name is present in the config
    index loaded from ``config_dir`` and ``"offline"`` otherwise.

    Raises:
        typer.BadParameter: if the name is empty after stripping.
    """
    name = job_name.strip()
    if not name:
        raise typer.BadParameter("Job name must be a non-empty string.")

    origin = "local" if name in _load_config_index(config_dir) else "offline"
    return name, origin
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def _coerce_set_value(raw: str) -> Any:
|
|
888
|
+
value = raw.strip()
|
|
889
|
+
lower = value.lower()
|
|
890
|
+
if lower == "true":
|
|
891
|
+
return True
|
|
892
|
+
if lower == "false":
|
|
893
|
+
return False
|
|
894
|
+
if lower in {"null", "none"}:
|
|
895
|
+
return None
|
|
896
|
+
if (value.startswith("{") and value.endswith("}")) or (
|
|
897
|
+
value.startswith("[") and value.endswith("]")
|
|
898
|
+
):
|
|
899
|
+
try:
|
|
900
|
+
return json.loads(value)
|
|
901
|
+
except json.JSONDecodeError:
|
|
902
|
+
return raw
|
|
903
|
+
try:
|
|
904
|
+
return int(value)
|
|
905
|
+
except ValueError:
|
|
906
|
+
pass
|
|
907
|
+
try:
|
|
908
|
+
return float(value)
|
|
909
|
+
except ValueError:
|
|
910
|
+
return raw
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
def _format_csv_item(item: str) -> str:
|
|
914
|
+
if any(ch in item for ch in [",", '"']):
|
|
915
|
+
escaped = item.replace('"', '""')
|
|
916
|
+
return f'"{escaped}"'
|
|
917
|
+
return item
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
def _set_if_changed(
|
|
921
|
+
container: dict[str, Any], key: str, value: Any, changes: list[str]
|
|
922
|
+
) -> None:
|
|
923
|
+
if container.get(key) != value:
|
|
924
|
+
container[key] = value
|
|
925
|
+
changes.append(key)
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def _write_config_changes(
    config_path: Path,
    config_data: dict[str, Any],
    changes: list[str],
    *,
    dry_run: bool,
) -> None:
    """Report ``changes`` and, when warranted, write ``config_data`` to disk.

    Entries starting with ``"Skipped "`` are informational only. If no other
    entries exist, "No changes needed." is echoed and nothing is written. If
    there are real changes, the file is rewritten unless ``dry_run`` is set.
    Every entry, skipped or not, is listed after the headline.
    """
    actionable = [entry for entry in changes if not entry.startswith("Skipped ")]

    if not actionable:
        typer.echo("No changes needed.")
    elif dry_run:
        typer.echo(f"Would update {config_path}:")
    else:
        config_path.write_text(json.dumps(config_data, indent=4))
        typer.echo(f"Updated {config_path}:")

    # The itemized list is identical for every outcome (empty when no changes).
    for entry in changes:
        typer.echo(f"- {entry}")
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
def _to_csv_if_list(value: Any) -> Any:
|
|
961
|
+
if isinstance(value, list):
|
|
962
|
+
return ",".join(str(item) for item in value)
|
|
963
|
+
return value
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
# Underscore-prefixed helpers are listed explicitly because a star-import
# skips such names unless they appear in ``__all__``.
__all__ = [
    "_examples_epilog",
    "_find_workspace_root",
    "run_command",
    "_emit_compatibility_notice",
    "_raise_missing_local_config",
    "_looks_like_remote_module_spec",
    "_routes_to_additional_python_modules",
    "_collect_config_local_artifact_paths",
    "_set_saved_scope",
    "_load_config_index",
    "_parse_s3_url",
    "_find_sequence",
    "_derive_s3_target",
    "_load_glue_set_store",
    "_save_glue_set_store",
    "_resolve_single_job_name",
    "_get_saved_params_for_job",
    "_get_checked_out_profile",
    "_get_local_checkouts",
    "_save_local_checkout",
    "_get_checked_out_local_setup_name",
    "_get_checked_out_local_setup",
    "_set_checked_out_local_setup",
    "_get_config_field_path",
    "_profile_param_expected_fields",
    "_collect_profile_mapping_mismatches",
    "_validate_profile_mappings_align_with_config",
    "_apply_saved_params_to_config",
    "_get_checked_out_jobs",
    "_save_checked_out_jobs",
    "_clear_checked_out_jobs",
    "_save_checkout_local_paths",
    "_apply_saved_params_to_config_path",
    "_resolve_checkout_job_name",
    "_coerce_set_value",
    "_format_csv_item",
    "_set_if_changed",
    "_write_config_changes",
    "_to_csv_if_list",
    "_parse_datetime",
    "_get_local_last_modified",
    "_write_glue_job_list_csv",
]
|
|
1010
|
+
|
|
1011
|
+
|
|
1012
|
+
def _parse_datetime(value: Any) -> Optional[datetime]:
|
|
1013
|
+
if not value:
|
|
1014
|
+
return None
|
|
1015
|
+
if isinstance(value, datetime):
|
|
1016
|
+
parsed = value
|
|
1017
|
+
elif isinstance(value, (int, float)):
|
|
1018
|
+
try:
|
|
1019
|
+
parsed = datetime.fromtimestamp(value, tz=timezone.utc)
|
|
1020
|
+
except (OverflowError, OSError, ValueError):
|
|
1021
|
+
return None
|
|
1022
|
+
elif isinstance(value, str):
|
|
1023
|
+
text = value.strip()
|
|
1024
|
+
if not text:
|
|
1025
|
+
return None
|
|
1026
|
+
if text.endswith("Z"):
|
|
1027
|
+
text = f"{text[:-1]}+00:00"
|
|
1028
|
+
try:
|
|
1029
|
+
parsed = datetime.fromisoformat(text)
|
|
1030
|
+
except ValueError:
|
|
1031
|
+
return None
|
|
1032
|
+
else:
|
|
1033
|
+
return None
|
|
1034
|
+
|
|
1035
|
+
if parsed.tzinfo is None:
|
|
1036
|
+
return parsed.replace(tzinfo=timezone.utc)
|
|
1037
|
+
return parsed.astimezone(timezone.utc)
|
|
1038
|
+
|
|
1039
|
+
|
|
1040
|
+
def _get_local_last_modified(
    config_path: Path, config_data: dict[str, Any]
) -> Optional[datetime]:
    """Return when the local config was last modified, in UTC.

    The config's ``LastModifiedOn`` value is preferred when it parses; if it
    does not, the modification time of ``config_path`` is used. ``None`` is
    returned if neither source is available.
    """
    recorded = _parse_datetime(config_data.get("LastModifiedOn"))
    if recorded:
        return recorded
    if not config_path.exists():
        return None
    modified_at = config_path.stat().st_mtime
    return datetime.fromtimestamp(modified_at, tz=timezone.utc)
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
def _write_glue_job_list_csv(jobs: list[dict[str, Any]], output_path: Path) -> None:
|
|
1052
|
+
fieldnames: list[str] = []
|
|
1053
|
+
for job in jobs:
|
|
1054
|
+
if not isinstance(job, Mapping):
|
|
1055
|
+
continue
|
|
1056
|
+
for key in job.keys():
|
|
1057
|
+
if key not in fieldnames:
|
|
1058
|
+
fieldnames.append(str(key))
|
|
1059
|
+
|
|
1060
|
+
with output_path.open("w", newline="", encoding="utf-8") as handle:
|
|
1061
|
+
if not fieldnames:
|
|
1062
|
+
handle.write("")
|
|
1063
|
+
return
|
|
1064
|
+
writer = csv.DictWriter(handle, fieldnames=fieldnames, extrasaction="ignore")
|
|
1065
|
+
writer.writeheader()
|
|
1066
|
+
for job in jobs:
|
|
1067
|
+
if isinstance(job, Mapping):
|
|
1068
|
+
writer.writerow(job)
|