gluekit 1.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. gluekit/__init__.py +7 -0
  2. gluekit/app.py +0 -0
  3. gluekit/cli.py +64 -0
  4. gluekit/commands/__init__.py +1 -0
  5. gluekit/commands/add.py +455 -0
  6. gluekit/commands/build.py +816 -0
  7. gluekit/commands/checkout.py +114 -0
  8. gluekit/commands/clone.py +516 -0
  9. gluekit/commands/config_commands.py +180 -0
  10. gluekit/commands/constants.py +47 -0
  11. gluekit/commands/convert.py +336 -0
  12. gluekit/commands/edit.py +1104 -0
  13. gluekit/commands/helpers.py +1068 -0
  14. gluekit/commands/init.py +798 -0
  15. gluekit/commands/list.py +16 -0
  16. gluekit/commands/local_commands.py +680 -0
  17. gluekit/commands/pull.py +374 -0
  18. gluekit/commands/push.py +251 -0
  19. gluekit/commands/remove.py +161 -0
  20. gluekit/commands/run.py +126 -0
  21. gluekit/commands/status.py +97 -0
  22. gluekit/commands/sync.py +97 -0
  23. gluekit/commands/update.py +104 -0
  24. gluekit/job_mgmt/__init__.py +0 -0
  25. gluekit/job_mgmt/glue_jobs.py +1323 -0
  26. gluekit/job_mgmt/magics.py +122 -0
  27. gluekit/job_mgmt/resources/__init__.py +0 -0
  28. gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
  29. gluekit/job_mgmt/resources/magic_map.json +83 -0
  30. gluekit/job_mgmt/schema.py +165 -0
  31. gluekit/local/__init__.py +6 -0
  32. gluekit/local/awsglue/__init__.py +1 -0
  33. gluekit/local/awsglue/context.py +30 -0
  34. gluekit/local/awsglue/job.py +9 -0
  35. gluekit/local/awsglue/utils.py +17 -0
  36. gluekit/local/local.py +434 -0
  37. gluekit/local/local_fixtures.py +337 -0
  38. gluekit/local/pyspark/__init__.py +7 -0
  39. gluekit/local/pyspark/context.py +31 -0
  40. gluekit/local/pyspark/sql/__init__.py +6 -0
  41. gluekit/local/pyspark/sql/session.py +29 -0
  42. gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
  43. gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
  44. gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
  45. gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
  46. gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1068 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ import re
6
+ import subprocess
7
+ from collections.abc import Mapping
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any, Optional
11
+
12
+ import typer
13
+ from slugify import slugify
14
+
15
+ from ..job_mgmt.glue_jobs import (
16
+ normalize_glue_config_data,
17
+ _resolve_notebook_path,
18
+ )
19
+
20
+ from .constants import GLUE_SET_FILE
21
+
22
+
23
def _examples_epilog(*lines: str) -> str:
    """Render example command lines as a Markdown epilog.

    Each line is stripped, wrapped in backticks as a ``-`` bullet, and the
    bullets are joined by blank lines under an ``**Examples**`` heading.
    """
    bullets = [f"- `{text.strip()}`" for text in lines]
    body = "\n\n".join(bullets)
    return "\n\n**Examples**\n\n" + body
26
+
27
+
28
def _find_workspace_root(start: Optional[Path] = None) -> Path:
    """Return the nearest ancestor directory that contains ``pyproject.toml``.

    The search begins at ``start`` (or the current working directory when
    ``start`` is not given), resolved to an absolute path, and walks upward.
    If no ancestor holds a ``pyproject.toml``, the resolved starting path is
    returned unchanged.
    """
    origin = (start if start else Path.cwd()).resolve()
    directory = origin
    while True:
        if (directory / "pyproject.toml").exists():
            return directory
        parent = directory.parent
        if parent == directory:
            # Reached the filesystem root without finding a marker file.
            return origin
        directory = parent
34
+
35
+
36
def run_command(
    command: list[str],
    cwd: Optional[Path] = None,
    dry_run: bool = False,
    verbose: bool = False,
) -> None:
    """Run ``command`` as a subprocess, optionally echoing it first.

    Args:
        command: Program and arguments, passed to ``subprocess.run`` as a list.
        cwd: Working directory for the subprocess; the current one when falsy.
        dry_run: When true, only print what would be run and return.
        verbose: When true, print the command before running it.

    Raises:
        typer.BadParameter: If ``subprocess.run`` raises ``FileNotFoundError``.
        typer.Exit: With the child's return code if it exits non-zero.
    """
    rendered = " ".join(command)
    if dry_run:
        typer.echo(f"{'Would run'}: {rendered}")
        return
    if verbose:
        typer.echo(f"{'Running'}: {rendered}")

    working_dir = str(cwd) if cwd else None
    try:
        subprocess.run(command, cwd=working_dir, check=True, text=True)
    except FileNotFoundError as exc:
        raise typer.BadParameter(f"Command not found: {command[0]}") from exc
    except subprocess.CalledProcessError as exc:
        # Propagate the child's exit status as the CLI exit status.
        raise typer.Exit(code=exc.returncode) from exc
58
+
59
+
60
def _emit_compatibility_notice(command_name: str, replacement_hint: str) -> None:
    """Print a note that ``command_name`` is kept for compatibility.

    The note points the user at ``gluekit edit`` followed by
    ``replacement_hint``.
    """
    notice = (
        f"Note: 'gluekit {command_name}' is kept for compatibility. "
        f"Prefer 'gluekit edit' {replacement_hint}."
    )
    typer.echo(notice)
64
+
65
+
66
def _raise_missing_local_config(
    job_name: str, config_dir: Path, command_label: str
) -> None:
    """Raise ``typer.BadParameter`` explaining that no config matched ``job_name``.

    When the job is among the checked-out jobs, the message additionally
    explains that the job is selected but has no local config yet and names
    ``command_label`` as the command that needs one. This function always
    raises.
    """
    if job_name not in _get_checked_out_jobs():
        raise typer.BadParameter(
            f'No config files matched "{job_name}" in {config_dir}.'
        )

    message_parts = [
        f'No local config files matched "{job_name}" in {config_dir}. ',
        f"This checked-out job is selected for local work, but it does not have a local config yet. ",
        f"Create or clone a config before running {command_label}.",
    ]
    raise typer.BadParameter("".join(message_parts))
77
+
78
+
79
def _looks_like_remote_module_spec(item: str) -> bool:
    """Return True when ``item`` (stripped) starts with a ``scheme://`` prefix.

    The scheme must begin with an ASCII letter and may continue with ASCII
    letters, digits, ``+``, ``.`` or ``-``. Blank input yields False.
    """
    candidate = item.strip()
    if candidate == "":
        return False
    found = re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", candidate)
    return found is not None
84
+
85
+
86
def _routes_to_additional_python_modules(path: Path) -> bool:
    """Return True when ``path`` has a ``.whl`` suffix (case-insensitive)."""
    extension = path.suffix.lower()
    return extension == ".whl"
88
+
89
+
90
def _collect_config_local_artifact_paths(
    config_data: dict[str, Any],
    *,
    job_name: str,
    include_additional_python_files: bool,
    include_extra_files: bool,
) -> list[tuple[str, Path]]:
    """List the local artifact paths referenced by a job config.

    The result always starts with ``("script", ...)`` and ``("notebook", ...)``
    taken from ``SourceControlDetails``. The script falls back to
    ``glue/scripts/<slug>.py`` and the notebook to
    ``_resolve_notebook_path(script_path)`` when the config names neither.

    Depending on the two ``include_*`` flags, entries under
    ``AdditionalPythonFiles``/``ExtraPyFiles`` and
    ``AdditionalFiles``/``ExtraFiles`` are appended for every dict entry that
    carries a non-blank ``LocalPath``/``local_path``/``localPath`` string.
    """
    source_control = config_data.get("SourceControlDetails", {}) or {}
    if not isinstance(source_control, dict):
        source_control = {}

    script_value = source_control.get("ScriptLocation") or source_control.get(
        "LocalPath"
    )
    if not script_value:
        script_value = f"glue/scripts/{slugify(job_name)}.py"
    script_path = Path(script_value)

    notebook_value = source_control.get("NotebookLocation") or source_control.get(
        "NotebookPath"
    )
    if isinstance(notebook_value, str):
        notebook_path = Path(notebook_value)
    else:
        notebook_path = Path(_resolve_notebook_path(script_path))

    collected: list[tuple[str, Path]] = [
        ("script", script_path),
        ("notebook", notebook_path),
    ]

    def local_paths_of(raw: Any):
        """Yield a Path for each usable local-path entry in ``raw``."""
        if isinstance(raw, dict):
            # A single mapping is treated as a one-element list.
            raw = [raw]
        if not isinstance(raw, list):
            return
        for item in raw:
            if not isinstance(item, dict):
                continue
            candidate = (
                item.get("LocalPath")
                or item.get("local_path")
                or item.get("localPath")
            )
            if isinstance(candidate, str) and candidate.strip():
                yield Path(candidate)

    wanted: list[tuple[str, str]] = []
    if include_additional_python_files:
        wanted.append(("AdditionalPythonFiles", "additional-python-file"))
        wanted.append(("ExtraPyFiles", "extra-py-file"))
    if include_extra_files:
        wanted.append(("AdditionalFiles", "extra-file"))
        wanted.append(("ExtraFiles", "extra-file"))

    for field, kind in wanted:
        for found in local_paths_of(source_control.get(field)):
            collected.append((kind, found))

    return collected
149
+
150
+
151
def _set_saved_scope(
    parsed: dict[str, Any],
    job_name: Optional[str],
    global_scope: bool,
    profile: Optional[str] = None,
) -> str:
    """Merge ``parsed`` into the saved store under the selected scope.

    The scope is either the ``global`` section or ``jobs[job_name]``, located
    at the top level of the store or, when a non-blank ``profile`` is given,
    under ``profiles[profile]``. The store is saved after the merge.

    Returns:
        A label for the updated scope: ``"global"``, the job name, or the
        same prefixed with ``profile:<name>:`` when a profile was used.

    Raises:
        typer.BadParameter: If both a job name and ``global_scope`` are given,
            or if neither is.
    """
    if global_scope and job_name:
        raise typer.BadParameter("Use either <job-name> or --global, not both.")
    store = _load_glue_set_store()

    profile_name = profile.strip() if isinstance(profile, str) else None
    base = store
    if profile_name:
        profiles = store.setdefault("profiles", {})
        base = profiles.setdefault(profile_name, {})
        if not isinstance(base, dict):
            base = {}
            profiles[profile_name] = base
        # Make sure both sections exist as dicts inside the profile.
        for section in ("global", "jobs"):
            if not isinstance(base.get(section), dict):
                base[section] = {}

    if global_scope:
        target = f"profile:{profile_name}:global" if profile_name else "global"
        scope = base.setdefault("global", {})
        if not isinstance(scope, dict):
            scope = {}
            base["global"] = scope
    else:
        if not job_name:
            raise typer.BadParameter("Provide <job-name> or use --global.")
        target = f"profile:{profile_name}:{job_name}" if profile_name else job_name
        scope = base.setdefault("jobs", {}).setdefault(job_name, {})
        if not isinstance(scope, dict):
            scope = {}
            base.setdefault("jobs", {})[job_name] = scope

    scope.update(parsed)
    _save_glue_set_store(store)
    return target
196
+
197
+
198
+ def _load_config_index(config_dir: Path) -> dict[str, dict[str, Any]]:
199
+ config_index: dict[str, dict[str, Any]] = {}
200
+ for config_path in config_dir.glob("*.json"):
201
+ try:
202
+ config_data = normalize_glue_config_data(
203
+ json.loads(config_path.read_text())
204
+ )
205
+ except json.JSONDecodeError:
206
+ continue
207
+ job_name = config_data.get("Name")
208
+ if job_name:
209
+ config_index[job_name] = {
210
+ "config_path": config_path,
211
+ "config": config_data,
212
+ }
213
+ return config_index
214
+
215
+
216
def _parse_s3_url(s3_url: str) -> tuple[str, str]:
    """Split an ``s3://bucket/key`` URL into ``(bucket, key)``.

    The split happens at the first ``/`` after the scheme, so the key may
    itself contain slashes.

    Raises:
        typer.BadParameter: If the URL does not start with ``s3://`` or has
            no ``/`` after the bucket name.
    """
    scheme = "s3://"
    if not s3_url.startswith(scheme):
        raise typer.BadParameter(f"Invalid S3 URL: {s3_url}")
    bucket, slash, key = s3_url[len(scheme):].partition("/")
    if not slash:
        raise typer.BadParameter(f"Invalid S3 URL (missing key): {s3_url}")
    return bucket, key
224
+
225
+
226
def _find_sequence(parts: tuple[str, ...], sequence: tuple[str, ...]) -> Optional[int]:
    """Return the first index at which ``sequence`` occurs contiguously in ``parts``.

    Returns None when ``sequence`` is empty or does not occur.
    """
    width = len(sequence)
    if width == 0:
        return None
    starts = range(len(parts) - width + 1)
    return next(
        (start for start in starts if parts[start : start + width] == sequence),
        None,
    )
233
+
234
+
235
def _derive_s3_target(
    local_path: Path,
    job_name: str,
    script_location: str,
    local_script_path: Path,
) -> str:
    """Derive the ``s3://`` destination for a local file of a job.

    The bucket always comes from ``script_location``. The key is chosen by the
    first rule that applies:

    1. If the S3 script directory also appears as a contiguous run inside the
       local script's directory, and ``local_path`` lies under that local
       root, the path relative to the root is re-rooted under the S3 script
       directory.
    2. If ``local_path`` starts with a ``glue`` component, that component is
       dropped.
    3. If ``job_name`` or its slug is a component of ``local_path``, the key
       starts at that component. The raw job name is tried before the slug.
    4. A relative path with more than one component is used as-is.
    5. Otherwise only the file name is used.

    Raises:
        typer.BadParameter: Propagated from ``_parse_s3_url`` when
            ``script_location`` is not a valid S3 URL.
    """
    bucket, script_key = _parse_s3_url(script_location)
    script_dir_parts = Path(script_key).parent.parts
    local_script_dir_parts = local_script_path.parent.parts
    local_root = None

    if script_dir_parts:
        idx = _find_sequence(local_script_dir_parts, script_dir_parts)
        if idx is not None:
            local_root = Path(*local_script_dir_parts[: idx + len(script_dir_parts)])

    if local_root and local_path.is_relative_to(local_root):
        rel = local_path.relative_to(local_root)
        s3_key = Path(*script_dir_parts) / rel
        return f"s3://{bucket}/{s3_key.as_posix()}"

    if local_path.parts and local_path.parts[0] == "glue":
        rel = (
            Path(*local_path.parts[1:])
            if len(local_path.parts) > 1
            else Path(local_path.name)
        )
        return f"s3://{bucket}/{rel.as_posix()}"

    # An ordered, de-duplicated sequence keeps the result deterministic:
    # iterating a set of strings can change order between interpreter runs,
    # which matters when both the raw name and its slug occur in the path.
    job_candidates = tuple(dict.fromkeys((job_name, slugify(job_name))))
    for candidate in job_candidates:
        if candidate in local_path.parts:
            idx = local_path.parts.index(candidate)
            s3_key = Path(*local_path.parts[idx:])
            return f"s3://{bucket}/{s3_key.as_posix()}"

    if not local_path.is_absolute() and len(local_path.parts) > 1:
        return f"s3://{bucket}/{local_path.as_posix()}"

    return f"s3://{bucket}/{local_path.name}"
275
+
276
+
277
def _load_glue_set_store() -> dict[str, Any]:
    """Load the saved-settings store from ``GLUE_SET_FILE`` and repair its shape.

    An empty store (``global``, ``jobs``, ``profiles``, ``checkout`` all
    empty dicts) is returned when the file is missing, is not valid JSON, or
    does not hold a JSON object.

    Otherwise the loaded object is normalized in place: the four sections are
    forced to dicts, profile entries that are not dicts are dropped, each
    profile gets dict ``global``/``jobs`` sections, the ``local_checkouts``
    key is removed, and ``checkout["local"]`` is normalized.
    """

    def blank_store() -> dict[str, Any]:
        return {"global": {}, "jobs": {}, "profiles": {}, "checkout": {}}

    if not GLUE_SET_FILE.exists():
        return blank_store()
    try:
        store = json.loads(GLUE_SET_FILE.read_text())
    except json.JSONDecodeError:
        return blank_store()
    if not isinstance(store, dict):
        return blank_store()

    for section in ("global", "jobs", "profiles"):
        if not isinstance(store.get(section), dict):
            store[section] = {}

    profiles = store["profiles"]
    # Iterate over a snapshot of the keys because entries may be removed.
    for name in list(profiles):
        entry = profiles[name]
        if not (isinstance(name, str) and isinstance(entry, dict)):
            profiles.pop(name, None)
            continue
        for section in ("global", "jobs"):
            if not isinstance(entry.get(section), dict):
                entry[section] = {}

    if not isinstance(store.get("checkout"), dict):
        store["checkout"] = {}
    store.pop("local_checkouts", None)
    _normalize_checkout_local(store["checkout"])
    return store
320
+
321
+
322
def _normalize_checkout_local(checkout: dict[str, Any]) -> None:
    """Clean up ``checkout["local"]`` in place.

    When the value is a dict, only entries with a string key and a non-blank
    string value are kept, with values stripped. The cleaned dict replaces
    the original only if it still has a ``name`` entry; in every other case
    the ``local`` key is removed from ``checkout``.
    """
    raw = checkout.get("local")
    if isinstance(raw, dict):
        cleaned: dict[str, str] = {}
        for field, text in raw.items():
            if not isinstance(field, str) or not isinstance(text, str):
                continue
            trimmed = text.strip()
            if trimmed:
                cleaned[field] = trimmed
        # Every kept value is a non-blank string, so presence is sufficient.
        if "name" in cleaned:
            checkout["local"] = cleaned
            return

    checkout.pop("local", None)
335
+
336
+
337
def _save_glue_set_store(data: dict[str, Any]) -> None:
    """Write ``data`` to ``GLUE_SET_FILE`` as JSON indented by four spaces.

    The parent directory of the file is created first if it is missing.
    """
    target_dir = GLUE_SET_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=4)
    GLUE_SET_FILE.write_text(payload)
340
+
341
+
342
def _resolve_single_job_name(job_name: Optional[str], context_label: str) -> str:
    """Return ``job_name``, or the single checked-out job when none is given.

    Raises:
        typer.BadParameter: If no job name is given and the saved checkout
            holds zero jobs or more than one job. ``context_label`` names the
            operation in the error message.
    """
    if job_name:
        return job_name

    selected = _get_checked_out_jobs()
    count = len(selected)
    if count == 1:
        return selected[0]
    if count == 0:
        raise typer.BadParameter(
            f"Provide <job-name> or run 'gluekit checkout <job-name>' before {context_label}."
        )
    raise typer.BadParameter(
        f"{context_label} requires a single active checkout selection; found {count} jobs in {GLUE_SET_FILE}."
    )
356
+
357
+
358
def _get_saved_params_for_job(
    job_name: str, profile: Optional[str] = None
) -> dict[str, Any]:
    """Merge the saved parameters that apply to ``job_name``.

    Layers are applied in this order, later layers overriding earlier ones:
    top-level ``global``, top-level ``jobs[job_name]``, then the profile's
    ``global`` and the profile's ``jobs[job_name]``. Layers that are not
    dicts are ignored.
    """
    store = _load_glue_set_store()
    layers: list[Any] = [
        store.get("global", {}),
        store.get("jobs", {}).get(job_name, {}),
    ]

    profile_name = profile.strip() if isinstance(profile, str) else None
    profile_store = store.get("profiles", {}).get(profile_name, {})
    if isinstance(profile_store, dict):
        layers.append(profile_store.get("global", {}))
        layers.append(profile_store.get("jobs", {}).get(job_name, {}))

    merged: dict[str, Any] = {}
    for layer in layers:
        if isinstance(layer, dict):
            merged.update(layer)
    return merged
382
+
383
+
384
def _get_checked_out_profile() -> Optional[str]:
    """Return the stripped ``checkout.profile`` from the store, or None.

    None is returned when the checkout section is not a dict or the profile
    is missing, not a string, or blank.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    if isinstance(checkout, dict):
        candidate = checkout.get("profile")
        if isinstance(candidate, str):
            trimmed = candidate.strip()
            if trimmed:
                return trimmed
    return None
393
+
394
+
395
def _get_local_checkouts() -> dict[str, dict[str, Any]]:
    """Return the checked-out local setup keyed by its name.

    The result has at most one entry and is empty when there is no local
    setup or its ``name`` is not a non-blank string.
    """
    setup = _get_checked_out_local_setup()
    name = setup.get("name") if setup else None
    if isinstance(name, str) and name.strip():
        return {name: setup}
    return {}
403
+
404
+
405
def _save_local_checkout(name: str, data: dict[str, Any]) -> None:
    """Replace ``checkout["local"]`` in the store with a named local setup.

    The saved entry holds the stripped ``name`` plus every item of ``data``
    whose key is a string and whose value is a non-blank string (stripped).

    Raises:
        typer.BadParameter: If ``name`` is blank after stripping.
    """
    normalized_name = name.strip()
    if not normalized_name:
        raise typer.BadParameter("Local setup name must be a non-empty string.")

    store = _load_glue_set_store()
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = {}
        store["checkout"] = checkout

    entry: dict[str, Any] = {"name": normalized_name}
    # NOTE(review): a non-blank "name" item in `data` overrides the validated
    # name above because it is applied afterwards — confirm this is intended.
    for field, text in data.items():
        if isinstance(field, str) and isinstance(text, str) and text.strip():
            entry[field] = text.strip()

    checkout["local"] = entry
    _save_glue_set_store(store)
423
+
424
+
425
def _get_checked_out_local_setup_name() -> Optional[str]:
    """Return the stripped ``checkout.local.name`` from the store, or None.

    None is returned when ``checkout`` or ``checkout.local`` is not a dict,
    or when the name is missing, not a string, or blank.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    local_entry = checkout.get("local") if isinstance(checkout, dict) else None
    if not isinstance(local_entry, dict):
        return None
    name = local_entry.get("name")
    if not isinstance(name, str):
        return None
    return name.strip() or None
436
+
437
+
438
def _get_checked_out_local_setup() -> Optional[dict[str, Any]]:
    """Return a shallow copy of ``checkout.local`` from the store, or None.

    None is returned when ``checkout`` or ``checkout.local`` is not a dict,
    or when its ``name`` is not a non-blank string.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    local_entry = checkout.get("local") if isinstance(checkout, dict) else None
    if not isinstance(local_entry, dict):
        return None
    name = local_entry.get("name")
    has_name = isinstance(name, str) and bool(name.strip())
    return dict(local_entry) if has_name else None
450
+
451
+
452
def _set_checked_out_local_setup(name: str) -> None:
    """Point ``checkout["local"]`` at the setup called ``name`` and save.

    The non-blank string values of ``profile``, ``job`` and ``mode`` from the
    previous local entry are carried over (stripped); every other field of
    the previous entry is dropped.

    Raises:
        typer.BadParameter: If ``name`` is blank after stripping.
    """
    normalized_name = name.strip()
    if not normalized_name:
        raise typer.BadParameter("Local setup name must be a non-empty string.")

    store = _load_glue_set_store()
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = {}
        store["checkout"] = checkout

    previous = checkout.get("local")
    carried: dict[str, str] = {}
    if isinstance(previous, dict):
        for field in ("profile", "job", "mode"):
            raw = previous.get(field)
            if isinstance(raw, str) and raw.strip():
                carried[field] = raw.strip()

    checkout["local"] = {"name": normalized_name, **carried}
    _save_glue_set_store(store)
470
+
471
+
472
+ def _parse_config_field_path(field_path: str) -> list[str | int]:
473
+ parts: list[str | int] = []
474
+ for raw_part in field_path.split("."):
475
+ if not raw_part:
476
+ return []
477
+ name, bracket, remainder = raw_part.partition("[")
478
+ if name:
479
+ parts.append(name)
480
+ while bracket:
481
+ index_text, closing, remainder = remainder.partition("]")
482
+ if closing != "]" or not index_text.isdigit():
483
+ return []
484
+ parts.append(int(index_text))
485
+ bracket = ""
486
+ if remainder:
487
+ if not remainder.startswith("["):
488
+ return []
489
+ bracket = "["
490
+ remainder = remainder[1:]
491
+ return parts
492
+
493
+
494
def _set_config_field_path(
    config_data: dict[str, Any],
    field_path: str,
    value: Any,
    changes: list[str],
) -> bool:
    """Assign ``value`` at ``field_path`` inside ``config_data``.

    Missing intermediate dict keys are created as empty dicts unless the next
    path part is a list index; list indexes must already exist. When the
    final part is a list index and the value differs, ``field_path`` is
    appended to ``changes``; when it is a dict key, ``_set_if_changed``
    performs the assignment and the bookkeeping.

    Returns:
        True when the path could be resolved (whether or not the value
        changed), False when the path is malformed or cannot be walked.
    """
    segments = _parse_config_field_path(field_path)
    if not segments:
        return False

    *walk, leaf = segments
    node: Any = config_data
    for position, segment in enumerate(walk):
        if isinstance(segment, int):
            if not isinstance(node, list) or segment >= len(node):
                return False
        else:
            if not isinstance(node, dict):
                return False
            if segment not in node:
                # A missing key can only be created as a dict, so it cannot
                # be followed by a list index.
                if isinstance(segments[position + 1], int):
                    return False
                node[segment] = {}
        node = node[segment]

    if isinstance(leaf, int):
        if not isinstance(node, list) or leaf >= len(node):
            return False
        if node[leaf] != value:
            node[leaf] = value
            changes.append(field_path)
        return True

    if not isinstance(node, dict):
        return False
    _set_if_changed(node, leaf, value, changes)
    return True
534
+
535
+
536
def _saved_param_config_value(value: Any) -> Any:
    """Convert a saved parameter value into the value stored in a config.

    A dict that has a ``local`` key and a string ``remote`` entry yields the
    ``remote`` string. Anything else goes through ``_to_csv_if_list``.
    """
    if isinstance(value, dict) and "local" in value:
        remote = value.get("remote")
        if isinstance(remote, str):
            return remote
    return _to_csv_if_list(value)
544
+
545
+
546
def _get_config_field_path(config_data: dict[str, Any], field_path: str) -> Any:
    """Read the value at ``field_path`` inside ``config_data``.

    Returns None when the path is malformed, a dict key is missing, a list
    index is out of range, or a container has the wrong type for the path
    part being applied.
    """
    segments = _parse_config_field_path(field_path)
    if not segments:
        return None

    node: Any = config_data
    for segment in segments:
        if isinstance(segment, int):
            reachable = isinstance(node, list) and segment < len(node)
        else:
            reachable = isinstance(node, dict) and segment in node
        if not reachable:
            return None
        node = node[segment]
    return node
562
+
563
+
564
def _profile_param_expected_fields(key: str, value: Any) -> list[tuple[str, Any]]:
    """Map one saved parameter to the config field(s) it should produce.

    Returns:
        ``(field_path, expected_value)`` pairs. Keys already written as
        config paths (``Command.``, ``DefaultArguments.``,
        ``NonOverridableArguments.``, ``SourceControlDetails.``) map to
        themselves; the snake_case aliases, the ``default_arguments``,
        ``source_control.`` and ``command.`` forms, and bare ``--flag`` keys
        are translated to their config path. Any other key maps to itself at
        the top level.
    """
    name = key.strip()
    csv_sections = ("DefaultArguments.", "NonOverridableArguments.")
    path_sections = ("Command.", *csv_sections, "SourceControlDetails.")

    def as_flag(arg_name: str) -> str:
        """Prefix ``arg_name`` with ``--`` unless it already has it."""
        return arg_name if arg_name.startswith("--") else f"--{arg_name}"

    if name.startswith(path_sections):
        if name.startswith(csv_sections):
            return [(name, _saved_param_config_value(value))]
        return [(name, value)]

    if name in {"script_location", "local_script_path", "local_path"}:
        return [("SourceControlDetails.ScriptLocation", str(value))]

    if name in {"notebook_location", "notebook_path"}:
        text = str(value)
        return [
            ("SourceControlDetails.NotebookLocation", text),
            ("SourceControlDetails.NotebookPath", text),
        ]

    if name in {"command_script_location", "remote_script_location"}:
        return [("Command.ScriptLocation", str(value))]

    if name == "extra_py_files":
        return [("DefaultArguments.--extra-py-files", _to_csv_if_list(value))]

    if name == "extra_files":
        return [("DefaultArguments.--extra-files", _to_csv_if_list(value))]

    if name == "additional_python_modules":
        return [
            (
                "DefaultArguments.--additional-python-modules",
                _saved_param_config_value(value),
            )
        ]

    if name == "default_arguments" and isinstance(value, dict):
        return [
            (
                f"DefaultArguments.{as_flag(arg_key)}",
                _saved_param_config_value(arg_value),
            )
            for arg_key, arg_value in value.items()
        ]

    # The remaining prefixed forms all split at the first dot.
    section, dot, remainder = name.partition(".")
    if dot and section == "default_arguments":
        return [
            (
                f"DefaultArguments.{as_flag(remainder)}",
                _saved_param_config_value(value),
            )
        ]
    if dot and section == "source_control":
        return [(f"SourceControlDetails.{remainder}", value)]
    if dot and section == "command":
        return [(f"Command.{remainder}", value)]

    if name.startswith("--"):
        return [(f"DefaultArguments.{name}", _to_csv_if_list(value))]
    return [(name, value)]
632
+
633
+
634
def _collect_profile_mapping_mismatches(
    config_data: dict[str, Any],
    params: dict[str, Any],
) -> list[str]:
    """Describe every saved parameter whose config value differs.

    Each parameter is expanded with ``_profile_param_expected_fields`` and
    compared against the value read by ``_get_config_field_path``.

    Returns:
        One ``"<path>: expected <repr>, found <repr>"`` string per mismatch.
    """
    problems: list[str] = []
    for key in params:
        expectations = _profile_param_expected_fields(key, params[key])
        for path, wanted in expectations:
            found = _get_config_field_path(config_data, path)
            if found != wanted:
                problems.append(f"{path}: expected {wanted!r}, found {found!r}")
    return problems
647
+
648
+
649
def _validate_profile_mappings_align_with_config(
    *,
    job_name: str,
    config_data: dict[str, Any],
    profile: Optional[str],
    success_message: Optional[str] = None,
) -> None:
    """Check that the saved params for a job match its config.

    Nothing happens when there are no saved params for the job. Otherwise
    ``success_message`` is echoed (if given) when everything matches.

    Raises:
        typer.BadParameter: Listing each mismatch as a bullet when the config
            and the saved params disagree.
    """
    saved = _get_saved_params_for_job(job_name, profile=profile)
    if not saved:
        return

    problems = _collect_profile_mapping_mismatches(config_data, saved)
    profile_label = f" for profile {profile}" if profile else ""
    if not problems:
        if success_message:
            typer.echo(success_message)
        return

    bullet_list = "\n".join(f"- {problem}" for problem in problems)
    raise typer.BadParameter(
        f"Profile mappings do not align with config{profile_label}: {job_name}\n"
        f"{bullet_list}"
    )
671
+
672
+
673
def _apply_saved_params_to_config(
    config_data: dict[str, Any], params: dict[str, Any]
) -> list[str]:
    """Apply saved parameters to ``config_data`` in place.

    ``DefaultArguments``, ``SourceControlDetails`` and ``Command`` are
    created on the config if missing. Each parameter key is stripped and
    routed by its form: full config paths, snake_case aliases,
    ``default_arguments`` (dict or dotted), ``source_control.``/``command.``
    prefixes, bare ``--flag`` keys, and finally plain top-level keys.

    Returns:
        The recorded changes. A full config path that cannot be resolved adds
        a ``"Skipped unresolved saved param path: ..."`` entry instead.
    """
    changes: list[str] = []
    default_args = config_data.setdefault("DefaultArguments", {})
    source_control = config_data.setdefault("SourceControlDetails", {})
    command = config_data.setdefault("Command", {})

    csv_sections = ("DefaultArguments.", "NonOverridableArguments.")
    path_sections = ("Command.", *csv_sections, "SourceControlDetails.")

    def as_flag(arg_name: str) -> str:
        """Prefix ``arg_name`` with ``--`` unless it already has it."""
        return arg_name if arg_name.startswith("--") else f"--{arg_name}"

    for raw_key, value in params.items():
        name = raw_key.strip()

        if name.startswith(path_sections):
            resolved = (
                _saved_param_config_value(value)
                if name.startswith(csv_sections)
                else value
            )
            applied = _set_config_field_path(config_data, name, resolved, changes)
            if not applied:
                changes.append(f"Skipped unresolved saved param path: {name}")
            continue

        if name in {"script_location", "local_script_path", "local_path"}:
            _set_if_changed(source_control, "ScriptLocation", str(value), changes)
            continue

        if name in {"notebook_location", "notebook_path"}:
            # Both notebook fields are kept in step with the same value.
            for field in ("NotebookLocation", "NotebookPath"):
                _set_if_changed(source_control, field, str(value), changes)
            continue

        if name in {"command_script_location", "remote_script_location"}:
            _set_if_changed(command, "ScriptLocation", str(value), changes)
            continue

        if name == "extra_py_files":
            _set_if_changed(
                default_args, "--extra-py-files", _to_csv_if_list(value), changes
            )
            continue

        if name == "extra_files":
            _set_if_changed(
                default_args, "--extra-files", _to_csv_if_list(value), changes
            )
            continue

        if name == "additional_python_modules":
            _set_if_changed(
                default_args,
                "--additional-python-modules",
                _saved_param_config_value(value),
                changes,
            )
            continue

        if name == "default_arguments" and isinstance(value, dict):
            for arg_key, arg_value in value.items():
                _set_if_changed(
                    default_args,
                    as_flag(arg_key),
                    _saved_param_config_value(arg_value),
                    changes,
                )
            continue

        if name.startswith("default_arguments."):
            arg_key = name.split(".", 1)[1]
            _set_if_changed(
                default_args,
                as_flag(arg_key),
                _saved_param_config_value(value),
                changes,
            )
            continue

        if name.startswith("source_control."):
            _set_if_changed(source_control, name.split(".", 1)[1], value, changes)
            continue

        if name.startswith("command."):
            _set_if_changed(command, name.split(".", 1)[1], value, changes)
            continue

        if name.startswith("--"):
            _set_if_changed(default_args, name, _to_csv_if_list(value), changes)
            continue

        _set_if_changed(config_data, name, value, changes)

    return changes
753
+
754
+
755
def _get_checked_out_jobs() -> list[str]:
    """Return the job names saved under ``checkout.jobs``.

    Names are stripped; blank and non-string entries are skipped and
    duplicates are removed while keeping first-seen order. An empty list is
    returned when ``checkout`` is not a dict or ``jobs`` is not a list.
    """
    checkout = _load_glue_set_store().get("checkout", {})
    jobs = checkout.get("jobs") if isinstance(checkout, dict) else None
    if not isinstance(jobs, list):
        return []

    # Dict keys keep insertion order, which gives an ordered de-duplication.
    unique: dict[str, None] = {}
    for entry in jobs:
        if isinstance(entry, str):
            trimmed = entry.strip()
            if trimmed:
                unique.setdefault(trimmed, None)
    return list(unique)
771
+
772
+
773
def _save_checked_out_jobs(
    job_names: list[str],
    selector: str,
    source: str = "local",
    profile: Optional[str] = None,
) -> None:
    """Replace the ``checkout`` section of the store with a job selection.

    The new section records ``mode="job"``, ``selector``, ``source`` and a
    copy of ``job_names``. An existing dict under ``checkout["local"]`` is
    carried over, and a non-blank ``profile`` is stored stripped. Other keys
    of the previous checkout section are not kept.
    """
    store = _load_glue_set_store()
    checkout: dict[str, Any] = {
        "mode": "job",
        "selector": selector,
        "source": source,
        "jobs": list(job_names),
    }

    previous = store.get("checkout", {})
    if isinstance(previous, dict):
        previous_local = previous.get("local")
        if isinstance(previous_local, dict):
            checkout["local"] = previous_local

    if isinstance(profile, str):
        trimmed = profile.strip()
        if trimmed:
            checkout["profile"] = trimmed

    store["checkout"] = checkout
    _save_glue_set_store(store)
796
+
797
+
798
def _clear_checked_out_jobs() -> None:
    """Reset the ``checkout`` section of the store to an empty dict and save."""
    store = _load_glue_set_store()
    store.update(checkout={})
    _save_glue_set_store(store)
802
+
803
+
804
def _save_checkout_local_paths(
    *,
    job_name: str,
    config_path: Path,
    config_data: dict[str, Any],
    dry_run: bool = False,
) -> dict[str, str]:
    """Record the config, script and notebook paths of a checked-out job.

    The script path comes from ``SourceControlDetails`` (``ScriptLocation``
    or ``LocalPath``), falling back to ``glue/scripts/<slug>.py``. The
    notebook path comes from ``NotebookLocation``/``NotebookPath`` or, when
    that is not a string, from ``_resolve_notebook_path(script_path)``.

    With ``dry_run`` the paths are only echoed. Otherwise they are stored
    under ``checkout["local_paths"][job_name]`` and the store is saved.

    Returns:
        The POSIX-style ``config``, ``script`` and ``notebook`` paths.
    """
    source_control = config_data.get("SourceControlDetails", {})
    if not isinstance(source_control, dict):
        source_control = {}

    script_value = source_control.get("ScriptLocation") or source_control.get(
        "LocalPath"
    )
    if not script_value:
        script_value = f"glue/scripts/{slugify(job_name)}.py"
    script_path = Path(script_value)

    notebook_value = source_control.get("NotebookLocation") or source_control.get(
        "NotebookPath"
    )
    if isinstance(notebook_value, str):
        notebook_path = Path(notebook_value)
    else:
        notebook_path = Path(_resolve_notebook_path(script_path))

    paths = {
        "config": config_path.as_posix(),
        "script": script_path.as_posix(),
        "notebook": notebook_path.as_posix(),
    }
    if dry_run:
        typer.echo(f"Would save checkout local paths for {job_name}: {paths}")
        return paths

    store = _load_glue_set_store()
    checkout = store.setdefault("checkout", {})
    if not isinstance(checkout, dict):
        checkout = {}
        store["checkout"] = checkout
    local_paths = checkout.setdefault("local_paths", {})
    if not isinstance(local_paths, dict):
        local_paths = {}
        checkout["local_paths"] = local_paths

    local_paths[job_name] = paths
    _save_glue_set_store(store)
    return paths
846
+
847
+
848
def _apply_saved_params_to_config_path(
    *,
    config_path: Path,
    config_data: dict[str, Any],
    job_name: str,
    profile: Optional[str],
    dry_run: bool,
) -> list[str]:
    """Apply the saved params for a job to its config and persist the result.

    ``config_data`` is updated in place. When anything was recorded as a
    change, the config is written to ``config_path`` as JSON indented by four
    spaces, or, with ``dry_run``, only a "Would apply" line is echoed.

    Returns:
        The recorded changes, or an empty list when there were no saved
        params or nothing was recorded.
    """
    saved = _get_saved_params_for_job(job_name, profile=profile)
    changes = _apply_saved_params_to_config(config_data, saved) if saved else []
    if not changes:
        return []

    profile_label = f" for profile {profile}" if profile else ""
    if not dry_run:
        config_path.write_text(json.dumps(config_data, indent=4))
        typer.echo(f"Applied saved params{profile_label}: {job_name}")
    else:
        typer.echo(f"Would apply saved params{profile_label}: {job_name}")
    return changes
869
+
870
+
871
def _resolve_checkout_job_name(
    job_name: str,
    *,
    config_dir: Path,
) -> tuple[str, str]:
    """Return the stripped job name and where it was found.

    The second element is ``"local"`` when a config in ``config_dir`` carries
    that name, otherwise ``"offline"``.

    Raises:
        typer.BadParameter: If ``job_name`` is blank after stripping.
    """
    name = job_name.strip()
    if not name:
        raise typer.BadParameter("Job name must be a non-empty string.")

    known_configs = _load_config_index(config_dir)
    source = "local" if name in known_configs else "offline"
    return name, source
885
+
886
+
887
def _coerce_set_value(raw: str) -> Any:
    """Coerce a command-line string into a bool, None, JSON value or number.

    The stripped text is interpreted in this order:

    * ``true`` / ``false`` (any case) become booleans.
    * ``null`` / ``none`` (any case) become None.
    * Text wrapped in ``{}`` or ``[]`` is parsed as JSON.
    * Text accepted by ``int()``, then by ``float()``, becomes a number.

    Anything else is returned as the original, unstripped ``raw`` string.
    Text that ``float()`` would turn into NaN or an infinity (``nan``,
    ``inf``, ``infinity``, overflowing literals) is also returned as ``raw``:
    such floats are serialized by ``json.dumps`` as the non-standard tokens
    ``NaN``/``Infinity``.
    """
    import math

    value = raw.strip()
    lower = value.lower()
    if lower == "true":
        return True
    if lower == "false":
        return False
    if lower in {"null", "none"}:
        return None
    if (value.startswith("{") and value.endswith("}")) or (
        value.startswith("[") and value.endswith("]")
    ):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return raw
    try:
        return int(value)
    except ValueError:
        pass
    try:
        number = float(value)
    except ValueError:
        return raw
    # Keep words like "nan" and "inf" as plain strings.
    return number if math.isfinite(number) else raw
911
+
912
+
913
+ def _format_csv_item(item: str) -> str:
914
+ if any(ch in item for ch in [",", '"']):
915
+ escaped = item.replace('"', '""')
916
+ return f'"{escaped}"'
917
+ return item
918
+
919
+
920
+ def _set_if_changed(
921
+ container: dict[str, Any], key: str, value: Any, changes: list[str]
922
+ ) -> None:
923
+ if container.get(key) != value:
924
+ container[key] = value
925
+ changes.append(key)
926
+
927
+
928
def _write_config_changes(
    config_path: Path,
    config_data: dict[str, Any],
    changes: list[str],
    *,
    dry_run: bool,
) -> None:
    """Report ``changes`` and, unless dry-running, write the config to disk.

    Args:
        config_path: Destination file for the JSON-serialized config.
        config_data: Config content to serialize (4-space indented JSON).
        changes: Change descriptions; entries starting with ``"Skipped "`` are
            informational and do not by themselves trigger a write.
        dry_run: When true, list the changes without writing the file.
    """

    def _echo_with_changes(headline: str) -> None:
        # Print the headline followed by one bullet per change entry.
        typer.echo(headline)
        for entry in changes:
            typer.echo(f"- {entry}")

    if not changes:
        typer.echo("No changes needed.")
        return

    only_skipped = all(entry.startswith("Skipped ") for entry in changes)
    if only_skipped:
        # Nothing actionable, but still show what was skipped.
        _echo_with_changes("No changes needed.")
        return

    if dry_run:
        _echo_with_changes(f"Would update {config_path}:")
        return

    config_path.write_text(json.dumps(config_data, indent=4))
    _echo_with_changes(f"Updated {config_path}:")
958
+
959
+
960
+ def _to_csv_if_list(value: Any) -> Any:
961
+ if isinstance(value, list):
962
+ return ",".join(str(item) for item in value)
963
+ return value
964
+
965
+
966
# Names exported by a star-import of this module. Every entry except
# "run_command" starts with an underscore, so a star-import would skip it if
# it were not listed here.
# NOTE: the last three entries (_parse_datetime, _get_local_last_modified,
# _write_glue_job_list_csv) are defined after this list, further down.
__all__ = [
    "_examples_epilog",
    "_find_workspace_root",
    "run_command",
    "_emit_compatibility_notice",
    "_raise_missing_local_config",
    "_looks_like_remote_module_spec",
    "_routes_to_additional_python_modules",
    "_collect_config_local_artifact_paths",
    "_set_saved_scope",
    "_load_config_index",
    "_parse_s3_url",
    "_find_sequence",
    "_derive_s3_target",
    "_load_glue_set_store",
    "_save_glue_set_store",
    "_resolve_single_job_name",
    "_get_saved_params_for_job",
    "_get_checked_out_profile",
    "_get_local_checkouts",
    "_save_local_checkout",
    "_get_checked_out_local_setup_name",
    "_get_checked_out_local_setup",
    "_set_checked_out_local_setup",
    "_get_config_field_path",
    "_profile_param_expected_fields",
    "_collect_profile_mapping_mismatches",
    "_validate_profile_mappings_align_with_config",
    "_apply_saved_params_to_config",
    "_get_checked_out_jobs",
    "_save_checked_out_jobs",
    "_clear_checked_out_jobs",
    "_save_checkout_local_paths",
    "_apply_saved_params_to_config_path",
    "_resolve_checkout_job_name",
    "_coerce_set_value",
    "_format_csv_item",
    "_set_if_changed",
    "_write_config_changes",
    "_to_csv_if_list",
    "_parse_datetime",
    "_get_local_last_modified",
    "_write_glue_job_list_csv",
]
1010
+
1011
+
1012
+ def _parse_datetime(value: Any) -> Optional[datetime]:
1013
+ if not value:
1014
+ return None
1015
+ if isinstance(value, datetime):
1016
+ parsed = value
1017
+ elif isinstance(value, (int, float)):
1018
+ try:
1019
+ parsed = datetime.fromtimestamp(value, tz=timezone.utc)
1020
+ except (OverflowError, OSError, ValueError):
1021
+ return None
1022
+ elif isinstance(value, str):
1023
+ text = value.strip()
1024
+ if not text:
1025
+ return None
1026
+ if text.endswith("Z"):
1027
+ text = f"{text[:-1]}+00:00"
1028
+ try:
1029
+ parsed = datetime.fromisoformat(text)
1030
+ except ValueError:
1031
+ return None
1032
+ else:
1033
+ return None
1034
+
1035
+ if parsed.tzinfo is None:
1036
+ return parsed.replace(tzinfo=timezone.utc)
1037
+ return parsed.astimezone(timezone.utc)
1038
+
1039
+
1040
def _get_local_last_modified(
    config_path: Path, config_data: dict[str, Any]
) -> Optional[datetime]:
    """Return the local config's last-modified time in UTC.

    The ``"LastModifiedOn"`` entry of ``config_data`` is used when
    ``_parse_datetime`` can interpret it. Otherwise the modification time of
    ``config_path`` is used, provided the file exists.

    Returns:
        A UTC-aware ``datetime``, or ``None`` when neither source is available.
    """
    from_config = _parse_datetime(config_data.get("LastModifiedOn"))
    if from_config:
        return from_config
    if not config_path.exists():
        return None
    mtime = config_path.stat().st_mtime
    return datetime.fromtimestamp(mtime, tz=timezone.utc)
1049
+
1050
+
1051
+ def _write_glue_job_list_csv(jobs: list[dict[str, Any]], output_path: Path) -> None:
1052
+ fieldnames: list[str] = []
1053
+ for job in jobs:
1054
+ if not isinstance(job, Mapping):
1055
+ continue
1056
+ for key in job.keys():
1057
+ if key not in fieldnames:
1058
+ fieldnames.append(str(key))
1059
+
1060
+ with output_path.open("w", newline="", encoding="utf-8") as handle:
1061
+ if not fieldnames:
1062
+ handle.write("")
1063
+ return
1064
+ writer = csv.DictWriter(handle, fieldnames=fieldnames, extrasaction="ignore")
1065
+ writer.writeheader()
1066
+ for job in jobs:
1067
+ if isinstance(job, Mapping):
1068
+ writer.writerow(job)