gluekit 1.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. gluekit/__init__.py +7 -0
  2. gluekit/app.py +0 -0
  3. gluekit/cli.py +64 -0
  4. gluekit/commands/__init__.py +1 -0
  5. gluekit/commands/add.py +455 -0
  6. gluekit/commands/build.py +816 -0
  7. gluekit/commands/checkout.py +114 -0
  8. gluekit/commands/clone.py +516 -0
  9. gluekit/commands/config_commands.py +180 -0
  10. gluekit/commands/constants.py +47 -0
  11. gluekit/commands/convert.py +336 -0
  12. gluekit/commands/edit.py +1104 -0
  13. gluekit/commands/helpers.py +1068 -0
  14. gluekit/commands/init.py +798 -0
  15. gluekit/commands/list.py +16 -0
  16. gluekit/commands/local_commands.py +680 -0
  17. gluekit/commands/pull.py +374 -0
  18. gluekit/commands/push.py +251 -0
  19. gluekit/commands/remove.py +161 -0
  20. gluekit/commands/run.py +126 -0
  21. gluekit/commands/status.py +97 -0
  22. gluekit/commands/sync.py +97 -0
  23. gluekit/commands/update.py +104 -0
  24. gluekit/job_mgmt/__init__.py +0 -0
  25. gluekit/job_mgmt/glue_jobs.py +1323 -0
  26. gluekit/job_mgmt/magics.py +122 -0
  27. gluekit/job_mgmt/resources/__init__.py +0 -0
  28. gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
  29. gluekit/job_mgmt/resources/magic_map.json +83 -0
  30. gluekit/job_mgmt/schema.py +165 -0
  31. gluekit/local/__init__.py +6 -0
  32. gluekit/local/awsglue/__init__.py +1 -0
  33. gluekit/local/awsglue/context.py +30 -0
  34. gluekit/local/awsglue/job.py +9 -0
  35. gluekit/local/awsglue/utils.py +17 -0
  36. gluekit/local/local.py +434 -0
  37. gluekit/local/local_fixtures.py +337 -0
  38. gluekit/local/pyspark/__init__.py +7 -0
  39. gluekit/local/pyspark/context.py +31 -0
  40. gluekit/local/pyspark/sql/__init__.py +6 -0
  41. gluekit/local/pyspark/sql/session.py +29 -0
  42. gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
  43. gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
  44. gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
  45. gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
  46. gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1104 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import copy
5
+ import json
6
+ import re
7
+ import shutil
8
+ import subprocess
9
+ import tarfile
10
+ import uuid
11
+ import zipfile
12
+ from collections.abc import Mapping
13
+ from datetime import datetime, timezone
14
+ from email.parser import Parser
15
+ from fnmatch import fnmatch
16
+ from pathlib import Path
17
+ from tempfile import TemporaryDirectory
18
+ from typing import Any, Optional
19
+
20
+ import typer
21
+ from slugify import slugify
22
+
23
+ from ..job_mgmt.glue_jobs import (
24
+ download_glue_job_files,
25
+ list_glue_jobs,
26
+ normalize_glue_config_data,
27
+ convert_script_to_notebook,
28
+ convert_notebook_to_script,
29
+ _resolve_notebook_path,
30
+ upload_glue_job_files_from_config,
31
+ )
32
+ from ..job_mgmt.magics import build_magic_cell_sources as _build_magic_cell_sources
33
+
34
+ from .constants import *
35
+ from .helpers import *
36
+ from .helpers import _parse_datetime, _get_local_last_modified, _write_glue_job_list_csv
37
+ from ..cli import app, glue_config_app
38
+
39
+
40
+ def _parse_csv_list(value: Optional[str]) -> list[str]:
41
+ if not value:
42
+ return []
43
+ return [item.strip() for item in next(csv.reader([value])) if item.strip()]
44
+
45
+
46
def _append_csv_items(existing_value: Optional[str], items: list[str]) -> str:
    """Append items that are not yet present to a comma-separated string.

    Items are stripped; blanks and items already found in ``existing_value``
    (or earlier in ``items``) are ignored. The original string is kept verbatim
    and each new item is appended after being passed through
    ``_format_csv_item``. When nothing is new, the existing value (or ``""``
    when it was ``None``) is returned.
    """
    result = existing_value or ""
    seen = set(_parse_csv_list(result))
    additions: list[str] = []
    for candidate in items:
        name = candidate.strip()
        if not name or name in seen:
            continue
        seen.add(name)
        additions.append(name)

    for name in additions:
        rendered = _format_csv_item(name)
        result = f"{result},{rendered}" if result else rendered
    return result
67
+
68
+
69
def _remove_csv_items(
    existing_value: Optional[str], items: list[str]
) -> tuple[Optional[str], list[str]]:
    """Drop the requested items from a comma-separated string.

    Returns ``(new_value, removed_items)``. When nothing matched, the original
    value is returned untouched together with an empty list. When every item
    was removed, the new value is ``None``. Otherwise the surviving items are
    re-joined after being passed through ``_format_csv_item``.
    """
    current_items = _parse_csv_list(existing_value)
    if not current_items:
        return existing_value, []

    doomed = {candidate.strip() for candidate in items if candidate.strip()}
    survivors: list[str] = []
    dropped: list[str] = []
    for entry in current_items:
        (dropped if entry in doomed else survivors).append(entry)

    if not dropped:
        return existing_value, []
    if not survivors:
        return None, dropped
    return ",".join(map(_format_csv_item, survivors)), dropped
84
+
85
+
86
+ def _normalize_component_filters(
87
+ include: Optional[list[str]],
88
+ exclude: Optional[list[str]],
89
+ allowed: set[str],
90
+ aliases: dict[str, str],
91
+ context_label: str,
92
+ ) -> set[str]:
93
+ def collect(values: Optional[list[str]]) -> list[str]:
94
+ items: list[str] = []
95
+ for raw in values or []:
96
+ items.extend(_parse_csv_list(raw))
97
+ return [item.strip().lower() for item in items if item.strip()]
98
+
99
+ def resolve(item: str) -> str:
100
+ if item == "all":
101
+ return item
102
+ mapped = aliases.get(item, item)
103
+ if mapped not in allowed:
104
+ raise typer.BadParameter(
105
+ f"{context_label} component '{item}' is not valid. "
106
+ f"Valid values: {', '.join(sorted(allowed))}."
107
+ )
108
+ return mapped
109
+
110
+ include_items = collect(include)
111
+ exclude_items = collect(exclude)
112
+
113
+ include_set: set[str] = set()
114
+ include_all = False
115
+ for item in include_items:
116
+ resolved = resolve(item)
117
+ if resolved == "all":
118
+ include_all = True
119
+ continue
120
+ include_set.add(resolved)
121
+
122
+ if include_all:
123
+ include_set = set(allowed)
124
+ elif not include_items:
125
+ include_set = set(allowed)
126
+
127
+ exclude_set: set[str] = set()
128
+ exclude_all = False
129
+ for item in exclude_items:
130
+ resolved = resolve(item)
131
+ if resolved == "all":
132
+ exclude_all = True
133
+ continue
134
+ exclude_set.add(resolved)
135
+
136
+ if exclude_all:
137
+ exclude_set = set(allowed)
138
+
139
+ resolved = include_set - exclude_set
140
+ if not resolved:
141
+ raise typer.BadParameter(
142
+ f"{context_label} include/exclude filters removed all components."
143
+ )
144
+ return resolved
145
+
146
+
147
def _apply_csv_additions(
    container: dict[str, Any],
    key: str,
    items: list[str],
    changes: list[str],
    label: str,
) -> None:
    """Add items to the comma-separated value stored at ``container[key]``.

    Only items that are not already present are appended, and only those are
    reported in ``changes``. Previously every requested value was logged as
    "Added" as soon as at least one of them was new, including values that
    were already in the list and duplicates within ``items``.

    Args:
        container: Mapping holding the comma-separated string (mutated).
        key: Key of the comma-separated string inside ``container``.
        items: Candidate values; blanks are ignored.
        changes: Change log that receives one line per value actually added.
        label: Human-readable name used in the change-log lines.
    """
    values = [item.strip() for item in items if item and item.strip()]
    if not values:
        return
    existing_value = container.get(key)

    # Work out which values are genuinely new so the change log is accurate.
    present = set(_parse_csv_list(existing_value))
    new_values: list[str] = []
    for value in values:
        if value not in present:
            present.add(value)
            new_values.append(value)
    if not new_values:
        return

    container[key] = _append_csv_items(existing_value, new_values)
    for value in new_values:
        changes.append(f"Added {label}: {value}")
164
+
165
+
166
+ def _apply_csv_removals(
167
+ container: dict[str, Any],
168
+ key: str,
169
+ items: list[str],
170
+ changes: list[str],
171
+ label: str,
172
+ ) -> None:
173
+ values = [item.strip() for item in items if item and item.strip()]
174
+ if not values:
175
+ return
176
+ existing_value = container.get(key)
177
+ updated_value, removed_items = _remove_csv_items(existing_value, values)
178
+ if not removed_items:
179
+ return
180
+ if updated_value is None:
181
+ container.pop(key, None)
182
+ else:
183
+ container[key] = updated_value
184
+ for value in removed_items:
185
+ changes.append(f"Removed {label}: {value}")
186
+
187
+
188
+ def _parse_key_value_pairs(
189
+ items: Optional[list[str]], option_name: str
190
+ ) -> dict[str, str]:
191
+ parsed: dict[str, str] = {}
192
+ for item in items or []:
193
+ if "=" not in item:
194
+ raise typer.BadParameter(f"{option_name} must use KEY=VALUE format: {item}")
195
+ key, value = item.split("=", 1)
196
+ normalized_key = key.strip()
197
+ if not normalized_key:
198
+ raise typer.BadParameter(f"{option_name} key cannot be empty: {item}")
199
+ parsed[normalized_key] = value.strip()
200
+ return parsed
201
+
202
+
203
+ def _ensure_list_of_dicts(container: dict[str, Any], key: str) -> list[dict[str, Any]]:
204
+ existing = container.get(key)
205
+ if existing is None:
206
+ existing = []
207
+ if not isinstance(existing, list):
208
+ raise typer.BadParameter(f"SourceControlDetails.{key} must be a list.")
209
+ for entry in existing:
210
+ if not isinstance(entry, dict):
211
+ raise typer.BadParameter(
212
+ f"SourceControlDetails.{key} entries must be objects."
213
+ )
214
+ container[key] = existing
215
+ return existing
216
+
217
+
218
+ def _ensure_connections_list(config_data: dict[str, Any]) -> list[str]:
219
+ connections = config_data.setdefault("Connections", {})
220
+ if not isinstance(connections, dict):
221
+ raise typer.BadParameter("Connections must be an object.")
222
+ items = connections.get("Connections")
223
+ if items is None:
224
+ items = []
225
+ if not isinstance(items, list):
226
+ raise typer.BadParameter("Connections.Connections must be a list.")
227
+ connections["Connections"] = items
228
+ return items
229
+
230
+
231
+ def _apply_list_additions(
232
+ target: list[str], items: list[str], changes: list[str], label: str
233
+ ) -> None:
234
+ for item in items:
235
+ normalized = item.strip()
236
+ if not normalized or normalized in target:
237
+ continue
238
+ target.append(normalized)
239
+ changes.append(f"Added {label}: {normalized}")
240
+
241
+
242
+ def _apply_list_removals(
243
+ target: list[str], items: list[str], changes: list[str], label: str
244
+ ) -> None:
245
+ removals = {item.strip() for item in items if item.strip()}
246
+ if not removals:
247
+ return
248
+ remaining = [item for item in target if item not in removals]
249
+ removed = [item for item in target if item in removals]
250
+ if removed:
251
+ target[:] = remaining
252
+ for item in removed:
253
+ changes.append(f"Removed {label}: {item}")
254
+
255
+
256
+ def _default_argument_key(argument_name: str) -> str:
257
+ return argument_name if argument_name.startswith("--") else f"--{argument_name}"
258
+
259
+
260
def _apply_argument_updates(
    container: dict[str, Any],
    additions: Optional[list[str]],
    removals: Optional[list[str]],
    changes: list[str],
    label: str,
) -> None:
    """Set and remove ``--key`` style arguments in ``container``.

    Args:
        container: Argument mapping to mutate.
        additions: ``KEY=VALUE`` strings; keys get a ``--`` prefix when missing.
        removals: Argument names to delete; ``--`` is added when missing.
        changes: Change log that receives one line per effective change.
        label: Human-readable name used in messages and as the option name in
            parse errors.

    Raises:
        typer.BadParameter: propagated from ``_parse_key_value_pairs`` for a
            malformed addition.
    """
    for raw_key, value in _parse_key_value_pairs(additions, label).items():
        flag = _default_argument_key(raw_key)
        if container.get(flag) != value:
            container[flag] = value
            changes.append(f"Set {label}: {flag}={value}")
    for raw_key in removals or []:
        stripped = raw_key.strip()
        # Check for blanks before normalizing: once the "--" prefix has been
        # added the key is never empty, so a later emptiness test cannot fire
        # and a blank entry would be treated as a request to remove "--".
        if not stripped:
            continue
        flag = _default_argument_key(stripped)
        if flag in container:
            container.pop(flag, None)
            changes.append(f"Removed {label}: {flag}")
278
+
279
+
280
+ def _build_file_mapping_indexes(
281
+ entries: list[dict[str, Any]], key_name: str
282
+ ) -> tuple[dict[str, dict[str, str]], dict[str, str]]:
283
+ local_to_entry: dict[str, dict[str, str]] = {}
284
+ s3_to_local: dict[str, str] = {}
285
+ for entry in entries:
286
+ local = entry.get("LocalPath")
287
+ s3_path = entry.get("S3Path")
288
+ if (
289
+ not isinstance(local, str)
290
+ or not isinstance(s3_path, str)
291
+ or not local
292
+ or not s3_path
293
+ ):
294
+ raise typer.BadParameter(
295
+ f"SourceControlDetails.{key_name} entries must include LocalPath and S3Path."
296
+ )
297
+ if local in local_to_entry:
298
+ raise typer.BadParameter(
299
+ f"Duplicate LocalPath in SourceControlDetails.{key_name}: {local}"
300
+ )
301
+ if s3_path in s3_to_local and s3_to_local[s3_path] != local:
302
+ raise typer.BadParameter(
303
+ f"S3Path {s3_path} already mapped to {s3_to_local[s3_path]}"
304
+ )
305
+ local_to_entry[local] = entry
306
+ s3_to_local[s3_path] = local
307
+ return local_to_entry, s3_to_local
308
+
309
+
310
def _apply_file_mapping_updates(
    *,
    config_data: dict[str, Any],
    job_name: str,
    source_control_key: str,
    add_paths: Optional[list[str]],
    remove_paths: Optional[list[str]],
    default_arg_key: Optional[str],
    changes: list[str],
    label: str,
) -> None:
    """Add/remove local-file mappings under ``SourceControlDetails``.

    Removals are processed first, then additions. Each added path must be
    relative and exist on disk; its S3 target comes from ``_derive_s3_target``
    (directories get a ``.zip`` suffix). When ``default_arg_key`` is given,
    the S3 paths are mirrored into that comma-separated default argument.

    Fixes over the previous implementation:
      * The lookup indexes are kept in sync with removals, so removing and
        re-adding a path in one call no longer drops the entry, and an S3
        path freed by a removal is no longer reported as taken.
      * The "drop the key when the list is empty" cleanup runs after the
        additions, so entries added after a full removal are not written to a
        detached list, and a no-op call no longer leaves an empty list behind.
      * An S3 path that is removed and re-added in the same call is not
        stripped from the default argument.

    Args:
        config_data: Job configuration to mutate.
        job_name: Job name, used for the fallback local script path and passed
            to ``_derive_s3_target``.
        source_control_key: Key under ``SourceControlDetails`` holding the list
            of ``{"LocalPath", "S3Path"}`` entries.
        add_paths: Local paths to map.
        remove_paths: Local paths whose mapping should be dropped; matched
            verbatim against the stored ``LocalPath`` values.
        default_arg_key: Default-argument key to mirror S3 paths into, if any.
        changes: Change log that receives one line per effective change.
        label: Human-readable name used in the change-log lines.

    Raises:
        typer.BadParameter: for malformed config, a missing
            ``Command.ScriptLocation`` when adding, an absolute or missing
            local path, or an S3 path already claimed by another local path.
    """
    source_control = config_data.setdefault("SourceControlDetails", {})
    if not isinstance(source_control, dict):
        raise typer.BadParameter("SourceControlDetails must be an object.")
    default_args = config_data.setdefault("DefaultArguments", {})
    command = config_data.get("Command", {})
    script_location = command.get("ScriptLocation")
    if add_paths and not script_location:
        raise typer.BadParameter("Missing Command.ScriptLocation in config.")
    local_script_path = Path(
        source_control.get("ScriptLocation")
        or source_control.get("LocalPath")
        or f"glue/scripts/{slugify(job_name)}.py"
    )

    entries = _ensure_list_of_dicts(source_control, source_control_key)
    local_to_entry, s3_to_local = _build_file_mapping_indexes(
        entries, source_control_key
    )

    add_path_values = [
        item.strip() for item in add_paths or [] if item and item.strip()
    ]
    remove_path_values = [
        item.strip() for item in remove_paths or [] if item and item.strip()
    ]

    removed_s3_paths: list[str] = []
    if remove_path_values:
        removals = set(remove_path_values)
        remaining_entries: list[dict[str, Any]] = []
        for entry in entries:
            local_path = entry["LocalPath"]
            if local_path not in removals:
                remaining_entries.append(entry)
                continue
            removed_s3_paths.append(entry["S3Path"])
            # Keep both indexes in sync so the additions below see the
            # post-removal state.
            local_to_entry.pop(local_path, None)
            s3_to_local.pop(entry["S3Path"], None)
            changes.append(f"Removed {label}: {local_path}")
        entries[:] = remaining_entries

    added_local_keys: set[str] = set()
    for raw_path in add_path_values:
        path = Path(raw_path)
        if path.is_absolute():
            raise typer.BadParameter(f"Local path must be relative: {path}")
        if not path.exists():
            raise typer.BadParameter(f"Local path not found: {path}")
        s3_path = _derive_s3_target(
            path, job_name, script_location, local_script_path
        )
        if path.is_dir() and not s3_path.endswith(".zip"):
            s3_path = f"{s3_path}.zip"

        local_key = path.as_posix()
        added_local_keys.add(local_key)
        if s3_path in s3_to_local and s3_to_local[s3_path] != local_key:
            raise typer.BadParameter(
                f"S3Path {s3_path} already mapped to {s3_to_local[s3_path]}"
            )

        existing_entry = local_to_entry.get(local_key)
        if existing_entry:
            old_s3 = existing_entry.get("S3Path")
            if old_s3 != s3_path:
                existing_entry["S3Path"] = s3_path
                if old_s3:
                    s3_to_local.pop(old_s3, None)
                s3_to_local[s3_path] = local_key
                changes.append(f"Updated {label}: {local_key} -> {s3_path}")
        else:
            new_entry = {"LocalPath": local_key, "S3Path": s3_path}
            entries.append(new_entry)
            local_to_entry[local_key] = new_entry
            s3_to_local[s3_path] = local_key
            changes.append(f"Added {label}: {local_key} -> {s3_path}")

    # Only drop the key once the final state is known; ``entries`` is the list
    # stored in ``source_control`` up to this point.
    if not entries:
        source_control.pop(source_control_key, None)

    if default_arg_key:
        add_s3_values = [
            entry["S3Path"]
            for entry in entries
            if entry["LocalPath"] in added_local_keys
        ]
        # Do not strip an S3 path that is still (or again) mapped.
        still_mapped = {entry["S3Path"] for entry in entries}
        stale_s3_paths = [
            s3_path for s3_path in removed_s3_paths if s3_path not in still_mapped
        ]
        _apply_csv_additions(
            default_args, default_arg_key, add_s3_values, changes, default_arg_key
        )
        _apply_csv_removals(
            default_args, default_arg_key, stale_s3_paths, changes, default_arg_key
        )
407
+
408
+
409
def _derive_bucket_root_s3_target(local_path: Path, script_location: str) -> str:
    """Build an S3 URL placing ``local_path`` at the root of the script's bucket.

    The bucket is the first element returned by ``_parse_s3_url`` for
    ``script_location``; the key is the POSIX form of ``local_path``.
    """
    bucket_name, _ignored_key = _parse_s3_url(script_location)
    object_key = local_path.as_posix()
    return "s3://" + f"{bucket_name}/{object_key}"
412
+
413
+
414
def _apply_extra_py_file_updates(
    *,
    config_data: dict[str, Any],
    add_paths: Optional[list[str]],
    remove_paths: Optional[list[str]],
    changes: list[str],
) -> None:
    """Add/remove ``SourceControlDetails.ExtraPyFiles`` mappings.

    Removals are processed first, then additions. Each added path must be a
    relative, existing file; its S3 target is the same relative path at the
    root of the ``Command.ScriptLocation`` bucket. The S3 paths are mirrored
    into the ``--extra-py-files`` default argument. Conflicting or already
    known additions are reported as "Skipped" in ``changes`` instead of
    raising.

    Fixes over the previous implementation:
      * The lookup indexes and the set of known ``--extra-py-files`` values
        are kept in sync with removals, so removing and re-adding a path in
        one call is no longer reported as "already tracked" while the entry
        is in fact gone.
      * The "drop the key when the list is empty" cleanup runs after the
        additions, so entries added after a full removal are not written to a
        detached list, and a no-op call no longer leaves an empty list behind.
      * An S3 path that is removed and re-added in the same call is not
        stripped from ``--extra-py-files``.

    Args:
        config_data: Job configuration to mutate.
        add_paths: Local files to map.
        remove_paths: Local paths whose mapping should be dropped; matched
            verbatim against the stored ``LocalPath`` values.
        changes: Change log that receives one line per action or skip.

    Raises:
        typer.BadParameter: for malformed config, a missing
            ``Command.ScriptLocation`` when adding, or an absolute, missing or
            directory path.
    """
    source_control = config_data.setdefault("SourceControlDetails", {})
    if not isinstance(source_control, dict):
        raise typer.BadParameter("SourceControlDetails must be an object.")
    default_args = config_data.setdefault("DefaultArguments", {})
    command = config_data.get("Command", {})
    script_location = command.get("ScriptLocation")
    if add_paths and not script_location:
        raise typer.BadParameter("Missing Command.ScriptLocation in config.")

    entries = _ensure_list_of_dicts(source_control, "ExtraPyFiles")
    local_to_entry, s3_to_local = _build_file_mapping_indexes(entries, "ExtraPyFiles")
    existing_s3_values = set(_parse_csv_list(default_args.get("--extra-py-files")))

    add_path_values = [
        item.strip() for item in add_paths or [] if item and item.strip()
    ]
    remove_path_values = [
        item.strip() for item in remove_paths or [] if item and item.strip()
    ]

    removed_s3_paths: list[str] = []
    if remove_path_values:
        removals = set(remove_path_values)
        remaining_entries: list[dict[str, Any]] = []
        for entry in entries:
            local_path = entry["LocalPath"]
            if local_path not in removals:
                remaining_entries.append(entry)
                continue
            removed_s3 = entry["S3Path"]
            removed_s3_paths.append(removed_s3)
            # Keep the lookup state in sync so the additions below see the
            # post-removal picture.
            local_to_entry.pop(local_path, None)
            s3_to_local.pop(removed_s3, None)
            existing_s3_values.discard(removed_s3)
            changes.append(f"Removed extra py file mapping: {local_path}")
        entries[:] = remaining_entries

    add_s3_values: list[str] = []
    for raw_path in add_path_values:
        path = Path(raw_path)
        if path.is_absolute():
            raise typer.BadParameter(f"Local path must be relative: {path}")
        if not path.exists():
            raise typer.BadParameter(f"Local path not found: {path}")
        if path.is_dir():
            raise typer.BadParameter(
                f"--add-extra-py-files only accepts files: {path}"
            )

        s3_path = _derive_bucket_root_s3_target(path, script_location)
        local_key = path.as_posix()
        existing_entry = local_to_entry.get(local_key)
        if s3_path in existing_s3_values:
            mapped_local = s3_to_local.get(s3_path)
            if existing_entry and existing_entry.get("S3Path") == s3_path:
                changes.append(
                    f"Skipped extra py file mapping: {local_key} -> {s3_path} (already tracked)"
                )
            elif mapped_local:
                changes.append(
                    f"Skipped extra py file mapping: {local_key} -> {s3_path} (already mapped to {mapped_local})"
                )
            else:
                changes.append(
                    f"Skipped extra py file mapping: {local_key} -> {s3_path} (already in --extra-py-files)"
                )
            continue

        if s3_path in s3_to_local and s3_to_local[s3_path] != local_key:
            changes.append(
                f"Skipped extra py file mapping: {local_key} -> {s3_path} (already mapped to {s3_to_local[s3_path]})"
            )
            continue

        if existing_entry:
            old_s3 = existing_entry.get("S3Path")
            if old_s3 != s3_path:
                existing_entry["S3Path"] = s3_path
                if old_s3:
                    s3_to_local.pop(old_s3, None)
                s3_to_local[s3_path] = local_key
                existing_s3_values.add(s3_path)
                changes.append(
                    f"Updated extra py file mapping: {local_key} -> {s3_path}"
                )
                add_s3_values.append(s3_path)
            else:
                changes.append(
                    f"Skipped extra py file mapping: {local_key} -> {s3_path} (already tracked)"
                )
        else:
            new_entry = {"LocalPath": local_key, "S3Path": s3_path}
            entries.append(new_entry)
            local_to_entry[local_key] = new_entry
            s3_to_local[s3_path] = local_key
            existing_s3_values.add(s3_path)
            changes.append(f"Added extra py file mapping: {local_key} -> {s3_path}")
            add_s3_values.append(s3_path)

    # Only drop the key once the final state is known; ``entries`` is the list
    # stored in ``source_control`` up to this point.
    if not entries:
        source_control.pop("ExtraPyFiles", None)

    # Do not strip an S3 path that is still (or again) mapped.
    still_mapped = {entry["S3Path"] for entry in entries}
    stale_s3_paths = [
        s3_path for s3_path in removed_s3_paths if s3_path not in still_mapped
    ]

    _apply_csv_additions(
        default_args, "--extra-py-files", add_s3_values, changes, "--extra-py-files"
    )
    _apply_csv_removals(
        default_args,
        "--extra-py-files",
        stale_s3_paths,
        changes,
        "--extra-py-files",
    )
529
+
530
+
531
def _apply_edit_mutations(
    *,
    config_data: dict[str, Any],
    job_name: str,
    description: Optional[str],
    role: Optional[str],
    glue_version: Optional[str],
    worker_type: Optional[str],
    number_of_workers: Optional[int],
    timeout: Optional[int],
    max_retries: Optional[int],
    execution_class: Optional[str],
    max_concurrent_runs: Optional[int],
    job_run_queuing_enabled: Optional[bool],
    security_configuration: Optional[str],
    maintenance_window: Optional[str],
    log_uri: Optional[str],
    command_name: Optional[str],
    python_version: Optional[str],
    runtime: Optional[str],
    remote_script_location: Optional[str],
    connections_to_add: Optional[list[str]],
    connections_to_remove: Optional[list[str]],
    source_control_provider: Optional[str],
    source_control_owner: Optional[str],
    source_control_repository: Optional[str],
    source_control_branch: Optional[str],
    source_control_folder: Optional[str],
    local_script_location: Optional[str],
    notebook_location: Optional[str],
    add_additional_python_modules: Optional[list[str]],
    remove_additional_python_modules: Optional[list[str]],
    add_extra_py_files: Optional[list[str]],
    remove_extra_py_files: Optional[list[str]],
    add_extra_jars: Optional[list[str]],
    remove_extra_jars: Optional[list[str]],
    add_additional_python_files: Optional[list[str]],
    remove_additional_python_files: Optional[list[str]],
    add_extra_files: Optional[list[str]],
    remove_extra_files: Optional[list[str]],
    add_default_argument: Optional[list[str]],
    remove_default_argument: Optional[list[str]],
    add_non_overridable_argument: Optional[list[str]],
    remove_non_overridable_argument: Optional[list[str]],
    enable_metrics: Optional[bool],
    enable_observability_metrics: Optional[bool],
    enable_glue_datacatalog: Optional[bool],
    enable_continuous_cloudwatch_log: Optional[bool],
    enable_spark_ui: Optional[bool],
    enable_job_insights: Optional[bool],
    enable_auto_scaling: Optional[bool],
    job_bookmark_option: Optional[str],
    temp_dir: Optional[str],
    spark_event_logs_path: Optional[str],
    datalake_formats: Optional[str],
    spark_conf: Optional[str],
    continuous_log_log_group: Optional[str],
    continuous_log_log_stream_prefix: Optional[str],
    continuous_log_conversion_pattern: Optional[str],
    executor_cores: Optional[int],
    customer_driver_env_vars: Optional[str],
    customer_executor_env_vars: Optional[str],
    python_modules_installer_option: Optional[str],
) -> list[str]:
    """Apply every requested edit to ``config_data`` and return the change log.

    Options left as ``None`` are skipped. Edits are applied in a fixed order:
    top-level scalars, ``ExecutionProperty``, ``Command``,
    ``SourceControlDetails``, connections, comma-separated default arguments,
    free-form default / non-overridable arguments, dedicated default-argument
    flags, and finally the three local-file mapping groups. Sections that end
    up empty are removed from ``config_data`` again.

    Scalar writes go through ``_set_if_changed`` (defined elsewhere;
    presumably it writes and logs only when the value differs; confirm
    against its definition).

    Returns:
        The list of change descriptions collected along the way.
    """
    changes: list[str] = []

    # Section containers are created up front, in this order, so every later
    # step can mutate them directly.
    default_args = config_data.setdefault("DefaultArguments", {})
    non_overridable_args = config_data.setdefault("NonOverridableArguments", {})
    source_control = config_data.setdefault("SourceControlDetails", {})
    command = config_data.setdefault("Command", {})
    execution_property = config_data.setdefault("ExecutionProperty", {})

    # (target mapping, {field: requested value}) applied strictly in sequence.
    scalar_groups = (
        (
            config_data,
            {
                "Description": description,
                "Role": role,
                "GlueVersion": glue_version,
                "WorkerType": worker_type,
                "NumberOfWorkers": number_of_workers,
                "Timeout": timeout,
                "MaxRetries": max_retries,
                "ExecutionClass": execution_class,
                "JobRunQueuingEnabled": job_run_queuing_enabled,
                "SecurityConfiguration": security_configuration,
                "MaintenanceWindow": maintenance_window,
                "LogUri": log_uri,
            },
        ),
        (execution_property, {"MaxConcurrentRuns": max_concurrent_runs}),
        (
            command,
            {
                "Name": command_name,
                "PythonVersion": python_version,
                "Runtime": runtime,
                "ScriptLocation": remote_script_location,
            },
        ),
        (
            source_control,
            {
                "Provider": source_control_provider,
                "Owner": source_control_owner,
                "Repository": source_control_repository,
                "Branch": source_control_branch,
                "Folder": source_control_folder,
                "ScriptLocation": local_script_location,
                # The notebook location is stored under both keys.
                "NotebookLocation": notebook_location,
                "NotebookPath": notebook_location,
            },
        ),
    )
    for target, requested in scalar_groups:
        for field_name, new_value in requested.items():
            if new_value is None:
                continue
            _set_if_changed(target, field_name, new_value, changes)

    connections = _ensure_connections_list(config_data)
    _apply_list_additions(connections, connections_to_add or [], changes, "connection")
    _apply_list_removals(
        connections, connections_to_remove or [], changes, "connection"
    )

    # Comma-separated default arguments: additions first, then removals.
    csv_argument_edits = (
        (
            "--additional-python-modules",
            add_additional_python_modules,
            remove_additional_python_modules,
        ),
        ("--extra-jars", add_extra_jars, remove_extra_jars),
    )
    for flag, to_add, to_remove in csv_argument_edits:
        _apply_csv_additions(default_args, flag, to_add or [], changes, flag)
        _apply_csv_removals(default_args, flag, to_remove or [], changes, flag)

    _apply_argument_updates(
        default_args,
        add_default_argument,
        remove_default_argument,
        changes,
        "default argument",
    )
    _apply_argument_updates(
        non_overridable_args,
        add_non_overridable_argument,
        remove_non_overridable_argument,
        changes,
        "non-overridable argument",
    )

    dedicated_flags = {
        "--enable-metrics": enable_metrics,
        "--enable-observability-metrics": enable_observability_metrics,
        "--enable-glue-datacatalog": enable_glue_datacatalog,
        "--enable-continuous-cloudwatch-log": enable_continuous_cloudwatch_log,
        "--enable-spark-ui": enable_spark_ui,
        "--enable-job-insights": enable_job_insights,
        "--enable-auto-scaling": enable_auto_scaling,
        "--job-bookmark-option": job_bookmark_option,
        "--TempDir": temp_dir,
        "--spark-event-logs-path": spark_event_logs_path,
        "--datalake-formats": datalake_formats,
        "--conf": spark_conf,
        "--continuous-log-logGroup": continuous_log_log_group,
        "--continuous-log-logStreamPrefix": continuous_log_log_stream_prefix,
        "--continuous-log-conversionPattern": continuous_log_conversion_pattern,
        "--executor-cores": executor_cores,
        "--customer-driver-env-vars": customer_driver_env_vars,
        "--customer-executor-env-vars": customer_executor_env_vars,
        "--python-modules-installer-option": python_modules_installer_option,
    }
    for flag, raw_value in dedicated_flags.items():
        if raw_value is None:
            continue
        # Values are stored as strings; booleans become "true" / "false".
        # bool is tested first because it is also an int.
        if isinstance(raw_value, bool):
            rendered = "true" if raw_value else "false"
        elif isinstance(raw_value, (int, float)):
            rendered = str(raw_value)
        else:
            rendered = raw_value
        _set_if_changed(default_args, flag, rendered, changes)

    _apply_file_mapping_updates(
        config_data=config_data,
        job_name=job_name,
        source_control_key="AdditionalPythonFiles",
        add_paths=add_additional_python_files,
        remove_paths=remove_additional_python_files,
        default_arg_key="--extra-py-files",
        changes=changes,
        label="additional python file mapping",
    )
    _apply_extra_py_file_updates(
        config_data=config_data,
        add_paths=add_extra_py_files,
        remove_paths=remove_extra_py_files,
        changes=changes,
    )
    _apply_file_mapping_updates(
        config_data=config_data,
        job_name=job_name,
        source_control_key="ExtraFiles",
        add_paths=add_extra_files,
        remove_paths=remove_extra_files,
        default_arg_key="--extra-files",
        changes=changes,
        label="extra file mapping",
    )

    # Drop sections that ended up empty (including ones created above).
    section_contents = (
        ("Connections", connections),
        ("ExecutionProperty", execution_property),
        ("DefaultArguments", default_args),
        ("NonOverridableArguments", non_overridable_args),
        ("SourceControlDetails", source_control),
        ("Command", command),
    )
    for section_name, contents in section_contents:
        if not contents:
            config_data.pop(section_name, None)

    return changes
773
+
774
+
775
+ @app.command(
776
+ "edit",
777
+ epilog=_examples_epilog(
778
+ "gluekit edit my-job --description 'Updated job description' --glue-version 5.0",
779
+ "gluekit edit --add-additional-python-files glue/libs/helper.py --add-extra-files glue/resources/job.json",
780
+ "gluekit edit my-job --temp-dir s3://bucket/tmp --remove-default-argument legacy-arg --enable-metrics",
781
+ "gluekit edit my-job --enable-auto-scaling --enable-job-insights --executor-cores 8",
782
+ ),
783
+ )
784
+ def glue_edit(
785
+ job_name: Optional[str] = typer.Argument(
786
+ None,
787
+ help="Glue job name to edit. Defaults to the active checkout selection.",
788
+ ),
789
+ description: Optional[str] = typer.Option(
790
+ None, "--description", help="Set the job description."
791
+ ),
792
+ role: Optional[str] = typer.Option(
793
+ None, "--role", help="Set the IAM role for the job."
794
+ ),
795
+ glue_version: Optional[str] = typer.Option(
796
+ None, "--glue-version", help="Set the Glue version."
797
+ ),
798
+ worker_type: Optional[str] = typer.Option(
799
+ None, "--worker-type", help="Set the worker type."
800
+ ),
801
+ number_of_workers: Optional[int] = typer.Option(
802
+ None, "--number-of-workers", help="Set the number of workers."
803
+ ),
804
+ timeout: Optional[int] = typer.Option(
805
+ None, "--timeout", help="Set the job timeout in minutes."
806
+ ),
807
+ max_retries: Optional[int] = typer.Option(
808
+ None, "--max-retries", help="Set the max retry count."
809
+ ),
810
+ execution_class: Optional[str] = typer.Option(
811
+ None, "--execution-class", help="Set the execution class."
812
+ ),
813
+ max_concurrent_runs: Optional[int] = typer.Option(
814
+ None, "--max-concurrent-runs", help="Set the max concurrent runs."
815
+ ),
816
+ job_run_queuing_enabled: Optional[bool] = typer.Option(
817
+ None,
818
+ "--job-run-queuing-enabled/--no-job-run-queuing-enabled",
819
+ help="Enable or disable job run queuing.",
820
+ ),
821
+ security_configuration: Optional[str] = typer.Option(
822
+ None, "--security-configuration", help="Set the security configuration name."
823
+ ),
824
+ maintenance_window: Optional[str] = typer.Option(
825
+ None, "--maintenance-window", help="Set the maintenance window."
826
+ ),
827
+ log_uri: Optional[str] = typer.Option(None, "--log-uri", help="Set the log URI."),
828
+ command_name: Optional[str] = typer.Option(
829
+ None, "--command-name", help="Set Command.Name."
830
+ ),
831
+ python_version: Optional[str] = typer.Option(
832
+ None, "--python-version", help="Set Command.PythonVersion."
833
+ ),
834
+ runtime: Optional[str] = typer.Option(
835
+ None, "--runtime", help="Set Command.Runtime."
836
+ ),
837
+ remote_script_location: Optional[str] = typer.Option(
838
+ None, "--remote-script-location", help="Set Command.ScriptLocation."
839
+ ),
840
+ local_script_location: Optional[str] = typer.Option(
841
+ None, "--local-script-location", help="Set SourceControlDetails.ScriptLocation."
842
+ ),
843
+ notebook_location: Optional[str] = typer.Option(
844
+ None,
845
+ "--notebook-location",
846
+ help="Set SourceControlDetails.NotebookLocation and NotebookPath.",
847
+ ),
848
+ source_control_provider: Optional[str] = typer.Option(
849
+ None, "--source-control-provider", help="Set SourceControlDetails.Provider."
850
+ ),
851
+ source_control_owner: Optional[str] = typer.Option(
852
+ None, "--source-control-owner", help="Set SourceControlDetails.Owner."
853
+ ),
854
+ source_control_repository: Optional[str] = typer.Option(
855
+ None, "--source-control-repository", help="Set SourceControlDetails.Repository."
856
+ ),
857
+ source_control_branch: Optional[str] = typer.Option(
858
+ None, "--source-control-branch", help="Set SourceControlDetails.Branch."
859
+ ),
860
+ source_control_folder: Optional[str] = typer.Option(
861
+ None, "--source-control-folder", help="Set SourceControlDetails.Folder."
862
+ ),
863
+ add_connection: Optional[list[str]] = typer.Option(
864
+ None, "--add-connection", help="Add one or more Glue connections."
865
+ ),
866
+ remove_connection: Optional[list[str]] = typer.Option(
867
+ None, "--remove-connection", help="Remove one or more Glue connections."
868
+ ),
869
+ add_additional_python_modules: Optional[list[str]] = typer.Option(
870
+ None,
871
+ "--add-additional-python-modules",
872
+ help="Add one or more values to --additional-python-modules.",
873
+ ),
874
+ remove_additional_python_modules: Optional[list[str]] = typer.Option(
875
+ None,
876
+ "--remove-additional-python-modules",
877
+ help="Remove one or more values from --additional-python-modules.",
878
+ ),
879
+ add_extra_py_files: Optional[list[str]] = typer.Option(
880
+ None,
881
+ "--add-extra-py-files",
882
+ help="Track one or more local relative Python files in SourceControlDetails.ExtraPyFiles and --extra-py-files.",
883
+ ),
884
+ remove_extra_py_files: Optional[list[str]] = typer.Option(
885
+ None,
886
+ "--remove-extra-py-files",
887
+ help="Remove one or more local relative Python files from SourceControlDetails.ExtraPyFiles and --extra-py-files.",
888
+ ),
889
+ add_extra_jars: Optional[list[str]] = typer.Option(
890
+ None, "--add-extra-jars", help="Add one or more values to --extra-jars."
891
+ ),
892
+ remove_extra_jars: Optional[list[str]] = typer.Option(
893
+ None, "--remove-extra-jars", help="Remove one or more values from --extra-jars."
894
+ ),
895
+ add_additional_python_files: Optional[list[str]] = typer.Option(
896
+ None,
897
+ "--add-additional-python-files",
898
+ help="Track one or more local paths in SourceControlDetails.AdditionalPythonFiles and --extra-py-files.",
899
+ ),
900
+ remove_additional_python_files: Optional[list[str]] = typer.Option(
901
+ None,
902
+ "--remove-additional-python-files",
903
+ help="Remove one or more local paths from SourceControlDetails.AdditionalPythonFiles and --extra-py-files.",
904
+ ),
905
+ add_extra_files: Optional[list[str]] = typer.Option(
906
+ None,
907
+ "--add-extra-files",
908
+ help="Track one or more local paths in SourceControlDetails.ExtraFiles and --extra-files.",
909
+ ),
910
+ remove_extra_files: Optional[list[str]] = typer.Option(
911
+ None,
912
+ "--remove-extra-files",
913
+ help="Remove one or more local paths from SourceControlDetails.ExtraFiles and --extra-files.",
914
+ ),
915
+ add_default_argument: Optional[list[str]] = typer.Option(
916
+ None,
917
+ "--add-default-argument",
918
+ help="Set a DefaultArguments entry using KEY=VALUE. Repeat as needed.",
919
+ ),
920
+ remove_default_argument: Optional[list[str]] = typer.Option(
921
+ None,
922
+ "--remove-default-argument",
923
+ help="Remove a DefaultArguments entry by key. Repeat as needed.",
924
+ ),
925
+ add_non_overridable_argument: Optional[list[str]] = typer.Option(
926
+ None,
927
+ "--add-non-overridable-argument",
928
+ help="Set a NonOverridableArguments entry using KEY=VALUE. Repeat as needed.",
929
+ ),
930
+ remove_non_overridable_argument: Optional[list[str]] = typer.Option(
931
+ None,
932
+ "--remove-non-overridable-argument",
933
+ help="Remove a NonOverridableArguments entry by key. Repeat as needed.",
934
+ ),
935
+ enable_metrics: Optional[bool] = typer.Option(
936
+ None,
937
+ "--enable-metrics/--disable-metrics",
938
+ help="Set DefaultArguments.--enable-metrics to true or false.",
939
+ ),
940
+ enable_observability_metrics: Optional[bool] = typer.Option(
941
+ None,
942
+ "--enable-observability-metrics/--disable-observability-metrics",
943
+ help="Set DefaultArguments.--enable-observability-metrics to true or false.",
944
+ ),
945
+ enable_glue_datacatalog: Optional[bool] = typer.Option(
946
+ None,
947
+ "--enable-glue-datacatalog/--disable-glue-datacatalog",
948
+ help="Set DefaultArguments.--enable-glue-datacatalog to true or false.",
949
+ ),
950
+ enable_continuous_cloudwatch_log: Optional[bool] = typer.Option(
951
+ None,
952
+ "--enable-continuous-cloudwatch-log/--disable-continuous-cloudwatch-log",
953
+ help="Set DefaultArguments.--enable-continuous-cloudwatch-log to true or false.",
954
+ ),
955
+ enable_spark_ui: Optional[bool] = typer.Option(
956
+ None,
957
+ "--enable-spark-ui/--disable-spark-ui",
958
+ help="Set DefaultArguments.--enable-spark-ui to true or false.",
959
+ ),
960
+ enable_job_insights: Optional[bool] = typer.Option(
961
+ None,
962
+ "--enable-job-insights/--disable-job-insights",
963
+ help="Set DefaultArguments.--enable-job-insights to true or false.",
964
+ ),
965
+ enable_auto_scaling: Optional[bool] = typer.Option(
966
+ None,
967
+ "--enable-auto-scaling/--disable-auto-scaling",
968
+ help="Set DefaultArguments.--enable-auto-scaling to true or false.",
969
+ ),
970
+ job_bookmark_option: Optional[str] = typer.Option(
971
+ None,
972
+ "--job-bookmark-option",
973
+ help="Set DefaultArguments.--job-bookmark-option.",
974
+ ),
975
+ temp_dir: Optional[str] = typer.Option(
976
+ None, "--temp-dir", help="Set DefaultArguments.--TempDir."
977
+ ),
978
+ spark_event_logs_path: Optional[str] = typer.Option(
979
+ None,
980
+ "--spark-event-logs-path",
981
+ help="Set DefaultArguments.--spark-event-logs-path.",
982
+ ),
983
+ datalake_formats: Optional[str] = typer.Option(
984
+ None, "--datalake-formats", help="Set DefaultArguments.--datalake-formats."
985
+ ),
986
+ spark_conf: Optional[str] = typer.Option(
987
+ None, "--spark-conf", help="Set DefaultArguments.--conf."
988
+ ),
989
+ continuous_log_log_group: Optional[str] = typer.Option(
990
+ None,
991
+ "--continuous-log-log-group",
992
+ help="Set DefaultArguments.--continuous-log-logGroup.",
993
+ ),
994
+ continuous_log_log_stream_prefix: Optional[str] = typer.Option(
995
+ None,
996
+ "--continuous-log-log-stream-prefix",
997
+ help="Set DefaultArguments.--continuous-log-logStreamPrefix.",
998
+ ),
999
+ continuous_log_conversion_pattern: Optional[str] = typer.Option(
1000
+ None,
1001
+ "--continuous-log-conversion-pattern",
1002
+ help="Set DefaultArguments.--continuous-log-conversionPattern.",
1003
+ ),
1004
+ executor_cores: Optional[int] = typer.Option(
1005
+ None, "--executor-cores", help="Set DefaultArguments.--executor-cores."
1006
+ ),
1007
+ customer_driver_env_vars: Optional[str] = typer.Option(
1008
+ None,
1009
+ "--customer-driver-env-vars",
1010
+ help="Set DefaultArguments.--customer-driver-env-vars.",
1011
+ ),
1012
+ customer_executor_env_vars: Optional[str] = typer.Option(
1013
+ None,
1014
+ "--customer-executor-env-vars",
1015
+ help="Set DefaultArguments.--customer-executor-env-vars.",
1016
+ ),
1017
+ python_modules_installer_option: Optional[str] = typer.Option(
1018
+ None,
1019
+ "--python-modules-installer-option",
1020
+ help="Set DefaultArguments.--python-modules-installer-option.",
1021
+ ),
1022
+ dry_run: bool = typer.Option(
1023
+ False, "--dry-run", help="Show what would be updated without writing files."
1024
+ ),
1025
+ config_dir: Path = typer.Option(
1026
+ Path("glue/configs"),
1027
+ "--config-dir",
1028
+ help="Directory containing Glue job config files.",
1029
+ ),
1030
+ ) -> None:
1031
+ """Edit one local Glue job config with explicit mutation flags."""
1032
+ job_name = _resolve_single_job_name(job_name, "glue edit")
1033
+ config_index = _load_config_index(config_dir)
1034
+ config_entry = config_index.get(job_name)
1035
+ if not config_entry:
1036
+ _raise_missing_local_config(job_name, config_dir, "glue edit")
1037
+
1038
+ config_path: Path = config_entry["config_path"]
1039
+ config_data = config_entry["config"]
1040
+ changes = _apply_edit_mutations(
1041
+ config_data=config_data,
1042
+ job_name=job_name,
1043
+ description=description,
1044
+ role=role,
1045
+ glue_version=glue_version,
1046
+ worker_type=worker_type,
1047
+ number_of_workers=number_of_workers,
1048
+ timeout=timeout,
1049
+ max_retries=max_retries,
1050
+ execution_class=execution_class,
1051
+ max_concurrent_runs=max_concurrent_runs,
1052
+ job_run_queuing_enabled=job_run_queuing_enabled,
1053
+ security_configuration=security_configuration,
1054
+ maintenance_window=maintenance_window,
1055
+ log_uri=log_uri,
1056
+ command_name=command_name,
1057
+ python_version=python_version,
1058
+ runtime=runtime,
1059
+ remote_script_location=remote_script_location,
1060
+ connections_to_add=add_connection,
1061
+ connections_to_remove=remove_connection,
1062
+ source_control_provider=source_control_provider,
1063
+ source_control_owner=source_control_owner,
1064
+ source_control_repository=source_control_repository,
1065
+ source_control_branch=source_control_branch,
1066
+ source_control_folder=source_control_folder,
1067
+ local_script_location=local_script_location,
1068
+ notebook_location=notebook_location,
1069
+ add_additional_python_modules=add_additional_python_modules,
1070
+ remove_additional_python_modules=remove_additional_python_modules,
1071
+ add_extra_py_files=add_extra_py_files,
1072
+ remove_extra_py_files=remove_extra_py_files,
1073
+ add_extra_jars=add_extra_jars,
1074
+ remove_extra_jars=remove_extra_jars,
1075
+ add_additional_python_files=add_additional_python_files,
1076
+ remove_additional_python_files=remove_additional_python_files,
1077
+ add_extra_files=add_extra_files,
1078
+ remove_extra_files=remove_extra_files,
1079
+ add_default_argument=add_default_argument,
1080
+ remove_default_argument=remove_default_argument,
1081
+ add_non_overridable_argument=add_non_overridable_argument,
1082
+ remove_non_overridable_argument=remove_non_overridable_argument,
1083
+ enable_metrics=enable_metrics,
1084
+ enable_observability_metrics=enable_observability_metrics,
1085
+ enable_glue_datacatalog=enable_glue_datacatalog,
1086
+ enable_continuous_cloudwatch_log=enable_continuous_cloudwatch_log,
1087
+ enable_spark_ui=enable_spark_ui,
1088
+ enable_job_insights=enable_job_insights,
1089
+ enable_auto_scaling=enable_auto_scaling,
1090
+ job_bookmark_option=job_bookmark_option,
1091
+ temp_dir=temp_dir,
1092
+ spark_event_logs_path=spark_event_logs_path,
1093
+ datalake_formats=datalake_formats,
1094
+ spark_conf=spark_conf,
1095
+ continuous_log_log_group=continuous_log_log_group,
1096
+ continuous_log_log_stream_prefix=continuous_log_log_stream_prefix,
1097
+ continuous_log_conversion_pattern=continuous_log_conversion_pattern,
1098
+ executor_cores=executor_cores,
1099
+ customer_driver_env_vars=customer_driver_env_vars,
1100
+ customer_executor_env_vars=customer_executor_env_vars,
1101
+ python_modules_installer_option=python_modules_installer_option,
1102
+ )
1103
+
1104
+ _write_config_changes(config_path, config_data, changes, dry_run=dry_run)