gluekit 1.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. gluekit/__init__.py +7 -0
  2. gluekit/app.py +0 -0
  3. gluekit/cli.py +64 -0
  4. gluekit/commands/__init__.py +1 -0
  5. gluekit/commands/add.py +455 -0
  6. gluekit/commands/build.py +816 -0
  7. gluekit/commands/checkout.py +114 -0
  8. gluekit/commands/clone.py +516 -0
  9. gluekit/commands/config_commands.py +180 -0
  10. gluekit/commands/constants.py +47 -0
  11. gluekit/commands/convert.py +336 -0
  12. gluekit/commands/edit.py +1104 -0
  13. gluekit/commands/helpers.py +1068 -0
  14. gluekit/commands/init.py +798 -0
  15. gluekit/commands/list.py +16 -0
  16. gluekit/commands/local_commands.py +680 -0
  17. gluekit/commands/pull.py +374 -0
  18. gluekit/commands/push.py +251 -0
  19. gluekit/commands/remove.py +161 -0
  20. gluekit/commands/run.py +126 -0
  21. gluekit/commands/status.py +97 -0
  22. gluekit/commands/sync.py +97 -0
  23. gluekit/commands/update.py +104 -0
  24. gluekit/job_mgmt/__init__.py +0 -0
  25. gluekit/job_mgmt/glue_jobs.py +1323 -0
  26. gluekit/job_mgmt/magics.py +122 -0
  27. gluekit/job_mgmt/resources/__init__.py +0 -0
  28. gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
  29. gluekit/job_mgmt/resources/magic_map.json +83 -0
  30. gluekit/job_mgmt/schema.py +165 -0
  31. gluekit/local/__init__.py +6 -0
  32. gluekit/local/awsglue/__init__.py +1 -0
  33. gluekit/local/awsglue/context.py +30 -0
  34. gluekit/local/awsglue/job.py +9 -0
  35. gluekit/local/awsglue/utils.py +17 -0
  36. gluekit/local/local.py +434 -0
  37. gluekit/local/local_fixtures.py +337 -0
  38. gluekit/local/pyspark/__init__.py +7 -0
  39. gluekit/local/pyspark/context.py +31 -0
  40. gluekit/local/pyspark/sql/__init__.py +6 -0
  41. gluekit/local/pyspark/sql/session.py +29 -0
  42. gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
  43. gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
  44. gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
  45. gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
  46. gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,798 @@
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import csv
5
+ import os
6
+ import re
7
+ import shlex
8
+ from collections.abc import Mapping
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any, Optional
13
+
14
+ import typer
15
+ from rapidfuzz import fuzz
16
+ from slugify import slugify
17
+
18
+ from ..job_mgmt.glue_jobs import (
19
+ download_glue_job_files,
20
+ get_glue_job_config,
21
+ list_glue_jobs,
22
+ )
23
+
24
+ from .constants import GLUE_SET_FILE
25
+ from .helpers import (
26
+ _examples_epilog,
27
+ _get_checked_out_jobs,
28
+ _load_glue_set_store,
29
+ _resolve_checkout_job_name,
30
+ _save_checked_out_jobs,
31
+ _save_glue_set_store,
32
+ _set_saved_scope,
33
+ )
34
+ from ..cli import app
35
+
36
+
37
# Matches one complete S3 URI. The bucket is required and the key is optional;
# whitespace and commas are rejected so that a comma-separated list of URIs is
# never mistaken for a single URI.
S3_URI_PATTERN = re.compile(r"^s3://(?P<bucket>[^/\s,]+)(?:/(?P<key>[^\s,]*))?$")

# Top-level job-config fields whose values are scanned for S3 URIs.
S3_DETECTION_ROOTS = {"Command", "DefaultArguments", "NonOverridableArguments"}

# Fields under "SourceControlDetails" whose values are scanned for S3 URIs.
S3_SOURCE_CONTROL_KEYS = {
    "AdditionalPythonFiles", "ExtraPyFiles",
    "AdditionalFiles", "ExtraFiles",
}
49
+
50
+
51
@dataclass(frozen=True)
class S3UriRecord:
    """Immutable description of one S3 URI found in a job config value."""

    # Path of the config field that held the URI (dotted keys, ``[i]`` indexes).
    field_path: str
    # Position of the URI among the comma-separated tokens of that field.
    token_index: int
    # The URI token exactly as it was extracted.
    uri: str
    # Bucket and key parts parsed out of ``uri``.
    bucket: str
    key: str
    # The complete, unsplit value of the field the URI came from.
    field_value: Any

    @property
    def identity(self) -> tuple[str, int]:
        """Return ``(field_path, token_index)``, the record's location key."""
        location = (self.field_path, self.token_index)
        return location
63
+
64
+
65
@dataclass(frozen=True)
class S3DetectionResult:
    """Immutable outcome of comparing a baseline config's S3 URIs to a target's."""

    # Target field values to store as params, keyed by config field path.
    params: dict[str, Any]
    # One human-readable message per baseline URI that could not be matched.
    skipped: list[str]
    # Baseline URI -> the top-scoring ``(target URI, score)`` pairs considered.
    candidates: dict[str, list[tuple[str, int]]]
70
+
71
+
72
def _aws_profiles_from_config(config_path: Path) -> list[str]:
    """Return the sorted, de-duplicated profile names declared in an AWS config file.

    A ``[default]`` section yields ``"default"``; a ``[profile NAME]`` section
    yields ``NAME`` with surrounding whitespace removed. Every other section is
    ignored, as are blank names. A missing file yields an empty list.
    """
    if not config_path.exists():
        return []

    parser = configparser.ConfigParser()
    parser.read(config_path)

    names: set[str] = set()
    for section in parser.sections():
        if section == "default":
            names.add("default")
        elif section.startswith("profile "):
            names.add(section.removeprefix("profile ").strip())

    # A section such as "[profile ]" produces an empty name; drop it.
    names.discard("")
    return sorted(names)
84
+
85
+
86
def _save_profile_param(
    *,
    profile: str,
    job_name: str,
    key: str,
    value: Any,
) -> None:
    """Store one ``key``/``value`` param for ``job_name`` under ``profile``.

    Thin wrapper around ``_set_saved_scope``: the pair is passed as a
    one-entry dict.
    """
    single_param = {key: value}
    # NOTE(review): the third positional argument is always False here; its
    # meaning is defined by _set_saved_scope (presumably a "global scope"
    # switch) - confirm against the helper.
    _set_saved_scope(single_param, job_name, False, profile=profile)
94
+
95
+
96
def _parse_s3_uri(value: str) -> tuple[str, str] | None:
    """Split an S3 URI into ``(bucket, key)``.

    Surrounding whitespace is ignored. Returns ``None`` when the text does not
    match ``S3_URI_PATTERN``; a URI without a key yields an empty key string.
    """
    found = S3_URI_PATTERN.match(value.strip())
    if found is None:
        return None
    bucket = found.group("bucket")
    # The key group is optional in the pattern; normalise "absent" to "".
    key = found.group("key") or ""
    return bucket, key
101
+
102
+
103
def _parse_csv_tokens(value: str) -> list[str]:
    """Split ``value`` as a single CSV row and strip each resulting cell.

    If the ``csv`` module rejects the text, the whole stripped value is
    returned as the only token.
    """
    row_reader = csv.reader([value])
    try:
        row = next(row_reader)
    except csv.Error:
        return [value.strip()]
    return [cell.strip() for cell in row]
108
+
109
+
110
def _append_s3_records_from_string(
    records: list[S3UriRecord],
    *,
    field_path: str,
    field_value: str,
) -> None:
    """Append one ``S3UriRecord`` to ``records`` per S3 URI token in ``field_value``.

    The value is split as a CSV row; tokens that are not S3 URIs are skipped.
    Each record keeps the token's position in that row and the full, unsplit
    field value.
    """
    # An empty token list falls back to the stripped value as a single token.
    tokens = _parse_csv_tokens(field_value) or [field_value.strip()]
    for position, candidate in enumerate(tokens):
        bucket_and_key = _parse_s3_uri(candidate)
        if bucket_and_key is None:
            continue
        bucket_name, object_key = bucket_and_key
        record = S3UriRecord(
            field_path=field_path,
            token_index=position,
            uri=candidate,
            bucket=bucket_name,
            key=object_key,
            field_value=field_value,
        )
        records.append(record)
134
+
135
+
136
def _collect_s3_uri_records_from_value(
    value: Any,
    *,
    field_path: str,
    records: list[S3UriRecord],
) -> None:
    """Walk ``value`` recursively and append a record for every S3 URI string.

    Strings are scanned directly. Dicts recurse with ``.<key>`` appended to the
    path and lists recurse with ``[<index>]`` appended. Any other type is
    ignored.
    """
    if isinstance(value, str):
        _append_s3_records_from_string(
            records, field_path=field_path, field_value=value
        )
        return

    # Generators keep path formatting lazy and in the container's own order.
    if isinstance(value, dict):
        children = ((f"{field_path}.{name}", item) for name, item in value.items())
    elif isinstance(value, list):
        children = (
            (f"{field_path}[{position}]", item) for position, item in enumerate(value)
        )
    else:
        return

    for child_path, child in children:
        _collect_s3_uri_records_from_value(
            child,
            field_path=child_path,
            records=records,
        )
162
+
163
+
164
def collect_config_s3_uri_records(config_data: dict[str, Any]) -> list[S3UriRecord]:
    """Collect every S3 URI record from the scanned fields of a job config.

    Scans the top-level fields named in ``S3_DETECTION_ROOTS`` and the
    ``SourceControlDetails`` sub-fields named in ``S3_SOURCE_CONTROL_KEYS``.
    Fields that are missing or ``None`` are skipped.

    Args:
        config_data: Job config mapping to scan.

    Returns:
        The records found, ordered by field name within each group (roots
        first, then source-control fields).
    """
    records: list[S3UriRecord] = []

    # Both constants are sets of str, whose iteration order depends on the hash
    # seed. Sorting makes the record order repeatable from run to run, and with
    # it the order of skip messages and prompts derived from these records.
    for root in sorted(S3_DETECTION_ROOTS):
        value = config_data.get(root)
        if value is not None:
            _collect_s3_uri_records_from_value(
                value,
                field_path=root,
                records=records,
            )

    source_control = config_data.get("SourceControlDetails", {})
    if isinstance(source_control, dict):
        for key in sorted(S3_SOURCE_CONTROL_KEYS):
            value = source_control.get(key)
            if value is not None:
                _collect_s3_uri_records_from_value(
                    value,
                    field_path=f"SourceControlDetails.{key}",
                    records=records,
                )
    return records
186
+
187
+
188
def detect_profile_s3_params(
    *,
    baseline_config: dict[str, Any],
    target_config: dict[str, Any],
    match_threshold: int,
) -> S3DetectionResult:
    """Derive params from the S3 URIs that differ between two job configs.

    Each baseline URI is paired with the target URI at the same field path and
    token position, or failing that with the best fuzzy key match in the same
    field. Pairs are then handled as follows:

    * identical URIs produce nothing;
    * same key but different bucket records the target field value as a param;
    * different keys are re-matched by key similarity against all target URIs
      of that field, and the match's field value is recorded when its score
      reaches ``match_threshold``.

    Baseline URIs without a usable match are reported in ``skipped``.

    Args:
        baseline_config: Job config used as the reference.
        target_config: Job config compared against the reference.
        match_threshold: Minimum similarity score accepted for fuzzy matches.

    Returns:
        The detected params, skip messages and per-URI match candidates.
    """
    baseline_records = collect_config_s3_uri_records(baseline_config)
    target_records = collect_config_s3_uri_records(target_config)

    # Index the target records by exact location and by field path in one pass.
    by_identity: dict[tuple[str, int], S3UriRecord] = {}
    by_field: dict[str, list[S3UriRecord]] = {}
    for rec in target_records:
        by_identity[rec.identity] = rec
        by_field.setdefault(rec.field_path, []).append(rec)

    params: dict[str, Any] = {}
    skipped: list[str] = []
    candidate_summary: dict[str, list[tuple[str, int]]] = {}

    for base in baseline_records:
        counterpart = by_identity.get(base.identity)

        # No URI at the same position: fall back to the best key match among
        # the URIs of the same field, if there are any.
        if counterpart is None:
            same_field = by_field.get(base.field_path, [])
            if same_field:
                counterpart, scored = _best_s3_record_match(
                    base, same_field, match_threshold
                )
                if scored:
                    candidate_summary[base.uri] = scored
        if counterpart is None:
            skipped.append(f"{base.field_path}: no target URI matched {base.uri}")
            continue

        if counterpart.uri == base.uri:
            continue
        if counterpart.key == base.key:
            # Only the bucket can differ here; that is the mapping to save.
            if counterpart.bucket != base.bucket:
                params[counterpart.field_path] = counterpart.field_value
            continue

        # Keys differ: require a confident fuzzy match before saving anything.
        pool = by_field.get(base.field_path, [counterpart])
        confident, scored = _best_s3_record_match(base, pool, match_threshold)
        if scored:
            candidate_summary[base.uri] = scored
        if confident is None:
            skipped.append(
                f"{base.field_path}: no confident target URI matched {base.uri}"
            )
            continue
        params[confident.field_path] = confident.field_value

    return S3DetectionResult(
        params=params,
        skipped=skipped,
        candidates=candidate_summary,
    )
246
+
247
+
248
def _best_s3_record_match(
    baseline_record: S3UriRecord,
    choices: list[S3UriRecord],
    match_threshold: int,
) -> tuple[S3UriRecord | None, list[tuple[str, int]]]:
    """Pick the choice whose key is most similar to the baseline record's key.

    Similarity is ``fuzz.ratio`` truncated to an int. Ties keep the order of
    ``choices``.

    Returns:
        ``(best, candidates)``. ``best`` is the top-scoring record, or ``None``
        when its score is below ``match_threshold`` or ``choices`` is empty.
        ``candidates`` holds up to two ``(uri, score)`` pairs, best first.
    """
    if not choices:
        return None, []

    ranked = [
        (record, int(fuzz.ratio(baseline_record.key, record.key)))
        for record in choices
    ]
    # list.sort is stable, so equal scores stay in their original order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    candidates = [(record.uri, score) for record, score in ranked[:2]]
    top_record, top_score = ranked[0]
    if top_score >= match_threshold:
        return top_record, candidates
    return None, candidates
268
+
269
+
270
def _fetch_profile_job_configs(
    *,
    profile_jobs: Mapping[str, str],
    auto_login: bool,
) -> tuple[dict[str, dict[str, Any]], dict[str, str]]:
    """Fetch the Glue job config for each ``profile -> job name`` pair.

    Failures never propagate: an exception from the fetch, or a result that is
    empty or not a dict, is recorded as an error message for that profile.

    Returns:
        ``(configs, errors)``, both keyed by profile name.
    """
    configs: dict[str, dict[str, Any]] = {}
    errors: dict[str, str] = {}

    for profile, job_name in profile_jobs.items():
        try:
            fetched = get_glue_job_config(
                job_name,
                profile_name=profile,
                auto_login=auto_login,
            )
        except Exception as exc:  # noqa: BLE001 - preserve interactive fallback.
            errors[profile] = str(exc)
        else:
            if isinstance(fetched, dict) and fetched:
                configs[profile] = fetched
            else:
                errors[profile] = "Glue returned an empty job config."

    return configs, errors
292
+
293
+
294
def _print_missing_checkout_job_recommendation(
    *,
    profile: str,
    job_name: str,
) -> None:
    """Print recovery advice to stderr when a profile cannot fetch the checked-out job.

    The advice covers clearing the checkout, listing the profile's jobs with
    the AWS CLI, and re-selecting the job with that profile.

    Args:
        profile: AWS profile that failed to fetch the job.
        job_name: Name of the checked-out job that could not be fetched.
    """
    # Both suggested commands are meant to be pasted into a shell, so every
    # interpolated value is quoted - including the profile in the aws command.
    quoted_profile = shlex.quote(profile)
    quoted_job = shlex.quote(job_name)

    messages = (
        f"Recommendation: AWS profile '{profile}' could not fetch the checked-out "
        f"job '{job_name}'. If that checkout is stale, clear it and rerun init:",
        " gluekit checkout --clear",
        "Then run `gluekit init` again; with no job checked out, pressing Enter at "
        "the job prompt will list Glue jobs from the selected profile.",
        "You can also inspect that profile directly with:",
        f" aws glue get-jobs --profile {quoted_profile} --query 'Jobs[].Name' --output table",
        "If the job exists in that profile, reselect it with the profile:",
        f" gluekit checkout {quoted_job} --profile {quoted_profile}",
    )
    for message in messages:
        typer.echo(message, err=True)
326
+
327
+
328
def _parse_profile_job_options(profile_job: list[str]) -> dict[str, str]:
    """Parse repeated ``PROFILE=GLUE_JOB_NAME`` option values into a mapping.

    Each value is split at its first ``=`` and both sides are stripped. A later
    entry for the same profile replaces an earlier one.

    Raises:
        typer.BadParameter: If a value has no ``=`` or a blank side.
    """
    parsed: dict[str, str] = {}
    for entry in profile_job:
        left, equals_sign, right = entry.partition("=")
        profile_name = left.strip()
        glue_job = right.strip()
        if not (equals_sign and profile_name and glue_job):
            raise typer.BadParameter(
                "--profile-job must be provided as PROFILE=GLUE_JOB_NAME."
            )
        parsed[profile_name] = glue_job
    return parsed
338
+
339
+
340
def _job_modified_sort_value(job: dict[str, Any]) -> datetime:
    """Return a timezone-aware timestamp used to order jobs by modification time.

    Reads ``LastModifiedOn`` and falls back to ``CreatedOn``. Naive datetimes
    are labelled as UTC. Anything that is not a datetime sorts as the earliest
    possible UTC moment.
    """
    stamp = job.get("LastModifiedOn") or job.get("CreatedOn")
    if not isinstance(stamp, datetime):
        return datetime.min.replace(tzinfo=timezone.utc)
    # Aware and naive datetimes cannot be compared, so give naive ones a zone.
    return stamp if stamp.tzinfo is not None else stamp.replace(tzinfo=timezone.utc)
347
+
348
+
349
def _render_job_modified(job: dict[str, Any]) -> str:
    """Return display text for a job's modification time.

    Reads ``LastModifiedOn`` and falls back to ``CreatedOn``. Datetimes are
    rendered in ISO format, other truthy values via ``str``, and a missing or
    falsy value as ``"unknown modified time"``.
    """
    stamp = job.get("LastModifiedOn") or job.get("CreatedOn")
    if isinstance(stamp, datetime):
        return stamp.isoformat()
    return str(stamp) if stamp else "unknown modified time"
356
+
357
+
358
def _select_job_from_profile_listing(
    *,
    profile: str,
    auto_login: bool,
) -> str | None:
    """List a profile's Glue jobs and let the user pick one interactively.

    Jobs are shown most recently modified first. If the listing fails or has
    no named jobs, the user is asked to type a job name instead.

    Args:
        profile: AWS profile whose jobs are listed.
        auto_login: Forwarded to ``list_glue_jobs``.

    Returns:
        The chosen job name, or ``None`` when the user enters nothing.
    """
    try:
        jobs = list_glue_jobs(profile_name=profile, auto_login=auto_login)
    except Exception as exc:  # noqa: BLE001 - keep init interactive.
        typer.echo(f"Could not list Glue jobs for {profile}: {exc}", err=True)
        jobs = []

    # Keep only entries that can be shown and selected by name.
    named_jobs = [
        job for job in jobs if isinstance(job, dict) and isinstance(job.get("Name"), str)
    ]
    named_jobs.sort(key=_job_modified_sort_value, reverse=True)
    if not named_jobs:
        return typer.prompt(
            f"Glue job name for profile '{profile}'",
            default="",
            show_default=False,
        ).strip() or None

    typer.echo(f"Glue jobs for profile '{profile}':")
    for index, job in enumerate(named_jobs, start=1):
        typer.echo(f"{index}. {job['Name']} ({_render_job_modified(job)})")

    names = {job["Name"] for job in named_jobs}
    while True:
        selection = typer.prompt(
            f"Select Glue job for profile '{profile}' by number or name",
            default="",
            show_default=False,
        ).strip()
        if not selection:
            return None
        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # superscript digits that int() rejects with ValueError, which would
        # crash the prompt loop instead of re-asking.
        if selection.isdecimal():
            index = int(selection)
            if 1 <= index <= len(named_jobs):
                return named_jobs[index - 1]["Name"]
        # Out-of-range numbers fall through so an all-digit job name still works.
        if selection in names:
            return selection
        typer.echo("Enter a listed number, an exact job name, or press Enter to skip.")
400
+
401
+
402
def _prompt_source_job_for_profile(
    *,
    profile: str,
    default_job: str,
    profile_job_overrides: Mapping[str, str],
    auto_login: bool,
) -> str | None:
    """Decide which Glue job to inspect for ``profile``.

    Resolution order:

    1. a non-empty entry in ``profile_job_overrides`` (used without prompting);
    2. a prompt pre-filled with ``default_job`` when one is given;
    3. a free-text prompt, and if that is left empty, a pick from the
       profile's job listing.

    Returns:
        The job name, or ``None`` when nothing was chosen.
    """
    forced = profile_job_overrides.get(profile)
    if forced:
        typer.echo(f"Using Glue job '{forced}' for profile '{profile}'.")
        return forced

    question = f"Glue job to inspect for profile '{profile}'"
    if default_job:
        answer = typer.prompt(question, default=default_job)
        return answer.strip() or None

    typed = typer.prompt(question, default="", show_default=False).strip()
    if typed:
        return typed

    return _select_job_from_profile_listing(
        profile=profile,
        auto_login=auto_login,
    )
426
+
427
+
428
def _add_detected_param(
    detected_params: dict[str, dict[str, Any]],
    *,
    profile: str,
    key: str,
    value: Any,
) -> None:
    """Record ``key -> value`` under ``profile`` in ``detected_params``, in place.

    The per-profile dict is created on first use; an existing key is overwritten.
    """
    profile_params = detected_params.setdefault(profile, {})
    profile_params[key] = value
436
+
437
+
438
def _prompt_manual_param_for_skip(
    *,
    profile: str,
    reason: str,
    detected_params: dict[str, dict[str, Any]],
) -> None:
    """Report a skipped S3 detection and offer to enter a param by hand.

    If the user accepts, a key and a value are prompted for. The pair is stored
    in ``detected_params`` under ``profile`` unless the key is blank.
    """
    typer.echo(f"Skipped S3 detection for {profile}: {reason}")
    wants_manual = typer.confirm(
        f"Enter a manual profile param for {profile}?", default=False
    )
    if not wants_manual:
        return

    # Both prompts are always shown; the key is only validated afterwards.
    raw_key = typer.prompt("Param key")
    raw_value = typer.prompt("Param value")
    cleaned_key = raw_key.strip()
    if not cleaned_key:
        return
    _add_detected_param(
        detected_params,
        profile=profile,
        key=cleaned_key,
        value=raw_value,
    )
456
+
457
+
458
def _print_detection_summary(
    *,
    local_job_name: str,
    profile_source_jobs: Mapping[str, str],
    detected_params: dict[str, dict[str, Any]],
    skipped: dict[str, list[str]],
    candidates: dict[str, dict[str, list[tuple[str, int]]]],
) -> None:
    """Print a summary of what detection found, with profiles in sorted order.

    Always prints the source-job and detected-param sections. The candidate
    and skipped sections are printed only when they have content.
    """
    say = typer.echo

    say("Profile source jobs:")
    for profile in sorted(profile_source_jobs):
        source_job = profile_source_jobs[profile]
        say(f"- {profile}: {source_job} -> {local_job_name}")

    say("Detected profile params:")
    if not detected_params:
        say("- (none)")
    for profile in sorted(detected_params):
        say(f"- {profile}:")
        for key, value in sorted(detected_params[profile].items()):
            say(f" {key}: {value}")

    if candidates:
        say("S3 match candidates:")
        for profile in sorted(candidates):
            for source_uri, matches in sorted(candidates[profile].items()):
                rendered = ", ".join(f"{uri} ({score})" for uri, score in matches)
                say(f"- {profile}: {source_uri} -> {rendered}")

    if skipped:
        say("Skipped S3 detections:")
        for profile in sorted(skipped):
            for reason in skipped[profile]:
                say(f"- {profile}: {reason}")
490
+
491
+
492
def _glue_init(
    *,
    aws_config: Path,
    config_dir: Path,
    auto_login: bool,
    detect_roles: bool,
    detect_buckets: bool,
    match_threshold: int,
    baseline_profile: Optional[str],
    profile_job: list[str],
) -> None:
    """Run the interactive repository setup.

    Steps, in order:

    1. Validate options and read profile names from ``aws_config``.
    2. Ask which profiles to use and which Glue job to inspect for each.
    3. Ensure each selected profile has ``global`` and ``jobs`` entries in the
       saved store.
    4. Resolve the local job name and save the checkout with the baseline
       profile.
    5. Optionally detect per-profile roles and S3 mappings and save them as
       profile params.
    6. Optionally pull the baseline job's files.
    7. Ask for the automation defaults and save them.

    Args:
        aws_config: AWS CLI config file to scan for profile names.
        config_dir: Directory for job config files.
        auto_login: Forwarded to the Glue helper calls.
        detect_roles: Whether role detection may be offered.
        detect_buckets: Whether S3 mapping detection may be offered.
        match_threshold: Minimum fuzzy score (0-100) for S3 key matches.
        baseline_profile: Profile to use as checkout/baseline; prompted for
            when empty.
        profile_job: Raw ``PROFILE=GLUE_JOB_NAME`` override values.

    Raises:
        typer.BadParameter: For an out-of-range threshold, malformed or
            unselected ``--profile-job`` entries, an unselected baseline
            profile, or a pull request whose source job is unusable.
    """
    if match_threshold < 0 or match_threshold > 100:
        raise typer.BadParameter("--match-threshold must be between 0 and 100.")

    profile_job_overrides = _parse_profile_job_options(profile_job)
    profiles = _aws_profiles_from_config(aws_config)
    if not profiles:
        typer.echo(f"No AWS profiles found in {aws_config}.")
        return

    # --- Profile and source-job selection -------------------------------
    selected_profiles: list[str] = []
    profile_source_jobs: dict[str, str] = {}
    checked_out_jobs = _get_checked_out_jobs()
    # Only an unambiguous single checkout is offered as the default job.
    default_job = checked_out_jobs[0] if len(checked_out_jobs) == 1 else ""
    for profile in profiles:
        if not typer.confirm(
            f"Use AWS profile '{profile}' in this repository?", default=False
        ):
            continue
        source_job = _prompt_source_job_for_profile(
            profile=profile,
            default_job=default_job,
            profile_job_overrides=profile_job_overrides,
            auto_login=auto_login,
        )
        if not source_job:
            typer.echo(
                f"Skipping profile '{profile}' because no Glue job was selected."
            )
            continue
        selected_profiles.append(profile)
        profile_source_jobs[profile] = source_job

    if not selected_profiles:
        typer.echo("No profiles selected.")
        return

    missing_overrides = sorted(set(profile_job_overrides) - set(selected_profiles))
    if missing_overrides:
        raise typer.BadParameter(
            "--profile-job was provided for profiles that were not selected: "
            + ", ".join(missing_overrides)
        )
    # Reject an unusable --baseline-profile before anything is written, so a
    # bad flag cannot leave a half-initialised store behind.
    if baseline_profile and baseline_profile not in selected_profiles:
        raise typer.BadParameter(
            f"Profile '{baseline_profile}' was not selected for this repository."
        )

    # --- Seed the saved store for each selected profile ------------------
    store = _load_glue_set_store()
    profiles_store = store.setdefault("profiles", {})
    for profile in selected_profiles:
        profile_store = profiles_store.setdefault(profile, {})
        if not isinstance(profile_store, dict):
            # Replace a malformed entry with a fresh skeleton.
            profiles_store[profile] = {"global": {}, "jobs": {}}
            continue
        profile_store.setdefault("global", {})
        profile_store.setdefault("jobs", {})
    _save_glue_set_store(store)

    # --- Checkout ---------------------------------------------------------
    local_job_name = default_job or profile_source_jobs[selected_profiles[0]]
    resolved_job_name, source = _resolve_checkout_job_name(
        local_job_name, config_dir=config_dir
    )
    if baseline_profile:
        first_profile = baseline_profile
    else:
        first_profile = typer.prompt(
            "Profile to checkout first / use as S3 baseline",
            default=selected_profiles[0],
        )
    if first_profile not in selected_profiles:
        raise typer.BadParameter(
            f"Profile '{first_profile}' was not selected for this repository."
        )
    _save_checked_out_jobs(
        [resolved_job_name],
        resolved_job_name,
        source=source,
        profile=first_profile,
    )
    typer.echo(f"Checked out {resolved_job_name} with profile {first_profile}.")

    # --- Detection --------------------------------------------------------
    # The confirmation is asked only when the matching flag allows detection.
    role_detection_enabled = detect_roles and typer.confirm(
        "Auto-detect profile-specific Glue IAM roles?", default=True
    )
    bucket_detection_enabled = detect_buckets and typer.confirm(
        "Auto-detect profile-specific S3 bucket mappings?", default=True
    )

    detected_params: dict[str, dict[str, Any]] = {}
    skipped_s3: dict[str, list[str]] = {}
    s3_candidates: dict[str, dict[str, list[tuple[str, int]]]] = {}

    profile_configs: dict[str, dict[str, Any]] = {}
    profile_errors: dict[str, str] = {}
    if role_detection_enabled or bucket_detection_enabled:
        profile_configs, profile_errors = _fetch_profile_job_configs(
            profile_jobs=profile_source_jobs,
            auto_login=auto_login,
        )
        for profile, error in sorted(profile_errors.items()):
            typer.echo(
                f"Could not fetch Glue job config for {profile} "
                f"({profile_source_jobs[profile]}): {error}",
                err=True,
            )
            # The failing job is the existing checkout: suggest recovery steps.
            if default_job and profile_source_jobs[profile] == default_job:
                _print_missing_checkout_job_recommendation(
                    profile=profile,
                    job_name=default_job,
                )

    if role_detection_enabled:
        for profile in selected_profiles:
            role = profile_configs.get(profile, {}).get("Role")
            if isinstance(role, str) and role.strip():
                _add_detected_param(
                    detected_params,
                    profile=profile,
                    key="Role",
                    value=role.strip(),
                )
                continue
            # No usable role in the fetched config: ask for one instead.
            role = typer.prompt(f"Role for {profile}", default="")
            if role.strip():
                _add_detected_param(
                    detected_params,
                    profile=profile,
                    key="Role",
                    value=role.strip(),
                )

    if bucket_detection_enabled:
        baseline_config = profile_configs.get(first_profile)
        if not baseline_config:
            typer.echo(
                f"Cannot detect S3 bucket mappings without baseline config for {first_profile}.",
                err=True,
            )
        else:
            for profile in selected_profiles:
                if profile == first_profile:
                    continue
                target_config = profile_configs.get(profile)
                if not target_config:
                    continue
                result = detect_profile_s3_params(
                    baseline_config=baseline_config,
                    target_config=target_config,
                    match_threshold=match_threshold,
                )
                for key, value in result.params.items():
                    _add_detected_param(
                        detected_params,
                        profile=profile,
                        key=key,
                        value=value,
                    )
                if result.skipped:
                    skipped_s3[profile] = list(result.skipped)
                if result.candidates:
                    s3_candidates[profile] = dict(result.candidates)

            for profile, reasons in skipped_s3.items():
                for reason in reasons:
                    _prompt_manual_param_for_skip(
                        profile=profile,
                        reason=reason,
                        detected_params=detected_params,
                    )

    if role_detection_enabled or bucket_detection_enabled:
        _print_detection_summary(
            local_job_name=resolved_job_name,
            profile_source_jobs=profile_source_jobs,
            detected_params=detected_params,
            skipped=skipped_s3,
            candidates=s3_candidates,
        )
        for profile, params in detected_params.items():
            for key, value in params.items():
                _save_profile_param(
                    profile=profile,
                    job_name=resolved_job_name,
                    key=key,
                    value=value,
                )

    # --- Optional pull of the baseline job --------------------------------
    if typer.confirm(
        "Pull the baseline Glue job now using the checkout profile?", default=False
    ):
        source_job_name = profile_source_jobs[first_profile]
        if source_job_name != resolved_job_name:
            raise typer.BadParameter(
                "Pulling during init requires the baseline source job to match the "
                f"local checked-out job ({resolved_job_name}); selected "
                f"{source_job_name} for {first_profile}."
            )
        config_dir.mkdir(parents=True, exist_ok=True)
        jobs = list_glue_jobs(profile_name=first_profile, auto_login=auto_login)
        # Same dict guard as the interactive job listing, so a non-dict entry
        # is ignored instead of raising AttributeError on .get().
        matches = [
            job
            for job in jobs
            if isinstance(job, dict) and job.get("Name") == source_job_name
        ]
        if not matches:
            raise typer.BadParameter(
                f'No Glue job named "{source_job_name}" found for profile {first_profile}.'
            )
        config_path = config_dir / f"{slugify(resolved_job_name)}.json"
        download_glue_job_files(
            name=source_job_name,
            local_path=f"glue/scripts/{slugify(resolved_job_name)}.py",
            config_path=config_path,
            include_components={"config", "script", "notebook"},
            existing_sc={},
            profile_name=first_profile,
            auto_login=auto_login,
        )

    # --- Automation defaults ----------------------------------------------
    # Reload the store: the param saves above may have changed it on disk.
    store = _load_glue_set_store()
    automation = store.setdefault("automation", {})
    if isinstance(automation, dict):
        automation["profiles"] = selected_profiles
        automation["source_jobs"] = profile_source_jobs
        automation["default_job"] = resolved_job_name
        automation["build"] = {
            "package_whl": typer.confirm(
                "Default build flow should refresh dist/*.whl in Glue config?",
                default=True,
            ),
            "push": typer.confirm(
                "Default build flow should push after build?",
                default=False,
            ),
        }
        automation["local_dev_file_type"] = typer.prompt(
            "Local development file type",
            default="notebook",
        )
    _save_glue_set_store(store)
    typer.echo(f"Saved setup in {GLUE_SET_FILE}.")
735
+
736
+
737
@app.command(
    "init",
    epilog=_examples_epilog(
        "gluekit init",
    ),
)
def glue_init(
    aws_config: Optional[Path] = typer.Option(
        None,
        "--aws-config",
        help="Local AWS CLI config file to scan for credential profile names; defaults to AWS_CONFIG_FILE or ~/.aws/config.",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
    auto_login: bool = typer.Option(
        True,
        "--auto-login/--no-auto-login",
        help="For selected real AWS profiles, automatically run 'aws sso login' when credentials are missing or expired.",
    ),
    detect_roles: bool = typer.Option(
        True,
        "--detect-roles/--no-detect-roles",
        help="Fetch the selected real AWS Glue job in each profile and save its Role as local gluekit params.",
    ),
    detect_buckets: bool = typer.Option(
        True,
        "--detect-buckets/--no-detect-buckets",
        help="Compare S3 URIs in real AWS Glue job configs across profiles and save local gluekit profile mappings.",
    ),
    match_threshold: int = typer.Option(
        85,
        "--match-threshold",
        help="Minimum rapidfuzz score for non-exact S3 key matches.",
    ),
    baseline_profile: Optional[str] = typer.Option(
        None,
        "--baseline-profile",
        help="Selected AWS CLI profile to store as the first gluekit checkout scope and S3 comparison baseline.",
    ),
    profile_job: Optional[list[str]] = typer.Option(
        None,
        "--profile-job",
        help="Real AWS Glue job to inspect for an AWS CLI profile, as PROFILE=GLUE_JOB_NAME. May be repeated.",
    ),
) -> None:
    """Initialize local repo-level Gluekit settings from AWS CLI profiles."""
    # The docstring above is left untouched in case it is used as CLI help text.
    #
    # Config file resolution order: --aws-config, then a non-empty
    # AWS_CONFIG_FILE, then ~/.aws/config. An empty AWS_CONFIG_FILE is treated
    # as unset instead of resolving to the current directory, and the home
    # directory is looked up only when the default path is actually needed.
    if aws_config is not None:
        resolved_aws_config = aws_config
    else:
        env_config = os.environ.get("AWS_CONFIG_FILE")
        if env_config:
            resolved_aws_config = Path(env_config)
        else:
            resolved_aws_config = Path.home() / ".aws" / "config"
    # Expand a leading "~" for values the shell did not expand, such as
    # --aws-config=~/.aws/config or an env var set outside a shell.
    resolved_aws_config = resolved_aws_config.expanduser()

    _glue_init(
        aws_config=resolved_aws_config,
        config_dir=config_dir,
        auto_login=auto_login,
        detect_roles=detect_roles,
        detect_buckets=detect_buckets,
        match_threshold=match_threshold,
        baseline_profile=baseline_profile,
        # Normalise "option not given" (None) to an empty list.
        profile_job=profile_job or [],
    )