gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
gluekit/commands/pull.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
import json
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from fnmatch import fnmatch
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
import typer
|
|
12
|
+
from slugify import slugify
|
|
13
|
+
|
|
14
|
+
from ..job_mgmt.glue_jobs import (
|
|
15
|
+
download_glue_job_files,
|
|
16
|
+
list_glue_jobs,
|
|
17
|
+
normalize_glue_config_data,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .constants import PULL_COMPONENT_ALIASES, PULL_COMPONENTS
|
|
21
|
+
from .helpers import (
|
|
22
|
+
_examples_epilog,
|
|
23
|
+
_get_checked_out_jobs,
|
|
24
|
+
_get_checked_out_profile,
|
|
25
|
+
_get_config_field_path,
|
|
26
|
+
_get_local_last_modified,
|
|
27
|
+
_get_saved_params_for_job,
|
|
28
|
+
_load_config_index,
|
|
29
|
+
_load_glue_set_store,
|
|
30
|
+
_parse_datetime,
|
|
31
|
+
_profile_param_expected_fields,
|
|
32
|
+
_set_saved_scope,
|
|
33
|
+
_validate_profile_mappings_align_with_config,
|
|
34
|
+
_write_glue_job_list_csv,
|
|
35
|
+
)
|
|
36
|
+
from .clone import (
|
|
37
|
+
_rewrite_glue_job_config,
|
|
38
|
+
_collect_source_control_file_local_renames,
|
|
39
|
+
_move_local_artifact,
|
|
40
|
+
)
|
|
41
|
+
from .edit import _normalize_component_filters
|
|
42
|
+
from .init import detect_profile_s3_params
|
|
43
|
+
from ..cli import app
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _resolve_pull_local_path(
    default_local_path: str, saved_params: dict[str, Any]
) -> str:
    """Choose the local script path to use for a pull.

    The saved parameters are consulted in priority order
    (``script_location``, ``local_script_path``, ``local_path``). The first
    one holding a non-blank string wins and is returned exactly as stored
    (it is not stripped). If none qualifies, ``default_local_path`` is
    returned.
    """
    candidates = (
        saved_params.get(name)
        for name in ("script_location", "local_script_path", "local_path")
    )
    usable = (
        candidate
        for candidate in candidates
        if isinstance(candidate, str) and candidate.strip()
    )
    # Both generators are lazy, so lookups stop at the first usable value.
    return next(usable, default_local_path)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _direct_profile_job_params(job_name: str, profile: Optional[str]) -> dict[str, Any]:
    """Return a copy of the parameters saved for ``job_name`` under ``profile``.

    Looks up ``store["profiles"][profile]["jobs"][job_name]`` in the mapping
    returned by ``_load_glue_set_store()``.

    Returns an empty dict when ``profile`` is falsy, or when any level of
    that path is missing or is not a dict. The returned dict is a shallow
    copy, so callers may mutate it without touching the loaded store.
    """
    if not profile:
        return {}
    store = _load_glue_set_store()
    profiles = store.get("profiles", {})
    # Guard every level: a null or otherwise malformed entry must read as
    # "no saved params" rather than raise AttributeError on ``.get``.
    if not isinstance(profiles, dict):
        return {}
    profile_store = profiles.get(profile, {})
    if not isinstance(profile_store, dict):
        return {}
    jobs = profile_store.get("jobs", {})
    if not isinstance(jobs, dict):
        return {}
    params = jobs.get(job_name, {})
    return dict(params) if isinstance(params, dict) else {}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _other_profile_job_params(
    job_name: str, profile: Optional[str]
) -> dict[str, dict[str, Any]]:
    """Collect the saved parameters for ``job_name`` from every other profile.

    Reads the mapping returned by ``_load_glue_set_store()`` and returns
    ``{profile_name: params_copy}`` for each profile other than ``profile``
    that holds a non-empty dict of parameters for ``job_name``.

    Profiles with a non-string name, a non-dict body, or a non-dict
    ``"jobs"`` entry are skipped. An empty dict is returned when the
    store's ``"profiles"`` value is not a dict.
    """
    store = _load_glue_set_store()
    profiles = store.get("profiles", {})
    if not isinstance(profiles, dict):
        return {}
    result: dict[str, dict[str, Any]] = {}
    for profile_name, profile_store in profiles.items():
        if not isinstance(profile_name, str) or profile_name == profile:
            continue
        if not isinstance(profile_store, dict):
            continue
        jobs = profile_store.get("jobs", {})
        # One malformed profile (e.g. ``"jobs": null``) must not abort the
        # scan of the remaining profiles.
        if not isinstance(jobs, dict):
            continue
        params = jobs.get(job_name, {})
        if isinstance(params, dict) and params:
            result[profile_name] = dict(params)
    return result
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _detect_profile_params_from_pull(
    *,
    job_name: str,
    profile: str,
    baseline_config: dict[str, Any],
    pulled_config: dict[str, Any],
    other_profile_params: Mapping[str, dict[str, Any]],
    dry_run: bool,
) -> dict[str, Any]:
    """Derive profile mappings for ``job_name`` from a freshly pulled config.

    Mappings are gathered from three places, later ones overriding earlier:

    1. the pulled config's ``Role`` (stripped), when it is a non-blank string;
    2. every config field that ``_profile_param_expected_fields`` reports for
       a parameter saved under another profile, when the pulled config holds
       a non-``None`` value at that field path;
    3. the ``params`` of ``detect_profile_s3_params`` comparing
       ``baseline_config`` to ``pulled_config`` (only when a baseline exists).

    When nothing is found an empty dict is returned and nothing is printed.
    Otherwise the mappings are saved through ``_set_saved_scope`` (skipped on
    ``dry_run``), reported with ``typer.echo``, and returned.
    """
    found: dict[str, Any] = {}

    raw_role = pulled_config.get("Role")
    if isinstance(raw_role, str):
        trimmed_role = raw_role.strip()
        if trimmed_role:
            found["Role"] = trimmed_role

    # Use the parameters other profiles saved as hints for which config
    # fields are profile-specific, and read this profile's values for them.
    for saved in other_profile_params.values():
        for param_name, param_value in saved.items():
            expected_fields = _profile_param_expected_fields(param_name, param_value)
            for field_path, _ in expected_fields:
                current = _get_config_field_path(pulled_config, field_path)
                if current is None:
                    continue
                found[field_path] = current

    if baseline_config:
        found.update(
            detect_profile_s3_params(
                baseline_config=baseline_config,
                target_config=pulled_config,
                match_threshold=85,
            ).params
        )

    if not found:
        return {}

    if not dry_run:
        _set_saved_scope(found, job_name, False, profile=profile)
        typer.echo(f"Saved detected profile mappings for {profile}: {job_name}")
    else:
        typer.echo(f"Would save detected profile mappings for {profile}: {job_name}")

    for field_name, field_value in sorted(found.items()):
        typer.echo(f"- {field_name}: {field_value}")
    return found
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _sync_profile_mappings_after_pull(
    *,
    job_name: str,
    profile: Optional[str],
    baseline_config: dict[str, Any],
    pulled_config: dict[str, Any],
    dry_run: bool,
) -> None:
    """Reconcile saved profile mappings with the config that was just pulled.

    * No profile: nothing to do.
    * The profile already has saved parameters for ``job_name``: check them
      against ``pulled_config`` via
      ``_validate_profile_mappings_align_with_config`` and stop.
    * Otherwise, if any other profile has parameters for the job, use them
      to detect this profile's mappings via
      ``_detect_profile_params_from_pull``.
    """
    if not profile:
        return

    if _direct_profile_job_params(job_name, profile):
        _validate_profile_mappings_align_with_config(
            job_name=job_name,
            config_data=pulled_config,
            profile=profile,
            success_message="Profile mappings align with pulled config.",
        )
        return

    mappings_elsewhere = _other_profile_job_params(job_name, profile)
    if mappings_elsewhere:
        _detect_profile_params_from_pull(
            job_name=job_name,
            profile=profile,
            baseline_config=baseline_config,
            pulled_config=pulled_config,
            other_profile_params=mappings_elsewhere,
            dry_run=dry_run,
        )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# CLI command "pull": lists the Glue jobs visible to the active profile,
# selects the requested ones, downloads each selected job whose remote
# LastModifiedOn is newer than the local copy (or unconditionally with
# --force), optionally rewrites the result under a different local job name,
# and finally reconciles saved profile mappings with the pulled config.
# NOTE: the function docstring is left untouched on purpose; Typer uses it as
# the command's help text.
@app.command(
    "pull",
    epilog=_examples_epilog(
        "gluekit pull my-job --profile my-sso-profile",
        "gluekit pull source-job target-job --profile my-sso-profile",
        'gluekit pull "my-job-\\*" --force --include config,script --exclude notebook',
        'gluekit pull "\\*" --dry-run --profile my-sso-profile --no-auto-login',
    ),
)
def glue_pull(
    job_name: Optional[str] = typer.Argument(
        None,
        help='Glue job name or pattern to pull. Use "*" for all jobs.',
    ),
    target_job: Optional[str] = typer.Argument(
        None,
        help="Optional local job name to write pulled artifacts to.",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        help="Download even if the local config is newer or equal to LastModifiedOn.",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be downloaded without writing files.",
    ),
    include: Optional[list[str]] = typer.Option(
        None,
        "--include",
        "-i",
        help="Include only specific components (config, script, notebook, extra-py-files, extra-files).",
    ),
    exclude: Optional[list[str]] = typer.Option(
        None,
        "--exclude",
        "-x",
        help="Exclude specific components (config, script, notebook, extra-py-files, extra-files).",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
    write_job_list: bool = typer.Option(
        True,
        "--write-job-list/--no-write-job-list",
        help="Write glue/glue_full_job_list.csv during the run.",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help="AWS CLI credential profile used for real AWS Glue/S3 API calls; not a gluekit local test profile.",
    ),
    auto_login: bool = typer.Option(
        True,
        "--auto-login/--no-auto-login",
        help="For real AWS profiles, automatically run 'aws sso login' when credentials are missing or expired.",
    ),
) -> None:
    """Download real AWS Glue job artifacts and config into the local workspace."""
    # An explicit --profile wins; otherwise use the checked-out profile.
    active_profile = profile or _get_checked_out_profile()
    # Omitting the job argument is treated the same as passing "*".
    requested_job_name = job_name or "*"
    # A target name only makes sense for a single source job.
    if target_job and requested_job_name == "*":
        raise typer.BadParameter(
            "glue pull <target-job> requires a single source job name."
        )

    components = _normalize_component_filters(
        include,
        exclude,
        allowed=PULL_COMPONENTS,
        aliases=PULL_COMPONENT_ALIASES,
        context_label="glue pull",
    )
    jobs = list_glue_jobs(profile_name=active_profile, auto_login=auto_login)
    # NOTE(review): the job list CSV is written whenever write_job_list is
    # set; dry_run is not consulted here. Confirm that is intended.
    if write_job_list:
        Path("glue").mkdir(parents=True, exist_ok=True)
        # Most recently modified first; jobs whose LastModifiedOn parses to a
        # falsy value sort as datetime.min (UTC), i.e. last.
        jobs_sorted = sorted(
            jobs,
            key=lambda job: (
                _parse_datetime(job.get("LastModifiedOn"))
                or datetime.min.replace(tzinfo=timezone.utc)
            ),
            reverse=True,
        )
        _write_glue_job_list_csv(jobs_sorted, Path("glue/glue_full_job_list.csv"))

    # Selection: "*" with an active checkout narrows to the checked-out jobs,
    # "*" without a checkout means every job, anything else is an fnmatch
    # pattern against the job name.
    checked_out_jobs = _get_checked_out_jobs()
    if requested_job_name == "*" and checked_out_jobs:
        selected_jobs = [job for job in jobs if job.get("Name") in checked_out_jobs]
        typer.echo(f"Using active checkout selection ({len(selected_jobs)} job(s)).")
    elif requested_job_name == "*":
        selected_jobs = jobs
    else:
        selected_jobs = [
            job for job in jobs if fnmatch(job.get("Name", ""), requested_job_name)
        ]
        if selected_jobs:
            typer.echo(
                f'Found {len(selected_jobs)} job(s) matching "{requested_job_name}".'
            )
        else:
            raise typer.BadParameter(f'No Glue jobs matched "{requested_job_name}".')

    # A pattern may still match several jobs; a target name needs exactly one.
    if target_job and len(selected_jobs) != 1:
        raise typer.BadParameter(
            "glue pull <target-job> requires exactly one matching source job."
        )

    config_dir.mkdir(parents=True, exist_ok=True)
    config_index = _load_config_index(config_dir)

    for job in selected_jobs:
        source_name = job.get("Name", "")
        # Skip listing entries that carry no job name.
        if not source_name:
            continue

        # All local state (saved params, config file, script path) is keyed
        # by the local name, which differs from the remote name only when a
        # target job was given.
        local_job_name = target_job or source_name

        saved_params = _get_saved_params_for_job(local_job_name, profile=active_profile)
        config_entry = config_index.get(local_job_name, {})
        # Reuse the indexed config path, else derive one from the slugified name.
        config_path: Path = config_entry.get(
            "config_path",
            config_dir / f"{slugify(local_job_name)}.json",
        )
        existing_config = config_entry.get("config", {})
        # Snapshot of the pre-pull config, later passed to the profile-mapping
        # sync as its baseline.
        baseline_config = copy.deepcopy(existing_config)
        existing_sc = existing_config.get("SourceControlDetails", {})
        # Default script path: existing source-control details first, then a
        # slug-based path; saved params may override it.
        local_path = _resolve_pull_local_path(
            (
                existing_sc.get("ScriptLocation")
                or existing_sc.get("LocalPath")
                or f"glue/scripts/{slugify(local_job_name)}.py"
            ),
            saved_params,
        )

        # Keep an explicit local RemoveNbOutputs setting; when absent, default
        # to True only for jobs whose JobMode is "NOTEBOOK".
        remove_nb_outputs = existing_sc.get("RemoveNbOutputs")
        if remove_nb_outputs is None:
            remove_nb_outputs = job.get("JobMode", "") == "NOTEBOOK"

        local_last_modified = _get_local_last_modified(config_path, existing_config)
        remote_last_modified = _parse_datetime(job.get("LastModifiedOn"))

        # Download when forced, when either timestamp is unknown, or when the
        # remote job is strictly newer than the local copy.
        should_download = (
            force
            or local_last_modified is None
            or remote_last_modified is None
            or remote_last_modified > local_last_modified
        )

        if should_download:
            display_name = (
                f"{source_name} -> {local_job_name}"
                if local_job_name != source_name
                else source_name
            )
            if dry_run:
                typer.echo(f"Would pull: {display_name}")
                # This also skips the profile-mapping sync below for this job.
                continue
            typer.echo(f"Pulling: {display_name}")
            download_glue_job_files(
                name=source_name,
                config_path=config_path,
                local_path=local_path,
                remove_nb_outputs=remove_nb_outputs,
                include_components=components,
                existing_sc=existing_sc,
                profile_name=active_profile,
                auto_login=auto_login,
            )

            # Pulled under a different local name: rewrite the downloaded
            # config for the target job, move the artifacts whose local paths
            # changed, then persist the rewritten config.
            if local_job_name != source_name and config_path.exists():
                pulled_config = normalize_glue_config_data(
                    json.loads(config_path.read_text())
                )
                renamed_config = _rewrite_glue_job_config(
                    pulled_config,
                    source_job=source_name,
                    target_job=local_job_name,
                    rewrite_local_paths=True,
                )
                for (
                    source_path,
                    target_path,
                ) in _collect_source_control_file_local_renames(
                    pulled_config,
                    renamed_config,
                ):
                    _move_local_artifact(source_path, target_path)
                config_path.write_text(json.dumps(renamed_config, indent=4))
        else:
            typer.echo(f"Up to date: {local_job_name}")

        # Runs after a real download and also for up-to-date jobs: re-read
        # the config from disk and reconcile the saved profile mappings.
        if config_path.exists():
            latest_config_data = normalize_glue_config_data(
                json.loads(config_path.read_text())
            )
            _sync_profile_mappings_after_pull(
                job_name=local_job_name,
                profile=active_profile,
                baseline_config=baseline_config,
                pulled_config=latest_config_data,
                dry_run=dry_run,
            )
|
gluekit/commands/push.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fnmatch import fnmatch
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from ..job_mgmt.glue_jobs import upload_glue_job_files_from_config
|
|
10
|
+
|
|
11
|
+
from .constants import PUSH_COMPONENT_ALIASES, PUSH_COMPONENTS
|
|
12
|
+
from .helpers import (
|
|
13
|
+
_examples_epilog,
|
|
14
|
+
_find_workspace_root,
|
|
15
|
+
_get_checked_out_jobs,
|
|
16
|
+
_get_checked_out_profile,
|
|
17
|
+
_load_config_index,
|
|
18
|
+
_raise_missing_local_config,
|
|
19
|
+
_validate_profile_mappings_align_with_config,
|
|
20
|
+
)
|
|
21
|
+
from .clone import _rewrite_glue_job_config
|
|
22
|
+
from .edit import _normalize_component_filters
|
|
23
|
+
from ..cli import app
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# CLI command "push": optionally builds the workspace package, selects local
# job configs (active checkout, all configs, or an fnmatch pattern), checks
# each config against the saved profile mappings, and uploads it through
# upload_glue_job_files_from_config, optionally under a different remote
# job name.
# NOTE: the function docstring is left untouched on purpose; Typer uses it as
# the command's help text.
@app.command(
    "push",
    epilog=_examples_epilog(
        "gluekit push my-job --profile my-sso-profile",
        "gluekit push source-job target-job --profile my-sso-profile",
        'gluekit push "my-job-\\*" --include script,job-config --exclude notebook',
        "gluekit push my-job --build --build-tool auto --dry-run",
    ),
)
def glue_push(
    job_name: Optional[str] = typer.Argument(
        None,
        help='Glue job name or pattern to push. Use "*" for all configs.',
    ),
    target_job: Optional[str] = typer.Argument(
        None,
        help="Optional remote Glue job name to push to.",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be uploaded without writing files.",
    ),
    include: Optional[list[str]] = typer.Option(
        None,
        "--include",
        "-i",
        help=(
            "Include only specific components (script, notebook, additional-python-modules, "
            "extra-files, job-config)."
        ),
    ),
    exclude: Optional[list[str]] = typer.Option(
        None,
        "--exclude",
        "-x",
        help=(
            "Exclude specific components (script, notebook, additional-python-modules, "
            "extra-files, job-config)."
        ),
    ),
    update_config: bool = typer.Option(
        True,
        "--update-config/--no-update-config",
        help="Update Glue job configuration after uploading files.",
    ),
    build: bool = typer.Option(
        False,
        "--build",
        help="Build local package artifacts in the workspace root before push.",
    ),
    build_tool: str = typer.Option(
        "auto",
        "--build-tool",
        help="Build frontend for --build: auto, uv, or build.",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Print local build command(s) before execution.",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help="AWS CLI credential profile used for real AWS Glue/S3 API calls; not a gluekit local test profile.",
    ),
    auto_login: bool = typer.Option(
        True,
        "--auto-login/--no-auto-login",
        help="For real AWS profiles, automatically run 'aws sso login' when credentials are missing or expired.",
    ),
) -> None:
    """Upload local artifacts to real AWS Glue/S3 and optionally update the job config."""
    # An explicit --profile wins; otherwise use the checked-out profile.
    active_profile = profile or _get_checked_out_profile()
    # Omitting the job argument is treated the same as passing "*".
    requested_job_name = job_name or "*"
    # A target name only makes sense for a single local source job.
    if target_job and requested_job_name == "*":
        raise typer.BadParameter(
            "glue push <target-job> requires a single local source job."
        )

    components = _normalize_component_filters(
        include,
        exclude,
        allowed=PUSH_COMPONENTS,
        aliases=PUSH_COMPONENT_ALIASES,
        context_label="glue push",
    )
    if build:
        # Imported only when --build is requested.
        from .build import run_project_build

        workspace_root = _find_workspace_root()
        run_project_build(
            build_tool, cwd=workspace_root, dry_run=dry_run, verbose=verbose
        )

    config_index = _load_config_index(config_dir)
    if not config_index:
        raise typer.BadParameter(f"No config files found in {config_dir}")

    # Selection: "*" with an active checkout narrows to the checked-out jobs
    # (and fails if none of them has a local config), "*" without a checkout
    # means every indexed config, anything else is an fnmatch pattern.
    checked_out_jobs = _get_checked_out_jobs()
    if requested_job_name == "*" and checked_out_jobs:
        selected_entries = [
            (name, entry)
            for name, entry in config_index.items()
            if name in checked_out_jobs
        ]
        if not selected_entries:
            missing_job_names = ", ".join(sorted(checked_out_jobs))
            raise typer.BadParameter(
                f"No local config files were found for the active checkout selection: {missing_job_names}. "
                "Create or clone the local config before running glue push."
            )
        typer.echo(
            f"Using active checkout selection ({len(selected_entries)} config(s))."
        )
    elif requested_job_name == "*":
        selected_entries = list(config_index.items())
    else:
        selected_entries = [
            (name, entry)
            for name, entry in config_index.items()
            if fnmatch(name, requested_job_name)
        ]
        if selected_entries:
            typer.echo(
                f'Found {len(selected_entries)} config(s) matching "{requested_job_name}".'
            )
        else:
            # presumably raises; the name suggests so but the helper is not
            # visible here. If it returned, the loop below would just be empty.
            _raise_missing_local_config(requested_job_name, config_dir, "glue push")

    # A pattern may still match several configs; a target name needs exactly one.
    if target_job and len(selected_entries) != 1:
        raise typer.BadParameter(
            "glue push <target-job> requires exactly one matching local config."
        )

    for name, entry in selected_entries:
        config_path: Path = entry["config_path"]
        config_data = entry.get("config", {})
        if not config_data:
            typer.echo(f"Skipping empty config for {name}")
            continue

        _validate_profile_mappings_align_with_config(
            job_name=name,
            config_data=config_data,
            profile=active_profile,
            success_message="Profile mappings align with config...now pushing config",
        )
        # Returns immediately unless --build was given.
        _sync_package_wheel_mapping_before_push(
            job_name=name,
            config_path=config_path,
            config_data=config_data,
            profile=active_profile,
            build=build,
            dry_run=dry_run,
        )

        push_config = config_data
        display_name = name
        # Pushing to a different remote job: upload a rewritten config, with
        # local paths left as they are (rewrite_local_paths=False).
        if target_job and target_job != name:
            push_config = _rewrite_glue_job_config(
                config_data,
                source_job=name,
                target_job=target_job,
                rewrite_local_paths=False,
            )
            display_name = f"{name} -> {target_job}"

        # Printed in dry-run mode as well; dry_run is forwarded to the upload call.
        typer.echo(f"Pushing: {display_name}")
        upload_glue_job_files_from_config(
            push_config,
            dry_run=dry_run,
            # The job config is updated only when --update-config is on AND
            # "job-config" survived the include/exclude filters.
            update_job_config=update_config and "job-config" in components,
            include_components=components,
            profile_name=active_profile,
            auto_login=auto_login,
        )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _sync_package_wheel_mapping_before_push(
    *,
    job_name: str,
    config_path: Path,
    config_data: dict[str, Any],
    profile: Optional[str],
    build: bool,
    dry_run: bool,
) -> None:
    """Apply the freshly built package wheel to a job config before pushing.

    Does nothing unless ``build`` is true. Otherwise the built wheel is
    resolved from the workspace root and handed to
    ``_apply_package_wheel_to_config`` together with the job's config.

    Raises:
        typer.BadParameter: if the built wheel cannot be resolved during a
            real (non dry-run) push. In a dry run the failure is tolerated
            and the function returns without touching the config.
    """
    if not build:
        return

    # Imported lazily so the build module is only loaded for --build pushes.
    from .build import (
        _apply_package_wheel_to_config,
        _find_workspace_root,
        _resolve_built_package_wheel_path,
    )

    workspace_root = _find_workspace_root()
    try:
        package_wheel_path = _resolve_built_package_wheel_path(
            workspace_root=workspace_root,
            out_dir=None,
        )
    except typer.BadParameter:
        # ``build`` is always true here, so only ``dry_run`` decides whether
        # a missing wheel is an error.
        if not dry_run:
            raise
        return

    _apply_package_wheel_to_config(
        job_name=job_name,
        config_path=config_path,
        config_data=config_data,
        package_wheel_path=package_wheel_path,
        profile=profile,
        dry_run=dry_run,
    )
|