gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import copy
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tarfile
|
|
10
|
+
import uuid
|
|
11
|
+
import zipfile
|
|
12
|
+
from collections.abc import Mapping
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from email.parser import Parser
|
|
15
|
+
from fnmatch import fnmatch
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from tempfile import TemporaryDirectory
|
|
18
|
+
from typing import Any, Optional
|
|
19
|
+
|
|
20
|
+
import typer
|
|
21
|
+
from slugify import slugify
|
|
22
|
+
|
|
23
|
+
from ..job_mgmt.glue_jobs import (
|
|
24
|
+
download_glue_job_files,
|
|
25
|
+
list_glue_jobs,
|
|
26
|
+
normalize_glue_config_data,
|
|
27
|
+
convert_script_to_notebook,
|
|
28
|
+
convert_notebook_to_script,
|
|
29
|
+
_resolve_notebook_path,
|
|
30
|
+
upload_glue_job_files_from_config,
|
|
31
|
+
)
|
|
32
|
+
from ..job_mgmt.magics import build_magic_cell_sources as _build_magic_cell_sources
|
|
33
|
+
|
|
34
|
+
from .constants import *
|
|
35
|
+
from .helpers import *
|
|
36
|
+
from ..cli import app, glue_config_app
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _parse_set_args(args: list[str]) -> dict[str, Any]:
    """Turn raw ``--key value`` / ``--key=value`` tokens into a dict.

    Every value is passed through ``_coerce_set_value`` before it is stored.
    When a key appears more than once, the last occurrence wins.

    Args:
        args: Extra command-line tokens, in the order they were given.

    Returns:
        Mapping of parameter name (without the leading ``--``) to its
        coerced value.

    Raises:
        typer.BadParameter: If a token does not start with ``--``, a
            parameter name is empty, a value is missing (or the next token
            is itself an option), or no pairs were supplied at all.
    """
    collected: dict[str, Any] = {}
    remaining = iter(args)

    for token in remaining:
        if not token.startswith("--"):
            raise typer.BadParameter(
                f"Invalid token '{token}'. Use --key value or --key=value."
            )

        # Split on the first "=" only, so values may themselves contain "=".
        name, has_inline_value, inline_value = token[2:].partition("=")
        if not name:
            raise typer.BadParameter("Invalid empty parameter name.")

        if has_inline_value:
            collected[name] = _coerce_set_value(inline_value)
            continue

        # Space-separated form: the value is the next token, which must not
        # look like another option.
        following = next(remaining, None)
        if following is None or following.startswith("--"):
            raise typer.BadParameter(f"Missing value for --{name}.")
        collected[name] = _coerce_set_value(following)

    if not collected:
        raise typer.BadParameter("Provide one or more --key value pairs.")
    return collected
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@app.command(
    "set",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
    epilog=_examples_epilog(
        "gluekit set my-job --script_location glue/scripts/my-job.py --extra_files s3://my-bucket/shared/config.json",
        "gluekit pull my-job --profile my-sso-profile",
        'gluekit set --global --additional_python_modules "pydantic==2.11.7"',
        'gluekit push "my-job-\\*" --profile my-sso-profile',
        "gluekit config set my-job --default_arguments.--TempDir s3://my-bucket/tmp/",
        "gluekit config get my-job",
        "gluekit config get",
        "gluekit set my-job --extra_py_files s3://my-bucket/dist/gluekit-0.0.1-py3-none-any.whl",
        "gluekit push my-job --build --build-tool auto --profile my-sso-profile",
    ),
)
def glue_set(
    ctx: typer.Context,
    job_name: Optional[str] = typer.Argument(
        None,
        help="Glue job name to save params for.",
    ),
    global_scope: bool = typer.Option(
        False,
        "--global",
        help="Save params as global defaults for all jobs.",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help=(
            "Save params under this gluekit profile scope, usually matching an "
            "AWS CLI profile name; does not contact AWS."
        ),
    ),
) -> None:
    """Set reusable local key/value parameters for a Glue job."""
    # The docstring above is kept verbatim because it is the CLI help text.
    # Unknown options are left in ctx.args (see context_settings) and are
    # parsed here as free-form --key value pairs.
    params = _parse_set_args(list(ctx.args))
    scope_label = _set_saved_scope(params, job_name, global_scope, profile=profile)
    saved_count = len(params)
    typer.echo(
        f"Saved {saved_count} parameter(s) for '{scope_label}' in {GLUE_SET_FILE}."
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@glue_config_app.command(
    "set",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
    epilog=_examples_epilog(
        "gluekit config set my-job --default_arguments.--TempDir s3://my-bucket/tmp/",
        'gluekit config set --global --additional_python_modules "pydantic==2.11.7"',
    ),
)
def glue_config_set(
    ctx: typer.Context,
    job_name: Optional[str] = typer.Argument(
        None,
        help="Glue job name to save params for.",
    ),
    global_scope: bool = typer.Option(
        False,
        "--global",
        help="Save params as global defaults for all jobs.",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help=(
            "Save params under this gluekit profile scope, usually matching an "
            "AWS CLI profile name; does not contact AWS."
        ),
    ),
) -> None:
    """Set reusable local Glue config params by job or globally."""
    # The docstring above is kept verbatim because it is the CLI help text.
    # Extra --key value tokens arrive via ctx.args (see context_settings).
    params = _parse_set_args(list(ctx.args))
    scope_label = _set_saved_scope(params, job_name, global_scope, profile=profile)
    saved_count = len(params)
    typer.echo(
        f"Saved {saved_count} parameter(s) for '{scope_label}' in {GLUE_SET_FILE}."
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@glue_config_app.command(
    "get",
    epilog=_examples_epilog(
        "gluekit config get my-job",
        "gluekit config get",
    ),
)
def glue_config_get(
    job_name: Optional[str] = typer.Argument(
        None,
        help="Optional Glue job name to view merged params (global + job).",
    ),
    profile: Optional[str] = typer.Option(
        None,
        "--profile",
        "-p",
        help=(
            "Include params from this gluekit profile scope, usually matching an "
            "AWS CLI profile name; does not contact AWS."
        ),
    ),
) -> None:
    """Show stored local Glue config params."""
    # The docstring above is kept verbatim because it is the CLI help text.
    # The store is always loaded first; without a job name it is printed
    # as-is, otherwise the params resolved for that job are printed.
    full_store = _load_glue_set_store()
    if job_name:
        payload = _get_saved_params_for_job(job_name, profile=profile)
    else:
        payload = full_store
    typer.echo(json.dumps(payload, indent=4))
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
# Component names accepted when pulling a job.
PULL_COMPONENTS = {
    "config",
    "script",
    "notebook",
    "additional-python-modules",
    "extra-files",
}

# Component names accepted when pushing a job.
PUSH_COMPONENTS = {
    "script",
    "notebook",
    "additional-python-modules",
    "extra-files",
    "job-config",
}

# Alternate spellings mapped to their canonical pull component name.
PULL_COMPONENT_ALIASES = {
    **dict.fromkeys(
        (
            "extra-py-files",
            "extra_py_files",
            "extra-py",
            "additional-python-files",
            "additional-python",
            "additional-py-files",
        ),
        "additional-python-modules",
    ),
    **dict.fromkeys(
        (
            "extra_files",
            "additional-files",
            "extra",
        ),
        "extra-files",
    ),
}

# Push accepts every pull alias plus several spellings of "job-config".
PUSH_COMPONENT_ALIASES = {
    **PULL_COMPONENT_ALIASES,
    **dict.fromkeys(
        (
            "config",
            "job_config",
            "update-config",
            "update_config",
        ),
        "job-config",
    ),
}

# Relative path of the JSON file named in the "Saved ... in <file>" messages.
GLUE_SET_FILE = Path(".gluekit/glue_set.json")

__all__ = [
    "PULL_COMPONENTS",
    "PUSH_COMPONENTS",
    "PULL_COMPONENT_ALIASES",
    "PUSH_COMPONENT_ALIASES",
    "GLUE_SET_FILE",
]
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import copy
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tarfile
|
|
10
|
+
import uuid
|
|
11
|
+
import zipfile
|
|
12
|
+
from collections.abc import Mapping
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from email.parser import Parser
|
|
15
|
+
from fnmatch import fnmatch
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from tempfile import TemporaryDirectory
|
|
18
|
+
from typing import Any, Optional
|
|
19
|
+
|
|
20
|
+
import typer
|
|
21
|
+
from slugify import slugify
|
|
22
|
+
|
|
23
|
+
from ..job_mgmt.glue_jobs import (
|
|
24
|
+
download_glue_job_files,
|
|
25
|
+
list_glue_jobs,
|
|
26
|
+
normalize_glue_config_data,
|
|
27
|
+
convert_script_to_notebook,
|
|
28
|
+
convert_notebook_to_script,
|
|
29
|
+
_resolve_notebook_path,
|
|
30
|
+
upload_glue_job_files_from_config,
|
|
31
|
+
)
|
|
32
|
+
from ..job_mgmt.magics import build_magic_cell_sources as _build_magic_cell_sources
|
|
33
|
+
|
|
34
|
+
from .constants import *
|
|
35
|
+
from .helpers import *
|
|
36
|
+
from ..cli import app, glue_config_app
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _notebook_source_to_lines(source: Any) -> list[str]:
|
|
40
|
+
if source is None:
|
|
41
|
+
return []
|
|
42
|
+
if isinstance(source, str):
|
|
43
|
+
return source.splitlines(keepends=True)
|
|
44
|
+
lines: list[str] = []
|
|
45
|
+
for part in source:
|
|
46
|
+
if part.endswith("\n"):
|
|
47
|
+
lines.append(part)
|
|
48
|
+
else:
|
|
49
|
+
lines.append(f"{part}\n")
|
|
50
|
+
return lines
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _build_script_magic_cell_lines(config_data: dict[str, Any]) -> list[str]:
    """Render the Glue magics for ``config_data`` as a commented script header.

    The result starts with a ``#%% [markdown]`` marker and an
    "AWS Glue configs" heading, followed by every line produced by
    ``_build_magic_cell_sources`` as a ``# ``-prefixed comment (empty lines
    become a bare ``#``), then a blank line and a closing ``#%%`` marker.
    """
    commented = [
        f"# {text}\n" if text else "#\n"
        for cell_source in _build_magic_cell_sources(config_data)
        for text in (raw.rstrip("\n") for raw in cell_source)
    ]
    return ["#%% [markdown]\n", "# AWS Glue configs\n", *commented, "\n", "#%%\n"]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _update_script_config_cell(
    script_path: Path,
    config_data: dict[str, Any],
    dry_run: bool,
) -> bool:
    """Rewrite a script so it starts with freshly generated Glue magic lines.

    Existing cells that look like Glue magic cells are stripped down to their
    code (or dropped when they contain no code), and the lines produced by
    ``_build_script_magic_cell_lines`` are placed in front of what remains.

    Args:
        script_path: Script file to update.
        config_data: Config passed to ``_build_script_magic_cell_lines``.
        dry_run: When true, report the pending change without writing.

    Returns:
        ``False`` when the file does not exist or the regenerated content is
        identical to the current content; ``True`` when the file was (or, in
        a dry run, would be) rewritten.
    """
    if not script_path.exists():
        typer.echo(f"Script not found: {script_path}")
        return False

    # NOTE(review): read_text()/write_text() use the platform default
    # encoding here — confirm that is intended for script files.
    script_text = script_path.read_text()
    lines = script_text.splitlines(keepends=True)
    magic_lines = _build_script_magic_cell_lines(config_data)

    # Cell marker: a line starting with "#", optional whitespace, then "%%".
    marker_re = re.compile(r"^#\s*%%")

    def is_glue_magic_line(raw_line: str) -> bool:
        # Strip leading whitespace and a single leading "#", then look for a
        # "%"-prefixed magic, a %%configure mention, or the generated heading.
        # NOTE(review): a bare "#%%" cell marker also satisfies the
        # startswith("%") test, so any cell that begins with a marker is
        # treated as a magic cell — confirm this is intended.
        candidate = raw_line.lstrip()
        if candidate.startswith("#"):
            candidate = candidate[1:].lstrip()
        return (
            candidate.startswith("%")
            or "%%configure" in candidate
            or "AWS Glue configs" in candidate
        )

    def split_into_cells(all_lines: list[str]) -> list[list[str]]:
        # Partition the file at marker lines; anything before the first
        # marker becomes its own leading cell.
        starts = [idx for idx, line in enumerate(all_lines) if marker_re.match(line)]
        if not starts:
            return [all_lines]
        if starts[0] != 0:
            starts = [0] + starts
        boundaries = starts[1:] + [len(all_lines)]
        return [all_lines[start:end] for start, end in zip(starts, boundaries)]

    kept: list[str] = []
    for cell_lines in split_into_cells(lines):
        if any(is_glue_magic_line(line) for line in cell_lines):
            # Find the first line that is neither blank nor a comment.
            first_code_idx = None
            for idx, line in enumerate(cell_lines):
                stripped = line.strip()
                if not stripped:
                    continue
                if stripped.startswith("#"):
                    continue
                # Never true at this point: marker lines start with "#" and
                # are already skipped by the check above.
                if marker_re.match(line):
                    continue
                first_code_idx = idx
                break
            # Keep only the code part; a cell with no code is dropped.
            if first_code_idx is not None:
                kept.extend(cell_lines[first_code_idx:])
            continue
        kept.extend(cell_lines)

    new_lines = magic_lines + kept

    # Nothing to do when regeneration reproduces the current file.
    if new_lines == lines:
        return False

    if dry_run:
        typer.echo(f"Would update script config cell: {script_path}")
        return True

    script_path.write_text("".join(new_lines))
    typer.echo(f"Updated script config cell: {script_path}")
    return True
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _extract_config_from_cell(lines: list[str]) -> tuple[int, dict[str, Any]]:
|
|
134
|
+
for idx, line in enumerate(lines):
|
|
135
|
+
if "%%configure" in line:
|
|
136
|
+
raw = "".join(lines[idx + 1 :]).strip()
|
|
137
|
+
if not raw:
|
|
138
|
+
return idx, {}
|
|
139
|
+
start = raw.find("{")
|
|
140
|
+
end = raw.rfind("}")
|
|
141
|
+
if start == -1 or end == -1 or end < start:
|
|
142
|
+
raise ValueError("Unable to locate JSON block after %%configure.")
|
|
143
|
+
payload = raw[start : end + 1]
|
|
144
|
+
return idx, json.loads(payload)
|
|
145
|
+
raise ValueError("No %%configure cell found.")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _new_notebook_code_cell(source_lines: list[str]) -> dict[str, Any]:
|
|
149
|
+
return {
|
|
150
|
+
"cell_type": "code",
|
|
151
|
+
"id": str(uuid.uuid4()),
|
|
152
|
+
"metadata": {
|
|
153
|
+
"tags": [],
|
|
154
|
+
"trusted": True,
|
|
155
|
+
"vscode": {"languageId": "python_glue_session"},
|
|
156
|
+
},
|
|
157
|
+
"source": source_lines,
|
|
158
|
+
"execution_count": None,
|
|
159
|
+
"outputs": [],
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _new_notebook_markdown_cell(source_lines: list[str]) -> dict[str, Any]:
|
|
164
|
+
return {
|
|
165
|
+
"cell_type": "markdown",
|
|
166
|
+
"id": str(uuid.uuid4()),
|
|
167
|
+
"metadata": {},
|
|
168
|
+
"source": source_lines,
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _is_notebook_magic_cell(cell: dict[str, Any]) -> bool:
    """Return whether ``cell`` is a code cell containing a ``%`` magic line.

    A line counts as a magic when, ignoring leading whitespace, it starts
    with ``%``. Non-code cells are never magic cells.
    """
    if cell.get("cell_type") != "code":
        return False
    source_lines = _notebook_source_to_lines(cell.get("source"))
    return any(text.lstrip().startswith("%") for text in source_lines)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _is_generated_glue_heading_cell(cell: dict[str, Any]) -> bool:
    """Return whether ``cell`` is one of the generated markdown headings.

    True only for markdown cells whose whole text, after stripping
    surrounding whitespace, is exactly "# AWS Glue configs" or
    "# AWS Glue Script".
    """
    if cell.get("cell_type") != "markdown":
        return False
    heading = "".join(_notebook_source_to_lines(cell.get("source"))).strip()
    return heading in ("# AWS Glue configs", "# AWS Glue Script")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _build_notebook_magic_cells(config_data: dict[str, Any]) -> list[dict[str, Any]]:
    """Build the generated cells placed at the top of a notebook.

    The result is an "AWS Glue configs" markdown heading, one code cell per
    source produced by ``_build_magic_cell_sources(config_data)``, and a
    closing "AWS Glue Script" markdown heading, in that order.
    """
    configs_heading = _new_notebook_markdown_cell(["# AWS Glue configs\n"])
    magic_cells = [
        _new_notebook_code_cell(cell_source)
        for cell_source in _build_magic_cell_sources(config_data)
    ]
    script_heading = _new_notebook_markdown_cell(["# AWS Glue Script\n"])
    return [configs_heading, *magic_cells, script_heading]
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _update_notebook_config_cell(
    notebook_path: Path,
    config_data: dict[str, Any],
    keys_to_update: set[str],
    dry_run: bool,
) -> bool:
    """Regenerate the Glue magic cells at the top of a notebook.

    All existing magic code cells and generated heading cells are removed,
    and a fresh set built from ``config_data`` is placed in front of the
    remaining cells.

    Args:
        notebook_path: Notebook (``.ipynb`` JSON) file to update.
        config_data: Config passed to ``_build_notebook_magic_cells``.
        keys_to_update: Accepted for interface compatibility; not used.
        dry_run: When true, report the pending change without writing.

    Returns:
        ``False`` when the notebook does not exist; ``True`` when it was
        (or, in a dry run, would be) rewritten.
    """
    if not notebook_path.exists():
        typer.echo(f"Notebook not found: {notebook_path}")
        return False

    # The whole magic section is rebuilt, so per-key selection is ignored.
    del keys_to_update

    # Notebook files are UTF-8 JSON. Decode explicitly so the result does
    # not depend on the platform's locale encoding (e.g. cp1252 on Windows).
    notebook = json.loads(notebook_path.read_text(encoding="utf-8"))
    existing_cells = notebook.get("cells", [])
    cleaned_cells = [
        cell
        for cell in existing_cells
        if not _is_notebook_magic_cell(cell)
        and not _is_generated_glue_heading_cell(cell)
    ]

    new_magic_cells = _build_notebook_magic_cells(config_data)
    notebook["cells"] = new_magic_cells + cleaned_cells

    if dry_run:
        typer.echo(f"Would update notebook config cell: {notebook_path}")
        return True

    notebook_path.write_text(json.dumps(notebook, indent=4), encoding="utf-8")
    typer.echo(f"Updated notebook config cell: {notebook_path}")
    return True
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@app.command(
    "convert",
    epilog=_examples_epilog(
        "gluekit convert my-job --from script --to notebook --use-config",
        "gluekit convert my-job --from notebook --to script --script-only",
    ),
)
def glue_convert(
    job_name: Optional[str] = typer.Argument(
        None,
        help="Glue job name to convert. Defaults to the active checkout selection.",
    ),
    from_format: str = typer.Option(
        "script",
        "--from",
        help="Source format (script or notebook).",
    ),
    to_format: str = typer.Option(
        "notebook",
        "--to",
        help="Target format (script or notebook).",
    ),
    use_config: bool = typer.Option(
        True,
        "--use-config/--script-only",
        help="Use config metadata (magics and paths) when available.",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be converted without writing files.",
    ),
    config_dir: Path = typer.Option(
        Path("glue/configs"),
        "--config-dir",
        help="Directory containing Glue job config files.",
    ),
) -> None:
    """Convert Glue jobs between script and notebook formats."""
    # The docstring above is kept verbatim because it is the CLI help text.
    job_name = _resolve_single_job_name(job_name, "glue convert")
    from_format, to_format = from_format.lower(), to_format.lower()

    if from_format == to_format:
        raise typer.BadParameter("--from and --to must be different values.")

    supported_formats = {"script", "notebook"}
    if not (from_format in supported_formats and to_format in supported_formats):
        raise typer.BadParameter("--from/--to must be either script or notebook.")

    # The config index is consulted only when --use-config is in effect.
    config_data = None
    config_entry = _load_config_index(config_dir).get(job_name) if use_config else None

    if config_entry:
        config_data = config_entry.get("config", {})
        source_control = config_data.get("SourceControlDetails", {})
        script_path = Path(
            source_control.get("ScriptLocation")
            or source_control.get("LocalPath")
            or f"glue/scripts/{slugify(job_name)}.py"
        )
        configured_notebook = (
            source_control.get("NotebookPath")
            or source_control.get("NotebookLocation")
        )
        notebook_path = (
            Path(configured_notebook)
            if configured_notebook
            else Path(_resolve_notebook_path(script_path))
        )
    else:
        # No usable config entry: fall back to the slug-based default layout.
        slug = slugify(job_name)
        script_path = Path(f"glue/scripts/{slug}.py")
        notebook_path = Path(f"glue/notebooks/{slug}.ipynb")

    # The source file of the conversion must already exist.
    if from_format == "script" and not script_path.exists():
        raise typer.BadParameter(f"Script not found: {script_path}")
    if from_format == "notebook" and not notebook_path.exists():
        raise typer.BadParameter(f"Notebook not found: {notebook_path}")

    if dry_run:
        if from_format == "notebook":
            source_path, target_path = notebook_path, script_path
        else:
            source_path, target_path = script_path, notebook_path
        typer.echo(f"Would convert: {source_path} -> {target_path}")
        return

    direction = (from_format, to_format)
    if direction == ("script", "notebook"):
        converted_path = convert_script_to_notebook(
            script_path,
            notebook_path,
            config_data=config_data,
            include_magics=use_config,
        )
        typer.echo(f"Converted: {script_path} -> {converted_path}")
    elif direction == ("notebook", "script"):
        converted_path = convert_notebook_to_script(notebook_path, script_path)
        typer.echo(f"Converted: {notebook_path} -> {converted_path}")
    else:
        raise typer.BadParameter("Only script <-> notebook conversions are supported.")
|