gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"source": "https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions-magics.html",
|
|
4
|
+
"stop_session_magic": "%stop_session",
|
|
5
|
+
"section_order": [
|
|
6
|
+
"session",
|
|
7
|
+
"compute",
|
|
8
|
+
"dependencies",
|
|
9
|
+
"configure"
|
|
10
|
+
],
|
|
11
|
+
"mappings": [
|
|
12
|
+
{
|
|
13
|
+
"field_path": "Role",
|
|
14
|
+
"emit": "magic",
|
|
15
|
+
"magic": "%iam_role",
|
|
16
|
+
"section": "session",
|
|
17
|
+
"format": "string"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"field_path": "Connections.Connections",
|
|
21
|
+
"emit": "magic",
|
|
22
|
+
"magic": "%connections",
|
|
23
|
+
"section": "session",
|
|
24
|
+
"format": "csv"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"field_path": "GlueVersion",
|
|
28
|
+
"emit": "magic",
|
|
29
|
+
"magic": "%glue_version",
|
|
30
|
+
"section": "compute",
|
|
31
|
+
"format": "string"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"field_path": "WorkerType",
|
|
35
|
+
"emit": "magic",
|
|
36
|
+
"magic": "%worker_type",
|
|
37
|
+
"section": "compute",
|
|
38
|
+
"format": "string"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"field_path": "NumberOfWorkers",
|
|
42
|
+
"emit": "magic",
|
|
43
|
+
"magic": "%number_of_workers",
|
|
44
|
+
"section": "compute",
|
|
45
|
+
"format": "string"
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"field_path": "DefaultArguments.--additional-python-modules",
|
|
49
|
+
"emit": "magic",
|
|
50
|
+
"magic": "%additional_python_modules",
|
|
51
|
+
"section": "dependencies",
|
|
52
|
+
"format": "string"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"field_path": "DefaultArguments.--extra-py-files",
|
|
56
|
+
"emit": "magic",
|
|
57
|
+
"magic": "%extra_py_files",
|
|
58
|
+
"section": "dependencies",
|
|
59
|
+
"format": "string"
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"field_path": "DefaultArguments.--extra-jars",
|
|
63
|
+
"emit": "magic",
|
|
64
|
+
"magic": "%extra_jars",
|
|
65
|
+
"section": "dependencies",
|
|
66
|
+
"format": "string"
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"field_path": "DefaultArguments.--conf",
|
|
70
|
+
"emit": "magic",
|
|
71
|
+
"magic": "%spark_conf",
|
|
72
|
+
"section": "dependencies",
|
|
73
|
+
"format": "string"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"field_path": "Command.ScriptLocation",
|
|
77
|
+
"emit": "configure",
|
|
78
|
+
"configure_key": "script_location",
|
|
79
|
+
"section": "configure",
|
|
80
|
+
"format": "script_location_dir"
|
|
81
|
+
}
|
|
82
|
+
]
|
|
83
|
+
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from functools import lru_cache
|
|
5
|
+
from importlib.resources import files
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import boto3
|
|
10
|
+
|
|
11
|
+
DEPRECATED_JOB_FIELDS = (
|
|
12
|
+
"AllocatedCapacity",
|
|
13
|
+
"MaxCapacity",
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
SCHEMA_RESOURCE_NAME = "glue_job_schema.json"
|
|
17
|
+
MAGIC_MAP_RESOURCE_NAME = "magic_map.json"
|
|
18
|
+
RESOURCE_DIR = Path(__file__).resolve().parent / "resources"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _resource_text(name: str) -> str:
    """Read a file bundled in the ``gluekit.job_mgmt`` ``resources`` directory.

    Args:
        name: File name inside the packaged ``resources`` directory.

    Returns:
        The file contents decoded as UTF-8.
    """
    package_root = files("gluekit.job_mgmt")
    resource = package_root.joinpath("resources").joinpath(name)
    return resource.read_text(encoding="utf-8")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@lru_cache(maxsize=1)
def load_glue_schema() -> dict[str, Any]:
    """Parse the bundled Glue job schema resource and return it.

    The result is memoised by ``lru_cache``, so repeated calls hand back the
    same parsed object until the cache is cleared.
    """
    raw_schema = _resource_text(SCHEMA_RESOURCE_NAME)
    return json.loads(raw_schema)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@lru_cache(maxsize=1)
def load_magic_map() -> dict[str, Any]:
    """Parse the bundled magic-map resource and return it.

    The result is memoised by ``lru_cache``, so repeated calls hand back the
    same parsed object until the cache is cleared.
    """
    raw_map = _resource_text(MAGIC_MAP_RESOURCE_NAME)
    return json.loads(raw_map)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_updateable_job_fields(prefer_live: bool = False) -> set[str]:
    """Return the set of job field names accepted by a job update.

    Args:
        prefer_live: When true, first ask ``inspect_live_update_job_fields``
            and use its answer if it is non-empty.

    Returns:
        The live field names when requested and available; otherwise the
        ``"update_job_fields"`` entry of the bundled schema (an empty set if
        that entry is absent).
    """
    live_fields = inspect_live_update_job_fields() if prefer_live else None
    if live_fields:
        return live_fields

    bundled_fields = load_glue_schema().get("update_job_fields", [])
    return set(bundled_fields)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def inspect_live_update_job_fields() -> set[str]:
    """Read the ``JobUpdate`` member names from the installed boto3 Glue model.

    The names are taken from the ``JobUpdate`` member of the
    ``UpdateJobRequest`` shape of a Glue client's service model.

    Returns:
        The member names of ``JobUpdate``, or an empty set when the shape has
        no ``JobUpdate`` member or when anything goes wrong while inspecting
        the model. This function never raises.
    """
    try:
        glue = boto3.Session().client("glue", region_name="us-east-1")
        # Use the public ``meta.service_model`` accessor rather than the
        # private ``_service_model`` attribute of the client.
        service_model = glue.meta.service_model
        request_shape = service_model.shape_for("UpdateJobRequest")
        job_update_shape = request_shape.members.get("JobUpdate")
        if not job_update_shape:
            return set()
        return set(job_update_shape.members)
    except Exception:
        # Deliberate best-effort: callers treat an empty set as "no live
        # information" and fall back to the bundled schema.
        return set()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _clean_doc(value: Any) -> str | None:
|
|
59
|
+
if not value:
|
|
60
|
+
return None
|
|
61
|
+
text = " ".join(str(value).split())
|
|
62
|
+
return text or None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _serialize_shape(shape: Any, seen: set[str] | None = None) -> dict[str, Any]:
|
|
66
|
+
seen = seen or set()
|
|
67
|
+
shape_token = getattr(shape, "name", None) or f"anon:{id(shape)}"
|
|
68
|
+
|
|
69
|
+
data: dict[str, Any] = {
|
|
70
|
+
"shape_name": getattr(shape, "name", None),
|
|
71
|
+
"type": getattr(shape, "type_name", None),
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if shape_token in seen:
|
|
75
|
+
data["ref"] = shape_token
|
|
76
|
+
return data
|
|
77
|
+
|
|
78
|
+
next_seen = seen | {shape_token}
|
|
79
|
+
|
|
80
|
+
documentation = _clean_doc(getattr(shape, "documentation", None))
|
|
81
|
+
if documentation:
|
|
82
|
+
data["documentation"] = documentation
|
|
83
|
+
|
|
84
|
+
enum_values = list(getattr(shape, "enum", []) or [])
|
|
85
|
+
if enum_values:
|
|
86
|
+
data["enum"] = enum_values
|
|
87
|
+
|
|
88
|
+
for attr in ("min", "max"):
|
|
89
|
+
value = getattr(shape, attr, None)
|
|
90
|
+
if value is not None:
|
|
91
|
+
data[attr] = value
|
|
92
|
+
|
|
93
|
+
required_members = list(getattr(shape, "required_members", []) or [])
|
|
94
|
+
if required_members:
|
|
95
|
+
data["required_members"] = sorted(required_members)
|
|
96
|
+
|
|
97
|
+
if data["type"] == "structure":
|
|
98
|
+
members = getattr(shape, "members", {}) or {}
|
|
99
|
+
data["members"] = {
|
|
100
|
+
member_name: _serialize_shape(members[member_name], next_seen)
|
|
101
|
+
for member_name in sorted(members.keys())
|
|
102
|
+
}
|
|
103
|
+
elif data["type"] == "list":
|
|
104
|
+
member = getattr(shape, "member", None)
|
|
105
|
+
if member is not None:
|
|
106
|
+
data["member"] = _serialize_shape(member, next_seen)
|
|
107
|
+
elif data["type"] == "map":
|
|
108
|
+
key = getattr(shape, "key", None)
|
|
109
|
+
value = getattr(shape, "value", None)
|
|
110
|
+
if key is not None:
|
|
111
|
+
data["key"] = _serialize_shape(key, next_seen)
|
|
112
|
+
if value is not None:
|
|
113
|
+
data["value"] = _serialize_shape(value, next_seen)
|
|
114
|
+
|
|
115
|
+
return data
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def build_glue_schema() -> dict[str, Any]:
    """Build the schema payload from the installed boto3 Glue service model.

    The ``UpdateJobRequest``, ``JobUpdate`` and ``Job`` shapes are serialized
    with ``_serialize_shape`` and combined with summary entries derived from
    them.

    Returns:
        A dict with the keys ``schema_version``, ``service``, ``api_version``,
        ``deprecated_fields``, ``update_job_fields``, ``job_fields``,
        ``job_update_fields`` and ``root_shapes``.
    """
    glue = boto3.Session().client("glue", region_name="us-east-1")
    # Use the public ``meta.service_model`` accessor rather than the private
    # ``_service_model`` attribute of the client.
    model = glue.meta.service_model

    root_shapes = {
        name: _serialize_shape(model.shape_for(name))
        for name in ("UpdateJobRequest", "JobUpdate", "Job")
    }
    job_members = root_shapes["Job"].get("members", {})
    job_update_members = root_shapes["JobUpdate"].get("members", {})

    return {
        "schema_version": 1,
        "service": "glue",
        "api_version": getattr(model, "api_version", None),
        "deprecated_fields": sorted(DEPRECATED_JOB_FIELDS),
        "update_job_fields": sorted(job_update_members),
        "job_fields": job_members,
        "job_update_fields": job_update_members,
        "root_shapes": root_shapes,
    }
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def write_glue_schema(output_path: Path | None = None) -> Path:
    """Regenerate the Glue schema and write it to disk as JSON.

    Args:
        output_path: Destination file. When omitted, the schema resource file
            under ``RESOURCE_DIR`` is used.

    Returns:
        The path that was written.
    """
    target = output_path if output_path else RESOURCE_DIR / SCHEMA_RESOURCE_NAME
    target.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(build_glue_schema(), indent=2, sort_keys=True)
    target.write_text(serialized + "\n", encoding="utf-8")

    # Drop the memoised schema so the next load re-reads the resource.
    load_glue_schema.cache_clear()
    return target
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
__all__ = [
|
|
155
|
+
"DEPRECATED_JOB_FIELDS",
|
|
156
|
+
"MAGIC_MAP_RESOURCE_NAME",
|
|
157
|
+
"RESOURCE_DIR",
|
|
158
|
+
"SCHEMA_RESOURCE_NAME",
|
|
159
|
+
"build_glue_schema",
|
|
160
|
+
"get_updateable_job_fields",
|
|
161
|
+
"inspect_live_update_job_fields",
|
|
162
|
+
"load_glue_schema",
|
|
163
|
+
"load_magic_map",
|
|
164
|
+
"write_glue_schema",
|
|
165
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""awsglue top-level module"""
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from gluekit.local.pyspark.context import SparkContext
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class GlueContext:
    """
    Local stand-in for the AWS ``GlueContext``, intended for testing.

    It lets AWS Glue jobs and notebooks be run locally during development
    while keeping the usual Glue boilerplate unchanged:

    ```python
    import sys

    from awsglue.context import GlueContext
    from awsglue.job import Job
    from awsglue.utils import getResolvedOptions
    from pyspark.context import SparkContext

    sc = SparkContext()
    glueContext = GlueContext(sc)
    spark = glueContext.spark_session
    args = getResolvedOptions(sys.argv, ['JOB_NAME'])
    job = Job(glueContext)
    job.init(args['JOB_NAME'], args)
    ```
    """

    def __init__(self, _spark_context: SparkContext):
        """Store the session returned by ``_spark_context.getOrCreate()``."""
        session = _spark_context.getOrCreate()
        self.spark_session = session
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
def getResolvedOptions(argv, keys):
    """Extract the values of the required ``--KEY`` options from *argv*.

    Both ``--KEY value`` and ``--KEY=value`` spellings are accepted. The
    space-separated form is looked up first (using the first occurrence of
    the flag), so inputs that resolved before still yield the same value; the
    inline ``=`` form is only consulted when that lookup finds nothing.

    Args:
        argv: Sequence of command-line tokens, e.g. ``sys.argv``.
        keys: Option names (without the leading ``--``) that must be present.

    Returns:
        A dict mapping each key to its string value.

    Raises:
        ValueError: If any key has no value in *argv*; the message lists all
            missing keys in the order they were requested.
    """
    resolved = {}
    missing = []
    for key in keys:
        flag = f"--{key}"
        inline_prefix = f"{flag}="

        try:
            position = argv.index(flag)
        except ValueError:
            position = -1
        # A flag sitting in the last slot has no value following it.
        if 0 <= position < len(argv) - 1:
            resolved[key] = argv[position + 1]
            continue

        inline_value = next(
            (
                token[len(inline_prefix):]
                for token in argv
                if isinstance(token, str) and token.startswith(inline_prefix)
            ),
            None,
        )
        if inline_value is None:
            missing.append(key)
        else:
            resolved[key] = inline_value

    if missing:
        raise ValueError(f"Missing required options: {missing}")
    return resolved
|