gluekit 1.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. gluekit/__init__.py +7 -0
  2. gluekit/app.py +0 -0
  3. gluekit/cli.py +64 -0
  4. gluekit/commands/__init__.py +1 -0
  5. gluekit/commands/add.py +455 -0
  6. gluekit/commands/build.py +816 -0
  7. gluekit/commands/checkout.py +114 -0
  8. gluekit/commands/clone.py +516 -0
  9. gluekit/commands/config_commands.py +180 -0
  10. gluekit/commands/constants.py +47 -0
  11. gluekit/commands/convert.py +336 -0
  12. gluekit/commands/edit.py +1104 -0
  13. gluekit/commands/helpers.py +1068 -0
  14. gluekit/commands/init.py +798 -0
  15. gluekit/commands/list.py +16 -0
  16. gluekit/commands/local_commands.py +680 -0
  17. gluekit/commands/pull.py +374 -0
  18. gluekit/commands/push.py +251 -0
  19. gluekit/commands/remove.py +161 -0
  20. gluekit/commands/run.py +126 -0
  21. gluekit/commands/status.py +97 -0
  22. gluekit/commands/sync.py +97 -0
  23. gluekit/commands/update.py +104 -0
  24. gluekit/job_mgmt/__init__.py +0 -0
  25. gluekit/job_mgmt/glue_jobs.py +1323 -0
  26. gluekit/job_mgmt/magics.py +122 -0
  27. gluekit/job_mgmt/resources/__init__.py +0 -0
  28. gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
  29. gluekit/job_mgmt/resources/magic_map.json +83 -0
  30. gluekit/job_mgmt/schema.py +165 -0
  31. gluekit/local/__init__.py +6 -0
  32. gluekit/local/awsglue/__init__.py +1 -0
  33. gluekit/local/awsglue/context.py +30 -0
  34. gluekit/local/awsglue/job.py +9 -0
  35. gluekit/local/awsglue/utils.py +17 -0
  36. gluekit/local/local.py +434 -0
  37. gluekit/local/local_fixtures.py +337 -0
  38. gluekit/local/pyspark/__init__.py +7 -0
  39. gluekit/local/pyspark/context.py +31 -0
  40. gluekit/local/pyspark/sql/__init__.py +6 -0
  41. gluekit/local/pyspark/sql/session.py +29 -0
  42. gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
  43. gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
  44. gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
  45. gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
  46. gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,83 @@
1
+ {
2
+ "version": 1,
3
+ "source": "https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions-magics.html",
4
+ "stop_session_magic": "%stop_session",
5
+ "section_order": [
6
+ "session",
7
+ "compute",
8
+ "dependencies",
9
+ "configure"
10
+ ],
11
+ "mappings": [
12
+ {
13
+ "field_path": "Role",
14
+ "emit": "magic",
15
+ "magic": "%iam_role",
16
+ "section": "session",
17
+ "format": "string"
18
+ },
19
+ {
20
+ "field_path": "Connections.Connections",
21
+ "emit": "magic",
22
+ "magic": "%connections",
23
+ "section": "session",
24
+ "format": "csv"
25
+ },
26
+ {
27
+ "field_path": "GlueVersion",
28
+ "emit": "magic",
29
+ "magic": "%glue_version",
30
+ "section": "compute",
31
+ "format": "string"
32
+ },
33
+ {
34
+ "field_path": "WorkerType",
35
+ "emit": "magic",
36
+ "magic": "%worker_type",
37
+ "section": "compute",
38
+ "format": "string"
39
+ },
40
+ {
41
+ "field_path": "NumberOfWorkers",
42
+ "emit": "magic",
43
+ "magic": "%number_of_workers",
44
+ "section": "compute",
45
+ "format": "string"
46
+ },
47
+ {
48
+ "field_path": "DefaultArguments.--additional-python-modules",
49
+ "emit": "magic",
50
+ "magic": "%additional_python_modules",
51
+ "section": "dependencies",
52
+ "format": "string"
53
+ },
54
+ {
55
+ "field_path": "DefaultArguments.--extra-py-files",
56
+ "emit": "magic",
57
+ "magic": "%extra_py_files",
58
+ "section": "dependencies",
59
+ "format": "string"
60
+ },
61
+ {
62
+ "field_path": "DefaultArguments.--extra-jars",
63
+ "emit": "magic",
64
+ "magic": "%extra_jars",
65
+ "section": "dependencies",
66
+ "format": "string"
67
+ },
68
+ {
69
+ "field_path": "DefaultArguments.--conf",
70
+ "emit": "magic",
71
+ "magic": "%spark_conf",
72
+ "section": "dependencies",
73
+ "format": "string"
74
+ },
75
+ {
76
+ "field_path": "Command.ScriptLocation",
77
+ "emit": "configure",
78
+ "configure_key": "script_location",
79
+ "section": "configure",
80
+ "format": "script_location_dir"
81
+ }
82
+ ]
83
+ }
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from functools import lru_cache
5
+ from importlib.resources import files
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import boto3
10
+
11
+ DEPRECATED_JOB_FIELDS = (
12
+ "AllocatedCapacity",
13
+ "MaxCapacity",
14
+ )
15
+
16
+ SCHEMA_RESOURCE_NAME = "glue_job_schema.json"
17
+ MAGIC_MAP_RESOURCE_NAME = "magic_map.json"
18
+ RESOURCE_DIR = Path(__file__).resolve().parent / "resources"
19
+
20
+
21
def _resource_text(name: str) -> str:
    """Return the UTF-8 text of ``name`` from the packaged ``resources`` directory.

    The file is looked up through ``importlib.resources`` relative to the
    ``gluekit.job_mgmt`` package.
    """
    package_root = files("gluekit.job_mgmt")
    target = package_root.joinpath("resources").joinpath(name)
    return target.read_text(encoding="utf-8")
24
+
25
+
26
@lru_cache(maxsize=1)
def load_glue_schema() -> dict[str, Any]:
    """Parse and return the bundled Glue job schema resource.

    The parsed document is memoised by ``lru_cache``; call
    ``load_glue_schema.cache_clear()`` to force a re-read.
    """
    raw_schema = _resource_text(SCHEMA_RESOURCE_NAME)
    return json.loads(raw_schema)
29
+
30
+
31
@lru_cache(maxsize=1)
def load_magic_map() -> dict[str, Any]:
    """Parse and return the bundled magic-map resource.

    The parsed document is memoised by ``lru_cache``.
    """
    raw_map = _resource_text(MAGIC_MAP_RESOURCE_NAME)
    return json.loads(raw_map)
34
+
35
+
36
def get_updateable_job_fields(prefer_live: bool = False) -> set[str]:
    """Return the set of field names listed as updateable for a Glue job.

    Args:
        prefer_live: When true, first ask ``inspect_live_update_job_fields``
            and use its answer if it is non-empty.

    Returns:
        The live field names when requested and available; otherwise the
        ``update_job_fields`` entry of the bundled schema (empty set if the
        entry is absent).
    """
    # The live lookup is only attempted when explicitly requested.
    live = inspect_live_update_job_fields() if prefer_live else None
    if live:
        return live
    bundled = load_glue_schema()
    return set(bundled.get("update_job_fields", []))
42
+
43
+
44
def inspect_live_update_job_fields() -> set[str]:
    """Return the member names of ``JobUpdate`` from the installed Glue service model.

    A Glue client is created only to reach its service model; no Glue API
    operation is invoked by this function.

    Returns:
        The member names of the ``JobUpdate`` member of ``UpdateJobRequest``,
        or an empty set when that member is absent or anything goes wrong.
    """
    try:
        glue = boto3.Session().client("glue", region_name="us-east-1")
        # Use the public ``meta.service_model`` accessor instead of the
        # private ``_service_model`` attribute.
        request_shape = glue.meta.service_model.shape_for("UpdateJobRequest")
        job_update_shape = request_shape.members.get("JobUpdate")
        if not job_update_shape:
            return set()
        return set(job_update_shape.members.keys())
    except Exception:
        # Deliberately best-effort: callers treat an empty set as
        # "no live information" and fall back to the bundled schema.
        return set()
56
+
57
+
58
+ def _clean_doc(value: Any) -> str | None:
59
+ if not value:
60
+ return None
61
+ text = " ".join(str(value).split())
62
+ return text or None
63
+
64
+
65
+ def _serialize_shape(shape: Any, seen: set[str] | None = None) -> dict[str, Any]:
66
+ seen = seen or set()
67
+ shape_token = getattr(shape, "name", None) or f"anon:{id(shape)}"
68
+
69
+ data: dict[str, Any] = {
70
+ "shape_name": getattr(shape, "name", None),
71
+ "type": getattr(shape, "type_name", None),
72
+ }
73
+
74
+ if shape_token in seen:
75
+ data["ref"] = shape_token
76
+ return data
77
+
78
+ next_seen = seen | {shape_token}
79
+
80
+ documentation = _clean_doc(getattr(shape, "documentation", None))
81
+ if documentation:
82
+ data["documentation"] = documentation
83
+
84
+ enum_values = list(getattr(shape, "enum", []) or [])
85
+ if enum_values:
86
+ data["enum"] = enum_values
87
+
88
+ for attr in ("min", "max"):
89
+ value = getattr(shape, attr, None)
90
+ if value is not None:
91
+ data[attr] = value
92
+
93
+ required_members = list(getattr(shape, "required_members", []) or [])
94
+ if required_members:
95
+ data["required_members"] = sorted(required_members)
96
+
97
+ if data["type"] == "structure":
98
+ members = getattr(shape, "members", {}) or {}
99
+ data["members"] = {
100
+ member_name: _serialize_shape(members[member_name], next_seen)
101
+ for member_name in sorted(members.keys())
102
+ }
103
+ elif data["type"] == "list":
104
+ member = getattr(shape, "member", None)
105
+ if member is not None:
106
+ data["member"] = _serialize_shape(member, next_seen)
107
+ elif data["type"] == "map":
108
+ key = getattr(shape, "key", None)
109
+ value = getattr(shape, "value", None)
110
+ if key is not None:
111
+ data["key"] = _serialize_shape(key, next_seen)
112
+ if value is not None:
113
+ data["value"] = _serialize_shape(value, next_seen)
114
+
115
+ return data
116
+
117
+
118
def build_glue_schema() -> dict[str, Any]:
    """Build the Glue job schema document from the installed Glue service model.

    The ``UpdateJobRequest``, ``JobUpdate`` and ``Job`` shapes are serialized
    with ``_serialize_shape`` and combined with summary fields.

    Returns:
        A dictionary with the keys ``schema_version``, ``service``,
        ``api_version``, ``deprecated_fields``, ``update_job_fields``,
        ``job_fields``, ``job_update_fields`` and ``root_shapes``.
    """
    glue = boto3.Session().client("glue", region_name="us-east-1")
    # Use the public ``meta.service_model`` accessor instead of the private
    # ``_service_model`` attribute.
    model = glue.meta.service_model

    root_shape_names = ["UpdateJobRequest", "JobUpdate", "Job"]
    root_shapes = {
        name: _serialize_shape(model.shape_for(name)) for name in root_shape_names
    }

    job_members = root_shapes["Job"].get("members", {})
    job_update_members = root_shapes["JobUpdate"].get("members", {})

    return {
        "schema_version": 1,
        "service": "glue",
        "api_version": getattr(model, "api_version", None),
        "deprecated_fields": sorted(DEPRECATED_JOB_FIELDS),
        "update_job_fields": sorted(job_update_members),
        "job_fields": job_members,
        "job_update_fields": job_update_members,
        "root_shapes": root_shapes,
    }
141
+
142
+
143
def write_glue_schema(output_path: Path | None = None) -> Path:
    """Regenerate the Glue schema and write it to disk as sorted, indented JSON.

    Args:
        output_path: Destination file. When omitted (or falsy) the schema is
            written to ``RESOURCE_DIR / SCHEMA_RESOURCE_NAME``.

    Returns:
        The path that was written.
    """
    target = output_path or (RESOURCE_DIR / SCHEMA_RESOURCE_NAME)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = build_glue_schema()
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    target.write_text(serialized + "\n", encoding="utf-8")
    # Drop the memoised schema so the next load re-reads the resource.
    load_glue_schema.cache_clear()
    return target
152
+
153
+
154
+ __all__ = [
155
+ "DEPRECATED_JOB_FIELDS",
156
+ "MAGIC_MAP_RESOURCE_NAME",
157
+ "RESOURCE_DIR",
158
+ "SCHEMA_RESOURCE_NAME",
159
+ "build_glue_schema",
160
+ "get_updateable_job_fields",
161
+ "inspect_live_update_job_fields",
162
+ "load_glue_schema",
163
+ "load_magic_map",
164
+ "write_glue_schema",
165
+ ]
@@ -0,0 +1,6 @@
1
+ from . import local_fixtures,local
2
+
3
+ run_glue_script = local.run_glue_script
4
+
5
+
6
+ __all__ = ["local","local_fixtures","run_glue_script"]
@@ -0,0 +1 @@
1
+ """awsglue top-level module"""
@@ -0,0 +1,30 @@
1
+ from gluekit.local.pyspark.context import SparkContext
2
+
3
+
4
class GlueContext:
    """Local emulation of the AWS ``GlueContext`` for testing purposes.

    It allows AWS Glue jobs and notebooks to be run locally during
    development. A typical script preamble looks like this:

    ```python
    import sys

    from awsglue.context import GlueContext
    from awsglue.job import Job
    from awsglue.utils import getResolvedOptions
    from pyspark.context import SparkContext

    sc = SparkContext()
    glueContext = GlueContext(sc)
    spark = glueContext.spark_session
    args = getResolvedOptions(sys.argv, ['JOB_NAME'])
    job = Job(glueContext)
    job.init(args['JOB_NAME'], args)
    ```
    """

    def __init__(self, _spark_context: SparkContext):
        # ``spark_session`` is whatever the supplied context's
        # ``getOrCreate()`` returns.
        session = _spark_context.getOrCreate()
        self.spark_session = session
@@ -0,0 +1,9 @@
1
class Job:
    """Minimal local stand-in for the ``awsglue.job.Job`` lifecycle object."""

    def __init__(self, _glue_context):
        # The context is accepted to match the call signature but is not kept.
        self.name = None

    def init(self, name, _args):
        """Record the job name; ``_args`` is accepted and ignored."""
        self.name = name

    def commit(self):
        """Do nothing and return ``None``."""
        return None
@@ -0,0 +1,17 @@
1
def getResolvedOptions(argv, keys):
    """Resolve required ``--KEY`` options from an argument vector.

    Both ``--KEY value`` and ``--KEY=value`` are accepted. The
    space-separated form takes precedence: the value is the token that
    follows the first exact ``--KEY`` match, taken as-is. The ``=`` form is
    consulted only when the space-separated form yields no value.

    Args:
        argv: Sequence of command-line tokens, e.g. ``sys.argv``.
        keys: Option names (without the leading ``--``) that must be present.

    Returns:
        A dict mapping every requested key to its string value.

    Raises:
        ValueError: If one or more keys have no value; the message lists all
            of them.
    """
    resolved = {}
    missing = []
    last_index = len(argv) - 1
    for key in keys:
        flag = f"--{key}"
        position = argv.index(flag) if flag in argv else -1
        if 0 <= position < last_index:
            resolved[key] = argv[position + 1]
            continue

        # Fall back to the inline ``--KEY=value`` form.
        prefix = f"{flag}="
        inline = next(
            (
                token[len(prefix):]
                for token in argv
                if isinstance(token, str) and token.startswith(prefix)
            ),
            None,
        )
        if inline is None:
            missing.append(key)
        else:
            resolved[key] = inline

    if missing:
        raise ValueError(f"Missing required options: {missing}")
    return resolved