gluekit 1.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. gluekit/__init__.py +7 -0
  2. gluekit/app.py +0 -0
  3. gluekit/cli.py +64 -0
  4. gluekit/commands/__init__.py +1 -0
  5. gluekit/commands/add.py +455 -0
  6. gluekit/commands/build.py +816 -0
  7. gluekit/commands/checkout.py +114 -0
  8. gluekit/commands/clone.py +516 -0
  9. gluekit/commands/config_commands.py +180 -0
  10. gluekit/commands/constants.py +47 -0
  11. gluekit/commands/convert.py +336 -0
  12. gluekit/commands/edit.py +1104 -0
  13. gluekit/commands/helpers.py +1068 -0
  14. gluekit/commands/init.py +798 -0
  15. gluekit/commands/list.py +16 -0
  16. gluekit/commands/local_commands.py +680 -0
  17. gluekit/commands/pull.py +374 -0
  18. gluekit/commands/push.py +251 -0
  19. gluekit/commands/remove.py +161 -0
  20. gluekit/commands/run.py +126 -0
  21. gluekit/commands/status.py +97 -0
  22. gluekit/commands/sync.py +97 -0
  23. gluekit/commands/update.py +104 -0
  24. gluekit/job_mgmt/__init__.py +0 -0
  25. gluekit/job_mgmt/glue_jobs.py +1323 -0
  26. gluekit/job_mgmt/magics.py +122 -0
  27. gluekit/job_mgmt/resources/__init__.py +0 -0
  28. gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
  29. gluekit/job_mgmt/resources/magic_map.json +83 -0
  30. gluekit/job_mgmt/schema.py +165 -0
  31. gluekit/local/__init__.py +6 -0
  32. gluekit/local/awsglue/__init__.py +1 -0
  33. gluekit/local/awsglue/context.py +30 -0
  34. gluekit/local/awsglue/job.py +9 -0
  35. gluekit/local/awsglue/utils.py +17 -0
  36. gluekit/local/local.py +434 -0
  37. gluekit/local/local_fixtures.py +337 -0
  38. gluekit/local/pyspark/__init__.py +7 -0
  39. gluekit/local/pyspark/context.py +31 -0
  40. gluekit/local/pyspark/sql/__init__.py +6 -0
  41. gluekit/local/pyspark/sql/session.py +29 -0
  42. gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
  43. gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
  44. gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
  45. gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
  46. gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
gluekit/local/local.py ADDED
@@ -0,0 +1,434 @@
1
+ """Python helpers for running Glue scripts locally with mocked AWS services.
2
+
3
+ This module exposes the same mock environment used by ``gluekit run`` so that
4
+ individual scripts — and the test suites that exercise them — can drive the
5
+ setup directly from Python instead of going through the CLI.
6
+
7
+ Typical usage in a test or ad-hoc script::
8
+
9
+ from gluekit.local import mock_glue_aws, run_glue_script
10
+
11
+ # ── option A: context manager ──────────────────────────────────────────
12
+ with mock_glue_aws(
13
+ create_buckets=["my-input-bucket"],
14
+ ssm_parameters={"/app/env": "dev"},
15
+ ):
16
+ import boto3
17
+ s3 = boto3.client("s3")
18
+ s3.put_object(Bucket="my-input-bucket", Key="data.csv", Body=b"a,b")
19
+
20
+ # ── option B: run a script file ────────────────────────────────────────
21
+ run_glue_script(
22
+ "glue/scripts/my_job.py",
23
+ glue_args={"run_date": "2026-06-18"},
24
+ create_buckets=["my-input-bucket"],
25
+ ssm_parameters={"/app/env": "dev"},
26
+ )
27
+
28
+ # ── option C: persist fixture mappings once, then run without repeating them ──
29
+ # 1. Run once in a terminal:
30
+ # gluekit local setup --s3-bucket my-input-bucket=tests/fixtures
31
+ # gluekit local setup --ssm-param /app/env=dev
32
+ # gluekit local s3 cp tests/fixtures/data.csv s3://my-input-bucket/data.csv
33
+ #
34
+ # 2. Create a thin launcher (e.g. run_local.py) and hit "play" in VSCode:
35
+ from gluekit.local import run_glue_script
36
+ run_glue_script("glue/scripts/my_job.py", config_file=".gluekit/local.json")
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import importlib
42
+ import os
43
+ import runpy
44
+ import sys
45
+ from contextlib import contextmanager
46
+ from pathlib import Path
47
+ from typing import Generator
48
+
49
+ from .local_fixtures import (
50
+ LOCAL_RUN_CONFIG_FILE,
51
+ load_runtime_config,
52
+ parse_s3_uri,
53
+ )
54
+
55
+ #: ``LOCAL_RUN_CONFIG_FILE`` (imported above from ``.local_fixtures``) is the default path for the local run configuration file written by ``gluekit local`` commands.
56
+
57
+
58
def load_local_run_config(
    config_file: str | Path | None = None,
) -> dict[str, object]:
    """Read persisted local-run fixtures from a JSON config file.

    The file is the one written by ``gluekit local s3`` and
    ``gluekit local ssm``. When *config_file* is ``None`` the default
    ``LOCAL_RUN_CONFIG_FILE`` (``.gluekit/local.json`` relative to the current
    working directory) is read instead.

    Loading is delegated to ``load_runtime_config``, which yields an empty
    dict when the file does not exist or cannot be parsed, so the result can
    be spread straight into keyword arguments without extra guards.

    Args:
        config_file: Path to the JSON config file. Defaults to
            ``LOCAL_RUN_CONFIG_FILE`` when ``None``.

    Returns:
        A dict with zero or more runtime keyword arguments:
        ``create_buckets`` (``list[str]``), ``s3_objects``
        (``dict[str, str]``), and ``ssm_parameters`` (``dict[str, str]``).

    Example:
        ::

            from gluekit.local import load_local_run_config, mock_glue_aws

            cfg = load_local_run_config()
            with mock_glue_aws(**cfg):
                ...
    """
    if config_file is None:
        return load_runtime_config(LOCAL_RUN_CONFIG_FILE)
    return load_runtime_config(Path(config_file))
91
+
92
+
93
def _create_mock_bucket(s3_client, *, bucket_name: str, region: str) -> None:
    """Create *bucket_name* through *s3_client* in the active moto mock.

    Args:
        s3_client: Client object exposing ``create_bucket`` (a boto3 S3 client
            in normal use).
        bucket_name: Name of the bucket to create.
        region: Region the bucket is created in.
    """
    request = {"Bucket": bucket_name}
    # us-east-1 is the special case: the bucket is created without any
    # CreateBucketConfiguration; every other region names itself explicitly.
    if region != "us-east-1":
        request["CreateBucketConfiguration"] = {"LocationConstraint": region}
    s3_client.create_bucket(**request)
102
+
103
+
104
def _contains_glue_arg(argv: list[str], name: str) -> bool:
    """Return True if *name* already appears as a ``--name`` flag in *argv*.

    Both spellings count as a match: the two-token form (``--name value``)
    and the inline form (``--name=value``). Tokens are treated the same way
    whether they appear before or after a bare ``--`` separator, so an inline
    flag placed after the separator is detected as well.

    Args:
        argv: ``sys.argv``-style tokens to scan.
        name: Argument name without the leading dashes.

    Returns:
        ``True`` when the flag is present in either form, ``False`` otherwise.
    """
    flag = f"--{name}"
    inline_prefix = f"{flag}="
    return any(
        token == flag or token.startswith(inline_prefix) for token in argv
    )
115
+
116
+
117
class LocalSparkSession:
    """Fallback SparkSession used when SQLFrame is not installed.

    Only the ``SparkSession.builder...getOrCreate()`` bootstrap chain is
    provided; the resulting session object carries no state or methods of its
    own.
    """

    class _Builder:
        """Chainable builder whose configuration calls are accepted and ignored."""

        def getOrCreate(self) -> "LocalSparkSession":
            """Return a new ``LocalSparkSession`` on every call."""
            return LocalSparkSession()

        def appName(self, _name: str) -> "LocalSparkSession._Builder":
            """Ignore the application name and return this builder for chaining."""
            return self

        def config(self, _key: str, _value: object) -> "LocalSparkSession._Builder":
            """Ignore the configuration pair and return this builder for chaining."""
            return self

    # Single class-level builder, reached as ``LocalSparkSession.builder``.
    builder = _Builder()
131
+
132
+
133
class LocalSparkContext:
    """Minimal SparkContext compatibility shim for local Glue bootstrap code."""

    def getOrCreate(self):
        """Return the session produced by the emulated ``SparkSession`` builder."""
        # Imported at call time rather than when this module is loaded.
        from gluekit.local.pyspark.sql import SparkSession

        session_builder = SparkSession.builder
        return session_builder.getOrCreate()
140
+
141
+
142
class LocalGlueContext:
    """Minimal GlueContext compatibility shim backed by the active local Spark API.

    The only state kept is ``spark_session``, obtained from the supplied
    context when the instance is constructed.
    """

    def __init__(self, spark_context: LocalSparkContext):
        """Resolve the session from *spark_context* and store it.

        Args:
            spark_context: Object whose ``getOrCreate()`` returns the session
                to expose as ``spark_session``.
        """
        session = spark_context.getOrCreate()
        self.spark_session = session
147
+
148
+
149
class LocalGlueJob:
    """Minimal Glue Job shim for scripts that call ``init`` and ``commit``.

    The job only remembers the name passed to ``init``; the Glue context and
    the argument mapping are accepted but never used.
    """

    def __init__(self, _glue_context: LocalGlueContext):
        """Create a job that has no name until ``init`` is called."""
        # The context is accepted for signature compatibility and not stored.
        self.name: str | None = None

    def init(self, name: str, _args: dict[str, str]) -> None:
        """Record *name* as the job name; *_args* is ignored."""
        self.name = name

    def commit(self) -> None:
        """Do nothing and return ``None``."""
        return None
160
+
161
+
162
def get_resolved_options(argv: list[str], keys: list[str]) -> dict[str, str]:
    """Local implementation of ``awsglue.utils.getResolvedOptions``.

    Each requested key is looked up in *argv* in either the two-token form
    (``--key value``) or the inline form (``--key=value``). The first matching
    token wins. A trailing ``--key`` with nothing after it does not count as a
    match.

    Args:
        argv: ``sys.argv``-style tokens to search.
        keys: Option names (without leading dashes) that must be present.

    Returns:
        Mapping of every requested key to its string value.

    Raises:
        ValueError: If one or more keys cannot be resolved; the message lists
            all of them.
    """
    options: dict[str, str] = {}
    absent: list[str] = []
    last_index = len(argv) - 1

    for option in keys:
        long_flag = "--" + option
        inline_prefix = long_flag + "="
        found = None
        for position, token in enumerate(argv):
            # Two-token form needs a following token to serve as the value.
            if token == long_flag and position < last_index:
                found = argv[position + 1]
                break
            if token.startswith(inline_prefix):
                found = token[len(inline_prefix):]
                break
        if found is None:
            absent.append(option)
            continue
        options[option] = found

    if absent:
        raise ValueError(f"Missing required options: {absent}")
    return options
184
+
185
+
186
@contextmanager
def mock_glue_runtime(
    *, sqlframe_engine: str = "duckdb"
) -> Generator[None, None, None]:
    """Temporarily expose Glue runtime import paths for local script execution.

    AWS Glue scripts commonly import ``awsglue`` and ``pyspark`` as top-level
    packages that only exist inside the Glue runtime. Gluekit keeps those
    emulations scoped to this context so installing the CLI does not shadow real
    ``awsglue`` or ``pyspark`` packages in unrelated Python processes.

    On entry SQLFrame is activated for *sqlframe_engine* and the gluekit shim
    packages are registered in ``sys.modules`` under the names ``awsglue`` and
    ``pyspark``. On exit every ``sys.modules`` entry in those two namespaces —
    the packages themselves and any submodule such as ``pyspark.sql`` or
    ``awsglue.context`` registered while the context was active — is removed
    and the entries that existed before entry are put back. The same cleanup
    runs when setup itself fails partway through.

    Args:
        sqlframe_engine: Engine name forwarded to ``sqlframe.activate``.

    Raises:
        ImportError: If ``sqlframe`` or the gluekit shim packages cannot be
            imported.
    """
    package_names = ("awsglue", "pyspark")

    def _is_emulated(module_name: str) -> bool:
        # True for the package itself and for anything below it.
        return any(
            module_name == package or module_name.startswith(f"{package}.")
            for package in package_names
        )

    # Snapshot the packages AND their submodules: restoring only the two
    # top-level names would leave shim submodules cached after exit.
    saved_modules = {
        module_name: module
        for module_name, module in list(sys.modules.items())
        if _is_emulated(module_name)
    }

    try:
        from sqlframe import activate

        activate(engine=sqlframe_engine)
        sys.modules.update(
            {
                "awsglue": importlib.import_module("gluekit.local.awsglue"),
                "pyspark": importlib.import_module("gluekit.local.pyspark"),
            }
        )
        yield
    finally:
        # Drop everything registered under the emulated namespaces, then
        # reinstate exactly what was there before the context was entered.
        for module_name in [name for name in list(sys.modules) if _is_emulated(name)]:
            sys.modules.pop(module_name, None)
        sys.modules.update(saved_modules)
220
+
221
+
222
@contextmanager
def mock_glue_aws(
    *,
    create_buckets: list[str] | None = None,
    s3_objects: dict[str, str] | None = None,
    ssm_parameters: dict[str, str] | None = None,
    ssm_parameter_types: dict[str, str] | None = None,
    aws_region: str | None = None,
    config_file: str | Path | None = None,
) -> Generator[None, None, None]:
    """Context manager that activates a mocked AWS environment for local Glue development.

    Inside the ``with`` block every ``boto3`` call is intercepted by
    `moto <https://docs.getmoto.org/>`_ so no real AWS credentials are needed
    and no real AWS resources are touched. The emulated ``awsglue`` and
    ``pyspark`` import paths from ``mock_glue_runtime`` are active for the
    same duration.

    AWS credential and region environment variables are temporarily set to
    the ``"testing"`` values required by moto, and are restored to their
    original values on exit.

    Args:
        create_buckets: S3 bucket names to pre-create in the mock environment.
        s3_objects: ``{"s3://bucket/key": "local/file"}`` mappings to upload into
            mocked S3 before entering the block. Buckets named in these URIs
            are created as well.
        ssm_parameters: ``{"/parameter/name": "value"}`` pairs to seed into
            the mock SSM Parameter Store before entering the block.
        ssm_parameter_types: Optional ``{"/parameter/name": "String"}`` type map
            for seeded SSM parameters. Defaults to ``"String"``.
        aws_region: AWS region for mock service clients. Defaults to
            ``"us-east-1"``.
        config_file: Path to a JSON config file written by ``gluekit local``
            fixture commands. Values in the file fill in any parameter that
            was not provided explicitly (``create_buckets``, ``s3_objects``,
            and ``ssm_parameters``). When ``None`` no config file is loaded.

    Raises:
        ImportError: If ``moto[s3,ssm]`` is not installed.

    Example:
        ::

            from gluekit.local import mock_glue_aws
            import boto3

            with mock_glue_aws(
                create_buckets=["my-bucket"],
                ssm_parameters={"/app/env": "dev"},
            ):
                s3 = boto3.client("s3")
                s3.put_object(Bucket="my-bucket", Key="out.txt", Body=b"ok")
                body = s3.get_object(Bucket="my-bucket", Key="out.txt")["Body"].read()
                print(body)  # b"ok"

            # Or load persisted fixtures written by ``gluekit local s3`` and ``gluekit local ssm``:
            with mock_glue_aws(config_file=".gluekit/local.json"):
                ...
    """
    # Explicit arguments always win; the config file only fills the gaps.
    if config_file is not None:
        persisted = load_local_run_config(config_file)
        create_buckets = (  # type: ignore[assignment]
            persisted.get("create_buckets") if create_buckets is None else create_buckets
        )
        s3_objects = (  # type: ignore[assignment]
            persisted.get("s3_objects") if s3_objects is None else s3_objects
        )
        ssm_parameters = (  # type: ignore[assignment]
            persisted.get("ssm_parameters") if ssm_parameters is None else ssm_parameters
        )

    # Anything that is not a string (including None) falls back to the default.
    if isinstance(aws_region, str):
        effective_region: str = aws_region
    else:
        effective_region = "us-east-1"

    try:
        from moto import mock_aws
    except ImportError as exc:
        install_hint = (
            "mock_glue_aws requires moto with S3/SSM support. "
            'Install with: uv add "moto[s3,ssm]" or pip install "moto[s3,ssm]"'
        )
        raise ImportError(install_hint) from exc

    mock_env = {
        "AWS_ACCESS_KEY_ID": "testing",
        "AWS_SECRET_ACCESS_KEY": "testing",
        "AWS_SESSION_TOKEN": "testing",
        "AWS_DEFAULT_REGION": effective_region,
        "AWS_REGION": effective_region,
    }
    # Remember the caller's values (None = unset) so exit can undo the patch.
    previous_env = {variable: os.environ.get(variable) for variable in mock_env}
    os.environ.update(mock_env)

    try:
        with mock_aws(), mock_glue_runtime():
            import boto3

            s3 = boto3.client("s3", region_name=effective_region)
            ssm = boto3.client("ssm", region_name=effective_region)

            object_sources = s3_objects or {}

            # Every bucket that is requested directly or referenced by an
            # object URI must exist before the uploads start.
            buckets = set(create_buckets or [])
            for bucket, _key in map(parse_s3_uri, object_sources):
                buckets.add(bucket)
            for bucket in sorted(buckets):
                _create_mock_bucket(s3, bucket_name=bucket, region=effective_region)

            for uri, source in object_sources.items():
                bucket, key = parse_s3_uri(uri)
                s3.upload_file(str(Path(source)), bucket, key)

            parameter_types = ssm_parameter_types or {}
            for parameter, value in (ssm_parameters or {}).items():
                ssm.put_parameter(
                    Name=parameter,
                    Value=value,
                    Type=parameter_types.get(parameter, "String"),
                    Overwrite=True,
                )

            yield
    finally:
        for variable, previous in previous_env.items():
            if previous is not None:
                os.environ[variable] = previous
            else:
                os.environ.pop(variable, None)
344
+
345
+
346
def run_glue_script(
    script_path: str | Path,
    *,
    script_args: list[str] | None = None,
    glue_args: dict[str, str] | None = None,
    job_name: str = "local-glue-job",
    create_buckets: list[str] | None = None,
    s3_objects: dict[str, str] | None = None,
    ssm_parameters: dict[str, str] | None = None,
    ssm_parameter_types: dict[str, str] | None = None,
    aws_region: str | None = None,
    config_file: str | Path | None = None,
) -> None:
    """Run a Glue script locally with emulated ``awsglue``/``pyspark`` and mocked AWS.

    This mirrors the behaviour of ``gluekit run`` but is callable from Python
    code — useful for integration tests or ad-hoc local runs where you want
    programmatic control over the mock fixtures. The script is executed with
    ``runpy.run_path`` as ``__main__``; ``sys.argv`` is replaced for the
    duration of the run and restored afterwards, even if the script raises.

    Args:
        script_path: Path to the Glue Python script to execute.
        script_args: Raw ``sys.argv``-style tokens (e.g.
            ``["--JOB_NAME", "my-job"]``) appended **before** any
            ``glue_args``. Use this to pass arguments verbatim when you
            already have them in flag form.
        glue_args: Keyword-style Glue arguments as a dict
            (e.g. ``{"run_date": "2026-06-18"}``). Each entry is translated
            to ``--key value`` and appended to ``sys.argv``.
        job_name: Value injected as ``--JOB_NAME`` when the argument is not
            already present in *script_args* or *glue_args*.
        create_buckets: S3 bucket names to pre-create in the mock environment.
        s3_objects: ``{"s3://bucket/key": "local/file"}`` mappings to upload into
            mocked S3 before running the script.
        ssm_parameters: ``{"/parameter/name": "value"}`` pairs to seed into
            the mock SSM Parameter Store.
        ssm_parameter_types: Optional ``{"/parameter/name": "String"}`` type map
            for seeded SSM parameters. Defaults to ``"String"``.
        aws_region: AWS region for mock service clients. Defaults to
            ``"us-east-1"``.
        config_file: Path to a JSON config file written by ``gluekit local``
            fixture commands. Values in the file fill in any parameter that
            was not provided explicitly (``create_buckets``, ``s3_objects``,
            and ``ssm_parameters``). When ``None`` no config file is loaded.

    Raises:
        ImportError: If ``moto[s3,ssm]`` is not installed.

    Example:
        ::

            from gluekit.local import run_glue_script

            run_glue_script(
                "glue/scripts/my_job.py",
                glue_args={"run_date": "2026-06-18"},
                create_buckets=["my-input-bucket"],
                ssm_parameters={"/app/env": "dev"},
            )

            # Or drive everything from a persisted config file:
            run_glue_script(
                "glue/scripts/my_job.py",
                config_file=".gluekit/local.json",
            )
    """
    resolved_script = str(Path(script_path).resolve())

    # Verbatim tokens first, then the dict entries expanded to flag/value pairs.
    cli_tokens: list[str] = [*(script_args or [])]
    for key, value in (glue_args or {}).items():
        cli_tokens += [f"--{key}", value]

    # Only inject the default job name when the caller did not supply one.
    if not _contains_glue_arg(cli_tokens, "JOB_NAME"):
        cli_tokens += ["--JOB_NAME", job_name]

    fixture_kwargs = {
        "create_buckets": create_buckets,
        "s3_objects": s3_objects,
        "ssm_parameters": ssm_parameters,
        "ssm_parameter_types": ssm_parameter_types,
        "aws_region": aws_region,
        "config_file": config_file,
    }

    saved_argv = list(sys.argv)
    try:
        with mock_glue_aws(**fixture_kwargs):
            # The script sees itself as argv[0], exactly as if run directly.
            sys.argv = [resolved_script, *cli_tokens]
            runpy.run_path(resolved_script, run_name="__main__")
    finally:
        sys.argv = saved_argv