gluekit 1.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gluekit/__init__.py +7 -0
- gluekit/app.py +0 -0
- gluekit/cli.py +64 -0
- gluekit/commands/__init__.py +1 -0
- gluekit/commands/add.py +455 -0
- gluekit/commands/build.py +816 -0
- gluekit/commands/checkout.py +114 -0
- gluekit/commands/clone.py +516 -0
- gluekit/commands/config_commands.py +180 -0
- gluekit/commands/constants.py +47 -0
- gluekit/commands/convert.py +336 -0
- gluekit/commands/edit.py +1104 -0
- gluekit/commands/helpers.py +1068 -0
- gluekit/commands/init.py +798 -0
- gluekit/commands/list.py +16 -0
- gluekit/commands/local_commands.py +680 -0
- gluekit/commands/pull.py +374 -0
- gluekit/commands/push.py +251 -0
- gluekit/commands/remove.py +161 -0
- gluekit/commands/run.py +126 -0
- gluekit/commands/status.py +97 -0
- gluekit/commands/sync.py +97 -0
- gluekit/commands/update.py +104 -0
- gluekit/job_mgmt/__init__.py +0 -0
- gluekit/job_mgmt/glue_jobs.py +1323 -0
- gluekit/job_mgmt/magics.py +122 -0
- gluekit/job_mgmt/resources/__init__.py +0 -0
- gluekit/job_mgmt/resources/glue_job_schema.json +40341 -0
- gluekit/job_mgmt/resources/magic_map.json +83 -0
- gluekit/job_mgmt/schema.py +165 -0
- gluekit/local/__init__.py +6 -0
- gluekit/local/awsglue/__init__.py +1 -0
- gluekit/local/awsglue/context.py +30 -0
- gluekit/local/awsglue/job.py +9 -0
- gluekit/local/awsglue/utils.py +17 -0
- gluekit/local/local.py +434 -0
- gluekit/local/local_fixtures.py +337 -0
- gluekit/local/pyspark/__init__.py +7 -0
- gluekit/local/pyspark/context.py +31 -0
- gluekit/local/pyspark/sql/__init__.py +6 -0
- gluekit/local/pyspark/sql/session.py +29 -0
- gluekit-1.0.1.dev1.dist-info/METADATA +1176 -0
- gluekit-1.0.1.dev1.dist-info/RECORD +46 -0
- gluekit-1.0.1.dev1.dist-info/WHEEL +5 -0
- gluekit-1.0.1.dev1.dist-info/entry_points.txt +2 -0
- gluekit-1.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import posixpath
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
LOCAL_RUN_CONFIG_FILE = Path(".gluekit/local.json")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def empty_local_run_config() -> dict[str, Any]:
    """Build a blank version-1 local run configuration.

    Returns:
        A newly created dictionary with an ``s3`` section (empty ``buckets``
        and ``objects`` mappings plus an empty ``directories`` list) and an
        ``ssm`` section with an empty ``parameters`` mapping. Every call
        builds fresh containers, so the result can be mutated safely.
    """
    s3_section: dict[str, Any] = {"buckets": {}, "objects": {}, "directories": []}
    ssm_section: dict[str, Any] = {"parameters": {}}
    return {"version": 1, "s3": s3_section, "ssm": ssm_section}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_s3_uri(raw: str) -> tuple[str, str]:
    """Split an ``s3://bucket/key`` URI into its bucket and key parts.

    Args:
        raw: The URI to split. It must begin with ``s3://``.

    Returns:
        ``(bucket, key)``. The bucket has surrounding whitespace removed; the
        key is everything after the first ``/`` following the bucket, returned
        untouched (it may be empty or carry extra slashes).

    Raises:
        ValueError: If the scheme prefix is missing or the bucket is blank.
    """
    scheme = "s3://"
    if not raw.startswith(scheme):
        raise ValueError(f"S3 URI must start with s3://. Received: {raw!r}")
    bucket_part, _, key = raw[len(scheme):].partition("/")
    bucket = bucket_part.strip()
    if bucket:
        return bucket, key
    raise ValueError(f"S3 URI must include a bucket name. Received: {raw!r}")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def is_s3_uri(raw: str) -> bool:
    """Report whether *raw* begins with the ``s3://`` scheme prefix.

    The comparison is case-sensitive and does not validate the rest of the
    string (an empty bucket still counts as an S3 URI here).
    """
    has_scheme = raw.startswith("s3://")
    return has_scheme
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def normalize_s3_uri(raw: str) -> str:
    """Return the canonical spelling of an S3 URI.

    Leading and trailing slashes are removed from the key, and the remainder
    is passed through ``posixpath.normpath`` (which collapses repeated slashes
    and resolves ``.`` / ``..`` segments).

    Args:
        raw: An ``s3://`` URI, with or without a key.

    Returns:
        ``s3://bucket/key`` when a key remains after normalization, otherwise
        the bare ``s3://bucket``.

    Raises:
        ValueError: Propagated from ``parse_s3_uri`` for malformed input.
    """
    bucket, key = parse_s3_uri(raw)
    trimmed = key.strip("/")
    cleaned = posixpath.normpath(trimmed) if trimmed else ""
    # normpath reduces keys such as "a/.." to "."; that means "no key".
    if cleaned in ("", "."):
        return f"s3://{bucket}"
    return f"s3://{bucket}/{cleaned}"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def normalize_s3_object_uri(raw: str) -> str:
    """Normalize an S3 URI that must identify an object (bucket plus key).

    Args:
        raw: An ``s3://bucket/key`` URI.

    Returns:
        The URI as canonicalized by ``normalize_s3_uri``.

    Raises:
        ValueError: If the URI is malformed (from ``parse_s3_uri``) or if no
            key is left after normalization.
    """
    bucket, _ = parse_s3_uri(raw)
    normalized = normalize_s3_uri(raw)
    # Validate the *normalized* form: keys such as "." or "a/.." are non-empty
    # as written but normalize to nothing, which would otherwise let a
    # bucket-only URI slip through as if it named an object.
    if normalized == f"s3://{bucket}":
        raise ValueError(f"S3 object URI must include a key. Received: {raw!r}")
    return normalized
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def s3_destination_uri(destination: str, source_name: str) -> str:
    """Resolve the object URI a source file should be stored under.

    Args:
        destination: Target ``s3://`` URI. When it has no key or ends with
            ``/`` it is treated as a prefix.
        source_name: Name appended to the key when *destination* is a prefix.

    Returns:
        The normalized object URI: *destination* itself when it already names
        an object, otherwise *destination* joined with *source_name*.

    Raises:
        ValueError: If the resulting URI is malformed or lacks a key.
    """
    bucket, key = parse_s3_uri(destination)
    names_a_prefix = destination.endswith("/") or not key
    if not names_a_prefix:
        return normalize_s3_object_uri(destination)
    object_key = posixpath.join(key, source_name)
    return normalize_s3_object_uri(f"s3://{bucket}/{object_key}")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def s3_prefix(raw: str) -> str:
    """Render an S3 URI as a prefix that always ends with ``/``.

    Args:
        raw: An ``s3://`` URI, with or without a key.

    Returns:
        ``s3://bucket/key/`` with surrounding slashes trimmed from the key, or
        ``s3://bucket/`` when there is no key.

    Raises:
        ValueError: Propagated from ``parse_s3_uri`` for malformed input.
    """
    bucket, key = parse_s3_uri(raw)
    trimmed = key.strip("/")
    suffix = f"{trimmed}/" if trimmed else ""
    return f"s3://{bucket}/{suffix}"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def s3_directory_destination(raw: str) -> str:
    """Canonicalize *raw* and return it as a ``/``-terminated S3 prefix.

    The URI is first cleaned with ``normalize_s3_uri`` and the result is then
    formatted by ``s3_prefix``.
    """
    canonical = normalize_s3_uri(raw)
    return s3_prefix(canonical)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def s3_uri_matches_prefix(uri: str, prefix: str) -> bool:
    """Report whether *uri* starts with *prefix*.

    This is a plain string-prefix comparison with no normalization, so a
    prefix without a trailing ``/`` (``s3://b/foo``) also matches sibling
    keys such as ``s3://b/foobar``.
    """
    is_match = uri.startswith(prefix)
    return is_match
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def normalize_s3_bucket_name(raw: str) -> str:
    """Extract a bare bucket name from a bucket mapping key.

    Args:
        raw: Either a plain bucket name or a bucket-only ``s3://`` URI.

    Returns:
        The bucket name with surrounding whitespace removed.

    Raises:
        ValueError: If the name is blank, a plain name contains ``/``, or an
            ``s3://`` URI carries a key.
    """
    if not raw.startswith("s3://"):
        name = raw.strip()
        if not name:
            raise ValueError("S3 bucket mapping must include a bucket name.")
        if "/" in name:
            raise ValueError(f"S3 bucket mapping must be a bucket name: {raw!r}")
        return name

    # URI form: only trailing slashes are tolerated after the bucket.
    name, key = parse_s3_uri(raw)
    if key.strip("/"):
        raise ValueError(f"S3 bucket mapping must not include a key: {raw!r}")
    return name
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def normalize_s3_root_path(raw: str) -> str:
    """Trim whitespace from the configured S3 root directory.

    Args:
        raw: The ``s3.root`` value from the local run config.

    Returns:
        The value with leading and trailing whitespace removed.

    Raises:
        ValueError: If nothing remains after trimming.
    """
    trimmed = raw.strip()
    if trimmed:
        return trimmed
    raise ValueError("S3 root mapping must include a local directory.")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def load_local_fixture_config(
    config_file: str | Path | None = None,
) -> dict[str, Any]:
    """Read and normalize the local run configuration file.

    Args:
        config_file: Location of the JSON config. ``None`` selects
            ``LOCAL_RUN_CONFIG_FILE``.

    Returns:
        The normalized configuration, or an empty configuration when the file
        does not exist.

    Raises:
        ValueError: If the file is not valid JSON, its top level is not a JSON
            object, or normalization rejects its contents.
    """
    path = LOCAL_RUN_CONFIG_FILE if config_file is None else Path(config_file)
    if not path.exists():
        return empty_local_run_config()

    text = path.read_text(encoding="utf-8")
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Invalid JSON in local run config {path}: {exc}") from exc

    if isinstance(data, dict):
        return normalize_local_fixture_config(data, path=path)
    raise ValueError(f"Local run config {path} must contain a JSON object.")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def normalize_local_fixture_config(
|
|
115
|
+
data: dict[str, Any], *, path: Path | None = None
|
|
116
|
+
) -> dict[str, Any]:
|
|
117
|
+
location = f" in {path}" if path else ""
|
|
118
|
+
if data.get("version") != 1:
|
|
119
|
+
raise ValueError(f"Local run config{location} must use version 1.")
|
|
120
|
+
raw_profile = data.get("profile")
|
|
121
|
+
if raw_profile is not None and not isinstance(raw_profile, str):
|
|
122
|
+
raise ValueError(f"Local run config{location} has invalid profile.")
|
|
123
|
+
profile = raw_profile.strip() if isinstance(raw_profile, str) else None
|
|
124
|
+
|
|
125
|
+
s3 = data.get("s3", {})
|
|
126
|
+
if not isinstance(s3, dict):
|
|
127
|
+
raise ValueError(f"Local run config{location} has invalid s3 section.")
|
|
128
|
+
raw_buckets = s3.get("buckets", {})
|
|
129
|
+
raw_root = s3.get("root")
|
|
130
|
+
raw_objects = s3.get("objects", {})
|
|
131
|
+
raw_directories = s3.get("directories", [])
|
|
132
|
+
if raw_root is not None and not isinstance(raw_root, str):
|
|
133
|
+
raise ValueError(f"Local run config{location} has invalid s3.root.")
|
|
134
|
+
if not isinstance(raw_buckets, dict) or not all(
|
|
135
|
+
isinstance(bucket, str) and isinstance(local_path, str)
|
|
136
|
+
for bucket, local_path in raw_buckets.items()
|
|
137
|
+
):
|
|
138
|
+
raise ValueError(f"Local run config{location} has invalid s3.buckets.")
|
|
139
|
+
if not isinstance(raw_objects, dict) or not all(
|
|
140
|
+
isinstance(uri, str) and isinstance(local_path, str)
|
|
141
|
+
for uri, local_path in raw_objects.items()
|
|
142
|
+
):
|
|
143
|
+
raise ValueError(f"Local run config{location} has invalid s3.objects.")
|
|
144
|
+
if not isinstance(raw_directories, list):
|
|
145
|
+
raise ValueError(f"Local run config{location} has invalid s3.directories.")
|
|
146
|
+
|
|
147
|
+
buckets: dict[str, str] = {}
|
|
148
|
+
for bucket, local_path in raw_buckets.items():
|
|
149
|
+
normalized_bucket = normalize_s3_bucket_name(bucket)
|
|
150
|
+
if not local_path.strip():
|
|
151
|
+
raise ValueError(
|
|
152
|
+
f"Local run config{location} has invalid local path for S3 bucket {bucket!r}."
|
|
153
|
+
)
|
|
154
|
+
buckets[normalized_bucket] = local_path
|
|
155
|
+
|
|
156
|
+
root = normalize_s3_root_path(raw_root) if isinstance(raw_root, str) else None
|
|
157
|
+
|
|
158
|
+
objects: dict[str, str] = {}
|
|
159
|
+
for uri, local_path in raw_objects.items():
|
|
160
|
+
normalized_uri = normalize_s3_object_uri(uri)
|
|
161
|
+
objects[normalized_uri] = local_path
|
|
162
|
+
directories: list[dict[str, str]] = []
|
|
163
|
+
seen_directories: set[tuple[str, str]] = set()
|
|
164
|
+
for entry in raw_directories:
|
|
165
|
+
if not isinstance(entry, dict):
|
|
166
|
+
raise ValueError(
|
|
167
|
+
f"Local run config{location} has invalid s3.directories entry."
|
|
168
|
+
)
|
|
169
|
+
raw_local_path = entry.get("local_path")
|
|
170
|
+
raw_destination = entry.get("destination")
|
|
171
|
+
if not isinstance(raw_local_path, str) or not raw_local_path.strip():
|
|
172
|
+
raise ValueError(
|
|
173
|
+
f"Local run config{location} has invalid local_path in s3.directories."
|
|
174
|
+
)
|
|
175
|
+
if not isinstance(raw_destination, str) or not raw_destination.strip():
|
|
176
|
+
raise ValueError(
|
|
177
|
+
f"Local run config{location} has invalid destination in s3.directories."
|
|
178
|
+
)
|
|
179
|
+
normalized_destination = s3_directory_destination(raw_destination)
|
|
180
|
+
directory_key = (raw_local_path, normalized_destination)
|
|
181
|
+
if directory_key in seen_directories:
|
|
182
|
+
continue
|
|
183
|
+
seen_directories.add(directory_key)
|
|
184
|
+
directories.append(
|
|
185
|
+
{
|
|
186
|
+
"local_path": raw_local_path,
|
|
187
|
+
"destination": normalized_destination,
|
|
188
|
+
}
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
ssm = data.get("ssm", {})
|
|
192
|
+
if not isinstance(ssm, dict):
|
|
193
|
+
raise ValueError(f"Local run config{location} has invalid ssm section.")
|
|
194
|
+
raw_parameters = ssm.get("parameters", {})
|
|
195
|
+
if not isinstance(raw_parameters, dict):
|
|
196
|
+
raise ValueError(f"Local run config{location} has invalid ssm.parameters.")
|
|
197
|
+
|
|
198
|
+
parameters: dict[str, str] = {}
|
|
199
|
+
for name, value in raw_parameters.items():
|
|
200
|
+
if not isinstance(name, str) or not name.strip() or " " in name:
|
|
201
|
+
raise ValueError(
|
|
202
|
+
f"Local run config{location} has invalid SSM parameter name."
|
|
203
|
+
)
|
|
204
|
+
if not isinstance(value, str):
|
|
205
|
+
raise ValueError(
|
|
206
|
+
f"Local run config{location} has invalid value for SSM parameter {name!r}."
|
|
207
|
+
)
|
|
208
|
+
parameters[name.strip()] = value
|
|
209
|
+
|
|
210
|
+
s3_config: dict[str, Any] = {
|
|
211
|
+
"buckets": dict(sorted(buckets.items())),
|
|
212
|
+
"objects": dict(sorted(objects.items())),
|
|
213
|
+
"directories": sorted(
|
|
214
|
+
directories,
|
|
215
|
+
key=lambda entry: (entry["destination"], entry["local_path"]),
|
|
216
|
+
),
|
|
217
|
+
}
|
|
218
|
+
if root is not None:
|
|
219
|
+
s3_config["root"] = root
|
|
220
|
+
|
|
221
|
+
normalized: dict[str, Any] = {
|
|
222
|
+
"version": 1,
|
|
223
|
+
"s3": s3_config,
|
|
224
|
+
"ssm": {"parameters": dict(sorted(parameters.items()))},
|
|
225
|
+
}
|
|
226
|
+
if profile:
|
|
227
|
+
normalized["profile"] = profile
|
|
228
|
+
return normalized
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def save_local_fixture_config(
    config: dict[str, Any], config_file: str | Path | None = None
) -> None:
    """Normalize *config* and write it to disk as indented JSON.

    Args:
        config: The configuration to persist; it is validated and normalized
            before anything is written.
        config_file: Output location. ``None`` selects
            ``LOCAL_RUN_CONFIG_FILE``.

    Raises:
        ValueError: If *config* fails normalization.
    """
    target = LOCAL_RUN_CONFIG_FILE if config_file is None else Path(config_file)
    normalized = normalize_local_fixture_config(config, path=target)
    # Create any missing parent directories before writing.
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(normalized, indent=4) + "\n"
    target.write_text(serialized, encoding="utf-8")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def config_to_runtime_kwargs(config: dict[str, Any]) -> dict[str, Any]:
    """Flatten a local run configuration into runtime keyword arguments.

    The S3 object map is layered in this order, with later layers overriding
    earlier ones for the same URI: root mapping, bucket mappings, explicit
    ``s3.objects`` entries, then directory mappings.

    Args:
        config: A configuration containing ``s3.objects`` and
            ``ssm.parameters``.

    Returns:
        A dictionary with ``create_buckets`` (sorted bucket names taken from
        the object URIs), ``s3_objects`` (URI to local file path) and
        ``ssm_parameters`` (a copy of the configured parameters).
    """
    layers = (
        expand_s3_root_mapping(config),
        expand_bucket_mappings(config),
        config["s3"]["objects"],
        expand_directory_mappings(config),
    )
    s3_objects: dict[str, str] = {}
    for layer in layers:
        s3_objects.update(layer)

    bucket_names = set()
    for object_uri in s3_objects:
        bucket_names.add(parse_s3_uri(object_uri)[0])

    ssm_parameters = dict(config["ssm"]["parameters"])
    return {
        "create_buckets": sorted(bucket_names),
        "s3_objects": s3_objects,
        "ssm_parameters": ssm_parameters,
    }
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def load_runtime_config(config_file: str | Path | None = None) -> dict[str, Any]:
|
|
254
|
+
path = Path(config_file) if config_file is not None else LOCAL_RUN_CONFIG_FILE
|
|
255
|
+
if not path.exists():
|
|
256
|
+
return {}
|
|
257
|
+
return config_to_runtime_kwargs(load_local_fixture_config(path))
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def expand_s3_root_mapping(config: dict[str, Any]) -> dict[str, str]:
    """Expand the ``s3.root`` directory into individual object mappings.

    Each immediate sub-directory of the root is treated as a bucket, and every
    file found beneath it (recursively) becomes an object whose key is the
    file's POSIX-style path relative to that bucket directory.

    Args:
        config: A configuration that may contain ``s3.root``.

    Returns:
        A mapping of normalized object URI to local file path; empty when no
        string ``s3.root`` is configured.

    Raises:
        ValueError: If the root does not exist, is not a directory, or a
            sub-directory name is not a valid bucket name.
    """
    root = config.get("s3", {}).get("root")
    if not isinstance(root, str):
        return {}

    root_path = Path(root)
    if not root_path.exists():
        raise ValueError(f"Mapped local S3 root directory does not exist: {root_path}")
    if not root_path.is_dir():
        raise ValueError(f"Mapped local S3 root path is not a directory: {root_path}")

    expanded: dict[str, str] = {}
    bucket_dirs = sorted(child for child in root_path.iterdir() if child.is_dir())
    for bucket_dir in bucket_dirs:
        bucket = normalize_s3_bucket_name(bucket_dir.name)
        files = sorted(item for item in bucket_dir.rglob("*") if item.is_file())
        for file_path in files:
            key = file_path.relative_to(bucket_dir).as_posix()
            object_uri = normalize_s3_object_uri(f"s3://{bucket}/{key}")
            expanded[object_uri] = str(file_path)
    return expanded
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def expand_bucket_mappings(config: dict[str, Any]) -> dict[str, str]:
    """Expand every ``s3.buckets`` directory into individual object mappings.

    Each file found recursively under a bucket's local directory becomes an
    object in that bucket, keyed by its POSIX-style path relative to the
    directory.

    Args:
        config: A configuration that may contain ``s3.buckets``.

    Returns:
        A mapping of normalized object URI to local file path. Entries whose
        key or value is not a string are ignored, and a non-dict ``buckets``
        value yields an empty mapping.

    Raises:
        ValueError: If a mapped path does not exist or is not a directory.
    """
    result: dict[str, str] = {}
    buckets = config.get("s3", {}).get("buckets", {})
    if not isinstance(buckets, dict):
        return result

    for bucket, local_path in buckets.items():
        if not (isinstance(bucket, str) and isinstance(local_path, str)):
            continue
        source_dir = Path(local_path)
        if not source_dir.exists():
            raise ValueError(
                f"Mapped local S3 bucket directory does not exist: {source_dir}"
            )
        if not source_dir.is_dir():
            raise ValueError(
                f"Mapped local S3 bucket path is not a directory: {source_dir}"
            )
        files = sorted(item for item in source_dir.rglob("*") if item.is_file())
        for file_path in files:
            key = file_path.relative_to(source_dir).as_posix()
            object_uri = normalize_s3_object_uri(f"s3://{bucket}/{key}")
            result[object_uri] = str(file_path)
    return result
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def expand_directory_mappings(config: dict[str, Any]) -> dict[str, str]:
    """Expand every ``s3.directories`` entry into individual object mappings.

    Each file found recursively under an entry's ``local_path`` becomes an
    object beneath the entry's ``destination`` prefix, keyed by its
    POSIX-style path relative to ``local_path``.

    Args:
        config: A configuration that may contain ``s3.directories``.

    Returns:
        A mapping of normalized object URI to local file path. Entries that
        are not dicts, or whose ``local_path`` / ``destination`` is not a
        string, are ignored; a non-list ``directories`` value yields an empty
        mapping.

    Raises:
        ValueError: If a mapped path does not exist or is not a directory, or
            a resulting object URI is invalid.
    """
    expanded: dict[str, str] = {}
    directories = config.get("s3", {}).get("directories", [])
    if not isinstance(directories, list):
        return expanded
    for entry in directories:
        if not isinstance(entry, dict):
            continue
        local_path = entry.get("local_path")
        destination = entry.get("destination")
        if not isinstance(local_path, str) or not isinstance(destination, str):
            continue
        source_dir = Path(local_path)
        if not source_dir.exists():
            raise ValueError(f"Mapped local directory does not exist: {source_dir}")
        if not source_dir.is_dir():
            raise ValueError(f"Mapped local path is not a directory: {source_dir}")
        # A destination written without a trailing slash would otherwise be
        # glued straight onto the file name ("s3://b/data" + "x.csv" ->
        # "s3://b/datax.csv"), so make sure the separator is present.
        prefix = destination if destination.endswith("/") else f"{destination}/"
        for candidate in sorted(filter(Path.is_file, source_dir.rglob("*"))):
            relative_path = candidate.relative_to(source_dir).as_posix()
            target_uri = normalize_s3_object_uri(f"{prefix}{relative_path}")
            expanded[target_uri] = str(candidate)
    return expanded
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def copy_mapped_file(source_path: str, destination: str | Path) -> Path:
|
|
329
|
+
source = Path(source_path)
|
|
330
|
+
if not source.is_file():
|
|
331
|
+
raise ValueError(f"Mapped local file does not exist: {source}")
|
|
332
|
+
destination_path = Path(destination)
|
|
333
|
+
if str(destination).endswith(("/", "\\")) or destination_path.is_dir():
|
|
334
|
+
destination_path = destination_path / source.name
|
|
335
|
+
destination_path.parent.mkdir(parents=True, exist_ok=True)
|
|
336
|
+
shutil.copyfile(source, destination_path)
|
|
337
|
+
return destination_path
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from sqlframe.duckdb import DuckDBSession
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SparkContext:
    """Minimal local placeholder for Glue bootstrap compatibility.

    Example usage in a Glue job or notebook::

        import sys

        from awsglue.context import GlueContext
        from awsglue.job import Job
        from awsglue.utils import getResolvedOptions
        from pyspark.context import SparkContext

        sc = SparkContext()
        glueContext = GlueContext(sc)
        spark = glueContext.spark_session
        args = getResolvedOptions(sys.argv, ['JOB_NAME'])
        job = Job(glueContext)
        job.init(args['JOB_NAME'], args)
    """

    def __init__(self):
        # Keep a handle on sqlframe's DuckDB session builder.
        self._builder = DuckDBSession.builder

    @classmethod
    def getOrCreate(cls):
        """Return a new ``SparkContext``; no instance is cached between calls."""
        return cls()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from sqlframe.duckdb import DuckDBSession
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SparkSession(DuckDBSession):
    """Minimal local placeholder for Glue bootstrap compatibility.

    This is probably redundant with sqlframe's own session class, but it is
    kept here just in case.

    Example usage in a Glue job or notebook::

        from pyspark.sql import SparkSession

        spark = (
            SparkSession.builder.appName("DataMigrationCrossTrackProfiling")
            .config("spark.sql.caseSensitive", "true")
            .getOrCreate()
        )
    """

    def __init__(self):
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm that is intentional.
        self._builder = DuckDBSession.builder

    def getOrCreate(self):
        """Return whatever the stored builder's ``getOrCreate()`` produces."""
        session = self._builder.getOrCreate()
        return session
|