pybutt 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_tests/app.py +713 -0
- pybutt/__init__.py +17 -0
- pybutt/cli/__init__.py +11 -0
- pybutt/cli/app.py +94 -0
- pybutt/cli/combine_command.py +236 -0
- pybutt/cli/export_command.py +317 -0
- pybutt/cli/import_command.py +286 -0
- pybutt/cli/inspect_command.py +30 -0
- pybutt/cli/purge_command.py +235 -0
- pybutt/core/__init__.py +30 -0
- pybutt/core/base.py +124 -0
- pybutt/core/config.py +144 -0
- pybutt/core/logobs.py +445 -0
- pybutt/exceptions.py +82 -0
- pybutt/files/__init__.py +28 -0
- pybutt/files/combine.py +93 -0
- pybutt/files/inspect.py +51 -0
- pybutt/files/manifest.py +160 -0
- pybutt/io/__init__.py +6 -0
- pybutt/io/combiner.py +119 -0
- pybutt/io/exporter.py +612 -0
- pybutt/io/importer.py +928 -0
- pybutt/io/purger.py +44 -0
- pybutt-2.0.0.dist-info/METADATA +756 -0
- pybutt-2.0.0.dist-info/RECORD +39 -0
- pybutt-2.0.0.dist-info/WHEEL +5 -0
- pybutt-2.0.0.dist-info/entry_points.txt +2 -0
- pybutt-2.0.0.dist-info/licenses/LICENSE +21 -0
- pybutt-2.0.0.dist-info/top_level.txt +3 -0
- tests/conftest.py +22 -0
- tests/test_cli.py +979 -0
- tests/test_cli_help.py +130 -0
- tests/test_combiner.py +259 -0
- tests/test_core.py +1009 -0
- tests/test_exporter.py +637 -0
- tests/test_files.py +178 -0
- tests/test_import_retry_logic.py +837 -0
- tests/test_logobs.py +491 -0
- tests/test_purge.py +219 -0
old_tests/app.py
ADDED
|
@@ -0,0 +1,713 @@
|
|
|
1
|
+
import getpass
|
|
2
|
+
import tomllib
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from pybutt.core.config import (
|
|
8
|
+
BATCH_SIZE_DEFAULT,
|
|
9
|
+
CCI_DEFAULT,
|
|
10
|
+
DRIVER_DEFAULT,
|
|
11
|
+
ENCRYPT_DEFAULT,
|
|
12
|
+
EXPORT_ENGINE_DEFAULT,
|
|
13
|
+
FETCH_SIZE_DEFAULT,
|
|
14
|
+
IMPORT_ENGINE_DEFAULT,
|
|
15
|
+
MEM_COOLDOWN_DEFAULT,
|
|
16
|
+
MEM_HEARTBEAT_DEFAULT,
|
|
17
|
+
MEM_MAX_WAIT_DEFAULT,
|
|
18
|
+
MEM_SLEEP_DEFAULT,
|
|
19
|
+
MEM_THRESHOLD_DEFAULT,
|
|
20
|
+
PACKET_SIZE_DEFAULT,
|
|
21
|
+
RETRIES_DEFAULT,
|
|
22
|
+
ROWGROUP_SIZE_DEFAULT,
|
|
23
|
+
SCHEMA_DEFAULT,
|
|
24
|
+
TRANSACTION_MODE_DEFAULT,
|
|
25
|
+
TRUST_CERT_DEFAULT,
|
|
26
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
27
|
+
SqlConfig,
|
|
28
|
+
TransactionMode,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Root Typer application object; every command below registers itself on it.
# The help flag names are overridden to "-?" and "--help", which leaves "-h"
# free for use as a short option by individual commands.
# NOTE(review): the leading whitespace inside the help string is assumed to be
# four spaces — confirm against the original file.
app = typer.Typer(
    context_settings={"help_option_names": ["-?", "--help"]},
    help="""
    PyButt CLI for exporting and importing between MS SQL Server tables and Parquet
    files. Can also be used for inspecting Parquet files and combining or purging files
    or tables based on manifest definitions.
    """,
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_project_version() -> str:
|
|
42
|
+
p = Path(__file__).resolve().parents[1] / "pyproject.toml"
|
|
43
|
+
return tomllib.loads(p.read_text(encoding="utf-8"))["project"]["version"]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _version_callback(ctx, param, value: bool):
    """Handle the eager ``--version`` flag.

    When the flag is set and the context is not in resilient-parsing mode,
    print the project version and stop the program via ``typer.Exit``.
    Otherwise do nothing.
    """
    wants_version = bool(value) and not ctx.resilient_parsing
    if wants_version:
        # Label and number are emitted by two separate echo calls; the first
        # suppresses the newline so both land on one output line.
        typer.echo("PyButt version: ", nl=False)
        typer.echo(_get_project_version())
        raise typer.Exit()
    return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@app.callback(invoke_without_command=True)
def _main_callback(
    ctx: typer.Context,
    version: bool = typer.Option(  # noqa: B008
        False,
        "--version",
        "-v",
        callback=_version_callback,
        is_eager=True,
    ),
):
    """PyButt CLI root callback."""
    # Nothing to do here: the --version flag is handled entirely by its
    # eager option callback.
    return None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def build_sql_config(
    server: str,
    database: str,
    schema: str,
    table: str,
    username: str | None,
    password: str | None,
    driver: str,
    trusted_connection: bool = True,
    trust_cert: bool = False,
    encrypt: bool = True,
    retries: int = 1,
    packet_size: int = 4_096,
) -> SqlConfig:
    """Assemble a ``SqlConfig`` from command-line connection options.

    When ``trusted_connection`` is false, a username is mandatory and a
    missing (or empty) password is requested interactively with ``getpass``.
    When it is true, ``username`` and ``password`` are passed through
    unchanged and no prompt is shown.

    Raises:
        typer.BadParameter: if ``trusted_connection`` is false and no
            username was supplied.
    """
    needs_credentials = not trusted_connection

    if needs_credentials and not username:
        raise typer.BadParameter(
            "--username is required unless --trusted-connection is used"
        )

    if needs_credentials and not password:
        # Prompt without echoing the typed characters.
        password = getpass.getpass("Enter your password: ")

    settings = {
        "server": server,
        "database": database,
        "schema": schema,
        "table": table,
        "username": username,
        "password": password,
        "driver": driver,
        "trusted_connection": trusted_connection,
        "trust_cert": trust_cert,
        "encrypt": encrypt,
        "retries": retries,
        "packet_size": packet_size,
    }
    return SqlConfig(**settings)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@app.command(
    "export",
    help=(
        "Export a SQL Server table to Parquet and write a manifest of output "
        "file names."
    ),
)
def export(
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    # --- Server connection -------------------------------------------------
    server: str = typer.Option(  # noqa: B008
        ...,
        "--server",
        "-s",
        help="SQL Server hostname or instance.",
        rich_help_panel="Server Connection Options",
    ),
    database: str = typer.Option(  # noqa: B008
        ...,
        "--database",
        "-d",
        help="Target SQL Server database.",
        rich_help_panel="Server Connection Options",
    ),
    engine: str = typer.Option(  # noqa: B008
        EXPORT_ENGINE_DEFAULT,
        "--engine",
        "-E",
        help="Export engine to use: duckdb, pyodbc, or mssql-python.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver to use.",
        rich_help_panel="Server Connection Options",
    ),
    # --- SQL data object ---------------------------------------------------
    schema: str = typer.Option(  # noqa: B008
        SCHEMA_DEFAULT,
        "--schema",
        "-S",
        help="Target table schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str = typer.Option(  # noqa: B008
        ...,
        "--table",
        "-t",
        help="Target table name.",
        rich_help_panel="SQL Data Object Options",
    ),
    parameters: str | None = typer.Option(  # noqa: B008
        None,
        "--parameters",
        help=(
            "Comma-separated list of parameter values to pass to a table-valued "
            "function. Example: --parameters 12,'fred','1989'."
        ),
        rich_help_panel="SQL Data Object Options",
    ),
    columns: str | None = typer.Option(  # noqa: B008
        None,
        "--columns",
        "-C",
        help="Comma-separated list of columns to export. Defaults to all columns.",
        rich_help_panel="SQL Data Object Options",
    ),
    pk_column: str | None = typer.Option(  # noqa: B008
        None,
        "--pk-column",
        "-P",
        help="Primary key column for deterministic partitioning.",
        rich_help_panel="SQL Data Object Options",
    ),
    # --- Server security ---------------------------------------------------
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        # Short forms added so this option matches the "import" and "combine"
        # commands; neither -e nor -n is used by another export option.
        "-e/-n",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    # --- Files -------------------------------------------------------------
    output_path: Path = typer.Option(  # noqa: B008
        ...,
        "--output-path",
        "-o",
        help="Directory to write Parquet files and manifest.",
        rich_help_panel="File Options",
        file_okay=False,
        dir_okay=True,
        writable=True,
    ),
    manifest_filename: str | None = typer.Option(  # noqa: B008
        None,
        "--manifest-filename",
        "-m",
        help=(
            "Manifest filename to write for export. Defaults to "
            "<schema>_<table>_manifest.json."
        ),
        rich_help_panel="File Options",
    ),
    file_count: int = typer.Option(  # noqa: B008
        1,
        "--file-count",
        "-f",
        help=("Number of Parquet output files. "),
        rich_help_panel="File Options",
        min=1,
    ),
    # --- Transport tuning --------------------------------------------------
    fetch_size: int | None = typer.Option(  # noqa: B008
        FETCH_SIZE_DEFAULT,
        "--fetch-size",
        "-F",
        help=("Cursor fetch size for pyodbc export."),
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    rowgroup_size: int = typer.Option(  # noqa: B008
        ROWGROUP_SIZE_DEFAULT,
        "--rowgroup-size",
        "-R",
        help="Number of rows per rowgroup in the Parquet files.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
    worker_count: int = typer.Option(  # noqa: B008
        1,
        "--worker-count",
        "-w",
        help="Number of worker processes used for export.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    # --- Memory tuning -----------------------------------------------------
    mem_heartbeat: float = typer.Option(  # noqa: B008
        MEM_HEARTBEAT_DEFAULT,
        "--mem-heartbeat",
        # "-h" is available because the app's help flags are "-?"/"--help".
        "-h",
        help=("Log process memory (RSS + system %) every N seconds."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_threshold: float = typer.Option(  # noqa: B008
        MEM_THRESHOLD_DEFAULT,
        "--mem-threshold",
        help=(
            "System memory % at which workers are throttled. "
            "Set to 0 to disable throttling."
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
        max=100,
    ),
    mem_sleep: float = typer.Option(  # noqa: B008
        MEM_SLEEP_DEFAULT,
        "--mem-sleep",
        help=("Seconds to sleep per throttle check when memory is high. "),
        rich_help_panel="Memory Tuning Options",
        min=0.1,
    ),
    mem_max_wait: float = typer.Option(  # noqa: B008
        MEM_MAX_WAIT_DEFAULT,
        "--mem-max-wait",
        help=("Max total seconds to wait during memory throttling before giving up."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_cooldown: float = typer.Option(  # noqa: B008
        MEM_COOLDOWN_DEFAULT,
        "--mem-cooldown",
        help=(
            "Seconds after a throttle event before re-checking. Prevents "
            "the gate from serialising workers"
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
) -> None:
    """Export data from a SQL Server table to one or more Parquet files.

    The command writes one or more Parquet files into OUTPUT_PATH and
    creates a manifest file listing the generated parquet file names.

    In this module the function declares only the command-line surface
    (option names, defaults, bounds and help panels); it has no body beyond
    this docstring. The text shown by ``export --help`` comes from the
    ``help=`` argument of the decorator, not from this docstring.
    """
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@app.command(
    "import",
    help=("Import Parquet files into a SQL Server table using a manifest file."),
)
def import_data(
    # Positional argument: the manifest that lists the files to import.
    manifest: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    # --- Server connection -------------------------------------------------
    server: str = typer.Option(  # noqa: B008
        ...,
        "--server",
        "-s",
        help="SQL Server hostname or instance.",
        rich_help_panel="Server Connection Options",
    ),
    database: str = typer.Option(  # noqa: B008
        ...,
        "--database",
        "-d",
        help="Target SQL Server database.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver name.",
        rich_help_panel="Server Connection Options",
    ),
    engine: str = typer.Option(  # noqa: B008
        IMPORT_ENGINE_DEFAULT,
        "--engine",
        "-E",
        help="Import engine to use: duckdb, pyodbc, or mssql-python.",
        rich_help_panel="Server Connection Options",
        case_sensitive=False,
    ),
    transaction_mode: TransactionMode = typer.Option(  # noqa: B008
        TRANSACTION_MODE_DEFAULT,
        "--transaction-mode",
        "-M",
        help=(
            "Transaction scope: batch (per batch), rowgroup (per row group, "
            "recommended), file (entire file)."
        ),
        rich_help_panel="Server Connection Options",
    ),
    # --- SQL data object ---------------------------------------------------
    # NOTE(review): the default is the literal "dbo" here, whereas the
    # "export" and "combine" commands in this file use SCHEMA_DEFAULT —
    # confirm the two are meant to agree.
    schema: str = typer.Option(  # noqa: B008
        "dbo",
        "--schema",
        "-S",
        help="Target table schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str = typer.Option(  # noqa: B008
        ...,
        "--table",
        "-t",
        help="Target table name.",
        rich_help_panel="SQL Data Object Options",
    ),
    cci: bool = typer.Option(  # noqa: B008
        CCI_DEFAULT,
        "--cci/--no-cci",
        help=(
            "Create a clustered columnstore index on the per-worker temp tables "
            "used during multi-worker import. Use --no-cci to keep the previous "
            "heap behaviour. Enabled by default."
        ),
        rich_help_panel="SQL Data Object Options",
    ),
    # --- Server security ---------------------------------------------------
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        "-e/-n",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    # --- Files -------------------------------------------------------------
    # The Python parameter name differs from the flag it is bound to
    # (--imported-manifest-filename / -o).
    temp_manifest_filename: str | None = typer.Option(  # noqa: B008
        None,
        "--imported-manifest-filename",
        "-o",
        help=(
            "Override the import worker manifest filename written during "
            "multi-worker import. Defaults to <schema>_<table>_import_manifest.json."
        ),
        rich_help_panel="File Options",
    ),
    # --- Transport tuning --------------------------------------------------
    batch_size: int | None = typer.Option(  # noqa: B008
        BATCH_SIZE_DEFAULT,
        "--batch-size",
        "-b",
        help="Rows per batch insert.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
    worker_count: int = typer.Option(  # noqa: B008
        1,
        "--worker-count",
        "-w",
        help="Number of parallel import threads.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    # --- Memory tuning -----------------------------------------------------
    mem_heartbeat: float = typer.Option(  # noqa: B008
        MEM_HEARTBEAT_DEFAULT,
        "--mem-heartbeat",
        # "-h" is available because the app's help flags are "-?"/"--help".
        "-h",
        help=("Log process memory (RSS + system %) every N seconds."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_threshold: float = typer.Option(  # noqa: B008
        MEM_THRESHOLD_DEFAULT,
        "--mem-threshold",
        help=(
            "System memory % at which workers are throttled. "
            "Set to 0 to disable throttling."
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
        max=100,
    ),
    mem_sleep: float = typer.Option(  # noqa: B008
        MEM_SLEEP_DEFAULT,
        "--mem-sleep",
        help=("Seconds to sleep per throttle check when memory is high. "),
        rich_help_panel="Memory Tuning Options",
        min=0.1,
    ),
    mem_max_wait: float = typer.Option(  # noqa: B008
        MEM_MAX_WAIT_DEFAULT,
        "--mem-max-wait",
        help=("Max total seconds to wait during memory throttling before giving up."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_cooldown: float = typer.Option(  # noqa: B008
        MEM_COOLDOWN_DEFAULT,
        "--mem-cooldown",
        help=(
            "Seconds after a throttle event before re-checking. Prevents "
            "the gate from serialising workers"
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
) -> None:
    """Import one or more Parquet files into SQL Server tables.

    The command reads the manifest file and imports each Parquet file into the
    target table. If the number of workers is greater than 1, the import will be
    done using multiple tables created to the same data schema as the target table.

    In this module the function declares only the command-line surface; it
    has no body beyond this docstring.
    """
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
@app.command(
    "combine",
    help=(
        "Merge objects listed in a manifest. "
        "For file manifests, concatenate Parquet files to a single output. "
        "For table manifests, insert from SQL tables into a single target table."
    ),
)
def combine(
    # Positional argument: the manifest that lists the objects to merge.
    manifest: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    # --- Server connection (all optional for this command) -----------------
    server: str | None = typer.Option(  # noqa: B008
        None,
        "--server",
        "-s",
        help="SQL Server host.",
        rich_help_panel="Server Connection Options",
    ),
    database: str | None = typer.Option(  # noqa: B008
        None,
        "--database",
        "-d",
        help="Target database.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver name.",
        rich_help_panel="Server Connection Options",
    ),
    # --- SQL data object ---------------------------------------------------
    schema: str = typer.Option(  # noqa: B008
        SCHEMA_DEFAULT,
        "--schema",
        "-S",
        help="Target schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str | None = typer.Option(  # noqa: B008
        None,
        "--table",
        "-t",
        help="Target table.",
        rich_help_panel="SQL Data Object Options",
    ),
    # --- Server security ---------------------------------------------------
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        "-e/-n",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    # --- Files -------------------------------------------------------------
    rowgroup_size: int = typer.Option(  # noqa: B008
        ROWGROUP_SIZE_DEFAULT,
        "--rowgroup-size",
        "-R",
        help="Rowgroup size for output.",
        rich_help_panel="File Options",
        # Same lower bound the "export" command enforces for this option, so
        # zero or negative sizes are rejected during argument parsing.
        min=1,
    ),
    # --- Transport tuning --------------------------------------------------
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
) -> None:
    """Combine objects listed in a manifest.

    For file manifests, this command concatenates Parquet files into a single output.
    For table manifests, it inserts from SQL tables into a single target table.

    In this module the function declares only the command-line surface; it
    has no body beyond this docstring.
    """
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
@app.command(
    "inspect",
    help=(
        "Inspect Parquet files listed in a manifest. "
        "Shows file-level metadata and optionally column-level details."
    ),
)
def inspect(
    manifest: Path = typer.Argument(  # noqa: B008
        ...,
        help="Path to the input manifest file",
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show column details",
    ),
):
    """Inspect parquet files listed in a manifest."""
    # NOTE: this copy of the command is a stub; the call
    # inspect_manifest(manifest, verbose) is commented out in this module.
    return None
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
@app.command(
    "purge",
    help=(
        "Purge Parquet files or SQL tables listed in a manifest. "
        "Also deletes the input manifest file."
    ),
)
def purge(
    manifest: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        # Help text corrected: the previous wording ("Show column details")
        # described the inspect command, not purge.
        False, "--verbose", "-V", help="Show verbose logging output."
    ),
):
    """Purge Parquet files or SQL tables listed in a manifest.

    According to the command's help text, the input manifest file is deleted
    as well. In this module the function declares only the command-line
    surface; it has no body beyond this docstring.
    """
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|