pybutt 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_tests/app.py +713 -0
- pybutt/__init__.py +17 -0
- pybutt/cli/__init__.py +11 -0
- pybutt/cli/app.py +94 -0
- pybutt/cli/combine_command.py +236 -0
- pybutt/cli/export_command.py +317 -0
- pybutt/cli/import_command.py +286 -0
- pybutt/cli/inspect_command.py +30 -0
- pybutt/cli/purge_command.py +235 -0
- pybutt/core/__init__.py +30 -0
- pybutt/core/base.py +124 -0
- pybutt/core/config.py +144 -0
- pybutt/core/logobs.py +445 -0
- pybutt/exceptions.py +82 -0
- pybutt/files/__init__.py +28 -0
- pybutt/files/combine.py +93 -0
- pybutt/files/inspect.py +51 -0
- pybutt/files/manifest.py +160 -0
- pybutt/io/__init__.py +6 -0
- pybutt/io/combiner.py +119 -0
- pybutt/io/exporter.py +612 -0
- pybutt/io/importer.py +928 -0
- pybutt/io/purger.py +44 -0
- pybutt-2.0.0.dist-info/METADATA +756 -0
- pybutt-2.0.0.dist-info/RECORD +39 -0
- pybutt-2.0.0.dist-info/WHEEL +5 -0
- pybutt-2.0.0.dist-info/entry_points.txt +2 -0
- pybutt-2.0.0.dist-info/licenses/LICENSE +21 -0
- pybutt-2.0.0.dist-info/top_level.txt +3 -0
- tests/conftest.py +22 -0
- tests/test_cli.py +979 -0
- tests/test_cli_help.py +130 -0
- tests/test_combiner.py +259 -0
- tests/test_core.py +1009 -0
- tests/test_exporter.py +637 -0
- tests/test_files.py +178 -0
- tests/test_import_retry_logic.py +837 -0
- tests/test_logobs.py +491 -0
- tests/test_purge.py +219 -0
pybutt/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from pybutt.core.config import SqlConfig, TransactionMode
|
|
2
|
+
from pybutt.exceptions import PyButtError
|
|
3
|
+
from pybutt.files import combine_parquet_files, inspect_manifest
|
|
4
|
+
from pybutt.io.combiner import TableCombine
|
|
5
|
+
from pybutt.io.exporter import Exporter
|
|
6
|
+
from pybutt.io.importer import Importer
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"SqlConfig",
|
|
10
|
+
"TransactionMode",
|
|
11
|
+
"Exporter",
|
|
12
|
+
"Importer",
|
|
13
|
+
"TableCombine",
|
|
14
|
+
"combine_parquet_files",
|
|
15
|
+
"inspect_manifest",
|
|
16
|
+
"PyButtError",
|
|
17
|
+
]
|
pybutt/cli/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# isort: skip_file
|
|
2
|
+
from .app import app
|
|
3
|
+
|
|
4
|
+
# Import command modules so @app.command decorators register
|
|
5
|
+
from . import combine_command # noqa: F401
|
|
6
|
+
from . import export_command # noqa: F401
|
|
7
|
+
from . import import_command # noqa: F401
|
|
8
|
+
from . import inspect_command # noqa: F401
|
|
9
|
+
from . import purge_command # noqa: F401
|
|
10
|
+
|
|
11
|
+
__all__ = ["app"]
|
pybutt/cli/app.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import getpass
|
|
2
|
+
import tomllib
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from pybutt.core.config import (
|
|
8
|
+
DRIVER_DEFAULT,
|
|
9
|
+
ENCRYPT_DEFAULT,
|
|
10
|
+
PACKET_SIZE_DEFAULT,
|
|
11
|
+
RETRIES_DEFAULT,
|
|
12
|
+
TRUST_CERT_DEFAULT,
|
|
13
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
14
|
+
SqlConfig,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Root Typer application for the PyButt CLI. Commands are attached to this
# object with ``@app.command``. Help is reachable through both ``-?`` and
# ``--help`` (see ``help_option_names`` below).
app = typer.Typer(
    context_settings={"help_option_names": ["-?", "--help"]},
    help="""
PyButt CLI for exporting and importing between MS SQL Server tables and Parquet
files. Can also be used for inspecting Parquet files and combining files or tables
based on manifest definitions.
""",
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_project_version() -> str:
|
|
28
|
+
p = Path(__file__).resolve().parents[2] / "pyproject.toml"
|
|
29
|
+
return tomllib.loads(p.read_text(encoding="utf-8"))["project"]["version"]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _version_callback(ctx, param, value: bool):
|
|
33
|
+
if not value or ctx.resilient_parsing:
|
|
34
|
+
return
|
|
35
|
+
typer.echo("PyButt version: ", nl=False)
|
|
36
|
+
typer.echo(_get_project_version())
|
|
37
|
+
raise typer.Exit()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Root-level callback. ``invoke_without_command=True`` makes it run even when
# no subcommand is supplied on the command line.
@app.callback(invoke_without_command=True)
def _main_callback(
    ctx: typer.Context,
    # Eager flag whose handling is delegated entirely to ``_version_callback``.
    version: bool = typer.Option(
        False, "--version", "-v", callback=_version_callback, is_eager=True
    ),
):
    """PyButt CLI root callback."""
    # Intentionally empty: the only root-level behaviour lives in the
    # ``--version`` option callback above.
    return
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def parse_columns(columns: str | None) -> list[str] | None:
|
|
52
|
+
if columns is None:
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
parsed = [column.strip() for column in columns.split(",") if column.strip()]
|
|
56
|
+
if not parsed:
|
|
57
|
+
raise typer.BadParameter("--columns cannot be empty")
|
|
58
|
+
return parsed
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def build_sql_config(
    server: str,
    database: str,
    username: str | None,
    password: str | None,
    driver: str = DRIVER_DEFAULT,
    trusted_connection: bool = TRUSTED_CONNECTION_DEFAULT,
    trust_cert: bool = TRUST_CERT_DEFAULT,
    encrypt: bool = ENCRYPT_DEFAULT,
    retries: int = RETRIES_DEFAULT,
    packet_size: int = PACKET_SIZE_DEFAULT,
) -> SqlConfig:
    """Assemble a ``SqlConfig`` from CLI connection options.

    When ``trusted_connection`` is false, a username is mandatory and a
    missing password is requested interactively via ``getpass``. With a
    trusted connection, ``username`` and ``password`` are forwarded as given.

    Raises:
        typer.BadParameter: If no username is supplied and
            ``trusted_connection`` is false.
    """
    uses_sql_login = not trusted_connection

    if uses_sql_login and not username:
        raise typer.BadParameter(
            "--username is required unless --trusted-connection is used"
        )

    if uses_sql_login and not password:
        # Ask on the terminal instead of requiring the secret on the command line.
        password = getpass.getpass("Enter your password: ")

    connection_settings = {
        "server": server,
        "database": database,
        "username": username,
        "password": password,
        "driver": driver,
        "trusted_connection": trusted_connection,
        "trust_cert": trust_cert,
        "encrypt": encrypt,
        "retries": retries,
        "packet_size": packet_size,
    }
    return SqlConfig(**connection_settings)
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from pybutt.cli.app import (
|
|
6
|
+
app,
|
|
7
|
+
build_sql_config,
|
|
8
|
+
)
|
|
9
|
+
from pybutt.core.config import (
|
|
10
|
+
DRIVER_DEFAULT,
|
|
11
|
+
ENCRYPT_DEFAULT,
|
|
12
|
+
PACKET_SIZE_DEFAULT,
|
|
13
|
+
RETRIES_DEFAULT,
|
|
14
|
+
ROWGROUP_SIZE_DEFAULT,
|
|
15
|
+
SCHEMA_DEFAULT,
|
|
16
|
+
TRUST_CERT_DEFAULT,
|
|
17
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
18
|
+
)
|
|
19
|
+
from pybutt.core.logobs import configure_logging, get_logger
|
|
20
|
+
from pybutt.exceptions import PyButtError
|
|
21
|
+
from pybutt.files import (
|
|
22
|
+
combine_parquet_files,
|
|
23
|
+
load_manifest,
|
|
24
|
+
write_manifest,
|
|
25
|
+
)
|
|
26
|
+
from pybutt.io.combiner import TableCombine
|
|
27
|
+
|
|
28
|
+
logger = get_logger("cli.combine")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.command(
    "combine",
    help=(
        "Combine objects listed in a manifest. "
        "For file manifests, concatenate Parquet files to a single output. "
        "For table manifests, insert from SQL tables into a single target table."
    ),
)
def combine(
    manifest_path: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    server: str | None = typer.Option(  # noqa: B008
        None,
        "--server",
        "-s",
        help="SQL Server host.",
        rich_help_panel="Server Connection Options",
    ),
    database: str | None = typer.Option(  # noqa: B008
        None,
        "--database",
        "-d",
        help="Target database.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver name.",
        rich_help_panel="Server Connection Options",
    ),
    schema: str = typer.Option(  # noqa: B008
        SCHEMA_DEFAULT,
        "--schema",
        "-S",
        help="Target schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str | None = typer.Option(  # noqa: B008
        None,
        "--table",
        "-t",
        help="Target table.",
        rich_help_panel="SQL Data Object Options",
    ),
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    output_file: Path | None = typer.Option(  # noqa: B008
        None,
        "--output-file",
        "-o",
        help="Output Parquet file when combining files.",
        rich_help_panel="File Options",
        file_okay=True,
        dir_okay=False,
    ),
    rowgroup_size: int = typer.Option(  # noqa: B008
        ROWGROUP_SIZE_DEFAULT,
        "--rowgroup-size",
        "-R",
        help="Rowgroup size for output.",
        rich_help_panel="File Options",
    ),
    output_manifest_filename: str | None = typer.Option(  # noqa: B008
        None,
        "--combined-manifest-filename",
        "-m",
        help=(
            "Override the combined manifest filename for the written file. Defaults"
            " to <manifest-stem>_combined<manifest-suffix>."
        ),
        rich_help_panel="File Options",
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
) -> None:
    """Combine objects listed in a manifest.

    For file manifests, this command concatenates Parquet files into a single output.
    For table manifests, it inserts from SQL tables into a single target table.
    """
    # Flow: load the manifest, then dispatch on manifest["type"].
    #   "files"  -> requires --output-file; delegates to combine_parquet_files.
    #   "tables" -> requires --server/--database/--schema/--table; runs
    #               TableCombine and then writes a one-entry "tables" manifest
    #               naming the combined table next to the input manifest.
    # Any PyButtError is reported on stderr and turned into exit status 1;
    # an unknown manifest type is rejected with typer.BadParameter.

    configure_logging(verbose)

    try:
        manifest = load_manifest(manifest_path)
    except PyButtError as exc:
        typer.secho(f"Combine failed: {exc}", fg=typer.colors.RED, err=True)
        raise SystemExit(1) from exc

    if manifest["type"] == "files":
        if output_file is None:
            raise typer.BadParameter("--output-file is required for file manifests")

        try:
            combine_parquet_files(
                manifest_path,
                output_file,
                rowgroup_size,
            )
        except PyButtError as exc:
            typer.secho(f"Combine failed: {exc}", fg=typer.colors.RED, err=True)
            raise SystemExit(1) from exc
        typer.secho("File combine completed successfully.", fg=typer.colors.GREEN)
        return

    # tables manifest
    if manifest["type"] == "tables":
        if not (server and database and schema and table):
            raise typer.BadParameter(
                "--server, --database, --schema and "
                "--table are required for table manifests"
            )

        config = build_sql_config(
            server=server,
            database=database,
            username=username,
            password=password,
            driver=driver,
            trusted_connection=trusted_connection,
            trust_cert=trust_cert,
            encrypt=encrypt,
            retries=retries,
            packet_size=packet_size,
        )

        try:
            combiner = TableCombine(
                config=config, sources=manifest["entries"], schema=schema, table=table
            )
            combiner.combine()
        except PyButtError as exc:
            typer.secho(f"Combine failed: {exc}", fg=typer.colors.RED, err=True)
            raise SystemExit(1) from exc

        # ToDo: Review where this should be, and consider
        # adding user override for the output path.
        # Honour --combined-manifest-filename when given; otherwise derive the
        # name from the input manifest (<stem>_combined<suffix>).
        new_manifest_name = (
            output_manifest_filename
            or f"{manifest_path.stem}_combined{manifest_path.suffix}"
        )
        write_manifest(
            manifest_path.parent / new_manifest_name,
            [f"{schema}.{table}"],
            manifest_type="tables",
        )

        typer.secho("Table combine completed successfully.", fg=typer.colors.GREEN)
        return

    raise typer.BadParameter(f"Unsupported manifest type: {manifest['type']}")
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from pybutt.cli.app import (
|
|
6
|
+
app,
|
|
7
|
+
build_sql_config,
|
|
8
|
+
parse_columns,
|
|
9
|
+
)
|
|
10
|
+
from pybutt.core.config import (
|
|
11
|
+
DRIVER_DEFAULT,
|
|
12
|
+
ENCRYPT_DEFAULT,
|
|
13
|
+
EXPORT_ENGINE_DEFAULT,
|
|
14
|
+
FETCH_SIZE_DEFAULT,
|
|
15
|
+
MEM_COOLDOWN_DEFAULT,
|
|
16
|
+
MEM_HEARTBEAT_DEFAULT,
|
|
17
|
+
MEM_MAX_WAIT_DEFAULT,
|
|
18
|
+
MEM_SLEEP_DEFAULT,
|
|
19
|
+
MEM_THRESHOLD_DEFAULT,
|
|
20
|
+
PACKET_SIZE_DEFAULT,
|
|
21
|
+
RETRIES_DEFAULT,
|
|
22
|
+
ROWGROUP_SIZE_DEFAULT,
|
|
23
|
+
SCHEMA_DEFAULT,
|
|
24
|
+
TRUST_CERT_DEFAULT,
|
|
25
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
26
|
+
)
|
|
27
|
+
from pybutt.core.logobs import configure_logging, get_logger
|
|
28
|
+
from pybutt.exceptions import PyButtError
|
|
29
|
+
from pybutt.io.exporter import Exporter
|
|
30
|
+
|
|
31
|
+
logger = get_logger("cli.export")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@app.command(
    "export",
    help=(
        "Export a SQL Server table to Parquet and write a manifest of output "
        "file names."
    ),
)
def export(
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    server: str = typer.Option(  # noqa: B008
        ...,
        "--server",
        "-s",
        help="SQL Server hostname or instance.",
        rich_help_panel="Server Connection Options",
    ),
    database: str = typer.Option(  # noqa: B008
        ...,
        "--database",
        "-d",
        help="Target SQL Server database.",
        rich_help_panel="Server Connection Options",
    ),
    engine: str = typer.Option(  # noqa: B008
        EXPORT_ENGINE_DEFAULT,
        "--engine",
        "-e",
        help="Export engine to use: duckdb, pyodbc, or mssql-python.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver to use.",
        rich_help_panel="Server Connection Options",
    ),
    schema: str = typer.Option(  # noqa: B008
        SCHEMA_DEFAULT,
        "--schema",
        "-S",
        help="Target table schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str = typer.Option(  # noqa: B008
        ...,
        "--table",
        "-t",
        help="Target table name.",
        rich_help_panel="SQL Data Object Options",
    ),
    parameters: str | None = typer.Option(  # noqa: B008
        None,
        "--parameters",
        "-a",
        help=(
            "Comma-separated list of parameter values to pass to a table-valued "
            "function. Example: --parameters 12,'fred','1989'."
        ),
        rich_help_panel="SQL Data Object Options",
    ),
    columns: str | None = typer.Option(  # noqa: B008
        None,
        "--columns",
        "-C",
        help="Comma-separated list of columns to export. Defaults to all columns.",
        rich_help_panel="SQL Data Object Options",
    ),
    pk_column: str | None = typer.Option(  # noqa: B008
        None,
        "--pk-column",
        "-P",
        help="Primary key column for deterministic partitioning.",
        rich_help_panel="SQL Data Object Options",
    ),
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    output_path: Path = typer.Option(  # noqa: B008
        ...,
        "--output-path",
        "-o",
        help="Directory to write Parquet files and manifest.",
        rich_help_panel="File Options",
        file_okay=False,
        dir_okay=True,
        writable=True,
    ),
    manifest_filename: str | None = typer.Option(  # noqa: B008
        None,
        "--manifest-filename",
        "-m",
        help=(
            "Manifest filename to write for export. Defaults to "
            "<schema>_<table>_manifest.json."
        ),
        rich_help_panel="File Options",
    ),
    file_count: int = typer.Option(  # noqa: B008
        1,
        "--file-count",
        "-f",
        help=("Number of Parquet output files. "),
        rich_help_panel="File Options",
        min=1,
    ),
    fetch_size: int | None = typer.Option(  # noqa: B008
        FETCH_SIZE_DEFAULT,
        "--fetch-size",
        "-F",
        help=("Cursor fetch size for pyodbc export."),
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    rowgroup_size: int = typer.Option(  # noqa: B008
        ROWGROUP_SIZE_DEFAULT,
        "--rowgroup-size",
        "-R",
        help="Number of rows per rowgroup in the Parquet files.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
    worker_count: int = typer.Option(  # noqa: B008
        1,
        "--worker-count",
        "-w",
        help="Number of worker processes used for export.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    mem_heartbeat: float = typer.Option(  # noqa: B008
        MEM_HEARTBEAT_DEFAULT,
        "--mem-heartbeat",
        help=("Log process memory (RSS + system %) every N seconds."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_threshold: float = typer.Option(  # noqa: B008
        MEM_THRESHOLD_DEFAULT,
        "--mem-threshold",
        help=(
            "System memory % at which workers are throttled. "
            "Set to 0 to disable throttling."
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
        max=100,
    ),
    mem_sleep: float = typer.Option(  # noqa: B008
        MEM_SLEEP_DEFAULT,
        "--mem-sleep",
        help=("Seconds to sleep per throttle check when memory is high. "),
        rich_help_panel="Memory Tuning Options",
        min=0.1,
    ),
    mem_max_wait: float = typer.Option(  # noqa: B008
        MEM_MAX_WAIT_DEFAULT,
        "--mem-max-wait",
        help=("Max total seconds to wait during memory throttling before giving up."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_cooldown: float = typer.Option(  # noqa: B008
        MEM_COOLDOWN_DEFAULT,
        "--mem-cooldown",
        help=(
            "Seconds after a throttle event before re-checking. Prevents "
            "the gate from serialising workers"
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
) -> None:
    """Export data from a SQL Server table to Parquet files.

    The command writes one or more Parquet files into OUTPUT_PATH and
    creates a manifest file listing the generated parquet file names.
    """
    # Flow: configure logging, build the SqlConfig from the connection
    # options, hand every tuning option to Exporter and run it. A PyButtError
    # is reported on stderr and converted into exit status 1.

    configure_logging(verbose)

    if mem_threshold > 0:
        # Lazy %-style arguments: "%%" collapses to a single "%" only when the
        # logger formats the message with args. NOTE(review): assumes
        # get_logger returns a stdlib-style logger — confirm.
        logger.info(
            "Memory throttling enabled: workers will sleep when system "
            "memory exceeds %.0f%% (--mem-threshold 0 to disable)",
            mem_threshold,
        )

    config = build_sql_config(
        server=server,
        database=database,
        username=username,
        password=password,
        driver=driver,
        trusted_connection=trusted_connection,
        trust_cert=trust_cert,
        encrypt=encrypt,
        retries=retries,
        packet_size=packet_size,
    )

    # The CLI option defaults to 1; the None guard only matters for direct
    # Python callers that pass None explicitly.
    effective_file_count = file_count if file_count is not None else 1

    try:
        exporter = Exporter(
            config=config,
            schema=schema,
            table=table,
            output_path=output_path,
            pk_column=pk_column,
            columns=parse_columns(columns),
            worker_count=worker_count,
            file_count=effective_file_count,
            rowgroup_size=rowgroup_size,
            fetch_size=fetch_size,
            engine=engine.lower(),
            manifest_filename=manifest_filename,
            parameters=parameters,
            mem_heartbeat=mem_heartbeat,
            mem_threshold=mem_threshold,
            mem_sleep=mem_sleep,
            mem_max_wait=mem_max_wait,
            mem_cooldown=mem_cooldown,
        )
        exporter.perform_work()
    except PyButtError as exc:
        typer.secho(f"Export failed: {exc}", fg=typer.colors.RED, err=True)
        raise SystemExit(1) from exc
    typer.secho("Export completed successfully.", fg=typer.colors.GREEN)
|