pybutt 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_tests/app.py +713 -0
- pybutt/__init__.py +17 -0
- pybutt/cli/__init__.py +11 -0
- pybutt/cli/app.py +94 -0
- pybutt/cli/combine_command.py +236 -0
- pybutt/cli/export_command.py +317 -0
- pybutt/cli/import_command.py +286 -0
- pybutt/cli/inspect_command.py +30 -0
- pybutt/cli/purge_command.py +235 -0
- pybutt/core/__init__.py +30 -0
- pybutt/core/base.py +124 -0
- pybutt/core/config.py +144 -0
- pybutt/core/logobs.py +445 -0
- pybutt/exceptions.py +82 -0
- pybutt/files/__init__.py +28 -0
- pybutt/files/combine.py +93 -0
- pybutt/files/inspect.py +51 -0
- pybutt/files/manifest.py +160 -0
- pybutt/io/__init__.py +6 -0
- pybutt/io/combiner.py +119 -0
- pybutt/io/exporter.py +612 -0
- pybutt/io/importer.py +928 -0
- pybutt/io/purger.py +44 -0
- pybutt-2.0.0.dist-info/METADATA +756 -0
- pybutt-2.0.0.dist-info/RECORD +39 -0
- pybutt-2.0.0.dist-info/WHEEL +5 -0
- pybutt-2.0.0.dist-info/entry_points.txt +2 -0
- pybutt-2.0.0.dist-info/licenses/LICENSE +21 -0
- pybutt-2.0.0.dist-info/top_level.txt +3 -0
- tests/conftest.py +22 -0
- tests/test_cli.py +979 -0
- tests/test_cli_help.py +130 -0
- tests/test_combiner.py +259 -0
- tests/test_core.py +1009 -0
- tests/test_exporter.py +637 -0
- tests/test_files.py +178 -0
- tests/test_import_retry_logic.py +837 -0
- tests/test_logobs.py +491 -0
- tests/test_purge.py +219 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from pybutt.cli.app import (
|
|
6
|
+
app,
|
|
7
|
+
build_sql_config,
|
|
8
|
+
)
|
|
9
|
+
from pybutt.core.config import (
|
|
10
|
+
BATCH_SIZE_DEFAULT,
|
|
11
|
+
CCI_DEFAULT,
|
|
12
|
+
DRIVER_DEFAULT,
|
|
13
|
+
ENCRYPT_DEFAULT,
|
|
14
|
+
IMPORT_ENGINE_DEFAULT,
|
|
15
|
+
MEM_COOLDOWN_DEFAULT,
|
|
16
|
+
MEM_HEARTBEAT_DEFAULT,
|
|
17
|
+
MEM_MAX_WAIT_DEFAULT,
|
|
18
|
+
MEM_SLEEP_DEFAULT,
|
|
19
|
+
MEM_THRESHOLD_DEFAULT,
|
|
20
|
+
PACKET_SIZE_DEFAULT,
|
|
21
|
+
RETRIES_DEFAULT,
|
|
22
|
+
SCHEMA_DEFAULT,
|
|
23
|
+
TRANSACTION_MODE_DEFAULT,
|
|
24
|
+
TRUST_CERT_DEFAULT,
|
|
25
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
26
|
+
TransactionMode,
|
|
27
|
+
)
|
|
28
|
+
from pybutt.core.logobs import configure_logging, get_logger
|
|
29
|
+
from pybutt.exceptions import PyButtError
|
|
30
|
+
from pybutt.io.importer import Importer
|
|
31
|
+
|
|
32
|
+
logger = get_logger("cli.import")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@app.command(
    "import",
    help=("Import Parquet files into a SQL Server table using a manifest file."),
)
def import_data(
    manifest_path: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    server: str = typer.Option(  # noqa: B008
        ...,
        "--server",
        "-s",
        help="SQL Server hostname or instance.",
        rich_help_panel="Server Connection Options",
    ),
    database: str = typer.Option(  # noqa: B008
        ...,
        "--database",
        "-d",
        help="Target SQL Server database.",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver name.",
        rich_help_panel="Server Connection Options",
    ),
    engine: str = typer.Option(  # noqa: B008
        IMPORT_ENGINE_DEFAULT,
        "--engine",
        "-e",
        help="Import engine to use: duckdb, pyodbc, or mssql-python.",
        rich_help_panel="Server Connection Options",
        case_sensitive=False,
    ),
    transaction_mode: TransactionMode = typer.Option(  # noqa: B008
        TRANSACTION_MODE_DEFAULT,
        "--transaction-mode",
        "-M",
        help=(
            "Transaction scope: batch (per batch), rowgroup (per row group, "
            "recommended), file (entire file)."
        ),
        rich_help_panel="Server Connection Options",
    ),
    schema: str = typer.Option(  # noqa: B008
        SCHEMA_DEFAULT,
        "--schema",
        "-S",
        help="Target table schema.",
        rich_help_panel="SQL Data Object Options",
    ),
    table: str = typer.Option(  # noqa: B008
        ...,
        "--table",
        "-t",
        help="Target table name.",
        rich_help_panel="SQL Data Object Options",
    ),
    cci: bool = typer.Option(  # noqa: B008
        CCI_DEFAULT,
        "--cci/--no-cci",
        help=(
            "Create a clustered columnstore index on the per-worker temp tables "
            "used during multi-worker import. Use --no-cci to keep the previous "
            "heap behaviour. Enabled by default."
        ),
        rich_help_panel="SQL Data Object Options",
    ),
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    temp_manifest_filename: str | None = typer.Option(  # noqa: B008
        None,
        "--imported-manifest-filename",
        "-o",
        help=(
            "Override the import worker manifest filename written during "
            "multi-worker import. Defaults to <schema>_<table>_import_manifest.json."
        ),
        rich_help_panel="File Options",
    ),
    batch_size: int | None = typer.Option(  # noqa: B008
        BATCH_SIZE_DEFAULT,
        "--batch-size",
        "-b",
        help="Rows per batch insert.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
    worker_count: int = typer.Option(  # noqa: B008
        1,
        "--worker-count",
        "-w",
        help="Number of parallel import threads.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    mem_heartbeat: float = typer.Option(  # noqa: B008
        MEM_HEARTBEAT_DEFAULT,
        "--mem-heartbeat",
        help=("Log process memory (RSS + system %) every N seconds."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_threshold: float = typer.Option(  # noqa: B008
        MEM_THRESHOLD_DEFAULT,
        "--mem-threshold",
        help=(
            "System memory % at which workers are throttled. "
            "Set to 0 to disable throttling."
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
        max=100,
    ),
    mem_sleep: float = typer.Option(  # noqa: B008
        MEM_SLEEP_DEFAULT,
        "--mem-sleep",
        help=("Seconds to sleep per throttle check when memory is high. "),
        rich_help_panel="Memory Tuning Options",
        min=0.1,
    ),
    mem_max_wait: float = typer.Option(  # noqa: B008
        MEM_MAX_WAIT_DEFAULT,
        "--mem-max-wait",
        help=("Max total seconds to wait during memory throttling before giving up."),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
    mem_cooldown: float = typer.Option(  # noqa: B008
        MEM_COOLDOWN_DEFAULT,
        "--mem-cooldown",
        help=(
            "Seconds after a throttle event before re-checking. Prevents "
            "the gate from serialising workers"
        ),
        rich_help_panel="Memory Tuning Options",
        min=0,
    ),
) -> None:
    """Import one or more Parquet files into SQL Server tables.

    The command reads the manifest file and imports each Parquet file into the
    target table. If the number of workers is greater than 1, the import will be
    done using multiple tables created to the same data schema as the target table.

    The manifest's parent directory is handed to the importer as its input
    path and the manifest's file name as the manifest filename. The engine
    name is lower-cased before being passed on.

    Raises:
        SystemExit: With exit code 1 when the importer raises a
            ``PyButtError``; the error is printed to stderr first.
    """

    configure_logging(verbose)

    if mem_threshold > 0:
        # The message is passed to the logger without %-style arguments, so a
        # single literal "%" is what the user should see (not "%%").
        logger.info(
            "Memory throttling enabled: threads will sleep when system "
            f"memory exceeds {mem_threshold:.0f}% "
            "(--mem-threshold 0 to disable)"
        )

    config = build_sql_config(
        server=server,
        database=database,
        username=username,
        password=password,
        driver=driver,
        trusted_connection=trusted_connection,
        trust_cert=trust_cert,
        encrypt=encrypt,
        retries=retries,
        packet_size=packet_size,
    )

    try:
        importer = Importer(
            config=config,
            schema=schema,
            table=table,
            input_path=manifest_path.parent,
            manifest_filename=manifest_path.name,
            worker_count=worker_count,
            batch_size=batch_size,
            transaction_mode=transaction_mode,
            engine=engine.lower(),
            temp_manifest_filename=temp_manifest_filename,
            create_cci=cci,
            mem_heartbeat=mem_heartbeat,
            mem_threshold=mem_threshold,
            mem_sleep=mem_sleep,
            mem_max_wait=mem_max_wait,
            mem_cooldown=mem_cooldown,
        )
        importer.perform_work()
    except PyButtError as exc:
        typer.secho(f"Import failed: {exc}", fg=typer.colors.RED, err=True)
        raise SystemExit(1) from exc
    typer.secho("Import completed successfully.", fg=typer.colors.GREEN)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from pybutt.cli.app import app
|
|
6
|
+
from pybutt.core.logobs import get_logger
|
|
7
|
+
from pybutt.files import inspect_manifest
|
|
8
|
+
|
|
9
|
+
logger = get_logger("cli.inspect")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@app.command(
    "inspect",
    help=(
        "Inspect Parquet files listed in a manifest. "
        "Shows file-level metadata and optionally column-level details."
    ),
)
def inspect(
    manifest: Path = typer.Argument(  # noqa: B008
        ...,
        help="Path to the input manifest file",
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show column details",
    ),
):
    """Inspect the Parquet files referenced by a manifest.

    Thin CLI wrapper: forwards the manifest path and the verbose flag
    unchanged to ``inspect_manifest``.
    """
    inspect_manifest(manifest, verbose)
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from pybutt.cli.app import app, build_sql_config
|
|
6
|
+
from pybutt.core.config import (
|
|
7
|
+
DRIVER_DEFAULT,
|
|
8
|
+
ENCRYPT_DEFAULT,
|
|
9
|
+
PACKET_SIZE_DEFAULT,
|
|
10
|
+
RETRIES_DEFAULT,
|
|
11
|
+
TRUST_CERT_DEFAULT,
|
|
12
|
+
TRUSTED_CONNECTION_DEFAULT,
|
|
13
|
+
)
|
|
14
|
+
from pybutt.core.logobs import configure_logging, get_logger
|
|
15
|
+
from pybutt.exceptions import PyButtError
|
|
16
|
+
from pybutt.files import load_manifest
|
|
17
|
+
from pybutt.io.purger import TablePurger
|
|
18
|
+
|
|
19
|
+
logger = get_logger("cli.purge")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@app.command(
    "purge",
    help=(
        "Purge objects listed in a manifest. "
        "For file manifests, deletes each Parquet file then removes the manifest. "
        "For table manifests, drops each SQL table then removes the manifest."
    ),
)
def purge(
    manifest_path: Path = typer.Argument(  # noqa: B008
        ..., help="Path to the input manifest file."
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False,
        "--verbose",
        "-V",
        help="Show verbose logging output.",
    ),
    server: str | None = typer.Option(  # noqa: B008
        None,
        "--server",
        "-s",
        help="SQL Server host (required for table manifests).",
        rich_help_panel="Server Connection Options",
    ),
    database: str | None = typer.Option(  # noqa: B008
        None,
        "--database",
        "-d",
        help="Target database (required for table manifests).",
        rich_help_panel="Server Connection Options",
    ),
    driver: str = typer.Option(  # noqa: B008
        DRIVER_DEFAULT,
        "--driver",
        "-D",
        help="ODBC driver name.",
        rich_help_panel="Server Connection Options",
    ),
    username: str | None = typer.Option(  # noqa: B008
        None,
        "--username",
        "-u",
        help="SQL Server username when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    password: str | None = typer.Option(  # noqa: B008
        None,
        "--password",
        "-p",
        help="SQL Server password when not using trusted connection.",
        rich_help_panel="Server Security Options",
    ),
    trusted_connection: bool = typer.Option(  # noqa: B008
        TRUSTED_CONNECTION_DEFAULT,
        "--trusted-connection",
        "-T",
        help="Use integrated Windows authentication instead of username/password.",
        rich_help_panel="Server Security Options",
    ),
    trust_cert: bool = typer.Option(  # noqa: B008
        TRUST_CERT_DEFAULT,
        "--trust-cert",
        "-c",
        help="Trust the SQL Server TLS certificate.",
        rich_help_panel="Server Security Options",
    ),
    encrypt: bool = typer.Option(  # noqa: B008
        ENCRYPT_DEFAULT,
        "--encrypt/--no-encrypt",
        help="Enable or disable SQL Server encrypted transport.",
        rich_help_panel="Server Security Options",
    ),
    retries: int = typer.Option(  # noqa: B008
        RETRIES_DEFAULT,
        "--retries",
        "-r",
        help="Number of retry attempts for transient SQL errors.",
        rich_help_panel="Transport Tuning Options",
        min=1,
    ),
    packet_size: int = typer.Option(  # noqa: B008
        PACKET_SIZE_DEFAULT,
        "--packet-size",
        help=(
            "TDS packet size in bytes (512-32767). "
            "Note: encrypted connections are capped at 16383."
        ),
        rich_help_panel="Transport Tuning Options",
        min=512,
        max=32767,
    ),
) -> None:
    """Remove everything a manifest lists, then the manifest itself.

    The manifest is loaded first; its ``"type"`` value selects the purge
    strategy: ``"files"`` is handled by ``_purge_files`` and ``"tables"`` by
    ``_purge_tables`` (which receives all SQL connection options). Any other
    type is reported on stderr.

    Raises:
        SystemExit: With exit code 1 when the manifest cannot be loaded
            (``PyButtError``) or its type is not supported.
    """

    configure_logging(verbose)

    try:
        manifest = load_manifest(manifest_path)
    except PyButtError as exc:
        typer.secho(f"Purge failed: {exc}", fg=typer.colors.RED, err=True)
        raise SystemExit(1) from exc

    manifest_type = manifest["type"]

    if manifest_type == "files":
        _purge_files(manifest, manifest_path)
    elif manifest_type == "tables":
        _purge_tables(
            manifest,
            manifest_path,
            server=server,
            database=database,
            driver=driver,
            username=username,
            password=password,
            trusted_connection=trusted_connection,
            trust_cert=trust_cert,
            encrypt=encrypt,
            retries=retries,
            packet_size=packet_size,
        )
    else:
        # Neither known manifest flavour matched: report and exit non-zero.
        typer.secho(
            f"Purge failed: unsupported manifest type '{manifest_type}'",
            fg=typer.colors.RED,
            err=True,
        )
        raise SystemExit(1)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _purge_files(manifest: dict, manifest_path: Path) -> None:
    """Delete Parquet files listed in the manifest, then delete the manifest.

    Each entry in ``manifest["entries"]`` is resolved relative to the
    manifest's parent directory. Files that are already gone are logged as
    warnings and counted as skipped rather than treated as errors. A summary
    line is printed once the manifest file itself has been removed.

    Args:
        manifest: Loaded manifest mapping; only its ``"entries"`` value is read.
        manifest_path: Location of the manifest file on disk.
    """
    base_dir = manifest_path.parent
    deleted = 0
    missing = 0

    # NOTE(review): entries are joined onto the manifest directory without
    # validation, so an absolute or "../" entry would delete a file outside
    # it. Confirm manifests are always trusted input.
    for entry in manifest["entries"]:
        filepath = base_dir / entry
        # Attempt the delete directly instead of checking exists() first: a
        # file that disappears between the check and the unlink would
        # otherwise surface as an unhandled FileNotFoundError.
        try:
            filepath.unlink()
        except FileNotFoundError:
            logger.warning(f"File not found (skipping): {filepath}")
            missing += 1
        else:
            logger.info(f"Deleted file: {filepath}")
            deleted += 1

    manifest_path.unlink()
    logger.info(f"Deleted manifest: {manifest_path}")

    summary = f"Purge complete: {deleted} file(s) deleted"
    if missing:
        summary += f", {missing} file(s) not found (skipped)"
    summary += ", manifest removed."
    typer.secho(summary, fg=typer.colors.GREEN)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _purge_tables(
    manifest: dict,
    manifest_path: Path,
    *,
    server: str | None,
    database: str | None,
    driver: str,
    username: str | None,
    password: str | None,
    trusted_connection: bool,
    trust_cert: bool,
    encrypt: bool,
    retries: int,
    packet_size: int,
) -> None:
    """Drop the SQL tables named in a manifest and remove the manifest file.

    An empty entry list short-circuits: the manifest is deleted and no SQL
    configuration is built. Otherwise a ``TablePurger`` is run over the
    entries and the manifest is deleted only after it returns.

    Raises:
        typer.BadParameter: When ``server`` or ``database`` is missing/empty.
        SystemExit: With exit code 1 when the purger raises ``PyButtError``.
    """
    if not server or not database:
        raise typer.BadParameter(
            "--server and --database are required for table manifests"
        )

    table_entries = manifest["entries"]

    if not table_entries:
        # Nothing to drop; only the manifest file needs cleaning up.
        typer.secho("No tables to purge.", fg=typer.colors.YELLOW)
        manifest_path.unlink()
        logger.info(f"Deleted manifest: {manifest_path}")
        return

    sql_config = build_sql_config(
        server=server,
        database=database,
        username=username,
        password=password,
        driver=driver,
        trusted_connection=trusted_connection,
        trust_cert=trust_cert,
        encrypt=encrypt,
        retries=retries,
        packet_size=packet_size,
    )

    try:
        dropped_tables = TablePurger(
            config=sql_config, sources=table_entries
        ).purge()
    except PyButtError as exc:
        typer.secho(f"Purge failed: {exc}", fg=typer.colors.RED, err=True)
        raise SystemExit(1) from exc

    manifest_path.unlink()
    logger.info(f"Deleted manifest: {manifest_path}")

    dropped_count = len(dropped_tables)
    typer.secho(
        f"Purge complete: {dropped_count} table(s) dropped, manifest removed.",
        fg=typer.colors.GREEN,
    )
|
pybutt/core/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .config import (
|
|
2
|
+
SqlConfig,
|
|
3
|
+
TransactionMode,
|
|
4
|
+
coerce_transaction_mode,
|
|
5
|
+
quote_identifier,
|
|
6
|
+
sanitise_dsn_value,
|
|
7
|
+
validate_engine,
|
|
8
|
+
validate_identifier,
|
|
9
|
+
validate_parameters,
|
|
10
|
+
)
|
|
11
|
+
from .logobs import (
|
|
12
|
+
configure_logging,
|
|
13
|
+
get_logger,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Public names re-exported by this package.
__all__ = [
    # Configuration: data types
    "SqlConfig",
    "TransactionMode",
    # Configuration: validation and quoting helpers
    "coerce_transaction_mode",
    "quote_identifier",
    "sanitise_dsn_value",
    "validate_engine",
    "validate_parameters",
    "validate_identifier",
    # Logging helpers
    "configure_logging",
    "get_logger",
]
|
pybutt/core/base.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import duckdb as d
|
|
5
|
+
import mssql_python
|
|
6
|
+
import pyodbc
|
|
7
|
+
|
|
8
|
+
from pybutt.exceptions import ConfigurationError, RetryExceededError
|
|
9
|
+
|
|
10
|
+
from .config import (
|
|
11
|
+
SqlConfig,
|
|
12
|
+
quote_identifier,
|
|
13
|
+
sanitise_dsn_value,
|
|
14
|
+
)
|
|
15
|
+
from .logobs import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger("base")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rows_from_arrow(arrow_obj) -> list[tuple]:
|
|
21
|
+
"""Convert an Arrow Table or RecordBatch to a list of row-tuples.
|
|
22
|
+
|
|
23
|
+
Works with both ``pyarrow.Table`` and ``pyarrow.RecordBatch`` (anything
|
|
24
|
+
with a ``.columns`` attribute whose elements support ``.to_pylist()``).
|
|
25
|
+
"""
|
|
26
|
+
return list(zip(*[col.to_pylist() for col in arrow_obj.columns], strict=True))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SqlServerIOBase:
    """Shared connection-string, connection and retry helpers for SQL Server I/O.

    Stores the ``SqlConfig`` it is given and the ODBC connection string built
    from it. ``full_table_name`` reads ``self.schema`` and ``self.table``,
    which this class does not set itself.
    """

    def __init__(self, config: SqlConfig):
        """Keep *config* and eagerly build the ODBC connection string.

        Raises:
            ConfigurationError: From ``build_dsn`` when the config is not a
                trusted connection and the username or password is missing.
        """
        self.config = config
        self.dsn = self.build_dsn()

    def _connection_parts(self, *, include_driver: bool = True) -> list[str]:
        """Build the common connection-string parts shared by all drivers.

        Args:
            include_driver: When true, a ``Driver={...}`` part is emitted and
                username/password are mandatory for non-trusted connections.
                When false, the driver part is omitted and credentials are
                added only if present.

        Returns:
            The ``key=value`` fragments, in order, without separators.

        Raises:
            ConfigurationError: ``include_driver`` is true, the connection is
                not trusted, and the username or password is missing.
        """
        cfg = self.config
        parts: list[str] = []
        if include_driver:
            parts.append(f"Driver={{{cfg.driver}}}")
        parts.append(f"Server={sanitise_dsn_value(cfg.server)}")
        parts.append(f"Database={sanitise_dsn_value(cfg.database)}")

        if cfg.trusted_connection:
            parts.append("Trusted_Connection=Yes")
        else:
            if include_driver:
                if not cfg.username or not cfg.password:
                    raise ConfigurationError(
                        "Username/password required when not using trusted connection"
                    )
                parts.append(f"Uid={sanitise_dsn_value(cfg.username)}")
                parts.append(f"Pwd={sanitise_dsn_value(cfg.password)}")
            else:
                # Driverless variant: credentials are optional here.
                if cfg.username:
                    parts.append(f"UID={sanitise_dsn_value(cfg.username)}")
                if cfg.password:
                    parts.append(f"PWD={sanitise_dsn_value(cfg.password)}")

        parts.append(f"TrustServerCertificate={'Yes' if cfg.trust_cert else 'No'}")

        # Encrypt is only ever switched on explicitly; nothing is emitted
        # when it is disabled.
        if cfg.encrypt:
            parts.append("Encrypt=Yes")

        parts.append(f"PacketSize={cfg.packet_size}")

        return parts

    def build_dsn(self):
        """Return the ``;``-joined, ``;``-terminated string including the driver."""
        return ";".join(self._connection_parts(include_driver=True)) + ";"

    def connection_d(self):
        """Open a DuckDB connection and install/load the ``odbc_scanner`` extension."""
        conn = d.connect()
        conn.execute("INSTALL odbc_scanner; LOAD odbc_scanner;")
        return conn

    def connection_p(self, autocommit=False):
        """Open a pyodbc connection from ``self.dsn`` and set its autocommit flag."""
        conn = pyodbc.connect(self.dsn)
        conn.autocommit = autocommit
        return conn

    def connection_m(self, autocommit=False):
        """Open an mssql_python connection using a driverless connection string."""
        conn_str = ";".join(self._connection_parts(include_driver=False)) + ";"
        conn = mssql_python.connect(conn_str)
        conn.setautocommit(autocommit)
        return conn

    def full_table_name(self):
        """Return ``<schema>.<table>`` with both identifiers quoted.

        Reads ``self.schema`` and ``self.table``; presumably these are set by
        subclasses - verify against callers.
        """
        return f"{quote_identifier(self.schema)}.{quote_identifier(self.table)}"

    def safe_error_message(self, e: Exception) -> str:
        """Return ``str(e)`` with password and user-id values masked as ``***``."""
        msg = str(e)

        # redact common sensitive tokens
        msg = re.sub(r"(Pwd|Password)=[^;]+", r"\1=***", msg, flags=re.IGNORECASE)
        msg = re.sub(r"(Uid|User ID)=[^;]+", r"\1=***", msg, flags=re.IGNORECASE)

        return msg

    def retry(self, fn, context="operation"):
        """Call *fn* until it succeeds or ``config.retries`` attempts are used.

        Failed attempts are logged with a redacted message and followed by an
        exponential back-off (1s, 2s, 4s, ...). No back-off happens after the
        final attempt, so the failure is reported immediately.

        Args:
            fn: Zero-argument callable to execute.
            context: Label used in log and error messages.

        Returns:
            Whatever *fn* returns on its first successful call.

        Raises:
            MemoryError: Re-raised immediately; never retried.
            RetryExceededError: Every attempt failed (chained to the last
                error), or no attempt was made at all.
        """
        max_attempts = self.config.retries
        last_error: Exception | None = None
        for attempt in range(max_attempts):
            try:
                return fn()
            except MemoryError:
                logger.error(f"{context} out of memory - not retrying (fatal)")
                raise
            except Exception as e:
                last_error = e
                safe_msg = self.safe_error_message(e)
                logger.warning(
                    f"{context} attempt {attempt + 1}/{max_attempts} "
                    f"failed: {safe_msg}"
                )
                # Back off only when another attempt will follow; sleeping
                # after the last failure would just delay the error.
                if attempt + 1 < max_attempts:
                    time.sleep(2**attempt)
        if last_error is not None:
            raise RetryExceededError(
                f"{context} failed after max retries: "
                f"{self.safe_error_message(last_error)}"
            ) from last_error
        # Reached only when the loop body never ran (retries <= 0).
        raise RetryExceededError(f"{context} failed after max retries")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
if __name__ == "__main__":
|
|
124
|
+
pass
|