datablade 0.0.0-py3-none-any.whl → 0.0.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. datablade/__init__.py +49 -1
  2. datablade/blade.py +322 -0
  3. datablade/core/__init__.py +28 -7
  4. datablade/core/frames.py +23 -236
  5. datablade/core/json.py +5 -10
  6. datablade/core/lists.py +5 -10
  7. datablade/core/messages.py +23 -11
  8. datablade/core/strings.py +5 -43
  9. datablade/core/zip.py +5 -24
  10. datablade/dataframes/__init__.py +51 -0
  11. datablade/dataframes/frames.py +585 -0
  12. datablade/dataframes/readers.py +1367 -0
  13. datablade/docs/ARCHITECTURE.md +102 -0
  14. datablade/docs/OBJECT_REGISTRY.md +194 -0
  15. datablade/docs/README.md +57 -0
  16. datablade/docs/TESTING.md +37 -0
  17. datablade/docs/USAGE.md +409 -0
  18. datablade/docs/__init__.py +87 -0
  19. datablade/docs/__main__.py +6 -0
  20. datablade/io/__init__.py +15 -0
  21. datablade/io/json.py +70 -0
  22. datablade/io/zip.py +111 -0
  23. datablade/registry.py +581 -0
  24. datablade/sql/__init__.py +56 -0
  25. datablade/sql/bulk_load.py +665 -0
  26. datablade/sql/ddl.py +402 -0
  27. datablade/sql/ddl_pyarrow.py +411 -0
  28. datablade/sql/dialects.py +12 -0
  29. datablade/sql/quoting.py +44 -0
  30. datablade/sql/schema_spec.py +65 -0
  31. datablade/sql/sqlserver.py +390 -0
  32. datablade/utils/__init__.py +38 -0
  33. datablade/utils/lists.py +32 -0
  34. datablade/utils/logging.py +204 -0
  35. datablade/utils/messages.py +29 -0
  36. datablade/utils/strings.py +249 -0
  37. datablade-0.0.6.dist-info/METADATA +406 -0
  38. datablade-0.0.6.dist-info/RECORD +41 -0
  39. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/WHEEL +1 -1
  40. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info/licenses}/LICENSE +20 -20
  41. datablade-0.0.0.dist-info/METADATA +0 -13
  42. datablade-0.0.0.dist-info/RECORD +0 -13
  43. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/top_level.txt +0 -0
datablade/dataframes/frames.py
@@ -0,0 +1,585 @@
+ """DataFrame transformation and export helpers.
+
+ This module focuses on schema-aware DataFrame cleanup, type inference,
+ and downstream serialization helpers (Parquet schema/table generation
+ and SQL Server-compatible DDL helpers).
+ """
+
+ import os
+ import pathlib
+ import shutil
+ import subprocess
+ from typing import Any, Optional, Union
+
+ import numpy as np
+ import pandas as pd
+ import pyarrow as pa
+
+ from ..utils.logging import log_debug, log_error, log_info, log_warning
+ from ..utils.strings import coerce_path, ensure_directory
+
+ _BYTES_LIKE_TYPES = (bytes, bytearray, memoryview)
+
+
+ def _is_bytes_like(value: Any) -> bool:
+     """Return True when a value should be treated as binary data."""
+     return isinstance(value, _BYTES_LIKE_TYPES)
+
+
+ def _infer_object_pa_type(col_data: pd.Series) -> pa.DataType:
+     """Infer a PyArrow type for object-dtype Series.
+
+     We sample non-null values and prefer "narrower" types (binary, string,
+     bool, integer, float) before falling back to pyarrow's inference.
+     """
+     non_null = col_data.dropna()
+     if non_null.empty:
+         return pa.string()
+
+     # Sample to avoid scanning very large object columns.
+     sample = non_null.iloc[:100].tolist()
+
+     if all(_is_bytes_like(v) for v in sample):
+         return pa.binary()
+     if all(isinstance(v, str) for v in sample):
+         return pa.string()
+     if all(isinstance(v, (bool, np.bool_)) for v in sample):
+         return pa.bool_()
+     if all(
+         isinstance(v, (int, np.integer)) and not isinstance(v, (bool, np.bool_))
+         for v in sample
+     ):
+         min_value = min(sample)
+         max_value = max(sample)
+         if min_value >= np.iinfo(np.int8).min and max_value <= np.iinfo(np.int8).max:
+             return pa.int8()
+         if min_value >= np.iinfo(np.int16).min and max_value <= np.iinfo(np.int16).max:
+             return pa.int16()
+         if min_value >= np.iinfo(np.int32).min and max_value <= np.iinfo(np.int32).max:
+             return pa.int32()
+         return pa.int64()
+     if all(isinstance(v, (float, np.floating)) for v in sample):
+         return pa.float64()
+
+     try:
+         # PyArrow can infer types for mixed object values; we normalize into
+         # the closest "primitive" type for DDL/Parquet stability.
+         inferred = pa.infer_type(sample)
+         if pa.types.is_binary(inferred) or pa.types.is_large_binary(inferred):
+             return pa.binary()
+         if pa.types.is_string(inferred) or pa.types.is_large_string(inferred):
+             return pa.string()
+         if pa.types.is_boolean(inferred):
+             return pa.bool_()
+         if pa.types.is_integer(inferred):
+             return pa.int64()
+         if pa.types.is_floating(inferred):
+             return pa.float64()
+         if pa.types.is_timestamp(inferred) or pa.types.is_date(inferred):
+             return inferred
+     except Exception:
+         pass
+
+     return pa.string()
+
+
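For context on the helper above: a minimal, illustrative sketch of how the sampling-based inference resolves object-dtype columns. It assumes datablade 0.0.6 is installed and imports the private helper directly, which is done here for illustration only:

    import pandas as pd
    from datablade.dataframes.frames import _infer_object_pa_type

    # All-integer object column: values fit in int8, so the narrowest type wins.
    print(_infer_object_pa_type(pd.Series([1, 2, 3], dtype="object")))  # int8

    # Mixed string/int sample: none of the narrow checks pass and PyArrow cannot
    # infer a single primitive type, so the helper falls back to string.
    print(_infer_object_pa_type(pd.Series(["a", 1, None], dtype="object")))  # string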
+ def try_cast_string_columns_to_numeric(
+     df: Optional[pd.DataFrame] = None,
+     convert_partial: bool = False,
+     verbose: bool = False,
+ ) -> pd.DataFrame:
+     """
+     Attempt to cast DataFrame string columns to numeric values where possible.
+
+     Args:
+         df: The DataFrame to process. If None, returns None.
+         convert_partial: If True, columns with some values convertible to numeric types
+             will be converted to numeric types with NaNs where conversion failed.
+             If False, only columns where all values can be converted will be converted.
+         verbose: If True, prints progress messages.
+
+     Returns:
+         DataFrame with string columns converted to numeric types where possible,
+         or None if no DataFrame is provided.
+     """
+     if df is None:
+         log_warning(
+             "No DataFrame provided; exiting try_cast_string_columns_to_numeric.",
+             verbose,
+         )
+         raise ValueError("df must be provided")
+     if not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+
+     for col in df.columns:
+         dtype = df[col].dtype
+         if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
+             non_null = df[col].dropna()
+             if (
+                 pd.api.types.is_object_dtype(dtype)
+                 and not non_null.empty
+                 and non_null.iloc[:100].map(_is_bytes_like).any()
+             ):
+                 log_debug(
+                     f"Column '{col}' contains bytes-like values; skipping numeric coercion.",
+                     verbose,
+                 )
+                 continue
+             converted = pd.to_numeric(df[col], errors="coerce")
+             has_nan = converted.isnull().any()
+             if not has_nan:
+                 df[col] = converted
+                 log_info(f"Column '{col}' successfully converted to numeric.", verbose)
+             else:
+                 if convert_partial:
+                     df[col] = converted
+                     log_info(
+                         f"Column '{col}' partially converted to numeric with NaNs where conversion failed.",
+                         verbose,
+                     )
+                 else:
+                     log_debug(
+                         f"Column '{col}' could not be fully converted to numeric; leaving as is.",
+                         verbose,
+                     )
+     return df
+
+
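A short usage sketch of the conversion rule described in the docstring above; illustrative only, assuming datablade 0.0.6 is installed:

    import pandas as pd
    from datablade.dataframes.frames import try_cast_string_columns_to_numeric

    df = pd.DataFrame({"clean": ["1", "2"], "dirty": ["3", "oops"]})

    # Default: only columns whose every value parses are converted.
    print(try_cast_string_columns_to_numeric(df.copy()).dtypes)
    # clean -> int64, dirty stays object

    # convert_partial=True keeps partial conversions, inserting NaN on failures.
    out = try_cast_string_columns_to_numeric(df.copy(), convert_partial=True)
    print(out["dirty"].tolist())  # [3.0, nan]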
+ def clean_dataframe_columns(
+     df: Optional[pd.DataFrame] = None, verbose: bool = False
+ ) -> pd.DataFrame:
+     """
+     Clean the DataFrame columns by flattening MultiIndex, converting to strings,
+     and removing duplicates.
+
+     Args:
+         df: The DataFrame to clean. If None, returns None.
+         verbose: If True, prints progress messages.
+
+     Returns:
+         The cleaned DataFrame with:
+         - Flattened MultiIndex columns
+         - String column names
+         - Duplicate columns removed (keeping first occurrence)
+         Returns None if no DataFrame is provided.
+     """
+     if df is None:
+         log_warning("No DataFrame provided; exiting clean_dataframe_columns.", verbose)
+         raise ValueError("df must be provided")
+     if not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+     # Step 1: Flatten MultiIndex columns.
+     if isinstance(df.columns, pd.MultiIndex):
+         df.columns = ["_".join(map(str, col)).strip() for col in df.columns.values]
+         log_info("Flattened MultiIndex columns.", verbose)
+
+     # Step 2: Convert non-string column names to strings.
+     df.columns = df.columns.map(str)
+     log_debug("Converted column names to strings.", verbose)
+
+     # Step 3: Remove duplicate columns, keeping the first occurrence.
+     duplicates = df.columns.duplicated()
+     if duplicates.any():
+         duplicate_cols = df.columns[duplicates]
+         log_warning(f"Duplicate columns found: {list(duplicate_cols)}", verbose)
+         df = df.loc[:, ~duplicates]
+         log_info("Removed duplicate columns, keeping the first occurrence.", verbose)
+
+     return df
+
+
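A quick illustration of the three cleanup steps above (flatten, stringify, de-duplicate), again assuming datablade 0.0.6 is installed:

    import pandas as pd
    from datablade.dataframes.frames import clean_dataframe_columns

    df = pd.DataFrame(
        [[1, 2, 3]],
        columns=pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("a", "x")]),
    )
    cleaned = clean_dataframe_columns(df)
    print(list(cleaned.columns))  # ['a_x', 'a_y'] -- flattened, duplicate 'a_x' dropped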
+ def generate_parquet_schema(
+     df: Optional[pd.DataFrame] = None, verbose: bool = False
+ ) -> pa.Schema:
+     """
+     Generate a PyArrow Schema from a pandas DataFrame with optimized data types.
+
+     Args:
+         df: The DataFrame to generate the schema from. If None, returns None.
+         verbose: If True, prints progress messages.
+
+     Returns:
+         PyArrow Schema object with optimized types (smallest integer type that fits the data),
+         or None if no DataFrame is provided.
+     """
+     if df is None:
+         log_warning("No DataFrame provided; exiting generate_parquet_schema.", verbose)
+         raise ValueError("df must be provided")
+     if not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+
+     fields = []
+     for column in df.columns:
+         col_data = df[column]
+         col_name = column
+         dtype = col_data.dtype
+
+         # Determine if the column contains any nulls.
+         nullable = col_data.isnull().any()
+
+         # Map pandas dtype to PyArrow type.
+         pa_type = None
+
+         if pd.api.types.is_integer_dtype(dtype):
+             # Check the range to determine the smallest integer type.
+             non_null = col_data.dropna()
+             if non_null.empty:
+                 pa_type = pa.int64()
+             else:
+                 min_value = non_null.min()
+                 max_value = non_null.max()
+                 if (
+                     min_value >= np.iinfo(np.int8).min
+                     and max_value <= np.iinfo(np.int8).max
+                 ):
+                     pa_type = pa.int8()
+                 elif (
+                     min_value >= np.iinfo(np.int16).min
+                     and max_value <= np.iinfo(np.int16).max
+                 ):
+                     pa_type = pa.int16()
+                 elif (
+                     min_value >= np.iinfo(np.int32).min
+                     and max_value <= np.iinfo(np.int32).max
+                 ):
+                     pa_type = pa.int32()
+                 else:
+                     pa_type = pa.int64()
+
+         elif pd.api.types.is_float_dtype(dtype):
+             pa_type = pa.float64()
+
+         elif pd.api.types.is_bool_dtype(dtype):
+             pa_type = pa.bool_()
+
+         elif isinstance(dtype, pd.DatetimeTZDtype):
+             tz = getattr(getattr(col_data.dt, "tz", None), "zone", None) or str(
+                 col_data.dt.tz
+             )
+             pa_type = pa.timestamp("ms", tz=tz)
+
+         elif pd.api.types.is_datetime64_any_dtype(dtype):
+             pa_type = pa.timestamp("ms")
+
+         elif pd.api.types.is_timedelta64_dtype(dtype):
+             pa_type = pa.duration("ns")
+
+         elif isinstance(dtype, pd.CategoricalDtype):
+             pa_type = pa.string()
+
+         elif pd.api.types.is_object_dtype(dtype):
+             pa_type = _infer_object_pa_type(col_data)
+
+         else:
+             pa_type = pa.string()
+
+         # Create a field.
+         field = pa.field(col_name, pa_type, nullable=nullable)
+         fields.append(field)
+
+     schema = pa.schema(fields)
+     return schema
+
+
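To make the "smallest integer type that fits" rule concrete, a hedged sketch of the schema this function would produce (assuming datablade 0.0.6 is installed):

    import pandas as pd
    from datablade.dataframes.frames import generate_parquet_schema

    df = pd.DataFrame(
        {
            "small": [1, 2, 127],          # fits pa.int8()
            "big": [1, 2, 3_000_000_000],  # exceeds int32, so pa.int64()
            "when": pd.to_datetime(["2024-01-01", None, "2024-06-01"]),  # nullable timestamp[ms]
        }
    )
    print(generate_parquet_schema(df))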
+ def pandas_to_parquet_table(
+     df: Optional[pd.DataFrame] = None,
+     convert: bool = True,
+     partial: bool = False,
+     preserve_index: bool = False,
+     verbose: bool = False,
+ ) -> pa.Table:
+     """
+     Generate a PyArrow Table from a pandas DataFrame with automatic type conversion.
+
+     Args:
+         df: The DataFrame to convert. If None, returns None.
+         convert: If True, attempts to cast string columns to numeric types.
+         partial: If True with convert, allows partial conversion with NaNs.
+         preserve_index: If True, preserves the DataFrame index in the table.
+         verbose: If True, prints progress messages.
+
+     Returns:
+         PyArrow Table object with optimized schema, or None if DataFrame is None
+         or conversion fails.
+     """
+     if df is None:
+         log_warning("No DataFrame provided; exiting pandas_to_parquet_table.", verbose)
+         raise ValueError("df must be provided")
+     if not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+
+     def _unique_col_name(existing: set[str], desired: str) -> str:
+         """Return a column name that does not collide with existing names."""
+         if desired not in existing:
+             return desired
+         i = 1
+         while f"{desired}_{i}" in existing:
+             i += 1
+         return f"{desired}_{i}"
+
+     def _materialize_index_columns(input_df: pd.DataFrame) -> pd.DataFrame:
+         """Convert Index/MultiIndex into explicit columns."""
+         if isinstance(input_df.index, pd.MultiIndex):
+             index_names: list[str] = []
+             for i, name in enumerate(input_df.index.names):
+                 index_names.append(name or f"__index_level_{i}__")
+
+             existing = set(map(str, input_df.columns))
+             index_names = [_unique_col_name(existing, str(n)) for n in index_names]
+
+             idx_df = input_df.index.to_frame(index=False)
+             idx_df.columns = index_names
+             out_df = pd.concat([idx_df, input_df.reset_index(drop=True)], axis=1)
+             return out_df
+
+         index_name = (
+             str(input_df.index.name)
+             if input_df.index.name is not None
+             else "__index_level_0__"
+         )
+         existing = set(map(str, input_df.columns))
+         index_name = _unique_col_name(existing, index_name)
+
+         out_df = input_df.copy()
+         out_df.insert(0, index_name, out_df.index.to_numpy(copy=False))
+         out_df = out_df.reset_index(drop=True)
+         return out_df
+
+     # Clean columns before schema inference to avoid duplicate/invalid names.
+     df = clean_dataframe_columns(df=df, verbose=verbose)
+
+     if preserve_index:
+         # Preserve index by materializing it into real columns.
+         df = _materialize_index_columns(df)
+
+     if convert:
+         # Attempt numeric coercion so Parquet schema uses numeric types.
+         df = try_cast_string_columns_to_numeric(
+             df=df, convert_partial=partial, verbose=verbose
+         )
+
+     # Build schema explicitly so the Parquet table uses stable, optimized types.
+     schema = generate_parquet_schema(df=df, verbose=verbose)
+     try:
+         # We materialize index into regular columns above so that the schema can
+         # fully describe the resulting table and the index is preserved even
+         # when an explicit schema is supplied.
+         table = pa.Table.from_pandas(df, schema=schema, preserve_index=False)
+         return table
+     except Exception as e:
+         log_error(f"Error generating PyArrow Table: {e}", verbose)
+         raise
+
+
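An end-to-end sketch of the table-building pipeline above (clean columns, materialize the index, coerce numerics, then apply the explicit schema); illustrative only, with a hypothetical output filename, assuming datablade 0.0.6 is installed:

    import pandas as pd
    import pyarrow.parquet as pq
    from datablade.dataframes.frames import pandas_to_parquet_table

    df = pd.DataFrame({"id": ["1", "2"], "name": ["a", "b"]}, index=["r1", "r2"])

    # The index becomes a real "__index_level_0__" column and "id" is coerced
    # to the narrowest integer type before the schema is applied.
    table = pandas_to_parquet_table(df, preserve_index=True)
    print(table.schema)

    # The resulting table can be written out like any other PyArrow table.
    pq.write_table(table, "example.parquet")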
+ def generate_sql_server_create_table_string(
+     df: Optional[pd.DataFrame] = None,
+     catalog: str = "database",
+     schema: str = "dbo",
+     table: str = "table",
+     dropexisting: bool = True,
+     use_go: bool = False,
+     schema_spec: Optional[dict] = None,
+     verbose: bool = False,
+ ) -> str:
+     """
+     Generate a SQL Server CREATE TABLE statement from a pandas DataFrame.
+
+     Args:
+         df: The DataFrame to generate the schema from. If None, returns None.
+         catalog: The database catalog name.
+         schema: The schema name (default: 'dbo').
+         table: The table name.
+         dropexisting: If True, includes DROP TABLE IF EXISTS statement.
+         use_go: If True, inserts GO after USE when a catalog is supplied.
+         verbose: If True, prints progress messages.
+
+     Returns:
+         SQL Server CREATE TABLE statement string with optimized column types,
+         or None if no DataFrame is provided.
+     """
+     if df is None:
+         log_warning(
+             "No DataFrame provided; exiting generate_sql_server_create_table_string.",
+             verbose,
+         )
+         raise ValueError("df must be provided")
+     if not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+     if not isinstance(catalog, str) or not catalog.strip():
+         raise ValueError("catalog must be a non-empty string")
+     if not isinstance(schema, str) or not schema.strip():
+         raise ValueError("schema must be a non-empty string")
+     if not isinstance(table, str) or not table.strip():
+         raise ValueError("table must be a non-empty string")
+
+     from ..sql.ddl import generate_create_table
+     from ..sql.dialects import Dialect
+
+     return generate_create_table(
+         df=df,
+         catalog=catalog,
+         schema=schema,
+         table=table,
+         drop_existing=dropexisting,
+         dialect=Dialect.SQLSERVER,
+         use_go=use_go,
+         schema_spec=schema_spec,
+         verbose=verbose,
+     )
+
+
+ def write_to_file_and_sql(
+     df: pd.DataFrame,
+     file_path: Union[str, pathlib.Path],
+     table_name: str,
+     sql_server: str,
+     database: str,
+     username: Optional[str] = None,
+     password: Optional[str] = None,
+     use_trusted_connection: bool = False,
+     use_azure_ad: bool = False,
+     use_env_credentials: bool = True,
+     verbose: bool = False,
+ ) -> None:
+     """
+     Write a DataFrame to a CSV file and import it to SQL Server using BCP.
+
+     Args:
+         df: The DataFrame to write.
+         file_path: Path where the CSV file will be saved.
+         table_name: Name of the SQL Server table.
+         sql_server: SQL Server instance name.
+         database: Database name.
+         username: SQL Server username.
+         password: SQL Server password.
+         use_trusted_connection: If True, use integrated authentication (-T).
+         use_azure_ad: If True, use Azure AD authentication (-G).
+         use_env_credentials: If True, fall back to DATABLADE_SQLSERVER_USERNAME
+             and DATABLADE_SQLSERVER_PASSWORD when username/password not provided.
+         verbose: If True, prints progress messages.
+
+     Raises:
+         subprocess.CalledProcessError: If BCP command fails.
+     """
+     if df is None or not isinstance(df, pd.DataFrame):
+         raise TypeError("df must be a pandas DataFrame")
+     path_obj = coerce_path(
+         file_path,
+         must_exist=False,
+         verbose=verbose,
+         label="file_path",
+     )
+     if not isinstance(table_name, str) or not table_name.strip():
+         raise ValueError("table_name must be a non-empty string")
+     if not isinstance(sql_server, str) or not sql_server.strip():
+         raise ValueError("sql_server must be a non-empty string")
+     if not isinstance(database, str) or not database.strip():
+         raise ValueError("database must be a non-empty string")
+     if use_trusted_connection and use_azure_ad:
+         raise ValueError(
+             "use_trusted_connection and use_azure_ad are mutually exclusive"
+         )
+     if use_env_credentials:
+         if not username:
+             username = os.getenv("DATABLADE_SQLSERVER_USERNAME")
+         if not password:
+             password = os.getenv("DATABLADE_SQLSERVER_PASSWORD")
+     if not use_trusted_connection:
+         if not isinstance(username, str) or not username.strip():
+             raise ValueError("username must be a non-empty string")
+         if not password and not use_azure_ad:
+             raise ValueError("password must be provided")
+
+     ensure_directory(path_obj.parent, verbose=verbose, label="output_dir")
+
+     df.to_csv(path_obj, index=False)
+     log_info(f"DataFrame written to file {path_obj}.", verbose)
+
+     qualified_table = f"{database}.dbo.{table_name}"
+     bcp_args = [
+         "bcp",
+         qualified_table,
+         "in",
+         str(path_obj),
+         "-c",
+         "-t,",
+         "-S",
+         sql_server,
+     ]
+     bcp_preview = bcp_args[:-1] + ["***REDACTED***"]
+     bcp_path = shutil.which("bcp")
+     if not bcp_path:
+         install_steps = (
+             "Install the SQL Server command line utilities (bcp) and ensure the "
+             "binary is on PATH. For example: "
+             "macOS (Homebrew): brew install msodbcsql17 mssql-tools; "
+             "Linux (Debian/Ubuntu): install mssql-tools; "
+             "Windows: install SQL Server Command Line Utilities and restart your shell."
+         )
+         path_env = os.environ.get("PATH", "")
+         message = (
+             "BCP executable was not found on PATH. "
+             f"PATH={path_env}. {install_steps} "
+             f"Command preview: {bcp_preview}."
+         )
+         log_error(message, verbose)
+         raise FileNotFoundError(message)
+
+     if use_trusted_connection:
+         bcp_args.append("-T")
+     else:
+         if use_azure_ad:
+             bcp_args.append("-G")
+         if username:
+             bcp_args.extend(["-U", username])
+         if password:
+             bcp_args.extend(["-P", password])
+
+     log_debug(
+         f"Executing BCP load to {qualified_table} on server {sql_server} as user {username}.",
+         verbose,
+     )
+     redacted_args = []
+     redact_next = False
+     for arg in bcp_args:
+         if redact_next:
+             redacted_args.append("***REDACTED***")
+             redact_next = False
+             continue
+         redacted_args.append(arg)
+         if arg == "-P":
+             redact_next = True
+     if "-P" in bcp_args:
+         log_warning(
+             "BCP authentication uses -P with a plaintext password. "
+             "Consider using trusted connection (-T) or Azure AD (-G).",
+             verbose,
+         )
+     log_debug(f"BCP args: {redacted_args}", verbose)
+     process = subprocess.run(
+         bcp_args,
+         shell=False,
+         check=True,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+     )
+     if process.returncode == 0:
+         log_info(
+             f"DataFrame successfully written to SQL Server table {table_name}.", verbose
+         )
+     else:
+         error_msg = process.stderr.decode()
+         log_error(
+             "Error writing DataFrame to SQL Server table "
+             f"{table_name}: {error_msg} "
+             f"PATH={os.environ.get('PATH', '')}. "
+             "Ensure BCP is installed (SQL Server command line utilities) and on PATH. "
+             f"Command preview: {bcp_preview}.",
+             verbose,
+         )
+         raise subprocess.CalledProcessError(
+             process.returncode,
+             bcp_args,
+             output=process.stdout,
+             stderr=process.stderr,
+         )
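Closing out the module: a hedged sketch that combines the DDL helper with the BCP loader above. The server, database, and path names are placeholders, and the loader requires the bcp command line utility on PATH plus a reachable SQL Server instance (assuming datablade 0.0.6 is installed):

    import pandas as pd
    from datablade.dataframes.frames import (
        generate_sql_server_create_table_string,
        write_to_file_and_sql,
    )

    df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

    # DDL generation delegates to datablade.sql.ddl.generate_create_table.
    ddl = generate_sql_server_create_table_string(
        df, catalog="analytics", schema="dbo", table="users"
    )
    print(ddl)

    # CSV export followed by a BCP import into analytics.dbo.users.
    write_to_file_and_sql(
        df,
        file_path="./out/users.csv",
        table_name="users",
        sql_server="localhost",
        database="analytics",
        use_trusted_connection=True,  # integrated auth; avoids a plaintext -P password
    )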