datablade-0.0.5-py3-none-any.whl → datablade-0.0.6-py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
@@ -0,0 +1,390 @@
+ """SQL Server-specific SQL helpers."""
+
+ from __future__ import annotations
+
+ import os
+ import pathlib
+ from typing import Any, Iterable, Mapping, Optional, Sequence
+
+ from ..dataframes.readers import parquet_to_csv_partitions
+ from ..utils.logging import log_warning
+ from ..utils.strings import coerce_path
+ from .ddl import _qualify_name
+ from .ddl_pyarrow import generate_create_table_from_parquet
+ from .dialects import Dialect
+ from .quoting import quote_identifier
+
+
+ def sqlserver_openrowset_parquet(
+     parquet_path: str | os.PathLike,
+     *,
+     data_source: Optional[str] = None,
+     table_alias: str = "rows",
+     select_columns: Optional[Sequence[str]] = None,
+     where: Optional[str] = None,
+     top: Optional[int] = None,
+ ) -> str:
+     """Generate a SQL Server OPENROWSET query over Parquet files.
+
+     Args:
+         parquet_path: Path or wildcard to Parquet files (directory or pattern).
+         data_source: Optional external data source name.
+         table_alias: Alias for the OPENROWSET rowset.
+         select_columns: Optional list of columns/expressions to select.
+         where: Optional WHERE clause (without the WHERE keyword).
+         top: Optional TOP limit.
+     """
+     try:
+         path_value = os.fspath(parquet_path)
+     except TypeError as exc:
+         raise TypeError("parquet_path must be a string or pathlib.Path") from exc
+
+     if not isinstance(path_value, str):
+         raise TypeError("parquet_path must be a string or pathlib.Path")
+     if not path_value.strip():
+         raise ValueError("parquet_path must be provided")
+
+     if data_source is not None and (
+         not isinstance(data_source, str) or not data_source.strip()
+     ):
+         raise ValueError("data_source, if provided, must be a non-empty string")
+     if not isinstance(table_alias, str) or not table_alias.strip():
+         raise ValueError("table_alias must be a non-empty string")
+     if top is not None:
+         if not isinstance(top, int) or top <= 0:
+             raise ValueError("top must be a positive integer")
+
+     path_literal = path_value.replace("'", "''")
+     options = [f"BULK '{path_literal}'", "FORMAT = 'PARQUET'"]
+     if data_source:
+         options.append(
+             f"DATA_SOURCE = {quote_identifier(data_source, Dialect.SQLSERVER)}"
+         )
+
+     select_list = "*" if not select_columns else ", ".join(select_columns)
+     top_clause = f"TOP ({top}) " if top is not None else ""
+     alias = quote_identifier(table_alias, Dialect.SQLSERVER)
+
+     statement = (
+         f"SELECT {top_clause}{select_list}\n"
+         "FROM OPENROWSET(\n"
+         f" {', '.join(options)}\n"
+         f") AS {alias}"
+     )
+     if where:
+         statement = f"{statement}\nWHERE {where}"
+     return f"{statement};"
+
+
+ def sqlserver_bulk_insert_statements(
+     csv_files: Iterable[str | os.PathLike],
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     first_row: int = 2,
+     field_terminator: str = ",",
+     row_terminator: str = "0x0a",
+     tablock: bool = True,
+     codepage: Optional[str] = None,
+ ) -> str:
+     """Generate BULK INSERT statements for CSV files."""
+     if not isinstance(table, str) or not table.strip():
+         raise ValueError("table must be a non-empty string")
+     if catalog is not None and (not isinstance(catalog, str) or not catalog.strip()):
+         raise ValueError("catalog, if provided, must be a non-empty string")
+     if schema is not None and (not isinstance(schema, str) or not schema.strip()):
+         raise ValueError("schema, if provided, must be a non-empty string")
+     if not isinstance(first_row, int) or first_row <= 0:
+         raise ValueError("first_row must be a positive integer")
+     if not isinstance(field_terminator, str) or not field_terminator:
+         raise ValueError("field_terminator must be a non-empty string")
+     if not isinstance(row_terminator, str) or not row_terminator:
+         raise ValueError("row_terminator must be a non-empty string")
+     if codepage is not None and (not isinstance(codepage, str) or not codepage.strip()):
+         raise ValueError("codepage, if provided, must be a non-empty string")
+
+     qualified_table = _qualify_name(catalog, schema, table, Dialect.SQLSERVER)
+     statements: list[str] = []
+     for file_path in csv_files:
+         try:
+             path_value = os.fspath(file_path)
+         except TypeError as exc:
+             raise TypeError(
+                 "csv_files must contain strings or pathlib.Path values"
+             ) from exc
+         if not isinstance(path_value, str) or not path_value.strip():
+             raise ValueError("csv_files must contain non-empty paths")
+
+         path_literal = path_value.replace("'", "''")
+         options = [
+             f"FIRSTROW = {first_row}",
+             f"FIELDTERMINATOR = '{field_terminator}'",
+             f"ROWTERMINATOR = '{row_terminator}'",
+         ]
+         if tablock:
+             options.append("TABLOCK")
+         if codepage:
+             options.append(f"CODEPAGE = '{codepage}'")
+         options_sql = ", ".join(options)
+         statements.append(
+             f"BULK INSERT {qualified_table} FROM '{path_literal}' WITH ({options_sql});"
+         )
+
+     if not statements:
+         raise ValueError("csv_files must contain at least one path")
+
+     return "\n".join(statements)
+
+
+ def sqlserver_create_and_insert_from_parquet(
+     parquet_path: str | os.PathLike,
+     output_dir: str | os.PathLike,
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     drop_existing: bool = True,
+     use_go: bool = False,
+     schema_spec: Optional[Mapping[str, Any]] = None,
+     rows_per_file: Optional[int] = None,
+     memory_fraction: float = 0.5,
+     convert_types: bool = True,
+     output_prefix: str = "part",
+     delimiter: str = ",",
+     include_header: bool = True,
+     line_terminator: str = "\n",
+     first_row: Optional[int] = None,
+     tablock: bool = True,
+     codepage: Optional[str] = None,
+     fallback_to_json: bool = False,
+     verbose: bool = False,
+ ) -> tuple[str, list[pathlib.Path]]:
+     """Create a SQL Server table from Parquet and generate CSV bulk insert SQL."""
+     import pyarrow.parquet as pq
+
+     path_obj = coerce_path(
+         parquet_path,
+         must_exist=True,
+         verbose=verbose,
+         label="parquet_path",
+     )
+     ddl, metadata = generate_create_table_from_parquet(
+         parquet_path=parquet_path,
+         catalog=catalog,
+         schema=schema,
+         table=table,
+         drop_existing=drop_existing,
+         use_go=use_go,
+         schema_spec=schema_spec,
+         dialect=Dialect.SQLSERVER,
+         verbose=verbose,
+         fallback_to_json=fallback_to_json,
+         return_metadata=True,
+     )
+
+     drop_columns: list[str] = []
+     if not fallback_to_json and metadata.dropped_columns:
+         drop_columns = [col.name for col in metadata.dropped_columns]
+
+     ref_schema = pq.ParquetFile(path_obj).schema_arrow
+     column_order = [
+         field.name for field in ref_schema if field.name not in drop_columns
+     ]
+
+     csv_files = parquet_to_csv_partitions(
+         file_path=parquet_path,
+         output_dir=output_dir,
+         output_prefix=output_prefix,
+         rows_per_file=rows_per_file,
+         memory_fraction=memory_fraction,
+         convert_types=convert_types,
+         verbose=verbose,
+         delimiter=delimiter,
+         include_header=include_header,
+         line_terminator=line_terminator,
+         drop_columns=drop_columns,
+         column_order=column_order,
+         drop_extra_columns=True,
+     )
+
+     if first_row is None:
+         first_row = 2 if include_header else 1
+
+     sql = sqlserver_bulk_insert_statements(
+         csv_files,
+         table=table,
+         catalog=catalog,
+         schema=schema,
+         first_row=first_row,
+         field_terminator=delimiter,
+         row_terminator=_sqlserver_row_terminator(line_terminator),
+         tablock=tablock,
+         codepage=codepage,
+     )
+
+     return f"{ddl}\n{sql}", csv_files
+
+
+ def sqlserver_create_and_stage_from_parquets(
+     parquet_paths: Sequence[str | os.PathLike],
+     output_dir: str | os.PathLike,
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     drop_existing: bool = True,
+     use_go: bool = False,
+     schema_spec: Optional[Mapping[str, Any]] = None,
+     rows_per_file: Optional[int] = None,
+     memory_fraction: float = 0.5,
+     convert_types: bool = True,
+     output_prefix: str = "part",
+     delimiter: str = ",",
+     include_header: bool = True,
+     line_terminator: str = "\n",
+     fallback_to_json: bool = False,
+     schema_strict: bool = True,
+     verbose: bool = False,
+ ) -> tuple[str, list[pathlib.Path]]:
+     """Generate SQL Server DDL and stage multiple Parquet files as CSV partitions.
+
+     Returns the CREATE TABLE DDL (derived from the first Parquet file) and a list
+     of CSV files produced from all Parquet inputs. Use bulk_load_sqlserver_many()
+     to load the returned CSV files via BCP.
+
+     Schema drift guard:
+         - Missing columns (vs. first file) raise a ValueError.
+         - Extra columns or type mismatches raise when schema_strict=True.
+         - When schema_strict=False, extra columns are dropped and type mismatches
+           are logged as warnings.
+     """
+     import pyarrow.parquet as pq
+
+     if parquet_paths is None:
+         raise ValueError("parquet_paths must be provided")
+     if not isinstance(parquet_paths, (list, tuple)):
+         raise TypeError("parquet_paths must be a list or tuple of paths")
+     if not parquet_paths:
+         raise ValueError("parquet_paths must contain at least one path")
+
+     first_path = coerce_path(
+         parquet_paths[0],
+         must_exist=True,
+         verbose=verbose,
+         label="parquet_paths[0]",
+     )
+     if first_path.suffix.lower() != ".parquet":
+         raise ValueError("parquet_paths must point to .parquet files")
+
+     if not isinstance(schema_strict, bool):
+         raise TypeError("schema_strict must be a boolean")
+
+     ddl, metadata = generate_create_table_from_parquet(
+         parquet_path=first_path,
+         catalog=catalog,
+         schema=schema,
+         table=table,
+         drop_existing=drop_existing,
+         use_go=use_go,
+         schema_spec=schema_spec,
+         dialect=Dialect.SQLSERVER,
+         verbose=verbose,
+         fallback_to_json=fallback_to_json,
+         return_metadata=True,
+     )
+
+     drop_columns: list[str] = []
+     if not fallback_to_json and metadata.dropped_columns:
+         drop_columns = [col.name for col in metadata.dropped_columns]
+
+     ref_schema = pq.ParquetFile(first_path).schema_arrow
+     ref_columns = [field.name for field in ref_schema if field.name not in drop_columns]
+     ref_types = {
+         field.name: field.type for field in ref_schema if field.name not in drop_columns
+     }
+
+     csv_files: list[pathlib.Path] = []
+     for index, parquet_path in enumerate(parquet_paths):
+         path_obj = coerce_path(
+             parquet_path,
+             must_exist=True,
+             verbose=verbose,
+             label=f"parquet_paths[{index}]",
+         )
+         if path_obj.suffix.lower() != ".parquet":
+             raise ValueError("parquet_paths must point to .parquet files")
+
+         current_schema = pq.ParquetFile(path_obj).schema_arrow
+         current_columns = [
+             field.name for field in current_schema if field.name not in drop_columns
+         ]
+         current_types = {
+             field.name: field.type
+             for field in current_schema
+             if field.name not in drop_columns
+         }
+
+         missing = [c for c in ref_columns if c not in current_columns]
+         if missing:
+             raise ValueError(
+                 f"Schema drift detected in {path_obj}: missing columns {missing}."
+             )
+
+         extra = [c for c in current_columns if c not in ref_columns]
+         if extra:
+             message = f"Schema drift detected in {path_obj}: extra columns {extra}."
+             if schema_strict:
+                 raise ValueError(message)
+             log_warning(message, verbose)
+
+         type_mismatches = [
+             (col, ref_types[col], current_types[col])
+             for col in ref_columns
+             if col in current_types and current_types[col] != ref_types[col]
+         ]
+         if type_mismatches:
+             details = ", ".join(
+                 f"{col} (expected {expected}, got {actual})"
+                 for col, expected, actual in type_mismatches
+             )
+             message = f"Schema drift detected in {path_obj}: type mismatches {details}."
+             if schema_strict:
+                 raise ValueError(message)
+             log_warning(message, verbose)
+
+         if current_columns != ref_columns:
+             log_warning(
+                 f"Column order mismatch detected in {path_obj}; "
+                 "reordering to match the reference schema.",
+                 verbose,
+             )
+
+         prefix = f"{output_prefix}_{index:05d}"
+         csv_files.extend(
+             parquet_to_csv_partitions(
+                 file_path=path_obj,
+                 output_dir=output_dir,
+                 output_prefix=prefix,
+                 rows_per_file=rows_per_file,
+                 memory_fraction=memory_fraction,
+                 convert_types=convert_types,
+                 verbose=verbose,
+                 delimiter=delimiter,
+                 include_header=include_header,
+                 line_terminator=line_terminator,
+                 drop_columns=drop_columns,
+                 column_order=ref_columns,
+                 drop_extra_columns=True,
+             )
+         )
+
+     return ddl, csv_files
+
+
+ def _sqlserver_row_terminator(line_terminator: str) -> str:
+     if line_terminator == "\n":
+         return "0x0a"
+     if line_terminator == "\r\n":
+         return "0x0d0a"
+     return line_terminator
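
The new helpers cover both query-in-place and staged-load workflows. A minimal sketch of how they compose; the import path is an assumption, since this hunk does not name the new file:

# Hypothetical import path -- the diff does not show the new module's filename.
from datablade.sql.sqlserver import (
    sqlserver_create_and_insert_from_parquet,
    sqlserver_openrowset_parquet,
)

# Query Parquet in place via OPENROWSET, without loading it first.
query = sqlserver_openrowset_parquet(
    "/data/events/*.parquet",
    table_alias="events",
    select_columns=["user_id", "event_type"],
    where="event_type = 'click'",
    top=100,
)

# Or emit CREATE TABLE DDL plus BULK INSERT statements over staged CSV partitions.
sql, csv_files = sqlserver_create_and_insert_from_parquet(
    "/data/events/2024.parquet",
    "/tmp/stage",
    table="events",
    schema="dbo",
)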
@@ -19,11 +19,12 @@ from .logging import (
      log_info,
      log_warning,
  )
- from .strings import pathing, sql_quotename
+ from .strings import configure_paths, pathing, sql_quotename

  __all__ = [
      "sql_quotename",
      "pathing",
+     "configure_paths",
      "flatten",
      # Logging
      "get_logger",
datablade/utils/lists.py CHANGED
@@ -1,3 +1,5 @@
+ """List helpers for common transformations."""
+
  from typing import Any, List


@@ -20,6 +22,7 @@ def flatten(nest: List[Any]) -> List[Any]:
      if not isinstance(nest, list):
          raise TypeError("nest must be a list")

+     # Build a new list so the caller's input is untouched.
      result = []
      for item in nest:
          if isinstance(item, list):
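
Only the head of flatten()'s body is visible in this hunk; assuming it recursively flattens nested lists into a new list (consistent with the added comment), usage looks like:

from datablade.utils.lists import flatten

nested = [1, [2, [3, 4]], 5]
assert flatten(nested) == [1, 2, 3, 4, 5]  # assumes full recursive flattening
assert nested == [1, [2, [3, 4]], 5]       # the caller's input is untouched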
@@ -8,7 +8,9 @@ handlers, levels, and formatters as needed.

  import logging
  import pathlib
- from typing import Any, Optional
+ import time
+ from contextlib import contextmanager
+ from typing import Any, Iterator, Mapping, Optional

  # Create the datablade logger
  _logger = logging.getLogger("datablade")
@@ -142,6 +144,49 @@ def log_error(message: Any, verbose: bool = True) -> None:
      log(message, logging.ERROR, verbose)


+ def build_log_context(
+     *,
+     file_path: Optional[str | pathlib.Path] = None,
+     chunk_index: Optional[int] = None,
+     **fields: Any,
+ ) -> dict[str, Any]:
+     """Build a logging context dict with common fields like file and chunk."""
+     context = dict(fields)
+     if file_path is not None:
+         context.setdefault("file", pathlib.Path(file_path).name)
+     if chunk_index is not None:
+         context.setdefault("chunk", chunk_index)
+     return context
+
+
+ def format_log_context(context: Optional[Mapping[str, Any]]) -> str:
+     """Format a context mapping into a compact suffix for log messages."""
+     if not context:
+         return ""
+
+     parts: list[str] = []
+     for key, value in context.items():
+         if value is None:
+             continue
+         parts.append(f"{key}={value}")
+
+     if not parts:
+         return ""
+
+     return f" ({', '.join(parts)})"
+
+
+ @contextmanager
+ def timed_step(name: str, *, verbose: bool = True) -> Iterator[None]:
+     """Measure elapsed time for a block and log on exit."""
+     start = time.perf_counter()
+     try:
+         yield
+     finally:
+         duration = time.perf_counter() - start
+         log_info(f"{name} took {duration:.2f}s", verbose)
+
+
  # Backward compatibility alias
  def print_verbose(message: Any, verbose: bool = True) -> None:
      """
@@ -1,8 +1,181 @@
+ """String and path helpers used across datablade."""
+
+ import os
  import pathlib
+ from functools import singledispatch
  from typing import Optional, Union

+ from .logging import log_warning
  from .messages import print_verbose

+ PathInput = Union[str, pathlib.Path, os.PathLike]
+ _PATH_STRICT_DEFAULT = False
+
+
+ def configure_paths(*, path_strict: bool = False) -> None:
+     """Configure global path handling behavior."""
+     global _PATH_STRICT_DEFAULT
+     _PATH_STRICT_DEFAULT = bool(path_strict)
+
+
+ def _resolve_path_strict(path_strict: Optional[bool]) -> bool:
+     if path_strict is None:
+         return _PATH_STRICT_DEFAULT
+     return bool(path_strict)
+
+
+ @singledispatch
+ def _coerce_path_input(value: object, type_label: str) -> str:
+     raise TypeError(f"{type_label} must be a string or pathlib.Path")
+
+
+ @_coerce_path_input.register
+ def _(value: str, type_label: str) -> str:
+     return value
+
+
+ @_coerce_path_input.register
+ def _(value: pathlib.Path, type_label: str) -> str:
+     return str(value)
+
+
+ @_coerce_path_input.register
+ def _(value: os.PathLike, type_label: str) -> str:
+     path_value = os.fspath(value)
+     if isinstance(path_value, bytes):
+         raise TypeError(f"{type_label} must be a string or pathlib.Path")
+     return path_value
+
+
+ @_coerce_path_input.register
+ def _(value: bytes, type_label: str) -> str:
+     raise TypeError(f"{type_label} must be a string or pathlib.Path")
+
+
+ def _normalize_path_value(path_value: str) -> str:
+     if os.name == "nt":
+         return path_value.replace("\\", "/")
+     return path_value
+
+
+ def _find_case_conflicts(
+     path_obj: pathlib.Path,
+     raw_path: str,
+ ) -> list[tuple[str, str, str]]:
+     """Return case mismatches as (provided, actual, parent) tuples."""
+     if not raw_path:
+         return []
+
+     try:
+         input_path = pathlib.Path(raw_path)
+     except Exception:
+         input_path = path_obj
+
+     if input_path.is_absolute():
+         anchor = input_path.anchor
+         if anchor:
+             current = pathlib.Path(anchor)
+             anchor_parts = pathlib.Path(anchor).parts
+             remaining_parts = input_path.parts[len(anchor_parts) :]
+         else:
+             current = pathlib.Path(anchor)
+             remaining_parts = input_path.parts
+     else:
+         current = pathlib.Path.cwd()
+         remaining_parts = input_path.parts
+
+     mismatches: list[tuple[str, str, str]] = []
+     for part in remaining_parts:
+         if part in ("", "."):
+             continue
+         if part == "..":
+             current = current.parent
+             continue
+         try:
+             with os.scandir(current) as entries:
+                 actual = None
+                 for entry in entries:
+                     if entry.name.casefold() == part.casefold():
+                         actual = entry.name
+                         break
+         except OSError:
+             break
+
+         if actual is None:
+             current = current / part
+             continue
+         if actual != part:
+             mismatches.append((part, actual, str(current)))
+         current = current / actual
+
+     return mismatches
+
+
+ def coerce_path(
+     input: Optional[PathInput],
+     *,
+     must_exist: bool = False,
+     verbose: bool = False,
+     label: str = "path",
+     path_strict: Optional[bool] = None,
+     type_label: Optional[str] = None,
+ ) -> pathlib.Path:
+     """Normalize a path-like input and optionally validate existence and case."""
+     if input is None:
+         print_verbose(f"No {label} provided; exiting.", verbose)
+         raise ValueError(f"{label} must be provided")
+
+     type_label = type_label or label
+     path_value = _coerce_path_input(input, type_label)
+
+     if not path_value.strip():
+         print_verbose(f"No {label} provided; exiting.", verbose)
+         raise ValueError(f"{label} must be provided")
+
+     normalized = _normalize_path_value(path_value)
+     path_obj = pathlib.Path(normalized)
+     exists = path_obj.exists()
+
+     if must_exist and not exists:
+         print_verbose(f"Path {path_obj} does not exist; exiting.", verbose)
+         raise ValueError(f"Path does not exist: {path_obj}")
+
+     if exists:
+         strict = _resolve_path_strict(path_strict)
+         conflicts = _find_case_conflicts(path_obj, path_value)
+         if conflicts:
+             details = "; ".join(
+                 f"{provided} -> {actual} in {parent}"
+                 for provided, actual, parent in conflicts
+             )
+             message = f"Path case mismatch for {label}: {details}"
+             if strict:
+                 raise ValueError(message)
+             log_warning(message, verbose)
+
+     return path_obj
+
+
+ def ensure_directory(
+     input: Optional[PathInput],
+     *,
+     verbose: bool = False,
+     label: str = "path",
+     path_strict: Optional[bool] = None,
+     type_label: Optional[str] = None,
+ ) -> pathlib.Path:
+     """Ensure a directory exists and return the resolved path."""
+     path_obj = coerce_path(
+         input,
+         must_exist=False,
+         verbose=verbose,
+         label=label,
+         path_strict=path_strict,
+         type_label=type_label,
+     )
+     path_obj.mkdir(parents=True, exist_ok=True)
+     return path_obj
+

  def sql_quotename(
      name: Optional[str] = None,
@@ -67,20 +240,10 @@ def pathing(
          ValueError: If input is None or the path does not exist.
          TypeError: If input is not a string or pathlib.Path.
      """
-     if input is None:
-         print_verbose("No path provided; exiting pathing.", verbose)
-         raise ValueError("path input must be provided")
-
-     if isinstance(input, str):
-         normalized = input.replace("\\", "/")
-         path_obj = pathlib.Path(normalized)
-     elif isinstance(input, pathlib.Path):
-         path_obj = input
-     else:
-         raise TypeError("input must be a string or pathlib.Path")
-
-     if path_obj.exists():
-         return path_obj
-
-     print_verbose(f"Path {path_obj} does not exist; exiting pathing.", verbose)
-     raise ValueError(f"Path does not exist: {path_obj}")
+     return coerce_path(
+         input,
+         must_exist=True,
+         verbose=verbose,
+         label="path input",
+         type_label="input",
+     )
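
With pathing() now a thin wrapper over coerce_path(), strictness can be set per call or globally. A short sketch, assuming this hunk is datablade/utils/strings.py:

from datablade.utils.strings import (
    configure_paths,
    coerce_path,
    ensure_directory,
    pathing,
)

out = ensure_directory("./output/run_01")  # created if missing, returned as pathlib.Path

# On a case-insensitive filesystem, "RUN_01" resolves but mismatches the on-disk
# name created above; by default coerce_path() logs a warning and continues.
coerce_path("./output/RUN_01", verbose=True)

configure_paths(path_strict=True)  # from now on, case mismatches raise ValueError
existing = pathing("./output/run_01")  # old contract (must exist), new machinery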