datablade-0.0.0-py3-none-any.whl → datablade-0.0.6-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only and reflects the changes between the two versions.
Files changed (43)
  1. datablade/__init__.py +49 -1
  2. datablade/blade.py +322 -0
  3. datablade/core/__init__.py +28 -7
  4. datablade/core/frames.py +23 -236
  5. datablade/core/json.py +5 -10
  6. datablade/core/lists.py +5 -10
  7. datablade/core/messages.py +23 -11
  8. datablade/core/strings.py +5 -43
  9. datablade/core/zip.py +5 -24
  10. datablade/dataframes/__init__.py +51 -0
  11. datablade/dataframes/frames.py +585 -0
  12. datablade/dataframes/readers.py +1367 -0
  13. datablade/docs/ARCHITECTURE.md +102 -0
  14. datablade/docs/OBJECT_REGISTRY.md +194 -0
  15. datablade/docs/README.md +57 -0
  16. datablade/docs/TESTING.md +37 -0
  17. datablade/docs/USAGE.md +409 -0
  18. datablade/docs/__init__.py +87 -0
  19. datablade/docs/__main__.py +6 -0
  20. datablade/io/__init__.py +15 -0
  21. datablade/io/json.py +70 -0
  22. datablade/io/zip.py +111 -0
  23. datablade/registry.py +581 -0
  24. datablade/sql/__init__.py +56 -0
  25. datablade/sql/bulk_load.py +665 -0
  26. datablade/sql/ddl.py +402 -0
  27. datablade/sql/ddl_pyarrow.py +411 -0
  28. datablade/sql/dialects.py +12 -0
  29. datablade/sql/quoting.py +44 -0
  30. datablade/sql/schema_spec.py +65 -0
  31. datablade/sql/sqlserver.py +390 -0
  32. datablade/utils/__init__.py +38 -0
  33. datablade/utils/lists.py +32 -0
  34. datablade/utils/logging.py +204 -0
  35. datablade/utils/messages.py +29 -0
  36. datablade/utils/strings.py +249 -0
  37. datablade-0.0.6.dist-info/METADATA +406 -0
  38. datablade-0.0.6.dist-info/RECORD +41 -0
  39. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/WHEEL +1 -1
  40. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info/licenses}/LICENSE +20 -20
  41. datablade-0.0.0.dist-info/METADATA +0 -13
  42. datablade-0.0.0.dist-info/RECORD +0 -13
  43. {datablade-0.0.0.dist-info → datablade-0.0.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,390 @@
+ """SQL Server-specific SQL helpers."""
+
+ from __future__ import annotations
+
+ import os
+ import pathlib
+ from typing import Any, Iterable, Mapping, Optional, Sequence
+
+ from ..dataframes.readers import parquet_to_csv_partitions
+ from ..utils.logging import log_warning
+ from ..utils.strings import coerce_path
+ from .ddl import _qualify_name
+ from .ddl_pyarrow import generate_create_table_from_parquet
+ from .dialects import Dialect
+ from .quoting import quote_identifier
+
+
+ def sqlserver_openrowset_parquet(
+     parquet_path: str | os.PathLike,
+     *,
+     data_source: Optional[str] = None,
+     table_alias: str = "rows",
+     select_columns: Optional[Sequence[str]] = None,
+     where: Optional[str] = None,
+     top: Optional[int] = None,
+ ) -> str:
+     """Generate a SQL Server OPENROWSET query over Parquet files.
+
+     Args:
+         parquet_path: Path or wildcard to Parquet files (directory or pattern).
+         data_source: Optional external data source name.
+         table_alias: Alias for the OPENROWSET rowset.
+         select_columns: Optional list of columns/expressions to select.
+         where: Optional WHERE clause (without the WHERE keyword).
+         top: Optional TOP limit.
+     """
+     try:
+         path_value = os.fspath(parquet_path)
+     except TypeError as exc:
+         raise TypeError("parquet_path must be a string or pathlib.Path") from exc
+
+     if not isinstance(path_value, str):
+         raise TypeError("parquet_path must be a string or pathlib.Path")
+     if not path_value.strip():
+         raise ValueError("parquet_path must be provided")
+
+     if data_source is not None and (
+         not isinstance(data_source, str) or not data_source.strip()
+     ):
+         raise ValueError("data_source, if provided, must be a non-empty string")
+     if not isinstance(table_alias, str) or not table_alias.strip():
+         raise ValueError("table_alias must be a non-empty string")
+     if top is not None:
+         if not isinstance(top, int) or top <= 0:
+             raise ValueError("top must be a positive integer")
+
+     path_literal = path_value.replace("'", "''")
+     options = [f"BULK '{path_literal}'", "FORMAT = 'PARQUET'"]
+     if data_source:
+         options.append(
+             f"DATA_SOURCE = {quote_identifier(data_source, Dialect.SQLSERVER)}"
+         )
+
+     select_list = "*" if not select_columns else ", ".join(select_columns)
+     top_clause = f"TOP ({top}) " if top is not None else ""
+     alias = quote_identifier(table_alias, Dialect.SQLSERVER)
+
+     statement = (
+         f"SELECT {top_clause}{select_list}\n"
+         "FROM OPENROWSET(\n"
+         f" {', '.join(options)}\n"
+         f") AS {alias}"
+     )
+     if where:
+         statement = f"{statement}\nWHERE {where}"
+     return f"{statement};"
+
+
+ def sqlserver_bulk_insert_statements(
+     csv_files: Iterable[str | os.PathLike],
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     first_row: int = 2,
+     field_terminator: str = ",",
+     row_terminator: str = "0x0a",
+     tablock: bool = True,
+     codepage: Optional[str] = None,
+ ) -> str:
+     """Generate BULK INSERT statements for CSV files."""
+     if not isinstance(table, str) or not table.strip():
+         raise ValueError("table must be a non-empty string")
+     if catalog is not None and (not isinstance(catalog, str) or not catalog.strip()):
+         raise ValueError("catalog, if provided, must be a non-empty string")
+     if schema is not None and (not isinstance(schema, str) or not schema.strip()):
+         raise ValueError("schema, if provided, must be a non-empty string")
+     if not isinstance(first_row, int) or first_row <= 0:
+         raise ValueError("first_row must be a positive integer")
+     if not isinstance(field_terminator, str) or not field_terminator:
+         raise ValueError("field_terminator must be a non-empty string")
+     if not isinstance(row_terminator, str) or not row_terminator:
+         raise ValueError("row_terminator must be a non-empty string")
+     if codepage is not None and (not isinstance(codepage, str) or not codepage.strip()):
+         raise ValueError("codepage, if provided, must be a non-empty string")
+
+     qualified_table = _qualify_name(catalog, schema, table, Dialect.SQLSERVER)
+     statements: list[str] = []
+     for file_path in csv_files:
+         try:
+             path_value = os.fspath(file_path)
+         except TypeError as exc:
+             raise TypeError(
+                 "csv_files must contain strings or pathlib.Path values"
+             ) from exc
+         if not isinstance(path_value, str) or not path_value.strip():
+             raise ValueError("csv_files must contain non-empty paths")
+
+         path_literal = path_value.replace("'", "''")
+         options = [
+             f"FIRSTROW = {first_row}",
+             f"FIELDTERMINATOR = '{field_terminator}'",
+             f"ROWTERMINATOR = '{row_terminator}'",
+         ]
+         if tablock:
+             options.append("TABLOCK")
+         if codepage:
+             options.append(f"CODEPAGE = '{codepage}'")
+         options_sql = ", ".join(options)
+         statements.append(
+             f"BULK INSERT {qualified_table} FROM '{path_literal}' WITH ({options_sql});"
+         )
+
+     if not statements:
+         raise ValueError("csv_files must contain at least one path")
+
+     return "\n".join(statements)
+
+
+ def sqlserver_create_and_insert_from_parquet(
+     parquet_path: str | os.PathLike,
+     output_dir: str | os.PathLike,
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     drop_existing: bool = True,
+     use_go: bool = False,
+     schema_spec: Optional[Mapping[str, Any]] = None,
+     rows_per_file: Optional[int] = None,
+     memory_fraction: float = 0.5,
+     convert_types: bool = True,
+     output_prefix: str = "part",
+     delimiter: str = ",",
+     include_header: bool = True,
+     line_terminator: str = "\n",
+     first_row: Optional[int] = None,
+     tablock: bool = True,
+     codepage: Optional[str] = None,
+     fallback_to_json: bool = False,
+     verbose: bool = False,
+ ) -> tuple[str, list[pathlib.Path]]:
+     """Create a SQL Server table from Parquet and generate CSV bulk insert SQL."""
+     import pyarrow.parquet as pq
+
+     path_obj = coerce_path(
+         parquet_path,
+         must_exist=True,
+         verbose=verbose,
+         label="parquet_path",
+     )
+     ddl, metadata = generate_create_table_from_parquet(
+         parquet_path=parquet_path,
+         catalog=catalog,
+         schema=schema,
+         table=table,
+         drop_existing=drop_existing,
+         use_go=use_go,
+         schema_spec=schema_spec,
+         dialect=Dialect.SQLSERVER,
+         verbose=verbose,
+         fallback_to_json=fallback_to_json,
+         return_metadata=True,
+     )
+
+     drop_columns: list[str] = []
+     if not fallback_to_json and metadata.dropped_columns:
+         drop_columns = [col.name for col in metadata.dropped_columns]
+
+     ref_schema = pq.ParquetFile(path_obj).schema_arrow
+     column_order = [
+         field.name for field in ref_schema if field.name not in drop_columns
+     ]
+
+     csv_files = parquet_to_csv_partitions(
+         file_path=parquet_path,
+         output_dir=output_dir,
+         output_prefix=output_prefix,
+         rows_per_file=rows_per_file,
+         memory_fraction=memory_fraction,
+         convert_types=convert_types,
+         verbose=verbose,
+         delimiter=delimiter,
+         include_header=include_header,
+         line_terminator=line_terminator,
+         drop_columns=drop_columns,
+         column_order=column_order,
+         drop_extra_columns=True,
+     )
+
+     if first_row is None:
+         first_row = 2 if include_header else 1
+
+     sql = sqlserver_bulk_insert_statements(
+         csv_files,
+         table=table,
+         catalog=catalog,
+         schema=schema,
+         first_row=first_row,
+         field_terminator=delimiter,
+         row_terminator=_sqlserver_row_terminator(line_terminator),
+         tablock=tablock,
+         codepage=codepage,
+     )
+
+     return f"{ddl}\n{sql}", csv_files
+
+
+ def sqlserver_create_and_stage_from_parquets(
+     parquet_paths: Sequence[str | os.PathLike],
+     output_dir: str | os.PathLike,
+     *,
+     table: str,
+     catalog: Optional[str] = None,
+     schema: Optional[str] = None,
+     drop_existing: bool = True,
+     use_go: bool = False,
+     schema_spec: Optional[Mapping[str, Any]] = None,
+     rows_per_file: Optional[int] = None,
+     memory_fraction: float = 0.5,
+     convert_types: bool = True,
+     output_prefix: str = "part",
+     delimiter: str = ",",
+     include_header: bool = True,
+     line_terminator: str = "\n",
+     fallback_to_json: bool = False,
+     schema_strict: bool = True,
+     verbose: bool = False,
+ ) -> tuple[str, list[pathlib.Path]]:
+     """Generate SQL Server DDL and stage multiple Parquet files as CSV partitions.
+
+     Returns the CREATE TABLE DDL (derived from the first Parquet file) and a list
+     of CSV files produced from all Parquet inputs. Use bulk_load_sqlserver_many()
+     to load the returned CSV files via BCP.
+
+     Schema drift guard:
+         - Missing columns (vs. first file) raise a ValueError.
+         - Extra columns or type mismatches raise when schema_strict=True.
+         - When schema_strict=False, extra columns are dropped and type mismatches
+           are logged as warnings.
+     """
+     import pyarrow.parquet as pq
+
+     if parquet_paths is None:
+         raise ValueError("parquet_paths must be provided")
+     if not isinstance(parquet_paths, (list, tuple)):
+         raise TypeError("parquet_paths must be a list or tuple of paths")
+     if not parquet_paths:
+         raise ValueError("parquet_paths must contain at least one path")
+
+     first_path = coerce_path(
+         parquet_paths[0],
+         must_exist=True,
+         verbose=verbose,
+         label="parquet_paths[0]",
+     )
+     if first_path.suffix.lower() != ".parquet":
+         raise ValueError("parquet_paths must point to .parquet files")
+
+     if not isinstance(schema_strict, bool):
+         raise TypeError("schema_strict must be a boolean")
+
+     ddl, metadata = generate_create_table_from_parquet(
+         parquet_path=first_path,
+         catalog=catalog,
+         schema=schema,
+         table=table,
+         drop_existing=drop_existing,
+         use_go=use_go,
+         schema_spec=schema_spec,
+         dialect=Dialect.SQLSERVER,
+         verbose=verbose,
+         fallback_to_json=fallback_to_json,
+         return_metadata=True,
+     )
+
+     drop_columns: list[str] = []
+     if not fallback_to_json and metadata.dropped_columns:
+         drop_columns = [col.name for col in metadata.dropped_columns]
+
+     ref_schema = pq.ParquetFile(first_path).schema_arrow
+     ref_columns = [field.name for field in ref_schema if field.name not in drop_columns]
+     ref_types = {
+         field.name: field.type for field in ref_schema if field.name not in drop_columns
+     }
+
+     csv_files: list[pathlib.Path] = []
+     for index, parquet_path in enumerate(parquet_paths):
+         path_obj = coerce_path(
+             parquet_path,
+             must_exist=True,
+             verbose=verbose,
+             label=f"parquet_paths[{index}]",
+         )
+         if path_obj.suffix.lower() != ".parquet":
+             raise ValueError("parquet_paths must point to .parquet files")
+
+         current_schema = pq.ParquetFile(path_obj).schema_arrow
+         current_columns = [
+             field.name for field in current_schema if field.name not in drop_columns
+         ]
+         current_types = {
+             field.name: field.type
+             for field in current_schema
+             if field.name not in drop_columns
+         }
+
+         missing = [c for c in ref_columns if c not in current_columns]
+         if missing:
+             raise ValueError(
+                 f"Schema drift detected in {path_obj}: missing columns {missing}."
+             )
+
+         extra = [c for c in current_columns if c not in ref_columns]
+         if extra:
+             message = f"Schema drift detected in {path_obj}: extra columns {extra}."
+             if schema_strict:
+                 raise ValueError(message)
+             log_warning(message, verbose)
+
+         type_mismatches = [
+             (col, ref_types[col], current_types[col])
+             for col in ref_columns
+             if col in current_types and current_types[col] != ref_types[col]
+         ]
+         if type_mismatches:
+             details = ", ".join(
+                 f"{col} (expected {expected}, got {actual})"
+                 for col, expected, actual in type_mismatches
+             )
+             message = f"Schema drift detected in {path_obj}: type mismatches {details}."
+             if schema_strict:
+                 raise ValueError(message)
+             log_warning(message, verbose)
+
+         if current_columns != ref_columns:
+             log_warning(
+                 f"Column order mismatch detected in {path_obj}; "
+                 "reordering to match the reference schema.",
+                 verbose,
+             )
+
+         prefix = f"{output_prefix}_{index:05d}"
+         csv_files.extend(
+             parquet_to_csv_partitions(
+                 file_path=path_obj,
+                 output_dir=output_dir,
+                 output_prefix=prefix,
+                 rows_per_file=rows_per_file,
+                 memory_fraction=memory_fraction,
+                 convert_types=convert_types,
+                 verbose=verbose,
+                 delimiter=delimiter,
+                 include_header=include_header,
+                 line_terminator=line_terminator,
+                 drop_columns=drop_columns,
+                 column_order=ref_columns,
+                 drop_extra_columns=True,
+             )
+         )
+
+     return ddl, csv_files
+
+
+ def _sqlserver_row_terminator(line_terminator: str) -> str:
+     if line_terminator == "\n":
+         return "0x0a"
+     if line_terminator == "\r\n":
+         return "0x0d0a"
+     return line_terminator
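
As a quick orientation to datablade/sql/sqlserver.py above, here is a minimal usage sketch based only on the signatures shown in this diff; the file paths, table name, schema, and data source are illustrative placeholders, and the imports are written against the module path (the datablade.sql package may also re-export these helpers).

    # Hedged sketch: generate T-SQL text from the new helpers (nothing is executed here).
    from datablade.sql.sqlserver import (
        sqlserver_bulk_insert_statements,
        sqlserver_openrowset_parquet,
    )

    # Ad-hoc SELECT over Parquet files via OPENROWSET (placeholder path and data source).
    query = sqlserver_openrowset_parquet(
        "/data/events/*.parquet",
        data_source="events_blob_source",
        select_columns=["event_id", "event_time"],
        where="event_time >= '2024-01-01'",
        top=100,
    )

    # BULK INSERT statements for CSV partitions staged beforehand (placeholder paths).
    load_sql = sqlserver_bulk_insert_statements(
        ["/staging/part_00000.csv", "/staging/part_00001.csv"],
        table="events",
        schema="dbo",
        codepage="65001",
    )

    print(query)
    print(load_sql)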
@@ -0,0 +1,38 @@
+ """
+ General utility functions for common operations.
+
+ This module provides functions for:
+ - String manipulation and SQL name quoting
+ - List operations (flattening)
+ - Logging and messaging
+ - Path standardization
+ """
+
+ from .lists import flatten
+ from .logging import print_verbose  # backward compatibility
+ from .logging import (
+     configure_logging,
+     get_logger,
+     log,
+     log_debug,
+     log_error,
+     log_info,
+     log_warning,
+ )
+ from .strings import configure_paths, pathing, sql_quotename
+
+ __all__ = [
+     "sql_quotename",
+     "pathing",
+     "configure_paths",
+     "flatten",
+     # Logging
+     "get_logger",
+     "configure_logging",
+     "log",
+     "log_debug",
+     "log_info",
+     "log_warning",
+     "log_error",
+     "print_verbose",
+ ]
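
Given the re-exports above, a minimal sketch of using the package-level namespace (the nested list and message text are placeholders):

    from datablade.utils import configure_logging, flatten, log_info

    configure_logging()  # default console handler at INFO level
    log_info(f"flattened: {flatten([1, [2, [3]], 4])}")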
@@ -0,0 +1,32 @@
+ """List helpers for common transformations."""
+
+ from typing import Any, List
+
+
+ def flatten(nest: List[Any]) -> List[Any]:
+     """
+     Flatten a nested list recursively to a single-level list.
+
+     Args:
+         nest: A potentially nested list structure.
+
+     Returns:
+         A flat list containing all elements from the nested structure.
+
+     Examples:
+         >>> flatten([1, [2, 3], [[4], 5]])
+         [1, 2, 3, 4, 5]
+         >>> flatten([1, 2, 3])
+         [1, 2, 3]
+     """
+     if not isinstance(nest, list):
+         raise TypeError("nest must be a list")
+
+     # Build a new list so the caller's input is untouched.
+     result = []
+     for item in nest:
+         if isinstance(item, list):
+             result.extend(flatten(item))
+         else:
+             result.append(item)
+     return result
@@ -0,0 +1,204 @@
+ """
+ Logging utilities for datablade.
+
+ Provides a configurable logger that can be used across all modules.
+ By default, logs to console at INFO level. Users can configure
+ handlers, levels, and formatters as needed.
+ """
+
+ import logging
+ import pathlib
+ import time
+ from contextlib import contextmanager
+ from typing import Any, Iterator, Mapping, Optional
+
+ # Create the datablade logger
+ _logger = logging.getLogger("datablade")
+ _logger.setLevel(logging.DEBUG)  # Allow all levels; handlers control output
+
+ # Default console handler (can be replaced by user)
+ _default_handler: Optional[logging.Handler] = None
+
+
+ def _ensure_handler() -> None:
+     """Ensure at least one handler is configured."""
+     global _default_handler
+     if not _logger.handlers and _default_handler is None:
+         _default_handler = logging.StreamHandler()
+         _default_handler.setLevel(logging.INFO)
+         formatter = logging.Formatter(
+             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+         )
+         _default_handler.setFormatter(formatter)
+         _logger.addHandler(_default_handler)
+
+
+ def get_logger() -> logging.Logger:
+     """
+     Get the datablade logger instance.
+
+     Returns:
+         The configured datablade logger.
+     """
+     _ensure_handler()
+     return _logger
+
+
+ def configure_logging(
+     level: int = logging.INFO,
+     handler: Optional[logging.Handler] = None,
+     format_string: Optional[str] = None,
+     *,
+     log_file: Optional[str | pathlib.Path] = None,
+     format: Optional[str] = None,
+ ) -> logging.Logger:
+     """
+     Configure the datablade logger.
+
+     Args:
+         level: Logging level (e.g., logging.DEBUG, logging.INFO).
+         handler: Optional custom handler. If None, uses StreamHandler.
+         format_string: Optional format string for log messages.
+
+     Returns:
+         The configured logger instance.
+     """
+     global _default_handler
+
+     if format is not None:
+         if format_string is not None:
+             raise ValueError("Provide only one of format_string or format")
+         format_string = format
+
+     # Remove existing handlers
+     for h in _logger.handlers[:]:
+         _logger.removeHandler(h)
+     _default_handler = None
+
+     # Add new handler
+     if handler is None:
+         if log_file is not None:
+             log_path = pathlib.Path(log_file)
+             if log_path.parent:
+                 log_path.parent.mkdir(parents=True, exist_ok=True)
+             handler = logging.FileHandler(log_path, encoding="utf-8")
+         else:
+             handler = logging.StreamHandler()
+
+     handler.setLevel(level)
+
+     if format_string:
+         formatter = logging.Formatter(format_string)
+     else:
+         formatter = logging.Formatter(
+             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+         )
+     handler.setFormatter(formatter)
+
+     _logger.addHandler(handler)
+     _default_handler = handler
+
+     return _logger
+
+
+ def log(
+     message: Any,
+     level: int = logging.INFO,
+     verbose: bool = True,
+ ) -> None:
+     """
+     Log a message at the specified level if verbose is True.
+
+     Args:
+         message: The message to log (converted to string).
+         level: Logging level (default: INFO).
+         verbose: If False, message is not logged.
+
+     Returns:
+         None
+     """
+     if not verbose:
+         return
+
+     _ensure_handler()
+     _logger.log(level, str(message))
+
+
+ def log_debug(message: Any, verbose: bool = True) -> None:
+     """Log a DEBUG level message."""
+     log(message, logging.DEBUG, verbose)
+
+
+ def log_info(message: Any, verbose: bool = True) -> None:
+     """Log an INFO level message."""
+     log(message, logging.INFO, verbose)
+
+
+ def log_warning(message: Any, verbose: bool = True) -> None:
+     """Log a WARNING level message."""
+     log(message, logging.WARNING, verbose)
+
+
+ def log_error(message: Any, verbose: bool = True) -> None:
+     """Log an ERROR level message."""
+     log(message, logging.ERROR, verbose)
+
+
+ def build_log_context(
+     *,
+     file_path: Optional[str | pathlib.Path] = None,
+     chunk_index: Optional[int] = None,
+     **fields: Any,
+ ) -> dict[str, Any]:
+     """Build a logging context dict with common fields like file and chunk."""
+     context = dict(fields)
+     if file_path is not None:
+         context.setdefault("file", pathlib.Path(file_path).name)
+     if chunk_index is not None:
+         context.setdefault("chunk", chunk_index)
+     return context
+
+
+ def format_log_context(context: Optional[Mapping[str, Any]]) -> str:
+     """Format a context mapping into a compact suffix for log messages."""
+     if not context:
+         return ""
+
+     parts: list[str] = []
+     for key, value in context.items():
+         if value is None:
+             continue
+         parts.append(f"{key}={value}")
+
+     if not parts:
+         return ""
+
+     return f" ({', '.join(parts)})"
+
+
+ @contextmanager
+ def timed_step(name: str, *, verbose: bool = True) -> Iterator[None]:
+     """Measure elapsed time for a block and log on exit."""
+     start = time.perf_counter()
+     try:
+         yield
+     finally:
+         duration = time.perf_counter() - start
+         log_info(f"{name} took {duration:.2f}s", verbose)
+
+
+ # Backward compatibility alias
+ def print_verbose(message: Any, verbose: bool = True) -> None:
+     """
+     Print a message if verbose is True.
+
+     This is a backward-compatible alias for log_info.
+
+     Args:
+         message: The message to print (converted to string).
+         verbose: If True, the message will be logged.
+
+     Returns:
+         None
+     """
+     log_info(message, verbose)
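
A minimal sketch of wiring the logging helpers above together, assuming they are imported from datablade.utils.logging; the log file path and step name are placeholders:

    import logging

    from datablade.utils.logging import configure_logging, log_info, timed_step

    # Send all datablade messages to a file at DEBUG level (placeholder path).
    configure_logging(level=logging.DEBUG, log_file="logs/datablade.log")

    # timed_step logs "<name> took N.NNs" via log_info when the block exits.
    with timed_step("stage parquet"):
        log_info("staging started")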
@@ -0,0 +1,29 @@
+ """
+ Messaging utilities for datablade.
+
+ This module provides backward-compatible message functions.
+ For new code, prefer using datablade.utils.logging directly.
+ """
+
+ # Re-export from logging module for backward compatibility
+ from .logging import (
+     configure_logging,
+     get_logger,
+     log,
+     log_debug,
+     log_error,
+     log_info,
+     log_warning,
+     print_verbose,
+ )
+
+ __all__ = [
+     "print_verbose",
+     "log",
+     "log_debug",
+     "log_info",
+     "log_warning",
+     "log_error",
+     "get_logger",
+     "configure_logging",
+ ]