datablade 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datablade/__init__.py +10 -2
- datablade/blade.py +174 -5
- datablade/dataframes/__init__.py +8 -0
- datablade/dataframes/frames.py +127 -27
- datablade/dataframes/readers.py +988 -161
- datablade/docs/ARCHITECTURE.md +102 -0
- datablade/docs/OBJECT_REGISTRY.md +194 -0
- datablade/docs/README.md +57 -0
- datablade/docs/TESTING.md +37 -0
- datablade/docs/USAGE.md +409 -0
- datablade/docs/__init__.py +87 -0
- datablade/docs/__main__.py +6 -0
- datablade/io/json.py +45 -8
- datablade/io/zip.py +68 -30
- datablade/registry.py +581 -0
- datablade/sql/__init__.py +25 -1
- datablade/sql/bulk_load.py +309 -49
- datablade/sql/ddl.py +201 -26
- datablade/sql/ddl_pyarrow.py +150 -26
- datablade/sql/dialects.py +2 -0
- datablade/sql/quoting.py +2 -0
- datablade/sql/schema_spec.py +65 -0
- datablade/sql/sqlserver.py +390 -0
- datablade/utils/__init__.py +2 -1
- datablade/utils/lists.py +3 -0
- datablade/utils/logging.py +46 -1
- datablade/utils/strings.py +180 -17
- {datablade-0.0.5.dist-info → datablade-0.0.6.dist-info}/METADATA +68 -13
- datablade-0.0.6.dist-info/RECORD +41 -0
- {datablade-0.0.5.dist-info → datablade-0.0.6.dist-info}/WHEEL +1 -1
- datablade-0.0.5.dist-info/RECORD +0 -31
- {datablade-0.0.5.dist-info → datablade-0.0.6.dist-info}/licenses/LICENSE +0 -0
- {datablade-0.0.5.dist-info → datablade-0.0.6.dist-info}/top_level.txt +0 -0
datablade/sql/ddl.py
CHANGED
@@ -1,13 +1,117 @@
-
+"""Pandas-driven DDL generation for multiple SQL dialects."""
+
+from typing import Any, List, Mapping, Optional
 
 import pandas as pd
 
 from ..utils.messages import print_verbose
 from .dialects import Dialect
 from .quoting import quote_identifier
+from .schema_spec import resolve_column_spec, resolve_string_policy
+
+_VALID_PREFER_LENGTH = {"estimate", "minimum", "maximum"}
+
+
+def _coerce_positive_int(value: Any, label: str) -> Optional[int]:
+    if value is None:
+        return None
+    if isinstance(value, bool) or not isinstance(value, int) or value <= 0:
+        raise ValueError(f"{label} must be a positive integer")
+    return int(value)
+
+
+def _coerce_non_negative_int(value: Any, label: str) -> int:
+    if value is None:
+        return 0
+    if isinstance(value, bool) or not isinstance(value, int) or value < 0:
+        raise ValueError(f"{label} must be a non-negative integer")
+    return int(value)
+
+
+def _coerce_optional_bool(value: Any, label: str) -> Optional[bool]:
+    if value is None:
+        return None
+    if not isinstance(value, bool):
+        raise TypeError(f"{label} must be a boolean")
+    return value
+
+
+def _normalize_string_policy(policy: Optional[Mapping[str, Any]]) -> dict:
+    policy = {} if policy is None else dict(policy)
+    if "defined_pad" in policy and "pad" not in policy:
+        policy["pad"] = policy["defined_pad"]
+
+    prefer_length = policy.get("prefer_length", "estimate")
+    if prefer_length not in _VALID_PREFER_LENGTH:
+        raise ValueError(
+            "prefer_length must be one of 'estimate', 'minimum', or 'maximum'"
+        )
 
+    min_length = _coerce_positive_int(policy.get("min_length"), "min_length")
+    max_length = _coerce_positive_int(policy.get("max_length"), "max_length")
+    pad = _coerce_non_negative_int(policy.get("pad"), "pad")
+    empty_as_null = (
+        _coerce_optional_bool(policy.get("empty_as_null"), "empty_as_null") or False
+    )
+    allow_null = _coerce_optional_bool(policy.get("allow_null"), "allow_null")
 
-
+    return {
+        "prefer_length": prefer_length,
+        "min_length": min_length,
+        "max_length": max_length,
+        "pad": pad,
+        "empty_as_null": empty_as_null,
+        "allow_null": allow_null,
+    }
+
+
+def _string_series_stats(series: pd.Series, empty_as_null: bool) -> tuple[int, bool]:
+    non_null = series.dropna()
+    if non_null.empty:
+        return 0, False
+
+    as_str = non_null.astype(str)
+    empty_mask = as_str == ""
+    any_empty = bool(empty_mask.any())
+    if empty_as_null:
+        as_str = as_str[~empty_mask]
+        if as_str.empty:
+            return 0, any_empty
+
+    lengths = as_str.map(len)
+    max_length = int(lengths.max()) if not lengths.empty else 0
+    return max_length, any_empty
+
+
+def _select_string_length(
+    max_length: int,
+    *,
+    prefer_length: str,
+    pad: int,
+    min_length: Optional[int],
+    max_length_bound: Optional[int],
+) -> int:
+    if prefer_length == "minimum" and min_length is not None:
+        length = min_length
+    elif prefer_length == "maximum" and max_length_bound is not None:
+        length = max_length_bound
+    else:
+        length = max_length + pad
+
+    if min_length is not None:
+        length = max(length, min_length)
+    if max_length_bound is not None:
+        length = min(length, max_length_bound)
+
+    return max(1, int(length))
+
+
+def _infer_sql_type(  # noqa: C901
+    series: pd.Series,
+    dialect: Dialect,
+    *,
+    string_policy: Optional[Mapping[str, Any]] = None,
+) -> str:
     """Infer a SQL column type for a pandas Series given a dialect."""
     dtype = series.dtype
 
@@ -25,6 +129,7 @@ def _infer_sql_type(series: pd.Series, dialect: Dialect) -> str:  # noqa: C901
         return bool(sample.map(_is_bytes_like).all())
 
     if dialect == Dialect.SQLSERVER:
+        # Use SQL Server's tiered integer sizes for best-fit types.
         if pd.api.types.is_integer_dtype(dtype):
             non_null = series.dropna()
             if non_null.empty:
@@ -47,19 +152,25 @@ def _infer_sql_type(series: pd.Series, dialect: Dialect) -> str:  # noqa: C901
         if _is_bytes_like_series(series):
             return "varbinary(max)"
         # strings / objects
-
-
-
-
-
-
-
-
+        policy = _normalize_string_policy(string_policy)
+        max_length, _ = _string_series_stats(series, policy["empty_as_null"])
+        max_length = _select_string_length(
+            max_length,
+            prefer_length=policy["prefer_length"],
+            pad=policy["pad"],
+            min_length=policy["min_length"],
+            max_length_bound=policy["max_length"],
+        )
+        if (
+            pd.api.types.is_object_dtype(dtype)
+            or pd.api.types.is_string_dtype(dtype)
+            or isinstance(dtype, pd.CategoricalDtype)
         ):
             return f"nvarchar({max_length if max_length <= 4000 else 'max'})"
         return "nvarchar(max)"
 
     if dialect == Dialect.POSTGRES:
+        # PostgreSQL integer sizes are narrower than SQL Server's tinyint.
         if pd.api.types.is_integer_dtype(dtype):
             non_null = series.dropna()
             if non_null.empty:
@@ -79,15 +190,19 @@ def _infer_sql_type(series: pd.Series, dialect: Dialect) -> str:  # noqa: C901
             return "timestamp"
         if _is_bytes_like_series(series):
             return "bytea"
-
-
-
-
-
-
+        policy = _normalize_string_policy(string_policy)
+        max_length, _ = _string_series_stats(series, policy["empty_as_null"])
+        max_length = _select_string_length(
+            max_length,
+            prefer_length=policy["prefer_length"],
+            pad=policy["pad"],
+            min_length=policy["min_length"],
+            max_length_bound=policy["max_length"],
+        )
         return f"varchar({max_length})" if max_length <= 65535 else "text"
 
     if dialect == Dialect.MYSQL:
+        # Keep MySQL type names consistent with the existing DDL outputs.
         if pd.api.types.is_integer_dtype(dtype):
             non_null = series.dropna()
             if non_null.empty:
@@ -107,15 +222,19 @@ def _infer_sql_type(series: pd.Series, dialect: Dialect) -> str:  # noqa: C901
             return "DATETIME"
         if _is_bytes_like_series(series):
             return "LONGBLOB"
-
-
-
-
-
-
+        policy = _normalize_string_policy(string_policy)
+        max_length, _ = _string_series_stats(series, policy["empty_as_null"])
+        max_length = _select_string_length(
+            max_length,
+            prefer_length=policy["prefer_length"],
+            pad=policy["pad"],
+            min_length=policy["min_length"],
+            max_length_bound=policy["max_length"],
+        )
         return f"VARCHAR({max_length})" if max_length <= 65535 else "TEXT"
 
     if dialect == Dialect.DUCKDB:
+        # DuckDB has simplified type names and distinguishes signed/unsigned.
        if pd.api.types.is_integer_dtype(dtype):
            return (
                "BIGINT" if pd.api.types.is_signed_integer_dtype(dtype) else "UBIGINT"
@@ -136,6 +255,7 @@ def _infer_sql_type(series: pd.Series, dialect: Dialect) -> str:  # noqa: C901
 def _qualify_name(
     catalog: Optional[str], schema: Optional[str], table: str, dialect: Dialect
 ) -> str:
+    """Build a fully-qualified table name for the selected dialect."""
     if dialect == Dialect.SQLSERVER:
         # catalog and schema are both used when provided
         if catalog:
@@ -161,6 +281,8 @@ def generate_create_table(
     table: str = "table",
     drop_existing: bool = True,
     dialect: Dialect = Dialect.SQLSERVER,
+    use_go: bool = False,
+    schema_spec: Optional[Mapping[str, Any]] = None,
     verbose: bool = False,
 ) -> str:
     """
@@ -173,6 +295,9 @@ def generate_create_table(
         table: Target table name.
         drop_existing: If True, include a DROP TABLE IF EXISTS stanza.
         dialect: SQL dialect.
+        use_go: If True and dialect is SQL Server, insert a GO batch separator
+            after USE when a catalog is provided.
+        schema_spec: Optional schema overrides for column types and string sizing.
         verbose: If True, prints progress messages.
 
     Returns:
@@ -193,17 +318,62 @@ def generate_create_table(
         raise ValueError("catalog, if provided, must be a non-empty string")
     if schema is not None and (not isinstance(schema, str) or not schema.strip()):
         raise ValueError("schema, if provided, must be a non-empty string")
+    if not isinstance(use_go, bool):
+        raise TypeError("use_go must be a boolean")
 
     qualified_name = _qualify_name(catalog, schema, table, dialect)
     lines: List[str] = []
 
     for column in df.columns:
         series = df[column]
+        column_name = str(column)
+        defaults, column_spec = resolve_column_spec(column_name, schema_spec)
+        string_policy = resolve_string_policy(column_name, defaults, column_spec)
+        normalized_policy = _normalize_string_policy(string_policy)
+
         nullable = series.isnull().any()
-
+        if normalized_policy["empty_as_null"] and (
+            pd.api.types.is_object_dtype(series.dtype)
+            or isinstance(series.dtype, pd.CategoricalDtype)
+            or pd.api.types.is_string_dtype(series.dtype)
+        ):
+            _, any_empty = _string_series_stats(series, True)
+            if any_empty:
+                nullable = True
+
+        nullable_override = _coerce_optional_bool(
+            column_spec.get("nullable"), "nullable"
+        )
+        if nullable_override is None:
+            nullable_override = _coerce_optional_bool(
+                column_spec.get("allow_null"), "allow_null"
+            )
+        if nullable_override is None:
+            nullable_override = normalized_policy["allow_null"]
+        if nullable_override is None:
+            nullable_override = _coerce_optional_bool(
+                defaults.get("nullable"), "defaults.nullable"
+            )
+        if nullable_override is None:
+            nullable_override = _coerce_optional_bool(
+                defaults.get("allow_null"), "defaults.allow_null"
+            )
+        if nullable_override is not None:
+            nullable = nullable_override
+
+        sql_type_override = column_spec.get("sql_type")
+        if sql_type_override is not None:
+            if not isinstance(sql_type_override, str) or not sql_type_override.strip():
+                raise ValueError(
+                    f"schema_spec.columns['{column_name}'].sql_type must be a non-empty string"
+                )
+            sql_type = sql_type_override.strip()
+        else:
+            sql_type = _infer_sql_type(series, dialect, string_policy=normalized_policy)
+
         null_str = "NULL" if nullable else "NOT NULL"
         lines.append(
-            f"  {quote_identifier(
+            f"  {quote_identifier(column_name, dialect)} {sql_type} {null_str}"
        )
 
     body = ",\n".join(lines)
@@ -211,14 +381,19 @@ def generate_create_table(
     drop_clause = ""
     if drop_existing:
         if dialect == Dialect.SQLSERVER:
+            object_id_name = qualified_name.replace("'", "''")
             if catalog:
+                batch_sep = "GO\n" if use_go else ""
                 drop_clause = (
                     f"USE {quote_identifier(catalog, dialect)};\n"
-                    f"IF OBJECT_ID('{
+                    f"{batch_sep}IF OBJECT_ID('{object_id_name}') IS NOT NULL "
                     f"DROP TABLE {qualified_name};\n"
                 )
             else:
-                drop_clause =
+                drop_clause = (
+                    f"IF OBJECT_ID('{object_id_name}') IS NOT NULL "
+                    f"DROP TABLE {qualified_name};\n"
+                )
         else:
             drop_clause = f"DROP TABLE IF EXISTS {qualified_name};\n"
 
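Taken together, the ddl.py changes mean generate_create_table now accepts a schema_spec mapping (optional "defaults" and per-column "columns" entries) plus a use_go flag for SQL Server batch separators. The following sketch is illustrative only; the module paths, the DataFrame keyword position, and all values are assumptions drawn from this diff rather than from the package documentation:

import pandas as pd

from datablade.sql.ddl import generate_create_table
from datablade.sql.dialects import Dialect

df = pd.DataFrame({"id": [1, 2], "name": ["alpha", ""], "note": ["x", None]})

# Keys mirror what the new code reads: defaults.string, columns[<name>].sql_type, etc.
schema_spec = {
    "defaults": {
        "string": {"prefer_length": "estimate", "pad": 10, "empty_as_null": True},
    },
    "columns": {
        "id": {"sql_type": "bigint", "nullable": False},
        "name": {"string": {"min_length": 50}},
    },
}

ddl = generate_create_table(
    df,
    catalog="analytics",        # hypothetical catalog name
    schema="dbo",
    table="customers",
    dialect=Dialect.SQLSERVER,
    use_go=True,                # emit GO after the USE batch when a catalog is given
    schema_spec=schema_spec,
)
print(ddl)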
datablade/sql/ddl_pyarrow.py
CHANGED
@@ -1,17 +1,50 @@
+"""Parquet schema-driven DDL generation using PyArrow."""
+
 from __future__ import annotations
 
 import logging
-
+import pathlib
+from dataclasses import dataclass
+from typing import Any, List, Mapping, Optional, Union
 
 from ..utils.messages import print_verbose
+from ..utils.strings import coerce_path
 from .ddl import _qualify_name
 from .dialects import Dialect
 from .quoting import quote_identifier
+from .schema_spec import resolve_column_spec
 
 logger = logging.getLogger("datablade")
 
 
+@dataclass(frozen=True)
+class DroppedColumn:
+    """Metadata about a dropped column during Parquet DDL generation."""
+
+    name: str
+    arrow_type: str
+    reason: str
+
+
+@dataclass(frozen=True)
+class FallbackColumn:
+    """Metadata about a column handled via JSON fallback."""
+
+    name: str
+    arrow_type: str
+    sql_type: str
+
+
+@dataclass(frozen=True)
+class ParquetDDLMetadata:
+    """Details about columns dropped or handled via fallback."""
+
+    dropped_columns: List[DroppedColumn]
+    fallback_columns: List[FallbackColumn]
+
+
 def _require_pyarrow():
+    """Import pyarrow lazily to keep core dependencies light."""
     try:
         import pyarrow as pa  # type: ignore
         import pyarrow.parquet as pq  # type: ignore
@@ -23,6 +56,30 @@ def _require_pyarrow():
     return pa, pq
 
 
+def _is_complex_arrow_type(data_type) -> bool:
+    pa, _ = _require_pyarrow()
+    return (
+        pa.types.is_struct(data_type)
+        or pa.types.is_list(data_type)
+        or pa.types.is_large_list(data_type)
+        or pa.types.is_fixed_size_list(data_type)
+        or pa.types.is_map(data_type)
+        or pa.types.is_union(data_type)
+    )
+
+
+def _json_fallback_sql_type(dialect: Dialect) -> str:
+    if dialect == Dialect.SQLSERVER:
+        return "nvarchar(max)"
+    if dialect == Dialect.POSTGRES:
+        return "text"
+    if dialect == Dialect.MYSQL:
+        return "TEXT"
+    if dialect == Dialect.DUCKDB:
+        return "VARCHAR"
+    raise NotImplementedError(f"Dialect not supported: {dialect}")
+
+
 def _sql_type_from_arrow(data_type, dialect: Dialect) -> Optional[str]:  # noqa: C901
     """Map a pyarrow.DataType to a SQL type string.
 
@@ -153,8 +210,8 @@ def _sql_type_from_arrow(data_type, dialect: Dialect) -> Optional[str]:  # noqa: C901
         if pa.types.is_float64(data_type):
             return "DOUBLE"
         if pa.types.is_decimal(data_type):
-            precision = int(data_type.precision)
-            scale = int(data_type.scale)
+            precision = min(int(data_type.precision), 65)
+            scale = min(int(data_type.scale), 30, precision)
             return f"DECIMAL({precision}, {scale})"
         if pa.types.is_date(data_type):
             return "DATE"
@@ -204,56 +261,116 @@ def _sql_type_from_arrow(data_type, dialect: Dialect) -> Optional[str]:  # noqa: C901
 
 
 def generate_create_table_from_parquet(
-    parquet_path: str,
+    parquet_path: str | pathlib.Path,
     catalog: Optional[str] = None,
     schema: Optional[str] = None,
     table: str = "table",
     drop_existing: bool = True,
     dialect: Dialect = Dialect.SQLSERVER,
+    use_go: bool = False,
+    schema_spec: Optional[Mapping[str, Any]] = None,
     verbose: bool = False,
-
+    fallback_to_json: bool = False,
+    return_metadata: bool = False,
+) -> Union[str, tuple[str, ParquetDDLMetadata]]:
     """Generate a CREATE TABLE statement from a Parquet file schema.
 
     This reads the Parquet schema only (via PyArrow) and does not materialize data.
 
     Columns whose Parquet types have no clean mapping for the chosen dialect are
-    dropped, and a warning is logged under logger name 'datablade'.
+    dropped, and a warning is logged under logger name 'datablade'. If
+    fallback_to_json is enabled, complex types are instead mapped to a text
+    column intended to store JSON-encoded values. Use return_metadata to receive
+    details about dropped and fallback-mapped columns.
+
+    When dialect is SQL Server and use_go is True, a GO batch separator is
+    inserted after a USE statement when a catalog is provided.
+
+    schema_spec may provide per-column sql_type/nullable overrides.
     """
 
-
-    parquet_path
-
-
-
-
+    path_obj = coerce_path(
+        parquet_path,
+        must_exist=True,
+        verbose=verbose,
+        label="parquet_path",
+    )
     if not isinstance(table, str) or not table.strip():
         raise ValueError("table must be a non-empty string")
     if catalog is not None and (not isinstance(catalog, str) or not catalog.strip()):
         raise ValueError("catalog, if provided, must be a non-empty string")
     if schema is not None and (not isinstance(schema, str) or not schema.strip()):
         raise ValueError("schema, if provided, must be a non-empty string")
+    if not isinstance(use_go, bool):
+        raise TypeError("use_go must be a boolean")
 
     _, pq = _require_pyarrow()
 
-
+    # Read Parquet metadata only; this does not load row data.
+    arrow_schema = pq.ParquetFile(path_obj).schema_arrow
 
     qualified_name = _qualify_name(catalog, schema, table, dialect)
     lines: List[str] = []
+    dropped_columns: List[DroppedColumn] = []
+    fallback_columns: List[FallbackColumn] = []
 
     for field in arrow_schema:
-
-
-
-
-
-
-
-
-
+        column_name = str(field.name)
+        defaults, column_spec = resolve_column_spec(column_name, schema_spec)
+        sql_type_override = column_spec.get("sql_type")
+        if sql_type_override is not None:
+            if not isinstance(sql_type_override, str) or not sql_type_override.strip():
+                raise ValueError(
+                    f"schema_spec.columns['{column_name}'].sql_type must be a non-empty string"
+                )
+            sql_type = sql_type_override.strip()
+        else:
+            sql_type = _sql_type_from_arrow(field.type, dialect)
 
-
+        if sql_type is None:
+            if fallback_to_json and _is_complex_arrow_type(field.type):
+                fallback_sql_type = _json_fallback_sql_type(dialect)
+                fallback_columns.append(
+                    FallbackColumn(
+                        name=str(field.name),
+                        arrow_type=str(field.type),
+                        sql_type=fallback_sql_type,
+                    )
+                )
+                sql_type = fallback_sql_type
+            else:
+                dropped_columns.append(
+                    DroppedColumn(
+                        name=str(field.name),
+                        arrow_type=str(field.type),
+                        reason="unsupported type",
+                    )
+                )
+                logger.warning(
+                    "Dropping Parquet column %r (type=%s) for dialect=%s: unsupported type",
+                    field.name,
+                    str(field.type),
+                    dialect.value,
+                )
+                continue
+
+        nullable = field.nullable
+        for label, value in (
+            ("nullable", column_spec.get("nullable")),
+            ("allow_null", column_spec.get("allow_null")),
+            ("defaults.nullable", defaults.get("nullable")),
+            ("defaults.allow_null", defaults.get("allow_null")),
+        ):
+            if value is None:
+                continue
+            if not isinstance(value, bool):
+                raise TypeError(f"{label} must be a boolean")
+            nullable = value
+            break
+
+        null_str = "NULL" if nullable else "NOT NULL"
         lines.append(
-            f"  {quote_identifier(
+            f"  {quote_identifier(column_name, dialect)} {sql_type} {null_str}"
         )
 
     if not lines:
@@ -266,15 +383,17 @@ def generate_create_table_from_parquet(
     drop_clause = ""
     if drop_existing:
         if dialect == Dialect.SQLSERVER:
+            object_id_name = qualified_name.replace("'", "''")
             if catalog:
+                batch_sep = "GO\n" if use_go else ""
                 drop_clause = (
                     f"USE {quote_identifier(catalog, dialect)};\n"
-                    f"IF OBJECT_ID('{
+                    f"{batch_sep}IF OBJECT_ID('{object_id_name}') IS NOT NULL "
                     f"DROP TABLE {qualified_name};\n"
                 )
             else:
                 drop_clause = (
-                    f"IF OBJECT_ID('{
+                    f"IF OBJECT_ID('{object_id_name}') IS NOT NULL "
                     f"DROP TABLE {qualified_name};\n"
                 )
         else:
@@ -284,4 +403,9 @@ def generate_create_table_from_parquet(
     print_verbose(
         f"Generated CREATE TABLE from Parquet schema for {qualified_name}", verbose
     )
+    if return_metadata:
+        metadata = ParquetDDLMetadata(
+            dropped_columns=dropped_columns, fallback_columns=fallback_columns
+        )
+        return statement, metadata
     return statement
datablade/sql/dialects.py
CHANGED
datablade/sql/quoting.py
CHANGED
datablade/sql/schema_spec.py
ADDED
@@ -0,0 +1,65 @@
+"""Schema specification helpers for DDL generation."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, Optional, Tuple
+
+
+def _as_mapping(value: Any, label: str) -> dict:
+    if value is None:
+        return {}
+    if not isinstance(value, Mapping):
+        raise TypeError(f"{label} must be a mapping")
+    return dict(value)
+
+
+def resolve_schema_spec(
+    schema_spec: Optional[Mapping[str, Any]],
+) -> Tuple[dict, dict]:
+    """Return (defaults, columns) mappings for a schema spec."""
+    if schema_spec is None:
+        return {}, {}
+    if not isinstance(schema_spec, Mapping):
+        raise TypeError("schema_spec must be a mapping")
+
+    defaults = _as_mapping(schema_spec.get("defaults"), "schema_spec.defaults")
+    columns = _as_mapping(schema_spec.get("columns"), "schema_spec.columns")
+    return defaults, columns
+
+
+def resolve_column_spec(
+    column_name: str,
+    schema_spec: Optional[Mapping[str, Any]],
+) -> Tuple[dict, dict]:
+    """Return (defaults, column_spec) for a column name."""
+    defaults, columns = resolve_schema_spec(schema_spec)
+    if not columns:
+        return defaults, {}
+
+    column_spec = columns.get(column_name)
+    if column_spec is None:
+        column_spec = columns.get(str(column_name))
+
+    if column_spec is None:
+        return defaults, {}
+    if not isinstance(column_spec, Mapping):
+        raise TypeError(f"schema_spec.columns['{column_name}'] must be a mapping")
+    return defaults, dict(column_spec)
+
+
+def resolve_string_policy(
+    column_name: str,
+    defaults: dict,
+    column_spec: dict,
+) -> dict:
+    """Merge defaults + column string policy overrides."""
+    string_defaults = _as_mapping(defaults.get("string"), "schema_spec.defaults.string")
+    string_overrides = _as_mapping(
+        column_spec.get("string"),
+        f"schema_spec.columns['{column_name}'].string",
+    )
+    policy = {**string_defaults, **string_overrides}
+    if "defined_pad" in policy and "pad" not in policy:
+        policy["pad"] = policy["defined_pad"]
+    return policy