PyPI - pointblank - Versions diffs - 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl - Mend

pointblank 0.13.0py3-none-any.whl → 0.13.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

pointblank/__init__.py +0 -2
pointblank/_constants.py +2 -28
pointblank/_constants_translations.py +54 -0
pointblank/_interrogation.py +1483 -1735
pointblank/column.py +6 -2
pointblank/datascan.py +3 -2
pointblank/schema.py +155 -1
pointblank/validate.py +459 -222
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/METADATA +3 -2
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/RECORD +14 -15
pointblank/tf.py +0 -287
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/WHEEL +0 -0
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/entry_points.txt +0 -0
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/licenses/LICENSE +0 -0
{pointblank-0.13.0.dist-info → pointblank-0.13.2.dist-info}/top_level.txt +0 -0

pointblank/column.py CHANGED Viewed

@@ -215,8 +215,12 @@ class ColumnSelectorNarwhals(Column):
         # Convert the native table to a Narwhals DataFrame
         dfn = nw.from_native(table)
         # Use the selector to select columns and return their names
-        columns = dfn.select(self.exprs.exprs).columns
-        return columns
+        selected_df = dfn.select(self.exprs.exprs)
+        # Use `collect_schema()` for LazyFrame to avoid performance warnings
+        if hasattr(selected_df, "collect_schema"):
+            return list(selected_df.collect_schema().keys())
+        else:
+            return list(selected_df.columns)
 def col(

pointblank/datascan.py CHANGED Viewed

@@ -162,14 +162,15 @@ class DataScan:
         self.profile: _DataProfile = self._generate_profile_df()
     def _generate_profile_df(self) -> _DataProfile:
-        columns: list[str] = self.nw_data.columns
+        # Get schema and extract all column names from it
+        schema: Mapping[str, Any] = self.nw_data.collect_schema()
+        columns: list[str] = list(schema.keys())
         profile = _DataProfile(
             table_name=self.tbl_name,
             columns=columns,
             implementation=self.nw_data.implementation,
         )
-        schema: Mapping[str, Any] = self.nw_data.schema
         for column in columns:
             col_data: DataFrame = self.nw_data.select(column)

pointblank/schema.py CHANGED Viewed

@@ -8,7 +8,7 @@ import narwhals as nw
 from pointblank._constants import IBIS_BACKENDS
 from pointblank._utils import _get_tbl_type, _is_lazy_frame, _is_lib_present, _is_narwhals_table
-__all__ = ["Schema"]
+__all__ = ["Schema", "_check_schema_match"]
 @dataclass
@@ -888,6 +888,80 @@ def _schema_info_generate_params_dict(
     }
+def _check_schema_match(
+    data_tbl: any,
+    schema: Schema,
+    complete: bool = True,
+    in_order: bool = True,
+    case_sensitive_colnames: bool = True,
+    case_sensitive_dtypes: bool = True,
+    full_match_dtypes: bool = True,
+) -> bool:
+    """
+    Check if the schema matches the target table.
+    This function performs schema validation and returns a boolean result.
+    Parameters
+    ----------
+    data_tbl
+        The target table to validate.
+    schema
+        The expected schema.
+    complete
+        Whether the schema should be complete.
+    in_order
+        Whether the schema should be in order.
+    case_sensitive_colnames
+        Whether column names are case-sensitive.
+    case_sensitive_dtypes
+        Whether data types are case-sensitive.
+    full_match_dtypes
+        Whether data types must match exactly.
+    Returns
+    -------
+    bool
+        True if the schema matches, False otherwise.
+    """
+    validation_info = _get_schema_validation_info(
+        data_tbl=data_tbl,
+        schema=schema,
+        passed=False,  # This will be determined by the logic below
+        complete=complete,
+        in_order=in_order,
+        case_sensitive_colnames=case_sensitive_colnames,
+        case_sensitive_dtypes=case_sensitive_dtypes,
+        full_match_dtypes=full_match_dtypes,
+    )
+    # Determine if the schema validation passed based on the validation info
+    passed = True
+    # Check completeness requirement
+    if complete and not validation_info["columns_full_set"]:
+        passed = False
+    # Check order requirement
+    if in_order and not validation_info["columns_matched_in_order"]:
+        passed = False
+    # Check if all expected columns were found
+    if validation_info["columns_not_found"]:
+        passed = False
+    # Check column-specific validations
+    for col_info in validation_info["columns"].values():
+        if not col_info["colname_matched"]:
+            passed = False
+        if not col_info.get(
+            "dtype_matched", True
+        ):  # dtype_matched may not exist if no dtypes specified
+            passed = False
+    return passed
 def _get_schema_validation_info(
     data_tbl: any,
     schema: Schema,
@@ -1181,3 +1255,83 @@ def _get_schema_validation_info(
     )
     return schema_info
+def _check_schema_match(
+    data_tbl,
+    schema,
+    complete: bool = True,
+    in_order: bool = True,
+    case_sensitive_colnames: bool = True,
+    case_sensitive_dtypes: bool = True,
+    full_match_dtypes: bool = True,
+):
+    """
+    Check if a column exists in a DataFrame or has a certain data type.
+    Parameters
+    ----------
+    data_tbl
+        A data table.
+    schema
+        A schema to check against.
+    complete
+        `True` to check if the schema is complete, `False` otherwise.
+    in_order
+        `True` to check if the schema is in order, `False` otherwise.
+    case_sensitive_colnames
+        `True` to perform column-name matching in a case-sensitive manner, `False` otherwise.
+    case_sensitive_dtypes
+        `True` to perform data-type matching in a case-sensitive manner, `False` otherwise.
+    full_match_dtypes
+        `True` to perform a full match of data types, `False` otherwise.
+    Returns
+    -------
+    bool
+        `True` when schema matches, `False` otherwise.
+    """
+    schema_expect = schema
+    schema_actual = Schema(tbl=data_tbl)
+    if complete and in_order:
+        # Check if the schema is complete and in order (most restrictive check)
+        # complete: True, in_order: True
+        res = schema_expect._compare_schema_columns_complete_in_order(
+            other=schema_actual,
+            case_sensitive_colnames=case_sensitive_colnames,
+            case_sensitive_dtypes=case_sensitive_dtypes,
+            full_match_dtypes=full_match_dtypes,
+        )
+    elif not complete and not in_order:
+        # Check if the schema is at least a subset, and, order of columns does not matter
+        # complete: False, in_order: False
+        res = schema_expect._compare_schema_columns_subset_any_order(
+            other=schema_actual,
+            case_sensitive_colnames=case_sensitive_colnames,
+            case_sensitive_dtypes=case_sensitive_dtypes,
+            full_match_dtypes=full_match_dtypes,
+        )
+    elif complete:
+        # Check if the schema is complete, but the order of columns does not matter
+        # complete: True, in_order: False
+        res = schema_expect._compare_schema_columns_complete_any_order(
+            other=schema_actual,
+            case_sensitive_colnames=case_sensitive_colnames,
+            case_sensitive_dtypes=case_sensitive_dtypes,
+            full_match_dtypes=full_match_dtypes,
+        )
+    else:
+        # Check if the schema is a subset (doesn't need to be complete) and in order
+        # complete: False, in_order: True
+        res = schema_expect._compare_schema_columns_subset_in_order(
+            other=schema_actual,
+            case_sensitive_colnames=case_sensitive_colnames,
+            case_sensitive_dtypes=case_sensitive_dtypes,
+            full_match_dtypes=full_match_dtypes,
+        )
+    return res

pointblank 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl

pointblank 0.13.0py3-none-any.whl → 0.13.2py3-none-any.whl