PyPI - pointblank - Versions diffs - 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl - Mend

pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

pointblank/__init__.py +4 -0
pointblank/_constants.py +117 -0
pointblank/_constants_translations.py +487 -2
pointblank/_interrogation.py +1065 -12
pointblank/_spec_utils.py +1015 -0
pointblank/_utils.py +17 -7
pointblank/_utils_ai.py +875 -0
pointblank/assistant.py +1 -1
pointblank/cli.py +128 -115
pointblank/column.py +1 -1
pointblank/data/api-docs.txt +1838 -130
pointblank/data/validations/README.md +108 -0
pointblank/data/validations/complex_preprocessing.json +54 -0
pointblank/data/validations/complex_preprocessing.pkl +0 -0
pointblank/data/validations/generate_test_files.py +127 -0
pointblank/data/validations/multiple_steps.json +83 -0
pointblank/data/validations/multiple_steps.pkl +0 -0
pointblank/data/validations/narwhals_function.json +28 -0
pointblank/data/validations/narwhals_function.pkl +0 -0
pointblank/data/validations/no_preprocessing.json +83 -0
pointblank/data/validations/no_preprocessing.pkl +0 -0
pointblank/data/validations/pandas_compatible.json +28 -0
pointblank/data/validations/pandas_compatible.pkl +0 -0
pointblank/data/validations/preprocessing_functions.py +46 -0
pointblank/data/validations/simple_preprocessing.json +57 -0
pointblank/data/validations/simple_preprocessing.pkl +0 -0
pointblank/datascan.py +4 -4
pointblank/draft.py +52 -3
pointblank/scan_profile.py +6 -6
pointblank/schema.py +8 -82
pointblank/thresholds.py +1 -1
pointblank/validate.py +3069 -437
{pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/METADATA +67 -8
pointblank-0.15.0.dist-info/RECORD +56 -0
pointblank-0.13.4.dist-info/RECORD +0 -39
{pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/WHEEL +0 -0
{pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/entry_points.txt +0 -0
{pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/licenses/LICENSE +0 -0
{pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/top_level.txt +0 -0

pointblank/assistant.py CHANGED Viewed

@@ -55,7 +55,7 @@ def assistant(
     ----------
     model
         The model to be used. This should be in the form of `provider:model` (e.g.,
-        `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
+        `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
         `"ollama"`, and `"bedrock"`.
     data
         An optional data table to focus on during discussion with the PbA, which could be a

pointblank/cli.py CHANGED Viewed

@@ -295,6 +295,46 @@ def _format_dtype_compact(dtype_str: str) -> str:
         return dtype_str
+def _format_units(n: int) -> str:
+    """Format large numbers with K, M, B abbreviations for values above 10,000."""
+    if n is None:
+        return "—"
+    if n >= 1000000000:  # Billions
+        return f"{n / 1000000000:.1f}B"
+    elif n >= 1000000:  # Millions
+        return f"{n / 1000000:.1f}M"
+    elif n >= 10000:  # Use K for 10,000 and above
+        return f"{n / 1000:.0f}K"
+    else:
+        return str(n)
+def _format_pass_fail(passed: int, total: int) -> str:
+    """Format pass/fail counts with abbreviated numbers and fractions."""
+    if passed is None or total is None or total == 0:
+        return "—/—"
+    # Calculate fraction
+    fraction = passed / total
+    # Format fraction with special handling for very small and very large values
+    if fraction == 0.0:
+        fraction_str = "0.00"
+    elif fraction == 1.0:
+        fraction_str = "1.00"
+    elif fraction < 0.005:  # Less than 0.005 rounds to 0.00
+        fraction_str = "<0.01"
+    elif fraction > 0.995:  # Greater than 0.995 rounds to 1.00
+        fraction_str = ">0.99"
+    else:
+        fraction_str = f"{fraction:.2f}"
+    # Format absolute number with abbreviations
+    absolute_str = _format_units(passed)
+    return f"{absolute_str}/{fraction_str}"
 def _rich_print_scan_table(
     scan_result: Any,
     data_source: str,
@@ -314,7 +354,7 @@ def _rich_print_scan_table(
         total_rows: Total number of rows in the dataset
         total_columns: Total number of columns in the dataset
     """
-    try:
+    try:  # pragma: no cover
         import re
         import narwhals as nw
@@ -556,7 +596,7 @@ def _rich_print_scan_table(
         console.print()
         console.print(scan_table)
-    except Exception as e:
+    except Exception as e:  # pragma: no cover
         # Fallback to simple message if table creation fails
         console.print(f"[yellow]Scan results available for {data_source}[/yellow]")
         console.print(f"[red]Error displaying table: {str(e)}[/red]")
@@ -725,7 +765,7 @@ def _rich_print_gt_table(
                             # Create header with column name and data type
                             header_text = f"{display_col}\n[dim yellow]{dtype_display}[/dim yellow]"
                         else:
-                            header_text = display_col
+                            header_text = display_col  # pragma: no cover
                         rich_table.add_column(
                             header_text,
@@ -774,7 +814,7 @@ def _rich_print_gt_table(
                             ]
                             for row in data_dict
                         ]
-                elif hasattr(df, "to_dict"):
+                elif hasattr(df, "to_dict"):  # pragma: no cover
                     # Pandas-like interface
                     data_dict = df.to_dict("records")
                     if len(columns) > max_terminal_cols:
@@ -808,7 +848,7 @@ def _rich_print_gt_table(
                             ]
                             for row in data_dict
                         ]
-                elif hasattr(df, "iter_rows"):
+                elif hasattr(df, "iter_rows"):  # pragma: no cover
                     # Polars lazy frame
                     rows = [
                         [
@@ -822,7 +862,7 @@ def _rich_print_gt_table(
                         ]
                         for row in df.iter_rows()
                     ]
-                elif hasattr(df, "__iter__"):
+                elif hasattr(df, "__iter__"):  # pragma: no cover
                     # Try to iterate directly
                     rows = [
                         [
@@ -1031,51 +1071,13 @@ def _display_validation_summary(validation: Any) -> None:
                 steps_table.add_column("C", style="red")
                 steps_table.add_column("Ext", style="blue", justify="center")
-                def format_units(n: int) -> str:
-                    """Format large numbers with K, M, B abbreviations for values above 10,000."""
-                    if n is None:
-                        return "—"
-                    if n >= 1000000000:  # Billions
-                        return f"{n / 1000000000:.1f}B"
-                    elif n >= 1000000:  # Millions
-                        return f"{n / 1000000:.1f}M"
-                    elif n >= 10000:  # Use K for 10,000 and above
-                        return f"{n / 1000:.0f}K"
-                    else:
-                        return str(n)
-                def format_pass_fail(passed: int, total: int) -> str:
-                    """Format pass/fail counts with abbreviated numbers and fractions."""
-                    if passed is None or total is None or total == 0:
-                        return "—/—"
-                    # Calculate fraction
-                    fraction = passed / total
-                    # Format fraction with special handling for very small and very large values
-                    if fraction == 0.0:
-                        fraction_str = "0.00"
-                    elif fraction == 1.0:
-                        fraction_str = "1.00"
-                    elif fraction < 0.005:  # Less than 0.005 rounds to 0.00
-                        fraction_str = "<0.01"
-                    elif fraction > 0.995:  # Greater than 0.995 rounds to 1.00
-                        fraction_str = ">0.99"
-                    else:
-                        fraction_str = f"{fraction:.2f}"
-                    # Format absolute number with abbreviations
-                    absolute_str = format_units(passed)
-                    return f"{absolute_str}/{fraction_str}"
                 for step in info:
                     # Extract values information for the Values column
                     values_str = "—"  # Default to em dash if no values
                     # Handle different validation types
                     if step.assertion_type == "col_schema_match":
-                        values_str = "—"  # Schema is too complex to display inline
+                        values_str = "—"  # pragma: no cover
                     elif step.assertion_type == "col_vals_between":
                         # For between validations, try to get left and right bounds
                         if (
@@ -1090,37 +1092,42 @@ def _display_validation_summary(validation: Any) -> None:
                                 values_str = f"[{step.values[0]}, {step.values[1]}]"
                             else:
                                 values_str = str(step.values)
-                    elif step.assertion_type in ["row_count_match", "col_count_match"]:
+                    elif step.assertion_type in [
+                        "row_count_match",
+                        "col_count_match",
+                    ]:  # pragma: no cover
                         # For count match validations, extract the 'count' value from the dictionary
-                        if hasattr(step, "values") and step.values is not None:
-                            if isinstance(step.values, dict) and "count" in step.values:
-                                values_str = str(step.values["count"])
-                            else:
-                                values_str = str(step.values)
-                        else:
-                            values_str = "—"
+                        if hasattr(step, "values") and step.values is not None:  # pragma: no cover
+                            if (
+                                isinstance(step.values, dict) and "count" in step.values
+                            ):  # pragma: no cover
+                                values_str = str(step.values["count"])  # pragma: no cover
+                            else:  # pragma: no cover
+                                values_str = str(step.values)  # pragma: no cover
+                        else:  # pragma: no cover
+                            values_str = "—"  # pragma: no cover
                     elif step.assertion_type in ["col_vals_expr", "conjointly"]:
-                        values_str = "COLUMN EXPR"
+                        values_str = "COLUMN EXPR"  # pragma: no cover
                     elif step.assertion_type == "specially":
-                        values_str = "EXPR"
+                        values_str = "EXPR"  # pragma: no cover
                     elif hasattr(step, "values") and step.values is not None:
                         if isinstance(step.values, (list, tuple)):
                             if len(step.values) <= 3:
                                 values_str = ", ".join(str(v) for v in step.values)
-                            else:
-                                values_str = f"{', '.join(str(v) for v in step.values[:3])}..."
+                            else:  # pragma: no cover
+                                values_str = f"{', '.join(str(v) for v in step.values[:3])}..."  # pragma: no cover
                         else:
                             values_str = str(step.values)
                     elif hasattr(step, "value") and step.value is not None:
                         values_str = str(step.value)
-                    elif hasattr(step, "set") and step.set is not None:
-                        if isinstance(step.set, (list, tuple)):
-                            if len(step.set) <= 3:
-                                values_str = ", ".join(str(v) for v in step.set)
-                            else:
-                                values_str = f"{', '.join(str(v) for v in step.set[:3])}..."
-                        else:
-                            values_str = str(step.set)
+                    elif hasattr(step, "set") and step.set is not None:  # pragma: no cover
+                        if isinstance(step.set, (list, tuple)):  # pragma: no cover
+                            if len(step.set) <= 3:  # pragma: no cover
+                                values_str = ", ".join(str(v) for v in step.set)  # pragma: no cover
+                            else:  # pragma: no cover
+                                values_str = f"{', '.join(str(v) for v in step.set[:3])}..."  # pragma: no cover
+                        else:  # pragma: no cover
+                            values_str = str(step.set)  # pragma: no cover
                     # Determine threshold status for W, E, C columns
                     # Check if thresholds are set and whether they were exceeded
@@ -1132,10 +1139,10 @@ def _display_validation_summary(validation: Any) -> None:
                         and hasattr(step.thresholds, "warning")
                         and step.thresholds.warning is not None
                     ):
-                        w_status = (
-                            "[bright_black]●[/bright_black]"
-                            if step.warning
-                            else "[bright_black]○[/bright_black]"
+                        w_status = (  # pragma: no cover
+                            "[bright_black]●[/bright_black]"  # pragma: no cover
+                            if step.warning  # pragma: no cover
+                            else "[bright_black]○[/bright_black]"  # pragma: no cover
                         )
                     else:
                         w_status = "—"
@@ -1178,9 +1185,9 @@ def _display_validation_summary(validation: Any) -> None:
                         step.assertion_type,
                         str(step.column) if step.column else "—",
                         values_str,
-                        format_units(step.n),
-                        format_pass_fail(step.n_passed, step.n),
-                        format_pass_fail(step.n - step.n_passed, step.n),
+                        _format_units(step.n),
+                        _format_pass_fail(step.n_passed, step.n),
+                        _format_pass_fail(step.n - step.n_passed, step.n),
                         w_status,
                         e_status,
                         c_status,
@@ -1224,7 +1231,7 @@ def _display_validation_summary(validation: Any) -> None:
             console.print("[yellow]Validation object does not contain validation results.[/yellow]")
     except Exception as e:  # pragma: no cover
-        console.print(f"[red]Error displaying validation summary:[/red] {e}")
+        console.print(f"[red]Error displaying validation summary:[/red] {e}")  # pragma: no cover
         import traceback  # pragma: no cover
         console.print(f"[dim]{traceback.format_exc()}[/dim]")  # pragma: no cover
@@ -1372,24 +1379,26 @@ def preview(
         # Handle piped input
         if data_source is None:
-            if not sys.stdin.isatty():
+            if not sys.stdin.isatty():  # pragma: no cover
                 # Data is being piped in - read the file path from stdin
-                piped_input = sys.stdin.read().strip()
-                if piped_input:
-                    data_source = piped_input
+                piped_input = sys.stdin.read().strip()  # pragma: no cover
+                if piped_input:  # pragma: no cover
+                    data_source = piped_input  # pragma: no cover
                     # Determine the format from the file extension
-                    if piped_input.endswith(".parquet"):
-                        format_type = "Parquet"
-                    elif piped_input.endswith(".csv"):
-                        format_type = "CSV"
-                    else:
-                        format_type = "unknown"
+                    if piped_input.endswith(".parquet"):  # pragma: no cover
+                        format_type = "Parquet"  # pragma: no cover
+                    elif piped_input.endswith(".csv"):  # pragma: no cover
+                        format_type = "CSV"  # pragma: no cover
+                    else:  # pragma: no cover
+                        format_type = "unknown"  # pragma: no cover
-                    console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
-                else:
-                    console.print("[red]Error:[/red] No data provided via pipe")
-                    sys.exit(1)
+                    console.print(
+                        f"[dim]Using piped data source in {format_type} format.[/dim]"
+                    )  # pragma: no cover
+                else:  # pragma: no cover
+                    console.print("[red]Error:[/red] No data provided via pipe")  # pragma: no cover
+                    sys.exit(1)  # pragma: no cover
             else:
                 # Show concise help and exit
                 _show_concise_help("preview", None)
@@ -1742,24 +1751,26 @@ def missing(data_source: str | None, output_html: str | None):
         # Handle piped input
         if data_source is None:
-            if not sys.stdin.isatty():
+            if not sys.stdin.isatty():  # pragma: no cover
                 # Data is being piped in - read the file path from stdin
-                piped_input = sys.stdin.read().strip()
-                if piped_input:
-                    data_source = piped_input
+                piped_input = sys.stdin.read().strip()  # pragma: no cover
+                if piped_input:  # pragma: no cover
+                    data_source = piped_input  # pragma: no cover
                     # Determine the format from the file extension
-                    if piped_input.endswith(".parquet"):
-                        format_type = "Parquet"
-                    elif piped_input.endswith(".csv"):
-                        format_type = "CSV"
-                    else:
-                        format_type = "unknown"
+                    if piped_input.endswith(".parquet"):  # pragma: no cover
+                        format_type = "Parquet"  # pragma: no cover
+                    elif piped_input.endswith(".csv"):  # pragma: no cover
+                        format_type = "CSV"  # pragma: no cover
+                    else:  # pragma: no cover
+                        format_type = "unknown"  # pragma: no cover
-                    console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
-                else:
-                    console.print("[red]Error:[/red] No data provided via pipe")
-                    sys.exit(1)
+                    console.print(
+                        f"[dim]Using piped data source in {format_type} format.[/dim]"
+                    )  # pragma: no cover
+                else:  # pragma: no cover
+                    console.print("[red]Error:[/red] No data provided via pipe")  # pragma: no cover
+                    sys.exit(1)  # pragma: no cover
             else:
                 # Show concise help and exit
                 _show_concise_help("missing", None)
@@ -2027,24 +2038,26 @@ def validate(
         # or if we have piped input
         if data_source is None:
             # Check if we have piped input
-            if not sys.stdin.isatty():
+            if not sys.stdin.isatty():  # pragma: no cover
                 # Data is being piped in: read the file path from stdin
-                piped_input = sys.stdin.read().strip()
-                if piped_input:
-                    data_source = piped_input
+                piped_input = sys.stdin.read().strip()  # pragma: no cover
+                if piped_input:  # pragma: no cover
+                    data_source = piped_input  # pragma: no cover
                     # Determine the format from the file extension
-                    if piped_input.endswith(".parquet"):
-                        format_type = "Parquet"
-                    elif piped_input.endswith(".csv"):
-                        format_type = "CSV"
-                    else:
-                        format_type = "unknown"
+                    if piped_input.endswith(".parquet"):  # pragma: no cover
+                        format_type = "Parquet"  # pragma: no cover
+                    elif piped_input.endswith(".csv"):  # pragma: no cover
+                        format_type = "CSV"  # pragma: no cover
+                    else:  # pragma: no cover
+                        format_type = "unknown"  # pragma: no cover
-                    console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
-                else:
-                    console.print("[red]Error:[/red] No data provided via pipe")
-                    sys.exit(1)
+                    console.print(
+                        f"[dim]Using piped data source in {format_type} format.[/dim]"
+                    )  # pragma: no cover
+                else:  # pragma: no cover
+                    console.print("[red]Error:[/red] No data provided via pipe")  # pragma: no cover
+                    sys.exit(1)  # pragma: no cover
             else:
                 # Show concise help and exit
                 _show_concise_help("validate", None)

pointblank/column.py CHANGED Viewed

@@ -219,7 +219,7 @@ class ColumnSelectorNarwhals(Column):
         # Use `collect_schema()` for LazyFrame to avoid performance warnings
         if hasattr(selected_df, "collect_schema"):
             return list(selected_df.collect_schema().keys())
-        else:
+        else:  # pragma: no cover
             return list(selected_df.columns)

pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl

pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl