pointblank 0.11.5__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- pointblank/__init__.py +2 -0
- pointblank/_constants.py +0 -1
- pointblank/_interrogation.py +181 -38
- pointblank/_utils.py +29 -2
- pointblank/assistant.py +9 -0
- pointblank/cli.py +39 -24
- pointblank/data/api-docs.txt +658 -29
- pointblank/schema.py +17 -0
- pointblank/segments.py +163 -0
- pointblank/validate.py +320 -57
- pointblank/yaml.py +162 -19
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/METADATA +58 -5
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/RECORD +17 -16
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/WHEEL +0 -0
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.11.5.dist-info → pointblank-0.12.0.dist-info}/top_level.txt +0 -0
pointblank/__init__.py
CHANGED
@@ -25,6 +25,7 @@ from pointblank.column import (
 from pointblank.datascan import DataScan, col_summary_tbl
 from pointblank.draft import DraftValidation
 from pointblank.schema import Schema
+from pointblank.segments import seg_group
 from pointblank.tf import TF
 from pointblank.thresholds import Actions, FinalActions, Thresholds
 from pointblank.validate import (
@@ -76,6 +77,7 @@ __all__ = [
     "get_validation_summary",
     "get_column_count",
     "get_row_count",
+    "seg_group",
     "send_slack_notification",
     # YAML functionality
     "yaml_interrogate",
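The newly exported `seg_group()` helper is the user-facing piece of the `segments.py` additions. A hedged usage sketch (the tuple form of `segments=` and the `small_table` dataset come from the existing API, not from this diff):

```python
import pointblank as pb

# Hypothetical usage: treat the values "low" and "mid" of column "f" as one
# combined segment when interrogating; seg_group()'s exact signature is assumed.
validation = (
    pb.Validate(data=pb.load_dataset("small_table"))
    .col_vals_gt(
        columns="d",
        value=100,
        segments=("f", pb.seg_group(["low", "mid"])),
    )
    .interrogate()
)
```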
pointblank/_constants.py
CHANGED
pointblank/_interrogation.py
CHANGED
@@ -23,6 +23,74 @@ if TYPE_CHECKING:
     from pointblank._typing import AbsoluteTolBounds


+def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val: Any) -> Any:
+    """
+    Safely modify datetime comparison values for LazyFrame compatibility.
+
+    This function handles the case where we can't directly slice LazyFrames
+    to get column dtypes for datetime conversion.
+    """
+    try:
+        # First try to get column dtype from schema for LazyFrames
+        column_dtype = None
+
+        if hasattr(data_frame, "collect_schema"):
+            schema = data_frame.collect_schema()
+            column_dtype = schema.get(column)
+        elif hasattr(data_frame, "schema"):
+            schema = data_frame.schema
+            column_dtype = schema.get(column)
+
+        # If we got a dtype from schema, use it
+        if column_dtype is not None:
+            # Create a mock column object for _modify_datetime_compare_val
+            class MockColumn:
+                def __init__(self, dtype):
+                    self.dtype = dtype
+
+            mock_column = MockColumn(column_dtype)
+            return _modify_datetime_compare_val(tgt_column=mock_column, compare_val=compare_val)
+
+        # Fallback: try collecting a small sample if possible
+        try:
+            sample = data_frame.head(1).collect()
+            if hasattr(sample, "dtypes") and column in sample.columns:
+                # For pandas-like dtypes
+                column_dtype = sample.dtypes[column] if hasattr(sample, "dtypes") else None
+                if column_dtype:
+
+                    class MockColumn:
+                        def __init__(self, dtype):
+                            self.dtype = dtype
+
+                    mock_column = MockColumn(column_dtype)
+                    return _modify_datetime_compare_val(
+                        tgt_column=mock_column, compare_val=compare_val
+                    )
+        except Exception:
+            pass
+
+        # Final fallback: try direct access (for eager DataFrames)
+        try:
+            if hasattr(data_frame, "dtypes") and column in data_frame.columns:
+                column_dtype = data_frame.dtypes[column]
+
+                class MockColumn:
+                    def __init__(self, dtype):
+                        self.dtype = dtype
+
+                mock_column = MockColumn(column_dtype)
+                return _modify_datetime_compare_val(tgt_column=mock_column, compare_val=compare_val)
+        except Exception:
+            pass
+
+    except Exception:
+        pass
+
+    # If all else fails, return the original compare_val
+    return compare_val
+
+
 @dataclass
 class Interrogator:
     """
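The helper above reads dtypes from the frame's schema because LazyFrames cannot be sliced like eager DataFrames. A minimal Polars illustration of that constraint (not package code):

```python
import polars as pl

lf = pl.LazyFrame({"ts": ["2024-01-01", "2024-06-15"]}).with_columns(
    pl.col("ts").str.to_date()
)

# lf["ts"] raises a TypeError on a LazyFrame, so the dtype must come from the
# schema; no rows are materialized for this lookup.
print(lf.collect_schema().get("ts"))  # Date
```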
@@ -136,9 +204,7 @@ class Interrogator:

         compare_expr = _get_compare_expr_nw(compare=self.compare)

-        compare_expr = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=compare_expr
-        )
+        compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)

         return (
             self.x.with_columns(
@@ -211,9 +277,7 @@ class Interrogator:

         compare_expr = _get_compare_expr_nw(compare=self.compare)

-        compare_expr = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=compare_expr
-        )
+        compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)

         return (
             self.x.with_columns(
@@ -329,9 +393,7 @@ class Interrogator:
         else:
             compare_expr = _get_compare_expr_nw(compare=self.compare)

-            compare_expr = _modify_datetime_compare_val(
-                tgt_column=self.x[self.column], compare_val=compare_expr
-            )
+            compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)

             tbl = self.x.with_columns(
                 pb_is_good_1=nw.col(self.column).is_null() & self.na_pass,
@@ -421,9 +483,7 @@ class Interrogator:
             ).to_native()

         else:
-            compare_expr = _modify_datetime_compare_val(
-                tgt_column=self.x[self.column], compare_val=self.compare
-            )
+            compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, self.compare)

             return self.x.with_columns(
                 pb_is_good_=nw.col(self.column) != nw.lit(compare_expr),
@@ -544,9 +604,7 @@ class Interrogator:
         if ref_col_has_null_vals:
             # Create individual cases for Pandas and Polars

-            compare_expr = _modify_datetime_compare_val(
-                tgt_column=self.x[self.column], compare_val=self.compare
-            )
+            compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, self.compare)

             if is_pandas_dataframe(self.x.to_native()):
                 tbl = self.x.with_columns(
@@ -584,6 +642,25 @@ class Interrogator:

                 return tbl

+            else:
+                # Generic case for other DataFrame types (PySpark, etc.)
+                # Use similar logic to Polars but handle potential differences
+                tbl = self.x.with_columns(
+                    pb_is_good_1=nw.col(self.column).is_null(),  # val is Null in Column
+                    pb_is_good_2=nw.lit(self.na_pass),  # Pass if any Null in val or compare
+                )
+
+                tbl = tbl.with_columns(pb_is_good_3=nw.col(self.column) != nw.lit(compare_expr))
+
+                tbl = tbl.with_columns(
+                    pb_is_good_=(
+                        (nw.col("pb_is_good_1") & nw.col("pb_is_good_2"))
+                        | (nw.col("pb_is_good_3") & ~nw.col("pb_is_good_1"))
+                    )
+                )
+
+                return tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
+
     def ge(self) -> FrameT | Any:
         # Ibis backends ---------------------------------------------

@@ -629,9 +706,7 @@ class Interrogator:

         compare_expr = _get_compare_expr_nw(compare=self.compare)

-        compare_expr = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=compare_expr
-        )
+        compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)

         tbl = (
             self.x.with_columns(
@@ -702,9 +777,7 @@ class Interrogator:

         compare_expr = _get_compare_expr_nw(compare=self.compare)

-        compare_expr = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=compare_expr
-        )
+        compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)

         return (
             self.x.with_columns(
@@ -834,10 +907,8 @@ class Interrogator:
         low_val = _get_compare_expr_nw(compare=self.low)
         high_val = _get_compare_expr_nw(compare=self.high)

-        low_val =
-        high_val = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=high_val
-        )
+        low_val = _safe_modify_datetime_compare_val(self.x, self.column, low_val)
+        high_val = _safe_modify_datetime_compare_val(self.x, self.column, high_val)

         tbl = self.x.with_columns(
             pb_is_good_1=nw.col(self.column).is_null(),  # val is Null in Column
@@ -1026,10 +1097,8 @@ class Interrogator:
         low_val = _get_compare_expr_nw(compare=self.low)
         high_val = _get_compare_expr_nw(compare=self.high)

-        low_val =
-        high_val = _modify_datetime_compare_val(
-            tgt_column=self.x[self.column], compare_val=high_val
-        )
+        low_val = _safe_modify_datetime_compare_val(self.x, self.column, low_val)
+        high_val = _safe_modify_datetime_compare_val(self.x, self.column, high_val)

         tbl = self.x.with_columns(
             pb_is_good_1=nw.col(self.column).is_null(),  # val is Null in Column
@@ -1209,14 +1278,15 @@ class Interrogator:
         else:
             columns_subset = self.columns_subset

-            # Create a
-
+            # Create a count of duplicates using group_by approach like Ibis backend
+            # Group by the columns of interest and count occurrences
+            count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))

-            #
-
+            # Join back to original table to get count for each row
+            tbl = tbl.join(count_tbl, on=columns_subset, how="left")

-            #
-            tbl = tbl.with_columns(pb_is_good_
+            # Passing rows will have the value `1` (no duplicates, so True), otherwise False applies
+            tbl = tbl.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")

         return tbl.to_native()

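The duplicate check above now counts group sizes and joins them back onto the rows. A standalone narwhals/Polars sketch of the same pattern, with made-up data:

```python
import narwhals as nw
import polars as pl

tbl = nw.from_native(pl.DataFrame({"x": [1, 1, 2], "y": ["a", "a", "b"]}))
columns_subset = ["x", "y"]

# Count occurrences per group, join the counts back, and flag rows whose group
# count is exactly 1 as passing (i.e., not duplicated).
count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
tbl = tbl.join(count_tbl, on=columns_subset, how="left")
tbl = tbl.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
print(tbl.to_native())
```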
@@ -2088,6 +2158,8 @@ class ConjointlyValidation:
             return self._get_pandas_results()
         elif "duckdb" in self.tbl_type or "ibis" in self.tbl_type:
             return self._get_ibis_results()
+        elif "pyspark" in self.tbl_type:
+            return self._get_pyspark_results()
         else:  # pragma: no cover
             raise NotImplementedError(f"Support for {self.tbl_type} is not yet implemented")

@@ -2247,6 +2319,53 @@ class ConjointlyValidation:
         results_tbl = self.data_tbl.mutate(pb_is_good_=ibis.literal(True))
         return results_tbl

+    def _get_pyspark_results(self):
+        """Process expressions for PySpark DataFrames."""
+        from pyspark.sql import functions as F
+
+        pyspark_columns = []
+
+        for expr_fn in self.expressions:
+            try:
+                # First try direct evaluation with PySpark DataFrame
+                expr_result = expr_fn(self.data_tbl)
+
+                # Check if it's a PySpark Column
+                if hasattr(expr_result, "_jc"):  # PySpark Column has _jc attribute
+                    pyspark_columns.append(expr_result)
+                else:
+                    raise TypeError(
+                        f"Expression returned {type(expr_result)}, expected PySpark Column"
+                    )
+
+            except Exception as e:
+                try:
+                    # Try as a ColumnExpression (for pb.expr_col style)
+                    col_expr = expr_fn(None)
+
+                    if hasattr(col_expr, "to_pyspark_expr"):
+                        # Convert to PySpark expression
+                        pyspark_expr = col_expr.to_pyspark_expr(self.data_tbl)
+                        pyspark_columns.append(pyspark_expr)
+                    else:
+                        raise TypeError(f"Cannot convert {type(col_expr)} to PySpark Column")
+                except Exception as nested_e:
+                    print(f"Error evaluating PySpark expression: {e} -> {nested_e}")
+
+        # Combine results with AND logic
+        if pyspark_columns:
+            final_result = pyspark_columns[0]
+            for col in pyspark_columns[1:]:
+                final_result = final_result & col
+
+            # Create results table with boolean column
+            results_tbl = self.data_tbl.withColumn("pb_is_good_", final_result)
+            return results_tbl
+
+        # Default case
+        results_tbl = self.data_tbl.withColumn("pb_is_good_", F.lit(True))
+        return results_tbl
+

 class SpeciallyValidation:
     def __init__(self, data_tbl, expression, threshold, tbl_type):
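With this branch, `conjointly()` expressions can resolve to native PySpark `Column` objects or to `pb.expr_col()` expressions that get converted. A hedged end-to-end sketch (the DataFrame construction and column names are invented; whether every PySpark path works beyond what this diff shows is not confirmed here):

```python
import pointblank as pb
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark_df = spark.createDataFrame([(1, 10), (2, 20)], ["a", "b"])

# Each callable must yield a boolean expression; the PySpark branch combines
# them with AND logic into a single pb_is_good_ column.
validation = (
    pb.Validate(data=spark_df)
    .conjointly(
        lambda df: pb.expr_col("a") > 0,
        lambda df: pb.expr_col("b") < 100,
    )
    .interrogate()
)
```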
@@ -2359,13 +2478,22 @@ class NumberOfTestUnits:
     column: str

     def get_test_units(self, tbl_type: str) -> int:
-        if tbl_type == "pandas" or tbl_type == "polars" or tbl_type == "local":
+        if (
+            tbl_type == "pandas"
+            or tbl_type == "polars"
+            or tbl_type == "pyspark"
+            or tbl_type == "local"
+        ):
             # Convert the DataFrame to a format that narwhals can work with and:
             # - check if the column exists
             dfn = _column_test_prep(
                 df=self.df, column=self.column, allowed_types=None, check_exists=False
             )

+            # Handle LazyFrames which don't have len()
+            if hasattr(dfn, "collect"):
+                dfn = dfn.collect()
+
             return len(dfn)

         if tbl_type in IBIS_BACKENDS:
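The added `collect()` step exists because lazy frames have no length. A small Polars illustration (not package code):

```python
import polars as pl

lf = pl.LazyFrame({"a": [1, 2, 3]})

# len(lf) raises a TypeError because a LazyFrame has no length until it is
# collected into an eager DataFrame.
print(len(lf.collect()))  # 3
```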
@@ -2383,7 +2511,22 @@ def _get_compare_expr_nw(compare: Any) -> Any:


 def _column_has_null_values(table: FrameT, column: str) -> bool:
-    null_count = (table.select(column).null_count())[column][0]
+    try:
+        # Try the standard null_count() method
+        null_count = (table.select(column).null_count())[column][0]
+    except AttributeError:
+        # For LazyFrames, collect first then get null count
+        try:
+            collected = table.select(column).collect()
+            null_count = (collected.null_count())[column][0]
+        except Exception:
+            # Fallback: check if any values are null
+            try:
+                result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect()
+                null_count = result["null_count"][0]
+            except Exception:
+                # Last resort: return False (assume no nulls)
+                return False

     if null_count is None or null_count == 0:
         return False
@@ -2414,7 +2557,7 @@ def _check_nulls_across_columns_nw(table, columns_subset):

     # Build the expression by combining each column's `is_null()` with OR operations
     null_expr = functools.reduce(
-        lambda acc, col: acc |
+        lambda acc, col: acc | nw.col(col).is_null() if acc is not None else nw.col(col).is_null(),
         column_names,
         None,
     )
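The OR-fold above can be reproduced outside the package; a self-contained narwhals/Polars sketch with made-up data:

```python
import functools

import narwhals as nw
import polars as pl

df = nw.from_native(pl.DataFrame({"a": [1, None, 3], "b": [None, 2, 3]}))

# Fold each column's is_null() into one expression with OR, starting from None.
null_expr = functools.reduce(
    lambda acc, col: acc | nw.col(col).is_null() if acc is not None else nw.col(col).is_null(),
    df.columns,
    None,
)

print(df.with_columns(any_null=null_expr).to_native())
```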
pointblank/_utils.py
CHANGED
@@ -66,11 +66,13 @@ def _get_tbl_type(data: FrameT | Any) -> str:
     except Exception as e:
         raise TypeError("The `data` object is not a DataFrame or Ibis Table.") from e

-    # Detect through regex if the table is a polars or pandas DataFrame
+    # Detect through regex if the table is a polars, pandas, or Spark DataFrame
     if re.search(r"polars", df_ns_str, re.IGNORECASE):
         return "polars"
     elif re.search(r"pandas", df_ns_str, re.IGNORECASE):
         return "pandas"
+    elif re.search(r"pyspark", df_ns_str, re.IGNORECASE):
+        return "pyspark"

     # If ibis is present, then get the table's backend name
     ibis_present = _is_lib_present(lib_name="ibis")
@@ -164,7 +166,7 @@ def _check_any_df_lib(method_used: str) -> None:
 def _is_value_a_df(value: Any) -> bool:
     try:
         ns = nw.get_native_namespace(value)
-        if "polars" in str(ns) or "pandas" in str(ns):
+        if "polars" in str(ns) or "pandas" in str(ns) or "pyspark" in str(ns):
             return True
         else:  # pragma: no cover
             return False
@@ -619,6 +621,10 @@ def _get_api_text() -> str:
         "expr_col",
     ]

+    segments_exported = [
+        "seg_group",
+    ]
+
     interrogation_exported = [
         "Validate.interrogate",
         "Validate.get_tabular_report",
@@ -648,6 +654,12 @@ def _get_api_text() -> str:
         "assistant",
         "load_dataset",
         "get_data_path",
+        "connect_to_table",
+    ]
+
+    yaml_exported = [
+        "yaml_interrogate",
+        "validate_yaml",
     ]

     utility_exported = [
@@ -679,6 +691,10 @@ many steps). Furthermore, the `col()` function can be used to declare a comparis
 for the `value=` argument in many `col_vals_*()` methods) when you can't use a fixed value
 for comparison."""

+    segments_desc = (
+        """Combine multiple values into a single segment using `seg_*()` helper functions."""
+    )
+
     interrogation_desc = """The validation plan is put into action when `interrogate()` is called.
 The workflow for performing a comprehensive validation is then: (1) `Validate()`, (2) adding
 validation steps, (3) `interrogate()`. After interrogation of the data, we can view a validation
@@ -694,6 +710,11 @@ datasets included in the package can be accessed via the `load_dataset()` functi
 `config()` utility lets us set global configuration parameters. Want to chat with an assistant? Use
 the `assistant()` function to get help with Pointblank."""

+    yaml_desc = """The *YAML* group contains functions that allow for the use of YAML to orchestrate
+validation workflows. The `yaml_interrogate()` function can be used to run a validation workflow from
+YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
+passes its own validity checks."""
+
     utility_desc = """The Utility Functions group contains functions that are useful for accessing
 metadata about the target data. Use `get_column_count()` or `get_row_count()` to get the number of
 columns or rows in a table. The `get_action_metadata()` function is useful when building custom
@@ -718,12 +739,18 @@ table information, and timing details."""
     api_text += f"""\n## The Column Selection family\n\n{column_selection_desc}\n\n"""
     api_text += get_api_details(module=pointblank, exported_list=column_selection_exported)

+    api_text += f"""\n## The Segments family\n\n{segments_desc}\n\n"""
+    api_text += get_api_details(module=pointblank, exported_list=segments_exported)
+
     api_text += f"""\n## The Interrogation and Reporting family\n\n{interrogation_desc}\n\n"""
     api_text += get_api_details(module=pointblank, exported_list=interrogation_exported)

     api_text += f"""\n## The Inspection and Assistance family\n\n{inspect_desc}\n\n"""
     api_text += get_api_details(module=pointblank, exported_list=inspect_exported)

+    api_text += f"""\n## The YAML family\n\n{yaml_desc}\n\n"""
+    api_text += get_api_details(module=pointblank, exported_list=yaml_exported)
+
     api_text += f"""\n## The Utility Functions family\n\n{utility_desc}\n\n"""
     api_text += get_api_details(module=pointblank, exported_list=utility_exported)

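The new YAML family centers on `yaml_interrogate()` and `validate_yaml()`. A hedged sketch of the workflow; the key names in the YAML document (`tbl`, `steps`) are assumptions about the configuration schema, which this diff does not spell out:

```python
import pointblank as pb

# Hypothetical YAML configuration; the key names are assumed, not taken from
# this diff. A file path can be passed instead of a string.
yaml_config = """
tbl: small_table
steps:
  - rows_distinct
  - col_vals_gt:
      columns: d
      value: 100
"""

result = pb.yaml_interrogate(yaml_config)
```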
pointblank/assistant.py
CHANGED
@@ -138,10 +138,15 @@ def assistant(

     - Polars DataFrame (`"polars"`)
     - Pandas DataFrame (`"pandas"`)
+    - PySpark table (`"pyspark"`)
     - DuckDB table (`"duckdb"`)*
     - MySQL table (`"mysql"`)*
     - PostgreSQL table (`"postgresql"`)*
     - SQLite table (`"sqlite"`)*
+    - Microsoft SQL Server table (`"mssql"`)*
+    - Snowflake table (`"snowflake"`)*
+    - Databricks table (`"databricks"`)*
+    - BigQuery table (`"bigquery"`)*
     - Parquet table (`"parquet"`)*
     - CSV files (string path or `pathlib.Path` object with `.csv` extension)
     - Parquet files (string path, `pathlib.Path` object, glob pattern, directory with `.parquet`
@@ -152,6 +157,10 @@ def assistant(
     `ibis.expr.types.relations.Table`). Furthermore, using `assistant()` with these types of tables
     requires the Ibis library (`v9.5.0` or above) to be installed. If the input table is a Polars or
     Pandas DataFrame, the availability of Ibis is not needed.
+
+    To use a CSV file, ensure that a string or `pathlib.Path` object with a `.csv` extension is
+    provided. The file will be automatically detected and loaded using the best available DataFrame
+    library. The loading preference is Polars first, then Pandas as a fallback.
     """

     # Check that the chatlas package is installed
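The loading preference stated above (Polars first, then Pandas) can be pictured with a short, hedged sketch; `_load_csv` is a made-up name, not the package's actual loader:

```python
def _load_csv(path):
    # Illustrative only: prefer Polars when installed, otherwise fall back to
    # Pandas, mirroring the preference described in the docstring.
    try:
        import polars as pl

        return pl.read_csv(path)
    except ImportError:
        import pandas as pd

        return pd.read_csv(path)


# Example: frame = _load_csv("data.csv")
```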
pointblank/cli.py
CHANGED
@@ -1360,10 +1360,10 @@ def preview(
     For tables with many columns, use these options to control which columns are displayed:

     \b
-    - --columns: Specify exact columns (
-    - --col-range: Select column range (
-    - --col-first: Show first N columns (
-    - --col-last: Show last N columns (
+    - --columns: Specify exact columns (--columns "name,age,email")
+    - --col-range: Select column range (--col-range "1:10", --col-range "5:", --col-range ":15")
+    - --col-first: Show first N columns (--col-first 5)
+    - --col-last: Show last N columns (--col-last 3)

     Tables with >15 columns automatically show first 7 and last 7 columns with indicators.
     """
@@ -1920,31 +1920,43 @@ def validate(

     AVAILABLE CHECK_TYPES:

-
-
-    The default CHECK_TYPE is 'rows-distinct' which checks for duplicate rows.
+    Require no additional options:

     \b
     - rows-distinct: Check if all rows in the dataset are unique (no duplicates)
     - rows-complete: Check if all rows are complete (no missing values in any column)
-
-
-
-
-    - col-
-    - col-vals-
-
+
+    Require --column:
+
+    \b
+    - col-exists: Check if a specific column exists in the dataset
+    - col-vals-not-null: Check if all values in a column are not null/missing
+
+    Require --column and --value:
+
+    \b
+    - col-vals-gt: Check if column values are greater than a fixed value
+    - col-vals-ge: Check if column values are greater than or equal to a fixed value
+    - col-vals-lt: Check if column values are less than a fixed value
+    - col-vals-le: Check if column values are less than or equal to a fixed value
+
+    Require --column and --set:
+
+    \b
+    - col-vals-in-set: Check if column values are in an allowed set
+
+    Use --list-checks to see all available validation methods with examples. The default CHECK_TYPE
+    is 'rows-distinct' which checks for duplicate rows.

     Examples:

     \b
-    pb validate data.csv
-    pb validate data.csv --list-checks
+    pb validate data.csv                     # Uses default validation (rows-distinct)
+    pb validate data.csv --list-checks       # Show all available checks
     pb validate data.csv --check rows-distinct
     pb validate data.csv --check rows-distinct --show-extract
     pb validate data.csv --check rows-distinct --write-extract failing_rows_folder
     pb validate data.csv --check rows-distinct --exit-code
-    pb validate data.csv --check rows-complete
     pb validate data.csv --check col-exists --column price
     pb validate data.csv --check col-vals-not-null --column email
     pb validate data.csv --check col-vals-gt --column score --value 50
@@ -1952,7 +1964,6 @@ def validate(

     Multiple validations in one command:
     pb validate data.csv --check rows-distinct --check rows-complete
-    pb validate data.csv --check col-vals-not-null --column email --check col-vals-gt --column age --value 18
     """
     try:
         import sys
@@ -4627,36 +4638,40 @@ def pl(
     pb pl "pl.read_csv('data.csv').select(['name', 'age'])"
     pb pl "pl.read_csv('data.csv').filter(pl.col('age') > 25)"

+    \b
     # Multi-line with editor (supports multiple statements)
     pb pl --edit

+    \b
     # Multi-statement code example in editor:
     # csv = pl.read_csv('data.csv')
     # result = csv.select(['name', 'age']).filter(pl.col('age') > 25)

+    \b
     # Multi-line with a specific editor
     pb pl --edit --editor nano
     pb pl --edit --editor code
     pb pl --edit --editor micro

+    \b
     # From file
     pb pl --file query.py

-
-
+    \b
+    Piping to other pb commands
+    pb pl "pl.read_csv('data.csv').head(20)" --pipe | pb validate --check rows-distinct
     pb pl --edit --pipe | pb preview --head 10
     pb pl --edit --pipe | pb scan --output-html report.html
     pb pl --edit --pipe | pb missing --output-html missing_report.html

-    Use --output-format to change how results are displayed:
-
     \b
+    Use --output-format to change how results are displayed:
     pb pl "pl.read_csv('data.csv')" --output-format scan
     pb pl "pl.read_csv('data.csv')" --output-format missing
     pb pl "pl.read_csv('data.csv')" --output-format info

-    Note: For multi-statement code, assign your final result to a variable like
-    '
+    Note: For multi-statement code, assign your final result to a variable like 'result', 'df',
+    'data', or ensure it's the last expression.
     """
     try:
         # Check if Polars is available