PyPI - pointblank - Versions diffs - 0.9.4__py3-none-any.whl → 0.9.5__py3-none-any.whl - Mend

pointblank 0.9.4__py3-none-any.whl → 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

pointblank/data/api-docs.txt CHANGED Viewed

@@ -8837,7 +8837,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
     Pandas DataFrame, the availability of Ibis is not needed.
-load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
+load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights', 'global_sales']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
     Load a dataset hosted in the library as specified table type.
@@ -8851,7 +8851,7 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
     ----------
     dataset
         The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
-        and `"nycflights"`.
+        `"nycflights"`, and `"global_sales"`.
     tbl_type
         The type of table to generate from the dataset. The named options are `"polars"`,
         `"pandas"`, and `"duckdb"`.
@@ -8873,6 +8873,8 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
     they purchased, ads viewed, and the revenue generated.
     - `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
     about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
+    - `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
+    global sales of products across different regions and countries.
     Supported DataFrame Types
     -------------------------
@@ -8884,10 +8886,10 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
     Examples
     --------
-    Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with its
-    defaults:
+    Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
+    `dataset="small_table"` and `tbl_type="polars"`:
-    Note that the `"small_table"` dataset is a simple Polars DataFrame and using the
+    Note that the `"small_table"` dataset is a Polars DataFrame and using the
     [`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
     environment.
@@ -8915,6 +8917,19 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
     The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
     truly a real-world dataset and provides information about flights originating from New York City
     airports in 2013.
+    Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
+    name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
+    ```python
+    global_sales = pb.load_dataset(dataset="global_sales")
+    pb.preview(global_sales)
+    ```
+    The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
+    describes the sales of a particular product to a customer located in one of three global
+    regions: North America, Europe, or Asia.

pointblank/schema.py CHANGED Viewed

@@ -728,7 +728,14 @@ class Schema:
                 return new_schema
     def __str__(self):
-        return "Pointblank Schema\n" + "\n".join([f"  {col[0]}: {col[1]}" for col in self.columns])
+        formatted_columns = []
+        for col in self.columns:
+            if len(col) == 1:  # Only column name provided (no data type)
+                formatted_columns.append(f"  {col[0]}: <ANY>")
+            else:  # Both column name and data type provided
+                formatted_columns.append(f"  {col[0]}: {col[1]}")
+        return "Pointblank Schema\n" + "\n".join(formatted_columns)
     def __repr__(self):
         return f"Schema(columns={self.columns})"

pointblank/validate.py CHANGED Viewed

@@ -8031,7 +8031,7 @@ class Validate:
         After interrogation is complete, the `Validate` object will have gathered information, and
         we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
-        [`f_failed()`](`pointblank.Validate.f_failed`)`, etc., to understand how the table performed
+        [`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
         against the validation plan. A visual representation of the validation results can be viewed
         by printing the `Validate` object; this will display the validation table in an HTML viewing
         environment.
@@ -8772,6 +8772,10 @@ class Validate:
         assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
         some details are preserved.
+        If the validation has not yet been interrogated, this method will automatically call
+        [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
+        for passing tests.
         Raises
         -------
         AssertionError
@@ -8781,8 +8785,9 @@ class Validate:
         --------
         In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
         `c`). There will be three validation steps, and the second step will have a failing test
-        unit (the value `10` isn't less than `9`). After interrogation, the `assert_passing()`
-        method is used to assert that all validation steps passed perfectly.
+        unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
+        that all validation steps passed perfectly, automatically performing the interrogation if
+        needed.
         ```{python}
         #| error: True
@@ -8803,12 +8808,16 @@ class Validate:
             .col_vals_gt(columns="a", value=0)
             .col_vals_lt(columns="b", value=9) # this assertion is false
             .col_vals_in_set(columns="c", set=["a", "b"])
-            .interrogate()
         )
+        # No need to call [`interrogate()`](`pointblank.Validate.interrogate`) explicitly
         validation.assert_passing()
         ```
         """
+        # Check if validation has been interrogated
+        if not hasattr(self, "time_start") or self.time_start is None:
+            # Auto-interrogate with default parameters
+            self.interrogate()
         if not self.all_passed():
             failed_steps = [
@@ -8821,6 +8830,167 @@ class Validate:
             )
             raise AssertionError(msg)
+    def assert_below_threshold(
+        self, level: str = "warning", i: int = None, message: str = None
+    ) -> None:
+        """
+        Raise an `AssertionError` if validation steps exceed a specified threshold level.
+        The `assert_below_threshold()` method checks whether validation steps' failure rates are
+        below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
+        particularly useful in automated testing environments where you want to ensure your data
+        quality meets minimum standards before proceeding.
+        If any validation step exceeds the specified threshold level, an `AssertionError` will be
+        raised with details about which steps failed. If the validation has not yet been
+        interrogated, this method will automatically call
+        [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
+        Parameters
+        ----------
+        level
+            The threshold level to check against, which could be any of `"warning"` (the default),
+            `"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
+            exceeds this level.
+        i
+            Specific validation step number(s) to check. Can be provided as a single integer or a
+            list of integers. If `None` (the default), all steps are checked.
+        message
+            Custom error message to use if assertion fails. If `None`, a default message will be
+            generated that lists the specific steps that exceeded the threshold.
+        Returns
+        -------
+        None
+        Raises
+        ------
+        AssertionError
+            If any specified validation step exceeds the given threshold level.
+        ValueError
+            If an invalid threshold level is provided.
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+        ```
+        Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
+        create a simple Polars DataFrame with two columns (`a` and `b`).
+        ```{python}
+        import polars as pl
+        tbl = pl.DataFrame({
+            "a": [7, 4, 9, 7, 12],
+            "b": [9, 8, 10, 5, 10]
+        })
+        ```
+        Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
+        `critical=0.3`). After interrogating, we display the validation report table:
+        ```{python}
+        import pointblank as pb
+        validation = (
+            pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
+            .col_vals_gt(columns="a", value=5)   # 1 failing test unit
+            .col_vals_lt(columns="b", value=10)  # 2 failing test units
+            .interrogate()
+        )
+        validation
+        ```
+        Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
+        exceeds the 'warning' threshold:
+        ```{python}
+        try:
+            validation.assert_below_threshold(level="warning")
+        except AssertionError as e:
+            print(f"Assertion failed: {e}")
+        ```
+        Check a specific step against the 'critical' threshold using the `i=` parameter:
+        ```{python}
+        validation.assert_below_threshold(level="critical", i=1)  # Won't raise an error
+        ```
+        As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
+        thresholds), no error is raised and nothing is printed.
+        We can also provide a custom error message with the `message=` parameter. Let's try that
+        here:
+        ```{python}
+        try:
+            validation.assert_below_threshold(
+                level="error",
+                message="Data quality too low for processing!"
+            )
+        except AssertionError as e:
+            print(f"Custom error: {e}")
+        ```
+        See Also
+        --------
+        - [`warning()`](`pointblank.Validate.warning`): Get the 'warning' status for each validation
+        step
+        - [`error()`](`pointblank.Validate.error`): Get the 'error' status for each validation step
+        - [`critical()`](`pointblank.Validate.critical`): Get the 'critical' status for each
+        validation step
+        - [`assert_passing()`](`pointblank.Validate.assert_passing`): Assert all validations pass
+        completely
+        """
+        # Check if validation has been interrogated
+        if not hasattr(self, "time_start") or self.time_start is None:
+            # Auto-interrogate with default parameters
+            self.interrogate()
+        # Validate the level parameter
+        level = level.lower()
+        if level not in ["warning", "error", "critical"]:
+            raise ValueError(
+                f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
+            )
+        # Get the threshold status using the appropriate method
+        if level == "warning":
+            status = self.warning(i=i)
+        elif level == "error":
+            status = self.error(i=i)
+        elif level == "critical":
+            status = self.critical(i=i)
+        # Find any steps that exceeded the threshold
+        failures = []
+        for step_num, exceeded in status.items():
+            if exceeded:
+                # Get the step's description
+                validation_step = self.validation_info[step_num - 1]
+                step_descriptor = (
+                    validation_step.autobrief
+                    if hasattr(validation_step, "autobrief") and validation_step.autobrief
+                    else f"Validation step {step_num}"
+                )
+                failures.append(f"Step {step_num}: {step_descriptor}")
+        # If any failures were found, raise an AssertionError
+        if failures:
+            if message:
+                msg = message
+            else:
+                msg = f"The following steps exceeded the {level} threshold level:\n" + "\n".join(
+                    failures
+                )
+            raise AssertionError(msg)
     def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
         """
         Provides a dictionary of the number of test units for each validation step.

{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pointblank
-Version: 0.9.4
+Version: 0.9.5
 Summary: Find out if your data is what you think it is.
 Author-email: Richard Iannone <riannone@me.com>
 License: MIT License
@@ -103,7 +103,7 @@ _Data validation made beautiful and powerful_
 </div>
-<div align="right">
+<div align="center">
    <a href="translations/README.fr.md">Français</a> |
    <a href="translations/README.de.md">Deutsch</a> |
    <a href="translations/README.it.md">Italiano</a> |
@@ -112,7 +112,9 @@ _Data validation made beautiful and powerful_
    <a href="translations/README.nl.md">Nederlands</a> |
    <a href="translations/README.zh-CN.md">简体中文</a> |
    <a href="translations/README.ja.md">日本語</a> |
-   <a href="translations/README.ko.md">한국어</a>
+   <a href="translations/README.ko.md">한국어</a> |
+   <a href="translations/README.hi.md">हिन्दी</a> |
+   <a href="translations/README.ar.md">العربية</a>
 </div>
 ## What is Pointblank?

{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/RECORD RENAMED Viewed

@@ -12,11 +12,11 @@ pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
 pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
 pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
 pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
-pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
+pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
 pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
 pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
-pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
-pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
+pointblank/validate.py,sha256=DfTChQcLyaJFNLdjkG3jQAsY7GtLvTHSbxkzKusG9I4,615287
+pointblank/data/api-docs.txt,sha256=Sk2ePat_ngz3tAizQVSo7uG_fInv638HFLmM6041osM,482808
 pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
 pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
 pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
@@ -26,8 +26,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
 pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
 pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
 pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
-pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
-pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
-pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
-pointblank-0.9.4.dist-info/RECORD,,
+pointblank-0.9.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
+pointblank-0.9.5.dist-info/METADATA,sha256=8SHBgMHqrX9T2cMOfa_cQMDw60NbCmMB1xLgrwWk5vw,14857
+pointblank-0.9.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+pointblank-0.9.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
+pointblank-0.9.5.dist-info/RECORD,,

{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.7.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

pointblank 0.9.4__py3-none-any.whl → 0.9.5__py3-none-any.whl

pointblank 0.9.4__py3-none-any.whl → 0.9.5__py3-none-any.whl