PyPI - pointblank - Versions diffs - 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl - Mend

pointblank 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

pointblank/_typing.py CHANGED Viewed

@@ -1,26 +1,37 @@
 from __future__ import annotations
-from typing import TypeAlias
-## Absolute bounds, ie. plus or minus
-AbsoluteBounds: TypeAlias = tuple[int, int]
-## Relative bounds, ie. plus or minus some percent
-RelativeBounds: TypeAlias = tuple[float, float]
-## Tolerance afforded to some check
-Tolerance: TypeAlias = int | float | AbsoluteBounds | RelativeBounds
-## Types for data segmentation
-## Value(s) that can be used in a segment tuple
-SegmentValue: TypeAlias = str | list[str]
-## (column, value(s)) format for segments
-SegmentTuple: TypeAlias = tuple[str, SegmentValue]
-## Individual segment item (string or tuple)
-SegmentItem: TypeAlias = str | SegmentTuple
-## Full segment specification options
-SegmentSpec: TypeAlias = str | SegmentTuple | list[SegmentItem]
+import sys
+from typing import List, Tuple, Union
+# Check Python version for TypeAlias support
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+    # Python 3.10+ style type aliases
+    AbsoluteBounds: TypeAlias = Tuple[int, int]
+    RelativeBounds: TypeAlias = Tuple[float, float]
+    Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
+    SegmentValue: TypeAlias = Union[str, List[str]]
+    SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
+    SegmentItem: TypeAlias = Union[str, SegmentTuple]
+    SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
+else:
+    # Python 3.8 and 3.9 compatible type aliases
+    AbsoluteBounds = Tuple[int, int]
+    RelativeBounds = Tuple[float, float]
+    Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
+    SegmentValue = Union[str, List[str]]
+    SegmentTuple = Tuple[str, SegmentValue]
+    SegmentItem = Union[str, SegmentTuple]
+    SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
+# Add docstrings for better IDE support
+AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
+RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
+Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
+SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
+SegmentTuple.__doc__ = "(column, value(s)) format for segments"
+SegmentItem.__doc__ = "Individual segment item (string or tuple)"
+SegmentSpec.__doc__ = (
+    "Full segment specification options (i.e., all options for segment specification)"
+)

pointblank/data/global_sales-duckdb.zip ADDED Viewed

Binary file

pointblank/data/global_sales.zip ADDED Viewed

Binary file

pointblank/validate.py CHANGED Viewed

@@ -385,7 +385,7 @@ def config(
 def load_dataset(
-    dataset: Literal["small_table", "game_revenue", "nycflights"] = "small_table",
+    dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
     tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
 ) -> FrameT | Any:
     """
@@ -401,7 +401,7 @@ def load_dataset(
     ----------
     dataset
         The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
-        and `"nycflights"`.
+        `"nycflights"`, and `"global_sales"`.
     tbl_type
         The type of table to generate from the dataset. The named options are `"polars"`,
         `"pandas"`, and `"duckdb"`.
@@ -423,6 +423,8 @@ def load_dataset(
     they purchased, ads viewed, and the revenue generated.
     - `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
     about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
+    - `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
+    global sales of products across different regions and countries.
     Supported DataFrame Types
     -------------------------
@@ -434,18 +436,18 @@ def load_dataset(
     Examples
     --------
-    Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with its
-    defaults:
+    Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
+    `dataset="small_table"` and `tbl_type="polars"`:
     ```{python}
     import pointblank as pb
-    small_table = pb.load_dataset()
+    small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
     pb.preview(small_table)
     ```
-    Note that the `"small_table"` dataset is a simple Polars DataFrame and using the
+    Note that the `"small_table"` dataset is a Polars DataFrame and using the
     [`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
     environment.
@@ -473,10 +475,23 @@ def load_dataset(
     The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
     truly a real-world dataset and provides information about flights originating from New York City
     airports in 2013.
+    Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
+    name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
+    ```{python}
+    global_sales = pb.load_dataset(dataset="global_sales")
+    pb.preview(global_sales)
+    ```
+    The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
+    describes the sales of a particular product to a customer located in one of three global
+    regions: North America, Europe, or Asia.
     """
     # Raise an error if the dataset is from the list of provided datasets
-    if dataset not in ["small_table", "game_revenue", "nycflights"]:
+    if dataset not in ["small_table", "game_revenue", "nycflights", "global_sales"]:
         raise ValueError(
             f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
             "- `small_table`\n"
@@ -518,6 +533,7 @@ def load_dataset(
             "small_table": ["date_time", "date"],
             "game_revenue": ["session_start", "time", "start_day"],
             "nycflights": [],
+            "global_sales": ["timestamp"],
         }
         dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
@@ -8142,6 +8158,7 @@ class Validate:
             inclusive = validation.inclusive
             na_pass = validation.na_pass
             threshold = validation.thresholds
+            segment = validation.segments
             assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
             assertion_category = METHOD_CATEGORY_MAP[assertion_method]
@@ -8149,7 +8166,14 @@ class Validate:
             # Process the `brief` text for the validation step by including template variables to
             # the user-supplied text
-            validation.brief = _process_brief(brief=validation.brief, step=validation.i, col=column)
+            validation.brief = _process_brief(
+                brief=validation.brief,
+                step=validation.i,
+                col=column,
+                values=value,
+                thresholds=threshold,
+                segment=segment,
+            )
             # Generate the autobrief description for the validation step; it's important to perform
             # that here since text components like the column and the value(s) have been resolved
@@ -11629,7 +11653,14 @@ def _string_date_dttm_conversion(value: any) -> any:
     return value
-def _process_brief(brief: str | None, step: int, col: str | list[str] | None) -> str:
+def _process_brief(
+    brief: str | None,
+    step: int,
+    col: str | list[str] | None,
+    values: any | None,
+    thresholds: any | None,
+    segment: any | None,
+) -> str:
     # If there is no brief, return `None`
     if brief is None:
         return None
@@ -11649,6 +11680,34 @@ def _process_brief(brief: str | None, step: int, col: str | list[str] | None) ->
         brief = brief.replace("{col}", col)
         brief = brief.replace("{column}", col)
+    if values is not None:
+        # If the value is a list, then join the values into a comma-separated string
+        if isinstance(values, list):
+            values = ", ".join([str(v) for v in values])
+        brief = brief.replace("{value}", str(values))
+    if thresholds is not None:
+        # Get the string representation of thresholds in the form of:
+        # "W: 0.20 / C: 0.40 / E: 1.00"
+        warning_val = thresholds._get_threshold_value(level="warning")
+        error_val = thresholds._get_threshold_value(level="error")
+        critical_val = thresholds._get_threshold_value(level="critical")
+        thresholds_fmt = f"W: {warning_val} / E: {error_val} / C: {critical_val}"
+        brief = brief.replace("{thresholds}", thresholds_fmt)
+    if segment is not None:
+        # The segment is always a tuple of the form ("{column}", "{value}")
+        segment_fmt = f"{segment[0]} / {segment[1]}"
+        brief = brief.replace("{segment}", segment_fmt)
+        brief = brief.replace("{segment_column}", segment[0])
+        brief = brief.replace("{segment_value}", segment[1])
     return brief

{pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pointblank
-Version: 0.9.2
+Version: 0.9.4
 Summary: Find out if your data is what you think it is.
 Author-email: Richard Iannone <riannone@me.com>
 License: MIT License

{pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,8112
 pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
 pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
 pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
-pointblank/_typing.py,sha256=ConITAbsFxU8CkNXY7l0Lua9hGofeDDJAWw-lGAIVgI,764
+pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
 pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
 pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
 pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
@@ -15,17 +15,19 @@ pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
 pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
 pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
 pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
-pointblank/validate.py,sha256=0LWCuex5DeNcoRoq0BppcKn1J-WaqCc3TYyQGWB-a2E,606287
+pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
 pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
 pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
 pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
+pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
+pointblank/data/global_sales.zip,sha256=JeUnR1apKQ35PPwEcvTKCEIEiYeYQtoGmYjmzbz99DM,2138604
 pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
 pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0mU,7828965
 pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
 pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
 pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
-pointblank-0.9.2.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
-pointblank-0.9.2.dist-info/METADATA,sha256=iUvV_QGj9ekzd3ddoPvT-HubBptqM7EIClXJ7HBs8-M,14732
-pointblank-0.9.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-pointblank-0.9.2.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
-pointblank-0.9.2.dist-info/RECORD,,
+pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
+pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
+pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
+pointblank-0.9.4.dist-info/RECORD,,

{pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

pointblank 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

pointblank 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl