pointblank 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/_typing.py CHANGED
@@ -1,26 +1,37 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TypeAlias
4
-
5
- ## Absolute bounds, ie. plus or minus
6
- AbsoluteBounds: TypeAlias = tuple[int, int]
7
-
8
- ## Relative bounds, ie. plus or minus some percent
9
- RelativeBounds: TypeAlias = tuple[float, float]
10
-
11
- ## Tolerance afforded to some check
12
- Tolerance: TypeAlias = int | float | AbsoluteBounds | RelativeBounds
13
-
14
- ## Types for data segmentation
15
-
16
- ## Value(s) that can be used in a segment tuple
17
- SegmentValue: TypeAlias = str | list[str]
18
-
19
- ## (column, value(s)) format for segments
20
- SegmentTuple: TypeAlias = tuple[str, SegmentValue]
21
-
22
- ## Individual segment item (string or tuple)
23
- SegmentItem: TypeAlias = str | SegmentTuple
24
-
25
- ## Full segment specification options
26
- SegmentSpec: TypeAlias = str | SegmentTuple | list[SegmentItem]
3
+ import sys
4
+ from typing import List, Tuple, Union
5
+
6
+ # Check Python version for TypeAlias support
7
+ if sys.version_info >= (3, 10):
8
+ from typing import TypeAlias
9
+
10
+ # Python 3.10+ style type aliases
11
+ AbsoluteBounds: TypeAlias = Tuple[int, int]
12
+ RelativeBounds: TypeAlias = Tuple[float, float]
13
+ Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
14
+ SegmentValue: TypeAlias = Union[str, List[str]]
15
+ SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
16
+ SegmentItem: TypeAlias = Union[str, SegmentTuple]
17
+ SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
18
+ else:
19
+ # Python 3.8 and 3.9 compatible type aliases
20
+ AbsoluteBounds = Tuple[int, int]
21
+ RelativeBounds = Tuple[float, float]
22
+ Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
23
+ SegmentValue = Union[str, List[str]]
24
+ SegmentTuple = Tuple[str, SegmentValue]
25
+ SegmentItem = Union[str, SegmentTuple]
26
+ SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
27
+
28
+ # Add docstrings for better IDE support
29
+ AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
30
+ RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
31
+ Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
32
+ SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
33
+ SegmentTuple.__doc__ = "(column, value(s)) format for segments"
34
+ SegmentItem.__doc__ = "Individual segment item (string or tuple)"
35
+ SegmentSpec.__doc__ = (
36
+ "Full segment specification options (i.e., all options for segment specification)"
37
+ )
Binary file
Binary file
pointblank/validate.py CHANGED
@@ -385,7 +385,7 @@ def config(
385
385
 
386
386
 
387
387
  def load_dataset(
388
- dataset: Literal["small_table", "game_revenue", "nycflights"] = "small_table",
388
+ dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
389
389
  tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
390
390
  ) -> FrameT | Any:
391
391
  """
@@ -401,7 +401,7 @@ def load_dataset(
401
401
  ----------
402
402
  dataset
403
403
  The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
404
- and `"nycflights"`.
404
+ `"nycflights"`, and `"global_sales"`.
405
405
  tbl_type
406
406
  The type of table to generate from the dataset. The named options are `"polars"`,
407
407
  `"pandas"`, and `"duckdb"`.
@@ -423,6 +423,8 @@ def load_dataset(
423
423
  they purchased, ads viewed, and the revenue generated.
424
424
  - `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
425
425
  about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
426
+ - `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
427
+ global sales of products across different regions and countries.
426
428
 
427
429
  Supported DataFrame Types
428
430
  -------------------------
@@ -434,18 +436,18 @@ def load_dataset(
434
436
 
435
437
  Examples
436
438
  --------
437
- Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with its
438
- defaults:
439
+ Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
440
+ `dataset="small_table"` and `tbl_type="polars"`:
439
441
 
440
442
  ```{python}
441
443
  import pointblank as pb
442
444
 
443
- small_table = pb.load_dataset()
445
+ small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
444
446
 
445
447
  pb.preview(small_table)
446
448
  ```
447
449
 
448
- Note that the `"small_table"` dataset is a simple Polars DataFrame and using the
450
+ Note that the `"small_table"` dataset is a Polars DataFrame and using the
449
451
  [`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
450
452
  environment.
451
453
 
@@ -473,10 +475,23 @@ def load_dataset(
473
475
  The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
474
476
  truly a real-world dataset and provides information about flights originating from New York City
475
477
  airports in 2013.
478
+
479
+ Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
480
+ name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
481
+
482
+ ```{python}
483
+ global_sales = pb.load_dataset(dataset="global_sales")
484
+
485
+ pb.preview(global_sales)
486
+ ```
487
+
488
+ The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
489
+ describes the sales of a particular product to a customer located in one of three global
490
+ regions: North America, Europe, or Asia.
476
491
  """
477
492
 
478
493
  # Raise an error if the dataset is not from the list of provided datasets
479
- if dataset not in ["small_table", "game_revenue", "nycflights"]:
494
+ if dataset not in ["small_table", "game_revenue", "nycflights", "global_sales"]:
480
495
  raise ValueError(
481
496
  f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
482
497
  "- `small_table`\n"
@@ -518,6 +533,7 @@ def load_dataset(
518
533
  "small_table": ["date_time", "date"],
519
534
  "game_revenue": ["session_start", "time", "start_day"],
520
535
  "nycflights": [],
536
+ "global_sales": ["timestamp"],
521
537
  }
522
538
 
523
539
  dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
@@ -8142,6 +8158,7 @@ class Validate:
8142
8158
  inclusive = validation.inclusive
8143
8159
  na_pass = validation.na_pass
8144
8160
  threshold = validation.thresholds
8161
+ segment = validation.segments
8145
8162
 
8146
8163
  assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
8147
8164
  assertion_category = METHOD_CATEGORY_MAP[assertion_method]
@@ -8149,7 +8166,14 @@ class Validate:
8149
8166
 
8150
8167
  # Process the `brief` text for the validation step by including template variables to
8151
8168
  # the user-supplied text
8152
- validation.brief = _process_brief(brief=validation.brief, step=validation.i, col=column)
8169
+ validation.brief = _process_brief(
8170
+ brief=validation.brief,
8171
+ step=validation.i,
8172
+ col=column,
8173
+ values=value,
8174
+ thresholds=threshold,
8175
+ segment=segment,
8176
+ )
8153
8177
 
8154
8178
  # Generate the autobrief description for the validation step; it's important to perform
8155
8179
  # that here since text components like the column and the value(s) have been resolved
@@ -11629,7 +11653,14 @@ def _string_date_dttm_conversion(value: any) -> any:
11629
11653
  return value
11630
11654
 
11631
11655
 
11632
- def _process_brief(brief: str | None, step: int, col: str | list[str] | None) -> str:
11656
+ def _process_brief(
11657
+ brief: str | None,
11658
+ step: int,
11659
+ col: str | list[str] | None,
11660
+ values: any | None,
11661
+ thresholds: any | None,
11662
+ segment: any | None,
11663
+ ) -> str:
11633
11664
  # If there is no brief, return `None`
11634
11665
  if brief is None:
11635
11666
  return None
@@ -11649,6 +11680,34 @@ def _process_brief(brief: str | None, step: int, col: str | list[str] | None) ->
11649
11680
  brief = brief.replace("{col}", col)
11650
11681
  brief = brief.replace("{column}", col)
11651
11682
 
11683
+ if values is not None:
11684
+ # If the value is a list, then join the values into a comma-separated string
11685
+ if isinstance(values, list):
11686
+ values = ", ".join([str(v) for v in values])
11687
+
11688
+ brief = brief.replace("{value}", str(values))
11689
+
11690
+ if thresholds is not None:
11691
+ # Get the string representation of thresholds in the form of:
11692
+ # "W: 0.20 / E: 0.40 / C: 1.00"
11693
+
11694
+ warning_val = thresholds._get_threshold_value(level="warning")
11695
+ error_val = thresholds._get_threshold_value(level="error")
11696
+ critical_val = thresholds._get_threshold_value(level="critical")
11697
+
11698
+ thresholds_fmt = f"W: {warning_val} / E: {error_val} / C: {critical_val}"
11699
+
11700
+ brief = brief.replace("{thresholds}", thresholds_fmt)
11701
+
11702
+ if segment is not None:
11703
+ # The segment is always a tuple of the form ("{column}", "{value}")
11704
+
11705
+ segment_fmt = f"{segment[0]} / {segment[1]}"
11706
+
11707
+ brief = brief.replace("{segment}", segment_fmt)
11708
+ brief = brief.replace("{segment_column}", segment[0])
11709
+ brief = brief.replace("{segment_value}", segment[1])
11710
+
11652
11711
  return brief
11653
11712
 
11654
11713
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,8112
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
4
  pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
5
5
  pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
6
- pointblank/_typing.py,sha256=ConITAbsFxU8CkNXY7l0Lua9hGofeDDJAWw-lGAIVgI,764
6
+ pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
7
7
  pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
8
8
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
9
9
  pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
@@ -15,17 +15,19 @@ pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
15
15
  pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
16
16
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
17
17
  pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
18
- pointblank/validate.py,sha256=0LWCuex5DeNcoRoq0BppcKn1J-WaqCc3TYyQGWB-a2E,606287
18
+ pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
19
19
  pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
20
20
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
21
21
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
22
+ pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
23
+ pointblank/data/global_sales.zip,sha256=JeUnR1apKQ35PPwEcvTKCEIEiYeYQtoGmYjmzbz99DM,2138604
22
24
  pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
23
25
  pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0mU,7828965
24
26
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
25
27
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
26
28
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
27
- pointblank-0.9.2.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
28
- pointblank-0.9.2.dist-info/METADATA,sha256=iUvV_QGj9ekzd3ddoPvT-HubBptqM7EIClXJ7HBs8-M,14732
29
- pointblank-0.9.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
30
- pointblank-0.9.2.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
31
- pointblank-0.9.2.dist-info/RECORD,,
29
+ pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
+ pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
31
+ pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
32
+ pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
+ pointblank-0.9.4.dist-info/RECORD,,