pointblank-0.9.2-py3-none-any.whl → pointblank-0.9.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_typing.py +35 -24
- pointblank/data/global_sales-duckdb.zip +0 -0
- pointblank/data/global_sales.zip +0 -0
- pointblank/validate.py +68 -9
- {pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/METADATA +1 -1
- {pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/RECORD +9 -7
- {pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/WHEEL +0 -0
- {pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.9.2.dist-info → pointblank-0.9.4.dist-info}/top_level.txt +0 -0
pointblank/_typing.py
CHANGED
|
@@ -1,26 +1,37 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
SegmentSpec
|
|
3
|
+
import sys
|
|
4
|
+
from typing import List, Tuple, Union
|
|
5
|
+
|
|
6
|
+
# Check Python version for TypeAlias support
|
|
7
|
+
if sys.version_info >= (3, 10):
|
|
8
|
+
from typing import TypeAlias
|
|
9
|
+
|
|
10
|
+
# Python 3.10+ style type aliases
|
|
11
|
+
AbsoluteBounds: TypeAlias = Tuple[int, int]
|
|
12
|
+
RelativeBounds: TypeAlias = Tuple[float, float]
|
|
13
|
+
Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
|
|
14
|
+
SegmentValue: TypeAlias = Union[str, List[str]]
|
|
15
|
+
SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
|
|
16
|
+
SegmentItem: TypeAlias = Union[str, SegmentTuple]
|
|
17
|
+
SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
|
|
18
|
+
else:
|
|
19
|
+
# Python 3.8 and 3.9 compatible type aliases
|
|
20
|
+
AbsoluteBounds = Tuple[int, int]
|
|
21
|
+
RelativeBounds = Tuple[float, float]
|
|
22
|
+
Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
|
|
23
|
+
SegmentValue = Union[str, List[str]]
|
|
24
|
+
SegmentTuple = Tuple[str, SegmentValue]
|
|
25
|
+
SegmentItem = Union[str, SegmentTuple]
|
|
26
|
+
SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
|
|
27
|
+
|
|
28
|
+
# Add docstrings for better IDE support
|
|
29
|
+
AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
|
|
30
|
+
RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
|
|
31
|
+
Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
|
|
32
|
+
SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
|
|
33
|
+
SegmentTuple.__doc__ = "(column, value(s)) format for segments"
|
|
34
|
+
SegmentItem.__doc__ = "Individual segment item (string or tuple)"
|
|
35
|
+
SegmentSpec.__doc__ = (
|
|
36
|
+
"Full segment specification options (i.e., all options for segment specification)"
|
|
37
|
+
)
|
|
Binary file
|
|
Binary file
|
pointblank/validate.py
CHANGED
|
@@ -385,7 +385,7 @@ def config(
|
|
|
385
385
|
|
|
386
386
|
|
|
387
387
|
def load_dataset(
|
|
388
|
-
dataset: Literal["small_table", "game_revenue", "nycflights"] = "small_table",
|
|
388
|
+
dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
|
|
389
389
|
tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
|
|
390
390
|
) -> FrameT | Any:
|
|
391
391
|
"""
|
|
@@ -401,7 +401,7 @@ def load_dataset(
|
|
|
401
401
|
----------
|
|
402
402
|
dataset
|
|
403
403
|
The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
|
|
404
|
-
and `"nycflights"`.
|
|
404
|
+
`"nycflights"`, and `"global_sales"`.
|
|
405
405
|
tbl_type
|
|
406
406
|
The type of table to generate from the dataset. The named options are `"polars"`,
|
|
407
407
|
`"pandas"`, and `"duckdb"`.
|
|
@@ -423,6 +423,8 @@ def load_dataset(
|
|
|
423
423
|
they purchased, ads viewed, and the revenue generated.
|
|
424
424
|
- `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
|
|
425
425
|
about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
|
|
426
|
+
- `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
|
|
427
|
+
global sales of products across different regions and countries.
|
|
426
428
|
|
|
427
429
|
Supported DataFrame Types
|
|
428
430
|
-------------------------
|
|
@@ -434,18 +436,18 @@ def load_dataset(
|
|
|
434
436
|
|
|
435
437
|
Examples
|
|
436
438
|
--------
|
|
437
|
-
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
438
|
-
|
|
439
|
+
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
440
|
+
`dataset="small_table"` and `tbl_type="polars"`:
|
|
439
441
|
|
|
440
442
|
```{python}
|
|
441
443
|
import pointblank as pb
|
|
442
444
|
|
|
443
|
-
small_table = pb.load_dataset()
|
|
445
|
+
small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
|
|
444
446
|
|
|
445
447
|
pb.preview(small_table)
|
|
446
448
|
```
|
|
447
449
|
|
|
448
|
-
Note that the `"small_table"` dataset is a
|
|
450
|
+
Note that the `"small_table"` dataset is a Polars DataFrame and using the
|
|
449
451
|
[`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
|
|
450
452
|
environment.
|
|
451
453
|
|
|
@@ -473,10 +475,23 @@ def load_dataset(
|
|
|
473
475
|
The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
|
|
474
476
|
truly a real-world dataset and provides information about flights originating from New York City
|
|
475
477
|
airports in 2013.
|
|
478
|
+
|
|
479
|
+
Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
|
|
480
|
+
name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
|
|
481
|
+
|
|
482
|
+
```{python}
|
|
483
|
+
global_sales = pb.load_dataset(dataset="global_sales")
|
|
484
|
+
|
|
485
|
+
pb.preview(global_sales)
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
|
|
489
|
+
describes the sales of a particular product to a customer located in one of three global
|
|
490
|
+
regions: North America, Europe, or Asia.
|
|
476
491
|
"""
|
|
477
492
|
|
|
478
493
|
# Raise an error if the dataset is not in the list of provided datasets
|
|
479
|
-
if dataset not in ["small_table", "game_revenue", "nycflights"]:
|
|
494
|
+
if dataset not in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
480
495
|
raise ValueError(
|
|
481
496
|
f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
|
|
482
497
|
"- `small_table`\n"
|
|
@@ -518,6 +533,7 @@ def load_dataset(
|
|
|
518
533
|
"small_table": ["date_time", "date"],
|
|
519
534
|
"game_revenue": ["session_start", "time", "start_day"],
|
|
520
535
|
"nycflights": [],
|
|
536
|
+
"global_sales": ["timestamp"],
|
|
521
537
|
}
|
|
522
538
|
|
|
523
539
|
dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
|
|
@@ -8142,6 +8158,7 @@ class Validate:
|
|
|
8142
8158
|
inclusive = validation.inclusive
|
|
8143
8159
|
na_pass = validation.na_pass
|
|
8144
8160
|
threshold = validation.thresholds
|
|
8161
|
+
segment = validation.segments
|
|
8145
8162
|
|
|
8146
8163
|
assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
|
|
8147
8164
|
assertion_category = METHOD_CATEGORY_MAP[assertion_method]
|
|
@@ -8149,7 +8166,14 @@ class Validate:
|
|
|
8149
8166
|
|
|
8150
8167
|
# Process the `brief` text for the validation step by including template variables to
|
|
8151
8168
|
# the user-supplied text
|
|
8152
|
-
validation.brief = _process_brief(
|
|
8169
|
+
validation.brief = _process_brief(
|
|
8170
|
+
brief=validation.brief,
|
|
8171
|
+
step=validation.i,
|
|
8172
|
+
col=column,
|
|
8173
|
+
values=value,
|
|
8174
|
+
thresholds=threshold,
|
|
8175
|
+
segment=segment,
|
|
8176
|
+
)
|
|
8153
8177
|
|
|
8154
8178
|
# Generate the autobrief description for the validation step; it's important to perform
|
|
8155
8179
|
# that here since text components like the column and the value(s) have been resolved
|
|
@@ -11629,7 +11653,14 @@ def _string_date_dttm_conversion(value: any) -> any:
|
|
|
11629
11653
|
return value
|
|
11630
11654
|
|
|
11631
11655
|
|
|
11632
|
-
def _process_brief(brief: str | None, step: int, col: str | list[str] | None) -> str:
|
|
11656
|
+
def _process_brief(
|
|
11657
|
+
brief: str | None,
|
|
11658
|
+
step: int,
|
|
11659
|
+
col: str | list[str] | None,
|
|
11660
|
+
values: any | None,
|
|
11661
|
+
thresholds: any | None,
|
|
11662
|
+
segment: any | None,
|
|
11663
|
+
) -> str:
|
|
11633
11664
|
# If there is no brief, return `None`
|
|
11634
11665
|
if brief is None:
|
|
11635
11666
|
return None
|
|
@@ -11649,6 +11680,34 @@ def _process_brief(brief: str | None, step: int, col: str | list[str] | None) ->
|
|
|
11649
11680
|
brief = brief.replace("{col}", col)
|
|
11650
11681
|
brief = brief.replace("{column}", col)
|
|
11651
11682
|
|
|
11683
|
+
if values is not None:
|
|
11684
|
+
# If the value is a list, then join the values into a comma-separated string
|
|
11685
|
+
if isinstance(values, list):
|
|
11686
|
+
values = ", ".join([str(v) for v in values])
|
|
11687
|
+
|
|
11688
|
+
brief = brief.replace("{value}", str(values))
|
|
11689
|
+
|
|
11690
|
+
if thresholds is not None:
|
|
11691
|
+
# Get the string representation of thresholds in the form of:
|
|
11692
|
+
# "W: 0.20 / E: 0.40 / C: 1.00"
|
|
11693
|
+
|
|
11694
|
+
warning_val = thresholds._get_threshold_value(level="warning")
|
|
11695
|
+
error_val = thresholds._get_threshold_value(level="error")
|
|
11696
|
+
critical_val = thresholds._get_threshold_value(level="critical")
|
|
11697
|
+
|
|
11698
|
+
thresholds_fmt = f"W: {warning_val} / E: {error_val} / C: {critical_val}"
|
|
11699
|
+
|
|
11700
|
+
brief = brief.replace("{thresholds}", thresholds_fmt)
|
|
11701
|
+
|
|
11702
|
+
if segment is not None:
|
|
11703
|
+
# The segment is always a tuple of the form ("{column}", "{value}")
|
|
11704
|
+
|
|
11705
|
+
segment_fmt = f"{segment[0]} / {segment[1]}"
|
|
11706
|
+
|
|
11707
|
+
brief = brief.replace("{segment}", segment_fmt)
|
|
11708
|
+
brief = brief.replace("{segment_column}", segment[0])
|
|
11709
|
+
brief = brief.replace("{segment_value}", segment[1])
|
|
11710
|
+
|
|
11652
11711
|
return brief
|
|
11653
11712
|
|
|
11654
11713
|
|
|
@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,8112
|
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
4
|
pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
|
|
5
5
|
pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
|
|
6
|
-
pointblank/_typing.py,sha256=
|
|
6
|
+
pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
|
|
7
7
|
pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
|
|
8
8
|
pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
|
|
9
9
|
pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
|
|
@@ -15,17 +15,19 @@ pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
|
|
|
15
15
|
pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
17
|
pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
18
|
+
pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
|
|
19
19
|
pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
22
|
+
pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
|
|
23
|
+
pointblank/data/global_sales.zip,sha256=JeUnR1apKQ35PPwEcvTKCEIEiYeYQtoGmYjmzbz99DM,2138604
|
|
22
24
|
pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
|
|
23
25
|
pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0mU,7828965
|
|
24
26
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
25
27
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
26
28
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
27
|
-
pointblank-0.9.
|
|
28
|
-
pointblank-0.9.
|
|
29
|
-
pointblank-0.9.
|
|
30
|
-
pointblank-0.9.
|
|
31
|
-
pointblank-0.9.
|
|
29
|
+
pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
30
|
+
pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
|
|
31
|
+
pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
|
32
|
+
pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
33
|
+
pointblank-0.9.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|