pointblank 0.9.1__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/validate.py CHANGED
@@ -58,6 +58,7 @@ from pointblank._interrogation import (
58
58
  RowCountMatch,
59
59
  RowsComplete,
60
60
  RowsDistinct,
61
+ SpeciallyValidation,
61
62
  )
62
63
  from pointblank._typing import SegmentSpec
63
64
  from pointblank._utils import (
@@ -384,7 +385,7 @@ def config(
384
385
 
385
386
 
386
387
  def load_dataset(
387
- dataset: Literal["small_table", "game_revenue", "nycflights"] = "small_table",
388
+ dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
388
389
  tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
389
390
  ) -> FrameT | Any:
390
391
  """
@@ -400,7 +401,7 @@ def load_dataset(
400
401
  ----------
401
402
  dataset
402
403
  The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
403
- and `"nycflights"`.
404
+ `"nycflights"`, and `"global_sales"`.
404
405
  tbl_type
405
406
  The type of table to generate from the dataset. The named options are `"polars"`,
406
407
  `"pandas"`, and `"duckdb"`.
@@ -422,6 +423,8 @@ def load_dataset(
422
423
  they purchased, ads viewed, and the revenue generated.
423
424
  - `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
424
425
  about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
426
+ - `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
427
+ global sales of products across different regions and countries.
425
428
 
426
429
  Supported DataFrame Types
427
430
  -------------------------
@@ -433,18 +436,18 @@ def load_dataset(
433
436
 
434
437
  Examples
435
438
  --------
436
- Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with its
437
- defaults:
439
+ Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
440
+ `dataset="small_table"` and `tbl_type="polars"`:
438
441
 
439
442
  ```{python}
440
443
  import pointblank as pb
441
444
 
442
- small_table = pb.load_dataset()
445
+ small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
443
446
 
444
447
  pb.preview(small_table)
445
448
  ```
446
449
 
447
- Note that the `"small_table"` dataset is a simple Polars DataFrame and using the
450
+ Note that the `"small_table"` dataset is a Polars DataFrame and using the
448
451
  [`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
449
452
  environment.
450
453
 
@@ -472,10 +475,23 @@ def load_dataset(
472
475
  The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
473
476
  truly a real-world dataset and provides information about flights originating from New York City
474
477
  airports in 2013.
478
+
479
+ Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
480
+ name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
481
+
482
+ ```{python}
483
+ global_sales = pb.load_dataset(dataset="global_sales")
484
+
485
+ pb.preview(global_sales)
486
+ ```
487
+
488
+ The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
489
+ describes the sales of a particular product to a customer located in one of three global
490
+ regions: North America, Europe, or Asia.
475
491
  """
476
492
 
477
493
  # Raise an error if the dataset is from the list of provided datasets
478
- if dataset not in ["small_table", "game_revenue", "nycflights"]:
494
+ if dataset not in ["small_table", "game_revenue", "nycflights", "global_sales"]:
479
495
  raise ValueError(
480
496
  f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
481
497
  "- `small_table`\n"
@@ -517,6 +533,7 @@ def load_dataset(
517
533
  "small_table": ["date_time", "date"],
518
534
  "game_revenue": ["session_start", "time", "start_day"],
519
535
  "nycflights": [],
536
+ "global_sales": ["timestamp"],
520
537
  }
521
538
 
522
539
  dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
@@ -7633,7 +7650,7 @@ class Validate:
7633
7650
 
7634
7651
  val_info = _ValidationInfo(
7635
7652
  assertion_type=assertion_type,
7636
- column=None, # This is a rowwise validation, not specific to any column
7653
+ column=None, # This validation is not specific to any column(s)
7637
7654
  values=values,
7638
7655
  pre=pre,
7639
7656
  thresholds=thresholds,
@@ -7646,6 +7663,351 @@ class Validate:
7646
7663
 
7647
7664
  return self
7648
7665
 
7666
+ def specially(
7667
+ self,
7668
+ expr: Callable,
7669
+ pre: Callable | None = None,
7670
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
7671
+ actions: Actions | None = None,
7672
+ brief: str | bool | None = None,
7673
+ active: bool = True,
7674
+ ) -> Validate:
7675
+ """
7676
+ Perform a specialized validation with customized logic.
7677
+
7678
+ The `specially()` validation method allows for the creation of specialized validation
7679
+ expressions that can be used to validate specific conditions or logic in the data. This
7680
+ method provides maximum flexibility by accepting a custom callable that encapsulates
7681
+ your validation logic.
7682
+
7683
+ The callable function can have one of two signatures:
7684
+
7685
+ - a function accepting a single parameter (the data table): `def validate(data): ...`
7686
+ - a function with no parameters: `def validate(): ...`
7687
+
7688
+ The second form is particularly useful for environment validations that don't need to
7689
+ inspect the data table.
7690
+
7691
+ The callable function must ultimately return one of:
7692
+
7693
+ 1. a single boolean value or boolean list
7694
+ 2. a table where the final column contains boolean values (column name is unimportant)
7695
+
7696
+ The validation will operate over the number of test units that is equal to the number of
7697
+ rows in the data table (if returning a table with boolean values). If returning a scalar
7698
+ boolean value, the validation will operate over a single test unit. For a return of a list
7699
+ of boolean values, the length of the list constitutes the number of test units.
7700
+
7701
+ Parameters
7702
+ ----------
7703
+ expr
7704
+ A callable function that defines the specialized validation logic. This function should:
7705
+ (1) accept the target data table as its single argument (though it may ignore it), or
7706
+ (2) take no parameters at all (for environment validations). The function must
7707
+ ultimately return boolean values representing validation results. Design your function
7708
+ to incorporate any custom parameters directly within the function itself using closure
7709
+ variables or default parameters.
7710
+ pre
7711
+ An optional preprocessing function or lambda to apply to the data table during
7712
+ interrogation. This function should take a table as input and return a modified table.
7713
+ Have a look at the *Preprocessing* section for more information on how to use this
7714
+ argument.
7715
+ thresholds
7716
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
7717
+ The thresholds are set at the step level and will override any global thresholds set in
7718
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
7719
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
7720
+ section for information on how to set threshold levels.
7721
+ actions
7722
+ Optional actions to take when the validation step meets or exceeds any set threshold
7723
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
7724
+ define the actions.
7725
+ brief
7726
+ An optional brief description of the validation step that will be displayed in the
7727
+ reporting table. You can use the templating elements like `"{step}"` to insert
7728
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
7729
+ the entire brief will be automatically generated. If `None` (the default) then there
7730
+ won't be a brief.
7731
+ active
7732
+ A boolean value indicating whether the validation step should be active. Using `False`
7733
+ will make the validation step inactive (still reporting its presence and keeping indexes
7734
+ for the steps unchanged).
7735
+
7736
+ Returns
7737
+ -------
7738
+ Validate
7739
+ The `Validate` object with the added validation step.
7740
+
7741
+ Preprocessing
7742
+ -------------
7743
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
7744
+ table during interrogation. This function should take a table as input and return a modified
7745
+ table. This is useful for performing any necessary transformations or filtering on the data
7746
+ before the validation step is applied.
7747
+
7748
+ The preprocessing function can be any callable that takes a table as input and returns a
7749
+ modified table. For example, you could use a lambda function to filter the table based on
7750
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
7751
+ transformed table, it only exists during the validation step and is not stored in the
7752
+ `Validate` object or used in subsequent validation steps.
7753
+
7754
+ Thresholds
7755
+ ----------
7756
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
7757
+ step. If they are set here at the step level, these thresholds will override any thresholds
7758
+ set at the global level in `Validate(thresholds=...)`.
7759
+
7760
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
7761
+ can either be set as a proportion failing of all test units (a value between `0` to `1`),
7762
+ or, the absolute number of failing test units (as an integer that's `1` or greater).
7763
+
7764
+ Thresholds can be defined using one of these input schemes:
7765
+
7766
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
7767
+ thresholds)
7768
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
7769
+ the 'error' level, and position `2` is the 'critical' level
7770
+ 3. create a dictionary of 1-3 value entries; the valid keys are: 'warning', 'error', and
7771
+ 'critical'
7772
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
7773
+ for the 'warning' level only
7774
+
7775
+ If the number of failing test units exceeds set thresholds, the validation step will be
7776
+ marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
7777
+ set, you're free to set any combination of them.
7778
+
7779
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
7780
+ take for each level of failure (using the `actions=` parameter).
7781
+
7782
+ Examples
7783
+ --------
7784
+ ```{python}
7785
+ #| echo: false
7786
+ #| output: false
7787
+ import pointblank as pb
7788
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
7789
+ ```
7790
+ The `specially()` method offers maximum flexibility for validation, allowing you to create
7791
+ custom validation logic that fits your specific needs. The following examples demonstrate
7792
+ different patterns and use cases for this powerful validation approach.
7793
+
7794
+ ### Simple validation with direct table access
7795
+
7796
+ This example shows the most straightforward use case where we create a function that
7797
+ directly checks if the sum of two columns is positive.
7798
+
7799
+ ```{python}
7800
+ import pointblank as pb
7801
+ import polars as pl
7802
+
7803
+ simple_tbl = pl.DataFrame({
7804
+ "a": [5, 7, 1, 3, 9, 4],
7805
+ "b": [6, 3, 0, 5, 8, 2]
7806
+ })
7807
+
7808
+ # Simple function that validates directly on the table
7809
+ def validate_sum_positive(data):
7810
+ return data.select(pl.col("a") + pl.col("b") > 0)
7811
+
7812
+ (
7813
+ pb.Validate(data=simple_tbl)
7814
+ .specially(expr=validate_sum_positive)
7815
+ .interrogate()
7816
+ )
7817
+ ```
7818
+
7819
+ The function returns a Polars DataFrame with a single boolean column indicating whether
7820
+ the sum of columns `a` and `b` is positive for each row. Each row in the resulting DataFrame
7821
+ is a distinct test unit. This pattern works well for simple validations where you don't need
7822
+ configurable parameters.
7823
+
7824
+ ### Advanced validation with closure variables for parameters
7825
+
7826
+ When you need to make your validation configurable, you can use the function factory pattern
7827
+ (also known as closures) to create parameterized validations:
7828
+
7829
+ ```{python}
7830
+ # Create a parameterized validation function using closures
7831
+ def make_column_ratio_validator(col1, col2, min_ratio):
7832
+ def validate_column_ratio(data):
7833
+ return data.select((pl.col(col1) / pl.col(col2)) > min_ratio)
7834
+ return validate_column_ratio
7835
+
7836
+ (
7837
+ pb.Validate(data=simple_tbl)
7838
+ .specially(
7839
+ expr=make_column_ratio_validator(col1="a", col2="b", min_ratio=0.5)
7840
+ )
7841
+ .interrogate()
7842
+ )
7843
+ ```
7844
+
7845
+ This approach allows you to create reusable validation functions that can be configured with
7846
+ different parameters without modifying the function itself.
7847
+
7848
+ ### Validation function returning a list of booleans
7849
+
7850
+ This example demonstrates how to create a validation function that returns a list of boolean
7851
+ values, where each element represents a separate test unit:
7852
+
7853
+ ```{python}
7854
+ import pointblank as pb
7855
+ import polars as pl
7856
+ import random
7857
+
7858
+ # Create sample data
7859
+ transaction_tbl = pl.DataFrame({
7860
+ "transaction_id": [f"TX{i:04d}" for i in range(1, 11)],
7861
+ "amount": [120.50, 85.25, 50.00, 240.75, 35.20, 150.00, 85.25, 65.00, 210.75, 90.50],
7862
+ "category": ["food", "shopping", "entertainment", "travel", "utilities",
7863
+ "food", "shopping", "entertainment", "travel", "utilities"]
7864
+ })
7865
+
7866
+ # Define a validation function that returns a list of booleans
7867
+ def validate_transaction_rules(data):
7868
+ # Create a list to store individual test results
7869
+ test_results = []
7870
+
7871
+ # Check each row individually against multiple business rules
7872
+ for row in data.iter_rows(named=True):
7873
+ # Rule: transaction IDs must start with "TX" and be 6 chars long
7874
+ valid_id = row["transaction_id"].startswith("TX") and len(row["transaction_id"]) == 6
7875
+
7876
+ # Rule: Amounts must be appropriate for their category
7877
+ valid_amount = True
7878
+ if row["category"] == "food" and (row["amount"] < 10 or row["amount"] > 200):
7879
+ valid_amount = False
7880
+ elif row["category"] == "utilities" and (row["amount"] < 20 or row["amount"] > 300):
7881
+ valid_amount = False
7882
+ elif row["category"] == "entertainment" and row["amount"] > 100:
7883
+ valid_amount = False
7884
+
7885
+ # A transaction passes if it satisfies both rules
7886
+ test_results.append(valid_id and valid_amount)
7887
+
7888
+ return test_results
7889
+
7890
+ (
7891
+ pb.Validate(data=transaction_tbl)
7892
+ .specially(
7893
+ expr=validate_transaction_rules,
7894
+ brief="Validate transaction IDs and amounts by category."
7895
+ )
7896
+ .interrogate()
7897
+ )
7898
+ ```
7899
+
7900
+ This example shows how to create a validation function that applies multiple business rules
7901
+ to each row and returns a list of boolean results. Each boolean in the list represents a
7902
+ separate test unit, and a test unit passes only if all rules are satisfied for a given row.
7903
+
7904
+ The function iterates through each row in the data table, checking:
7905
+
7906
+ 1. if transaction IDs follow the required format
7907
+ 2. if transaction amounts are appropriate for their respective categories
7908
+
7909
+ This approach is powerful when you need to apply complex, conditional logic that can't be
7910
+ easily expressed using the built-in validation functions.
7911
+
7912
+ ### Table-level validation returning a single boolean
7913
+
7914
+ Sometimes you need to validate properties of the entire table rather than row-by-row. In
7915
+ these cases, your function can return a single boolean value:
7916
+
7917
+ ```{python}
7918
+ def validate_table_properties(data):
7919
+ # Check if table has at least one row with column 'a' > 10
7920
+ has_large_values = data.filter(pl.col("a") > 10).height > 0
7921
+
7922
+ # Check if mean of column 'b' is positive
7923
+ has_positive_mean = data.select(pl.mean("b")).item() > 0
7924
+
7925
+ # Return a single boolean for the entire table
7926
+ return has_large_values and has_positive_mean
7927
+
7928
+ (
7929
+ pb.Validate(data=simple_tbl)
7930
+ .specially(expr=validate_table_properties)
7931
+ .interrogate()
7932
+ )
7933
+ ```
7934
+
7935
+ This example demonstrates how to perform multiple checks on the table as a whole and combine
7936
+ them into a single validation result.
7937
+
7938
+ ### Environment validation that doesn't use the data table
7939
+
7940
+ The `specially()` validation method can even be used to validate aspects of your environment
7941
+ that are completely independent of the data:
7942
+
7943
+ ```{python}
7944
+ def validate_pointblank_version():
7945
+ try:
7946
+ import importlib.metadata
7947
+ version = importlib.metadata.version("pointblank")
7948
+ version_parts = version.split(".")
7949
+
7950
+ # Get major and minor components regardless of how many parts there are
7951
+ major = int(version_parts[0])
7952
+ minor = int(version_parts[1])
7953
+
7954
+ # Check both major and minor components for version `0.9+`
7955
+ return (major > 0) or (major == 0 and minor >= 9)
7956
+
7957
+ except Exception as e:
7958
+ # More specific error handling could be added here
7959
+ print(f"Version check failed: {e}")
7960
+ return False
7961
+
7962
+ (
7963
+ pb.Validate(data=simple_tbl)
7964
+ .specially(
7965
+ expr=validate_pointblank_version,
7966
+ brief="Check Pointblank version `>=0.9.0`."
7967
+ )
7968
+ .interrogate()
7969
+ )
7970
+ ```
7971
+
7972
+ This pattern shows how to validate external dependencies or environment conditions as part
7973
+ of your validation workflow. Notice that the function doesn't take any parameters at all,
7974
+ which makes it cleaner when the validation doesn't need to access the data table.
7975
+
7976
+ By combining these patterns, you can create sophisticated validation workflows that address
7977
+ virtually any data quality requirement in your organization.
7978
+ """
7979
+
7980
+ assertion_type = _get_fn_name()
7981
+
7982
+ # TODO: add a check for the expression to be a callable
7983
+ # _check_expr_specially(expr=expr)
7984
+ _check_pre(pre=pre)
7985
+ _check_thresholds(thresholds=thresholds)
7986
+ _check_boolean_input(param=active, param_name="active")
7987
+
7988
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
7989
+ thresholds = (
7990
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
7991
+ )
7992
+
7993
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
7994
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
7995
+
7996
+ val_info = _ValidationInfo(
7997
+ assertion_type=assertion_type,
7998
+ column=None, # This validation is not specific to any column(s)
7999
+ values=expr,
8000
+ pre=pre,
8001
+ thresholds=thresholds,
8002
+ actions=actions,
8003
+ brief=brief,
8004
+ active=active,
8005
+ )
8006
+
8007
+ self._add_validation(validation_info=val_info)
8008
+
8009
+ return self
8010
+
7649
8011
  def interrogate(
7650
8012
  self,
7651
8013
  collect_extracts: bool = True,
@@ -7796,6 +8158,7 @@ class Validate:
7796
8158
  inclusive = validation.inclusive
7797
8159
  na_pass = validation.na_pass
7798
8160
  threshold = validation.thresholds
8161
+ segment = validation.segments
7799
8162
 
7800
8163
  assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
7801
8164
  assertion_category = METHOD_CATEGORY_MAP[assertion_method]
@@ -7803,7 +8166,14 @@ class Validate:
7803
8166
 
7804
8167
  # Process the `brief` text for the validation step by including template variables to
7805
8168
  # the user-supplied text
7806
- validation.brief = _process_brief(brief=validation.brief, step=validation.i, col=column)
8169
+ validation.brief = _process_brief(
8170
+ brief=validation.brief,
8171
+ step=validation.i,
8172
+ col=column,
8173
+ values=value,
8174
+ thresholds=threshold,
8175
+ segment=segment,
8176
+ )
7807
8177
 
7808
8178
  # Generate the autobrief description for the validation step; it's important to perform
7809
8179
  # that here since text components like the column and the value(s) have been resolved
@@ -8060,12 +8430,39 @@ class Validate:
8060
8430
  tbl_type=tbl_type,
8061
8431
  ).get_test_results()
8062
8432
 
8063
- if assertion_category not in [
8064
- "COL_EXISTS_HAS_TYPE",
8065
- "COL_SCHEMA_MATCH",
8066
- "ROW_COUNT_MATCH",
8067
- "COL_COUNT_MATCH",
8068
- ]:
8433
+ if assertion_category == "SPECIALLY":
8434
+ results_tbl_list = SpeciallyValidation(
8435
+ data_tbl=data_tbl_step,
8436
+ expression=value,
8437
+ threshold=threshold,
8438
+ tbl_type=tbl_type,
8439
+ ).get_test_results()
8440
+
8441
+ #
8442
+ # The result from this could either be a table in the conventional form, or,
8443
+ # a list of boolean values; handle both cases
8444
+ #
8445
+
8446
+ if isinstance(results_tbl_list, list):
8447
+ # If the result is a list of boolean values, then we need to
8448
+ # set the validation results from the list
8449
+ validation.all_passed = all(results_tbl_list)
8450
+ validation.n = len(results_tbl_list)
8451
+ validation.n_passed = results_tbl_list.count(True)
8452
+ validation.n_failed = results_tbl_list.count(False)
8453
+
8454
+ results_tbl = None
8455
+
8456
+ else:
8457
+ # If the result is not a list, then we assume it's a table in the conventional
8458
+ # form (where a column named `pb_is_good_` exists, with boolean values)
8459
+
8460
+ results_tbl = results_tbl_list
8461
+
8462
+ # If the results table is not `None`, then we assume there is a table with a column
8463
+ # called `pb_is_good_` that contains boolean values; we can then use this table to
8464
+ # determine the number of test units that passed and failed
8465
+ if results_tbl is not None:
8069
8466
  # Extract the `pb_is_good_` column from the table as a results list
8070
8467
  if tbl_type in IBIS_BACKENDS:
8071
8468
  # Select the DataFrame library to use for getting the results list
@@ -9967,7 +10364,7 @@ class Validate:
9967
10364
  # With a column subset list, format with commas between the column names
9968
10365
  columns_upd.append(", ".join(column))
9969
10366
 
9970
- elif assertion_type[i] in ["conjointly"]:
10367
+ elif assertion_type[i] in ["conjointly", "specially"]:
9971
10368
  columns_upd.append("")
9972
10369
  else:
9973
10370
  columns_upd.append(str(column))
@@ -10029,7 +10426,7 @@ class Validate:
10029
10426
  elif assertion_type[i] in ["col_schema_match"]:
10030
10427
  values_upd.append("SCHEMA")
10031
10428
 
10032
- elif assertion_type[i] in ["col_vals_expr"]:
10429
+ elif assertion_type[i] in ["col_vals_expr", "conjointly"]:
10033
10430
  values_upd.append("COLUMN EXPR")
10034
10431
 
10035
10432
  elif assertion_type[i] in ["row_count_match", "col_count_match"]:
@@ -10041,8 +10438,8 @@ class Validate:
10041
10438
 
10042
10439
  values_upd.append(str(count))
10043
10440
 
10044
- elif assertion_type[i] in ["conjointly"]:
10045
- values_upd.append("COLUMN EXPR")
10441
+ elif assertion_type[i] in ["specially"]:
10442
+ values_upd.append("EXPR")
10046
10443
 
10047
10444
  # If the assertion type is not recognized, add the value as a string
10048
10445
  else:
@@ -11256,7 +11653,14 @@ def _string_date_dttm_conversion(value: any) -> any:
11256
11653
  return value
11257
11654
 
11258
11655
 
11259
- def _process_brief(brief: str | None, step: int, col: str | list[str] | None) -> str:
11656
+ def _process_brief(
11657
+ brief: str | None,
11658
+ step: int,
11659
+ col: str | list[str] | None,
11660
+ values: any | None,
11661
+ thresholds: any | None,
11662
+ segment: any | None,
11663
+ ) -> str:
11260
11664
  # If there is no brief, return `None`
11261
11665
  if brief is None:
11262
11666
  return None
@@ -11276,6 +11680,34 @@ def _process_brief(brief: str | None, step: int, col: str | list[str] | None) ->
11276
11680
  brief = brief.replace("{col}", col)
11277
11681
  brief = brief.replace("{column}", col)
11278
11682
 
11683
+ if values is not None:
11684
+ # If the value is a list, then join the values into a comma-separated string
11685
+ if isinstance(values, list):
11686
+ values = ", ".join([str(v) for v in values])
11687
+
11688
+ brief = brief.replace("{value}", str(values))
11689
+
11690
+ if thresholds is not None:
11691
+ # Get the string representation of thresholds in the form of:
11692
+ # "W: 0.20 / C: 0.40 / E: 1.00"
11693
+
11694
+ warning_val = thresholds._get_threshold_value(level="warning")
11695
+ error_val = thresholds._get_threshold_value(level="error")
11696
+ critical_val = thresholds._get_threshold_value(level="critical")
11697
+
11698
+ thresholds_fmt = f"W: {warning_val} / E: {error_val} / C: {critical_val}"
11699
+
11700
+ brief = brief.replace("{thresholds}", thresholds_fmt)
11701
+
11702
+ if segment is not None:
11703
+ # The segment is always a tuple of the form ("{column}", "{value}")
11704
+
11705
+ segment_fmt = f"{segment[0]} / {segment[1]}"
11706
+
11707
+ brief = brief.replace("{segment}", segment_fmt)
11708
+ brief = brief.replace("{segment_column}", segment[0])
11709
+ brief = brief.replace("{segment_value}", segment[1])
11710
+
11279
11711
  return brief
11280
11712
 
11281
11713
 
@@ -11470,6 +11902,9 @@ def _create_autobrief_or_failure_text(
11470
11902
  if assertion_type == "conjointly":
11471
11903
  return _create_text_conjointly(lang=lang, for_failure=for_failure)
11472
11904
 
11905
+ if assertion_type == "specially":
11906
+ return _create_text_specially(lang=lang, for_failure=for_failure)
11907
+
11473
11908
  return None # pragma: no cover
11474
11909
 
11475
11910
 
@@ -11668,6 +12103,12 @@ def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
11668
12103
  return EXPECT_FAIL_TEXT[f"conjointly_{type_}_text"][lang]
11669
12104
 
11670
12105
 
12106
+ def _create_text_specially(lang: str, for_failure: bool = False) -> str:
12107
+ type_ = _expect_failure_type(for_failure=for_failure)
12108
+
12109
+ return EXPECT_FAIL_TEXT[f"specially_{type_}_text"][lang]
12110
+
12111
+
11671
12112
  def _prep_column_text(column: str | list[str]) -> str:
11672
12113
  if isinstance(column, list):
11673
12114
  return "`" + str(column[0]) + "`"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.9.1
3
+ Version: 0.9.4
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -1,31 +1,33 @@
1
1
  pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
2
- pointblank/_constants.py,sha256=tlelmeuftW4BpVeEILbsbuuCaMQ7yA_FYqM6PJPXH58,78561
2
+ pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,81128
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
- pointblank/_constants_translations.py,sha256=FHkY2Bh1VBmBwbiGRIRSMU1tNGxgQAkjoPoYlwOHSKU,180685
5
- pointblank/_interrogation.py,sha256=BjN60ed7BH4ZnoPtkmVSvVEqJgf8k9mce4Zb63_jv_s,85155
6
- pointblank/_typing.py,sha256=ConITAbsFxU8CkNXY7l0Lua9hGofeDDJAWw-lGAIVgI,764
7
- pointblank/_utils.py,sha256=g7vbvV33tKNvznUoYsHcZW90bYm1LPb76njQeDJDPyQ,24715
4
+ pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
5
+ pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
6
+ pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
7
+ pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
8
8
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
9
9
  pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
10
- pointblank/actions.py,sha256=oazJk4pe3lIA14hjyCDtPOr4r_sp4vGGo2eyU_LX5_0,18268
10
+ pointblank/actions.py,sha256=ilk__kbQiS4ieJp-4dM7SDGuobQihUxLyS5ahgiP7qE,18272
11
11
  pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
12
12
  pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
13
13
  pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
14
14
  pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
15
15
  pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
16
16
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
17
- pointblank/thresholds.py,sha256=aAPfdo3VMCw_G_OAh4nEsCYfIynDfNRJOMrG8yDM6U8,25717
18
- pointblank/validate.py,sha256=EPqtxw5sQG4Xh7WSaViVEUtm4FmpFOsyh4KM9EzuqkU,588834
19
- pointblank/data/api-docs.txt,sha256=JkV9SdXyB3ftBMXVFdFqGZNpyfBdWUpyisn4QHco56w,467666
17
+ pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
18
+ pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
19
+ pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
20
20
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
21
21
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
22
+ pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
23
+ pointblank/data/global_sales.zip,sha256=JeUnR1apKQ35PPwEcvTKCEIEiYeYQtoGmYjmzbz99DM,2138604
22
24
  pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
23
25
  pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0mU,7828965
24
26
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
25
27
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
26
28
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
27
- pointblank-0.9.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
28
- pointblank-0.9.1.dist-info/METADATA,sha256=1o11OgPSmpB4qBDEG1HyHDfVj5emxcT_yxHeFsVPVUc,14732
29
- pointblank-0.9.1.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
30
- pointblank-0.9.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
31
- pointblank-0.9.1.dist-info/RECORD,,
29
+ pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
+ pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
31
+ pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
32
+ pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
+ pointblank-0.9.4.dist-info/RECORD,,