pointblank 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/__init__.py CHANGED
@@ -30,8 +30,10 @@ from pointblank.thresholds import Actions, FinalActions, Thresholds
30
30
  from pointblank.validate import (
31
31
  Validate,
32
32
  config,
33
+ connect_to_table,
33
34
  get_action_metadata,
34
35
  get_column_count,
36
+ get_data_path,
35
37
  get_row_count,
36
38
  get_validation_summary,
37
39
  load_dataset,
@@ -60,7 +62,9 @@ __all__ = [
60
62
  "first_n",
61
63
  "last_n",
62
64
  "load_dataset",
65
+ "get_data_path",
63
66
  "config",
67
+ "connect_to_table",
64
68
  "preview",
65
69
  "missing_vals_tbl",
66
70
  "get_action_metadata",
pointblank/_constants.py CHANGED
@@ -105,6 +105,7 @@ ROW_BASED_VALIDATION_TYPES = [
105
105
  "col_vals_regex",
106
106
  "col_vals_null",
107
107
  "col_vals_not_null",
108
+ "col_vals_expr",
108
109
  "conjointly",
109
110
  ]
110
111
 
@@ -159,6 +160,9 @@ MODEL_PROVIDERS = [
159
160
  TABLE_TYPE_STYLES = {
160
161
  "pandas": {"background": "#150458", "text": "#FFFFFF", "label": "Pandas"},
161
162
  "polars": {"background": "#0075FF", "text": "#FFFFFF", "label": "Polars"},
163
+ "polars-lazy": {"background": "#0075FF", "text": "#FFFFFF", "label": "Polars (LazyFrame)"},
164
+ "narwhals": {"background": "#78BEAF", "text": "#222222", "label": "Narwhals"},
165
+ "narwhals-lazy": {"background": "#78BEAF", "text": "#222222", "label": "Narwhals (LazyFrame)"},
162
166
  "duckdb": {"background": "#000000", "text": "#FFFFFF", "label": "DuckDB"},
163
167
  "mysql": {"background": "#EBAD40", "text": "#222222", "label": "MySQL"},
164
168
  "postgres": {"background": "#3E638B", "text": "#FFFFFF", "label": "PostgreSQL"},
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ from math import floor, log10
4
+ from typing import TYPE_CHECKING
5
+
6
+ from great_tables.vals import fmt_integer, fmt_number, fmt_scientific
7
+
8
+ if TYPE_CHECKING:
9
+ pass
10
+
11
+
12
+ def _round_to_sig_figs(value: float, sig_figs: int) -> float:
13
+ if value == 0:
14
+ return 0
15
+ return round(value, sig_figs - int(floor(log10(abs(value)))) - 1)
16
+
17
+
18
+ def _compact_integer_fmt(value: float | int) -> str:
19
+ if value == 0:
20
+ formatted = "0"
21
+ elif abs(value) >= 1 and abs(value) < 10_000:
22
+ formatted = fmt_integer(value, use_seps=False)[0]
23
+ else:
24
+ formatted = fmt_scientific(value, decimals=1, exp_style="E1")[0]
25
+
26
+ return formatted
27
+
28
+
29
+ def _compact_decimal_fmt(value: float | int) -> str:
30
+ if value == 0:
31
+ formatted = "0.00"
32
+ elif abs(value) < 1 and abs(value) >= 0.01:
33
+ formatted = fmt_number(value, decimals=2)[0]
34
+ elif abs(value) < 0.01:
35
+ formatted = fmt_scientific(value, decimals=1, exp_style="E1")[0]
36
+ elif abs(value) >= 1 and abs(value) < 1000:
37
+ formatted = fmt_number(value, n_sigfig=3)[0]
38
+ elif abs(value) >= 1000 and abs(value) < 10_000:
39
+ formatted = fmt_number(value, decimals=0, use_seps=False)[0]
40
+ else:
41
+ formatted = fmt_scientific(value, decimals=1, exp_style="E1")[0]
42
+
43
+ return formatted
44
+
45
+
46
+ def _compact_0_1_fmt(value: float | int | None) -> str | None:
47
+ if value is None:
48
+ return value
49
+
50
+ if value == 0:
51
+ return " 0.00"
52
+
53
+ if value == 1:
54
+ return " 1.00"
55
+
56
+ if abs(value) < 1 and abs(value) >= 0.01:
57
+ return " " + fmt_number(value, decimals=2)[0]
58
+
59
+ if abs(value) < 0.01:
60
+ return "<0.01"
61
+
62
+ if abs(value) > 0.99:
63
+ return ">0.99"
64
+
65
+ return fmt_number(value, n_sigfig=3)[0]
pointblank/_utils.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import re
5
+ from collections import defaultdict
5
6
  from typing import TYPE_CHECKING, Any
6
7
 
7
8
  import narwhals as nw
@@ -12,9 +13,28 @@ from narwhals.typing import FrameT
12
13
  from pointblank._constants import ASSERTION_TYPE_METHOD_MAP, GENERAL_COLUMN_TYPES
13
14
 
14
15
  if TYPE_CHECKING:
16
+ from collections.abc import Mapping
17
+
15
18
  from pointblank._typing import AbsoluteBounds, Tolerance
16
19
 
17
20
 
21
def transpose_dicts(list_of_dicts: list[dict[str, Any]]) -> dict[str, list[Any]]:
    """
    Turn a list of row dictionaries into a dictionary of column lists.

    Parameters
    ----------
    list_of_dicts
        The dictionaries to transpose. They do not need to share the same keys.

    Returns
    -------
    dict[str, list[Any]]
        One entry per key found in any input dictionary. Each list has one element per input
        dictionary, in input order, with `None` where a dictionary lacks the key. Keys appear in
        the order they are first encountered. An empty input gives an empty dictionary.
    """
    if not list_of_dicts:
        return {}

    # Collect keys in a dict rather than a set so that the output column order is deterministic
    # (first-seen order) instead of depending on string hashing
    ordered_keys: dict[str, None] = {}
    for d in list_of_dicts:
        ordered_keys.update(dict.fromkeys(d))

    # `.get()` yields None for keys missing from a given dictionary
    return {key: [d.get(key) for d in list_of_dicts] for key in ordered_keys}
36
+
37
+
18
38
  def _derive_single_bound(ref: int, tol: int | float) -> int:
19
39
  """Derive a single bound using the reference."""
20
40
  if not isinstance(tol, float | int):
@@ -88,6 +108,29 @@ def _get_tbl_type(data: FrameT | Any) -> str:
88
108
  return "unknown" # pragma: no cover
89
109
 
90
110
 
111
+ def _is_narwhals_table(data: any) -> bool:
112
+ # Check if the data is a Narwhals DataFrame
113
+ type_str = str(type(data)).lower()
114
+
115
+ if "narwhals" in type_str:
116
+ # If the object is not a Narwhals DataFrame, return False
117
+ return True
118
+
119
+ return False
120
+
121
+
122
+ def _is_lazy_frame(data: any) -> bool:
123
+ # Check if the data is a Polars or Narwhals DataFrame
124
+ type_str = str(type(data)).lower()
125
+
126
+ if "polars" not in type_str and "narwhals" not in type_str:
127
+ # If the object is neither a Polars nor a Narwhals DataFrame, return False
128
+ return False
129
+
130
+ # Check if the data is a lazy frame
131
+ return "lazy" in type_str
132
+
133
+
91
134
  def _is_lib_present(lib_name: str) -> bool:
92
135
  import importlib
93
136
 
@@ -186,6 +229,77 @@ def _check_column_exists(dfn: nw.DataFrame, column: str) -> None:
186
229
  raise ValueError(f"Column '{column}' not found in DataFrame.")
187
230
 
188
231
 
232
def _count_true_values_in_column(
    tbl: FrameT,
    column: str,
    inverse: bool = False,
) -> int:
    """
    Count the rows of a table where a column is `True` (or, optionally, `False`).

    Parameters
    ----------
    tbl
        A Narwhals-compatible DataFrame or table-like object.
    column
        The column whose values are used as the row filter.
    inverse
        If `True`, the column is negated before filtering, so `False` values are counted.

    Returns
    -------
    int
        The number of rows remaining after the filter is applied.
    """

    # Build the filter expression first: the column itself, or its negation
    predicate = ~nw.col(column) if inverse else nw.col(column)

    # `from_native()` is applied unconditionally (the input may already be a Narwhals object)
    matching_rows = nw.from_native(tbl).filter(predicate)

    # A LazyFrame has to be collected before its rows can be counted
    if _is_lazy_frame(matching_rows):
        matching_rows = matching_rows.collect()

    return len(matching_rows)
267
+
268
+
269
def _count_null_values_in_column(
    tbl: FrameT,
    column: str,
) -> int:
    """
    Count the rows of a table where a column is Null.

    Parameters
    ----------
    tbl
        A Narwhals-compatible DataFrame or table-like object.
    column
        The column in which to look for Null values.

    Returns
    -------
    int
        The number of rows where `column` is Null.
    """

    # `from_native()` is applied unconditionally (the input may already be a Narwhals object)
    null_rows = nw.from_native(tbl).filter(nw.col(column).is_null())

    # A LazyFrame has to be collected before its rows can be counted
    if _is_lazy_frame(null_rows):
        null_rows = null_rows.collect()

    return len(null_rows)
301
+
302
+
189
303
  def _is_numeric_dtype(dtype: str) -> bool:
190
304
  """
191
305
  Check if a given data type string represents a numeric type.
@@ -533,6 +647,7 @@ def _get_api_text() -> str:
533
647
  "missing_vals_tbl",
534
648
  "assistant",
535
649
  "load_dataset",
650
+ "get_data_path",
536
651
  ]
537
652
 
538
653
  utility_exported = [
@@ -784,3 +899,14 @@ def _format_to_float_value(
784
899
  formatted_vals = _get_column_of_values(gt, column_name="x", context="html")
785
900
 
786
901
  return formatted_vals[0]
902
+
903
+
904
+ def _pivot_to_dict(col_dict: Mapping[str, Any]): # TODO : Type hint and unit test
905
+ result_dict = {}
906
+ for col, sub_dict in col_dict.items():
907
+ for key, value in sub_dict.items():
908
+ # add columns fields not present
909
+ if key not in result_dict:
910
+ result_dict[key] = [None] * len(col_dict)
911
+ result_dict[key][list(col_dict.keys()).index(col)] = value
912
+ return result_dict
pointblank/_utils_html.py CHANGED
@@ -1,9 +1,49 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import Any
4
+
5
+ from great_tables import html
6
+
3
7
  from pointblank._constants import TABLE_TYPE_STYLES
4
8
  from pointblank._utils import _format_to_integer_value
5
9
 
6
10
 
11
+ def _fmt_frac(vec) -> list[str | None]:
12
+ res: list[str | None] = []
13
+ for x in vec:
14
+ if x is None:
15
+ res.append(x)
16
+ continue
17
+
18
+ if x == 0:
19
+ res.append("0")
20
+ continue
21
+
22
+ if x < 0.01:
23
+ res.append("<.01")
24
+ continue
25
+
26
+ try:
27
+ intx: int = int(x)
28
+ except ValueError: # generic object, ie. NaN
29
+ res.append(str(x))
30
+ continue
31
+
32
+ if intx == x: # can remove trailing 0s w/o loss
33
+ res.append(str(intx))
34
+ continue
35
+
36
+ res.append(str(round(x, 2)))
37
+
38
+ return res
39
+
40
+
41
def _make_sublabel(major: str, minor: str) -> Any:
    """
    Build an HTML label made of `major` followed by `minor` in a smaller, subscript-aligned span.

    Parameters
    ----------
    major
        The main label text.
    minor
        The text placed in the trailing `<span>`.

    Returns
    -------
    Any
        The result of passing the markup to `great_tables.html()`.
    """
    markup = f'{major!s}<span style="font-size: 0.75em; vertical-align: sub; position: relative; line-height: 0.5em;">{minor!s}</span>'
    return html(markup)
45
+
46
+
7
47
  def _create_table_type_html(
8
48
  tbl_type: str | None, tbl_name: str | None, font_size: str = "10px"
9
49
  ) -> str:
pointblank/assistant.py CHANGED
@@ -176,9 +176,7 @@ def assistant(
176
176
  if data is not None:
177
177
  scan = DataScan(data=data)
178
178
 
179
- scan_dict = scan.to_dict()
180
-
181
- tbl_type = scan_dict["tbl_type"]
179
+ tbl_type: str = scan.profile.implementation.name.lower()
182
180
  tbl_json = scan.to_json()
183
181
 
184
182
  if tbl_name is not None:
pointblank/compare.py ADDED
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from pointblank import DataScan
6
+
7
+ if TYPE_CHECKING:
8
+ from narwhals.typing import IntoFrame
9
+
10
+
11
class Compare:
    """
    Hold two tables and scan both with `DataScan`.

    Parameters
    ----------
    a
        The first table.
    b
        The second table.
    """

    def __init__(self, a: IntoFrame, b: IntoFrame) -> None:
        self.a: IntoFrame = a
        self.b: IntoFrame = b

    def compare(self) -> None:
        """
        Scan both tables and store the scans on the instance as `_scana` and `_scanb`.

        No comparison result is produced yet: the summary data of each scan is read but not
        used further.
        """
        ## Scan both frames
        self._scana = DataScan(self.a)
        self._scanb = DataScan(self.b)

        ## Get summary outs
        summarya = self._scana.summary_data
        # Read the summary of the *second* scan (this previously re-read the first one)
        summaryb = self._scanb.summary_data

        # Placeholder attribute accesses; their results are not used yet
        summarya.columns

        self._scana.profile