pointblank 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +2 -0
- pointblank/_agg.py +120 -0
- pointblank/_constants.py +207 -6
- pointblank/_constants_translations.py +1302 -0
- pointblank/_datascan_utils.py +28 -10
- pointblank/_interrogation.py +216 -139
- pointblank/_typing.py +12 -0
- pointblank/_utils.py +81 -44
- pointblank/_utils_ai.py +4 -5
- pointblank/_utils_check_args.py +3 -3
- pointblank/_utils_llms_txt.py +41 -2
- pointblank/actions.py +1 -1
- pointblank/assistant.py +2 -3
- pointblank/cli.py +1 -1
- pointblank/column.py +162 -46
- pointblank/data/api-docs.txt +2957 -50
- pointblank/datascan.py +17 -17
- pointblank/draft.py +2 -3
- pointblank/scan_profile.py +2 -1
- pointblank/schema.py +61 -20
- pointblank/thresholds.py +15 -13
- pointblank/validate.py +2280 -410
- pointblank/validate.pyi +1104 -0
- pointblank/yaml.py +15 -8
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/METADATA +7 -2
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/RECORD +30 -28
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/licenses/LICENSE +1 -1
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/WHEEL +0 -0
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.16.0.dist-info → pointblank-0.18.0.dist-info}/top_level.txt +0 -0
pointblank/_typing.py
CHANGED
@@ -1,6 +1,8 @@
 from __future__ import annotations

+import datetime
 import sys
+from collections.abc import Container
 from typing import List, Tuple, Union

 # Check Python version for TypeAlias support
@@ -15,6 +17,12 @@ if sys.version_info >= (3, 10):
     SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
     SegmentItem: TypeAlias = Union[str, SegmentTuple]
     SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
+
+    _CompliantValue: TypeAlias = Union[str, int, float, datetime.datetime, datetime.date]
+    """A compliant value that pointblank can use in a validation step"""
+    _CompliantValues: TypeAlias = Container[_CompliantValue]
+    """A collection of compliant values that pointblank can use in a validation step"""
+
 else:
     # Python 3.8 and 3.9 compatible type aliases
     AbsoluteBounds = Tuple[int, int]
@@ -24,6 +32,10 @@ else:
     SegmentTuple = Tuple[str, SegmentValue]
     SegmentItem = Union[str, SegmentTuple]
     SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
+    _CompliantValue = Union[str, int, float, datetime.datetime, datetime.date]
+    """A compliant value that pointblank can use in a validation step"""
+    _CompliantValues = Container[_CompliantValue]
+    """A collection of compliant values that pointblank can use in a validation step"""

 # Add docstrings for better IDE support
 # In Python 3.14+, __doc__ attribute on typing.Union objects became read-only
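The two private aliases added here describe the kinds of values a validation step can compare against (strings, numbers, dates, datetimes) and any container of them. A minimal sketch of how such aliases read in annotations — the `check_membership` helper below is hypothetical and not part of pointblank:

```python
import datetime
from collections.abc import Container
from typing import Union

# Same shape as the private aliases added in _typing.py
_CompliantValue = Union[str, int, float, datetime.datetime, datetime.date]
_CompliantValues = Container[_CompliantValue]


def check_membership(value: _CompliantValue, allowed: _CompliantValues) -> bool:
    # `Container` only guarantees `__contains__`, so sets, lists, and ranges all qualify
    return value in allowed


print(check_membership(3, {1, 2, 3}))                   # True
print(check_membership(datetime.date(2024, 1, 1), []))  # False
```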
pointblank/_utils.py
CHANGED
@@ -7,14 +7,17 @@ from typing import TYPE_CHECKING, Any

 import narwhals as nw
 from great_tables import GT
+from narwhals.dependencies import is_narwhals_dataframe, is_narwhals_lazyframe
 from great_tables.gt import _get_column_of_values
-from narwhals.typing import FrameT

 from pointblank._constants import ASSERTION_TYPE_METHOD_MAP, GENERAL_COLUMN_TYPES, IBIS_BACKENDS
+from pointblank.column import Column, ColumnLiteral, ColumnSelector, ColumnSelectorNarwhals, col

 if TYPE_CHECKING:
     from collections.abc import Mapping

+    from narwhals.typing import IntoFrame, IntoFrameT
+
     from pointblank._typing import AbsoluteBounds, Tolerance


@@ -35,6 +38,7 @@ def transpose_dicts(list_of_dicts: list[dict[str, Any]]) -> dict[str, list[Any]]
     return dict(result)


+# TODO: doctest
 def _derive_single_bound(ref: int, tol: int | float) -> int:
     """Derive a single bound using the reference."""
     if not isinstance(tol, float | int):
@@ -44,16 +48,17 @@ def _derive_single_bound(ref: int, tol: int | float) -> int:
     return int(tol * ref) if tol < 1 else int(tol)


+# TODO: doctest
 def _derive_bounds(ref: int, tol: Tolerance) -> AbsoluteBounds:
     """Validate and extract the absolute bounds of the tolerance."""
     if isinstance(tol, tuple):
-        return
+        return (_derive_single_bound(ref, tol[0]), _derive_single_bound(ref, tol[1]))

     bound = _derive_single_bound(ref, tol)
     return bound, bound


-def _get_tbl_type(data: FrameT | Any) -> str:
+def _get_tbl_type(data: Any) -> str:
     type_str = str(type(data))

     ibis_tbl = "ibis.expr.types.relations.Table" in type_str
@@ -110,7 +115,7 @@ def _get_tbl_type(data: FrameT | Any) -> str:
     return "unknown"  # pragma: no cover


-def _process_ibis_through_narwhals(data:
+def _process_ibis_through_narwhals(data: Any, tbl_type: str) -> tuple[Any, str]:
     """
     Process Ibis tables through Narwhals to unify the processing pathway.

@@ -120,14 +125,14 @@ def _process_ibis_through_narwhals(data: FrameT | Any, tbl_type: str) -> tuple[F

     Parameters
     ----------
-    data
+    data
         The data table, potentially an Ibis table
-    tbl_type
+    tbl_type
         The detected table type

     Returns
     -------
-    tuple[
+    tuple[Any, str]
         A tuple of (processed_data, updated_tbl_type) where:
         - processed_data is the Narwhals-wrapped table if it was Ibis, otherwise original data
         - updated_tbl_type is "narwhals" if it was Ibis, otherwise original tbl_type
@@ -145,7 +150,7 @@ def _process_ibis_through_narwhals(data: FrameT | Any, tbl_type: str) -> tuple[F
     return data, tbl_type


-def _is_narwhals_table(data: any) -> bool:
+def _is_narwhals_table(data: Any) -> bool:
     # Check if the data is a Narwhals DataFrame
     type_str = str(type(data)).lower()

@@ -156,7 +161,7 @@ def _is_narwhals_table(data: any) -> bool:
     return False


-def _is_lazy_frame(data:
+def _is_lazy_frame(data: Any) -> bool:
     # Check if the data is a Polars or Narwhals DataFrame
     type_str = str(type(data)).lower()

@@ -180,15 +185,17 @@ def _is_lib_present(lib_name: str) -> bool:

 def _check_any_df_lib(method_used: str) -> None:
     # Determine whether Pandas or Polars is available
+    pd = None
     try:
         import pandas as pd
     except ImportError:
-
+        pass

+    pl = None
     try:
         import polars as pl
     except ImportError:
-
+        pass

     # If neither Pandas nor Polars is available, raise an ImportError
     if pd is None and pl is None:
@@ -211,16 +218,18 @@ def _is_value_a_df(value: Any) -> bool:

 def _select_df_lib(preference: str = "polars") -> Any:
     # Determine whether Pandas is available
+    pd = None
     try:
         import pandas as pd
     except ImportError:
-
+        pass

-    # Determine whether
+    # Determine whether Polars is available
+    pl = None
     try:
         import polars as pl
     except ImportError:
-
+        pass

     # TODO: replace this with the `_check_any_df_lib()` function, introduce `method_used=` param
     # If neither Pandas nor Polars is available, raise an ImportError
@@ -240,7 +249,8 @@ def _select_df_lib(preference: str = "polars") -> Any:
     return pl if pl is not None else pd


-def _copy_dataframe(df):
+# TODO: Good argument exceptions should be handled by caller
+def _copy_dataframe(df: IntoFrameT) -> IntoFrameT:
     """
     Create a copy of a DataFrame, handling different DataFrame types.

@@ -280,19 +290,22 @@ def _copy_dataframe(df):
     return df  # pragma: no cover


-
+# TODO: Should straight up remove this
+def _convert_to_narwhals(df: IntoFrame) -> nw.DataFrame[Any] | nw.LazyFrame[Any]:
     # Convert the DataFrame to a format that narwhals can work with
-
+    result = nw.from_native(df)
+    assert is_narwhals_dataframe(result) or is_narwhals_lazyframe(result)
+    return result


-def _check_column_exists(dfn: nw.DataFrame, column: str) -> None:
+def _check_column_exists(dfn: nw.DataFrame[Any] | nw.LazyFrame[Any], column: str) -> None:
     """
     Check if a column exists in a DataFrame.

     Parameters
     ----------
     dfn
-        A Narwhals DataFrame.
+        A Narwhals DataFrame or LazyFrame.
     column
         The column to check for existence.

@@ -307,7 +320,7 @@ def _check_column_exists(dfn: nw.DataFrame, column: str) -> None:


 def _count_true_values_in_column(
-    tbl:
+    tbl: IntoFrame,
     column: str,
     inverse: bool = False,
 ) -> int:
@@ -337,14 +350,14 @@ def _count_true_values_in_column(
     tbl_filtered = tbl_nw.filter(nw.col(column) if not inverse else ~nw.col(column))

     # Always collect table if it is a LazyFrame; this is required to get the row count
-    if
+    if is_narwhals_lazyframe(tbl_filtered):
         tbl_filtered = tbl_filtered.collect()

     return len(tbl_filtered)


 def _count_null_values_in_column(
-    tbl:
+    tbl: IntoFrame,
     column: str,
 ) -> int:
     """
@@ -371,7 +384,7 @@ def _count_null_values_in_column(
     tbl_filtered = tbl_nw.filter(nw.col(column).is_null())

     # Always collect table if it is a LazyFrame; this is required to get the row count
-    if
+    if is_narwhals_lazyframe(tbl_filtered):
         tbl_filtered = tbl_filtered.collect()

     return len(tbl_filtered)
@@ -435,8 +448,11 @@ def _is_duration_dtype(dtype: str) -> bool:


 def _get_column_dtype(
-    dfn: nw.DataFrame
-
+    dfn: nw.DataFrame[Any] | nw.LazyFrame[Any],
+    column: str,
+    raw: bool = False,
+    lowercased: bool = True,
+) -> str | nw.dtypes.DType | None:
     """
     Get the data type of a column in a DataFrame.

@@ -447,14 +463,14 @@ def _get_column_dtype(
     column
         The column from which to get the data type.
     raw
-        If `True`, return the raw
+        If `True`, return the raw DType object (or None if column not found).
     lowercased
         If `True`, return the data type string in lowercase.

     Returns
     -------
-    str
-        The data type of the column
+    str | nw.dtypes.DType | None
+        The data type of the column as a string, or the raw DType object if `raw=True`.
     """

     if raw:  # pragma: no cover
@@ -468,7 +484,9 @@ def _get_column_dtype(
     return column_dtype_str


-def _check_column_type(
+def _check_column_type(
+    dfn: nw.DataFrame[Any] | nw.LazyFrame[Any], column: str, allowed_types: list[str]
+) -> None:
     """
     Check if a column is of a certain data type.

@@ -520,8 +538,8 @@ def _check_column_type(dfn: nw.DataFrame, column: str, allowed_types: list[str])


 def _column_test_prep(
-    df:
-) -> nw.DataFrame:
+    df: IntoFrame, column: str, allowed_types: list[str] | None, check_exists: bool = True
+) -> nw.DataFrame[Any] | nw.LazyFrame[Any]:
     # Convert the DataFrame to a format that narwhals can work with.
     dfn = _convert_to_narwhals(df=df)

@@ -537,8 +555,8 @@ def _column_test_prep(


 def _column_subset_test_prep(
-    df:
-) -> nw.DataFrame:
+    df: IntoFrame, columns_subset: list[str] | None, check_exists: bool = True
+) -> nw.DataFrame[Any] | nw.LazyFrame[Any]:
     # Convert the DataFrame to a format that narwhals can work with.
     dfn = _convert_to_narwhals(df=df)

@@ -550,21 +568,40 @@ def _column_subset_test_prep(
     return dfn


-
-
-    fn_name = inspect.currentframe().f_back.f_code.co_name
+_PBUnresolvedColumn = str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals
+_PBResolvedColumn = Column | ColumnLiteral | ColumnSelectorNarwhals | list[Column] | list[str]

-    return fn_name

+def _resolve_columns(columns: _PBUnresolvedColumn) -> _PBResolvedColumn:
+    # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
+    # resolve the columns
+    if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
+        columns = col(columns)

-
+    # If `columns` is Column value or a string, place it in a list for iteration
+    if isinstance(columns, (Column, str)):
+        columns = [columns]
+
+    return columns
+
+
+def _get_fn_name() -> str | None:
     # Get the current function name
-
+    frame = inspect.currentframe()
+    if frame is None or frame.f_back is None:
+        return None
+    return frame.f_back.f_code.co_name

-    # Use the `ASSERTION_TYPE_METHOD_MAP` dictionary to get the assertion type
-    assertion = ASSERTION_TYPE_METHOD_MAP.get(func_name)

-
+def _get_assertion_from_fname() -> str | None:
+    # Get the current function name
+    frame = inspect.currentframe()
+    if frame is None or frame.f_back is None:
+        return None
+    func_name = frame.f_back.f_code.co_name
+
+    # Use the `ASSERTION_TYPE_METHOD_MAP` dictionary to get the assertion type
+    return ASSERTION_TYPE_METHOD_MAP.get(func_name)


 def _check_invalid_fields(fields: list[str], valid_fields: list[str]):
@@ -660,10 +697,10 @@ def _format_to_float_value(

 def _pivot_to_dict(col_dict: Mapping[str, Any]):  # TODO : Type hint and unit test
     result_dict = {}
-    for
+    for _col, sub_dict in col_dict.items():
         for key, value in sub_dict.items():
             # add columns fields not present
             if key not in result_dict:
                 result_dict[key] = [None] * len(col_dict)
-            result_dict[key][list(col_dict.keys()).index(
+            result_dict[key][list(col_dict.keys()).index(_col)] = value
     return result_dict
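The notable behavioral fix in this file is in `_derive_bounds()`: the tuple branch previously returned early with nothing, and now derives the lower and upper bounds independently. A standalone sketch of the behavior visible in the hunk above (the input-validation branches that the diff does not show are elided here):

```python
def derive_single_bound(ref: int, tol: int | float) -> int:
    # Fractional tolerances scale the reference; absolute tolerances pass through
    return int(tol * ref) if tol < 1 else int(tol)


def derive_bounds(ref: int, tol: int | float | tuple[int | float, int | float]) -> tuple[int, int]:
    # A (lower, upper) tuple now yields two independently derived bounds;
    # a scalar tolerance still yields a symmetric pair
    if isinstance(tol, tuple):
        return (derive_single_bound(ref, tol[0]), derive_single_bound(ref, tol[1]))
    bound = derive_single_bound(ref, tol)
    return bound, bound


print(derive_bounds(100, (0.1, 5)))  # (10, 5)
print(derive_bounds(100, 0.05))      # (5, 5)
```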
pointblank/_utils_ai.py
CHANGED
@@ -7,7 +7,6 @@ from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

 import narwhals as nw
-from narwhals.typing import FrameT

 from pointblank._constants import MODEL_PROVIDERS

@@ -111,7 +110,7 @@ EXAMPLE OUTPUT FORMAT:
     if provider == "anthropic":  # pragma: no cover
         # Check that the anthropic package is installed
         try:
-            import anthropic  # noqa
+            import anthropic  # noqa # type: ignore[import-not-found]
         except ImportError:
             raise ImportError(
                 "The `anthropic` package is required to use AI validation with "
@@ -205,7 +204,7 @@ class _DataBatcher:

     def __init__(
         self,
-        data:
+        data: Any,
         columns: Optional[List[str]] = None,
         config: Optional[_BatchConfig] = None,
     ):
@@ -265,13 +264,13 @@ class _DataBatcher:
         signature_str = json.dumps(signature_data, sort_keys=True, default=str)
         return hashlib.md5(signature_str.encode()).hexdigest()

-    def _build_unique_rows_table(self) -> Tuple[
+    def _build_unique_rows_table(self) -> Tuple[Any, Dict[str, List[int]]]:
         """
         Build unique rows table and mapping back to original indices.

         Returns
         -------
-        Tuple[
+        Tuple[Any, Dict[str, List[int]]]
             Unique rows table and signature-to-indices mapping.
         """
         nw_data = self._nw_data
pointblank/_utils_check_args.py
CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Callable
+from typing import Any, Callable

 import narwhals as nw

@@ -28,7 +28,7 @@ def _check_boolean_input(param: bool, param_name: str):
         raise ValueError(f"`{param_name}=` must be a boolean value.")


-def _check_column(column: str | list[str]):
+def _check_column(column: str | list[str] | Column | ColumnSelector | nw.selectors.Selector):
     """
     Check the input value of the `column=` parameter.

@@ -59,7 +59,7 @@ def _check_column(column: str | list[str]):


 # TODO: allow for checking of dates/datetimes
-def _check_value_float_int(value: float | int |
+def _check_value_float_int(value: float | int | Any):
     """
     Check that input value of the `value=` parameter is a float or integer.

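The widened `_check_column()` signature reflects that a validation step's `columns=` argument is no longer limited to plain column names. A hedged usage sketch, assuming the selector helpers pointblank exports publicly (e.g. `starts_with()`) and the built-in `small_table` dataset:

```python
import pointblank as pb

# A selector now passes the argument check and is resolved against the table
validation = (
    pb.Validate(data=pb.load_dataset("small_table"))
    .col_vals_not_null(columns=pb.starts_with("d"))
    .interrogate()
)
```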
pointblank/_utils_llms_txt.py
CHANGED
@@ -46,6 +46,25 @@ def get_api_details(module, exported_list):
         # Get the docstring
         doc = obj.__doc__

+        # Fallback for dynamically generated aggregation methods that might not have
+        # their docstrings properly attached yet
+        if not doc and obj_name.startswith("col_") and "_" in obj_name:
+            # Check if this looks like a dynamically generated aggregation method
+            # (e.g., col_sum_gt, col_avg_eq, col_sd_le)
+            parts_name = obj_name.split("_")
+            if (
+                len(parts_name) == 3
+                and parts_name[1] in ["sum", "avg", "sd"]
+                and parts_name[2] in ["gt", "ge", "lt", "le", "eq"]
+            ):
+                try:
+                    from pointblank.validate import _generate_agg_docstring
+
+                    doc = _generate_agg_docstring(obj_name)
+                except Exception:
+                    # If we can't generate the docstring, just use what we have
+                    pass
+
         # Combine the class name, signature, and docstring
         api_text += f"{obj_name}{sig}\n{doc}\n\n"

@@ -101,9 +120,25 @@ def _get_api_text() -> str:
         "Validate.col_vals_regex",
         "Validate.col_vals_within_spec",
         "Validate.col_vals_expr",
+        "Validate.col_sum_gt",
+        "Validate.col_sum_lt",
+        "Validate.col_sum_ge",
+        "Validate.col_sum_le",
+        "Validate.col_sum_eq",
+        "Validate.col_avg_gt",
+        "Validate.col_avg_lt",
+        "Validate.col_avg_ge",
+        "Validate.col_avg_le",
+        "Validate.col_avg_eq",
+        "Validate.col_sd_gt",
+        "Validate.col_sd_lt",
+        "Validate.col_sd_ge",
+        "Validate.col_sd_le",
+        "Validate.col_sd_eq",
         "Validate.rows_distinct",
         "Validate.rows_complete",
         "Validate.col_exists",
+        "Validate.col_pct_null",
         "Validate.col_schema_match",
         "Validate.row_count_match",
         "Validate.col_count_match",
@@ -331,10 +366,14 @@ def _get_examples_text() -> str:
         example_text = "\n".join(example_text.split("\n")[8:])

         # Extract the title of the example (the line beginning with `###`)
-
+        title_match = re.search(r"### (.*)", example_text)
+        assert title_match is not None
+        title = title_match.group(1)

         # The next line with text is the short description of the example
-
+        desc_match = re.search(r"(.*)\.", example_text)
+        assert desc_match is not None
+        desc = desc_match.group(1)

         # Get all of the Python code blocks in the example
         # these can be identified as starting with ```python and ending with ```
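The docstring fallback above recognizes dynamically generated aggregation methods purely by name shape. A small re-statement of that rule — the `is_agg_validation_name()` helper is illustrative only, not part of the module:

```python
def is_agg_validation_name(name: str) -> bool:
    # Matches `col_<agg>_<op>` with agg in {sum, avg, sd} and op in {gt, ge, lt, le, eq}
    parts = name.split("_")
    return (
        len(parts) == 3
        and parts[0] == "col"
        and parts[1] in {"sum", "avg", "sd"}
        and parts[2] in {"gt", "ge", "lt", "le", "eq"}
    )


print(is_agg_validation_name("col_sum_gt"))   # True
print(is_agg_validation_name("col_vals_gt"))  # False ("vals" is not an aggregation)
```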
pointblank/actions.py
CHANGED
pointblank/assistant.py
CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
 from typing import Any

 from importlib_resources import files
-from narwhals.typing import FrameT

 from pointblank._constants import MODEL_PROVIDERS
 from pointblank.datascan import DataScan
@@ -15,7 +14,7 @@ __all__ = [

 def assistant(
     model: str,
-    data:
+    data: Any = None,
     tbl_name: str | None = None,
     api_key: str | None = None,
     display: str | None = None,
@@ -295,7 +294,7 @@ def assistant(
     if provider == "anthropic":  # pragma: no cover
         # Check that the anthropic package is installed
         try:
-            import anthropic  # noqa
+            import anthropic  # noqa # type: ignore[import-not-found]
         except ImportError:  # pragma: no cover
             raise ImportError(  # pragma: no cover
                 "The `anthropic` package is required to use the `DraftValidation` class with "
pointblank/cli.py
CHANGED
@@ -2411,7 +2411,7 @@ def requirements():


 def _rich_print_missing_table_enhanced(
-    gt_table: Any, original_data: Any = None, missing_info: dict = None
+    gt_table: Any, original_data: Any = None, missing_info: dict | None = None
 ) -> None:
     """Convert a missing values GT table to Rich table with enhanced formatting and metadata.
