pointblank 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +2 -0
- pointblank/_constants.py +10 -0
- pointblank/_constants_translations.py +1059 -2
- pointblank/_typing.py +37 -9
- pointblank/_utils.py +0 -355
- pointblank/_utils_llms_txt.py +660 -0
- pointblank/column.py +24 -0
- pointblank/data/api-docs.txt +72 -0
- pointblank/validate.py +415 -111
- pointblank/yaml.py +5 -0
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/METADATA +4 -4
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/RECORD +16 -15
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/WHEEL +0 -0
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.15.0.dist-info → pointblank-0.16.0.dist-info}/top_level.txt +0 -0
pointblank/validate.py
CHANGED
|
@@ -45,6 +45,7 @@ from pointblank._constants import (
|
|
|
45
45
|
)
|
|
46
46
|
from pointblank._constants_translations import (
|
|
47
47
|
EXPECT_FAIL_TEXT,
|
|
48
|
+
NOTES_TEXT,
|
|
48
49
|
STEP_REPORT_TEXT,
|
|
49
50
|
VALIDATION_REPORT_TEXT,
|
|
50
51
|
)
|
|
@@ -122,6 +123,7 @@ __all__ = [
|
|
|
122
123
|
"write_file",
|
|
123
124
|
"config",
|
|
124
125
|
"connect_to_table",
|
|
126
|
+
"print_database_tables",
|
|
125
127
|
"preview",
|
|
126
128
|
"missing_vals_tbl",
|
|
127
129
|
"get_action_metadata",
|
|
@@ -3918,6 +3920,47 @@ class _ValidationInfo:
|
|
|
3918
3920
|
return self.notes is not None and len(self.notes) > 0
|
|
3919
3921
|
|
|
3920
3922
|
|
|
3923
|
+
def _handle_connection_errors(e: Exception, connection_string: str) -> None:
|
|
3924
|
+
"""
|
|
3925
|
+
Shared error handling for database connection failures.
|
|
3926
|
+
|
|
3927
|
+
Raises appropriate ConnectionError with helpful messages based on the exception.
|
|
3928
|
+
"""
|
|
3929
|
+
|
|
3930
|
+
error_str = str(e).lower()
|
|
3931
|
+
backend_install_map = {
|
|
3932
|
+
"duckdb": "pip install 'ibis-framework[duckdb]'",
|
|
3933
|
+
"postgresql": "pip install 'ibis-framework[postgres]'",
|
|
3934
|
+
"postgres": "pip install 'ibis-framework[postgres]'",
|
|
3935
|
+
"mysql": "pip install 'ibis-framework[mysql]'",
|
|
3936
|
+
"sqlite": "pip install 'ibis-framework[sqlite]'",
|
|
3937
|
+
"bigquery": "pip install 'ibis-framework[bigquery]'",
|
|
3938
|
+
"snowflake": "pip install 'ibis-framework[snowflake]'",
|
|
3939
|
+
}
|
|
3940
|
+
|
|
3941
|
+
# Check if this is a missing backend dependency
|
|
3942
|
+
for backend, install_cmd in backend_install_map.items():
|
|
3943
|
+
if backend in error_str and ("not found" in error_str or "no module" in error_str):
|
|
3944
|
+
raise ConnectionError(
|
|
3945
|
+
f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
|
|
3946
|
+
f" {install_cmd}\n\n"
|
|
3947
|
+
f"Original error: {e}"
|
|
3948
|
+
) from e
|
|
3949
|
+
|
|
3950
|
+
# Generic connection error
|
|
3951
|
+
raise ConnectionError( # pragma: no cover
|
|
3952
|
+
f"Failed to connect using: {connection_string}\n"
|
|
3953
|
+
f"Error: {e}\n\n"
|
|
3954
|
+
f"Supported connection string formats:\n"
|
|
3955
|
+
f"- DuckDB: 'duckdb:///path/to/file.ddb'\n"
|
|
3956
|
+
f"- SQLite: 'sqlite:///path/to/file.db'\n"
|
|
3957
|
+
f"- PostgreSQL: 'postgresql://user:pass@host:port/db'\n"
|
|
3958
|
+
f"- MySQL: 'mysql://user:pass@host:port/db'\n"
|
|
3959
|
+
f"- BigQuery: 'bigquery://project/dataset'\n"
|
|
3960
|
+
f"- Snowflake: 'snowflake://user:pass@account/db/schema'"
|
|
3961
|
+
) from e
|
|
3962
|
+
|
|
3963
|
+
|
|
3921
3964
|
def connect_to_table(connection_string: str) -> Any:
|
|
3922
3965
|
"""
|
|
3923
3966
|
Connect to a database table using a connection string.
|
|
@@ -3997,7 +4040,11 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
3997
4040
|
pip install 'ibis-framework[duckdb]' # for DuckDB
|
|
3998
4041
|
pip install 'ibis-framework[postgres]' # for PostgreSQL
|
|
3999
4042
|
```
|
|
4043
|
+
See Also
|
|
4044
|
+
--------
|
|
4045
|
+
print_database_tables : List all available tables in a database for discovery
|
|
4000
4046
|
"""
|
|
4047
|
+
|
|
4001
4048
|
# Check if Ibis is available
|
|
4002
4049
|
if not _is_lib_present(lib_name="ibis"):
|
|
4003
4050
|
raise ImportError(
|
|
@@ -4011,14 +4058,10 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
4011
4058
|
if "::" not in connection_string:
|
|
4012
4059
|
# Try to connect to get available tables for helpful error message
|
|
4013
4060
|
try:
|
|
4014
|
-
# Extract the base connection string (without table name)
|
|
4015
4061
|
base_connection = connection_string
|
|
4016
|
-
|
|
4017
|
-
# Connect to the database
|
|
4018
4062
|
conn = ibis.connect(base_connection)
|
|
4019
4063
|
|
|
4020
|
-
#
|
|
4021
|
-
try:
|
|
4064
|
+
try: # pragma: no cover
|
|
4022
4065
|
available_tables = conn.list_tables()
|
|
4023
4066
|
except Exception: # pragma: no cover
|
|
4024
4067
|
available_tables = []
|
|
@@ -4035,7 +4078,6 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
4035
4078
|
f" {connection_string}::TABLE_NAME\n\n"
|
|
4036
4079
|
f"Examples:\n"
|
|
4037
4080
|
)
|
|
4038
|
-
# Add examples with first few table names
|
|
4039
4081
|
for table in available_tables[:3]:
|
|
4040
4082
|
error_msg += f" {connection_string}::{table}\n"
|
|
4041
4083
|
else:
|
|
@@ -4050,43 +4092,8 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
4050
4092
|
|
|
4051
4093
|
except Exception as e:
|
|
4052
4094
|
if isinstance(e, ValueError):
|
|
4053
|
-
raise
|
|
4054
|
-
|
|
4055
|
-
# Check for backend-specific errors and provide installation guidance
|
|
4056
|
-
error_str = str(e).lower()
|
|
4057
|
-
backend_install_map = {
|
|
4058
|
-
"duckdb": "pip install 'ibis-framework[duckdb]'",
|
|
4059
|
-
"postgresql": "pip install 'ibis-framework[postgres]'",
|
|
4060
|
-
"postgres": "pip install 'ibis-framework[postgres]'",
|
|
4061
|
-
"mysql": "pip install 'ibis-framework[mysql]'",
|
|
4062
|
-
"sqlite": "pip install 'ibis-framework[sqlite]'",
|
|
4063
|
-
"bigquery": "pip install 'ibis-framework[bigquery]'",
|
|
4064
|
-
"snowflake": "pip install 'ibis-framework[snowflake]'",
|
|
4065
|
-
}
|
|
4066
|
-
|
|
4067
|
-
# Check if this is a missing backend dependency
|
|
4068
|
-
for backend, install_cmd in backend_install_map.items(): # pragma: no cover
|
|
4069
|
-
if backend in error_str and ("not found" in error_str or "no module" in error_str):
|
|
4070
|
-
raise ConnectionError(
|
|
4071
|
-
f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
|
|
4072
|
-
f" {install_cmd}\n\n"
|
|
4073
|
-
f"Original error: {e}\n\n"
|
|
4074
|
-
f"Supported connection string formats:\n"
|
|
4075
|
-
f"- DuckDB: 'duckdb:///path/to/file.ddb::table_name'\n"
|
|
4076
|
-
f"- SQLite: 'sqlite:///path/to/file.db::table_name'\n"
|
|
4077
|
-
f"- PostgreSQL: 'postgresql://user:pass@host:port/db::table_name'\n"
|
|
4078
|
-
f"- MySQL: 'mysql://user:pass@host:port/db::table_name'\n"
|
|
4079
|
-
f"- BigQuery: 'bigquery://project/dataset::table_name'\n"
|
|
4080
|
-
f"- Snowflake: 'snowflake://user:pass@account/db/schema::table_name'\n"
|
|
4081
|
-
f"\nNote: Use '::table_name' to specify the table within the database."
|
|
4082
|
-
) from e
|
|
4083
|
-
|
|
4084
|
-
# Generic connection error
|
|
4085
|
-
raise ConnectionError( # pragma: no cover
|
|
4086
|
-
f"Failed to connect to database using connection string: {connection_string}\n"
|
|
4087
|
-
f"Error: {e}\n\n"
|
|
4088
|
-
f"No table specified. Use the format: {connection_string}::TABLE_NAME"
|
|
4089
|
-
) from e
|
|
4095
|
+
raise
|
|
4096
|
+
_handle_connection_errors(e, connection_string)
|
|
4090
4097
|
|
|
4091
4098
|
# Split connection string and table name
|
|
4092
4099
|
try:
|
|
@@ -4099,32 +4106,14 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
4099
4106
|
conn = ibis.connect(base_connection)
|
|
4100
4107
|
table = conn.table(table_name)
|
|
4101
4108
|
return table
|
|
4102
|
-
|
|
4103
4109
|
except Exception as e:
|
|
4104
|
-
# Check for backend-specific errors and provide installation guidance
|
|
4105
4110
|
error_str = str(e).lower()
|
|
4106
|
-
backend_install_map = {
|
|
4107
|
-
"duckdb": "pip install 'ibis-framework[duckdb]'",
|
|
4108
|
-
"postgresql": "pip install 'ibis-framework[postgres]'",
|
|
4109
|
-
"postgres": "pip install 'ibis-framework[postgres]'",
|
|
4110
|
-
"mysql": "pip install 'ibis-framework[mysql]'",
|
|
4111
|
-
"sqlite": "pip install 'ibis-framework[sqlite]'",
|
|
4112
|
-
"bigquery": "pip install 'ibis-framework[bigquery]'",
|
|
4113
|
-
"snowflake": "pip install 'ibis-framework[snowflake]'",
|
|
4114
|
-
}
|
|
4115
|
-
|
|
4116
|
-
# Check if this is a missing backend dependency
|
|
4117
|
-
for backend, install_cmd in backend_install_map.items():
|
|
4118
|
-
if backend in error_str and ("not found" in error_str or "no module" in error_str):
|
|
4119
|
-
raise ConnectionError(
|
|
4120
|
-
f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
|
|
4121
|
-
f" {install_cmd}\n\n"
|
|
4122
|
-
f"Original error: {e}"
|
|
4123
|
-
) from e
|
|
4124
4111
|
|
|
4125
|
-
# Check if table
|
|
4126
|
-
if "table" in error_str and (
|
|
4127
|
-
|
|
4112
|
+
# Check if this is a "table not found" error
|
|
4113
|
+
if "table" in error_str and (
|
|
4114
|
+
"not found" in error_str or "does not exist" in error_str or "not exist" in error_str
|
|
4115
|
+
):
|
|
4116
|
+
# Try to get available tables for a helpful error message
|
|
4128
4117
|
try: # pragma: no cover
|
|
4129
4118
|
available_tables = conn.list_tables()
|
|
4130
4119
|
if available_tables:
|
|
@@ -4132,23 +4121,79 @@ def connect_to_table(connection_string: str) -> Any:
|
|
|
4132
4121
|
raise ValueError(
|
|
4133
4122
|
f"Table '{table_name}' not found in database.\n\n"
|
|
4134
4123
|
f"Available tables:\n{table_list}\n\n"
|
|
4135
|
-
f"
|
|
4136
|
-
f" {base_connection}::CORRECT_TABLE_NAME"
|
|
4137
|
-
) from e
|
|
4138
|
-
else:
|
|
4139
|
-
raise ValueError(
|
|
4140
|
-
f"Table '{table_name}' not found and no tables available in database."
|
|
4124
|
+
f"Connection: {base_connection}"
|
|
4141
4125
|
) from e
|
|
4126
|
+
except ValueError:
|
|
4127
|
+
# Re-raise the table-specific ValueError
|
|
4128
|
+
raise
|
|
4142
4129
|
except Exception:
|
|
4143
|
-
raise
|
|
4144
|
-
|
|
4145
|
-
|
|
4146
|
-
|
|
4130
|
+
# If we can't list tables, just raise a simple error
|
|
4131
|
+
pass
|
|
4132
|
+
|
|
4133
|
+
raise ValueError(
|
|
4134
|
+
f"Table '{table_name}' not found in database.\n"
|
|
4135
|
+
f"Connection: {base_connection}\n\n"
|
|
4136
|
+
f"Original error: {e}"
|
|
4137
|
+
) from e
|
|
4138
|
+
|
|
4139
|
+
# For other errors, use the generic connection error handler
|
|
4140
|
+
_handle_connection_errors(e, base_connection)
|
|
4141
|
+
|
|
4147
4142
|
|
|
4148
|
-
|
|
4149
|
-
|
|
4150
|
-
|
|
4151
|
-
|
|
4143
|
+
def print_database_tables(connection_string: str) -> list[str]:
    """
    List all tables in a database from a connection string.

    The `print_database_tables()` function connects to a database through Ibis and returns the
    names of the tables it reports. This is useful for discovering which tables exist before
    connecting to a specific one with `connect_to_table()`. Table names containing `"memtable"`
    (temporary Ibis tables) are left out of the result so that only user tables are shown.

    Parameters
    ----------
    connection_string
        A database connection string *without* the `::table_name` suffix. Example:
        `"duckdb:///path/to/database.ddb"`.

    Returns
    -------
    list[str]
        List of table names, excluding temporary Ibis tables.

    Raises
    ------
    ValueError
        If `connection_string` contains a `::` table specification.
    ImportError
        If the Ibis library is not installed.
    ConnectionError
        If connecting to the database or listing its tables fails.

    See Also
    --------
    connect_to_table : Connect to a database table with full connection string documentation
    """
    # A table suffix makes no sense when the goal is to enumerate every table
    if "::" in connection_string:
        raise ValueError(
            "Connection string should not include table specification (::table_name).\n"
            f"You've supplied: {connection_string}\n"
            f"Expected format: 'duckdb:///path/to/database.ddb' (without ::table_name)"
        )

    # Ibis is an optional dependency, so verify that it can be imported before doing so
    ibis_available = _is_lib_present(lib_name="ibis")
    if not ibis_available:
        raise ImportError(
            "The Ibis library is not installed but is required for database connection strings.\n"
            "Install it with: pip install 'ibis-framework[duckdb]' (or other backend as needed)"
        )

    import ibis

    try:
        backend = ibis.connect(connection_string)

        # Keep every reported table except the temporary Ibis memtables
        table_names = []
        for name in backend.list_tables():
            if "memtable" in name:
                continue
            table_names.append(name)

        return table_names

    except Exception as e:
        # The shared handler always raises a `ConnectionError`
        _handle_connection_errors(e, connection_string)
|
|
4152
4197
|
|
|
4153
4198
|
|
|
4154
4199
|
@dataclass
|
|
@@ -4430,6 +4475,16 @@ class Validate:
|
|
|
4430
4475
|
- Vietnamese (`"vi"`)
|
|
4431
4476
|
- Indonesian (`"id"`)
|
|
4432
4477
|
- Ukrainian (`"uk"`)
|
|
4478
|
+
- Bulgarian (`"bg"`)
|
|
4479
|
+
- Croatian (`"hr"`)
|
|
4480
|
+
- Estonian (`"et"`)
|
|
4481
|
+
- Hungarian (`"hu"`)
|
|
4482
|
+
- Irish (`"ga"`)
|
|
4483
|
+
- Latvian (`"lv"`)
|
|
4484
|
+
- Lithuanian (`"lt"`)
|
|
4485
|
+
- Maltese (`"mt"`)
|
|
4486
|
+
- Slovak (`"sk"`)
|
|
4487
|
+
- Slovenian (`"sl"`)
|
|
4433
4488
|
- Hebrew (`"he"`)
|
|
4434
4489
|
- Thai (`"th"`)
|
|
4435
4490
|
- Persian (`"fa"`)
|
|
@@ -12722,6 +12777,33 @@ class Validate:
|
|
|
12722
12777
|
),
|
|
12723
12778
|
)
|
|
12724
12779
|
|
|
12780
|
+
# Add note for local thresholds (if they differ from global thresholds)
|
|
12781
|
+
if threshold != self.thresholds:
|
|
12782
|
+
if threshold != Thresholds():
|
|
12783
|
+
# Local thresholds are set - generate threshold note
|
|
12784
|
+
threshold_note_html = _create_local_threshold_note_html(
|
|
12785
|
+
thresholds=threshold, locale=self.locale
|
|
12786
|
+
)
|
|
12787
|
+
threshold_note_text = _create_local_threshold_note_text(thresholds=threshold)
|
|
12788
|
+
|
|
12789
|
+
# Add the note to the validation step
|
|
12790
|
+
validation._add_note(
|
|
12791
|
+
key="local_thresholds",
|
|
12792
|
+
markdown=threshold_note_html,
|
|
12793
|
+
text=threshold_note_text,
|
|
12794
|
+
)
|
|
12795
|
+
elif self.thresholds != Thresholds():
|
|
12796
|
+
# Thresholds explicitly reset to empty when global thresholds exist
|
|
12797
|
+
reset_note_html = _create_threshold_reset_note_html(locale=self.locale)
|
|
12798
|
+
reset_note_text = _create_threshold_reset_note_text()
|
|
12799
|
+
|
|
12800
|
+
# Add the note to the validation step
|
|
12801
|
+
validation._add_note(
|
|
12802
|
+
key="local_threshold_reset",
|
|
12803
|
+
markdown=reset_note_html,
|
|
12804
|
+
text=reset_note_text,
|
|
12805
|
+
)
|
|
12806
|
+
|
|
12725
12807
|
# If there is any threshold level that has been exceeded, then produce and
|
|
12726
12808
|
# set the general failure text for the validation step
|
|
12727
12809
|
if validation.warning or validation.error or validation.critical:
|
|
@@ -14217,11 +14299,15 @@ class Validate:
|
|
|
14217
14299
|
- [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
|
|
14218
14300
|
- [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
|
|
14219
14301
|
- [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
|
|
14302
|
+
- [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
|
|
14303
|
+
- [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
|
|
14220
14304
|
- [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
|
|
14221
14305
|
- [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
|
|
14222
14306
|
- [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
|
|
14307
|
+
- [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
|
|
14223
14308
|
- [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
|
|
14224
14309
|
- [`conjointly()`](`pointblank.Validate.conjointly`)
|
|
14310
|
+
- [`prompt()`](`pointblank.Validate.prompt`)
|
|
14225
14311
|
|
|
14226
14312
|
An extracted row for these validation methods means that a test unit failed for that row in
|
|
14227
14313
|
the validation step.
|
|
@@ -15795,11 +15881,15 @@ class Validate:
|
|
|
15795
15881
|
- [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
|
|
15796
15882
|
- [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
|
|
15797
15883
|
- [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
|
|
15884
|
+
- [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
|
|
15885
|
+
- [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
|
|
15798
15886
|
- [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
|
|
15799
15887
|
- [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
|
|
15800
15888
|
- [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
|
|
15889
|
+
- [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
|
|
15801
15890
|
- [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
|
|
15802
15891
|
- [`conjointly()`](`pointblank.Validate.conjointly`)
|
|
15892
|
+
- [`prompt()`](`pointblank.Validate.prompt`)
|
|
15803
15893
|
- [`rows_complete()`](`pointblank.Validate.rows_complete`)
|
|
15804
15894
|
|
|
15805
15895
|
The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
|
|
@@ -18030,60 +18120,93 @@ def _format_single_float_with_gt_custom(
|
|
|
18030
18120
|
return formatted_values[0] # Return the single formatted value
|
|
18031
18121
|
|
|
18032
18122
|
|
|
18123
|
+
def _format_number_safe(
    value: float, decimals: int, drop_trailing_zeros: bool = False, locale: str = "en", df_lib=None
) -> str:
    """
    Format a float value as a string, honoring the given locale.

    When both a DataFrame library (`df_lib`) and a value are supplied, the formatting is delegated
    to `_format_single_float_with_gt_custom()`; in every other case the result is the first
    element returned by `fmt_number()`. This helper is used by threshold formatting functions.

    Parameters
    ----------
    value
        The number to format.
    decimals
        The number of decimal places, forwarded to the underlying formatter.
    drop_trailing_zeros
        Forwarded to the underlying formatter.
    locale
        The locale to use for formatting (default: "en").
    df_lib
        The DataFrame library module to use for GT-based formatting, or `None`.

    Returns
    -------
    str
        The formatted number.
    """
    gt_formatting_possible = df_lib is not None and value is not None

    if not gt_formatting_possible:
        # No DataFrame library (or no value): fall back to the original behavior
        return fmt_number(
            value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
        )[0]  # pragma: no cover

    # GT-based formatting avoids the Pandas dependency completely
    return _format_single_float_with_gt_custom(
        value,
        decimals=decimals,
        drop_trailing_zeros=drop_trailing_zeros,
        locale=locale,
        df_lib=df_lib,
    )
|
|
18146
|
+
|
|
18147
|
+
|
|
18148
|
+
def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
    """
    Format an integer value as a string, honoring the given locale.

    When both a DataFrame library (`df_lib`) and a value are supplied, the formatting is delegated
    to `_format_single_integer_with_gt()`; in every other case the result is the first element
    returned by `fmt_integer()`. This helper is used by threshold formatting functions.

    Parameters
    ----------
    value
        The integer to format.
    locale
        The locale to use for formatting (default: "en").
    df_lib
        The DataFrame library module to use for GT-based formatting, or `None`.

    Returns
    -------
    str
        The formatted integer.
    """
    gt_formatting_possible = df_lib is not None and value is not None

    if not gt_formatting_possible:
        # No DataFrame library (or no value): fall back to the original behavior
        return fmt_integer(value, locale=locale)[0]

    # GT-based formatting avoids the Pandas dependency completely
    return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
|
|
18161
|
+
|
|
18162
|
+
|
|
18033
18163
|
def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
|
|
18034
18164
|
if thresholds == Thresholds():
|
|
18035
18165
|
return ""
|
|
18036
18166
|
|
|
18037
|
-
# Helper functions to format numbers safely
|
|
18038
|
-
def _format_number_safe(value: float, decimals: int, drop_trailing_zeros: bool = False) -> str:
|
|
18039
|
-
if df_lib is not None and value is not None:
|
|
18040
|
-
# Use GT-based formatting to avoid Pandas dependency completely
|
|
18041
|
-
return _format_single_float_with_gt_custom(
|
|
18042
|
-
value,
|
|
18043
|
-
decimals=decimals,
|
|
18044
|
-
drop_trailing_zeros=drop_trailing_zeros,
|
|
18045
|
-
locale=locale,
|
|
18046
|
-
df_lib=df_lib,
|
|
18047
|
-
)
|
|
18048
|
-
else:
|
|
18049
|
-
# Fallback to the original behavior
|
|
18050
|
-
return fmt_number(
|
|
18051
|
-
value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
|
|
18052
|
-
)[0] # pragma: no cover
|
|
18053
|
-
|
|
18054
|
-
def _format_integer_safe(value: int) -> str:
|
|
18055
|
-
if df_lib is not None and value is not None:
|
|
18056
|
-
# Use GT-based formatting to avoid Pandas dependency completely
|
|
18057
|
-
return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
|
|
18058
|
-
else:
|
|
18059
|
-
# Fallback to the original behavior
|
|
18060
|
-
return fmt_integer(value, locale=locale)[0]
|
|
18061
|
-
|
|
18062
18167
|
warning = (
|
|
18063
|
-
_format_number_safe(
|
|
18168
|
+
_format_number_safe(
|
|
18169
|
+
thresholds.warning_fraction,
|
|
18170
|
+
decimals=3,
|
|
18171
|
+
drop_trailing_zeros=True,
|
|
18172
|
+
locale=locale,
|
|
18173
|
+
df_lib=df_lib,
|
|
18174
|
+
)
|
|
18064
18175
|
if thresholds.warning_fraction is not None
|
|
18065
18176
|
else (
|
|
18066
|
-
_format_integer_safe(thresholds.warning_count)
|
|
18177
|
+
_format_integer_safe(thresholds.warning_count, locale=locale, df_lib=df_lib)
|
|
18067
18178
|
if thresholds.warning_count is not None
|
|
18068
18179
|
else "—"
|
|
18069
18180
|
)
|
|
18070
18181
|
)
|
|
18071
18182
|
|
|
18072
18183
|
error = (
|
|
18073
|
-
_format_number_safe(
|
|
18184
|
+
_format_number_safe(
|
|
18185
|
+
thresholds.error_fraction,
|
|
18186
|
+
decimals=3,
|
|
18187
|
+
drop_trailing_zeros=True,
|
|
18188
|
+
locale=locale,
|
|
18189
|
+
df_lib=df_lib,
|
|
18190
|
+
)
|
|
18074
18191
|
if thresholds.error_fraction is not None
|
|
18075
18192
|
else (
|
|
18076
|
-
_format_integer_safe(thresholds.error_count)
|
|
18193
|
+
_format_integer_safe(thresholds.error_count, locale=locale, df_lib=df_lib)
|
|
18077
18194
|
if thresholds.error_count is not None
|
|
18078
18195
|
else "—"
|
|
18079
18196
|
)
|
|
18080
18197
|
)
|
|
18081
18198
|
|
|
18082
18199
|
critical = (
|
|
18083
|
-
_format_number_safe(
|
|
18200
|
+
_format_number_safe(
|
|
18201
|
+
thresholds.critical_fraction,
|
|
18202
|
+
decimals=3,
|
|
18203
|
+
drop_trailing_zeros=True,
|
|
18204
|
+
locale=locale,
|
|
18205
|
+
df_lib=df_lib,
|
|
18206
|
+
)
|
|
18084
18207
|
if thresholds.critical_fraction is not None
|
|
18085
18208
|
else (
|
|
18086
|
-
_format_integer_safe(thresholds.critical_count)
|
|
18209
|
+
_format_integer_safe(thresholds.critical_count, locale=locale, df_lib=df_lib)
|
|
18087
18210
|
if thresholds.critical_count is not None
|
|
18088
18211
|
else "—"
|
|
18089
18212
|
)
|
|
@@ -18129,6 +18252,187 @@ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) ->
|
|
|
18129
18252
|
)
|
|
18130
18253
|
|
|
18131
18254
|
|
|
18255
|
+
def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
    """
    Build a compact HTML rendering of step-level thresholds for use in a note.

    Each threshold level that is set is rendered as a colored, bold letter (`W`, `E`, or `C`)
    followed by `:` and its formatted value. The parts are joined with `|`, wrapped in a monospace
    `<span>`, and substituted for the `{thresholds}` placeholder of the localized
    `"local_threshold"` note text.

    Parameters
    ----------
    thresholds
        The Thresholds object containing the local threshold values.
    locale
        The locale to use for formatting numbers and selecting the note text (default: "en").
        The English note text is used when the locale has no entry.

    Returns
    -------
    str
        HTML string containing the formatted threshold information, or an empty string when
        `thresholds` equals a default `Thresholds()`.
    """
    if thresholds == Thresholds():
        return ""

    # Choose the DataFrame library handed to the number formatters (Polars first, then Pandas)
    if _is_lib_present("polars"):
        import polars as pl

        df_lib = pl
    elif _is_lib_present("pandas"):
        import pandas as pd

        df_lib = pd
    else:
        df_lib = None

    def _render_level(fraction: float | None, count: int | None) -> str:
        # A fraction takes precedence over a count; an em dash stands for "neither is set"
        if fraction is None:
            if count is None:
                return "—"
            return _format_integer_safe(count, locale=locale, df_lib=df_lib)

        if fraction == 0:
            return "0"

        if fraction < 0.01:
            # Very small fractions are shown as "<0.01" (with locale formatting of the number)
            floor_text = _format_number_safe(0.01, decimals=2, locale=locale, df_lib=df_lib)
            return f"<{floor_text}"

        return _format_number_safe(
            fraction, decimals=2, drop_trailing_zeros=True, locale=locale, df_lib=df_lib
        )

    rendered = {
        "warning": _render_level(thresholds.warning_fraction, thresholds.warning_count),
        "error": _render_level(thresholds.error_fraction, thresholds.error_count),
        "critical": _render_level(thresholds.critical_fraction, thresholds.critical_count),
    }
    colors = {level: SEVERITY_LEVEL_COLORS[level] for level in ("warning", "error", "critical")}

    level_specs = (
        ("W", "warning", thresholds.warning),
        ("E", "error", thresholds.error),
        ("C", "critical", thresholds.critical),
    )

    # Only the levels that are actually set contribute a colored "<letter>:<value>" segment
    segments = []
    for letter, level, configured in level_specs:
        if configured is None:
            continue
        segments.append(
            f'<span style="color: {colors[level]}; font-weight: bold;">{letter}</span>'
            f":{rendered[level]}"
        )

    # The "|" separator only shows up between multiple thresholds
    joined_segments = "|".join(segments)
    thresholds_html = f'<span style="font-family: monospace;">{joined_segments}</span>'

    # Look up the localized template (English as the fallback) and fill in the placeholder
    translations = NOTES_TEXT["local_threshold"]
    template = translations.get(locale, translations["en"])

    return template.replace("{thresholds}", thresholds_html)
|
|
18348
|
+
|
|
18349
|
+
|
|
18350
|
+
def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
    """
    Build a plain text rendering of step-level thresholds.

    Each threshold level that is set contributes a `"<letter>: <value>"` part (`W`, `E`, `C`); the
    parts are joined with commas and prefixed with `"Step-specific thresholds set: "`. The result
    is intended for text-based output such as logs or console output.

    Parameters
    ----------
    thresholds
        The Thresholds object containing the local threshold values.

    Returns
    -------
    str
        Plain text string containing the formatted threshold information, or an empty string when
        `thresholds` equals a default `Thresholds()` or no level is set.
    """
    if thresholds == Thresholds():
        return ""

    def _render_level(fraction: float | None, count: int | None) -> str:
        # A fraction takes precedence over a count; an em dash stands for "neither is set"
        if fraction is None:
            return "—" if count is None else str(count)

        if fraction == 0:
            return "0"

        if fraction < 0.01:
            return "<0.01"

        # Two decimal places, then strip trailing zeros and a dangling decimal point
        return f"{fraction:.2f}".rstrip("0").rstrip(".")

    segments = []

    for letter, level in (("W", "warning"), ("E", "error"), ("C", "critical")):
        # Levels that are not set are left out of the note entirely
        if getattr(thresholds, level) is None:
            continue

        fraction = getattr(thresholds, f"{level}_fraction")
        count = getattr(thresholds, f"{level}_count")
        segments.append(f"{letter}: {_render_level(fraction, count)}")

    if not segments:
        return ""

    return "Step-specific thresholds set: " + ", ".join(segments)
|
|
18402
|
+
|
|
18403
|
+
|
|
18404
|
+
def _create_threshold_reset_note_html(locale: str = "en") -> str:
    """
    Look up the note shown when thresholds are explicitly reset to empty for a step.

    Parameters
    ----------
    locale
        The locale string (e.g., 'en', 'fr').

    Returns
    -------
    str
        The `"local_threshold_reset"` note text for `locale`, falling back to the English text
        and finally to an empty string when no entry is available.
    """
    # A missing "local_threshold_reset" entry is treated like an empty translation table
    translations = NOTES_TEXT.get("local_threshold_reset", {})
    english_fallback = translations.get("en", "")

    return translations.get(locale, english_fallback)
|
|
18422
|
+
|
|
18423
|
+
|
|
18424
|
+
def _create_threshold_reset_note_text() -> str:
|
|
18425
|
+
"""
|
|
18426
|
+
Create a plain text note for when thresholds are explicitly reset to empty.
|
|
18427
|
+
|
|
18428
|
+
Returns
|
|
18429
|
+
-------
|
|
18430
|
+
str
|
|
18431
|
+
Plain text note.
|
|
18432
|
+
"""
|
|
18433
|
+
return "Global thresholds explicitly not used for this step."
|
|
18434
|
+
|
|
18435
|
+
|
|
18132
18436
|
def _step_report_row_based(
|
|
18133
18437
|
assertion_type: str,
|
|
18134
18438
|
i: int,
|
pointblank/yaml.py
CHANGED
|
@@ -233,11 +233,16 @@ class YAMLValidator:
|
|
|
233
233
|
"col_vals_not_null": "col_vals_not_null",
|
|
234
234
|
"col_vals_null": "col_vals_null",
|
|
235
235
|
"col_vals_expr": "col_vals_expr",
|
|
236
|
+
"col_vals_increasing": "col_vals_increasing",
|
|
237
|
+
"col_vals_decreasing": "col_vals_decreasing",
|
|
238
|
+
"col_vals_within_spec": "col_vals_within_spec",
|
|
236
239
|
"rows_distinct": "rows_distinct",
|
|
237
240
|
"rows_complete": "rows_complete",
|
|
238
241
|
"col_count_match": "col_count_match",
|
|
239
242
|
"row_count_match": "row_count_match",
|
|
240
243
|
"col_schema_match": "col_schema_match",
|
|
244
|
+
"tbl_match": "tbl_match",
|
|
245
|
+
"prompt": "prompt",
|
|
241
246
|
"conjointly": "conjointly",
|
|
242
247
|
"specially": "specially",
|
|
243
248
|
}
|