pointblank 0.7.3__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/ISSUE_TEMPLATE/bug.md +2 -2
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/ISSUE_TEMPLATE/feature.md +1 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/PULL_REQUEST_TEMPLATE.md +1 -1
- pointblank-0.8.1/.github/SECURITY.md +14 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/workflows/ci-tests.yaml +1 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/CITATION.cff +3 -3
- {pointblank-0.7.3 → pointblank-0.8.1}/CONTRIBUTING.md +3 -3
- {pointblank-0.7.3 → pointblank-0.8.1}/PKG-INFO +8 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/README.md +7 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/_quarto.yml +5 -0
- pointblank-0.8.1/docs/blog/index.qmd +10 -0
- pointblank-0.8.1/docs/blog/intro-pointblank/index.qmd +263 -0
- pointblank-0.8.1/docs/blog/intro-pointblank/pointblank-localized.png +0 -0
- pointblank-0.8.1/docs/blog/intro-pointblank/step_report.png +0 -0
- pointblank-0.8.1/docs/blog/intro-pointblank/validation-table-diagram.png +0 -0
- pointblank-0.8.1/docs/blog/intro-pointblank/validation-test-units.png +0 -0
- pointblank-0.8.1/docs/blog/pointblank_blog_logo.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/styles.css +1 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/actions.qmd +70 -7
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/__init__.py +4 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_constants.py +9 -2
- pointblank-0.7.3/pointblank/_constants_expect_fail.py → pointblank-0.8.1/pointblank/_constants_translations.py +633 -2
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_interrogation.py +145 -3
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_utils.py +1 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_utils_check_args.py +1 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/api-docs.txt +1516 -277
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/draft.py +1 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/thresholds.py +169 -2
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/validate.py +2354 -468
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank.egg-info/PKG-INFO +8 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank.egg-info/SOURCES.txt +10 -1
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_comprehensive_validation_report_html_snap/comprehensive_validation_report.html +152 -77
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_no_interrogation_validation_report_html_snap/no_interrogation_validation_report.html +104 -53
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_no_steps_validation_report_html_snap/no_steps_validation_report.html +16 -16
- pointblank-0.8.1/tests/snapshots/test_validate/test_validation_report_briefs_global_local_html/validation_report_briefs_global_local.html +310 -0
- pointblank-0.8.1/tests/snapshots/test_validate/test_validation_report_briefs_html/validation_report_with_briefs.html +309 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_with_selector_helper_functions_no_match_snap/tbl_memtable_variable_names/selector_helper_functions_no_match.html +20 -11
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_with_selector_helper_functions_no_match_snap/tbl_pd_variable_names/selector_helper_functions_no_match.html +20 -11
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_with_selector_helper_functions_no_match_snap/tbl_pl_variable_names/selector_helper_functions_no_match.html +20 -11
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_validate.py +1217 -27
- pointblank-0.7.3/.github/SECURITY.md +0 -14
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/CODE_OF_CONDUCT.md +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/ISSUE_TEMPLATE/question.md +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/workflows/ci-docs.yaml +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.github/workflows/code-checks.yaml +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.gitignore +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.pre-commit-config.yaml +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/.vscode/settings.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/LICENSE +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/Makefile +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/game_revenue.csv +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/nycflights.csv +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/nycflights.ddb +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/small_table.csv +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/x-01-parquet.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/x-02-duckdb.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/data_raw/x-03-sqlite.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/.gitignore +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/_extensions/machow/interlinks/.gitignore +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/_extensions/machow/interlinks/_extension.yml +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/_extensions/machow/interlinks/interlinks.lua +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/assets/fav-logo.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/assets/pointblank-tabular-report.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/assets/pointblank_logo.svg +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/assets/validation-table-diagram.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/01-starter/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/02-advanced/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/03-data-extracts/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/04-sundered-data/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/05-step-report-column-check/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/06-step-report-schema-check/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/apply-checks-to-several-columns/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/check-row-column-counts/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/checks-for-missing/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/col-vals-custom-expr/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/column-selector-functions/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/comparisons-across-columns/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/data/game_revenue.parquet +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/expect-no-duplicate-rows/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/expect-no-duplicate-values/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/expect-text-pattern/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/failure-thresholds/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/advanced_validation.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/data_extracts.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/starter_validation.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/step_report_column_schema.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/step_report_column_values.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/img/sundered_data.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/mutate-table-in-step/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/numeric-comparisons/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/schema-check/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/set-membership/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/demos/using-parquet-data/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/across.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/col-summary-tbl.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/columns.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/extracts.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/index.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/missing-vals-tbl.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/preprocessing.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/preview.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/sundering.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/thresholds.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/docs/user-guide/types.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/images/pointblank-tabular-report.png +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/images/pointblank_logo.svg +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_constants_docs.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_typing.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/_utils_html.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/assistant.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/column.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/game_revenue-duckdb.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/game_revenue.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/nycflights-duckdb.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/nycflights.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/polars-api-docs.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/small_table-duckdb.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/data/small_table.zip +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/datascan.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/schema.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank/tf.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank.egg-info/dependency_links.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank.egg-info/requires.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pointblank.egg-info/top_level.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/pyproject.toml +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/setup.cfg +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/__init__.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/manual_tests/schema_step_reports.qmd +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_datascan/test_col_summary_tbl_duckdb_snap/col_summary_html_duckdb.html +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_datascan/test_col_summary_tbl_pandas_snap/col_summary_html_pandas.html +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_datascan/test_col_summary_tbl_polars_snap/col_summary_html_polars.html +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_01/schema_step_report_01-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_01_1/schema_step_report_01-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_01_2/schema_step_report_01-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_01_3/schema_step_report_01-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_02/schema_step_report_02-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_02_1/schema_step_report_02-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_02_2/schema_step_report_02-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_02_3/schema_step_report_02-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_03/schema_step_report_03-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_03_1/schema_step_report_03-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_03_2/schema_step_report_03-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_03_3/schema_step_report_03-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_04/schema_step_report_04-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_05/schema_step_report_05-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_06/schema_step_report_06-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_07/schema_step_report_07-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_08/schema_step_report_08-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_09/schema_step_report_09-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_10/schema_step_report_10-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_11/schema_step_report_11-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_12/schema_step_report_12-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_13/schema_step_report_13-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_14/schema_step_report_14-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_14_1/schema_step_report_14-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_15/schema_step_report_15-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_15_1/schema_step_report_15-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_16/schema_step_report_16-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_16_1/schema_step_report_16-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_17/schema_step_report_17-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_17_1/schema_step_report_17-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_18/schema_step_report_18-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_18_1/schema_step_report_18-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_19/schema_step_report_19-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_19_1/schema_step_report_19-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_20/schema_step_report_20-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_20_1/schema_step_report_20-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_21/schema_step_report_21-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_21_1/schema_step_report_21-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_21_2/schema_step_report_21-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_21_3/schema_step_report_21-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_22/schema_step_report_22-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_22_1/schema_step_report_22-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_23/schema_step_report_23-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_23_1/schema_step_report_23-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_24/schema_step_report_24-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_24_1/schema_step_report_24-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_24_2/schema_step_report_24-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_24_3/schema_step_report_24-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25/schema_step_report_25-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25_1/schema_step_report_25-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25_2/schema_step_report_25-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25_3/schema_step_report_25-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25_4/schema_step_report_25-4.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_step_report_25_5/schema_step_report_25-5.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_01-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_02-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_03-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_04-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_05-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_06-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_07-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_08-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_09-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_10-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_11-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_12-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_13-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_14-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_14-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_15-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_15-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_16-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_16-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_17-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_17-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_18-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_18-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_19-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_19-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_20-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_20-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_21-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_21-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_21-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_21-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_22-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_22-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_23-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_23-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_24-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_24-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_24-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_24-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-0.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-1.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-2.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-3.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-4.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_get_schema_validation_info/schema_info_25-5.txt +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_interrogate_snap/tbl_duckdb/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_interrogate_snap/tbl_parquet/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_interrogate_snap/tbl_pd/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_interrogate_snap/tbl_pl/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_interrogate_snap/tbl_sqlite/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_no_interrogate_snap/tbl_duckdb/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_no_interrogate_snap/tbl_parquet/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_no_interrogate_snap/tbl_pd/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_no_interrogate_snap/tbl_pl/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_no_interrogate_snap/tbl_sqlite/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_use_fields_snap/tbl_duckdb/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_use_fields_snap/tbl_parquet/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_use_fields_snap/tbl_pd/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_use_fields_snap/tbl_pl/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/snapshots/test_validate/test_validation_report_use_fields_snap/tbl_sqlite/validation_report.json +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_dates_times_text.ddb +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_dates_times_text.parquet +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_dates_times_text.sqlite +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_true_dates_times.ddb +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz.ddb +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz.parquet +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz.sqlite +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz_missing.ddb +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz_missing.parquet +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/tbl_files/tbl_xyz_missing.sqlite +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test__interrogation.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test__utils.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test__utils_check_args.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_assistant.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_column.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_datascan.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_draft.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_schema.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_tf.py +0 -0
- {pointblank-0.7.3 → pointblank-0.8.1}/tests/test_thresholds.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: Bug
|
|
3
|
-
about: Something is wrong with '
|
|
3
|
+
about: Something is wrong with 'Pointblank'.
|
|
4
4
|
title: ""
|
|
5
5
|
labels: "Type: ☹︎ Bug"
|
|
6
6
|
---
|
|
@@ -28,7 +28,7 @@ What should have happened? Please be as specific as possible.
|
|
|
28
28
|
## Development environment
|
|
29
29
|
|
|
30
30
|
- Operating System: [e.g., Linux, macOS, Windows]
|
|
31
|
-
-
|
|
31
|
+
- Pointblank Version: [e.g., 0.0.1]
|
|
32
32
|
|
|
33
33
|
## Additional context
|
|
34
34
|
|
|
@@ -7,7 +7,7 @@ labels: "Type: ★ Enhancement"
|
|
|
7
7
|
|
|
8
8
|
## Prework
|
|
9
9
|
|
|
10
|
-
- [ ] Read and abide by the
|
|
10
|
+
- [ ] Read and abide by the Pointblank [code of conduct](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) and [contributing guidelines](https://github.com/posit-dev/pointblank/blob/main/.github/CONTRIBUTING.md).
|
|
11
11
|
- [ ] Search for duplicates among the [existing issues](https://github.com/posit-dev/pointblank/issues) (both open and closed).
|
|
12
12
|
|
|
13
13
|
## Proposal
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Summary
|
|
2
2
|
|
|
3
|
-
Thank you for contributing to
|
|
3
|
+
Thank you for contributing to Pointblank! To make this process easier for everyone, please explain the context and purpose of your contribution. Also, list the changes made to the existing code or documentation.
|
|
4
4
|
|
|
5
5
|
# Related GitHub Issues and PRs
|
|
6
6
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
The following provides information on the versions of Pointblank that are
|
|
6
|
+
currently being supported with security updates.
|
|
7
|
+
|
|
8
|
+
| Version | Supported |
|
|
9
|
+
| -------- | ------------------ |
|
|
10
|
+
| >= 0.7.3 | :white_check_mark: |
|
|
11
|
+
|
|
12
|
+
## Reporting a Vulnerability
|
|
13
|
+
|
|
14
|
+
Please report any security vulnerabilities to rich@posit.co.
|
|
@@ -29,7 +29,7 @@ jobs:
|
|
|
29
29
|
pip install -e '.[dev]'
|
|
30
30
|
- name: Install test dependencies
|
|
31
31
|
run: |
|
|
32
|
-
pip install pytest pytest-cov pytest-snapshot pandas polars ibis-framework[duckdb,mysql,postgres,sqlite]>=9.5.0 chatlas shiny
|
|
32
|
+
pip install pytest pytest-cov pytest-snapshot pandas polars ibis-framework[duckdb,mysql,postgres,sqlite]>=9.5.0 chatlas requests shiny
|
|
33
33
|
- name: pytest unit tests
|
|
34
34
|
run: |
|
|
35
35
|
make test
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
cff-version: 1.2.0
|
|
2
|
-
message: 'If you wish to cite the "
|
|
2
|
+
message: 'If you wish to cite the "Pointblank" package use:'
|
|
3
3
|
type: software
|
|
4
4
|
license: MIT
|
|
5
|
-
title: "
|
|
6
|
-
version: 0.
|
|
5
|
+
title: "Pointblank: Find out if your data is what you think it is"
|
|
6
|
+
version: 0.7.3
|
|
7
7
|
abstract: Validate data in Polars and Pandas DataFrames and database tables.
|
|
8
8
|
Validation pipelines can be made using easily-readable, consecutive validation
|
|
9
9
|
steps. Upon execution of the validation plan, several reporting options are available.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Contributing Guidelines
|
|
2
2
|
|
|
3
|
-
There are many ways to contribute to the ongoing development of the
|
|
3
|
+
There are many ways to contribute to the ongoing development of the Pointblank package. Some contributions can be rather easy to do (e.g., fixing typos, improving documentation, filing issues for feature requests or problems, etc.) whereas other contributions can require more time and patience (like answering questions and submitting pull requests with code changes). Just know that help provided in any capacity is very much appreciated.
|
|
4
4
|
|
|
5
5
|
## Filing Issues
|
|
6
6
|
|
|
@@ -10,7 +10,7 @@ If you believe you found a bug, minimal reproducible example (MRE) for your post
|
|
|
10
10
|
|
|
11
11
|
One way to help is by simply answering questions. It's amazing how a little conversation could lead to better insights on a problem. Don't quite know the answer? That's okay too. We're all in this together.
|
|
12
12
|
|
|
13
|
-
Where might you answer user questions? Some of the forums for Q&A on
|
|
13
|
+
Where might you answer user questions? Some of the forums for Q&A on Pointblank include the _Issues_ and _Discussion_ pages in the repo. Good etiquette is key during these interactions: be a good person to all who ask questions.
|
|
14
14
|
|
|
15
15
|
### Making Pull Requests
|
|
16
16
|
|
|
@@ -19,7 +19,7 @@ Should you consider making a pull request (PR), please file an issue first and e
|
|
|
19
19
|
Once there is consensus that a PR based on the issue would be helpful, adhering to the following process will make things proceed more quickly:
|
|
20
20
|
|
|
21
21
|
- Create a separate Git branch for each PR
|
|
22
|
-
- The
|
|
22
|
+
- The Pointblank package follows the [Style Guide for Python Code](https://peps.python.org/pep-0008/) so please adopt those guidelines in your submitted code as best as possible
|
|
23
23
|
- Comment your code, particularly in those hard-to-understand areas
|
|
24
24
|
- Add test cases that cover the changes made in the PR; having tests for any new codepaths will help guard against regressions
|
|
25
25
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pointblank
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Find out if your data is what you think it is.
|
|
5
5
|
Author-email: Richard Iannone <riannone@me.com>
|
|
6
6
|
License: MIT License
|
|
@@ -177,6 +177,13 @@ You can install Pointblank using pip:
|
|
|
177
177
|
pip install pointblank
|
|
178
178
|
```
|
|
179
179
|
|
|
180
|
+
You can also install [Pointblank from Conda-Forge](https://anaconda.org/conda-forge/pointblank) by
|
|
181
|
+
using:
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
conda install conda-forge::pointblank
|
|
185
|
+
```
|
|
186
|
+
|
|
180
187
|
If you don't have Polars or Pandas installed, you'll need to install one of them to use Pointblank.
|
|
181
188
|
|
|
182
189
|
```bash
|
|
@@ -96,6 +96,13 @@ You can install Pointblank using pip:
|
|
|
96
96
|
pip install pointblank
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
+
You can also install [Pointblank from Conda-Forge](https://anaconda.org/conda-forge/pointblank) by
|
|
100
|
+
using:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
conda install conda-forge::pointblank
|
|
104
|
+
```
|
|
105
|
+
|
|
99
106
|
If you don't have Polars or Pandas installed, you'll need to install one of them to use Pointblank.
|
|
100
107
|
|
|
101
108
|
```bash
|
|
@@ -42,6 +42,8 @@ website:
|
|
|
42
42
|
file: demos/index.qmd
|
|
43
43
|
- href: reference/index.qmd
|
|
44
44
|
text: API Reference
|
|
45
|
+
- href: blog/index.qmd
|
|
46
|
+
text: Pointblog
|
|
45
47
|
right:
|
|
46
48
|
- icon: discord
|
|
47
49
|
href: https://discord.com/invite/YH7CybCNCQ
|
|
@@ -96,6 +98,8 @@ quartodoc:
|
|
|
96
98
|
members: []
|
|
97
99
|
- name: Thresholds
|
|
98
100
|
- name: Actions
|
|
101
|
+
members: []
|
|
102
|
+
- name: FinalActions
|
|
99
103
|
- name: Schema
|
|
100
104
|
members: []
|
|
101
105
|
- name: DraftValidation
|
|
@@ -193,4 +197,5 @@ quartodoc:
|
|
|
193
197
|
- name: get_column_count
|
|
194
198
|
- name: get_row_count
|
|
195
199
|
- name: get_action_metadata
|
|
200
|
+
- name: get_validation_summary
|
|
196
201
|
- name: config
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
---
|
|
2
|
+
jupyter: python3
|
|
3
|
+
html-table-processing: none
|
|
4
|
+
title: "Introducing Pointblank"
|
|
5
|
+
author: Rich Iannone
|
|
6
|
+
date: 2024-04-04
|
|
7
|
+
freeze: true
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
If you have tabular data (and who doesn't?) this is the package for you! I've long been interested
|
|
11
|
+
in data quality and so I've spent a lot of time building tooling that makes it possible to perform
|
|
12
|
+
data quality checks. And there are so many reasons to care about data quality. If I were to put down
|
|
13
|
+
just one good reason for why data quality is worth your time it is because having good data quality
|
|
14
|
+
strongly determines the quality of decisions.
|
|
15
|
+
|
|
16
|
+
Having the ability to distinguish bad data from good data is the first step in solving DQ issues,
|
|
17
|
+
and the sustained practice of doing data validation will guard against intrusions of poor-quality
|
|
18
|
+
data. Pointblank has been designed to really help here. Though it's a fairly new package it is
|
|
19
|
+
currently quite capable. And it's available in PyPI, so you can install it by using:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install pointblank
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
::: {.callout-note}
|
|
26
|
+
To run the examples in this post, you'll need to have a DataFrame library installed. Pointblank
|
|
27
|
+
works seamlessly with both Polars and Pandas but you'll need to install at least one of them on your
|
|
28
|
+
own. We also have a DuckDB example that's running via Ibis (so, you'll have to install Ibis with
|
|
29
|
+
the DuckDB backend for that to work).
|
|
30
|
+
:::
|
|
31
|
+
|
|
32
|
+
## How Pointblank Transforms Your Data Validation Workflow
|
|
33
|
+
|
|
34
|
+
What sets Pointblank apart is its intuitive, expressive approach to data validation. Rather than
|
|
35
|
+
writing dozens of ad-hoc checks scattered throughout your codebase, Pointblank lets you define a
|
|
36
|
+
comprehensive validation plan with just a few lines of code. The fluent API makes your validation
|
|
37
|
+
intentions crystal clear, whether you're ensuring numeric values fall within expected ranges, text
|
|
38
|
+
fields match specific patterns, or relationships between columns remain consistent.
|
|
39
|
+
|
|
40
|
+
But say you find problems. What are you gonna do about it? Well, Pointblank wants to help at not
|
|
41
|
+
just finding problems but helping you understand them. When validation failures occur, the detailed
|
|
42
|
+
reporting capabilities (in the form of beautiful, sharable tables) show you exactly where issues
|
|
43
|
+
are. Right down to the specific rows and columns. This transforms data validation from a binary
|
|
44
|
+
pass/fail exercise into a super-insightful diagnostic tool.
|
|
45
|
+
|
|
46
|
+

|
|
47
|
+
|
|
48
|
+
Here's the best part: Pointblank is designed to work with your existing data stack. Whether
|
|
49
|
+
you're using Polars, Pandas, DuckDB, or other database systems, Pointblank tries hard to integrate
|
|
50
|
+
without forcing you to change your workflow. We also have international spoken language support for
|
|
51
|
+
reporting, meaning that validation reports can be localized to your team's preferred language. This
|
|
52
|
+
makes data quality accessible to everyone in your organization (like a team sport!).
|
|
53
|
+
|
|
54
|
+

|
|
55
|
+
|
|
56
|
+
Alright! Let's look at a few demonstrations of Pointblank's capabilities for data validation.
|
|
57
|
+
|
|
58
|
+
## The Data Validation Workflow
|
|
59
|
+
|
|
60
|
+
Let's get right to performing a basic check of a Polars DataFrame. We'll make use of the included
|
|
61
|
+
`small_table` dataset.
|
|
62
|
+
|
|
63
|
+
```{python}
|
|
64
|
+
import pointblank as pb
|
|
65
|
+
|
|
66
|
+
small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
|
|
67
|
+
|
|
68
|
+
validation_1 = (
|
|
69
|
+
pb.Validate(
|
|
70
|
+
data=small_table,
|
|
71
|
+
tbl_name="small_table",
|
|
72
|
+
label="Example Validation"
|
|
73
|
+
)
|
|
74
|
+
.col_vals_lt(columns="a", value=10)
|
|
75
|
+
.col_vals_between(columns="d", left=0, right=5000)
|
|
76
|
+
.col_vals_in_set(columns="f", set=["low", "mid", "high"])
|
|
77
|
+
.col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$")
|
|
78
|
+
.interrogate()
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
validation_1
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
There's a lot to take in here so let's break down the code first! Note these three key pieces:
|
|
85
|
+
|
|
86
|
+
- the `Validate(data=...)` argument takes a DataFrame (or database table) that you want to validate
|
|
87
|
+
- the methods starting with `col_*` specify validation steps that run on specific columns
|
|
88
|
+
- the `interrogate()` method executes the validation plan on the table (it's the finishing step)
|
|
89
|
+
|
|
90
|
+
This common pattern is used in a validation workflow, where
|
|
91
|
+
[`Validate`](https://posit-dev.github.io/pointblank/reference/Validate.html) and
|
|
92
|
+
[`interrogate()`](https://posit-dev.github.io/pointblank/reference/Validate.interrogate.html)
|
|
93
|
+
bookend a validation plan generated through calling validation methods.
|
|
94
|
+
|
|
95
|
+
Now, onto the result: it's a table! Naturally, we're using the awesome Great Tables package here in
|
|
96
|
+
Pointblank to really give you the goods on how the validation went down. Each row in this reporting
|
|
97
|
+
table represents a single validation step (one for each invocation of a `col_vals_*()` validation
|
|
98
|
+
method). Generally speaking, the left side of the validation report tables outlines the key
|
|
99
|
+
validation rules, and the right side provides the results of each validation step.
|
|
100
|
+
|
|
101
|
+
We tried to keep it simple in principle, but a lot of useful information can be packed into this
|
|
102
|
+
validation table. Here's a diagram that describes a few of the important parts of the validation
|
|
103
|
+
report table:
|
|
104
|
+
|
|
105
|
+
{width=100%}
|
|
106
|
+
|
|
107
|
+
All of those numbers under the `UNITS`, `PASS`, and `FAIL` columns have to do with test units, a
|
|
108
|
+
measure of central importance in Pointblank. Each validation step will execute a type of validation
|
|
109
|
+
test on the target table. For example, a
|
|
110
|
+
[`col_vals_lt()`](https://posit-dev.github.io/pointblank/reference/Validate.col_vals_lt.html)
|
|
111
|
+
validation step can test that each value in a column is less than a specified number. The key
|
|
112
|
+
finding that’s reported as a result of this test is the number of test units that pass or fail. This
|
|
113
|
+
little diagram explains what those numbers mean:
|
|
114
|
+
|
|
115
|
+
{width=50%, text-align=center}
|
|
116
|
+
|
|
117
|
+
Failing test units can be tied to threshold levels, which can provide a better indication of whether
|
|
118
|
+
failures should raise some basic awareness or spur you into action. Here's a validation workflow
|
|
119
|
+
that sets three failure threshold levels that signal the severity of data quality problems:
|
|
120
|
+
|
|
121
|
+
```{python}
|
|
122
|
+
import pointblank as pb
|
|
123
|
+
import polars as pl
|
|
124
|
+
|
|
125
|
+
validation_2 = (
|
|
126
|
+
pb.Validate(
|
|
127
|
+
data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"),
|
|
128
|
+
tbl_name="game_revenue",
|
|
129
|
+
label="Data validation with threshold levels set.",
|
|
130
|
+
thresholds=pb.Thresholds(warning=1, error=20, critical=0.10),
|
|
131
|
+
)
|
|
132
|
+
.col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$") # STEP 1
|
|
133
|
+
.col_vals_gt(columns="session_duration", value=5) # STEP 2
|
|
134
|
+
.col_vals_ge(columns="item_revenue", value=0.02) # STEP 3
|
|
135
|
+
.col_vals_in_set(columns="item_type", set=["iap", "ad"]) # STEP 4
|
|
136
|
+
.col_vals_in_set( # STEP 5
|
|
137
|
+
columns="acquisition",
|
|
138
|
+
set=["google", "facebook", "organic", "crosspromo", "other_campaign"]
|
|
139
|
+
)
|
|
140
|
+
.col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"]) # STEP 6
|
|
141
|
+
.col_vals_between( # STEP 7
|
|
142
|
+
columns="session_duration",
|
|
143
|
+
left=10, right=50,
|
|
144
|
+
pre = lambda df: df.select(pl.median("session_duration"))
|
|
145
|
+
)
|
|
146
|
+
.rows_distinct(columns_subset=["player_id", "session_id", "time"]) # STEP 8
|
|
147
|
+
.row_count_match(count=2000) # STEP 9
|
|
148
|
+
.col_exists(columns="start_day") # STEP 10
|
|
149
|
+
.interrogate()
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
validation_2
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
This data validation makes use of the many
|
|
156
|
+
[validation methods available in the library](https://posit-dev.github.io/pointblank/reference/#validation-steps).
|
|
157
|
+
Because thresholds have been set at the `Validate(thresholds=)` parameter, we can now see where
|
|
158
|
+
certain validation steps have greater amounts of failures. Any validation steps with green
|
|
159
|
+
indicators passed with flying colors, whereas: (1) gray indicates the 'warning' condition was met
|
|
160
|
+
(at least one test unit failing), (2) yellow is for the 'error' condition (20 or more test units
|
|
161
|
+
failing), and (3) red means 'critical' and that's tripped when 10% of all test units are failing
|
|
162
|
+
ones.
|
|
163
|
+
|
|
164
|
+
Reporting tables are essential to the package and they help communicate what went wrong (or well) in
|
|
165
|
+
a validation workflow. Now let's look at some additional reporting that Pointblank can give you to
|
|
166
|
+
better understand *where* things might've gone wrong.
|
|
167
|
+
|
|
168
|
+
## Reporting for Individual Validation Steps
|
|
169
|
+
|
|
170
|
+
The second validation step of the previous data validation showed 18 failing test units. That
|
|
171
|
+
translates to 18 spots in a 2,000 row DataFrame where a data quality assertion failed. We often
|
|
172
|
+
would like to know exactly what that failing data is; it's usually the next step toward addressing
|
|
173
|
+
data quality issues.
|
|
174
|
+
|
|
175
|
+
Pointblank offers a method that gives you a tabular report on a specific step:
|
|
176
|
+
[`get_step_report()`](https://posit-dev.github.io/pointblank/reference/Validate.get_step_report.html).
|
|
177
|
+
The previous tables you've seen (the validation report table) dealt with providing a summary of all
|
|
178
|
+
validation steps. In contrast, a focused report on a single step can help to get to the heart of a data
|
|
179
|
+
quality issue. Here's how that looks for Step 2:
|
|
180
|
+
|
|
181
|
+
```{python}
|
|
182
|
+
validation_2.get_step_report(i=2)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
This report provides the 18 rows where the failure occurred. If you scroll the table to the right
|
|
186
|
+
you'll see the column that underwent testing (`session_duration`) is highlighted in red. All of
|
|
187
|
+
these values are `5.0` or less, which is in violation of the assertion (in the header) that
|
|
188
|
+
`session_duration > 5`.
|
|
189
|
+
|
|
190
|
+
These types of bespoke reports are useful for finding a needle in a haystack. Another good use for
|
|
191
|
+
a step report is when validating a table schema. Using the
|
|
192
|
+
[`col_schema_match()`](https://posit-dev.github.io/pointblank/reference/Validate.col_schema_match.html)
|
|
193
|
+
validation method with a table schema prepared with the
|
|
194
|
+
[`Schema`](https://posit-dev.github.io/pointblank/reference/Schema.html) class allows us to verify
|
|
195
|
+
our understanding of the table structure. Here is a validation that performs a schema validation
|
|
196
|
+
with the `small_table` dataset prepared as a DuckDB table:
|
|
197
|
+
|
|
198
|
+
```{python}
|
|
199
|
+
import pointblank as pb
|
|
200
|
+
|
|
201
|
+
# Create a schema for the target table (`small_table` as a DuckDB table)
|
|
202
|
+
schema = pb.Schema(
|
|
203
|
+
columns=[
|
|
204
|
+
("date_time", "timestamp(6)"),
|
|
205
|
+
("dates", "date"),
|
|
206
|
+
("a", "int64"),
|
|
207
|
+
("b",),
|
|
208
|
+
("c",),
|
|
209
|
+
("d", "float64"),
|
|
210
|
+
("e", ["bool", "boolean"]),
|
|
211
|
+
("f", "str"),
|
|
212
|
+
]
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Use the `col_schema_match()` validation method to perform a schema check
|
|
216
|
+
validation_3 = (
|
|
217
|
+
pb.Validate(
|
|
218
|
+
data=pb.load_dataset(dataset="small_table", tbl_type="duckdb"),
|
|
219
|
+
tbl_name="small_table",
|
|
220
|
+
label="Schema check"
|
|
221
|
+
)
|
|
222
|
+
.col_schema_match(schema=schema)
|
|
223
|
+
.interrogate()
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
validation_3
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
This step fails, but the validation report table doesn't tell us how (or where). Using
|
|
230
|
+
[`get_step_report()`](https://posit-dev.github.io/pointblank/reference/Validate.get_step_report.html)
|
|
231
|
+
will show us what the underlying issues are:
|
|
232
|
+
|
|
233
|
+
```{python}
|
|
234
|
+
validation_3.get_step_report(i=1)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
The step report here shows the target table's schema on the left side and the expectation of the
|
|
238
|
+
schema on the right side. There appear to be two problems with our supplied schema:
|
|
239
|
+
|
|
240
|
+
1. the second column is actually `date` instead of `dates`
|
|
241
|
+
2. the dtype of the `f` column is `"string"` and not `"str"`
|
|
242
|
+
|
|
243
|
+
The convenience of this step report means we only have to look at one display of information, rather
|
|
244
|
+
than having to collect up the individual pieces and make careful comparisons.
|
|
245
|
+
|
|
246
|
+
## Much More in Store
|
|
247
|
+
|
|
248
|
+
Pointblank tries really hard to make it easy for you to test your data. All sorts of input tables
|
|
249
|
+
are supported since we integrate with the brilliant
|
|
250
|
+
[Narwhals](https://narwhals-dev.github.io/narwhals/) and
|
|
251
|
+
[Ibis](https://ibis-project.org) libraries. And even though the project only started four
|
|
252
|
+
months ago, we already have an extensive catalog of well-tested validation methods.
|
|
253
|
+
|
|
254
|
+
We care a great deal about documentation so much recent effort has been placed on getting the
|
|
255
|
+
[**User Guide**](https://posit-dev.github.io/pointblank/user-guide/) written. We hope it provides
|
|
256
|
+
a gentle introduction to the major features of the library. If you want some quick examples to get
|
|
257
|
+
your imagination going, check out our
|
|
258
|
+
[gallery of examples](https://posit-dev.github.io/pointblank/demos/).
|
|
259
|
+
|
|
260
|
+
We really care about what **you** want in a validation package, so talk to us :) We just started a
|
|
261
|
+
[Discord](https://discord.com/invite/YH7CybCNCQ) so feel free to hop on and ask us anything.
|
|
262
|
+
Alternatively, we always like to get [issues](https://github.com/posit-dev/pointblank/issues) so
|
|
263
|
+
don't be shy in letting us know how we could improve!
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -70,7 +70,7 @@ considered optional. Put another way, you can set various thresholds and various
|
|
|
70
70
|
and the interrogation phase will determine whether all the requirements are met for executing
|
|
71
71
|
an action.
|
|
72
72
|
|
|
73
|
-
##
|
|
73
|
+
## Expressing Actions with a String
|
|
74
74
|
|
|
75
75
|
There are a few options in how to define the actions:
|
|
76
76
|
|
|
@@ -99,10 +99,9 @@ Here's an example where we prepare a console message with a number of value plac
|
|
|
99
99
|
(`action_str`) and use it globally at `Actions(critical=)`:
|
|
100
100
|
|
|
101
101
|
```{python}
|
|
102
|
-
|
|
103
102
|
action_str = "[{LEVEL}: {TYPE}]: Step {step} has failed validation. ({time})"
|
|
104
103
|
|
|
105
|
-
|
|
104
|
+
validation_2 = (
|
|
106
105
|
pb.Validate(
|
|
107
106
|
data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
|
|
108
107
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
@@ -113,6 +112,8 @@ validation = (
|
|
|
113
112
|
.col_vals_ge(columns="session_duration", value=15)
|
|
114
113
|
.interrogate()
|
|
115
114
|
)
|
|
115
|
+
|
|
116
|
+
validation_2
|
|
116
117
|
```
|
|
117
118
|
|
|
118
119
|
What we get here are two messages in the console, corresponding to critical failures in steps 2 and
|
|
@@ -121,16 +122,19 @@ resulting text is capitalized (e.g., `"CRITICAL"`, `"COL_VALS_GT"`, etc.) and th
|
|
|
121
122
|
capitalized the placeholder text itself. Have a look at the documentation article of
|
|
122
123
|
[`Actions`](https://posit-dev.github.io/pointblank/reference/Actions.html) for more details on this.
|
|
123
124
|
|
|
125
|
+
## Making Actions with Callables
|
|
126
|
+
|
|
124
127
|
Aside from strings, any callable can be used as an action value. Here's an example where we use a
|
|
125
128
|
custom function as part of an action:
|
|
126
129
|
|
|
127
130
|
```{python}
|
|
128
|
-
def
|
|
131
|
+
def duration_issue():
|
|
129
132
|
from datetime import datetime
|
|
130
133
|
|
|
131
134
|
print(f"Data quality issue found ({datetime.now()}).")
|
|
132
135
|
|
|
133
|
-
|
|
136
|
+
|
|
137
|
+
validation_3 = (
|
|
134
138
|
pb.Validate(
|
|
135
139
|
data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
|
|
136
140
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
@@ -140,12 +144,12 @@ validation = (
|
|
|
140
144
|
.col_vals_gt(
|
|
141
145
|
columns="session_duration",
|
|
142
146
|
value=15,
|
|
143
|
-
actions=pb.Actions(warning=
|
|
147
|
+
actions=pb.Actions(warning=duration_issue),
|
|
144
148
|
)
|
|
145
149
|
.interrogate()
|
|
146
150
|
)
|
|
147
151
|
|
|
148
|
-
|
|
152
|
+
validation_3
|
|
149
153
|
```
|
|
150
154
|
|
|
151
155
|
In this case, the 'warning' action is set to call the user's `duration_issue()` function. This action is
|
|
@@ -159,3 +163,62 @@ applied to step 3. You are free to mix and match both threshold and action setti
|
|
|
159
163
|
level (i.e., set in the [`Validate`](https://posit-dev.github.io/pointblank/reference/Validate.html)
|
|
160
164
|
call) or at the step level. The key thing to be aware of is that step-level settings of thresholds
|
|
161
165
|
and actions take precedence.
|
|
166
|
+
|
|
167
|
+
## Using `get_action_metadata()` to Access Metadata When Building an Action Callable
|
|
168
|
+
|
|
169
|
+
To access information about the validation step where an action was triggered, we can call
|
|
170
|
+
[`get_action_metadata()`](https://posit-dev.github.io/pointblank/reference/get_action_metadata.html)
|
|
171
|
+
in the body of a function to be used within `Actions`. The dictionary that's returned by that
|
|
172
|
+
function allows us to make more generalized actions that could react accordingly to different
|
|
173
|
+
failure states.
|
|
174
|
+
|
|
175
|
+
In the following example, we'll make a function called `print_problem()` that prints information to
|
|
176
|
+
the console about the failure state for a validation step. In this case, the action will be applied
|
|
177
|
+
to any threshold level being exceeded (by using `Actions(default=print_problem)`). And only the most
|
|
178
|
+
severe level exceeded per step will execute `print_problem()` since `Actions(highest_only=True)` by
|
|
179
|
+
default.
|
|
180
|
+
|
|
181
|
+
```{python}
|
|
182
|
+
def print_problem():
|
|
183
|
+
m = pb.get_action_metadata()
|
|
184
|
+
print(f"{m['level']} ({m['level_num']}) for Step {m['step']}: {m['failure_text']}")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
validation = (
|
|
188
|
+
pb.Validate(
|
|
189
|
+
data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
|
|
190
|
+
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
191
|
+
actions=pb.Actions(default=print_problem),
|
|
192
|
+
brief=True,
|
|
193
|
+
)
|
|
194
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
|
|
195
|
+
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
196
|
+
.col_vals_gt(
|
|
197
|
+
columns="session_duration",
|
|
198
|
+
value=15,
|
|
199
|
+
)
|
|
200
|
+
.interrogate()
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
validation
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
We end up seeing two messages printed for failures in Steps 2 and 3. And though those steps had more
|
|
207
|
+
than one threshold exceeded, only the most severe level in each yielded a console message.
|
|
208
|
+
|
|
209
|
+
Also note that we set the action in `Validate(actions=)` so that the action would apply to all
|
|
210
|
+
validation steps where thresholds are exceeded. This obviated the need to set `actions=` at every
|
|
211
|
+
validation step (though you can do this as a local override, even setting `actions=None` to disable
|
|
212
|
+
globally set actions).
|
|
213
|
+
|
|
214
|
+
The metadata dictionary contains the following fields for a given validation step:
|
|
215
|
+
|
|
216
|
+
- `step`: The step number.
|
|
217
|
+
- `column`: The column name.
|
|
218
|
+
- `value`: The value being compared (only available in certain validation steps).
|
|
219
|
+
- `type`: The assertion type (e.g., `"col_vals_gt"`, etc.).
|
|
220
|
+
- `time`: The time the validation step was executed (in ISO format).
|
|
221
|
+
- `level`: The severity level (`"warning"`, `"error"`, or `"critical"`).
|
|
222
|
+
- `level_num`: The severity level as a numeric value (`30`, `40`, or `50`).
|
|
223
|
+
- `autobrief`: A localized and brief statement of the expectation for the step.
|
|
224
|
+
- `failure_text`: Localized text that explains how the validation step failed.
|
|
@@ -24,13 +24,14 @@ from pointblank.datascan import DataScan, col_summary_tbl
|
|
|
24
24
|
from pointblank.draft import DraftValidation
|
|
25
25
|
from pointblank.schema import Schema
|
|
26
26
|
from pointblank.tf import TF
|
|
27
|
-
from pointblank.thresholds import Actions, Thresholds
|
|
27
|
+
from pointblank.thresholds import Actions, FinalActions, Thresholds
|
|
28
28
|
from pointblank.validate import (
|
|
29
29
|
Validate,
|
|
30
30
|
config,
|
|
31
31
|
get_action_metadata,
|
|
32
32
|
get_column_count,
|
|
33
33
|
get_row_count,
|
|
34
|
+
get_validation_summary,
|
|
34
35
|
load_dataset,
|
|
35
36
|
missing_vals_tbl,
|
|
36
37
|
preview,
|
|
@@ -42,6 +43,7 @@ __all__ = [
|
|
|
42
43
|
"Validate",
|
|
43
44
|
"Thresholds",
|
|
44
45
|
"Actions",
|
|
46
|
+
"FinalActions",
|
|
45
47
|
"Schema",
|
|
46
48
|
"DataScan",
|
|
47
49
|
"DraftValidation",
|
|
@@ -59,6 +61,7 @@ __all__ = [
|
|
|
59
61
|
"preview",
|
|
60
62
|
"missing_vals_tbl",
|
|
61
63
|
"get_action_metadata",
|
|
64
|
+
"get_validation_summary",
|
|
62
65
|
"get_column_count",
|
|
63
66
|
"get_row_count",
|
|
64
67
|
]
|
|
@@ -13,8 +13,8 @@ COMPATIBLE_DTYPES = {
|
|
|
13
13
|
"ne": ["numeric", "datetime"],
|
|
14
14
|
"ge": ["numeric", "datetime"],
|
|
15
15
|
"le": ["numeric", "datetime"],
|
|
16
|
-
"between": ["numeric"],
|
|
17
|
-
"outside": ["numeric"],
|
|
16
|
+
"between": ["numeric", "datetime"],
|
|
17
|
+
"outside": ["numeric", "datetime"],
|
|
18
18
|
"in_set": ["numeric", "str"],
|
|
19
19
|
"not_in_set": ["numeric", "str"],
|
|
20
20
|
"regex": ["str"],
|
|
@@ -159,14 +159,21 @@ REPORTING_LANGUAGES = [
|
|
|
159
159
|
"it",
|
|
160
160
|
"es",
|
|
161
161
|
"pt",
|
|
162
|
+
"ro",
|
|
162
163
|
"tr",
|
|
163
164
|
"zh-Hans",
|
|
164
165
|
"zh-Hant",
|
|
166
|
+
"ja",
|
|
167
|
+
"ko",
|
|
165
168
|
"ru",
|
|
169
|
+
"cs",
|
|
166
170
|
"pl",
|
|
167
171
|
"da",
|
|
168
172
|
"sv",
|
|
173
|
+
"nb",
|
|
169
174
|
"nl",
|
|
175
|
+
"fi",
|
|
176
|
+
"is",
|
|
170
177
|
]
|
|
171
178
|
|
|
172
179
|
SEVERITY_LEVEL_COLORS = {
|