pointblank 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
pointblank/_typing.py CHANGED
@@ -26,12 +26,40 @@ else:
26
26
  SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
27
27
 
28
28
  # Add docstrings for better IDE support
29
- AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
30
- RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
31
- Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
32
- SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
33
- SegmentTuple.__doc__ = "(column, value(s)) format for segments"
34
- SegmentItem.__doc__ = "Individual segment item (string or tuple)"
35
- SegmentSpec.__doc__ = (
36
- "Full segment specification options (i.e., all options for segment specification)"
37
- )
29
+ # In Python 3.14+, __doc__ attribute on typing.Union objects became read-only
30
+ try:
31
+ AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
32
+ except AttributeError:
33
+ pass
34
+
35
+ try:
36
+ RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
37
+ except AttributeError:
38
+ pass
39
+
40
+ try:
41
+ Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
42
+ except AttributeError:
43
+ pass
44
+
45
+ try:
46
+ SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
47
+ except AttributeError:
48
+ pass
49
+
50
+ try:
51
+ SegmentTuple.__doc__ = "(column, value(s)) format for segments"
52
+ except AttributeError:
53
+ pass
54
+
55
+ try:
56
+ SegmentItem.__doc__ = "Individual segment item (string or tuple)"
57
+ except AttributeError:
58
+ pass
59
+
60
+ try:
61
+ SegmentSpec.__doc__ = (
62
+ "Full segment specification options (i.e., all options for segment specification)"
63
+ )
64
+ except AttributeError:
65
+ pass
pointblank/_utils.py CHANGED
@@ -588,351 +588,6 @@ def _check_invalid_fields(fields: list[str], valid_fields: list[str]):
588
588
  raise ValueError(f"Invalid field: {field}")
589
589
 
590
590
 
591
- def get_api_details(module, exported_list):
592
- """
593
- Retrieve the signatures and docstrings of the functions/classes in the exported list.
594
-
595
- Parameters
596
- ----------
597
- module : module
598
- The module from which to retrieve the functions/classes.
599
- exported_list : list
600
- A list of function/class names as strings.
601
-
602
- Returns
603
- -------
604
- str
605
- A string containing the combined class name, signature, and docstring.
606
- """
607
- api_text = ""
608
-
609
- for fn in exported_list:
610
- # Split the attribute path to handle nested attributes
611
- parts = fn.split(".")
612
- obj = module
613
- for part in parts:
614
- obj = getattr(obj, part)
615
-
616
- # Get the name of the object
617
- obj_name = obj.__name__
618
-
619
- # Get the function signature
620
- sig = inspect.signature(obj)
621
-
622
- # Get the docstring
623
- doc = obj.__doc__
624
-
625
- # Combine the class name, signature, and docstring
626
- api_text += f"{obj_name}{sig}\n{doc}\n\n"
627
-
628
- return api_text
629
-
630
-
631
- def _get_api_text() -> str:
632
- """
633
- Get the API documentation for the Pointblank library.
634
-
635
- Returns
636
- -------
637
- str
638
- The API documentation for the Pointblank library.
639
- """
640
-
641
- import pointblank
642
-
643
- sep_line = "-" * 70
644
-
645
- api_text = (
646
- f"{sep_line}\nThis is the API documentation for the Pointblank library.\n{sep_line}\n\n"
647
- )
648
-
649
- #
650
- # Lists of exported functions and methods in different families
651
- #
652
-
653
- validate_exported = [
654
- "Validate",
655
- "Thresholds",
656
- "Actions",
657
- "FinalActions",
658
- "Schema",
659
- "DraftValidation",
660
- ]
661
-
662
- val_steps_exported = [
663
- "Validate.col_vals_gt",
664
- "Validate.col_vals_lt",
665
- "Validate.col_vals_ge",
666
- "Validate.col_vals_le",
667
- "Validate.col_vals_eq",
668
- "Validate.col_vals_ne",
669
- "Validate.col_vals_between",
670
- "Validate.col_vals_outside",
671
- "Validate.col_vals_in_set",
672
- "Validate.col_vals_not_in_set",
673
- "Validate.col_vals_null",
674
- "Validate.col_vals_not_null",
675
- "Validate.col_vals_regex",
676
- "Validate.col_vals_expr",
677
- "Validate.col_exists",
678
- "Validate.rows_distinct",
679
- "Validate.rows_complete",
680
- "Validate.col_schema_match",
681
- "Validate.row_count_match",
682
- "Validate.col_count_match",
683
- "Validate.conjointly",
684
- "Validate.specially",
685
- ]
686
-
687
- column_selection_exported = [
688
- "col",
689
- "starts_with",
690
- "ends_with",
691
- "contains",
692
- "matches",
693
- "everything",
694
- "first_n",
695
- "last_n",
696
- "expr_col",
697
- ]
698
-
699
- segments_exported = [
700
- "seg_group",
701
- ]
702
-
703
- interrogation_exported = [
704
- "Validate.interrogate",
705
- "Validate.get_tabular_report",
706
- "Validate.get_step_report",
707
- "Validate.get_json_report",
708
- "Validate.get_sundered_data",
709
- "Validate.get_data_extracts",
710
- "Validate.all_passed",
711
- "Validate.assert_passing",
712
- "Validate.assert_below_threshold",
713
- "Validate.above_threshold",
714
- "Validate.n",
715
- "Validate.n_passed",
716
- "Validate.n_failed",
717
- "Validate.f_passed",
718
- "Validate.f_failed",
719
- "Validate.warning",
720
- "Validate.error",
721
- "Validate.critical",
722
- ]
723
-
724
- inspect_exported = [
725
- "DataScan",
726
- "preview",
727
- "col_summary_tbl",
728
- "missing_vals_tbl",
729
- "assistant",
730
- "load_dataset",
731
- "get_data_path",
732
- "connect_to_table",
733
- ]
734
-
735
- yaml_exported = [
736
- "yaml_interrogate",
737
- "validate_yaml",
738
- ]
739
-
740
- utility_exported = [
741
- "get_column_count",
742
- "get_row_count",
743
- "get_action_metadata",
744
- "get_validation_summary",
745
- "config",
746
- ]
747
-
748
- prebuilt_actions_exported = [
749
- "send_slack_notification",
750
- ]
751
-
752
- validate_desc = """When peforming data validation, you'll need the `Validate` class to get the
753
- process started. It's given the target table and you can optionally provide some metadata and/or
754
- failure thresholds (using the `Thresholds` class or through shorthands for this task). The
755
- `Validate` class has numerous methods for defining validation steps and for obtaining
756
- post-interrogation metrics and data."""
757
-
758
- val_steps_desc = """Validation steps can be thought of as sequential validations on the target
759
- data. We call `Validate`'s validation methods to build up a validation plan: a collection of steps
760
- that, in the aggregate, provides good validation coverage."""
761
-
762
- column_selection_desc = """A flexible way to select columns for validation is to use the `col()`
763
- function along with column selection helper functions. A combination of `col()` + `starts_with()`,
764
- `matches()`, etc., allows for the selection of multiple target columns (mapping a validation across
765
- many steps). Furthermore, the `col()` function can be used to declare a comparison column (e.g.,
766
- for the `value=` argument in many `col_vals_*()` methods) when you can't use a fixed value
767
- for comparison."""
768
-
769
- segments_desc = (
770
- """Combine multiple values into a single segment using `seg_*()` helper functions."""
771
- )
772
-
773
- interrogation_desc = """The validation plan is put into action when `interrogate()` is called.
774
- The workflow for performing a comprehensive validation is then: (1) `Validate()`, (2) adding
775
- validation steps, (3) `interrogate()`. After interrogation of the data, we can view a validation
776
- report table (by printing the object or using `get_tabular_report()`), extract key metrics, or we
777
- can split the data based on the validation results (with `get_sundered_data()`)."""
778
-
779
- inspect_desc = """The *Inspection and Assistance* group contains functions that are helpful for
780
- getting to grips on a new data table. Use the `DataScan` class to get a quick overview of the data,
781
- `preview()` to see the first and last few rows of a table, `col_summary_tbl()` for a column-level
782
- summary of a table, `missing_vals_tbl()` to see where there are missing values in a table, and
783
- `get_column_count()`/`get_row_count()` to get the number of columns and rows in a table. Several
784
- datasets included in the package can be accessed via the `load_dataset()` function. Finally, the
785
- `config()` utility lets us set global configuration parameters. Want to chat with an assistant? Use
786
- the `assistant()` function to get help with Pointblank."""
787
-
788
- yaml_desc = """The *YAML* group contains functions that allow for the use of YAML to orchestrate
789
- validation workflows. The `yaml_interrogate()` function can be used to run a validation workflow from
790
- YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
791
- passes its own validity checks."""
792
-
793
- utility_desc = """The Utility Functions group contains functions that are useful for accessing
794
- metadata about the target data. Use `get_column_count()` or `get_row_count()` to get the number of
795
- columns or rows in a table. The `get_action_metadata()` function is useful when building custom
796
- actions since it returns metadata about the validation step that's triggering the action. Lastly,
797
- the `config()` utility lets us set global configuration parameters."""
798
-
799
- prebuilt_actions_desc = """The Prebuilt Actions group contains a function that can be used to
800
- send a Slack notification when validation steps exceed failure threshold levels or just to provide a
801
- summary of the validation results, including the status, number of steps, passing and failing steps,
802
- table information, and timing details."""
803
-
804
- #
805
- # Add headings (`*_desc` text) and API details for each family of functions/methods
806
- #
807
-
808
- api_text += f"""\n## The Validate family\n\n{validate_desc}\n\n"""
809
- api_text += get_api_details(module=pointblank, exported_list=validate_exported)
810
-
811
- api_text += f"""\n## The Validation Steps family\n\n{val_steps_desc}\n\n"""
812
- api_text += get_api_details(module=pointblank, exported_list=val_steps_exported)
813
-
814
- api_text += f"""\n## The Column Selection family\n\n{column_selection_desc}\n\n"""
815
- api_text += get_api_details(module=pointblank, exported_list=column_selection_exported)
816
-
817
- api_text += f"""\n## The Segments family\n\n{segments_desc}\n\n"""
818
- api_text += get_api_details(module=pointblank, exported_list=segments_exported)
819
-
820
- api_text += f"""\n## The Interrogation and Reporting family\n\n{interrogation_desc}\n\n"""
821
- api_text += get_api_details(module=pointblank, exported_list=interrogation_exported)
822
-
823
- api_text += f"""\n## The Inspection and Assistance family\n\n{inspect_desc}\n\n"""
824
- api_text += get_api_details(module=pointblank, exported_list=inspect_exported)
825
-
826
- api_text += f"""\n## The YAML family\n\n{yaml_desc}\n\n"""
827
- api_text += get_api_details(module=pointblank, exported_list=yaml_exported)
828
-
829
- api_text += f"""\n## The Utility Functions family\n\n{utility_desc}\n\n"""
830
- api_text += get_api_details(module=pointblank, exported_list=utility_exported)
831
-
832
- api_text += f"""\n## The Prebuilt Actions family\n\n{prebuilt_actions_desc}\n\n"""
833
- api_text += get_api_details(module=pointblank, exported_list=prebuilt_actions_exported)
834
-
835
- # Modify language syntax in all code cells
836
- api_text = api_text.replace("{python}", "python")
837
-
838
- # Remove code cells that contain `#| echo: false` (i.e., don't display the code)
839
- api_text = re.sub(r"```python\n\s*.*\n\s*.*\n.*\n.*\n.*```\n\s*", "", api_text)
840
-
841
- return api_text
842
-
843
-
844
- def _get_examples_text() -> str:
845
- """
846
- Get the examples for the Pointblank library. These examples are extracted from the Quarto
847
- documents in the `docs/demos` directory.
848
-
849
- Returns
850
- -------
851
- str
852
- The examples for the Pointblank library.
853
- """
854
-
855
- sep_line = "-" * 70
856
-
857
- examples_text = (
858
- f"{sep_line}\nThis is a set of examples for the Pointblank library.\n{sep_line}\n\n"
859
- )
860
-
861
- # A large set of examples is available in the docs/demos directory, and each of the
862
- # subdirectories contains a different example (in the form of a Quarto document)
863
-
864
- example_dirs = [
865
- "01-starter",
866
- "02-advanced",
867
- "03-data-extracts",
868
- "04-sundered-data",
869
- "05-step-report-column-check",
870
- "06-step-report-schema-check",
871
- "apply-checks-to-several-columns",
872
- "check-row-column-counts",
873
- "checks-for-missing",
874
- "col-vals-custom-expr",
875
- "column-selector-functions",
876
- "comparisons-across-columns",
877
- "expect-no-duplicate-rows",
878
- "expect-no-duplicate-values",
879
- "expect-text-pattern",
880
- "failure-thresholds",
881
- "mutate-table-in-step",
882
- "numeric-comparisons",
883
- "schema-check",
884
- "set-membership",
885
- "using-parquet-data",
886
- ]
887
-
888
- for example_dir in example_dirs:
889
- link = f"https://posit-dev.github.io/pointblank/demos/{example_dir}/"
890
-
891
- # Read in the index.qmd file for each example
892
- with open(f"docs/demos/{example_dir}/index.qmd", "r") as f:
893
- example_text = f.read()
894
-
895
- # Remove the first eight lines of the example text (contains the YAML front matter)
896
- example_text = "\n".join(example_text.split("\n")[8:])
897
-
898
- # Extract the title of the example (the line beginning with `###`)
899
- title = re.search(r"### (.*)", example_text).group(1)
900
-
901
- # The next line with text is the short description of the example
902
- desc = re.search(r"(.*)\.", example_text).group(1)
903
-
904
- # Get all of the Python code blocks in the example
905
- # these can be identified as starting with ```python and ending with ```
906
- code_blocks = re.findall(r"```python\n(.*?)```", example_text, re.DOTALL)
907
-
908
- # Wrap each code block with a leading ```python and trailing ```
909
- code_blocks = [f"```python\n{code}```" for code in code_blocks]
910
-
911
- # Collapse all code blocks into a single string
912
- code_text = "\n\n".join(code_blocks)
913
-
914
- # Add the example title, description, and code to the examples text
915
- examples_text += f"### {title} ({link})\n\n{desc}\n\n{code_text}\n\n"
916
-
917
- return examples_text
918
-
919
-
920
- def _get_api_and_examples_text() -> str:
921
- """
922
- Get the combined API and examples text for the Pointblank library.
923
-
924
- Returns
925
- -------
926
- str
927
- The combined API and examples text for the Pointblank library.
928
- """
929
-
930
- api_text = _get_api_text()
931
- examples_text = _get_examples_text()
932
-
933
- return f"{api_text}\n\n{examples_text}"
934
-
935
-
936
591
  def _format_to_integer_value(x: int | float, locale: str = "en") -> str:
937
592
  """
938
593
  Format a numeric value as an integer according to a locale's specifications.
pointblank/_utils_ai.py CHANGED
@@ -28,17 +28,22 @@ class _LLMConfig:
28
28
  provider
29
29
  LLM provider name (e.g., 'anthropic', 'openai', 'ollama', 'bedrock').
30
30
  model
31
- Model name (e.g., 'claude-3-sonnet-20240229', 'gpt-4').
31
+ Model name (e.g., 'claude-sonnet-4-5', 'gpt-4').
32
32
  api_key
33
33
  API key for the provider. If None, will be read from environment.
34
+ verify_ssl
35
+ Whether to verify SSL certificates when making requests. Defaults to True.
34
36
  """
35
37
 
36
38
  provider: str
37
39
  model: str
38
40
  api_key: Optional[str] = None
41
+ verify_ssl: bool = True
39
42
 
40
43
 
41
- def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str] = None):
44
+ def _create_chat_instance(
45
+ provider: str, model_name: str, api_key: Optional[str] = None, verify_ssl: bool = True
46
+ ):
42
47
  """
43
48
  Create a chatlas chat instance for the specified provider.
44
49
 
@@ -50,6 +55,8 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
50
55
  The model name for the provider.
51
56
  api_key
52
57
  Optional API key. If None, will be read from environment.
58
+ verify_ssl
59
+ Whether to verify SSL certificates when making requests. Defaults to True.
53
60
 
54
61
  Returns
55
62
  -------
@@ -89,6 +96,17 @@ EXAMPLE OUTPUT FORMAT:
89
96
  {"index": 2, "result": true}
90
97
  ]"""
91
98
 
99
+ # Create httpx client with SSL verification settings
100
+ try:
101
+ import httpx # noqa
102
+ except ImportError: # pragma: no cover
103
+ raise ImportError( # pragma: no cover
104
+ "The `httpx` package is required for SSL configuration. "
105
+ "Please install it using `pip install httpx`."
106
+ )
107
+
108
+ http_client = httpx.AsyncClient(verify=verify_ssl)
109
+
92
110
  # Create provider-specific chat instance
93
111
  if provider == "anthropic": # pragma: no cover
94
112
  # Check that the anthropic package is installed
@@ -106,6 +124,7 @@ EXAMPLE OUTPUT FORMAT:
106
124
  model=model_name,
107
125
  api_key=api_key,
108
126
  system_prompt=system_prompt,
127
+ kwargs={"http_client": http_client},
109
128
  )
110
129
 
111
130
  elif provider == "openai": # pragma: no cover
@@ -124,6 +143,7 @@ EXAMPLE OUTPUT FORMAT:
124
143
  model=model_name,
125
144
  api_key=api_key,
126
145
  system_prompt=system_prompt,
146
+ kwargs={"http_client": http_client},
127
147
  )
128
148
 
129
149
  elif provider == "ollama": # pragma: no cover
@@ -141,6 +161,7 @@ EXAMPLE OUTPUT FORMAT:
141
161
  chat = ChatOllama(
142
162
  model=model_name,
143
163
  system_prompt=system_prompt,
164
+ kwargs={"http_client": http_client},
144
165
  )
145
166
 
146
167
  elif provider == "bedrock": # pragma: no cover
@@ -149,6 +170,7 @@ EXAMPLE OUTPUT FORMAT:
149
170
  chat = ChatBedrockAnthropic(
150
171
  model=model_name,
151
172
  system_prompt=system_prompt,
173
+ kwargs={"http_client": http_client},
152
174
  )
153
175
 
154
176
  else:
@@ -722,7 +744,10 @@ class _AIValidationEngine:
722
744
  """
723
745
  self.llm_config = llm_config
724
746
  self.chat = _create_chat_instance(
725
- provider=llm_config.provider, model_name=llm_config.model, api_key=llm_config.api_key
747
+ provider=llm_config.provider,
748
+ model_name=llm_config.model,
749
+ api_key=llm_config.api_key,
750
+ verify_ssl=llm_config.verify_ssl,
726
751
  )
727
752
 
728
753
  def validate_batches(