PyPI - pointblank - Versions diffs - 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl - Mend

pointblank 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

pointblank/__init__.py +2 -0
pointblank/_constants.py +73 -0
pointblank/_constants_translations.py +1059 -2
pointblank/_interrogation.py +883 -1
pointblank/_spec_utils.py +1015 -0
pointblank/_typing.py +37 -9
pointblank/_utils.py +0 -345
pointblank/_utils_ai.py +28 -3
pointblank/_utils_llms_txt.py +660 -0
pointblank/assistant.py +1 -1
pointblank/column.py +24 -0
pointblank/data/api-docs.txt +1727 -132
pointblank/draft.py +52 -3
pointblank/validate.py +2001 -286
pointblank/yaml.py +5 -0
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/METADATA +5 -4
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/RECORD +21 -19
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/WHEEL +0 -0
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/entry_points.txt +0 -0
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/licenses/LICENSE +0 -0
{pointblank-0.14.0.dist-info → pointblank-0.16.0.dist-info}/top_level.txt +0 -0

pointblank/_typing.py CHANGED Viewed

@@ -26,12 +26,40 @@ else:
     SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
 # Add docstrings for better IDE support
-AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
-RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
-Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
-SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
-SegmentTuple.__doc__ = "(column, value(s)) format for segments"
-SegmentItem.__doc__ = "Individual segment item (string or tuple)"
-SegmentSpec.__doc__ = (
-    "Full segment specification options (i.e., all options for segment specification)"
-)
+# In Python 3.14+, __doc__ attribute on typing.Union objects became read-only
+try:
+    AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
+except AttributeError:
+    pass
+try:
+    RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
+except AttributeError:
+    pass
+try:
+    Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
+except AttributeError:
+    pass
+try:
+    SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
+except AttributeError:
+    pass
+try:
+    SegmentTuple.__doc__ = "(column, value(s)) format for segments"
+except AttributeError:
+    pass
+try:
+    SegmentItem.__doc__ = "Individual segment item (string or tuple)"
+except AttributeError:
+    pass
+try:
+    SegmentSpec.__doc__ = (
+        "Full segment specification options (i.e., all options for segment specification)"
+    )
+except AttributeError:
+    pass

pointblank/_utils.py CHANGED Viewed

@@ -588,351 +588,6 @@ def _check_invalid_fields(fields: list[str], valid_fields: list[str]):
             raise ValueError(f"Invalid field: {field}")
-def get_api_details(module, exported_list):
-    """
-    Retrieve the signatures and docstrings of the functions/classes in the exported list.
-    Parameters
-    ----------
-    module : module
-        The module from which to retrieve the functions/classes.
-    exported_list : list
-        A list of function/class names as strings.
-    Returns
-    -------
-    str
-        A string containing the combined class name, signature, and docstring.
-    """
-    api_text = ""
-    for fn in exported_list:
-        # Split the attribute path to handle nested attributes
-        parts = fn.split(".")
-        obj = module
-        for part in parts:
-            obj = getattr(obj, part)
-        # Get the name of the object
-        obj_name = obj.__name__
-        # Get the function signature
-        sig = inspect.signature(obj)
-        # Get the docstring
-        doc = obj.__doc__
-        # Combine the class name, signature, and docstring
-        api_text += f"{obj_name}{sig}\n{doc}\n\n"
-    return api_text
-def _get_api_text() -> str:
-    """
-    Get the API documentation for the Pointblank library.
-    Returns
-    -------
-    str
-        The API documentation for the Pointblank library.
-    """
-    import pointblank
-    sep_line = "-" * 70
-    api_text = (
-        f"{sep_line}\nThis is the API documentation for the Pointblank library.\n{sep_line}\n\n"
-    )
-    #
-    # Lists of exported functions and methods in different families
-    #
-    validate_exported = [
-        "Validate",
-        "Thresholds",
-        "Actions",
-        "FinalActions",
-        "Schema",
-        "DraftValidation",
-    ]
-    val_steps_exported = [
-        "Validate.col_vals_gt",
-        "Validate.col_vals_lt",
-        "Validate.col_vals_ge",
-        "Validate.col_vals_le",
-        "Validate.col_vals_eq",
-        "Validate.col_vals_ne",
-        "Validate.col_vals_between",
-        "Validate.col_vals_outside",
-        "Validate.col_vals_in_set",
-        "Validate.col_vals_not_in_set",
-        "Validate.col_vals_null",
-        "Validate.col_vals_not_null",
-        "Validate.col_vals_regex",
-        "Validate.col_vals_expr",
-        "Validate.col_exists",
-        "Validate.rows_distinct",
-        "Validate.rows_complete",
-        "Validate.col_schema_match",
-        "Validate.row_count_match",
-        "Validate.col_count_match",
-        "Validate.conjointly",
-        "Validate.specially",
-    ]
-    column_selection_exported = [
-        "col",
-        "starts_with",
-        "ends_with",
-        "contains",
-        "matches",
-        "everything",
-        "first_n",
-        "last_n",
-        "expr_col",
-    ]
-    segments_exported = [
-        "seg_group",
-    ]
-    interrogation_exported = [
-        "Validate.interrogate",
-        "Validate.get_tabular_report",
-        "Validate.get_step_report",
-        "Validate.get_json_report",
-        "Validate.get_sundered_data",
-        "Validate.get_data_extracts",
-        "Validate.all_passed",
-        "Validate.assert_passing",
-        "Validate.assert_below_threshold",
-        "Validate.above_threshold",
-        "Validate.n",
-        "Validate.n_passed",
-        "Validate.n_failed",
-        "Validate.f_passed",
-        "Validate.f_failed",
-        "Validate.warning",
-        "Validate.error",
-        "Validate.critical",
-    ]
-    inspect_exported = [
-        "DataScan",
-        "preview",
-        "col_summary_tbl",
-        "missing_vals_tbl",
-        "assistant",
-        "load_dataset",
-        "get_data_path",
-        "connect_to_table",
-    ]
-    yaml_exported = [
-        "yaml_interrogate",
-        "validate_yaml",
-    ]
-    utility_exported = [
-        "get_column_count",
-        "get_row_count",
-        "get_action_metadata",
-        "get_validation_summary",
-        "config",
-    ]
-    prebuilt_actions_exported = [
-        "send_slack_notification",
-    ]
-    validate_desc = """When peforming data validation, you'll need the `Validate` class to get the
-process started. It's given the target table and you can optionally provide some metadata and/or
-failure thresholds (using the `Thresholds` class or through shorthands for this task). The
-`Validate` class has numerous methods for defining validation steps and for obtaining
-post-interrogation metrics and data."""
-    val_steps_desc = """Validation steps can be thought of as sequential validations on the target
-data. We call `Validate`'s validation methods to build up a validation plan: a collection of steps
-that, in the aggregate, provides good validation coverage."""
-    column_selection_desc = """A flexible way to select columns for validation is to use the `col()`
-function along with column selection helper functions. A combination of `col()` + `starts_with()`,
-`matches()`, etc., allows for the selection of multiple target columns (mapping a validation across
-many steps). Furthermore, the `col()` function can be used to declare a comparison column (e.g.,
-for the `value=` argument in many `col_vals_*()` methods) when you can't use a fixed value
-for comparison."""
-    segments_desc = (
-        """Combine multiple values into a single segment using `seg_*()` helper functions."""
-    )
-    interrogation_desc = """The validation plan is put into action when `interrogate()` is called.
-The workflow for performing a comprehensive validation is then: (1) `Validate()`, (2) adding
-validation steps, (3) `interrogate()`. After interrogation of the data, we can view a validation
-report table (by printing the object or using `get_tabular_report()`), extract key metrics, or we
-can split the data based on the validation results (with `get_sundered_data()`)."""
-    inspect_desc = """The *Inspection and Assistance* group contains functions that are helpful for
-getting to grips on a new data table. Use the `DataScan` class to get a quick overview of the data,
-`preview()` to see the first and last few rows of a table, `col_summary_tbl()` for a column-level
-summary of a table, `missing_vals_tbl()` to see where there are missing values in a table, and
-`get_column_count()`/`get_row_count()` to get the number of columns and rows in a table. Several
-datasets included in the package can be accessed via the `load_dataset()` function. Finally, the
-`config()` utility lets us set global configuration parameters. Want to chat with an assistant? Use
-the `assistant()` function to get help with Pointblank."""
-    yaml_desc = """The *YAML* group contains functions that allow for the use of YAML to orchestrate
-validation workflows. The `yaml_interrogate()` function can be used to run a validation workflow from
-YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
-passes its own validity checks."""
-    utility_desc = """The Utility Functions group contains functions that are useful for accessing
-metadata about the target data. Use `get_column_count()` or `get_row_count()` to get the number of
-columns or rows in a table. The `get_action_metadata()` function is useful when building custom
-actions since it returns metadata about the validation step that's triggering the action. Lastly,
-the `config()` utility lets us set global configuration parameters."""
-    prebuilt_actions_desc = """The Prebuilt Actions group contains a function that can be used to
-send a Slack notification when validation steps exceed failure threshold levels or just to provide a
-summary of the validation results, including the status, number of steps, passing and failing steps,
-table information, and timing details."""
-    #
-    # Add headings (`*_desc` text) and API details for each family of functions/methods
-    #
-    api_text += f"""\n## The Validate family\n\n{validate_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=validate_exported)
-    api_text += f"""\n## The Validation Steps family\n\n{val_steps_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=val_steps_exported)
-    api_text += f"""\n## The Column Selection family\n\n{column_selection_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=column_selection_exported)
-    api_text += f"""\n## The Segments family\n\n{segments_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=segments_exported)
-    api_text += f"""\n## The Interrogation and Reporting family\n\n{interrogation_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=interrogation_exported)
-    api_text += f"""\n## The Inspection and Assistance family\n\n{inspect_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=inspect_exported)
-    api_text += f"""\n## The YAML family\n\n{yaml_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=yaml_exported)
-    api_text += f"""\n## The Utility Functions family\n\n{utility_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=utility_exported)
-    api_text += f"""\n## The Prebuilt Actions family\n\n{prebuilt_actions_desc}\n\n"""
-    api_text += get_api_details(module=pointblank, exported_list=prebuilt_actions_exported)
-    # Modify language syntax in all code cells
-    api_text = api_text.replace("{python}", "python")
-    # Remove code cells that contain `#| echo: false` (i.e., don't display the code)
-    api_text = re.sub(r"```python\n\s*.*\n\s*.*\n.*\n.*\n.*```\n\s*", "", api_text)
-    return api_text
-def _get_examples_text() -> str:
-    """
-    Get the examples for the Pointblank library. These examples are extracted from the Quarto
-    documents in the `docs/demos` directory.
-    Returns
-    -------
-    str
-        The examples for the Pointblank library.
-    """
-    sep_line = "-" * 70
-    examples_text = (
-        f"{sep_line}\nThis is a set of examples for the Pointblank library.\n{sep_line}\n\n"
-    )
-    # A large set of examples is available in the docs/demos directory, and each of the
-    # subdirectories contains a different example (in the form of a Quarto document)
-    example_dirs = [
-        "01-starter",
-        "02-advanced",
-        "03-data-extracts",
-        "04-sundered-data",
-        "05-step-report-column-check",
-        "06-step-report-schema-check",
-        "apply-checks-to-several-columns",
-        "check-row-column-counts",
-        "checks-for-missing",
-        "col-vals-custom-expr",
-        "column-selector-functions",
-        "comparisons-across-columns",
-        "expect-no-duplicate-rows",
-        "expect-no-duplicate-values",
-        "expect-text-pattern",
-        "failure-thresholds",
-        "mutate-table-in-step",
-        "numeric-comparisons",
-        "schema-check",
-        "set-membership",
-        "using-parquet-data",
-    ]
-    for example_dir in example_dirs:
-        link = f"https://posit-dev.github.io/pointblank/demos/{example_dir}/"
-        # Read in the index.qmd file for each example
-        with open(f"docs/demos/{example_dir}/index.qmd", "r") as f:
-            example_text = f.read()
-            # Remove the first eight lines of the example text (contains the YAML front matter)
-            example_text = "\n".join(example_text.split("\n")[8:])
-            # Extract the title of the example (the line beginning with `###`)
-            title = re.search(r"### (.*)", example_text).group(1)
-            # The next line with text is the short description of the example
-            desc = re.search(r"(.*)\.", example_text).group(1)
-            # Get all of the Python code blocks in the example
-            # these can be identified as starting with ```python and ending with ```
-            code_blocks = re.findall(r"```python\n(.*?)```", example_text, re.DOTALL)
-            # Wrap each code block with a leading ```python and trailing ```
-            code_blocks = [f"```python\n{code}```" for code in code_blocks]
-            # Collapse all code blocks into a single string
-            code_text = "\n\n".join(code_blocks)
-            # Add the example title, description, and code to the examples text
-            examples_text += f"### {title} ({link})\n\n{desc}\n\n{code_text}\n\n"
-    return examples_text
-def _get_api_and_examples_text() -> str:
-    """
-    Get the combined API and examples text for the Pointblank library.
-    Returns
-    -------
-    str
-        The combined API and examples text for the Pointblank library.
-    """
-    api_text = _get_api_text()
-    examples_text = _get_examples_text()
-    return f"{api_text}\n\n{examples_text}"
 def _format_to_integer_value(x: int | float, locale: str = "en") -> str:
     """
     Format a numeric value as an integer according to a locale's specifications.

pointblank/_utils_ai.py CHANGED Viewed

@@ -28,17 +28,22 @@ class _LLMConfig:
     provider
         LLM provider name (e.g., 'anthropic', 'openai', 'ollama', 'bedrock').
     model
-        Model name (e.g., 'claude-3-sonnet-20240229', 'gpt-4').
+        Model name (e.g., 'claude-sonnet-4-5', 'gpt-4').
     api_key
         API key for the provider. If None, will be read from environment.
+    verify_ssl
+        Whether to verify SSL certificates when making requests. Defaults to True.
     """
     provider: str
     model: str
     api_key: Optional[str] = None
+    verify_ssl: bool = True
-def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str] = None):
+def _create_chat_instance(
+    provider: str, model_name: str, api_key: Optional[str] = None, verify_ssl: bool = True
+):
     """
     Create a chatlas chat instance for the specified provider.
@@ -50,6 +55,8 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
         The model name for the provider.
     api_key
         Optional API key. If None, will be read from environment.
+    verify_ssl
+        Whether to verify SSL certificates when making requests. Defaults to True.
     Returns
     -------
@@ -89,6 +96,17 @@ EXAMPLE OUTPUT FORMAT:
   {"index": 2, "result": true}
 ]"""
+    # Create httpx client with SSL verification settings
+    try:
+        import httpx  # noqa
+    except ImportError:  # pragma: no cover
+        raise ImportError(  # pragma: no cover
+            "The `httpx` package is required for SSL configuration. "
+            "Please install it using `pip install httpx`."
+        )
+    http_client = httpx.AsyncClient(verify=verify_ssl)
     # Create provider-specific chat instance
     if provider == "anthropic":  # pragma: no cover
         # Check that the anthropic package is installed
@@ -106,6 +124,7 @@ EXAMPLE OUTPUT FORMAT:
             model=model_name,
             api_key=api_key,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )
     elif provider == "openai":  # pragma: no cover
@@ -124,6 +143,7 @@ EXAMPLE OUTPUT FORMAT:
             model=model_name,
             api_key=api_key,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )
     elif provider == "ollama":  # pragma: no cover
@@ -141,6 +161,7 @@ EXAMPLE OUTPUT FORMAT:
         chat = ChatOllama(
             model=model_name,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )
     elif provider == "bedrock":  # pragma: no cover
@@ -149,6 +170,7 @@ EXAMPLE OUTPUT FORMAT:
         chat = ChatBedrockAnthropic(
             model=model_name,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )
     else:
@@ -722,7 +744,10 @@ class _AIValidationEngine:
         """
         self.llm_config = llm_config
         self.chat = _create_chat_instance(
-            provider=llm_config.provider, model_name=llm_config.model, api_key=llm_config.api_key
+            provider=llm_config.provider,
+            model_name=llm_config.model,
+            api_key=llm_config.api_key,
+            verify_ssl=llm_config.verify_ssl,
         )
     def validate_batches(

pointblank 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

pointblank 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl