pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +4 -0
- pointblank/_constants.py +117 -0
- pointblank/_constants_translations.py +487 -2
- pointblank/_interrogation.py +1065 -12
- pointblank/_spec_utils.py +1015 -0
- pointblank/_utils.py +17 -7
- pointblank/_utils_ai.py +875 -0
- pointblank/assistant.py +1 -1
- pointblank/cli.py +128 -115
- pointblank/column.py +1 -1
- pointblank/data/api-docs.txt +1838 -130
- pointblank/data/validations/README.md +108 -0
- pointblank/data/validations/complex_preprocessing.json +54 -0
- pointblank/data/validations/complex_preprocessing.pkl +0 -0
- pointblank/data/validations/generate_test_files.py +127 -0
- pointblank/data/validations/multiple_steps.json +83 -0
- pointblank/data/validations/multiple_steps.pkl +0 -0
- pointblank/data/validations/narwhals_function.json +28 -0
- pointblank/data/validations/narwhals_function.pkl +0 -0
- pointblank/data/validations/no_preprocessing.json +83 -0
- pointblank/data/validations/no_preprocessing.pkl +0 -0
- pointblank/data/validations/pandas_compatible.json +28 -0
- pointblank/data/validations/pandas_compatible.pkl +0 -0
- pointblank/data/validations/preprocessing_functions.py +46 -0
- pointblank/data/validations/simple_preprocessing.json +57 -0
- pointblank/data/validations/simple_preprocessing.pkl +0 -0
- pointblank/datascan.py +4 -4
- pointblank/draft.py +52 -3
- pointblank/scan_profile.py +6 -6
- pointblank/schema.py +8 -82
- pointblank/thresholds.py +1 -1
- pointblank/validate.py +3069 -437
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/METADATA +67 -8
- pointblank-0.15.0.dist-info/RECORD +56 -0
- pointblank-0.13.4.dist-info/RECORD +0 -39
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/WHEEL +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/top_level.txt +0 -0
pointblank/draft.py
CHANGED
|
@@ -38,10 +38,15 @@ class DraftValidation:
|
|
|
38
38
|
The data to be used for drafting a validation plan.
|
|
39
39
|
model
|
|
40
40
|
The model to be used. This should be in the form of `provider:model` (e.g.,
|
|
41
|
-
`"anthropic:claude-
|
|
41
|
+
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
|
|
42
42
|
`"ollama"`, and `"bedrock"`.
|
|
43
43
|
api_key
|
|
44
44
|
The API key to be used for the model.
|
|
45
|
+
verify_ssl
|
|
46
|
+
Whether to verify SSL certificates when making requests to the LLM provider. Set to `False`
|
|
47
|
+
to disable SSL verification (e.g., when behind a corporate firewall with self-signed
|
|
48
|
+
certificates). Defaults to `True`. Use with caution as disabling SSL verification can pose
|
|
49
|
+
security risks.
|
|
45
50
|
|
|
46
51
|
Returns
|
|
47
52
|
-------
|
|
@@ -83,6 +88,33 @@ class DraftValidation:
|
|
|
83
88
|
There's no need to have the `python-dotenv` package installed when using `.env` files in this
|
|
84
89
|
way.
|
|
85
90
|
|
|
91
|
+
Notes on SSL Certificate Verification
|
|
92
|
+
-------------------------------------
|
|
93
|
+
By default, SSL certificate verification is enabled for all requests to LLM providers. However,
|
|
94
|
+
in certain network environments (such as corporate networks with self-signed certificates or
|
|
95
|
+
firewall proxies), you may encounter SSL certificate verification errors.
|
|
96
|
+
|
|
97
|
+
To disable SSL verification, set the `verify_ssl` parameter to `False`:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import pointblank as pb
|
|
101
|
+
|
|
102
|
+
data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")
|
|
103
|
+
|
|
104
|
+
# Disable SSL verification for networks with self-signed certificates
|
|
105
|
+
pb.DraftValidation(
|
|
106
|
+
data=data,
|
|
107
|
+
model="anthropic:claude-sonnet-4-5",
|
|
108
|
+
verify_ssl=False
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
:::{.callout-warning}
|
|
113
|
+
Disabling SSL verification (through `verify_ssl=False`) can expose your API keys and data to
|
|
114
|
+
man-in-the-middle attacks. Only use this option in trusted network environments and when
|
|
115
|
+
absolutely necessary.
|
|
116
|
+
:::
|
|
117
|
+
|
|
86
118
|
Notes on Data Sent to the Model Provider
|
|
87
119
|
----------------------------------------
|
|
88
120
|
The data sent to the model provider is a JSON summary of the table. This data summary is
|
|
@@ -109,7 +141,7 @@ class DraftValidation:
|
|
|
109
141
|
Let's look at how the `DraftValidation` class can be used to draft a validation plan for a
|
|
110
142
|
table. The table to be used is `"nycflights"`, which is available here via the
|
|
111
143
|
[`load_dataset()`](`pointblank.load_dataset`) function. The model to be used is
|
|
112
|
-
`"anthropic:claude-
|
|
144
|
+
`"anthropic:claude-sonnet-4-5"` (which performs very well compared to other LLMs). The
|
|
113
145
|
example assumes that the API key is stored in an `.env` file as `ANTHROPIC_API_KEY`.
|
|
114
146
|
|
|
115
147
|
```python
|
|
@@ -119,7 +151,7 @@ class DraftValidation:
|
|
|
119
151
|
data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")
|
|
120
152
|
|
|
121
153
|
# Draft a validation plan for the "nycflights" table
|
|
122
|
-
pb.DraftValidation(data=data, model="anthropic:claude-
|
|
154
|
+
pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")
|
|
123
155
|
```
|
|
124
156
|
|
|
125
157
|
The output will be a drafted validation plan for the `"nycflights"` table and this will appear
|
|
@@ -194,6 +226,7 @@ class DraftValidation:
|
|
|
194
226
|
data: FrameT | Any
|
|
195
227
|
model: str
|
|
196
228
|
api_key: str | None = None
|
|
229
|
+
verify_ssl: bool = True
|
|
197
230
|
response: str = field(init=False)
|
|
198
231
|
|
|
199
232
|
def __post_init__(self):
|
|
@@ -280,6 +313,18 @@ class DraftValidation:
|
|
|
280
313
|
" per line)"
|
|
281
314
|
)
|
|
282
315
|
|
|
316
|
+
# Create httpx client with SSL verification settings
|
|
317
|
+
# This will be passed to the LLM provider's chat client
|
|
318
|
+
try:
|
|
319
|
+
import httpx # noqa
|
|
320
|
+
except ImportError: # pragma: no cover
|
|
321
|
+
raise ImportError( # pragma: no cover
|
|
322
|
+
"The `httpx` package is required for SSL configuration. "
|
|
323
|
+
"Please install it using `pip install httpx`."
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
http_client = httpx.AsyncClient(verify=self.verify_ssl)
|
|
327
|
+
|
|
283
328
|
if provider == "anthropic": # pragma: no cover
|
|
284
329
|
# Check that the anthropic package is installed
|
|
285
330
|
try:
|
|
@@ -296,6 +341,7 @@ class DraftValidation:
|
|
|
296
341
|
model=model_name,
|
|
297
342
|
system_prompt="You are a terse assistant and a Python expert.",
|
|
298
343
|
api_key=self.api_key,
|
|
344
|
+
kwargs={"http_client": http_client},
|
|
299
345
|
)
|
|
300
346
|
|
|
301
347
|
if provider == "openai": # pragma: no cover
|
|
@@ -314,6 +360,7 @@ class DraftValidation:
|
|
|
314
360
|
model=model_name,
|
|
315
361
|
system_prompt="You are a terse assistant and a Python expert.",
|
|
316
362
|
api_key=self.api_key,
|
|
363
|
+
kwargs={"http_client": http_client},
|
|
317
364
|
)
|
|
318
365
|
|
|
319
366
|
if provider == "ollama": # pragma: no cover
|
|
@@ -331,6 +378,7 @@ class DraftValidation:
|
|
|
331
378
|
chat = ChatOllama( # pragma: no cover
|
|
332
379
|
model=model_name,
|
|
333
380
|
system_prompt="You are a terse assistant and a Python expert.",
|
|
381
|
+
kwargs={"http_client": http_client},
|
|
334
382
|
)
|
|
335
383
|
|
|
336
384
|
if provider == "bedrock": # pragma: no cover
|
|
@@ -339,6 +387,7 @@ class DraftValidation:
|
|
|
339
387
|
chat = ChatBedrockAnthropic( # pragma: no cover
|
|
340
388
|
model=model_name,
|
|
341
389
|
system_prompt="You are a terse assistant and a Python expert.",
|
|
390
|
+
kwargs={"http_client": http_client},
|
|
342
391
|
)
|
|
343
392
|
|
|
344
393
|
self.response = str(chat.chat(prompt, stream=False, echo="none")) # pragma: no cover
|
pointblank/scan_profile.py
CHANGED
|
@@ -299,12 +299,12 @@ class _DataProfile: # TODO: feels redundant and weird
|
|
|
299
299
|
# instantiations that require consistent types.
|
|
300
300
|
all_same_type: bool = all(type(v) is first_type for v in values[1:])
|
|
301
301
|
if not all_same_type:
|
|
302
|
-
if strict:
|
|
303
|
-
msg = f"Some types in {key!s} stat are different. Turn off `strict` to bypass."
|
|
304
|
-
raise TypeError(msg)
|
|
305
|
-
for d in cols:
|
|
306
|
-
if key in d:
|
|
307
|
-
d[key] = str(d[key])
|
|
302
|
+
if strict: # pragma: no cover
|
|
303
|
+
msg = f"Some types in {key!s} stat are different. Turn off `strict` to bypass." # pragma: no cover
|
|
304
|
+
raise TypeError(msg) # pragma: no cover
|
|
305
|
+
for d in cols: # pragma: no cover
|
|
306
|
+
if key in d: # pragma: no cover
|
|
307
|
+
d[key] = str(d[key]) # pragma: no cover
|
|
308
308
|
|
|
309
309
|
return nw.from_dict(transpose_dicts(cols), backend=self.implementation)
|
|
310
310
|
|
pointblank/schema.py
CHANGED
|
@@ -343,15 +343,15 @@ class Schema:
|
|
|
343
343
|
schema_dict = {k: str(v) for k, v in schema_dict.items()}
|
|
344
344
|
self.columns = list(schema_dict.items())
|
|
345
345
|
|
|
346
|
-
elif table_type == "pyspark":
|
|
346
|
+
elif table_type == "pyspark": # pragma: no cover
|
|
347
347
|
# Convert PySpark DataFrame to Narwhals to get schema
|
|
348
|
-
nw_df = nw.from_native(self.tbl)
|
|
349
|
-
if _is_lazy_frame(data=nw_df):
|
|
350
|
-
schema_dict = dict(nw_df.collect_schema())
|
|
351
|
-
else:
|
|
352
|
-
schema_dict = dict(nw_df.schema.items())
|
|
353
|
-
schema_dict = {k: str(v) for k, v in schema_dict.items()}
|
|
354
|
-
self.columns = list(schema_dict.items())
|
|
348
|
+
nw_df = nw.from_native(self.tbl) # pragma: no cover
|
|
349
|
+
if _is_lazy_frame(data=nw_df): # pragma: no cover
|
|
350
|
+
schema_dict = dict(nw_df.collect_schema()) # pragma: no cover
|
|
351
|
+
else: # pragma: no cover
|
|
352
|
+
schema_dict = dict(nw_df.schema.items()) # pragma: no cover
|
|
353
|
+
schema_dict = {k: str(v) for k, v in schema_dict.items()} # pragma: no cover
|
|
354
|
+
self.columns = list(schema_dict.items()) # pragma: no cover
|
|
355
355
|
|
|
356
356
|
elif table_type in IBIS_BACKENDS:
|
|
357
357
|
schema_dict = dict(self.tbl.schema().items())
|
|
@@ -888,80 +888,6 @@ def _schema_info_generate_params_dict(
|
|
|
888
888
|
}
|
|
889
889
|
|
|
890
890
|
|
|
891
|
-
def _check_schema_match(
|
|
892
|
-
data_tbl: any,
|
|
893
|
-
schema: Schema,
|
|
894
|
-
complete: bool = True,
|
|
895
|
-
in_order: bool = True,
|
|
896
|
-
case_sensitive_colnames: bool = True,
|
|
897
|
-
case_sensitive_dtypes: bool = True,
|
|
898
|
-
full_match_dtypes: bool = True,
|
|
899
|
-
) -> bool:
|
|
900
|
-
"""
|
|
901
|
-
Check if the schema matches the target table.
|
|
902
|
-
|
|
903
|
-
This function performs schema validation and returns a boolean result.
|
|
904
|
-
|
|
905
|
-
Parameters
|
|
906
|
-
----------
|
|
907
|
-
data_tbl
|
|
908
|
-
The target table to validate.
|
|
909
|
-
schema
|
|
910
|
-
The expected schema.
|
|
911
|
-
complete
|
|
912
|
-
Whether the schema should be complete.
|
|
913
|
-
in_order
|
|
914
|
-
Whether the schema should be in order.
|
|
915
|
-
case_sensitive_colnames
|
|
916
|
-
Whether column names are case-sensitive.
|
|
917
|
-
case_sensitive_dtypes
|
|
918
|
-
Whether data types are case-sensitive.
|
|
919
|
-
full_match_dtypes
|
|
920
|
-
Whether data types must match exactly.
|
|
921
|
-
|
|
922
|
-
Returns
|
|
923
|
-
-------
|
|
924
|
-
bool
|
|
925
|
-
True if the schema matches, False otherwise.
|
|
926
|
-
"""
|
|
927
|
-
validation_info = _get_schema_validation_info(
|
|
928
|
-
data_tbl=data_tbl,
|
|
929
|
-
schema=schema,
|
|
930
|
-
passed=False, # This will be determined by the logic below
|
|
931
|
-
complete=complete,
|
|
932
|
-
in_order=in_order,
|
|
933
|
-
case_sensitive_colnames=case_sensitive_colnames,
|
|
934
|
-
case_sensitive_dtypes=case_sensitive_dtypes,
|
|
935
|
-
full_match_dtypes=full_match_dtypes,
|
|
936
|
-
)
|
|
937
|
-
|
|
938
|
-
# Determine if the schema validation passed based on the validation info
|
|
939
|
-
passed = True
|
|
940
|
-
|
|
941
|
-
# Check completeness requirement
|
|
942
|
-
if complete and not validation_info["columns_full_set"]:
|
|
943
|
-
passed = False
|
|
944
|
-
|
|
945
|
-
# Check order requirement
|
|
946
|
-
if in_order and not validation_info["columns_matched_in_order"]:
|
|
947
|
-
passed = False
|
|
948
|
-
|
|
949
|
-
# Check if all expected columns were found
|
|
950
|
-
if validation_info["columns_not_found"]:
|
|
951
|
-
passed = False
|
|
952
|
-
|
|
953
|
-
# Check column-specific validations
|
|
954
|
-
for col_info in validation_info["columns"].values():
|
|
955
|
-
if not col_info["colname_matched"]:
|
|
956
|
-
passed = False
|
|
957
|
-
if not col_info.get(
|
|
958
|
-
"dtype_matched", True
|
|
959
|
-
): # dtype_matched may not exist if no dtypes specified
|
|
960
|
-
passed = False
|
|
961
|
-
|
|
962
|
-
return passed
|
|
963
|
-
|
|
964
|
-
|
|
965
891
|
def _get_schema_validation_info(
|
|
966
892
|
data_tbl: any,
|
|
967
893
|
schema: Schema,
|
pointblank/thresholds.py
CHANGED
|
@@ -559,7 +559,7 @@ class FinalActions:
|
|
|
559
559
|
def send_alert():
|
|
560
560
|
summary = pb.get_validation_summary()
|
|
561
561
|
if summary["highest_severity"] == "critical":
|
|
562
|
-
print(f"ALERT: Critical validation failures found in {summary['
|
|
562
|
+
print(f"ALERT: Critical validation failures found in {summary['tbl_name']}")
|
|
563
563
|
|
|
564
564
|
validation = (
|
|
565
565
|
pb.Validate(
|