datacontract-cli 0.10.32__py3-none-any.whl → 0.10.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/cli.py +20 -5
- datacontract/data_contract.py +8 -2
- datacontract/engines/data_contract_checks.py +102 -59
- datacontract/engines/data_contract_test.py +37 -0
- datacontract/engines/fastjsonschema/check_jsonschema.py +8 -0
- datacontract/engines/soda/check_soda_execute.py +6 -0
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +3 -0
- datacontract/export/avro_converter.py +12 -2
- datacontract/export/excel_exporter.py +922 -0
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +4 -0
- datacontract/export/markdown_converter.py +115 -5
- datacontract/export/sql_type_converter.py +4 -0
- datacontract/imports/avro_importer.py +33 -7
- datacontract/imports/excel_importer.py +13 -5
- datacontract/imports/odcs_v3_importer.py +1 -0
- datacontract/imports/spark_importer.py +12 -1
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/METADATA +111 -12
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/RECORD +24 -22
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/licenses/LICENSE +1 -1
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -210,12 +210,21 @@ def export(
     # TODO: this should be a subcommand
     template: Annotated[
         Optional[Path],
-        typer.Option(
+        typer.Option(
+            help="The file path or URL of a template. For Excel format: path/URL to custom Excel template. For custom format: path to Jinja template."
+        ),
     ] = None,
 ):
     """
     Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
     """
+    # Validate that Excel format requires an output file path
+    if format == ExportFormat.excel and output is None:
+        console.print("❌ Error: Excel export requires an output file path.")
+        console.print("💡 Hint: Use --output to specify where to save the Excel file, e.g.:")
+        console.print("   datacontract export --format excel --output datacontract.xlsx")
+        raise typer.Exit(code=1)
+
     # TODO exception handling
     result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,

@@ -230,8 +239,13 @@ def export(
     if output is None:
         console.print(result, markup=False, soft_wrap=True)
     else:
-
-
+        if isinstance(result, bytes):
+            # If the result is bytes, we assume it's a binary file (e.g., Excel, PDF)
+            with output.open(mode="wb") as f:
+                f.write(result)
+        else:
+            with output.open(mode="w", encoding="utf-8") as f:
+                f.write(result)
         console.print(f"Written result to {output}")

@@ -482,13 +496,14 @@ def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict
     }

     # Create a list of the extra arguments, remove the leading -- from the cli arguments
-    trimmed_keys = list(map(lambda x
+    trimmed_keys = list(map(lambda x: str(x).replace("--", ""), context.args[::2]))
     # Merge the two dicts and return them as one dict
     return default_args | dict(zip(trimmed_keys, context.args[1::2]))

+
 @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
 def api(
-    ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
+    ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
     port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
     host: Annotated[
         str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
datacontract/data_contract.py
CHANGED
@@ -250,8 +250,14 @@ class DataContract:
             inline_quality=self._inline_quality,
         )

-    def export(
-
+    def export(
+        self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs
+    ) -> str | bytes:
+        if (
+            export_format == ExportFormat.html
+            or export_format == ExportFormat.mermaid
+            or export_format == ExportFormat.excel
+        ):
             data_contract = resolve.resolve_data_contract_v2(
                 self._data_contract_file,
                 self._data_contract_str,
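Note the return type widened to str | bytes, and the chained or comparisons select the formats that resolve against the v2 specification. The same condition can be written as a membership test; a sketch with a minimal stand-in enum (only the values used here, not the package's full ExportFormat):

from enum import Enum


class ExportFormat(str, Enum):
    # stand-in for the package's ExportFormat enum, only for this sketch
    html = "html"
    mermaid = "mermaid"
    excel = "excel"
    avro = "avro"


def needs_v2_resolution(fmt: ExportFormat) -> bool:
    # equivalent to the chained `or` comparisons above
    return fmt in (ExportFormat.html, ExportFormat.mermaid, ExportFormat.excel)


assert needs_v2_resolution(ExportFormat.excel)
assert not needs_v2_resolution(ExportFormat.avro)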
datacontract/engines/data_contract_checks.py
CHANGED

@@ -1,4 +1,6 @@
+import re
 import uuid
+from dataclasses import dataclass
 from typing import List
 from venv import logger

@@ -9,6 +11,12 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 from datacontract.model.run import Check


+@dataclass
+class QuotingConfig:
+    quote_field_name: bool = False
+    quote_model_name: bool = False
+
+
 def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
     checks: List[Check] = []
     for model_key, model_value in data_contract_spec.models.items():

@@ -26,37 +34,41 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
     fields = model_value.fields

     check_types = is_check_types(server)
-
+
+    quoting_config = QuotingConfig(
+        quote_field_name=server_type in ["postgres", "sqlserver"],
+        quote_model_name=server_type in ["postgres", "sqlserver"],
+    )

     for field_name, field in fields.items():
-        checks.append(check_field_is_present(model_name, field_name,
+        checks.append(check_field_is_present(model_name, field_name, quoting_config))
         if check_types and field.type is not None:
             sql_type = convert_to_sql_type(field, server_type)
-            checks.append(check_field_type(model_name, field_name, sql_type,
+            checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
         if field.required:
-            checks.append(check_field_required(model_name, field_name,
+            checks.append(check_field_required(model_name, field_name, quoting_config))
         if field.unique:
-            checks.append(check_field_unique(model_name, field_name,
+            checks.append(check_field_unique(model_name, field_name, quoting_config))
         if field.minLength is not None:
-            checks.append(check_field_min_length(model_name, field_name, field.minLength,
+            checks.append(check_field_min_length(model_name, field_name, field.minLength, quoting_config))
         if field.maxLength is not None:
-            checks.append(check_field_max_length(model_name, field_name, field.maxLength,
+            checks.append(check_field_max_length(model_name, field_name, field.maxLength, quoting_config))
         if field.minimum is not None:
-            checks.append(check_field_minimum(model_name, field_name, field.minimum,
+            checks.append(check_field_minimum(model_name, field_name, field.minimum, quoting_config))
         if field.maximum is not None:
-            checks.append(check_field_maximum(model_name, field_name, field.maximum,
+            checks.append(check_field_maximum(model_name, field_name, field.maximum, quoting_config))
         if field.exclusiveMinimum is not None:
-            checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum,
-            checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum,
+            checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quoting_config))
+            checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quoting_config))
         if field.exclusiveMaximum is not None:
-            checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum,
-            checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum,
+            checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quoting_config))
+            checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quoting_config))
         if field.pattern is not None:
-            checks.append(check_field_regex(model_name, field_name, field.pattern,
+            checks.append(check_field_regex(model_name, field_name, field.pattern, quoting_config))
         if field.enum is not None and len(field.enum) > 0:
-            checks.append(check_field_enum(model_name, field_name, field.enum,
+            checks.append(check_field_enum(model_name, field_name, field.enum, quoting_config))
         if field.quality is not None and len(field.quality) > 0:
-            quality_list = check_quality_list(model_name, field_name, field.quality)
+            quality_list = check_quality_list(model_name, field_name, field.quality, quoting_config)
             if (quality_list is not None) and len(quality_list) > 0:
                 checks.extend(quality_list)
         # TODO references: str = None

@@ -70,8 +82,8 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
     return checks


-def checks_for(model_name,
-    if
+def checks_for(model_name, quote_model_name: bool):
+    if quote_model_name:
         return f'checks for "{model_name}"'
     return f"checks for {model_name}"

@@ -98,11 +110,11 @@ def to_model_name(model_key, model_value, server_type):
     return model_key


-def check_field_is_present(model_name, field_name,
+def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig = QuotingConfig()) -> Check:
     check_type = "field_is_present"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 "schema": {
                     "name": check_key,

@@ -127,11 +139,13 @@ def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Ch
     )


-def check_field_type(
+def check_field_type(
+    model_name: str, field_name: str, expected_type: str, quoting_config: QuotingConfig = QuotingConfig()
+):
     check_type = "field_type"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 "schema": {
                     "name": check_key,

@@ -158,8 +172,8 @@ def check_field_type(model_name: str, field_name: str, expected_type: str, quote
     )


-def check_field_required(model_name: str, field_name: str,
-    if quote_field_name:
+def check_field_required(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -167,7 +181,7 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
     check_type = "field_required"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"missing_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -189,8 +203,8 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
     )


-def check_field_unique(model_name: str, field_name: str,
-    if quote_field_name:
+def check_field_unique(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -198,7 +212,7 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
     check_type = "field_unique"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"duplicate_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -220,8 +234,10 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
     )


-def check_field_min_length(
-
+def check_field_min_length(
+    model_name: str, field_name: str, min_length: int, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -229,7 +245,7 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
     check_type = "field_min_length"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -252,8 +268,10 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
     )


-def check_field_max_length(
-
+def check_field_max_length(
+    model_name: str, field_name: str, max_length: int, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -261,7 +279,7 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
     check_type = "field_max_length"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -284,8 +302,10 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
     )


-def check_field_minimum(
-
+def check_field_minimum(
+    model_name: str, field_name: str, minimum: int, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -293,7 +313,7 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
     check_type = "field_minimum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -316,8 +336,10 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
     )


-def check_field_maximum(
-
+def check_field_maximum(
+    model_name: str, field_name: str, maximum: int, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -325,7 +347,7 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
     check_type = "field_maximum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -348,8 +370,10 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
     )


-def check_field_not_equal(
-
+def check_field_not_equal(
+    model_name: str, field_name: str, value: int, quoting_config: QuotingConfig = QuotingConfig()
+):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -357,7 +381,7 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
     check_type = "field_not_equal"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -380,8 +404,8 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
     )


-def check_field_enum(model_name: str, field_name: str, enum: list,
-    if quote_field_name:
+def check_field_enum(model_name: str, field_name: str, enum: list, quoting_config: QuotingConfig = QuotingConfig()):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -389,7 +413,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
     check_type = "field_enum"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -412,8 +436,8 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
     )


-def check_field_regex(model_name: str, field_name: str, pattern: str,
-    if quote_field_name:
+def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_config: QuotingConfig = QuotingConfig()):
+    if quoting_config.quote_field_name:
         field_name_for_soda = f'"{field_name}"'
     else:
         field_name_for_soda = field_name

@@ -421,7 +445,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
     check_type = "field_regex"
     check_key = f"{model_name}__{field_name}__{check_type}"
     sodacl_check_dict = {
-        checks_for(model_name,
+        checks_for(model_name, quoting_config.quote_model_name): [
             {
                 f"invalid_count({field_name_for_soda}) = 0": {
                     "name": check_key,

@@ -444,7 +468,9 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
     )


-def check_quality_list(
+def check_quality_list(
+    model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
+) -> List[Check]:
     checks: List[Check] = []

     count = 0

@@ -457,15 +483,20 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
             check_key = f"{model_name}__{field_name}__quality_sql_{count}"
             check_type = "model_quality_sql"
             threshold = to_sodacl_threshold(quality)
-            query = prepare_query(quality, model_name, field_name)
+            query = prepare_query(quality, model_name, field_name, quoting_config)
             if query is None:
                 logger.warning(f"Quality check {check_key} has no query")
                 continue
             if threshold is None:
                 logger.warning(f"Quality check {check_key} has no valid threshold")
                 continue
+
+            if quoting_config.quote_model_name:
+                model_name_for_soda = f'"{model_name}"'
+            else:
+                model_name_for_soda = model_name
             sodacl_check_dict = {
-                f"checks for {
+                f"checks for {model_name_for_soda}": [
                 {
                     f"{check_key} {threshold}": {
                         f"{check_key} query": query,

@@ -493,7 +524,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
     return checks


-def prepare_query(
+def prepare_query(
+    quality: Quality, model_name: str, field_name: str = None, quoting_config: QuotingConfig = QuotingConfig()
+) -> str | None:
     if quality.query is None:
         return None
     if quality.query == "":

@@ -501,14 +534,24 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->

     query = quality.query

-
-
-
+    if quoting_config.quote_field_name:
+        field_name_for_soda = f'"{field_name}"'
+    else:
+        field_name_for_soda = field_name
+
+    if quoting_config.quote_model_name:
+        model_name_for_soda = f'"{model_name}"'
+    else:
+        model_name_for_soda = model_name
+
+    query = re.sub(r'["\']?\{model}["\']?', model_name_for_soda, query)
+    query = re.sub(r'["\']?{schema}["\']?', model_name_for_soda, query)
+    query = re.sub(r'["\']?{table}["\']?', model_name_for_soda, query)

     if field_name is not None:
-        query =
-        query =
-        query =
+        query = re.sub(r'["\']?{field}["\']?', field_name_for_soda, query)
+        query = re.sub(r'["\']?{column}["\']?', field_name_for_soda, query)
+        query = re.sub(r'["\']?{property}["\']?', field_name_for_soda, query)

     return query
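Taken together, QuotingConfig quotes model and field identifiers for engines that need it (postgres, sqlserver), and prepare_query strips any quotes already around the {model}/{field} placeholders before substituting. A self-contained sketch of that substitution, reusing the dataclass and two of the re.sub patterns from the hunks above (the sample query is invented):

import re
from dataclasses import dataclass


@dataclass
class QuotingConfig:
    quote_field_name: bool = False
    quote_model_name: bool = False


def substitute(query: str, model_name: str, field_name: str, cfg: QuotingConfig) -> str:
    model = f'"{model_name}"' if cfg.quote_model_name else model_name
    field = f'"{field_name}"' if cfg.quote_field_name else field_name
    # optional surrounding quotes are consumed along with the placeholder
    query = re.sub(r'["\']?\{model}["\']?', model, query)
    query = re.sub(r'["\']?{field}["\']?', field, query)
    return query


query = "SELECT COUNT(*) FROM '{model}' WHERE {field} IS NULL"
print(substitute(query, "orders", "order id", QuotingConfig(True, True)))
# SELECT COUNT(*) FROM "orders" WHERE "order id" IS NULL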
datacontract/engines/data_contract_test.py
CHANGED

@@ -1,5 +1,9 @@
+import atexit
+import os
+import tempfile
 import typing

+import requests
 from duckdb.duckdb import DuckDBPyConnection

 from datacontract.engines.data_contract_checks import create_checks

@@ -46,6 +50,9 @@ def execute_data_contract_test(
     run.outputPortId = server.outputPortId
     run.server = server_name

+    if server.type == "api":
+        server = process_api_response(run, server)
+
     run.checks.extend(create_checks(data_contract_specification, server))

     # TODO check server is supported type for nicer error messages

@@ -74,3 +81,33 @@ def get_server(data_contract_specification: DataContractSpecification, server_na
         server_name = list(data_contract_specification.servers.keys())[0]
     server = data_contract_specification.servers.get(server_name)
     return server
+
+
+def process_api_response(run, server):
+    tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract_cli_api_")
+    atexit.register(tmp_dir.cleanup)
+    headers = {}
+    if os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION") is not None:
+        headers["Authorization"] = os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION")
+    try:
+        response = requests.get(server.location, headers=headers)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        raise DataContractException(
+            type="connection",
+            name="API server connection error",
+            result=ResultEnum.error,
+            reason=f"Failed to fetch API response from {server.location}: {e}",
+            engine="datacontract",
+        )
+    with open(f"{tmp_dir.name}/api_response.json", "w") as f:
+        f.write(response.text)
+    run.log_info(f"Saved API response to {tmp_dir.name}/api_response.json")
+    server = Server(
+        type="local",
+        format="json",
+        path=f"{tmp_dir.name}/api_response.json",
+        dataProductId=server.dataProductId,
+        outputPortId=server.outputPortId,
+    )
+    return server
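The new api server type is handled by downloading the response once and rewriting the server to a local JSON file before checks run. A reduced sketch of the fetch step (the env var and temp-dir prefix come from the hunk; the explicit timeout is an addition for the sketch, not in the diff):

import atexit
import os
import tempfile

import requests


def fetch_api_payload(url: str) -> str:
    """Download an API response to a temp file and return the file path."""
    tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract_cli_api_")
    atexit.register(tmp_dir.cleanup)  # keep the file alive until the process exits
    headers = {}
    token = os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION")
    if token is not None:
        headers["Authorization"] = token
    response = requests.get(url, headers=headers, timeout=30)  # timeout is an assumption
    response.raise_for_status()
    path = os.path.join(tmp_dir.name, "api_response.json")
    with open(path, "w") as f:
        f.write(response.text)
    return path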
datacontract/engines/fastjsonschema/check_jsonschema.py
CHANGED

@@ -159,6 +159,14 @@ def process_json_file(run, schema, model_name, validate, file, delimiter):

 def process_local_file(run, server, schema, model_name, validate):
     path = server.path
+    if not path:
+        raise DataContractException(
+            type="schema",
+            name="Check that JSON has valid schema",
+            result=ResultEnum.warning,
+            reason="For server with type 'local', a 'path' must be defined.",
+            engine="datacontract",
+        )
     if "{model}" in path:
         path = path.format(model=model_name)
datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -2,6 +2,8 @@ import logging
 import typing
 import uuid

+from datacontract.engines.soda.connections.athena import to_athena_soda_configuration
+
 if typing.TYPE_CHECKING:
     from pyspark.sql import SparkSession

@@ -106,6 +108,10 @@ def check_soda_execute(
         soda_configuration_str = to_trino_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "athena":
+        soda_configuration_str = to_athena_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

     else:
         run.checks.append(
datacontract/engines/soda/connections/athena.py
ADDED

@@ -0,0 +1,79 @@
+import os
+
+import yaml
+
+from datacontract.model.exceptions import DataContractException
+
+
+def to_athena_soda_configuration(server):
+    s3_region = os.getenv("DATACONTRACT_S3_REGION")
+    s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
+    s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
+
+    # Validate required parameters
+    if not s3_access_key_id:
+        raise DataContractException(
+            type="athena-connection",
+            name="missing_access_key_id",
+            reason="AWS access key ID is required. Set the DATACONTRACT_S3_ACCESS_KEY_ID environment variable.",
+            engine="datacontract",
+        )
+
+    if not s3_secret_access_key:
+        raise DataContractException(
+            type="athena-connection",
+            name="missing_secret_access_key",
+            reason="AWS secret access key is required. Set the DATACONTRACT_S3_SECRET_ACCESS_KEY environment variable.",
+            engine="datacontract",
+        )
+
+    if not hasattr(server, "schema_") or not server.schema_:
+        raise DataContractException(
+            type="athena-connection",
+            name="missing_schema",
+            reason="Schema is required for Athena connection. Specify the schema where your tables exist in the server configuration.",
+            engine="datacontract",
+        )
+
+    if not hasattr(server, "stagingDir") or not server.stagingDir:
+        raise DataContractException(
+            type="athena-connection",
+            name="missing_s3_staging_dir",
+            reason="S3 staging directory is required for Athena connection. This should be the Amazon S3 Query Result Location (e.g., 's3://my-bucket/athena-results/').",
+            engine="datacontract",
+        )
+
+    # Validate S3 staging directory format
+    if not server.stagingDir.startswith("s3://"):
+        raise DataContractException(
+            type="athena-connection",
+            name="invalid_s3_staging_dir",
+            reason=f"S3 staging directory must start with 's3://'. Got: {server.s3_staging_dir}. Example: 's3://my-bucket/athena-results/'",
+            engine="datacontract",
+        )
+
+    data_source = {
+        "type": "athena",
+        "access_key_id": s3_access_key_id,
+        "secret_access_key": s3_secret_access_key,
+        "schema": server.schema_,
+        "staging_dir": server.stagingDir,
+    }
+
+    if s3_region:
+        data_source["region_name"] = s3_region
+    elif server.region_name:
+        data_source["region_name"] = server.region_name
+
+    if server.catalog:
+        # Optional, Identify the name of the Data Source, also referred to as a Catalog. The default value is `awsdatacatalog`.
+        data_source["catalog"] = server.catalog
+
+    if s3_session_token:
+        data_source["aws_session_token"] = s3_session_token
+
+    soda_configuration = {f"data_source {server.type}": data_source}
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
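For illustration, a sketch of the Soda configuration YAML this function would emit for a hypothetical server (all values invented; SimpleNamespace stands in for the real Server model):

from types import SimpleNamespace

import yaml

server = SimpleNamespace(type="athena", schema_="sales", stagingDir="s3://my-bucket/athena-results/")
data_source = {
    "type": "athena",
    "access_key_id": "AKIA...",   # placeholder for DATACONTRACT_S3_ACCESS_KEY_ID
    "secret_access_key": "...",   # placeholder for DATACONTRACT_S3_SECRET_ACCESS_KEY
    "schema": server.schema_,
    "staging_dir": server.stagingDir,
}
print(yaml.dump({f"data_source {server.type}": data_source}))
# data_source athena:
#   access_key_id: AKIA...
#   schema: sales
#   secret_access_key: '...'
#   staging_dir: s3://my-bucket/athena-results/
#   type: athena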
datacontract/engines/soda/connections/duckdb_connection.py
CHANGED

@@ -71,6 +71,9 @@ def get_duckdb_connection(
     elif server.format == "delta":
         con.sql("update extensions;")  # Make sure we have the latest delta extension
         con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
+        table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchdf()
+        if table_info is not None and not table_info.empty:
+            run.log_info(f"DuckDB Table Info: {table_info.to_string(index=False)}")
     return con
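The added lines log the view's schema right after it is created. The same introspection can be run standalone against an in-memory DuckDB (table name and columns invented; fetchdf needs pandas installed):

import duckdb

con = duckdb.connect()  # in-memory database
con.sql("CREATE TABLE orders (order_id INTEGER, amount DECIMAL(10, 2))")

# Same introspection call the delta branch now logs
table_info = con.sql("PRAGMA table_info('orders');").fetchdf()
if table_info is not None and not table_info.empty:
    print(f"DuckDB Table Info: {table_info.to_string(index=False)}")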
datacontract/export/avro_converter.py
CHANGED

@@ -44,12 +44,18 @@ def to_avro_field(field, field_name):
     avro_type = to_avro_type(field, field_name)
     avro_field["type"] = avro_type if is_required_avro else ["null", avro_type]

-
-
+    # Handle enum types - both required and optional
+    if avro_type == "enum" or (isinstance(avro_field["type"], list) and "enum" in avro_field["type"]):
+        enum_def = {
             "type": "enum",
             "name": field.title,
             "symbols": field.enum,
         }
+        if is_required_avro:
+            avro_field["type"] = enum_def
+        else:
+            # Replace "enum" with the full enum definition in the union
+            avro_field["type"] = ["null", enum_def]

     if field.config:
         if "avroDefault" in field.config:

@@ -77,6 +83,10 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     if "avroType" in field.config:
         return field.config["avroType"]

+    # Check for enum fields based on presence of enum list and avroType config
+    if field.enum and field.config and field.config.get("avroType") == "enum":
+        return "enum"
+
     if field.type is None:
         return "null"
     if field.type in ["string", "varchar", "text"]: