datacontract-cli 0.10.18__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (28)
  1. datacontract/cli.py +20 -27
  2. datacontract/data_contract.py +7 -8
  3. datacontract/engines/soda/connections/duckdb.py +22 -9
  4. datacontract/export/data_caterer_converter.py +20 -7
  5. datacontract/export/sodacl_converter.py +21 -4
  6. datacontract/export/sql_type_converter.py +7 -2
  7. datacontract/imports/csv_importer.py +89 -0
  8. datacontract/imports/importer.py +1 -0
  9. datacontract/imports/importer_factory.py +5 -0
  10. datacontract/init/init_template.py +20 -0
  11. datacontract/integration/datamesh_manager.py +5 -10
  12. datacontract/lint/linters/field_reference_linter.py +10 -1
  13. datacontract/lint/resolve.py +22 -1
  14. datacontract/lint/schema.py +10 -3
  15. datacontract/model/data_contract_specification.py +2 -0
  16. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  17. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  18. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  19. datacontract/templates/datacontract.html +20 -1
  20. datacontract/templates/partials/definition.html +15 -5
  21. datacontract/templates/partials/model_field.html +9 -0
  22. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +445 -295
  23. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +27 -23
  24. datacontract/init/download_datacontract_file.py +0 -17
  25. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  26. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +0 -0
  27. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  28. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
datacontract/cli.py CHANGED
@@ -1,3 +1,4 @@
+import os
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, List, Optional
@@ -15,15 +16,11 @@ from datacontract import web
 from datacontract.catalog.catalog import create_data_contract_html, create_index_html
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.imports.importer import ImportFormat
-from datacontract.init.download_datacontract_file import (
-    FileExistsException,
-    download_datacontract_file,
-)
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import (
    publish_data_contract_to_datamesh_manager,
 )
-
-DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+from datacontract.lint.resolve import resolve_data_contract_dict
 
 console = Console()
 
@@ -70,24 +67,21 @@ def common(
 @app.command()
 def init(
     location: Annotated[
-        str,
-        typer.Argument(help="The location (url or path) of the data contract yaml to create."),
+        str, typer.Argument(help="The location of the data contract file to create.")
     ] = "datacontract.yaml",
-    template: Annotated[
-        str, typer.Option(help="URL of a template or data contract")
-    ] = "https://datacontract.com/datacontract.init.yaml",
+    template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
     overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
 ):
     """
-    Download a datacontract.yaml template and write it to file.
+    Create an empty data contract.
     """
-    try:
-        download_datacontract_file(location, template, overwrite)
-    except FileExistsException:
+    if not overwrite and os.path.exists(location):
         console.print("File already exists, use --overwrite to overwrite")
         raise typer.Exit(code=1)
-    else:
-        console.print("📄 data contract written to " + location)
+    template_str = get_init_template(template)
+    with open(location, "w") as f:
+        f.write(template_str)
+    console.print("📄 data contract written to " + location)
 
 
 @app.command()
@@ -99,7 +93,7 @@ def lint(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -117,7 +111,7 @@ def test(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     server: Annotated[
         str,
         typer.Option(
@@ -135,7 +129,7 @@ def test(
     logs: Annotated[bool, typer.Option(help="Print logs")] = False,
     ssl_verification: Annotated[
         bool,
-        typer.Option(help="SSL verification when publishing the test results."),
+        typer.Option(help="SSL verification when publishing the data contract."),
     ] = True,
 ):
     """
@@ -150,6 +144,7 @@ def test(
         publish_url=publish,
         server=server,
         examples=examples,
+        ssl_verification=ssl_verification,
     ).test()
     if logs:
         _print_logs(run)
@@ -197,7 +192,7 @@ def export(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     # TODO: this should be a subcommand
     engine: Annotated[
         Optional[str],
@@ -284,7 +279,7 @@ def import_(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -321,7 +316,7 @@ def publish(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     ssl_verification: Annotated[
         bool,
         typer.Option(help="SSL verification when publishing the data contract."),
@@ -331,9 +326,7 @@ def publish(
     Publish the data contract to the Data Mesh Manager.
     """
     publish_data_contract_to_datamesh_manager(
-        data_contract_specification=DataContract(
-            data_contract_file=location, schema_location=schema
-        ).get_data_contract_specification(),
+        data_contract_dict=resolve_data_contract_dict(location),
         ssl_verification=ssl_verification,
     )
 
@@ -350,7 +343,7 @@ def catalog(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Create an html catalog of data contracts.
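
The schema option of every command now defaults to None, falling back to the JSON Schema bundled with the wheel (see datacontract/lint/schema.py below), and init writes a bundled template instead of downloading one. A minimal sketch of the new init flow; the printed attribute is only assumed to be set by the bundled template:

    from datacontract.data_contract import DataContract

    spec = DataContract.init(template=None)  # parses the bundled datacontract-1.1.0.init.yaml
    print(spec.id)  # whatever id the bundled template declares, if any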

datacontract/data_contract.py CHANGED
@@ -22,6 +22,7 @@ from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.exporter import ExportFormat
 from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.importer_factory import importer_factory
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
@@ -36,8 +37,6 @@ from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Check, Run
 
-DEFAULT_DATA_CONTRACT_TEMPLATE_URL = "https://datacontract.com/datacontract.init.yaml"
-
 
 class DataContract:
     def __init__(
@@ -52,6 +51,7 @@ class DataContract:
         spark: "SparkSession" = None,
         inline_definitions: bool = True,
         inline_quality: bool = True,
+        ssl_verification: bool = True,
     ):
         self._data_contract_file = data_contract_file
         self._data_contract_str = data_contract_str
@@ -63,6 +63,7 @@ class DataContract:
         self._spark = spark
         self._inline_definitions = inline_definitions
         self._inline_quality = inline_quality
+        self._ssl_verification = ssl_verification
         self.all_linters = {
             ExampleModelLinter(),
             QualityUsesSchemaLinter(),
@@ -74,10 +75,9 @@ class DataContract:
         }
 
     @classmethod
-    def init(
-        cls, template: str = DEFAULT_DATA_CONTRACT_TEMPLATE_URL, schema: typing.Optional[str] = None
-    ) -> DataContractSpecification:
-        return resolve.resolve_data_contract(data_contract_location=template, schema_location=schema)
+    def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
+        template_str = get_init_template(template)
+        return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
 
     def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
         """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
@@ -231,7 +231,7 @@ class DataContract:
         run.finish()
 
         if self._publish_url is not None:
-            publish_test_results_to_datamesh_manager(run, self._publish_url)
+            publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
 
         return run
 
@@ -352,7 +352,6 @@ class DataContract:
         schema: typing.Optional[str] = None,
         **kwargs,
     ) -> DataContractSpecification:
-        template = DEFAULT_DATA_CONTRACT_TEMPLATE_URL if template is None else template
         data_contract_specification_initial = DataContract.init(template=template, schema=schema)
 
         return importer_factory.create(format).import_source(
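
The ssl_verification flag added to the constructor is now forwarded when publishing test results, replacing the hardcoded verify=False in the integration module. A hedged usage sketch; the file path and publish URL are placeholders:

    from datacontract.data_contract import DataContract

    run = DataContract(
        data_contract_file="datacontract.yaml",               # placeholder path
        publish_url="https://dmm.internal.example/api/runs",  # placeholder URL
        ssl_verification=False,                               # forwarded to requests' verify=
    ).test()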

datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -146,6 +146,7 @@ def setup_azure_connection(con, server):
     tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
     client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
     client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
+    storage_account = server.storageAccount
 
     if tenant_id is None:
         raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
@@ -157,12 +158,24 @@ def setup_azure_connection(con, server):
     con.install_extension("azure")
     con.load_extension("azure")
 
-    con.sql(f"""
-    CREATE SECRET azure_spn (
-        TYPE AZURE,
-        PROVIDER SERVICE_PRINCIPAL,
-        TENANT_ID '{tenant_id}',
-        CLIENT_ID '{client_id}',
-        CLIENT_SECRET '{client_secret}'
-    );
-    """)
+    if storage_account is not None:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}',
+            ACCOUNT_NAME '{storage_account}'
+        );
+        """)
+    else:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}'
+        );
+        """)

datacontract/export/data_caterer_converter.py CHANGED
@@ -42,11 +42,11 @@ def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server
         "name": model_key,
         "type": _to_step_type(server),
         "options": _to_data_source_options(model_key, server),
-        "schema": [],
+        "fields": [],
     }
     fields = _to_fields(model_value.fields)
     if fields:
-        step["schema"] = fields
+        step["fields"] = fields
     return step
 
 
@@ -97,16 +97,29 @@ def _to_field(field_name: str, field: Field) -> dict:
     if new_type == "object" or new_type == "record" or new_type == "struct":
         # need to get nested field definitions
         nested_fields = _to_fields(field.fields)
-        dc_field["schema"] = {"fields": nested_fields}
+        dc_field["fields"] = nested_fields
+    elif new_type == "array":
+        if field.items is not None and field.items.type is not None:
+            dc_generator_opts["arrayType"] = _to_data_type(field.items.type)
+        else:
+            dc_generator_opts["arrayType"] = "string"
 
     if field.enum is not None and len(field.enum) > 0:
         dc_generator_opts["oneOf"] = field.enum
     if field.unique is not None and field.unique:
         dc_generator_opts["isUnique"] = field.unique
+    if field.primaryKey is not None and field.primaryKey:
+        dc_generator_opts["isPrimaryKey"] = field.primaryKey
     if field.minLength is not None:
-        dc_generator_opts["minLength"] = field.minLength
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMinLen"] = field.minLength
+        else:
+            dc_generator_opts["minLen"] = field.minLength
     if field.maxLength is not None:
-        dc_generator_opts["maxLength"] = field.maxLength
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMaxLen"] = field.maxLength
+        else:
+            dc_generator_opts["maxLen"] = field.maxLength
     if field.pattern is not None:
         dc_generator_opts["regex"] = field.pattern
     if field.minimum is not None:
@@ -115,7 +128,7 @@ def _to_field(field_name: str, field: Field) -> dict:
         dc_generator_opts["max"] = field.maximum
 
     if len(dc_generator_opts.keys()) > 0:
-        dc_field["generator"] = {"options": dc_generator_opts}
+        dc_field["options"] = dc_generator_opts
     return dc_field
 
 
@@ -124,7 +137,7 @@ def _to_data_type(data_type):
         return "double"
     elif data_type == "decimal" or data_type == "bigint":
         return "decimal"
-    elif data_type == "int":
+    elif data_type == "int" or data_type == "integer":
         return "integer"
     elif data_type == "long":
         return "long"

datacontract/export/sodacl_converter.py CHANGED
@@ -30,6 +30,7 @@ def to_sodacl_yaml(
 
 def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
+    model_name = to_model_name(model_key, model_value, server_type)
     fields = model_value.fields
 
     quote_field_name = server_type in ["postgres", "sqlserver"]
@@ -62,25 +63,41 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
         if field.enum is not None and len(field.enum) > 0:
             checks.append(check_field_enum(field_name, field.enum, quote_field_name))
         if field.quality is not None and len(field.quality) > 0:
-            quality_list = check_quality_list(model_key, field_name, field.quality)
+            quality_list = check_quality_list(model_name, field_name, field.quality)
             if (quality_list is not None) and len(quality_list) > 0:
                 checks.append(quality_list)
         # TODO references: str = None
         # TODO format
 
     if model_value.quality is not None and len(model_value.quality) > 0:
-        quality_list = check_quality_list(model_key, None, model_value.quality)
+        quality_list = check_quality_list(model_name, None, model_value.quality)
         if (quality_list is not None) and len(quality_list) > 0:
             checks.append(quality_list)
 
-    checks_for_model_key = f"checks for {model_key}"
+    checks_for_model_key = f"checks for {model_name}"
 
     if quote_field_name:
-        checks_for_model_key = f'checks for "{model_key}"'
+        checks_for_model_key = f'checks for "{model_name}"'
 
     return checks_for_model_key, checks
 
 
+def to_model_name(model_key, model_value, server_type):
+    if server_type == "databricks":
+        if model_value.config is not None and "databricksTable" in model_value.config:
+            return model_value.config["databricksTable"]
+    if server_type == "snowflake":
+        if model_value.config is not None and "snowflakeTable" in model_value.config:
+            return model_value.config["snowflakeTable"]
+    if server_type == "sqlserver":
+        if model_value.config is not None and "sqlserverTable" in model_value.config:
+            return model_value.config["sqlserverTable"]
+    if server_type == "postgres" or server_type == "postgresql":
+        if model_value.config is not None and "postgresTable" in model_value.config:
+            return model_value.config["postgresTable"]
+    return model_key
+
+
 def check_field_is_present(field_name):
     return {
         "schema": {

datacontract/export/sql_type_converter.py CHANGED
@@ -182,11 +182,16 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-        return "STRUCT"
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-        return "ARRAY"
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
     return None
 
 
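convert_to_databricks now expands nested fields and array items recursively instead of returning bare STRUCT or ARRAY. A sketch; the Field construction and the STRING/INT leaf mappings are assumed from unchanged branches of the converter:

    from datacontract.model.data_contract_specification import Field

    address = Field(type="struct", fields={"city": Field(type="string"), "zip": Field(type="int")})
    tags = Field(type="array", items=Field(type="string"))

    convert_to_databricks(address)  # e.g. "STRUCT<city STRING, zip INT>"
    convert_to_databricks(tags)     # e.g. "ARRAY<STRING>"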

datacontract/imports/csv_importer.py ADDED
@@ -0,0 +1,89 @@
+import os
+
+import clevercsv
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Example, Field, Model, Server
+
+
+class CsvImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_csv(data_contract_specification, self.import_format, source)
+
+
+def import_csv(data_contract_specification: DataContractSpecification, format: str, source: str):
+    include_example = False
+
+    # detect encoding and dialect
+    encoding = clevercsv.encoding.get_encoding(source)
+    with open(source, "r", newline="") as fp:
+        dialect = clevercsv.Sniffer().sniff(fp.read(10000))
+
+    # using auto detecting of the format and encoding
+    df = clevercsv.read_dataframe(source)
+
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    # use the file name as table name
+    table_name = os.path.splitext(os.path.basename(source))[0]
+
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    data_contract_specification.servers["production"] = Server(
+        type="local", path=source, format="csv", delimiter=dialect.delimiter
+    )
+
+    fields = {}
+    for column, dtype in df.dtypes.items():
+        field = Field()
+        field.type = map_type_from_pandas(dtype.name)
+        fields[column] = field
+
+    data_contract_specification.models[table_name] = Model(
+        type="table",
+        description=f"Csv file with encoding {encoding}",
+        fields=fields,
+    )
+
+    # multiline data is not correctly handled by yaml dump
+    if include_example:
+        if data_contract_specification.examples is None:
+            data_contract_specification.examples = []
+
+        # read first 10 lines with the detected encoding
+        with open(source, "r", encoding=encoding) as csvfile:
+            lines = csvfile.readlines()[:10]
+
+        data_contract_specification.examples.append(Example(type="csv", model=table_name, data="".join(lines)))
+
+    return data_contract_specification
+
+
+def map_type_from_pandas(sql_type: str):
+    if sql_type is None:
+        return None
+
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed == "object":
+        return "string"
+    elif sql_type_normed.startswith("str"):
+        return "string"
+    elif sql_type_normed.startswith("int"):
+        return "integer"
+    elif sql_type_normed.startswith("float"):
+        return "float"
+    elif sql_type_normed.startswith("bool"):
+        return "boolean"
+    elif sql_type_normed.startswith("timestamp"):
+        return "timestamp"
+    elif sql_type_normed == "datetime64":
+        return "date"
+    elif sql_type_normed == "timedelta[ns]":
+        return "timestamp_ntz"
+    else:
+        return "variant"

datacontract/imports/importer.py CHANGED
@@ -31,6 +31,7 @@ class ImportFormat(str, Enum):
     spark = "spark"
     iceberg = "iceberg"
     parquet = "parquet"
+    csv = "csv"
 
     @classmethod
     def get_supported_formats(cls):

datacontract/imports/importer_factory.py CHANGED
@@ -104,3 +104,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.parquet_importer",
     class_name="ParquetImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.csv,
+    module_path="datacontract.imports.csv_importer",
+    class_name="CsvImporter",
+)

datacontract/init/init_template.py ADDED
@@ -0,0 +1,20 @@
+import importlib.resources as resources
+import logging
+
+import requests
+
+DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml"
+
+
+def get_init_template(location: str = None) -> str:
+    if location is None:
+        logging.info("Use default bundled template " + DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        schemas = resources.files("datacontract")
+        template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        with template.open("r") as file:
+            return file.read()
+    elif location.startswith("http://") or location.startswith("https://"):
+        return requests.get(location).text
+    else:
+        with open(location, "r") as file:
+            return file.read()
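
get_init_template resolves a template from one of three sources: the bundled package resource, an HTTP(S) URL, or a local path. A sketch; the URL and path are placeholders:

    get_init_template()                                        # bundled datacontract-1.1.0.init.yaml
    get_init_template("https://example.com/my-template.yaml")  # fetched with requests
    get_init_template("templates/my-template.yaml")            # read from the local filesystem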

datacontract/integration/datamesh_manager.py CHANGED
@@ -2,11 +2,10 @@ import os
 
 import requests
 
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.run import Run
 
 
-def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
+def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_verification: bool):
     try:
         if publish_url is None:
             # this url supports Data Mesh Manager and Data Contract Manager
@@ -32,7 +31,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
             url,
             data=request_body,
             headers=headers,
-            verify=False,
+            verify=ssl_verification,
         )
         # print("Status Code:", response.status_code)
         # print("Response Body:", response.text)
@@ -44,9 +43,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
         run.log_error(f"Failed publishing test results. Error: {str(e)}")
 
 
-def publish_data_contract_to_datamesh_manager(
-    data_contract_specification: DataContractSpecification, ssl_verification: bool
-):
+def publish_data_contract_to_datamesh_manager(data_contract_dict: dict, ssl_verification: bool):
     try:
         api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
         host = "https://api.datamesh-manager.com"
@@ -59,13 +56,11 @@ def publish_data_contract_to_datamesh_manager(
                 "Cannot publish data contract, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
             )
         headers = {"Content-Type": "application/json", "x-api-key": api_key}
-        spec = data_contract_specification
-        id = spec.id
+        id = data_contract_dict["id"]
         url = f"{host}/api/datacontracts/{id}"
-        request_body = spec.model_dump_json().encode("utf-8")
         response = requests.put(
             url=url,
-            data=request_body,
+            json=data_contract_dict,
             headers=headers,
             verify=ssl_verification,
         )
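
publish_data_contract_to_datamesh_manager now receives the parsed YAML as a dict and sends it through requests' json= parameter, so the payload matches the contract as authored rather than a pydantic re-serialization. A hedged sketch; the file path is a placeholder and DATAMESH_MANAGER_API_KEY must be set:

    from datacontract.lint.resolve import resolve_data_contract_dict

    contract_dict = resolve_data_contract_dict(data_contract_location="datacontract.yaml")
    publish_data_contract_to_datamesh_manager(contract_dict, ssl_verification=True)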

datacontract/lint/linters/field_reference_linter.py CHANGED
@@ -22,7 +22,16 @@ class FieldReferenceLinter(Linter):
         for model_name, model in contract.models.items():
             for field_name, field in model.fields.items():
                 if field.references:
-                    (ref_model, ref_field) = field.references.split(".", maxsplit=2)
+                    reference_hierarchy = field.references.split(".")
+                    if len(reference_hierarchy) != 2:
+                        result = result.with_error(
+                            f"Field '{field_name}' in model '{model_name}'"
+                            f" references must follow the model.field syntax and refer to a field in a model in this data contract."
+                        )
+                        continue
+                    ref_model = reference_hierarchy[0]
+                    ref_field = reference_hierarchy[1]
+
                     if ref_model not in contract.models:
                         result = result.with_error(
                             f"Field '{field_name}' in model '{model_name}'"

datacontract/lint/resolve.py CHANGED
@@ -44,6 +44,27 @@ def resolve_data_contract(
     )
 
 
+def resolve_data_contract_dict(
+    data_contract_location: str = None,
+    data_contract_str: str = None,
+    data_contract: DataContractSpecification = None,
+) -> dict:
+    if data_contract_location is not None:
+        return _to_yaml(read_resource(data_contract_location))
+    elif data_contract_str is not None:
+        return _to_yaml(data_contract_str)
+    elif data_contract is not None:
+        return data_contract.model_dump()
+    else:
+        raise DataContractException(
+            type="lint",
+            result="failed",
+            name="Check that data contract YAML is valid",
+            reason="Data contract needs to be provided",
+            engine="datacontract",
+        )
+
+
 def resolve_data_contract_from_location(
     location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:
@@ -231,7 +252,7 @@ def _resolve_data_contract_from_str(
     return spec
 
 
-def _to_yaml(data_contract_str):
+def _to_yaml(data_contract_str) -> dict:
     try:
         yaml_dict = yaml.safe_load(data_contract_str)
         return yaml_dict
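
resolve_data_contract_dict takes the first non-None of three inputs and returns a plain dict, which publish uses to ship the contract verbatim. A sketch; spec stands for any existing DataContractSpecification instance:

    resolve_data_contract_dict(data_contract_location="datacontract.yaml")  # read, then YAML-parse
    resolve_data_contract_dict(data_contract_str="id: my-contract\n")       # YAML-parse the string
    resolve_data_contract_dict(data_contract=spec)                          # spec.model_dump()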

datacontract/lint/schema.py CHANGED
@@ -1,4 +1,6 @@
+import importlib.resources as resources
 import json
+import logging
 import os
 from typing import Any, Dict
 
@@ -6,6 +8,8 @@ import requests
 
 from datacontract.model.exceptions import DataContractException
 
+DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.1.0.schema.json"
+
 
 def fetch_schema(location: str = None) -> Dict[str, Any]:
     """
@@ -27,9 +31,12 @@ def fetch_schema(location: str = None) -> Dict[str, Any]:
 
     """
     if location is None:
-        location = "https://datacontract.com/datacontract.schema.json"
-
-    if location.startswith("http://") or location.startswith("https://"):
+        logging.info("Use default bundled schema " + DEFAULT_DATA_CONTRACT_SCHEMA)
+        schemas = resources.files("datacontract")
+        schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
+        with schema_file.open("r") as file:
+            schema = json.load(file)
+    elif location.startswith("http://") or location.startswith("https://"):
         response = requests.get(location)
         schema = response.json()
     else:
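
fetch_schema now defaults to the JSON Schema bundled with the wheel, so lint and test work offline; an explicit URL or path still overrides it. A sketch; the local path is a placeholder:

    fetch_schema()                                                     # bundled datacontract-1.1.0.schema.json
    fetch_schema("https://datacontract.com/datacontract.schema.json")  # remote, as before
    fetch_schema("schemas/custom.schema.json")                         # local file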

datacontract/model/data_contract_specification.py CHANGED
@@ -72,6 +72,7 @@ class Server(pyd.BaseModel):
     dataProductId: str = None
     outputPortId: str = None
     driver: str = None
+    storageAccount: str = None
     roles: List[ServerRole] = None
 
     model_config = pyd.ConfigDict(
@@ -112,6 +113,7 @@ class Definition(pyd.BaseModel):
     tags: List[str] = []
     links: Dict[str, str] = {}
     example: str = None
+    examples: List[Any] | None = None
 
     model_config = pyd.ConfigDict(
         extra="allow",