datacontract-cli 0.11.2__tar.gz → 0.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datacontract_cli-0.11.2/datacontract_cli.egg-info → datacontract_cli-0.11.4}/PKG-INFO +18 -16
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/README.md +2 -2
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/api.py +180 -38
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/catalog/catalog.py +1 -1
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/cli.py +5 -5
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/data_contract.py +2 -3
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_checks.py +1 -1
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_test.py +2 -2
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/check_soda_execute.py +2 -2
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/duckdb_connection.py +89 -25
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/bigquery_exporter.py +2 -2
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/pydantic_exporter.py +15 -4
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/spark_exporter.py +32 -7
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/csv_importer.py +5 -1
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dcs_importer.py +67 -24
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/init/init_template.py +1 -1
- datacontract_cli-0.11.4/datacontract/schemas/odcs-3.1.0.init.yaml +36 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/datacontract_odcs.html +2 -2
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/index.html +27 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4/datacontract_cli.egg-info}/PKG-INFO +18 -16
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/SOURCES.txt +2 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/requires.txt +16 -13
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/pyproject.toml +18 -14
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_download_datacontract_file.py +18 -3
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_pydantic.py +17 -1
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_rdf.py +3 -0
- datacontract_cli-0.11.4/tests/test_test_schema_evolution.py +110 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/LICENSE +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/MANIFEST.in +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/__init__.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/__init__.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/fastjsonschema/check_jsonschema.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/__init__.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/athena.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/bigquery.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/databricks.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/impala.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/kafka.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/oracle.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/postgres.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/snowflake.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/sqlserver.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/trino.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/__init__.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/avro_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/avro_idl_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/custom_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/data_caterer_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dbml_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dbt_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dcs_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dqx_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/duckdb_type_converter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/excel_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/exporter_factory.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/go_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/great_expectations_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/html_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/iceberg_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/jsonschema_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/markdown_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/mermaid_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/odcs_export_helper.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/odcs_v3_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/pandas_type_converter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/protobuf_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/rdf_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sodacl_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sql_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sql_type_converter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sqlalchemy_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/avro_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/bigquery_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dbml_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dbt_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/excel_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/glue_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/iceberg_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/importer_factory.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/json_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/jsonschema_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/odcs_helper.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/odcs_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/parquet_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/protobuf_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/spark_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/sql_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/unity_importer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/integration/entropy_data.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/files.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/resolve.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/resources.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/schema.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/urls.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/exceptions.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/odcs.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/run.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/__init__.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/junit_test_results.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/output_format.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/test_results_writer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/py.typed +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.1.0.init.yaml +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.1.0.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.0.init.yaml +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.0.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.1.init.yaml +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.1.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.0.1.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.0.2.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.1.0.schema.json +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/datacontract.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_information.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_terms.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/definition.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/example.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/model_field.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/quality.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/server.html +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/style/output.css +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/dependency_links.txt +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/entry_points.txt +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/top_level.txt +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/setup.cfg +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_api.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_catalog.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_cli.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_data_contract_checks.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_data_contract_specification.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_description_linter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_duckdb_json.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_avro.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_avro_idl.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_bigquery.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_complex_data_contract.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_custom.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_custom_exporter.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_data_caterer.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbml.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_models.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_sources.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_staging_sql.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dqx.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_excel.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_go.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_great_expectations.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_html.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_iceberg.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_jsonschema.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_markdown.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_mermaid.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_odcs_v3.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_protobuf.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sodacl.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_spark.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sql.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sql_query.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sqlalchemy.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_avro.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_bigquery.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_csv.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_dbml.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_dbt.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_excel.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_glue.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_iceberg.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_json.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_jsonschema.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_odcs_v3.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_parquet.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_protobuf.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_spark.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_oracle.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_postgres.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_sqlserver.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_unity_file.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_integration_entropydata.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_lint.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_resolve.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_roundtrip_jsonschema.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_api.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_athena_iceberg.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_azure_remote.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_bigquery.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_databricks.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_dataframe.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_delta.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_gcs_csv_remote.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_gcs_json_remote.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_kafka.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_kafka_remote.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_local_json.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_local_json_nd.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_oracle.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_output_junit.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_parquet.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_postgres.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_quality.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_csv.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_delta.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_complex.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_multiple_models.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_remote.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_snowflake.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_sqlserver.py +0 -0
- {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_trino.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datacontract-cli
|
|
3
|
-
Version: 0.11.2
|
|
3
|
+
Version: 0.11.4
|
|
4
4
|
Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
|
|
5
5
|
Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -11,25 +11,22 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: <3.13,>=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: typer<0.
|
|
14
|
+
Requires-Dist: typer<0.22,>=0.15.1
|
|
15
15
|
Requires-Dist: pydantic<2.13.0,>=2.8.2
|
|
16
16
|
Requires-Dist: pyyaml~=6.0.1
|
|
17
17
|
Requires-Dist: requests<2.33,>=2.31
|
|
18
18
|
Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist: numpy<2.0.0,>=1.26.4
|
|
19
|
+
Requires-Dist: pytz>=2024.1
|
|
21
20
|
Requires-Dist: python-multipart<1.0.0,>=0.0.20
|
|
22
21
|
Requires-Dist: rich<15.0,>=13.7
|
|
23
22
|
Requires-Dist: sqlglot<29.0.0,>=26.6.0
|
|
24
|
-
Requires-Dist: duckdb<1.4.0,>=1.0.0
|
|
25
|
-
Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20
|
|
26
23
|
Requires-Dist: setuptools>=60
|
|
27
24
|
Requires-Dist: python-dotenv<2.0.0,>=1.0.0
|
|
28
25
|
Requires-Dist: boto3<2.0.0,>=1.34.41
|
|
29
26
|
Requires-Dist: Jinja2<4.0.0,>=3.1.5
|
|
30
27
|
Requires-Dist: jinja_partials<1.0.0,>=0.2.1
|
|
31
28
|
Requires-Dist: datacontract-specification<2.0.0,>=1.2.3
|
|
32
|
-
Requires-Dist: open-data-contract-standard<4.0.0,>=3.1.
|
|
29
|
+
Requires-Dist: open-data-contract-standard<4.0.0,>=3.1.2
|
|
33
30
|
Provides-Extra: avro
|
|
34
31
|
Requires-Dist: avro==1.12.1; extra == "avro"
|
|
35
32
|
Provides-Extra: bigquery
|
|
@@ -42,19 +39,21 @@ Provides-Extra: databricks
|
|
|
42
39
|
Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "databricks"
|
|
43
40
|
Requires-Dist: soda-core-spark[databricks]<3.6.0,>=3.3.20; extra == "databricks"
|
|
44
41
|
Requires-Dist: databricks-sql-connector<4.3.0,>=3.7.0; extra == "databricks"
|
|
45
|
-
Requires-Dist: databricks-sdk<0.
|
|
46
|
-
Requires-Dist: pyspark<
|
|
42
|
+
Requires-Dist: databricks-sdk<0.78.0; extra == "databricks"
|
|
43
|
+
Requires-Dist: pyspark<5.0.0,>=3.5.0; extra == "databricks"
|
|
44
|
+
Requires-Dist: numpy<2.0.0,>=1.26.4; extra == "databricks"
|
|
47
45
|
Provides-Extra: iceberg
|
|
48
46
|
Requires-Dist: pyiceberg==0.10.0; extra == "iceberg"
|
|
49
47
|
Provides-Extra: kafka
|
|
50
48
|
Requires-Dist: datacontract-cli[avro]; extra == "kafka"
|
|
51
49
|
Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "kafka"
|
|
52
|
-
Requires-Dist: pyspark<
|
|
50
|
+
Requires-Dist: pyspark<5.0.0,>=3.5.0; extra == "kafka"
|
|
51
|
+
Requires-Dist: numpy<2.0.0,>=1.26.4; extra == "kafka"
|
|
53
52
|
Provides-Extra: postgres
|
|
54
53
|
Requires-Dist: soda-core-postgres<3.6.0,>=3.3.20; extra == "postgres"
|
|
55
54
|
Provides-Extra: s3
|
|
56
55
|
Requires-Dist: s3fs<2026.0.0,>=2025.2.0; extra == "s3"
|
|
57
|
-
Requires-Dist: aiobotocore<2.
|
|
56
|
+
Requires-Dist: aiobotocore<3.2.0,>=2.17.0; extra == "s3"
|
|
58
57
|
Provides-Extra: snowflake
|
|
59
58
|
Requires-Dist: snowflake-connector-python[pandas]<4.2,>=3.6; extra == "snowflake"
|
|
60
59
|
Requires-Dist: soda-core-snowflake<3.6.0,>=3.3.20; extra == "snowflake"
|
|
@@ -70,17 +69,20 @@ Provides-Extra: dbt
|
|
|
70
69
|
Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
|
|
71
70
|
Provides-Extra: dbml
|
|
72
71
|
Requires-Dist: pydbml>=1.1.1; extra == "dbml"
|
|
72
|
+
Provides-Extra: duckdb
|
|
73
|
+
Requires-Dist: duckdb<1.5.0,>=1.0.0; extra == "duckdb"
|
|
74
|
+
Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20; extra == "duckdb"
|
|
73
75
|
Provides-Extra: parquet
|
|
74
76
|
Requires-Dist: pyarrow>=18.1.0; extra == "parquet"
|
|
75
77
|
Provides-Extra: rdf
|
|
76
|
-
Requires-Dist: rdflib==7.
|
|
78
|
+
Requires-Dist: rdflib==7.5.0; extra == "rdf"
|
|
77
79
|
Provides-Extra: api
|
|
78
80
|
Requires-Dist: fastapi==0.121.2; extra == "api"
|
|
79
|
-
Requires-Dist: uvicorn==0.
|
|
81
|
+
Requires-Dist: uvicorn==0.40.0; extra == "api"
|
|
80
82
|
Provides-Extra: protobuf
|
|
81
83
|
Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
|
|
82
84
|
Provides-Extra: all
|
|
83
|
-
Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,oracle,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
|
|
85
|
+
Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,duckdb,excel,iceberg,kafka,oracle,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
|
|
84
86
|
Provides-Extra: dev
|
|
85
87
|
Requires-Dist: datacontract-cli[all]; extra == "dev"
|
|
86
88
|
Requires-Dist: httpx==0.28.1; extra == "dev"
|
|
@@ -335,6 +337,7 @@ A list of available extras:
|
|
|
335
337
|
| Avro Support | `pip install datacontract-cli[avro]` |
|
|
336
338
|
| Google BigQuery | `pip install datacontract-cli[bigquery]` |
|
|
337
339
|
| Databricks Integration | `pip install datacontract-cli[databricks]` |
|
|
340
|
+
| DuckDB (local/S3/GCS/Azure file testing) | `pip install datacontract-cli[duckdb]` |
|
|
338
341
|
| Iceberg | `pip install datacontract-cli[iceberg]` |
|
|
339
342
|
| Kafka Integration | `pip install datacontract-cli[kafka]` |
|
|
340
343
|
| PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
|
|
@@ -657,8 +660,7 @@ datacontract.yaml
|
|
|
657
660
|
servers:
|
|
658
661
|
production:
|
|
659
662
|
type: azure
|
|
660
|
-
|
|
661
|
-
location: abfss://dataproducts/inventory_events/*.parquet
|
|
663
|
+
location: abfss://datameshdatabricksdemo.dfs.core.windows.net/inventory_events/*.parquet
|
|
662
664
|
format: parquet
|
|
663
665
|
```
|
|
664
666
|
|
|
@@ -236,6 +236,7 @@ A list of available extras:
|
|
|
236
236
|
| Avro Support | `pip install datacontract-cli[avro]` |
|
|
237
237
|
| Google BigQuery | `pip install datacontract-cli[bigquery]` |
|
|
238
238
|
| Databricks Integration | `pip install datacontract-cli[databricks]` |
|
|
239
|
+
| DuckDB (local/S3/GCS/Azure file testing) | `pip install datacontract-cli[duckdb]` |
|
|
239
240
|
| Iceberg | `pip install datacontract-cli[iceberg]` |
|
|
240
241
|
| Kafka Integration | `pip install datacontract-cli[kafka]` |
|
|
241
242
|
| PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
|
|
@@ -558,8 +559,7 @@ datacontract.yaml
|
|
|
558
559
|
servers:
|
|
559
560
|
production:
|
|
560
561
|
type: azure
|
|
561
|
-
|
|
562
|
-
location: abfss://dataproducts/inventory_events/*.parquet
|
|
562
|
+
location: abfss://datameshdatabricksdemo.dfs.core.windows.net/inventory_events/*.parquet
|
|
563
563
|
format: parquet
|
|
564
564
|
```
|
|
565
565
|
|
|
@@ -11,50 +11,192 @@ from fastapi.security.api_key import APIKeyHeader
|
|
|
11
11
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
12
12
|
from datacontract.model.run import Run
|
|
13
13
|
|
|
14
|
-
DATA_CONTRACT_EXAMPLE_PAYLOAD = """
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
14
|
+
DATA_CONTRACT_EXAMPLE_PAYLOAD = """apiVersion: v3.1.0
|
|
15
|
+
kind: DataContract
|
|
16
|
+
id: orders
|
|
17
|
+
name: Orders
|
|
18
|
+
version: 1.0.0
|
|
19
|
+
status: active
|
|
20
|
+
description:
|
|
21
|
+
purpose: "Provides order and line item data for analytics and reporting"
|
|
22
|
+
usage: "Used by analytics team for sales analysis and business intelligence"
|
|
23
|
+
limitations: "Contains only the last 2 years of data"
|
|
24
|
+
customProperties:
|
|
25
|
+
- property: "sensitivity"
|
|
26
|
+
value: "secret"
|
|
27
|
+
description: "Data contains personally identifiable information"
|
|
28
|
+
authoritativeDefinitions:
|
|
29
|
+
- url: "https://entropy-data.com/policies/gdpr-compliance"
|
|
30
|
+
type: "businessDefinition"
|
|
31
|
+
description: "GDPR compliance policy for handling customer data"
|
|
32
|
+
schema:
|
|
33
|
+
- name: orders
|
|
34
|
+
physicalType: TABLE
|
|
35
|
+
description: All historic web shop orders since 2020-01-01. Includes successful and cancelled orders.
|
|
36
|
+
properties:
|
|
37
|
+
- name: order_id
|
|
38
|
+
logicalType: string
|
|
39
|
+
description: The internal order id for every orders. Do not show this to a customer.
|
|
40
|
+
businessName: Internal Order ID
|
|
41
|
+
physicalType: UUID
|
|
42
|
+
examples:
|
|
43
|
+
- 99e8bb10-3785-4634-9664-8dc79eb69d43
|
|
33
44
|
primaryKey: true
|
|
34
|
-
|
|
35
|
-
description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.
|
|
36
|
-
type: timestamp
|
|
45
|
+
classification: internal
|
|
37
46
|
required: true
|
|
47
|
+
unique: true
|
|
48
|
+
- name: customer_id
|
|
49
|
+
logicalType: string
|
|
50
|
+
description: A reference to the customer number
|
|
51
|
+
businessName: Customer Number
|
|
52
|
+
physicalType: TEXT
|
|
38
53
|
examples:
|
|
39
|
-
-
|
|
40
|
-
order_total:
|
|
41
|
-
description: Total amount the smallest monetary unit (e.g., cents).
|
|
42
|
-
type: long
|
|
54
|
+
- c123456789
|
|
43
55
|
required: true
|
|
56
|
+
unique: false
|
|
57
|
+
logicalTypeOptions:
|
|
58
|
+
minLength: 10
|
|
59
|
+
maxLength: 10
|
|
60
|
+
authoritativeDefinitions:
|
|
61
|
+
- type: definition
|
|
62
|
+
url: https://example.com/definitions/sales/customer/customer_id
|
|
63
|
+
tags:
|
|
64
|
+
- pii:true
|
|
65
|
+
classification: internal
|
|
66
|
+
criticalDataElement: true
|
|
67
|
+
- name: order_total
|
|
68
|
+
logicalType: integer
|
|
69
|
+
description: The order total amount in cents, including tax, after discounts.
|
|
70
|
+
Includes shipping costs.
|
|
71
|
+
physicalType: INTEGER
|
|
44
72
|
examples:
|
|
45
|
-
- 9999
|
|
73
|
+
- "9999"
|
|
74
|
+
quality:
|
|
75
|
+
- type: text
|
|
76
|
+
description: The order_total equals the sum of all related line items.
|
|
77
|
+
required: true
|
|
78
|
+
businessName: Order Amount
|
|
79
|
+
- name: order_timestamp
|
|
80
|
+
logicalType: timestamp
|
|
81
|
+
description: The time including timezone when the order payment was successfully
|
|
82
|
+
confirmed.
|
|
83
|
+
physicalType: TIMESTAMPTZ
|
|
84
|
+
businessName: Order Date
|
|
85
|
+
examples:
|
|
86
|
+
- "2025-03-01 14:30:00+01"
|
|
87
|
+
- name: order_status
|
|
88
|
+
businessName: Status
|
|
89
|
+
description: The business status of the order
|
|
90
|
+
logicalType: string
|
|
91
|
+
physicalType: TEXT
|
|
92
|
+
examples:
|
|
93
|
+
- shipped
|
|
46
94
|
quality:
|
|
47
|
-
- type:
|
|
48
|
-
description:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
95
|
+
- type: library
|
|
96
|
+
description: Ensure that there are no other status values.
|
|
97
|
+
metric: invalidValues
|
|
98
|
+
arguments:
|
|
99
|
+
validValues:
|
|
100
|
+
- pending
|
|
101
|
+
- paid
|
|
102
|
+
- processing
|
|
103
|
+
- shipped
|
|
104
|
+
- delivered
|
|
105
|
+
- cancelled
|
|
106
|
+
- refunded
|
|
107
|
+
mustBe: 0
|
|
108
|
+
quality:
|
|
109
|
+
- type: library
|
|
110
|
+
metric: rowCount
|
|
111
|
+
mustBeGreaterThan: 100000
|
|
112
|
+
description: If there are less than 100k rows, something is wrong.
|
|
113
|
+
- name: line_items
|
|
114
|
+
physicalType: table
|
|
115
|
+
description: Details for each item in an order
|
|
116
|
+
properties:
|
|
117
|
+
- name: line_item_id
|
|
118
|
+
logicalType: string
|
|
119
|
+
description: Unique identifier for the line item
|
|
120
|
+
physicalType: UUID
|
|
121
|
+
examples:
|
|
122
|
+
- 12c9ba21-0c44-4e29-ba72-b8fd01c1be30
|
|
123
|
+
logicalTypeOptions:
|
|
124
|
+
format: uuid
|
|
125
|
+
required: true
|
|
126
|
+
primaryKey: true
|
|
127
|
+
- name: sku
|
|
128
|
+
logicalType: string
|
|
129
|
+
businessName: Stock Keeping Unit
|
|
130
|
+
description: Identifier for the purchased product
|
|
131
|
+
physicalType: TEXT
|
|
132
|
+
examples:
|
|
133
|
+
- 111222333
|
|
134
|
+
required: true
|
|
135
|
+
- name: price
|
|
136
|
+
logicalType: integer
|
|
137
|
+
description: Price in cents for this line item including tax
|
|
138
|
+
physicalType: INTEGER
|
|
139
|
+
examples:
|
|
140
|
+
- 9999
|
|
141
|
+
required: true
|
|
142
|
+
- name: order_id
|
|
143
|
+
required: false
|
|
144
|
+
primaryKey: false
|
|
145
|
+
logicalType: string
|
|
146
|
+
physicalType: UUID
|
|
147
|
+
relationships:
|
|
148
|
+
- type: foreignKey
|
|
149
|
+
to: orders.order_id
|
|
150
|
+
servers:
|
|
151
|
+
- server: production
|
|
152
|
+
environment: prod
|
|
153
|
+
type: postgres
|
|
154
|
+
host: aws-1-eu-central-2.pooler.supabase.com
|
|
155
|
+
port: 6543
|
|
156
|
+
database: postgres
|
|
157
|
+
schema: dp_orders_v1
|
|
158
|
+
team:
|
|
159
|
+
name: sales
|
|
160
|
+
description: This data product is owned by the "Sales" team
|
|
161
|
+
members:
|
|
162
|
+
- username: john@example.com
|
|
163
|
+
name: John Doe
|
|
164
|
+
role: Owner
|
|
165
|
+
authoritativeDefinitions:
|
|
166
|
+
- type: slack
|
|
167
|
+
url: https://slack.example.com/teams/sales
|
|
168
|
+
roles:
|
|
169
|
+
- role: analyst_us
|
|
170
|
+
description: Read access for analytics to US orders
|
|
171
|
+
- role: analyst_eu
|
|
172
|
+
description: Read access for analytics to EU orders
|
|
173
|
+
slaProperties:
|
|
174
|
+
- property: availability
|
|
175
|
+
value: 99.9%
|
|
176
|
+
description: Data platform uptime guarantee
|
|
177
|
+
- property: retention
|
|
178
|
+
value: "1"
|
|
179
|
+
unit: year
|
|
180
|
+
description: Data will be deleted after 1 year
|
|
181
|
+
- property: freshness
|
|
182
|
+
value: "24"
|
|
183
|
+
unit: hours
|
|
184
|
+
description: Within 24 hours of order placement
|
|
185
|
+
- property: support
|
|
186
|
+
value: business hours
|
|
187
|
+
description: Support only during business hours
|
|
188
|
+
price:
|
|
189
|
+
priceAmount: 0
|
|
190
|
+
priceCurrency: USD
|
|
191
|
+
priceUnit: monthly
|
|
192
|
+
tags:
|
|
193
|
+
- e-commerce
|
|
194
|
+
- transactions
|
|
195
|
+
- pii
|
|
196
|
+
customProperties:
|
|
197
|
+
- property: dataPlatformRole
|
|
198
|
+
value: role_orders_v1
|
|
199
|
+
contractCreatedTs: "2025-01-15T10:00:00Z"
|
|
58
200
|
"""
|
|
59
201
|
|
|
60
202
|
app = FastAPI(
|
|
@@ -14,7 +14,7 @@ from datacontract.export.html_exporter import get_version
|
|
|
14
14
|
|
|
15
15
|
def _get_owner(odcs: OpenDataContractStandard) -> Optional[str]:
|
|
16
16
|
"""Get the owner from ODCS customProperties or team."""
|
|
17
|
-
if odcs.team and odcs.team.name:
|
|
17
|
+
if odcs.team and hasattr(odcs.team, "name") and odcs.team.name:
|
|
18
18
|
return odcs.team.name
|
|
19
19
|
if odcs.customProperties:
|
|
20
20
|
for prop in odcs.customProperties:
|
|
@@ -67,7 +67,7 @@ def common(
|
|
|
67
67
|
pass
|
|
68
68
|
|
|
69
69
|
|
|
70
|
-
@app.command()
|
|
70
|
+
@app.command(name="init")
|
|
71
71
|
def init(
|
|
72
72
|
location: Annotated[
|
|
73
73
|
str, typer.Argument(help="The location of the data contract file to create.")
|
|
@@ -90,7 +90,7 @@ def init(
|
|
|
90
90
|
console.print("📄 data contract written to " + location)
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
@app.command()
|
|
93
|
+
@app.command(name="lint")
|
|
94
94
|
def lint(
|
|
95
95
|
location: Annotated[
|
|
96
96
|
str,
|
|
@@ -125,7 +125,7 @@ def enable_debug_logging(debug: bool):
|
|
|
125
125
|
)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
@app.command()
|
|
128
|
+
@app.command(name="test")
|
|
129
129
|
def test(
|
|
130
130
|
location: Annotated[
|
|
131
131
|
str,
|
|
@@ -187,7 +187,7 @@ def test(
|
|
|
187
187
|
write_test_result(run, console, output_format, output, data_contract)
|
|
188
188
|
|
|
189
189
|
|
|
190
|
-
@app.command()
|
|
190
|
+
@app.command(name="export")
|
|
191
191
|
def export(
|
|
192
192
|
format: Annotated[ExportFormat, typer.Option(help="The export format.")],
|
|
193
193
|
output: Annotated[
|
|
@@ -467,7 +467,7 @@ def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict
|
|
|
467
467
|
return default_args | dict(zip(trimmed_keys, context.args[1::2]))
|
|
468
468
|
|
|
469
469
|
|
|
470
|
-
@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
|
|
470
|
+
@app.command(name="api", context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
|
|
471
471
|
def api(
|
|
472
472
|
ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
|
|
473
473
|
port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
|
|
@@ -4,10 +4,9 @@ import typing
|
|
|
4
4
|
from open_data_contract_standard.model import OpenDataContractStandard, Team
|
|
5
5
|
|
|
6
6
|
if typing.TYPE_CHECKING:
|
|
7
|
+
from duckdb.duckdb import DuckDBPyConnection
|
|
7
8
|
from pyspark.sql import SparkSession
|
|
8
9
|
|
|
9
|
-
from duckdb.duckdb import DuckDBPyConnection
|
|
10
|
-
|
|
11
10
|
from datacontract.engines.data_contract_test import execute_data_contract_test
|
|
12
11
|
from datacontract.export.exporter import ExportFormat
|
|
13
12
|
from datacontract.export.exporter_factory import exporter_factory
|
|
@@ -29,7 +28,7 @@ class DataContract:
|
|
|
29
28
|
server: str = None,
|
|
30
29
|
publish_url: str = None,
|
|
31
30
|
spark: "SparkSession" = None,
|
|
32
|
-
duckdb_connection: DuckDBPyConnection = None,
|
|
31
|
+
duckdb_connection: "DuckDBPyConnection" = None,
|
|
33
32
|
inline_definitions: bool = True,
|
|
34
33
|
ssl_verification: bool = True,
|
|
35
34
|
publish_test_results: bool = False,
|
{datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_checks.py
RENAMED
|
@@ -161,7 +161,7 @@ def to_schema_name(schema_object: SchemaObject, server_type: str) -> str:
|
|
|
161
161
|
# Use physicalName if set (ODCS standard way to specify actual table name)
|
|
162
162
|
if schema_object.physicalName:
|
|
163
163
|
return schema_object.physicalName
|
|
164
|
-
|
|
164
|
+
|
|
165
165
|
return schema_object.name
|
|
166
166
|
|
|
167
167
|
|
{datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_test.py
RENAMED
|
@@ -4,12 +4,12 @@ import tempfile
|
|
|
4
4
|
import typing
|
|
5
5
|
|
|
6
6
|
import requests
|
|
7
|
-
from duckdb.duckdb import DuckDBPyConnection
|
|
8
7
|
from open_data_contract_standard.model import OpenDataContractStandard, Server
|
|
9
8
|
|
|
10
9
|
from datacontract.engines.data_contract_checks import create_checks
|
|
11
10
|
|
|
12
11
|
if typing.TYPE_CHECKING:
|
|
12
|
+
from duckdb.duckdb import DuckDBPyConnection
|
|
13
13
|
from pyspark.sql import SparkSession
|
|
14
14
|
|
|
15
15
|
from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
|
|
@@ -26,7 +26,7 @@ def execute_data_contract_test(
|
|
|
26
26
|
run: Run,
|
|
27
27
|
server_name: str = None,
|
|
28
28
|
spark: "SparkSession" = None,
|
|
29
|
-
duckdb_connection: DuckDBPyConnection = None,
|
|
29
|
+
duckdb_connection: "DuckDBPyConnection" = None,
|
|
30
30
|
):
|
|
31
31
|
if data_contract.schema_ is None or len(data_contract.schema_) == 0:
|
|
32
32
|
raise DataContractException(
|
{datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/check_soda_execute.py
RENAMED
|
@@ -6,9 +6,9 @@ from datacontract.engines.soda.connections.athena import to_athena_soda_configur
|
|
|
6
6
|
from datacontract.engines.soda.connections.oracle import initialize_client_and_create_soda_configuration
|
|
7
7
|
|
|
8
8
|
if typing.TYPE_CHECKING:
|
|
9
|
+
from duckdb.duckdb import DuckDBPyConnection
|
|
9
10
|
from pyspark.sql import SparkSession
|
|
10
11
|
|
|
11
|
-
from duckdb.duckdb import DuckDBPyConnection
|
|
12
12
|
from open_data_contract_standard.model import OpenDataContractStandard, Server
|
|
13
13
|
|
|
14
14
|
from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
|
|
@@ -29,7 +29,7 @@ def check_soda_execute(
|
|
|
29
29
|
data_contract: OpenDataContractStandard,
|
|
30
30
|
server: Server,
|
|
31
31
|
spark: "SparkSession" = None,
|
|
32
|
-
duckdb_connection: DuckDBPyConnection = None,
|
|
32
|
+
duckdb_connection: "DuckDBPyConnection" = None,
|
|
33
33
|
):
|
|
34
34
|
from soda.common.config_helper import ConfigHelper
|
|
35
35
|
|
|
@@ -1,19 +1,32 @@
|
|
|
1
1
|
import os
|
|
2
|
-
|
|
2
|
+
import re
|
|
3
|
+
from typing import TYPE_CHECKING, Any, List, Optional
|
|
3
4
|
|
|
4
|
-
import duckdb
|
|
5
5
|
from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject, SchemaProperty, Server
|
|
6
6
|
|
|
7
7
|
from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
|
|
8
|
+
from datacontract.export.sql_type_converter import convert_to_duckdb
|
|
8
9
|
from datacontract.model.run import Run
|
|
9
10
|
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
import duckdb
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _import_duckdb():
    """Import and return the optional ``duckdb`` module.

    The import happens at call time rather than at module import time, so the
    dependency is only needed when a DuckDB-backed server type is used.

    Returns:
        The imported ``duckdb`` module.

    Raises:
        ImportError: If ``duckdb`` is not installed. The message carries the
            install hint and the original import failure is chained as the
            cause so the real reason stays visible in the traceback.
    """
    try:
        import duckdb
    except ImportError as err:
        raise ImportError(
            "duckdb is required for this server type. Install with: pip install datacontract-cli[duckdb]"
        ) from err
    return duckdb
|
|
21
|
+
|
|
10
22
|
|
|
11
23
|
def get_duckdb_connection(
|
|
12
24
|
data_contract: OpenDataContractStandard,
|
|
13
25
|
server: Server,
|
|
14
26
|
run: Run,
|
|
15
|
-
duckdb_connection: duckdb.DuckDBPyConnection | None = None,
|
|
16
|
-
) -> duckdb.DuckDBPyConnection:
|
|
27
|
+
duckdb_connection: "duckdb.DuckDBPyConnection | None" = None,
|
|
28
|
+
) -> "duckdb.DuckDBPyConnection":
|
|
29
|
+
duckdb = _import_duckdb()
|
|
17
30
|
if duckdb_connection is None:
|
|
18
31
|
con = duckdb.connect(database=":memory:")
|
|
19
32
|
else:
|
|
@@ -57,29 +70,46 @@ def get_duckdb_connection(
|
|
|
57
70
|
)
|
|
58
71
|
add_nested_views(con, model_name, schema_obj.properties)
|
|
59
72
|
elif server.format == "parquet":
|
|
60
|
-
con
|
|
61
|
-
CREATE VIEW "{model_name}" AS SELECT * FROM read_parquet('{model_path}', hive_partitioning=1);
|
|
62
|
-
""")
|
|
73
|
+
create_view_with_schema_union(con, schema_obj, model_path, "read_parquet", to_parquet_types)
|
|
63
74
|
elif server.format == "csv":
|
|
64
|
-
|
|
65
|
-
run.log_info("Using columns: " + str(columns))
|
|
66
|
-
if columns is None:
|
|
67
|
-
con.sql(
|
|
68
|
-
f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""
|
|
69
|
-
)
|
|
70
|
-
else:
|
|
71
|
-
con.sql(
|
|
72
|
-
f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
|
|
73
|
-
)
|
|
75
|
+
create_view_with_schema_union(con, schema_obj, model_path, "read_csv", to_csv_types)
|
|
74
76
|
elif server.format == "delta":
|
|
75
77
|
con.sql("update extensions;") # Make sure we have the latest delta extension
|
|
76
78
|
con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
|
|
77
|
-
table_info = con.sql(f"PRAGMA table_info('{model_name}');").
|
|
78
|
-
if table_info
|
|
79
|
-
run.log_info(f"DuckDB Table Info: {table_info
|
|
79
|
+
table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchall()
|
|
80
|
+
if table_info:
|
|
81
|
+
run.log_info(f"DuckDB Table Info: {table_info}")
|
|
80
82
|
return con
|
|
81
83
|
|
|
82
84
|
|
|
85
|
+
def create_view_with_schema_union(con, schema_obj: "SchemaObject", model_path: str, read_function: str, type_converter):
    """Expose the data files at ``model_path`` under the schema's name.

    When ``type_converter`` yields column types, an empty TABLE with the
    contract's columns is created and filled ``BY NAME`` with the columns that
    exist both in the contract and in the data files. When it yields nothing,
    a plain VIEW over the files is created instead.

    :param con: connection object exposing ``sql(query)``; the result of the
        column lookup must expose ``fetchall()``.
    :param schema_obj: schema whose ``name`` becomes the table/view name.
    :param model_path: file path or glob handed to ``read_function``.
    :param read_function: name of the reader table function
        (e.g. ``read_parquet``, ``read_csv``); interpolated as-is, so it must
        come from trusted code.
    :param type_converter: callable mapping ``schema_obj`` to a
        ``{column_name: sql_type}`` dict (or ``None``/empty).
    """

    def quote_identifier(name) -> str:
        # Double-quoted identifier; embedded double quotes are doubled.
        return '"' + str(name).replace('"', '""') + '"'

    def quote_literal(value) -> str:
        # Single-quoted string literal; embedded single quotes are doubled.
        return "'" + str(value).replace("'", "''") + "'"

    converted_types = type_converter(schema_obj)
    model_name = schema_obj.name
    quoted_model = quote_identifier(model_name)
    source = f"{read_function}({quote_literal(model_path)}, union_by_name=true, hive_partitioning=1)"

    if not converted_types:
        # Fallback: no contract types available, expose the files unchanged.
        con.sql(f"""CREATE VIEW {quoted_model} AS SELECT * FROM {source};""")
        return

    # Create empty table with contract schema
    columns_def = ", ".join(
        f"{quote_identifier(col_name)} {col_type}" for col_name, col_type in converted_types.items()
    )
    con.sql(f"""CREATE TABLE {quoted_model} ({columns_def});""")

    # Read columns existing in both current data contract and data
    intersecting_columns = con.sql(f"""SELECT column_name
        FROM (DESCRIBE SELECT * FROM {source})
        INTERSECT SELECT column_name
        FROM information_schema.columns
        WHERE table_name = {quote_literal(model_name)}""").fetchall()

    if not intersecting_columns:
        # Nothing in the data matches the contract: an empty select list would
        # be invalid SQL, so leave the contract-shaped table empty.
        return

    # Column names are quoted so names with spaces, hyphens or reserved words work.
    selected_columns = ", ".join(quote_identifier(column[0]) for column in intersecting_columns)

    # Insert data into table by name, but only columns existing in contract and data
    con.sql(f"""INSERT INTO {quoted_model} BY NAME
        (SELECT {selected_columns} FROM {source});""")
|
|
112
|
+
|
|
83
113
|
def to_csv_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
|
|
84
114
|
if schema_obj is None:
|
|
85
115
|
return None
|
|
@@ -89,6 +119,15 @@ def to_csv_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
|
|
|
89
119
|
columns[prop.name] = convert_to_duckdb_csv_type(prop)
|
|
90
120
|
return columns
|
|
91
121
|
|
|
122
|
+
def to_parquet_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
    """Map each schema property name to its DuckDB SQL type for Parquet data.

    Types come from ``convert_to_duckdb`` (intended to preserve decimals, etc.).
    Returns ``None`` when no schema object is given and an empty dict when the
    schema has no properties.
    """
    if schema_obj is None:
        return None
    properties = schema_obj.properties or []
    return {prop.name: convert_to_duckdb(prop) for prop in properties}
|
|
92
131
|
|
|
93
132
|
def to_json_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
|
|
94
133
|
if schema_obj is None:
|
|
@@ -109,7 +148,7 @@ def _get_type(prop: SchemaProperty) -> Optional[str]:
|
|
|
109
148
|
return None
|
|
110
149
|
|
|
111
150
|
|
|
112
|
-
def add_nested_views(con: duckdb.DuckDBPyConnection, model_name: str, properties: List[SchemaProperty] | None):
|
|
151
|
+
def add_nested_views(con: "duckdb.DuckDBPyConnection", model_name: str, properties: List[SchemaProperty] | None):
|
|
113
152
|
model_name = model_name.strip('"')
|
|
114
153
|
if properties is None:
|
|
115
154
|
return
|
|
@@ -140,7 +179,7 @@ def add_nested_views(con: duckdb.DuckDBPyConnection, model_name: str, properties
|
|
|
140
179
|
add_nested_views(con, nested_model_name, prop.properties)
|
|
141
180
|
|
|
142
181
|
|
|
143
|
-
def setup_s3_connection(con, server):
|
|
182
|
+
def setup_s3_connection(con, server: Server):
|
|
144
183
|
s3_region = os.getenv("DATACONTRACT_S3_REGION")
|
|
145
184
|
s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
|
|
146
185
|
s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
|
|
@@ -184,7 +223,7 @@ def setup_s3_connection(con, server):
|
|
|
184
223
|
""")
|
|
185
224
|
|
|
186
225
|
|
|
187
|
-
def setup_gcs_connection(con, server):
|
|
226
|
+
def setup_gcs_connection(con, server: Server):
|
|
188
227
|
key_id = os.getenv("DATACONTRACT_GCS_KEY_ID")
|
|
189
228
|
secret = os.getenv("DATACONTRACT_GCS_SECRET")
|
|
190
229
|
|
|
@@ -202,11 +241,14 @@ def setup_gcs_connection(con, server):
|
|
|
202
241
|
""")
|
|
203
242
|
|
|
204
243
|
|
|
205
|
-
def setup_azure_connection(con, server):
|
|
244
|
+
def setup_azure_connection(con, server: Server):
|
|
206
245
|
tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
|
|
207
246
|
client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
|
|
208
247
|
client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
|
|
209
|
-
storage_account =
|
|
248
|
+
storage_account = (
|
|
249
|
+
to_azure_storage_account(server.location) if server.type == "azure" and "://" in server.location
|
|
250
|
+
else None
|
|
251
|
+
)
|
|
210
252
|
|
|
211
253
|
if tenant_id is None:
|
|
212
254
|
raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
|
|
@@ -239,3 +281,25 @@ def setup_azure_connection(con, server):
|
|
|
239
281
|
CLIENT_SECRET '{client_secret}'
|
|
240
282
|
);
|
|
241
283
|
""")
|
|
284
|
+
|
|
285
|
+
def to_azure_storage_account(location: str) -> str | None:
|
|
286
|
+
"""
|
|
287
|
+
Converts a storage location string to extract the storage account name.
|
|
288
|
+
ODCS v3.0 has no explicit field for the storage account. It uses the location field, which is a URI.
|
|
289
|
+
This function parses a storage location string to identify and return the
|
|
290
|
+
storage account name. It handles two primary patterns:
|
|
291
|
+
1. Protocol://containerName@storageAccountName
|
|
292
|
+
2. Protocol://storageAccountName
|
|
293
|
+
:param location: The storage location string to parse, typically following
|
|
294
|
+
the format protocol://containerName@storageAccountName. or
|
|
295
|
+
protocol://storageAccountName.
|
|
296
|
+
:return: The extracted storage account name if found, otherwise None
|
|
297
|
+
"""
|
|
298
|
+
# to catch protocol://containerName@storageAccountName. pattern from location
|
|
299
|
+
match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
|
|
300
|
+
if match:
|
|
301
|
+
return match.group()
|
|
302
|
+
else:
|
|
303
|
+
# to catch protocol://storageAccountName. pattern from location
|
|
304
|
+
match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
|
|
305
|
+
return match.group() if match else None
|