datacontract-cli 0.10.11__tar.gz → 0.10.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- {datacontract_cli-0.10.11/datacontract_cli.egg-info → datacontract_cli-0.10.12}/PKG-INFO +103 -28
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/README.md +80 -4
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/cli.py +19 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/data_contract.py +5 -10
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/fastjsonschema/check_jsonschema.py +11 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +2 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/check_soda_execute.py +2 -8
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/duckdb.py +23 -24
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/kafka.py +81 -23
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/avro_converter.py +12 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/dbml_converter.py +3 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/exporter.py +1 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/exporter_factory.py +6 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/spark_converter.py +4 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sql_type_converter.py +64 -29
- datacontract_cli-0.10.12/datacontract/export/sqlalchemy_converter.py +169 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/avro_importer.py +1 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/bigquery_importer.py +2 -2
- datacontract_cli-0.10.12/datacontract/imports/dbml_importer.py +112 -0
- datacontract_cli-0.10.12/datacontract/imports/dbt_importer.py +93 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/glue_importer.py +62 -58
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/importer.py +2 -1
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/importer_factory.py +5 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/odcs_importer.py +1 -1
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/spark_importer.py +29 -10
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/sql_importer.py +1 -1
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/unity_importer.py +1 -1
- datacontract_cli-0.10.11/datacontract/integration/publish_datamesh_manager.py → datacontract_cli-0.10.12/datacontract/integration/datamesh_manager.py +33 -5
- datacontract_cli-0.10.11/datacontract/integration/publish_opentelemetry.py → datacontract_cli-0.10.12/datacontract/integration/opentelemetry.py +1 -1
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/data_contract_specification.py +6 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12/datacontract_cli.egg-info}/PKG-INFO +103 -28
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/SOURCES.txt +9 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/requires.txt +23 -23
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/pyproject.toml +36 -31
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_breaking.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_catalog.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_changelog.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_cli.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_avro.py +23 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_bigquery.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_complex_data_contract.py +3 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_custom_exporter.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbml.py +5 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_models.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_sources.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_staging_sql.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_go.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_great_expectations.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_html.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_jsonschema.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_odcs.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_protobuf.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_rdf.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_spark.py +38 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sql.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sql_query.py +1 -3
- datacontract_cli-0.10.12/tests/test_export_sqlalchemy.py +91 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_terraform.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_avro.py +3 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_bigquery.py +1 -3
- datacontract_cli-0.10.12/tests/test_import_dbml.py +68 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_dbt.py +4 -6
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_glue.py +55 -5
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_jsonschema.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_odcs.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_spark.py +82 -1
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_unity_file.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_integration_datameshmanager.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_integration_opentelemetry.py +2 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_lint.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_resolve.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_roundtrip_jsonschema.py +1 -2
- datacontract_cli-0.10.12/tests/test_spec_fields_field.py +17 -0
- datacontract_cli-0.10.12/tests/test_spec_ref.py +15 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_azure_parquet_remote.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_bigquery.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_databricks.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_csv.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_formats_valid.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_inline.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_json.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_missing.py +1 -3
- datacontract_cli-0.10.12/tests/test_test_gcs_json_remote.py +48 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_kafka.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_kafka_remote.py +1 -2
- datacontract_cli-0.10.12/tests/test_test_parquet.py +146 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_postgres.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_csv.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_delta.py +10 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_multiple_models.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_remote.py +1 -2
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_snowflake.py +1 -3
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_sqlserver.py +3 -4
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_trino.py +1 -1
- datacontract_cli-0.10.11/datacontract/imports/dbt_importer.py +0 -117
- datacontract_cli-0.10.11/datacontract/publish/publish.py +0 -32
- datacontract_cli-0.10.11/tests/test_test_parquet.py +0 -59
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/LICENSE +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/MANIFEST.in +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/__init__.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/breaking/breaking.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/breaking/breaking_rules.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/catalog/catalog.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/__init__.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/__init__.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/bigquery.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/dask.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/databricks.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/postgres.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/snowflake.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/sqlserver.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/trino.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/__init__.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/avro_idl_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/bigquery_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/csv_type_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/dbt_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/go_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/great_expectations_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/html_export.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/jsonschema_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/odcs_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/protobuf_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/pydantic_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/rdf_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sodacl_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sql_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/terraform_converter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/jsonschema_importer.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/init/download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/files.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/lint.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/__init__.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/description_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/example_model_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/field_reference_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/notice_period_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/valid_constraints_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/resolve.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/schema.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/urls.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/breaking_change.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/exceptions.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/run.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/py.typed +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/datacontract.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/index.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_information.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_terms.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/definition.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/example.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/model_field.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/server.html +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/style/output.css +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/web.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/dependency_links.txt +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/entry_points.txt +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/top_level.txt +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/setup.cfg +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_description_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_documentation_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_example_model_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_avro_idl.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_pydantic.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sodacl.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_constraint_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_reference_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_sql.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_notice_period_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_dataframe.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_delta.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_local_json.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_complex.py +0 -0
- {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_web.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datacontract-cli
|
|
3
|
-
Version: 0.10.11
|
|
3
|
+
Version: 0.10.12
|
|
4
4
|
Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
|
|
5
5
|
Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
|
|
6
6
|
Project-URL: Homepage, https://cli.datacontract.com
|
|
@@ -11,36 +11,34 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: typer
|
|
14
|
+
Requires-Dist: typer<0.13,>=0.12
|
|
15
15
|
Requires-Dist: pydantic<2.9.0,>=2.8.2
|
|
16
16
|
Requires-Dist: pyyaml~=6.0.1
|
|
17
17
|
Requires-Dist: requests<2.33,>=2.31
|
|
18
|
-
Requires-Dist: fastapi==0.
|
|
18
|
+
Requires-Dist: fastapi==0.112.0
|
|
19
|
+
Requires-Dist: uvicorn==0.30.5
|
|
20
|
+
Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
|
|
19
21
|
Requires-Dist: fastparquet==2024.5.0
|
|
20
22
|
Requires-Dist: python-multipart==0.0.9
|
|
21
23
|
Requires-Dist: rich~=13.7.0
|
|
22
|
-
Requires-Dist: simple-ddl-parser==1.
|
|
24
|
+
Requires-Dist: simple-ddl-parser==1.6.0
|
|
25
|
+
Requires-Dist: duckdb==1.0.0
|
|
23
26
|
Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
|
|
24
27
|
Requires-Dist: setuptools>=60
|
|
25
|
-
Requires-Dist: duckdb==1.0.0
|
|
26
|
-
Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
|
|
27
28
|
Requires-Dist: python-dotenv~=1.0.0
|
|
28
29
|
Requires-Dist: rdflib==7.0.0
|
|
29
30
|
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
|
|
30
31
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
|
|
31
|
-
Requires-Dist: boto3<1.
|
|
32
|
-
Requires-Dist: botocore<1.34.137,>=1.34.41
|
|
32
|
+
Requires-Dist: boto3<1.35.6,>=1.34.41
|
|
33
33
|
Requires-Dist: jinja_partials>=0.2.1
|
|
34
34
|
Provides-Extra: avro
|
|
35
|
-
Requires-Dist: avro==1.
|
|
35
|
+
Requires-Dist: avro==1.12.0; extra == "avro"
|
|
36
36
|
Provides-Extra: bigquery
|
|
37
37
|
Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
|
|
38
38
|
Provides-Extra: databricks
|
|
39
39
|
Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
|
|
40
40
|
Requires-Dist: databricks-sql-connector<3.4.0,>=3.1.2; extra == "databricks"
|
|
41
41
|
Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
|
|
42
|
-
Provides-Extra: deltalake
|
|
43
|
-
Requires-Dist: deltalake<0.19,>=0.17; extra == "deltalake"
|
|
44
42
|
Provides-Extra: kafka
|
|
45
43
|
Requires-Dist: datacontract-cli[avro]; extra == "kafka"
|
|
46
44
|
Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
|
|
@@ -49,31 +47,32 @@ Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
|
|
|
49
47
|
Provides-Extra: s3
|
|
50
48
|
Requires-Dist: s3fs==2024.6.1; extra == "s3"
|
|
51
49
|
Provides-Extra: snowflake
|
|
52
|
-
Requires-Dist: snowflake-connector-python[pandas]<3.
|
|
50
|
+
Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
|
|
53
51
|
Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
|
|
54
52
|
Provides-Extra: sqlserver
|
|
55
53
|
Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
|
|
56
54
|
Provides-Extra: trino
|
|
57
55
|
Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"
|
|
56
|
+
Provides-Extra: dbt
|
|
57
|
+
Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
|
|
58
|
+
Provides-Extra: dbml
|
|
59
|
+
Requires-Dist: pydbml>=1.1.1; extra == "dbml"
|
|
58
60
|
Provides-Extra: all
|
|
59
|
-
Requires-Dist: datacontract-cli[bigquery,databricks,
|
|
61
|
+
Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
|
|
60
62
|
Provides-Extra: dev
|
|
61
63
|
Requires-Dist: datacontract-cli[all]; extra == "dev"
|
|
62
|
-
Requires-Dist: httpx==0.27.
|
|
63
|
-
Requires-Dist:
|
|
64
|
+
Requires-Dist: httpx==0.27.2; extra == "dev"
|
|
65
|
+
Requires-Dist: kafka-python; extra == "dev"
|
|
66
|
+
Requires-Dist: moto==5.0.13; extra == "dev"
|
|
67
|
+
Requires-Dist: pandas>=2.1.0; extra == "dev"
|
|
64
68
|
Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
|
|
69
|
+
Requires-Dist: pyarrow>=12.0.0; extra == "dev"
|
|
65
70
|
Requires-Dist: pytest; extra == "dev"
|
|
66
71
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
67
|
-
Requires-Dist:
|
|
68
|
-
Requires-Dist:
|
|
69
|
-
Requires-Dist: kafka
|
|
72
|
+
Requires-Dist: pymssql==2.3.1; extra == "dev"
|
|
73
|
+
Requires-Dist: ruff; extra == "dev"
|
|
74
|
+
Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.1; extra == "dev"
|
|
70
75
|
Requires-Dist: trino==0.329.0; extra == "dev"
|
|
71
|
-
Requires-Dist: testcontainers==4.7.2; extra == "dev"
|
|
72
|
-
Requires-Dist: testcontainers[core]==4.7.2; extra == "dev"
|
|
73
|
-
Requires-Dist: testcontainers[minio]==4.7.2; extra == "dev"
|
|
74
|
-
Requires-Dist: testcontainers[postgres]==4.7.2; extra == "dev"
|
|
75
|
-
Requires-Dist: testcontainers[kafka]==4.7.2; extra == "dev"
|
|
76
|
-
Requires-Dist: testcontainers[mssql]==4.7.2; extra == "dev"
|
|
77
76
|
|
|
78
77
|
# Data Contract CLI
|
|
79
78
|
|
|
@@ -82,7 +81,7 @@ Requires-Dist: testcontainers[mssql]==4.7.2; extra == "dev"
|
|
|
82
81
|
<img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/datacontract-cli/ci.yaml?branch=main"></a>
|
|
83
82
|
<a href="https://github.com/datacontract/datacontract-cli">
|
|
84
83
|
<img alt="Stars" src="https://img.shields.io/github/stars/datacontract/datacontract-cli" /></a>
|
|
85
|
-
<a href="https://datacontract.com/slack" rel="nofollow"><img src="https://
|
|
84
|
+
<a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social" style="max-width: 100%;"></a>
|
|
86
85
|
</p>
|
|
87
86
|
|
|
88
87
|
The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
|
|
@@ -267,13 +266,13 @@ A list of available extras:
|
|
|
267
266
|
| Avro Support | `pip install datacontract-cli[avro]` |
|
|
268
267
|
| Google BigQuery | `pip install datacontract-cli[bigquery]` |
|
|
269
268
|
| Databricks Integration | `pip install datacontract-cli[databricks]` |
|
|
270
|
-
| Deltalake Integration | `pip install datacontract-cli[deltalake]` |
|
|
271
269
|
| Kafka Integration | `pip install datacontract-cli[kafka]` |
|
|
272
270
|
| PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
|
|
273
271
|
| S3 Integration | `pip install datacontract-cli[s3]` |
|
|
274
272
|
| Snowflake Integration | `pip install datacontract-cli[snowflake]` |
|
|
275
273
|
| Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
|
|
276
274
|
| Trino | `pip install datacontract-cli[trino]` |
|
|
275
|
+
| Dbt | `pip install datacontract-cli[dbt]` |
|
|
277
276
|
|
|
278
277
|
|
|
279
278
|
|
|
@@ -406,6 +405,12 @@ Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/
|
|
|
406
405
|
|
|
407
406
|
Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.
|
|
408
407
|
|
|
408
|
+
- CSV
|
|
409
|
+
- JSON
|
|
410
|
+
- Delta
|
|
411
|
+
- Parquet
|
|
412
|
+
- Iceberg (coming soon)
|
|
413
|
+
|
|
409
414
|
#### Examples
|
|
410
415
|
|
|
411
416
|
##### JSON
|
|
@@ -444,6 +449,32 @@ servers:
|
|
|
444
449
|
|
|
445
450
|
|
|
446
451
|
|
|
452
|
+
### Google Cloud Storage (GCS)
|
|
453
|
+
|
|
454
|
+
The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
|
|
455
|
+
Use `https://storage.googleapis.com` as the endpoint URL.
|
|
456
|
+
|
|
457
|
+
#### Example
|
|
458
|
+
|
|
459
|
+
datacontract.yaml
|
|
460
|
+
```yaml
|
|
461
|
+
servers:
|
|
462
|
+
production:
|
|
463
|
+
type: s3
|
|
464
|
+
endpointUrl: https://storage.googleapis.com
|
|
465
|
+
location: s3://bucket-name/path/*/*.json # use s3:// schema instead of gs://
|
|
466
|
+
format: json
|
|
467
|
+
delimiter: new_line # new_line, array, or none
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
#### Environment Variables
|
|
471
|
+
|
|
472
|
+
| Environment Variable | Example | Description |
|
|
473
|
+
|-------------------------------------|----------------|------------------------------------------------------------------------------------------|
|
|
474
|
+
| `DATACONTRACT_S3_ACCESS_KEY_ID` | `GOOG1EZZZ...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Key ID |
|
|
475
|
+
| `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
|
|
476
|
+
|
|
477
|
+
|
|
447
478
|
### BigQuery
|
|
448
479
|
|
|
449
480
|
We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
|
|
@@ -794,7 +825,7 @@ models:
|
|
|
794
825
|
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
|
|
795
826
|
│ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
|
|
796
827
|
│ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
|
|
797
|
-
│ y|dbml|spark]
|
|
828
|
+
│ y|dbml|spark|sqlalchemy] │
|
|
798
829
|
│ --output PATH Specify the file path where the exported data will be │
|
|
799
830
|
│ saved. If no path is provided, the output will be │
|
|
800
831
|
│ printed to stdout. │
|
|
@@ -845,6 +876,7 @@ Available export options:
|
|
|
845
876
|
| `pydantic-model` | Export to pydantic models | ✅ |
|
|
846
877
|
| `DBML` | Export to a DBML Diagram description | ✅ |
|
|
847
878
|
| `spark` | Export to a Spark StructType | ✅ |
|
|
879
|
+
| `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
|
|
848
880
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
849
881
|
|
|
850
882
|
#### Great Expectations
|
|
@@ -918,6 +950,7 @@ models:
|
|
|
918
950
|
description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
|
|
919
951
|
type: long
|
|
920
952
|
example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
|
|
953
|
+
required: true
|
|
921
954
|
config:
|
|
922
955
|
avroLogicalType: local-timestamp-micros
|
|
923
956
|
avroDefault: 1672534861000000
|
|
@@ -932,6 +965,7 @@ models:
|
|
|
932
965
|
- **description**: A textual description of the field.
|
|
933
966
|
- **type**: The data type of the field. In this example, it is `long`.
|
|
934
967
|
- **example**: An example value for the field.
|
|
968
|
+
- **required**: Whether the field is required (as opposed to optional/nullable).
|
|
935
969
|
- **config**: Section to specify custom Avro properties.
|
|
936
970
|
- **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
|
|
937
971
|
- **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
|
|
@@ -968,6 +1002,14 @@ models:
|
|
|
968
1002
|
│ names, leave empty for all models in the │
|
|
969
1003
|
│ dataset). │
|
|
970
1004
|
│ [default: None] │
|
|
1005
|
+
│ --dbml-schema TEXT List of schema names to import from the DBML │
|
|
1006
|
+
│ file (repeat for multiple schema names, │
|
|
1007
|
+
│ leave empty for all tables in the file). │
|
|
1008
|
+
│ [default: None] │
|
|
1009
|
+
│ --dbml-table TEXT List of table names to import from the DBML │
|
|
1010
|
+
│ file (repeat for multiple table names, leave │
|
|
1011
|
+
│ empty for all tables in the file). │
|
|
1012
|
+
│ [default: None] │
|
|
971
1013
|
│ --help Show this message and exit. │
|
|
972
1014
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
973
1015
|
```
|
|
@@ -991,6 +1033,7 @@ Available import options:
|
|
|
991
1033
|
| `dbt` | Import from dbt models | ✅ |
|
|
992
1034
|
| `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
|
|
993
1035
|
| `spark` | Import from Spark StructTypes | ✅ |
|
|
1036
|
+
| `dbml` | Import from DBML models | ✅ |
|
|
994
1037
|
| `protobuf` | Import from Protobuf schemas | TBD |
|
|
995
1038
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
996
1039
|
|
|
@@ -1078,6 +1121,38 @@ Example:
|
|
|
1078
1121
|
datacontract import --format spark --source "users,orders"
|
|
1079
1122
|
```
|
|
1080
1123
|
|
|
1124
|
+
#### DBML
|
|
1125
|
+
|
|
1126
|
+
Importing from DBML Documents.
|
|
1127
|
+
**NOTE:** Since DBML does _not_ have strict requirements on the types of columns, this import _may_ create non-valid datacontracts, as not all types of fields can be properly mapped. In this case you will have to adapt the generated document manually.
|
|
1128
|
+
We also assume that the description for models and fields is stored in a Note within the DBML model.
|
|
1129
|
+
|
|
1130
|
+
You may give the `dbml-table` or `dbml-schema` parameter to enumerate the tables or schemas that should be imported.
|
|
1131
|
+
If no tables are given, _all_ available tables of the source will be imported. Likewise, if no schema is given, _all_ schemas are imported.
|
|
1132
|
+
|
|
1133
|
+
Examples:
|
|
1134
|
+
|
|
1135
|
+
```bash
|
|
1136
|
+
# Example import from DBML file, importing everything
|
|
1137
|
+
datacontract import --format dbml --source <file_path>
|
|
1138
|
+
```
|
|
1139
|
+
|
|
1140
|
+
```bash
|
|
1141
|
+
# Example import from DBML file, filtering for tables from specific schemas
|
|
1142
|
+
datacontract import --format dbml --source <file_path> --dbml-schema <schema_1> --dbml-schema <schema_2>
|
|
1143
|
+
```
|
|
1144
|
+
|
|
1145
|
+
```bash
|
|
1146
|
+
# Example import from DBML file, filtering for tables with specific names
|
|
1147
|
+
datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-table <table_name_2>
|
|
1148
|
+
```
|
|
1149
|
+
|
|
1150
|
+
```bash
|
|
1151
|
+
# Example import from DBML file, filtering for tables with specific names from a specific schema
|
|
1152
|
+
datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
|
|
1153
|
+
```
|
|
1154
|
+
|
|
1155
|
+
|
|
1081
1156
|
### breaking
|
|
1082
1157
|
|
|
1083
1158
|
```
|
|
@@ -1427,7 +1502,7 @@ if __name__ == "__main__":
|
|
|
1427
1502
|
"models": [
|
|
1428
1503
|
{
|
|
1429
1504
|
"name": "model1",
|
|
1430
|
-
"
|
|
1505
|
+
"description": "model description from app",
|
|
1431
1506
|
"columns": [
|
|
1432
1507
|
{
|
|
1433
1508
|
"name": "columnA",
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/datacontract-cli/ci.yaml?branch=main"></a>
|
|
6
6
|
<a href="https://github.com/datacontract/datacontract-cli">
|
|
7
7
|
<img alt="Stars" src="https://img.shields.io/github/stars/datacontract/datacontract-cli" /></a>
|
|
8
|
-
<a href="https://datacontract.com/slack" rel="nofollow"><img src="https://
|
|
8
|
+
<a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social" style="max-width: 100%;"></a>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
|
|
@@ -190,13 +190,13 @@ A list of available extras:
|
|
|
190
190
|
| Avro Support | `pip install datacontract-cli[avro]` |
|
|
191
191
|
| Google BigQuery | `pip install datacontract-cli[bigquery]` |
|
|
192
192
|
| Databricks Integration | `pip install datacontract-cli[databricks]` |
|
|
193
|
-
| Deltalake Integration | `pip install datacontract-cli[deltalake]` |
|
|
194
193
|
| Kafka Integration | `pip install datacontract-cli[kafka]` |
|
|
195
194
|
| PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
|
|
196
195
|
| S3 Integration | `pip install datacontract-cli[s3]` |
|
|
197
196
|
| Snowflake Integration | `pip install datacontract-cli[snowflake]` |
|
|
198
197
|
| Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
|
|
199
198
|
| Trino | `pip install datacontract-cli[trino]` |
|
|
199
|
+
| Dbt | `pip install datacontract-cli[dbt]` |
|
|
200
200
|
|
|
201
201
|
|
|
202
202
|
|
|
@@ -329,6 +329,12 @@ Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/
|
|
|
329
329
|
|
|
330
330
|
Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.
|
|
331
331
|
|
|
332
|
+
- CSV
|
|
333
|
+
- JSON
|
|
334
|
+
- Delta
|
|
335
|
+
- Parquet
|
|
336
|
+
- Iceberg (coming soon)
|
|
337
|
+
|
|
332
338
|
#### Examples
|
|
333
339
|
|
|
334
340
|
##### JSON
|
|
@@ -367,6 +373,32 @@ servers:
|
|
|
367
373
|
|
|
368
374
|
|
|
369
375
|
|
|
376
|
+
### Google Cloud Storage (GCS)
|
|
377
|
+
|
|
378
|
+
The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
|
|
379
|
+
Use `https://storage.googleapis.com` as the endpoint URL.
|
|
380
|
+
|
|
381
|
+
#### Example
|
|
382
|
+
|
|
383
|
+
datacontract.yaml
|
|
384
|
+
```yaml
|
|
385
|
+
servers:
|
|
386
|
+
production:
|
|
387
|
+
type: s3
|
|
388
|
+
endpointUrl: https://storage.googleapis.com
|
|
389
|
+
location: s3://bucket-name/path/*/*.json # use the s3:// scheme instead of gs://
|
|
390
|
+
format: json
|
|
391
|
+
delimiter: new_line # new_line, array, or none
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
#### Environment Variables
|
|
395
|
+
|
|
396
|
+
| Environment Variable | Example | Description |
|
|
397
|
+
|-------------------------------------|----------------|------------------------------------------------------------------------------------------|
|
|
398
|
+
| `DATACONTRACT_S3_ACCESS_KEY_ID` | `GOOG1EZZZ...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Key ID |
|
|
399
|
+
| `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
|
|
400
|
+
|
|
401
|
+
|
|
370
402
|
### BigQuery
|
|
371
403
|
|
|
372
404
|
We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
|
|
@@ -717,7 +749,7 @@ models:
|
|
|
717
749
|
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
|
|
718
750
|
│ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
|
|
719
751
|
│ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
|
|
720
|
-
│ y|dbml|spark]
|
|
752
|
+
│ y|dbml|spark|sqlalchemy] │
|
|
721
753
|
│ --output PATH Specify the file path where the exported data will be │
|
|
722
754
|
│ saved. If no path is provided, the output will be │
|
|
723
755
|
│ printed to stdout. │
|
|
@@ -768,6 +800,7 @@ Available export options:
|
|
|
768
800
|
| `pydantic-model` | Export to pydantic models | ✅ |
|
|
769
801
|
| `DBML` | Export to a DBML Diagram description | ✅ |
|
|
770
802
|
| `spark` | Export to a Spark StructType | ✅ |
|
|
803
|
+
| `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
|
|
771
804
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
772
805
|
|
|
773
806
|
#### Great Expectations
|
|
@@ -841,6 +874,7 @@ models:
|
|
|
841
874
|
description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
|
|
842
875
|
type: long
|
|
843
876
|
example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
|
|
877
|
+
required: true
|
|
844
878
|
config:
|
|
845
879
|
avroLogicalType: local-timestamp-micros
|
|
846
880
|
avroDefault: 1672534861000000
|
|
@@ -855,6 +889,7 @@ models:
|
|
|
855
889
|
- **description**: A textual description of the field.
|
|
856
890
|
- **type**: The data type of the field. In this example, it is `long`.
|
|
857
891
|
- **example**: An example value for the field.
|
|
892
|
+
- **required**: Is this a required field (as opposed to optional/nullable).
|
|
858
893
|
- **config**: Section to specify custom Avro properties.
|
|
859
894
|
- **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
|
|
860
895
|
- **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000, which corresponds to `2023-01-01 01:01:01 UTC`.
|
|
@@ -891,6 +926,14 @@ models:
|
|
|
891
926
|
│ names, leave empty for all models in the │
|
|
892
927
|
│ dataset). │
|
|
893
928
|
│ [default: None] │
|
|
929
|
+
│ --dbml-schema TEXT List of schema names to import from the DBML │
|
|
930
|
+
│ file (repeat for multiple schema names, │
|
|
931
|
+
│ leave empty for all tables in the file). │
|
|
932
|
+
│ [default: None] │
|
|
933
|
+
│ --dbml-table TEXT List of table names to import from the DBML │
|
|
934
|
+
│ file (repeat for multiple table names, leave │
|
|
935
|
+
│ empty for all tables in the file). │
|
|
936
|
+
│ [default: None] │
|
|
894
937
|
│ --help Show this message and exit. │
|
|
895
938
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
896
939
|
```
|
|
@@ -914,6 +957,7 @@ Available import options:
|
|
|
914
957
|
| `dbt` | Import from dbt models | ✅ |
|
|
915
958
|
| `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
|
|
916
959
|
| `spark` | Import from Spark StructTypes | ✅ |
|
|
960
|
+
| `dbml` | Import from DBML models | ✅ |
|
|
917
961
|
| `protobuf` | Import from Protobuf schemas | TBD |
|
|
918
962
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
919
963
|
|
|
@@ -1001,6 +1045,38 @@ Example:
|
|
|
1001
1045
|
datacontract import --format spark --source "users,orders"
|
|
1002
1046
|
```
|
|
1003
1047
|
|
|
1048
|
+
#### DBML
|
|
1049
|
+
|
|
1050
|
+
Importing from DBML Documents.
|
|
1051
|
+
**NOTE:** Since DBML does _not_ have strict requirements on the types of columns, this import _may_ create non-valid datacontracts, as not all types of fields can be properly mapped. In this case you will have to adapt the generated document manually.
|
|
1052
|
+
We also assume that the description for models and fields is stored in a Note within the DBML model.
|
|
1053
|
+
|
|
1054
|
+
You may give the `dbml-table` or `dbml-schema` parameter to enumerate the tables or schemas that should be imported.
|
|
1055
|
+
If no tables are given, _all_ available tables of the source will be imported. Likewise, if no schema is given, _all_ schemas are imported.
|
|
1056
|
+
|
|
1057
|
+
Examples:
|
|
1058
|
+
|
|
1059
|
+
```bash
|
|
1060
|
+
# Example import from DBML file, importing everything
|
|
1061
|
+
datacontract import --format dbml --source <file_path>
|
|
1062
|
+
```
|
|
1063
|
+
|
|
1064
|
+
```bash
|
|
1065
|
+
# Example import from DBML file, filtering for tables from specific schemas
|
|
1066
|
+
datacontract import --format dbml --source <file_path> --dbml-schema <schema_1> --dbml-schema <schema_2>
|
|
1067
|
+
```
|
|
1068
|
+
|
|
1069
|
+
```bash
|
|
1070
|
+
# Example import from DBML file, filtering for tables with specific names
|
|
1071
|
+
datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-table <table_name_2>
|
|
1072
|
+
```
|
|
1073
|
+
|
|
1074
|
+
```bash
|
|
1075
|
+
# Example import from DBML file, filtering for tables with specific names from a specific schema
|
|
1076
|
+
datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
|
|
1077
|
+
```
|
|
1078
|
+
|
|
1079
|
+
|
|
1004
1080
|
### breaking
|
|
1005
1081
|
|
|
1006
1082
|
```
|
|
@@ -1350,7 +1426,7 @@ if __name__ == "__main__":
|
|
|
1350
1426
|
"models": [
|
|
1351
1427
|
{
|
|
1352
1428
|
"name": "model1",
|
|
1353
|
-
"
|
|
1429
|
+
"description": "model description from app",
|
|
1354
1430
|
"columns": [
|
|
1355
1431
|
{
|
|
1356
1432
|
"name": "columnA",
|
|
@@ -17,7 +17,7 @@ from datacontract.catalog.catalog import create_index_html, create_data_contract
|
|
|
17
17
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
18
18
|
from datacontract.imports.importer import ImportFormat
|
|
19
19
|
from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
|
|
20
|
-
from datacontract.
|
|
20
|
+
from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager
|
|
21
21
|
|
|
22
22
|
DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
|
|
23
23
|
|
|
@@ -232,6 +232,18 @@ def import_(
|
|
|
232
232
|
help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
|
|
233
233
|
),
|
|
234
234
|
] = None,
|
|
235
|
+
dbml_schema: Annotated[
|
|
236
|
+
Optional[List[str]],
|
|
237
|
+
typer.Option(
|
|
238
|
+
help="List of schema names to import from the DBML file (repeat for multiple schema names, leave empty for all tables in the file)."
|
|
239
|
+
),
|
|
240
|
+
] = None,
|
|
241
|
+
dbml_table: Annotated[
|
|
242
|
+
Optional[List[str]],
|
|
243
|
+
typer.Option(
|
|
244
|
+
help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
|
|
245
|
+
),
|
|
246
|
+
] = None,
|
|
235
247
|
):
|
|
236
248
|
"""
|
|
237
249
|
Create a data contract from the given source location. Prints to stdout.
|
|
@@ -245,6 +257,8 @@ def import_(
|
|
|
245
257
|
bigquery_dataset=bigquery_dataset,
|
|
246
258
|
unity_table_full_name=unity_table_full_name,
|
|
247
259
|
dbt_model=dbt_model,
|
|
260
|
+
dbml_schema=dbml_schema,
|
|
261
|
+
dbml_table=dbml_table,
|
|
248
262
|
)
|
|
249
263
|
console.print(result.to_yaml())
|
|
250
264
|
|
|
@@ -261,8 +275,10 @@ def publish(
|
|
|
261
275
|
"""
|
|
262
276
|
Publish the data contract to the Data Mesh Manager.
|
|
263
277
|
"""
|
|
264
|
-
|
|
265
|
-
|
|
278
|
+
publish_data_contract_to_datamesh_manager(
|
|
279
|
+
data_contract_specification=DataContract(
|
|
280
|
+
data_contract_file=location, schema_location=schema
|
|
281
|
+
).get_data_contract_specification(),
|
|
266
282
|
)
|
|
267
283
|
|
|
268
284
|
|
|
@@ -18,8 +18,8 @@ from datacontract.export.exporter import ExportFormat
|
|
|
18
18
|
from datacontract.export.exporter_factory import exporter_factory
|
|
19
19
|
from datacontract.imports.importer_factory import importer_factory
|
|
20
20
|
|
|
21
|
-
from datacontract.integration.
|
|
22
|
-
from datacontract.integration.
|
|
21
|
+
from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
|
|
22
|
+
from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
|
|
23
23
|
from datacontract.lint import resolve
|
|
24
24
|
from datacontract.lint.linters.description_linter import DescriptionLinter
|
|
25
25
|
from datacontract.lint.linters.example_model_linter import ExampleModelLinter
|
|
@@ -218,15 +218,10 @@ class DataContract:
|
|
|
218
218
|
run.finish()
|
|
219
219
|
|
|
220
220
|
if self._publish_url is not None:
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
except Exception:
|
|
224
|
-
run.log_error("Failed to publish to datamesh manager")
|
|
221
|
+
publish_test_results_to_datamesh_manager(run, self._publish_url)
|
|
222
|
+
|
|
225
223
|
if self._publish_to_opentelemetry:
|
|
226
|
-
|
|
227
|
-
publish_opentelemetry(run)
|
|
228
|
-
except Exception:
|
|
229
|
-
run.log_error("Failed to publish to opentelemetry")
|
|
224
|
+
publish_test_results_to_opentelemetry(run)
|
|
230
225
|
|
|
231
226
|
return run
|
|
232
227
|
|
|
@@ -158,6 +158,17 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
|
|
|
158
158
|
process_local_file(run, server, model_name, validate)
|
|
159
159
|
elif server.type == "s3":
|
|
160
160
|
process_s3_file(server, model_name, validate)
|
|
161
|
+
elif server.type == "gcs":
|
|
162
|
+
run.checks.append(
|
|
163
|
+
Check(
|
|
164
|
+
type="schema",
|
|
165
|
+
name="Check that JSON has valid schema",
|
|
166
|
+
model=model_name,
|
|
167
|
+
result="info",
|
|
168
|
+
reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
|
|
169
|
+
engine="jsonschema",
|
|
170
|
+
)
|
|
171
|
+
)
|
|
161
172
|
else:
|
|
162
173
|
run.checks.append(
|
|
163
174
|
Check(
|
|
@@ -28,9 +28,11 @@ def s3_fs(s3_endpoint_url):
|
|
|
28
28
|
|
|
29
29
|
aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
|
|
30
30
|
aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
|
|
31
|
+
aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
|
|
31
32
|
return s3fs.S3FileSystem(
|
|
32
33
|
key=aws_access_key_id,
|
|
33
34
|
secret=aws_secret_access_key,
|
|
35
|
+
token=aws_session_token,
|
|
34
36
|
anon=aws_access_key_id is None,
|
|
35
37
|
client_kwargs={"endpoint_url": s3_endpoint_url},
|
|
36
38
|
)
|
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import typing
|
|
3
|
-
|
|
4
|
-
if typing.TYPE_CHECKING:
|
|
5
|
-
from pyspark.sql import SparkSession
|
|
6
2
|
|
|
7
3
|
from soda.scan import Scan
|
|
8
4
|
|
|
@@ -19,9 +15,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
|
|
|
19
15
|
from datacontract.model.run import Run, Check, Log
|
|
20
16
|
|
|
21
17
|
|
|
22
|
-
def check_soda_execute(
|
|
23
|
-
run: Run, data_contract: DataContractSpecification, server: Server, spark: "SparkSession", tmp_dir
|
|
24
|
-
):
|
|
18
|
+
def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
|
|
25
19
|
if data_contract is None:
|
|
26
20
|
run.log_warn("Cannot run engine soda-core, as data contract is invalid")
|
|
27
21
|
return
|
|
@@ -29,7 +23,7 @@ def check_soda_execute(
|
|
|
29
23
|
run.log_info("Running engine soda-core")
|
|
30
24
|
scan = Scan()
|
|
31
25
|
|
|
32
|
-
if server.type in ["s3", "azure", "local"]:
|
|
26
|
+
if server.type in ["s3", "gcs", "azure", "local"]:
|
|
33
27
|
if server.format in ["json", "parquet", "csv", "delta"]:
|
|
34
28
|
con = get_duckdb_connection(data_contract, server, run)
|
|
35
29
|
scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
2
|
|
|
3
|
-
from deltalake import DeltaTable
|
|
4
|
-
|
|
5
3
|
import duckdb
|
|
6
4
|
from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
|
|
7
5
|
from datacontract.model.run import Run
|
|
@@ -15,6 +13,9 @@ def get_duckdb_connection(data_contract, server, run: Run):
|
|
|
15
13
|
if server.type == "s3":
|
|
16
14
|
path = server.location
|
|
17
15
|
setup_s3_connection(con, server)
|
|
16
|
+
if server.type == "gcs":
|
|
17
|
+
path = server.location
|
|
18
|
+
setup_gcs_connection(con, server)
|
|
18
19
|
if server.type == "azure":
|
|
19
20
|
path = server.location
|
|
20
21
|
setup_azure_connection(con, server)
|
|
@@ -49,28 +50,8 @@ def get_duckdb_connection(data_contract, server, run: Run):
|
|
|
49
50
|
f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
|
|
50
51
|
)
|
|
51
52
|
elif server.format == "delta":
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
con.register(model_name, delta_table_arrow)
|
|
55
|
-
|
|
56
|
-
if server.type == "azure":
|
|
57
|
-
# After switching to native delta table support
|
|
58
|
-
# in https://github.com/datacontract/datacontract-cli/issues/258,
|
|
59
|
-
# azure storage should also work
|
|
60
|
-
# https://github.com/duckdb/duckdb_delta/issues/21
|
|
61
|
-
raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
|
|
62
|
-
if server.type == "s3":
|
|
63
|
-
storage_options = {
|
|
64
|
-
"AWS_ENDPOINT_URL": server.endpointUrl,
|
|
65
|
-
"AWS_ACCESS_KEY_ID": os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID"),
|
|
66
|
-
"AWS_SECRET_ACCESS_KEY": os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY"),
|
|
67
|
-
"AWS_REGION": os.getenv("DATACONTRACT_S3_REGION", "us-east-1"),
|
|
68
|
-
"AWS_ALLOW_HTTP": "True" if server.endpointUrl.startswith("http://") else "False",
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
delta_table_arrow = DeltaTable(model_path, storage_options=storage_options).to_pyarrow_dataset()
|
|
72
|
-
|
|
73
|
-
con.register(model_name, delta_table_arrow)
|
|
53
|
+
con.sql("update extensions;") # Make sure we have the latest delta extension
|
|
54
|
+
con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
|
|
74
55
|
return con
|
|
75
56
|
|
|
76
57
|
|
|
@@ -142,6 +123,24 @@ def setup_s3_connection(con, server):
|
|
|
142
123
|
# print(con.sql("SELECT * FROM duckdb_settings() WHERE name like 's3%'"))
|
|
143
124
|
|
|
144
125
|
|
|
126
|
+
def setup_gcs_connection(con, server):
|
|
127
|
+
key_id = os.getenv("DATACONTRACT_GCS_KEY_ID")
|
|
128
|
+
secret = os.getenv("DATACONTRACT_GCS_SECRET")
|
|
129
|
+
|
|
130
|
+
if key_id is None:
|
|
131
|
+
raise ValueError("Error: Environment variable DATACONTRACT_GCS_KEY_ID is not set")
|
|
132
|
+
if secret is None:
|
|
133
|
+
raise ValueError("Error: Environment variable DATACONTRACT_GCS_SECRET is not set")
|
|
134
|
+
|
|
135
|
+
con.sql(f"""
|
|
136
|
+
CREATE SECRET gcs_secret (
|
|
137
|
+
TYPE GCS,
|
|
138
|
+
KEY_ID '{key_id}',
|
|
139
|
+
SECRET '{secret}'
|
|
140
|
+
);
|
|
141
|
+
""")
|
|
142
|
+
|
|
143
|
+
|
|
145
144
|
def setup_azure_connection(con, server):
|
|
146
145
|
tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
|
|
147
146
|
client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
|