datacontract-cli 0.10.6__tar.gz → 0.10.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli has been flagged as potentially problematic by the registry.
- {datacontract_cli-0.10.6/datacontract_cli.egg-info → datacontract_cli-0.10.7}/PKG-INFO +139 -67
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/README.md +133 -62
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/cli.py +8 -24
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/data_contract.py +33 -4
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/check_soda_execute.py +9 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/kafka.py +3 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/avro_converter.py +14 -19
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/imports/glue_importer.py +4 -3
- datacontract_cli-0.10.7/datacontract/web.py +51 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7/datacontract_cli.egg-info}/PKG-INFO +139 -67
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract_cli.egg-info/SOURCES.txt +1 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract_cli.egg-info/requires.txt +5 -4
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/pyproject.toml +6 -12
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_jsonschema.py +5 -3
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_import_glue.py +32 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_azure_parquet_remote.py +1 -1
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_databricks.py +1 -1
- datacontract_cli-0.10.7/tests/test_test_dataframe.py +67 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_kafka.py +1 -5
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_postgres.py +5 -5
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_sqlserver.py +5 -5
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_web.py +17 -0
- datacontract_cli-0.10.6/datacontract/web.py +0 -14
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/LICENSE +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/MANIFEST.in +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/__init__.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/breaking/breaking.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/breaking/breaking_rules.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/catalog/catalog.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/__init__.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/fastjsonschema/check_jsonschema.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/__init__.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/bigquery.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/dask.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/databricks.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/duckdb.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/postgres.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/snowflake.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/connections/sqlserver.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/avro_idl_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/bigquery_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/csv_type_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/dbml_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/dbt_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/go_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/great_expectations_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/html_export.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/jsonschema_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/odcs_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/protobuf_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/pydantic_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/rdf_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/sodacl_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/sql_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/sql_type_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/export/terraform_converter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/imports/avro_importer.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/imports/bigquery_importer.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/imports/jsonschema_importer.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/imports/sql_importer.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/init/download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/integration/publish_datamesh_manager.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/integration/publish_opentelemetry.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/files.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/lint.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/__init__.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/description_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/example_model_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/field_reference_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/notice_period_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/linters/valid_constraints_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/resolve.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/schema.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/lint/urls.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/model/breaking_change.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/model/data_contract_specification.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/model/exceptions.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/model/run.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/publish/publish.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/py.typed +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/datacontract.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/index.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/datacontract_information.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/datacontract_terms.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/definition.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/example.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/model_field.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/partials/server.html +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/templates/style/output.css +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract_cli.egg-info/dependency_links.txt +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract_cli.egg-info/entry_points.txt +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract_cli.egg-info/top_level.txt +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/setup.cfg +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_breaking.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_catalog.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_changelog.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_cli.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_description_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_documentation_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_example_model_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_avro.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_avro_idl.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_bigquery.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_dbml.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_dbt_models.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_dbt_sources.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_dbt_staging_sql.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_go.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_great_expectations.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_html.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_odcs.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_protobuf.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_pydantic.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_rdf.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_sodacl.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_sql.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_sql_query.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_export_terraform.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_field_constraint_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_field_reference_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_import_avro.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_import_bigquery.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_import_jsonschema.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_import_sql.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_integration_datameshmanager.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_integration_opentelemetry.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_lint.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_notice_period_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_schema.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_bigquery.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_examples_csv.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_examples_formats_valid.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_examples_inline.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_examples_json.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_examples_missing.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_kafka_remote.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_local_json.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_parquet.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_csv.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_delta.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_json.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_json_complex.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_json_multiple_models.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_s3_json_remote.py +0 -0
- {datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/tests/test_test_snowflake.py +0 -0
{datacontract_cli-0.10.6/datacontract_cli.egg-info → datacontract_cli-0.10.7}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.6
+Version: 0.10.7
 Summary: Test data contracts
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -49,10 +49,11 @@ Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
 Requires-Dist: moto; extra == "dev"
 Requires-Dist: pymssql==2.3.0; extra == "dev"
-Requires-Dist:
-Requires-Dist: testcontainers
-Requires-Dist: testcontainers
-Requires-Dist: testcontainers
+Requires-Dist: kafka-python; extra == "dev"
+Requires-Dist: testcontainers~=4.5.0; extra == "dev"
+Requires-Dist: testcontainers[minio]; extra == "dev"
+Requires-Dist: testcontainers[postgres]; extra == "dev"
+Requires-Dist: testcontainers[kafka]; extra == "dev"
 Requires-Dist: testcontainers[mssql]; extra == "dev"
 
 # Data Contract CLI
@@ -333,8 +334,10 @@ Supported server types:
 - [s3](#S3)
 - [bigquery](#bigquery)
 - [azure](#azure)
+- [sqlserver](#sqlserver)
 - [databricks](#databricks)
 - [databricks (programmatic)](#databricks-programmatic)
+- [dataframr (programmatic)](#dataframe-programmatic)
 - [snowflake](#snowflake)
 - [kafka](#kafka)
 - [postgres](#postgres)
@@ -448,6 +451,43 @@ Authentication works with an Azure Service Principal (SPN) aka App Registration
 
 
 
+### Sqlserver
+
+Data Contract CLI can test data in MS SQL Server (including Azure SQL, Synapse Analytics SQL Pool).
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: sqlserver
+    host: localhost
+    port: 5432
+    database: tempdb
+    schema: dbo
+    driver: ODBC Driver 18 for SQL Server
+models:
+  my_table_1: # corresponds to a table
+    type: table
+    fields:
+      my_column_1: # corresponds to a column
+        type: varchar
+```
+
+#### Environment Variables
+
+| Environment Variable | Example | Description |
+|----------------------------------|--------------------|-------------|
+| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
+| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
+| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
+| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
+| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
+
+
+
+
 ### Databricks
 
 Works with Unity Catalog and Hive metastore.
@@ -516,6 +556,41 @@ run = data_contract.test()
 run.result
 ```
 
+### Dataframe (programmatic)
+
+Works with Spark DataFrames.
+DataFrames need to be created as named temporary views.
+Multiple temporary views are suppored if your data contract contains multiple models.
+
+Testing DataFrames is useful to test your datasets in a pipeline before writing them to a data source.
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: dataframe
+models:
+  my_table: # corresponds to a temporary view
+    type: table
+    fields: ...
+```
+
+Example code
+```python
+from datacontract.data_contract import DataContract
+
+df.createOrReplaceTempView("my_table")
+
+data_contract = DataContract(
+    data_contract_file="datacontract.yaml",
+    spark=spark,
+)
+run = data_contract.test()
+assert run.result == "passed"
+```
+
 
 ### Snowflake
 
@@ -608,41 +683,6 @@ models:
 
 
 
-### Postgres
-
-Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
-
-#### Example
-
-datacontract.yaml
-```yaml
-servers:
-  postgres:
-    type: sqlserver
-    host: localhost
-    port: 5432
-    database: tempdb
-    schema: dbo
-    driver: ODBC Driver 18 for SQL Server
-models:
-  my_table_1: # corresponds to a table
-    type: table
-    fields:
-      my_column_1: # corresponds to a column
-        type: varchar
-```
-
-#### Environment Variables
-
-| Environment Variable | Example | Description |
-|----------------------------------|--------------------|-------------|
-| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
-| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
-| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
-| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
-| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
-
-
 
 
 ### export
 
@@ -802,41 +842,30 @@ models:
 ```
 Usage: datacontract import [OPTIONS]
 
- Create a data contract from the given source location. Prints to stdout.
-
-╭─ Options
-│ *  --format  [sql|avro|glue|bigquery|jsonschema]  The format of the source file. [default: None] [required]
-│    --source  TEXT
-│
-│ --
-│
-│
-│
-│ --
-
+ Create a data contract from the given source location. Prints to stdout.
+
+╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────╮
+│ *  --format            [sql|avro|glue|bigquery|jsonschema]  The format of the source file. [default: None] [required] │
+│    --source            TEXT  The path to the file or Glue Database that should be imported.          │
+│                              [default: None]                                                         │
+│    --glue-table        TEXT  List of table ids to import from the Glue Database (repeat for          │
+│                              multiple table ids, leave empty for all tables in the dataset).         │
+│                              [default: None]                                                         │
+│    --bigquery-project  TEXT  The bigquery project id. [default: None]                                │
+│    --bigquery-dataset  TEXT  The bigquery dataset id. [default: None]                                │
+│    --bigquery-table    TEXT  List of table ids to import from the bigquery API (repeat for           │
+│                              multiple table ids, leave empty for all tables in the dataset).         │
+│                              [default: None]                                                         │
+│    --help                    Show this message and exit.                                             │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
-As shown, some options are only relevant in certain conditions: For `format` Bigtable we support to directly read off the Bigtable APIs.
-In this case there's no need to specify `source` but instead `bt-project-id`, `bt-dataset-id` and `table` must be specified.
-
-For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
-
 Example:
 ```bash
 # Example import from SQL DDL
 datacontract import --format sql --source my_ddl.sql
 ```
 
-```bash
-# Example import from Bigquery JSON
-datacontract import --format bigquery --source my_bigquery_table.json
-```
-
-```bash
-# Example import from Bigquery API
-datacontract import --format bigquery --btProjectId <project_id> --btDatasetId <dataset_id> --table <tableid_1> --table <tableid_2> --table <tableid_3>
-```
-
 Available import options:
 
 | Type | Description | Status |
@@ -852,6 +881,49 @@ Available import options:
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
+#### BigQuery
+
+Bigquery data can either be imported off of JSON Files generated from the table descriptions or directly from the Bigquery API. In case you want to use JSON Files, specify the `source` parameter with a path to the JSON File.
+
+To import from the Bigquery API, you have to _omit_ `source` and instead need to provide `bigquery-project` and `bigquery-dataset`. Additionally you may specify `bigquery-table` to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the dataset will be imported.
+
+For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
+
+Examples:
+
+```bash
+# Example import from Bigquery JSON
+datacontract import --format bigquery --source my_bigquery_table.json
+```
+
+```bash
+# Example import from Bigquery API with specifying the tables to import
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id> --bigquery-table <tableid_1> --bigquery-table <tableid_2> --bigquery-table <tableid_3>
+```
+
+```bash
+# Example import from Bigquery API importing all tables in the dataset
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id>
+```
+
+### Glue
+
+Importing from Glue reads the necessary Data directly off of the AWS API.
+You may give the `glue-table` parameter to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the database will be imported.
+
+Examples:
+
+```bash
+# Example import from AWS Glue with specifying the tables to import
+datacontract import --format glue --source <database_name> --glue-table <table_name_1> --glue-table <table_name_2> --glue-table <table_name_3>
+```
+
+```bash
+# Example import from AWS Glue importing all tables in the database
+datacontract import --format glue --source <database_name>
+```
+
+
 ### breaking
 
 ```
{datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/README.md
RENAMED

@@ -276,8 +276,10 @@ Supported server types:
 - [s3](#S3)
 - [bigquery](#bigquery)
 - [azure](#azure)
+- [sqlserver](#sqlserver)
 - [databricks](#databricks)
 - [databricks (programmatic)](#databricks-programmatic)
+- [dataframr (programmatic)](#dataframe-programmatic)
 - [snowflake](#snowflake)
 - [kafka](#kafka)
 - [postgres](#postgres)
@@ -391,6 +393,43 @@ Authentication works with an Azure Service Principal (SPN) aka App Registration
 
 
 
+### Sqlserver
+
+Data Contract CLI can test data in MS SQL Server (including Azure SQL, Synapse Analytics SQL Pool).
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: sqlserver
+    host: localhost
+    port: 5432
+    database: tempdb
+    schema: dbo
+    driver: ODBC Driver 18 for SQL Server
+models:
+  my_table_1: # corresponds to a table
+    type: table
+    fields:
+      my_column_1: # corresponds to a column
+        type: varchar
+```
+
+#### Environment Variables
+
+| Environment Variable | Example | Description |
+|----------------------------------|--------------------|-------------|
+| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
+| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
+| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
+| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
+| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
+
+
+
+
 ### Databricks
 
 Works with Unity Catalog and Hive metastore.
@@ -459,6 +498,41 @@ run = data_contract.test()
 run.result
 ```
 
+### Dataframe (programmatic)
+
+Works with Spark DataFrames.
+DataFrames need to be created as named temporary views.
+Multiple temporary views are suppored if your data contract contains multiple models.
+
+Testing DataFrames is useful to test your datasets in a pipeline before writing them to a data source.
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: dataframe
+models:
+  my_table: # corresponds to a temporary view
+    type: table
+    fields: ...
+```
+
+Example code
+```python
+from datacontract.data_contract import DataContract
+
+df.createOrReplaceTempView("my_table")
+
+data_contract = DataContract(
+    data_contract_file="datacontract.yaml",
+    spark=spark,
+)
+run = data_contract.test()
+assert run.result == "passed"
+```
+
 
 ### Snowflake
 
@@ -551,41 +625,6 @@ models:
 
 
 
-### Postgres
-
-Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
-
-#### Example
-
-datacontract.yaml
-```yaml
-servers:
-  postgres:
-    type: sqlserver
-    host: localhost
-    port: 5432
-    database: tempdb
-    schema: dbo
-    driver: ODBC Driver 18 for SQL Server
-models:
-  my_table_1: # corresponds to a table
-    type: table
-    fields:
-      my_column_1: # corresponds to a column
-        type: varchar
-```
-
-#### Environment Variables
-
-| Environment Variable | Example | Description |
-|----------------------------------|--------------------|-------------|
-| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
-| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
-| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
-| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
-| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
-
-
 
 
 ### export
 
@@ -745,41 +784,30 @@ models:
 ```
 Usage: datacontract import [OPTIONS]
 
- Create a data contract from the given source location. Prints to stdout.
-
-╭─ Options
-│ *  --format  [sql|avro|glue|bigquery|jsonschema]  The format of the source file. [default: None] [required]
-│    --source  TEXT
-│
-│ --
-│
-│
-│
-│ --
-
+ Create a data contract from the given source location. Prints to stdout.
+
+╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────╮
+│ *  --format            [sql|avro|glue|bigquery|jsonschema]  The format of the source file. [default: None] [required] │
+│    --source            TEXT  The path to the file or Glue Database that should be imported.          │
+│                              [default: None]                                                         │
+│    --glue-table        TEXT  List of table ids to import from the Glue Database (repeat for          │
+│                              multiple table ids, leave empty for all tables in the dataset).         │
+│                              [default: None]                                                         │
+│    --bigquery-project  TEXT  The bigquery project id. [default: None]                                │
+│    --bigquery-dataset  TEXT  The bigquery dataset id. [default: None]                                │
+│    --bigquery-table    TEXT  List of table ids to import from the bigquery API (repeat for           │
+│                              multiple table ids, leave empty for all tables in the dataset).         │
+│                              [default: None]                                                         │
+│    --help                    Show this message and exit.                                             │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
-As shown, some options are only relevant in certain conditions: For `format` Bigtable we support to directly read off the Bigtable APIs.
-In this case there's no need to specify `source` but instead `bt-project-id`, `bt-dataset-id` and `table` must be specified.
-
-For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
-
 Example:
 ```bash
 # Example import from SQL DDL
 datacontract import --format sql --source my_ddl.sql
 ```
 
-```bash
-# Example import from Bigquery JSON
-datacontract import --format bigquery --source my_bigquery_table.json
-```
-
-```bash
-# Example import from Bigquery API
-datacontract import --format bigquery --btProjectId <project_id> --btDatasetId <dataset_id> --table <tableid_1> --table <tableid_2> --table <tableid_3>
-```
-
 Available import options:
 
 | Type | Description | Status |
@@ -795,6 +823,49 @@ Available import options:
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
+#### BigQuery
+
+Bigquery data can either be imported off of JSON Files generated from the table descriptions or directly from the Bigquery API. In case you want to use JSON Files, specify the `source` parameter with a path to the JSON File.
+
+To import from the Bigquery API, you have to _omit_ `source` and instead need to provide `bigquery-project` and `bigquery-dataset`. Additionally you may specify `bigquery-table` to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the dataset will be imported.
+
+For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
+
+Examples:
+
+```bash
+# Example import from Bigquery JSON
+datacontract import --format bigquery --source my_bigquery_table.json
+```
+
+```bash
+# Example import from Bigquery API with specifying the tables to import
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id> --bigquery-table <tableid_1> --bigquery-table <tableid_2> --bigquery-table <tableid_3>
+```
+
+```bash
+# Example import from Bigquery API importing all tables in the dataset
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id>
+```
+
+### Glue
+
+Importing from Glue reads the necessary Data directly off of the AWS API.
+You may give the `glue-table` parameter to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the database will be imported.
+
+Examples:
+
+```bash
+# Example import from AWS Glue with specifying the tables to import
+datacontract import --format glue --source <database_name> --glue-table <table_name_1> --glue-table <table_name_2> --glue-table <table_name_3>
+```
+
+```bash
+# Example import from AWS Glue importing all tables in the database
+datacontract import --format glue --source <database_name>
+```
+
+
 ### breaking
 
 ```
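The new Sqlserver section above configures credentials purely through environment variables. As a hedged illustration (not part of the diff), they can be combined with the programmatic API roughly like this; the example values come from the table above, and the `server="production"` argument is assumed to mirror the CLI's `--server` option:

```python
import os

from datacontract.data_contract import DataContract

# Example values from the environment-variable table above; real
# deployments would set these outside the process.
os.environ["DATACONTRACT_SQLSERVER_USERNAME"] = "root"
os.environ["DATACONTRACT_SQLSERVER_PASSWORD"] = "toor"
os.environ["DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE"] = "True"

# Run the contract's checks against the "production" sqlserver server
# defined in datacontract.yaml.
run = DataContract(data_contract_file="datacontract.yaml", server="production").test()
print(run.result)
```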
{datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/cli.py
RENAMED

@@ -13,7 +13,7 @@ from typer.core import TyperGroup
 from typing_extensions import Annotated
 
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
-from datacontract.data_contract import DataContract
+from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager
@@ -141,28 +141,6 @@ def test(
     _handle_result(run)
 
 
-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
@@ -231,6 +209,12 @@ def import_(
     source: Annotated[
         Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")
     ] = None,
+    glue_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the Glue Database (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
     bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
     bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
     bigquery_table: Annotated[
@@ -243,7 +227,7 @@ def import_(
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
     console.print(result.to_yaml())
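The net effect of the cli.py changes is a repeatable `--glue-table` option that is threaded through to `DataContract.import_from_source`. A minimal sketch of the equivalent programmatic call, assuming a Glue database `my_glue_db` and a table `orders` (both hypothetical names):

```python
from datacontract.data_contract import DataContract

# Equivalent to:
#   datacontract import --format glue --source my_glue_db --glue-table orders
# "my_glue_db" and "orders" are hypothetical; AWS credentials must be
# available in the environment for the Glue API calls.
spec = DataContract().import_from_source(
    "glue",
    "my_glue_db",
    glue_tables=["orders"],  # new in 0.10.7; omit to import every table
)
print(spec.to_yaml())
```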
{datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/data_contract.py
RENAMED

@@ -2,6 +2,7 @@ import json
 import logging
 import tempfile
 import typing
+from enum import Enum
 
 import yaml
 from pyspark.sql import SparkSession
@@ -49,6 +50,28 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
 
 
+class ExportFormat(str, Enum):
+    jsonschema = "jsonschema"
+    pydantic_model = "pydantic-model"
+    sodacl = "sodacl"
+    dbt = "dbt"
+    dbt_sources = "dbt-sources"
+    dbt_staging_sql = "dbt-staging-sql"
+    odcs = "odcs"
+    rdf = "rdf"
+    avro = "avro"
+    protobuf = "protobuf"
+    great_expectations = "great-expectations"
+    terraform = "terraform"
+    avro_idl = "avro-idl"
+    sql = "sql"
+    sql_query = "sql-query"
+    html = "html"
+    go = "go"
+    bigquery = "bigquery"
+    dbml = "dbml"
+
+
 class DataContract:
     def __init__(
         self,
@@ -195,10 +218,13 @@ class DataContract:
             run.outputPortId = server.outputPortId
             run.server = server_name
 
-            #
-
+            # TODO check server is supported type for nicer error messages
+
+            # TODO check server credentials are complete for nicer error messages
+
             if server.format == "json" and server.type != "kafka":
                 check_jsonschema(run, data_contract, server)
+
             check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
 
         except DataContractException as e:
@@ -275,7 +301,9 @@ class DataContract:
             inline_quality=self._inline_quality,
         )
 
-    def export(
+    def export(
+        self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
+    ) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
@@ -422,6 +450,7 @@ class DataContract:
         self,
         format: str,
         source: typing.Optional[str] = None,
+        glue_tables: typing.Optional[typing.List[str]] = None,
         bigquery_tables: typing.Optional[typing.List[str]] = None,
         bigquery_project: typing.Optional[str] = None,
         bigquery_dataset: typing.Optional[str] = None,
@@ -433,7 +462,7 @@ class DataContract:
         elif format == "avro":
             data_contract_specification = import_avro(data_contract_specification, source)
         elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source)
+            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
         elif format == "jsonschema":
             data_contract_specification = import_jsonschema(data_contract_specification, source)
         elif format == "bigquery":
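With `ExportFormat` relocated into `datacontract/data_contract.py` (and re-exported by the CLI module, as the cli.py import hunk shows), programmatic exports no longer need to import from `datacontract.cli`. A minimal sketch against the new `export` signature shown above, assuming a local `datacontract.yaml` and the `my_table_1` model from the docs:

```python
from datacontract.data_contract import DataContract, ExportFormat

contract = DataContract(data_contract_file="datacontract.yaml")

# export(export_format, model="all", rdf_base=None, sql_server_type="auto") -> str
sql_ddl = contract.export(ExportFormat.sql)                          # all models
json_schema = contract.export(ExportFormat.jsonschema, model="my_table_1")
print(sql_ddl)
print(json_schema)
```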
{datacontract_cli-0.10.6 → datacontract_cli-0.10.7}/datacontract/engines/soda/check_soda_execute.py
RENAMED

@@ -64,6 +64,15 @@ def check_soda_execute(
         soda_configuration_str = to_databricks_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "dataframe":
+        if spark is None:
+            run.log_warn("Server type dataframe only works with the Python library and requires a Spark session, "
+                         "please provide one with the DataContract class")
+            return
+        else:
+            logging.info("Use Spark to connect to data source")
+            scan.add_spark_session(spark, data_source_name="datacontract-cli")
+            scan.set_data_source_name("datacontract-cli")
     elif server.type == "kafka":
         if spark is None:
             spark = create_spark_session(tmp_dir)
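This new `dataframe` branch is what backs the "Dataframe (programmatic)" README section above: instead of opening a database connection, it registers the caller's Spark session with the Soda scan via `scan.add_spark_session`. A minimal end-to-end sketch based on the documented usage, assuming a contract whose model name matches the temporary view:

```python
from pyspark.sql import SparkSession

from datacontract.data_contract import DataContract

spark = SparkSession.builder.appName("datacontract-test").getOrCreate()

# The view name must match the model name in datacontract.yaml
# (server type "dataframe").
df = spark.createDataFrame([(1, "a")], ["id", "value"])
df.createOrReplaceTempView("my_table")

# The session passed here is the one check_soda_execute hands to
# scan.add_spark_session when server.type == "dataframe".
run = DataContract(data_contract_file="datacontract.yaml", spark=spark).test()
assert run.result == "passed"
```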