datacontract-cli 0.10.12__tar.gz → 0.10.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- {datacontract_cli-0.10.12/datacontract_cli.egg-info → datacontract_cli-0.10.13}/PKG-INFO +27 -11
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/README.md +16 -1
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/kafka.py +3 -2
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/bigquery_converter.py +30 -23
- datacontract_cli-0.10.13/datacontract/export/data_caterer_converter.py +148 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/exporter.py +1 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/exporter_factory.py +6 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/jsonschema_converter.py +13 -2
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/spark_converter.py +1 -1
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/sql_type_converter.py +1 -10
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/spark_importer.py +5 -1
- datacontract_cli-0.10.13/datacontract/imports/unity_importer.py +173 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/resolve.py +10 -1
- datacontract_cli-0.10.13/datacontract/lint/urls.py +54 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13/datacontract_cli.egg-info}/PKG-INFO +27 -11
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract_cli.egg-info/SOURCES.txt +2 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract_cli.egg-info/requires.txt +10 -9
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/pyproject.toml +11 -10
- datacontract_cli-0.10.13/tests/test_export_data_caterer.py +90 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_jsonschema.py +4 -4
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_unity_file.py +30 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_examples_inline.py +1 -2
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_kafka.py +13 -26
- datacontract_cli-0.10.13/tests/test_web.py +37 -0
- datacontract_cli-0.10.12/datacontract/imports/unity_importer.py +0 -152
- datacontract_cli-0.10.12/datacontract/lint/urls.py +0 -40
- datacontract_cli-0.10.12/tests/test_web.py +0 -34
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/LICENSE +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/MANIFEST.in +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/__init__.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/breaking/breaking.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/breaking/breaking_rules.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/catalog/catalog.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/cli.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/data_contract.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/__init__.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/fastjsonschema/check_jsonschema.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/__init__.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/check_soda_execute.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/bigquery.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/dask.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/databricks.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/duckdb.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/postgres.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/snowflake.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/sqlserver.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/trino.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/__init__.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/avro_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/avro_idl_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/csv_type_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/dbml_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/dbt_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/go_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/great_expectations_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/html_export.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/odcs_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/protobuf_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/pydantic_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/rdf_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/sodacl_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/sql_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/sqlalchemy_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/terraform_converter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/avro_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/bigquery_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/dbml_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/dbt_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/glue_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/importer_factory.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/jsonschema_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/odcs_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/sql_importer.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/init/download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/integration/datamesh_manager.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/integration/opentelemetry.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/files.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/lint.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/__init__.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/description_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/example_model_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/field_reference_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/notice_period_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/linters/valid_constraints_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/lint/schema.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/model/breaking_change.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/model/data_contract_specification.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/model/exceptions.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/model/run.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/py.typed +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/datacontract.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/index.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/datacontract_information.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/datacontract_terms.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/definition.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/example.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/model_field.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/partials/server.html +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/templates/style/output.css +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/web.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract_cli.egg-info/dependency_links.txt +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract_cli.egg-info/entry_points.txt +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract_cli.egg-info/top_level.txt +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/setup.cfg +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_breaking.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_catalog.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_changelog.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_cli.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_description_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_documentation_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_download_datacontract_file.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_example_model_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_avro.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_avro_idl.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_bigquery.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_complex_data_contract.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_custom_exporter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_dbml.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_dbt_models.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_dbt_sources.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_dbt_staging_sql.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_go.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_great_expectations.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_html.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_odcs.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_protobuf.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_pydantic.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_rdf.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_sodacl.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_spark.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_sql.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_sql_query.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_sqlalchemy.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_export_terraform.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_field_constraint_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_field_pattern_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_field_reference_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_avro.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_bigquery.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_dbml.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_dbt.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_glue.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_jsonschema.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_odcs.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_spark.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_import_sql.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_integration_datameshmanager.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_integration_opentelemetry.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_lint.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_notice_period_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_quality_schema_linter.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_resolve.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_roundtrip_jsonschema.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_spec_fields_field.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_spec_ref.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_azure_parquet_remote.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_bigquery.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_databricks.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_dataframe.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_delta.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_examples_csv.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_examples_formats_valid.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_examples_json.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_examples_missing.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_gcs_json_remote.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_kafka_remote.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_local_json.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_parquet.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_postgres.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_csv.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_delta.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_json.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_json_complex.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_json_multiple_models.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_s3_json_remote.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_snowflake.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_sqlserver.py +0 -0
- {datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/tests/test_test_trino.py +0 -0
{datacontract_cli-0.10.12/datacontract_cli.egg-info → datacontract_cli-0.10.13}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.12
+Version: 0.10.13
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -12,16 +12,16 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: typer<0.13,>=0.12
-Requires-Dist: pydantic<2.
+Requires-Dist: pydantic<2.10.0,>=2.8.2
 Requires-Dist: pyyaml~=6.0.1
 Requires-Dist: requests<2.33,>=2.31
-Requires-Dist: fastapi==0.
-Requires-Dist: uvicorn==0.30.
+Requires-Dist: fastapi==0.114.2
+Requires-Dist: uvicorn==0.30.6
 Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
 Requires-Dist: fastparquet==2024.5.0
 Requires-Dist: python-multipart==0.0.9
-Requires-Dist: rich
-Requires-Dist: simple-ddl-parser==1.6.
+Requires-Dist: rich<13.9,>=13.7
+Requires-Dist: simple-ddl-parser==1.6.1
 Requires-Dist: duckdb==1.0.0
 Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
 Requires-Dist: setuptools>=60
@@ -29,7 +29,7 @@ Requires-Dist: python-dotenv~=1.0.0
 Requires-Dist: rdflib==7.0.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
 Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
-Requires-Dist: boto3<1.35.
+Requires-Dist: boto3<1.35.20,>=1.34.41
 Requires-Dist: jinja_partials>=0.2.1
 Provides-Extra: avro
 Requires-Dist: avro==1.12.0; extra == "avro"
@@ -37,7 +37,8 @@ Provides-Extra: bigquery
 Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
 Provides-Extra: databricks
 Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
-Requires-Dist: databricks-sql-connector<3.
+Requires-Dist: databricks-sql-connector<3.5.0,>=3.1.2; extra == "databricks"
+Requires-Dist: databricks-sdk<0.33.0,>=0.32.0; extra == "databricks"
 Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
 Provides-Extra: kafka
 Requires-Dist: datacontract-cli[avro]; extra == "kafka"
@@ -45,7 +46,7 @@ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
 Provides-Extra: postgres
 Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
 Provides-Extra: s3
-Requires-Dist: s3fs==2024.
+Requires-Dist: s3fs==2024.9.0; extra == "s3"
 Provides-Extra: snowflake
 Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
 Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
@@ -63,7 +64,7 @@ Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
 Requires-Dist: httpx==0.27.2; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.
+Requires-Dist: moto==5.0.14; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
 Requires-Dist: pyarrow>=12.0.0; extra == "dev"
@@ -825,7 +826,7 @@ models:
 │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
 │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
 │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
-│ y|dbml|spark|sqlalchemy]
+│ y|dbml|spark|sqlalchemy|data-caterer] │
 │ --output PATH Specify the file path where the exported data will be │
 │ saved. If no path is provided, the output will be │
 │ printed to stdout. │
@@ -877,6 +878,7 @@ Available export options:
 | `DBML` | Export to a DBML Diagram description | ✅ |
 | `spark` | Export to a Spark StructType | ✅ |
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
+| `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
 #### Great Expectations
@@ -940,6 +942,20 @@ To specify custom Avro properties in your data contract, you can define them wit
 
 >NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.htm)
 
+#### Data Caterer
+
+The export function converts the data contract to a data generation task in YAML format that can be
+ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
+ability to generate production-like data in any environment based off your data contract.
+
+```shell
+datacontract export datacontract.yaml --format data-caterer --model orders
+```
+
+You can further customise the way data is generated via adding
+[additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
+to suit your needs.
+
 #### Example Configuration
 
 ```yaml
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/README.md
RENAMED
@@ -749,7 +749,7 @@ models:
 │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
 │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
 │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
-│ y|dbml|spark|sqlalchemy]
+│ y|dbml|spark|sqlalchemy|data-caterer] │
 │ --output PATH Specify the file path where the exported data will be │
 │ saved. If no path is provided, the output will be │
 │ printed to stdout. │
@@ -801,6 +801,7 @@ Available export options:
 | `DBML` | Export to a DBML Diagram description | ✅ |
 | `spark` | Export to a Spark StructType | ✅ |
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
+| `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
 #### Great Expectations
@@ -864,6 +865,20 @@ To specify custom Avro properties in your data contract, you can define them wit
 
 >NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.htm)
 
+#### Data Caterer
+
+The export function converts the data contract to a data generation task in YAML format that can be
+ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
+ability to generate production-like data in any environment based off your data contract.
+
+```shell
+datacontract export datacontract.yaml --format data-caterer --model orders
+```
+
+You can further customise the way data is generated via adding
+[additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
+to suit your needs.
+
 #### Example Configuration
 
 ```yaml
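For orientation, here is a minimal sketch of the task structure this new export emits, based on the `data_caterer_converter.py` module further down in this diff. The contract title, keys, and field names are illustrative, not taken from a real contract:

```python
import yaml

# Illustrative shape of one generation task; mirrors the step dict built by
# _to_data_caterer_generate_step() in data_caterer_converter.py below.
task = {
    "name": "Orders Data Contract",  # taken from the contract's info.title
    "steps": [
        {
            "name": "orders",        # one step per model
            "type": "postgres",      # server type, or file format for s3/gcs/azure/local
            "options": {"schema": "public", "table": "orders"},
            "schema": [
                {
                    "name": "order_id",
                    "type": "string",
                    "generator": {"options": {"isUnique": True}},
                }
            ],
        }
    ],
}
print(yaml.dump(task, indent=2, sort_keys=False))
```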
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/engines/soda/connections/kafka.py
RENAMED
@@ -25,9 +25,10 @@ def create_spark_session(tmp_dir: str):
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
         .config("spark.streaming.stopGracefullyOnShutdown", "true")
+        .config("spark.ui.enabled", "false")
         .config(
             "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.
+            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.2,org.apache.spark:spark-avro_2.12:3.5.2",
         )
         .getOrCreate()
     )
@@ -112,7 +113,7 @@ def get_auth_options():
     kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
     kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
 
-    if kafka_sasl_username is None:
+    if kafka_sasl_username is None or kafka_sasl_username == "":
        return {}
 
     return {
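The second hunk makes the SASL guard treat an empty `DATACONTRACT_KAFKA_SASL_USERNAME` the same as an unset one. A minimal sketch of the pattern, with an illustrative return shape (the real dict is built in `kafka.py`):

```python
import os

def get_auth_options() -> dict:
    kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
    kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
    # None (unset) and "" (set but empty) both mean: no SASL authentication.
    if kafka_sasl_username is None or kafka_sasl_username == "":
        return {}
    # Illustrative keys only; the real option names live in kafka.py.
    return {"username": kafka_sasl_username, "password": kafka_sasl_password}
```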
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/bigquery_converter.py
RENAMED
@@ -44,7 +44,7 @@ def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:
 
 
 def to_field(field_name: str, field: Field) -> dict:
-    bq_type = map_type_to_bigquery(field
+    bq_type = map_type_to_bigquery(field)
     bq_field = {
         "name": field_name,
         "type": bq_type,
@@ -59,10 +59,8 @@ def to_field(field_name: str, field: Field) -> dict:
             # in case the array type is a complex object, we want to copy all its fields
             bq_field["fields"] = to_fields_array(field.items.fields)
         else:
-
-
-                {f"{field_name}_1": Field(type=field.items.type, required=False, description="")}
-            )
+            bq_field["type"] = map_type_to_bigquery(field.items)
 
     # all of these can carry other fields
     elif bq_type.lower() in ["record", "struct"]:
         bq_field["fields"] = to_fields_array(field.fields)
@@ -79,37 +77,46 @@ def to_field(field_name: str, field: Field) -> dict:
     return bq_field
 
 
-def map_type_to_bigquery(
+def map_type_to_bigquery(field: Field) -> str:
     logger = logging.getLogger(__name__)
-
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["string", "varchar", "text"]:
         return "STRING"
-    elif
+    elif field_type.lower() == "bytes":
         return "BYTES"
-    elif
+    elif field_type.lower() in ["int", "integer"]:
         return "INTEGER"
-    elif
+    elif field_type.lower() in ["long", "bigint"]:
         return "INT64"
-    elif
-        return "
-    elif
+    elif field_type.lower() == "float":
+        return "FLOAT64"
+    elif field_type.lower() == "boolean":
         return "BOOL"
-    elif
+    elif field_type.lower() in ["timestamp", "timestamp_tz"]:
         return "TIMESTAMP"
-    elif
+    elif field_type.lower() == "date":
         return "DATE"
-    elif
+    elif field_type.lower() == "timestamp_ntz":
         return "TIME"
-    elif
+    elif field_type.lower() in ["number", "decimal", "numeric"]:
         return "NUMERIC"
-    elif
+    elif field_type.lower() == "double":
         return "BIGNUMERIC"
-    elif
+    elif field_type.lower() in ["object", "record", "array"]:
         return "RECORD"
-    elif
+    elif field_type.lower() == "struct":
         return "STRUCT"
-    elif
+    elif field_type.lower() == "null":
         logger.info(
-            f"Can't properly map {
+            f"Can't properly map {field.title} to bigquery Schema, as 'null' \
+            is not supported as a type. Mapping it to STRING."
         )
         return "STRING"
     else:
@@ -117,6 +124,6 @@ def map_type_to_bigquery(type_str: str, field_name: str) -> str:
             type="schema",
             result="failed",
             name="Map datacontract type to bigquery data type",
-            reason=f"Unsupported type {
+            reason=f"Unsupported type {field_type} in data contract definition.",
             engine="datacontract",
         )
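With the new signature, callers hand over the whole `Field`, which lets an explicit `config.bigqueryType` win over the default type mapping. A small usage sketch, assuming `Field` accepts these keyword arguments (they appear as attributes in this diff):

```python
from datacontract.model.data_contract_specification import Field
from datacontract.export.bigquery_converter import map_type_to_bigquery

print(map_type_to_bigquery(Field(type="decimal")))  # NUMERIC
# The config override takes precedence over the type-based mapping:
print(map_type_to_bigquery(Field(type="decimal", config={"bigqueryType": "BIGNUMERIC"})))  # BIGNUMERIC
```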
datacontract_cli-0.10.13/datacontract/export/data_caterer_converter.py
ADDED
@@ -0,0 +1,148 @@
+from typing import Dict
+
+import yaml
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Server
+
+
+class DataCatererExporter(Exporter):
+    """
+    Exporter class for Data Caterer.
+    Creates a YAML file, based on the data contract, for Data Caterer to generate synthetic data.
+    """
+
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_data_caterer_generate_yaml(data_contract, server)
+
+
+def to_data_caterer_generate_yaml(data_contract_spec: DataContractSpecification, server):
+    generation_task = {"name": data_contract_spec.info.title, "steps": []}
+    server_info = _get_server_info(data_contract_spec, server)
+
+    for model_key, model_value in data_contract_spec.models.items():
+        odcs_table = _to_data_caterer_generate_step(model_key, model_value, server_info)
+        generation_task["steps"].append(odcs_table)
+    return yaml.dump(generation_task, indent=2, sort_keys=False, allow_unicode=True)
+
+
+def _get_server_info(data_contract_spec: DataContractSpecification, server):
+    if server is not None and server in data_contract_spec.servers:
+        return data_contract_spec.servers.get(server)
+    elif server is not None:
+        raise Exception(f"Server name not found in servers list in data contract, server-name={server}")
+    elif len(data_contract_spec.servers.keys()) > 0:
+        return next(iter(data_contract_spec.servers.values()))
+    else:
+        return None
+
+
+def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server) -> dict:
+    step = {
+        "name": model_key,
+        "type": _to_step_type(server),
+        "options": _to_data_source_options(model_key, server),
+        "schema": [],
+    }
+    fields = _to_fields(model_value.fields)
+    if fields:
+        step["schema"] = fields
+    return step
+
+
+def _to_step_type(server: Server):
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            return server.format
+        else:
+            return server.type
+    else:
+        return "csv"
+
+
+def _to_data_source_options(model_key, server: Server):
+    options = {}
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            if server.path is not None:
+                options["path"] = server.path
+            elif server.location is not None:
+                options["path"] = server.location
+            else:
+                options["path"] = "/tmp/data_caterer_data"
+        elif server.type == "postgres":
+            options["schema"] = server.schema_
+            options["table"] = model_key
+        elif server.type == "kafka":
+            options["topic"] = server.topic
+
+    return options
+
+
+def _to_fields(fields: Dict[str, Field]) -> list:
+    dc_fields = []
+    for field_name, field in fields.items():
+        column = _to_field(field_name, field)
+        dc_fields.append(column)
+    return dc_fields
+
+
+def _to_field(field_name: str, field: Field) -> dict:
+    dc_field = {"name": field_name}
+    dc_generator_opts = {}
+
+    if field.type is not None:
+        new_type = _to_data_type(field.type)
+        dc_field["type"] = _to_data_type(field.type)
+        if new_type == "object" or new_type == "record" or new_type == "struct":
+            # need to get nested field definitions
+            nested_fields = _to_fields(field.fields)
+            dc_field["schema"] = {"fields": nested_fields}
+
+    if field.enum is not None and len(field.enum) > 0:
+        dc_generator_opts["oneOf"] = field.enum
+    if field.unique is not None and field.unique:
+        dc_generator_opts["isUnique"] = field.unique
+    if field.minLength is not None:
+        dc_generator_opts["minLength"] = field.minLength
+    if field.maxLength is not None:
+        dc_generator_opts["maxLength"] = field.maxLength
+    if field.pattern is not None:
+        dc_generator_opts["regex"] = field.pattern
+    if field.minimum is not None:
+        dc_generator_opts["min"] = field.minimum
+    if field.maximum is not None:
+        dc_generator_opts["max"] = field.maximum
+
+    if len(dc_generator_opts.keys()) > 0:
+        dc_field["generator"] = {"options": dc_generator_opts}
+    return dc_field
+
+
+def _to_data_type(data_type):
+    if data_type == "number" or data_type == "numeric" or data_type == "double":
+        return "double"
+    elif data_type == "decimal" or data_type == "bigint":
+        return "decimal"
+    elif data_type == "int":
+        return "integer"
+    elif data_type == "long":
+        return "long"
+    elif data_type == "float":
+        return "float"
+    elif data_type == "string" or data_type == "text" or data_type == "varchar":
+        return "string"
+    if data_type == "boolean":
+        return "boolean"
+    if data_type == "timestamp" or data_type == "timestamp_tz" or data_type == "timestamp_ntz":
+        return "timestamp"
+    elif data_type == "date":
+        return "date"
+    elif data_type == "array":
+        return "array"
+    elif data_type == "map" or data_type == "object" or data_type == "record" or data_type == "struct":
+        return "struct"
+    elif data_type == "bytes":
+        return "binary"
+    else:
+        return "string"
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/exporter_factory.py
RENAMED
@@ -62,6 +62,12 @@ exporter_factory.register_lazy_exporter(
     class_name="BigQueryExporter",
 )
 
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.data_caterer,
+    module_path="datacontract.export.data_caterer_converter",
+    class_name="DataCatererExporter",
+)
+
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dbml, module_path="datacontract.export.dbml_converter", class_name="DbmlExporter"
 )
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/jsonschema_converter.py
RENAMED
@@ -36,7 +36,19 @@ def to_property(field: Field) -> dict:
     property = {}
     json_type, json_format = convert_type_format(field.type, field.format)
     if json_type is not None:
-
+        if not field.required:
+            """
+            From: https://json-schema.org/understanding-json-schema/reference/type
+            The type keyword may either be a string or an array:
+
+            If it's a string, it is the name of one of the basic types above.
+            If it is an array, it must be an array of strings, where each string
+            is the name of one of the basic types, and each element is unique.
+            In this case, the JSON snippet is valid if it matches any of the given types.
+            """
+            property["type"] = [json_type, "null"]
+        else:
+            property["type"] = json_type
     if json_format is not None:
         property["format"] = json_format
     if field.unique:
@@ -50,7 +62,6 @@ def to_property(field: Field) -> dict:
         property["required"] = to_required(field.fields)
     if json_type == "array":
         property["items"] = to_property(field.items)
-
     if field.pattern:
         property["pattern"] = field.pattern
     if field.enum:
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/spark_converter.py
RENAMED
@@ -128,7 +128,7 @@ def to_data_type(field: Field) -> types.DataType:
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
-        return types.DecimalType()
+        return types.DecimalType(precision=field.precision, scale=field.scale)
     if field_type in ["integer", "int"]:
         return types.IntegerType()
     if field_type == "long":
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/export/sql_type_converter.py
RENAMED
@@ -269,16 +269,7 @@ def convert_type_to_sqlserver(field: Field) -> None | str:
 
 def convert_type_to_bigquery(field: Field) -> None | str:
     """Convert from supported datacontract types to equivalent bigquery types"""
-
-    if not field_type:
-        return None
-
-    # If provided sql-server config type, prefer it over default mapping
-    if bigquery_type := get_type_config(field, "bigqueryType"):
-        return bigquery_type
-
-    field_type = field_type.lower()
-    return map_type_to_bigquery(field_type, field.title)
+    return map_type_to_bigquery(field)
 
 
 def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
{datacontract_cli-0.10.12 → datacontract_cli-0.10.13}/datacontract/imports/spark_importer.py
RENAMED
@@ -80,6 +80,8 @@ def _field_from_struct_type(spark_field: types.StructField) -> Field:
     """
     field = Field()
     field.required = not spark_field.nullable
+    field.description = spark_field.metadata.get("comment")
+
     return _type_from_data_type(field, spark_field.dataType)
 
 
@@ -121,7 +123,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
     """
     if isinstance(spark_type, types.StringType):
         return "string"
-    elif isinstance(spark_type, types.IntegerType):
+    elif isinstance(spark_type, (types.IntegerType, types.ShortType)):
         return "integer"
     elif isinstance(spark_type, types.LongType):
         return "long"
@@ -149,5 +151,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
         return "decimal"
     elif isinstance(spark_type, types.NullType):
         return "null"
+    elif isinstance(spark_type, types.VarcharType):
+        return "varchar"
     else:
         raise ValueError(f"Unsupported Spark type: {spark_type}")
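A short illustration of what the importer now picks up, assuming a local PySpark installation: column comments travel in `StructField` metadata and land in `Field.description`, and `ShortType` columns map to `integer`:

```python
from pyspark.sql import types

schema = types.StructType([
    types.StructField(
        "qty", types.ShortType(), True,
        metadata={"comment": "ordered quantity"},  # imported as Field.description
    ),
])
print(schema["qty"].metadata.get("comment"))  # ordered quantity
```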