datacontract-cli 0.10.11__tar.gz → 0.10.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of datacontract-cli has been flagged as potentially problematic.

Files changed (184)
  1. {datacontract_cli-0.10.11/datacontract_cli.egg-info → datacontract_cli-0.10.12}/PKG-INFO +103 -28
  2. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/README.md +80 -4
  3. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/cli.py +19 -3
  4. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/data_contract.py +5 -10
  5. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/fastjsonschema/check_jsonschema.py +11 -0
  6. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +2 -0
  7. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/check_soda_execute.py +2 -8
  8. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/duckdb.py +23 -24
  9. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/kafka.py +81 -23
  10. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/avro_converter.py +12 -2
  11. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/dbml_converter.py +3 -2
  12. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/exporter.py +1 -0
  13. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/exporter_factory.py +6 -0
  14. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/spark_converter.py +4 -0
  15. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sql_type_converter.py +64 -29
  16. datacontract_cli-0.10.12/datacontract/export/sqlalchemy_converter.py +169 -0
  17. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/avro_importer.py +1 -0
  18. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/bigquery_importer.py +2 -2
  19. datacontract_cli-0.10.12/datacontract/imports/dbml_importer.py +112 -0
  20. datacontract_cli-0.10.12/datacontract/imports/dbt_importer.py +93 -0
  21. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/glue_importer.py +62 -58
  22. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/importer.py +2 -1
  23. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/importer_factory.py +5 -0
  24. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/odcs_importer.py +1 -1
  25. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/spark_importer.py +29 -10
  26. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/sql_importer.py +1 -1
  27. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/unity_importer.py +1 -1
  28. datacontract_cli-0.10.11/datacontract/integration/publish_datamesh_manager.py → datacontract_cli-0.10.12/datacontract/integration/datamesh_manager.py +33 -5
  29. datacontract_cli-0.10.11/datacontract/integration/publish_opentelemetry.py → datacontract_cli-0.10.12/datacontract/integration/opentelemetry.py +1 -1
  30. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/data_contract_specification.py +6 -2
  31. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12/datacontract_cli.egg-info}/PKG-INFO +103 -28
  32. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/SOURCES.txt +9 -3
  33. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/requires.txt +23 -23
  34. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/pyproject.toml +36 -31
  35. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_breaking.py +1 -3
  36. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_catalog.py +1 -2
  37. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_changelog.py +1 -3
  38. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_cli.py +1 -3
  39. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_avro.py +23 -2
  40. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_bigquery.py +1 -3
  41. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_complex_data_contract.py +3 -3
  42. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_custom_exporter.py +1 -2
  43. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbml.py +5 -3
  44. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_models.py +1 -2
  45. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_sources.py +1 -3
  46. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_dbt_staging_sql.py +1 -3
  47. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_go.py +1 -3
  48. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_great_expectations.py +1 -2
  49. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_html.py +1 -2
  50. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_jsonschema.py +1 -2
  51. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_odcs.py +1 -2
  52. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_protobuf.py +1 -3
  53. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_rdf.py +1 -2
  54. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_spark.py +38 -3
  55. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sql.py +1 -3
  56. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sql_query.py +1 -3
  57. datacontract_cli-0.10.12/tests/test_export_sqlalchemy.py +91 -0
  58. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_terraform.py +1 -3
  59. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_avro.py +3 -3
  60. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_bigquery.py +1 -3
  61. datacontract_cli-0.10.12/tests/test_import_dbml.py +68 -0
  62. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_dbt.py +4 -6
  63. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_glue.py +55 -5
  64. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_jsonschema.py +1 -3
  65. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_odcs.py +1 -2
  66. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_spark.py +82 -1
  67. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_unity_file.py +1 -3
  68. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_integration_datameshmanager.py +1 -3
  69. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_integration_opentelemetry.py +2 -3
  70. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_lint.py +1 -3
  71. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_resolve.py +1 -2
  72. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_roundtrip_jsonschema.py +1 -2
  73. datacontract_cli-0.10.12/tests/test_spec_fields_field.py +17 -0
  74. datacontract_cli-0.10.12/tests/test_spec_ref.py +15 -0
  75. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_azure_parquet_remote.py +1 -2
  76. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_bigquery.py +1 -2
  77. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_databricks.py +1 -2
  78. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_csv.py +1 -3
  79. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_formats_valid.py +1 -3
  80. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_inline.py +1 -3
  81. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_json.py +1 -3
  82. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_examples_missing.py +1 -3
  83. datacontract_cli-0.10.12/tests/test_test_gcs_json_remote.py +48 -0
  84. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_kafka.py +1 -2
  85. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_kafka_remote.py +1 -2
  86. datacontract_cli-0.10.12/tests/test_test_parquet.py +146 -0
  87. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_postgres.py +1 -3
  88. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_csv.py +1 -2
  89. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_delta.py +10 -2
  90. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_multiple_models.py +1 -2
  91. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_remote.py +1 -2
  92. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_snowflake.py +1 -3
  93. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_sqlserver.py +3 -4
  94. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_trino.py +1 -1
  95. datacontract_cli-0.10.11/datacontract/imports/dbt_importer.py +0 -117
  96. datacontract_cli-0.10.11/datacontract/publish/publish.py +0 -32
  97. datacontract_cli-0.10.11/tests/test_test_parquet.py +0 -59
  98. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/LICENSE +0 -0
  99. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/MANIFEST.in +0 -0
  100. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/__init__.py +0 -0
  101. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/breaking/breaking.py +0 -0
  102. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/breaking/breaking_rules.py +0 -0
  103. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/catalog/catalog.py +0 -0
  104. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/__init__.py +0 -0
  105. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
  106. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
  107. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -0
  108. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/__init__.py +0 -0
  109. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/bigquery.py +0 -0
  110. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/dask.py +0 -0
  111. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/databricks.py +0 -0
  112. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/postgres.py +0 -0
  113. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/snowflake.py +0 -0
  114. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/sqlserver.py +0 -0
  115. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/engines/soda/connections/trino.py +0 -0
  116. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/__init__.py +0 -0
  117. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/avro_idl_converter.py +0 -0
  118. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/bigquery_converter.py +0 -0
  119. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/csv_type_converter.py +0 -0
  120. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/dbt_converter.py +0 -0
  121. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/go_converter.py +0 -0
  122. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/great_expectations_converter.py +0 -0
  123. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/html_export.py +0 -0
  124. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/jsonschema_converter.py +0 -0
  125. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/odcs_converter.py +0 -0
  126. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/protobuf_converter.py +0 -0
  127. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/pydantic_converter.py +0 -0
  128. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/rdf_converter.py +0 -0
  129. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sodacl_converter.py +0 -0
  130. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/sql_converter.py +0 -0
  131. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/export/terraform_converter.py +0 -0
  132. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/imports/jsonschema_importer.py +0 -0
  133. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/init/download_datacontract_file.py +0 -0
  134. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/files.py +0 -0
  135. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/lint.py +0 -0
  136. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/__init__.py +0 -0
  137. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/description_linter.py +0 -0
  138. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/example_model_linter.py +0 -0
  139. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/field_pattern_linter.py +0 -0
  140. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/field_reference_linter.py +0 -0
  141. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/notice_period_linter.py +0 -0
  142. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/quality_schema_linter.py +0 -0
  143. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/linters/valid_constraints_linter.py +0 -0
  144. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/resolve.py +0 -0
  145. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/schema.py +0 -0
  146. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/lint/urls.py +0 -0
  147. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/breaking_change.py +0 -0
  148. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/exceptions.py +0 -0
  149. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/model/run.py +0 -0
  150. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/py.typed +0 -0
  151. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/datacontract.html +0 -0
  152. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/index.html +0 -0
  153. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_information.html +0 -0
  154. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
  155. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/datacontract_terms.html +0 -0
  156. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/definition.html +0 -0
  157. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/example.html +0 -0
  158. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/model_field.html +0 -0
  159. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/partials/server.html +0 -0
  160. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/templates/style/output.css +0 -0
  161. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract/web.py +0 -0
  162. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/dependency_links.txt +0 -0
  163. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/entry_points.txt +0 -0
  164. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/datacontract_cli.egg-info/top_level.txt +0 -0
  165. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/setup.cfg +0 -0
  166. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_description_linter.py +0 -0
  167. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_documentation_linter.py +0 -0
  168. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_download_datacontract_file.py +0 -0
  169. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_example_model_linter.py +0 -0
  170. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_avro_idl.py +0 -0
  171. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_pydantic.py +0 -0
  172. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_export_sodacl.py +0 -0
  173. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_constraint_linter.py +0 -0
  174. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_pattern_linter.py +0 -0
  175. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_field_reference_linter.py +0 -0
  176. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_import_sql.py +0 -0
  177. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_notice_period_linter.py +0 -0
  178. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_quality_schema_linter.py +0 -0
  179. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_dataframe.py +0 -0
  180. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_delta.py +0 -0
  181. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_local_json.py +0 -0
  182. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json.py +0 -0
  183. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_test_s3_json_complex.py +0 -0
  184. {datacontract_cli-0.10.11 → datacontract_cli-0.10.12}/tests/test_web.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datacontract-cli
- Version: 0.10.11
+ Version: 0.10.12
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
  Project-URL: Homepage, https://cli.datacontract.com
@@ -11,36 +11,34 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: typer[all]<0.13,>=0.9
+ Requires-Dist: typer<0.13,>=0.12
  Requires-Dist: pydantic<2.9.0,>=2.8.2
  Requires-Dist: pyyaml~=6.0.1
  Requires-Dist: requests<2.33,>=2.31
- Requires-Dist: fastapi==0.111.1
+ Requires-Dist: fastapi==0.112.0
+ Requires-Dist: uvicorn==0.30.5
+ Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
  Requires-Dist: fastparquet==2024.5.0
  Requires-Dist: python-multipart==0.0.9
  Requires-Dist: rich~=13.7.0
- Requires-Dist: simple-ddl-parser==1.5.2
+ Requires-Dist: simple-ddl-parser==1.6.0
+ Requires-Dist: duckdb==1.0.0
  Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
  Requires-Dist: setuptools>=60
- Requires-Dist: duckdb==1.0.0
- Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
  Requires-Dist: python-dotenv~=1.0.0
  Requires-Dist: rdflib==7.0.0
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
  Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
- Requires-Dist: boto3<1.34.137,>=1.34.41
- Requires-Dist: botocore<1.34.137,>=1.34.41
+ Requires-Dist: boto3<1.35.6,>=1.34.41
  Requires-Dist: jinja_partials>=0.2.1
  Provides-Extra: avro
- Requires-Dist: avro==1.11.3; extra == "avro"
+ Requires-Dist: avro==1.12.0; extra == "avro"
  Provides-Extra: bigquery
  Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
  Provides-Extra: databricks
  Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
  Requires-Dist: databricks-sql-connector<3.4.0,>=3.1.2; extra == "databricks"
  Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
- Provides-Extra: deltalake
- Requires-Dist: deltalake<0.19,>=0.17; extra == "deltalake"
  Provides-Extra: kafka
  Requires-Dist: datacontract-cli[avro]; extra == "kafka"
  Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
@@ -49,31 +47,32 @@ Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
  Provides-Extra: s3
  Requires-Dist: s3fs==2024.6.1; extra == "s3"
  Provides-Extra: snowflake
- Requires-Dist: snowflake-connector-python[pandas]<3.12,>=3.6; extra == "snowflake"
+ Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
  Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
  Provides-Extra: sqlserver
  Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
  Provides-Extra: trino
  Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"
+ Provides-Extra: dbt
+ Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
+ Provides-Extra: dbml
+ Requires-Dist: pydbml>=1.1.1; extra == "dbml"
  Provides-Extra: all
- Requires-Dist: datacontract-cli[bigquery,databricks,deltalake,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
+ Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
  Provides-Extra: dev
  Requires-Dist: datacontract-cli[all]; extra == "dev"
- Requires-Dist: httpx==0.27.0; extra == "dev"
- Requires-Dist: ruff; extra == "dev"
+ Requires-Dist: httpx==0.27.2; extra == "dev"
+ Requires-Dist: kafka-python; extra == "dev"
+ Requires-Dist: moto==5.0.13; extra == "dev"
+ Requires-Dist: pandas>=2.1.0; extra == "dev"
  Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
+ Requires-Dist: pyarrow>=12.0.0; extra == "dev"
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: pytest-xdist; extra == "dev"
- Requires-Dist: moto==5.0.11; extra == "dev"
- Requires-Dist: pymssql==2.3.0; extra == "dev"
- Requires-Dist: kafka-python; extra == "dev"
+ Requires-Dist: pymssql==2.3.1; extra == "dev"
+ Requires-Dist: ruff; extra == "dev"
+ Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.1; extra == "dev"
  Requires-Dist: trino==0.329.0; extra == "dev"
- Requires-Dist: testcontainers==4.7.2; extra == "dev"
- Requires-Dist: testcontainers[core]==4.7.2; extra == "dev"
- Requires-Dist: testcontainers[minio]==4.7.2; extra == "dev"
- Requires-Dist: testcontainers[postgres]==4.7.2; extra == "dev"
- Requires-Dist: testcontainers[kafka]==4.7.2; extra == "dev"
- Requires-Dist: testcontainers[mssql]==4.7.2; extra == "dev"

  # Data Contract CLI

@@ -82,7 +81,7 @@ Requires-Dist: testcontainers[mssql]==4.7.2; extra == "dev"
  <img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/datacontract-cli/ci.yaml?branch=main"></a>
  <a href="https://github.com/datacontract/datacontract-cli">
  <img alt="Stars" src="https://img.shields.io/github/stars/datacontract/datacontract-cli" /></a>
- <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://camo.githubusercontent.com/5ade1fd1e76a6ab860802cdd2941fe2501e2ca2cb534e5d8968dbf864c13d33d/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f736c61636b2d6a6f696e5f636861742d77686974652e7376673f6c6f676f3d736c61636b267374796c653d736f6369616c" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
+ <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
  </p>

  The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
@@ -267,13 +266,13 @@ A list of available extras:
  | Avro Support | `pip install datacontract-cli[avro]` |
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
- | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
  | Kafka Integration | `pip install datacontract-cli[kafka]` |
  | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
  | S3 Integration | `pip install datacontract-cli[s3]` |
  | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
  | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
  | Trino | `pip install datacontract-cli[trino]` |
+ | Dbt | `pip install datacontract-cli[dbt]` |



@@ -406,6 +405,12 @@ Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/

  Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.

+ - CSV
+ - JSON
+ - Delta
+ - Parquet
+ - Iceberg (coming soon)
+
  #### Examples

  ##### JSON
@@ -444,6 +449,32 @@ servers:



+ ### Google Cloud Storage (GCS)
+
+ The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
+ Use `https://storage.googleapis.com` as the endpoint URL.
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+   production:
+     type: s3
+     endpointUrl: https://storage.googleapis.com
+     location: s3://bucket-name/path/*/*.json # use s3:// schema instead of gs://
+     format: json
+     delimiter: new_line # new_line, array, or none
+ ```
+
+ #### Environment Variables
+
+ | Environment Variable | Example | Description |
+ |-------------------------------------|----------------|------------------------------------------------------------------------------------------|
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `GOOG1EZZZ...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Key ID |
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
+
+
  ### BigQuery

  We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
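For context, running the contract tests against such a GCS-backed server is a plain `datacontract test` invocation once the HMAC credentials above are exported. A minimal sketch, assuming a `datacontract.yaml` that defines the `production` server shown above (all credential values are placeholders):

```bash
# Sketch: test GCS data via the S3-compatible endpoint, using the HMAC key documented above.
export DATACONTRACT_S3_ACCESS_KEY_ID="GOOG1EZZZ..."    # placeholder HMAC key id
export DATACONTRACT_S3_SECRET_ACCESS_KEY="PDWWpb..."   # placeholder HMAC secret
datacontract test --server production datacontract.yaml
```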
@@ -794,7 +825,7 @@ models:
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
- │ y|dbml|spark] │
+ │ y|dbml|spark|sqlalchemy] │
  │ --output PATH Specify the file path where the exported data will be │
  │ saved. If no path is provided, the output will be │
  │ printed to stdout. │
@@ -845,6 +876,7 @@ Available export options:
  | `pydantic-model` | Export to pydantic models | ✅ |
  | `DBML` | Export to a DBML Diagram description | ✅ |
  | `spark` | Export to a Spark StructType | ✅ |
+ | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
  | Missing something? | Please create an issue on GitHub | TBD |

  #### Great Expectations
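For context, the new `sqlalchemy` exporter is invoked like any other export format; a minimal sketch, assuming a local `datacontract.yaml`:

```bash
# Sketch: write SQLAlchemy models for the contract to models.py.
# --output is optional; without it the result is printed to stdout, per the help text above.
datacontract export --format sqlalchemy --output models.py datacontract.yaml
```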
@@ -918,6 +950,7 @@ models:
  description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
  type: long
  example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
+ required: true
  config:
    avroLogicalType: local-timestamp-micros
    avroDefault: 1672534861000000
@@ -932,6 +965,7 @@ models:
  - **description**: A textual description of the field.
  - **type**: The data type of the field. In this example, it is `long`.
  - **example**: An example value for the field.
+ - **required**: Is this a required field (as opposed to optional/nullable).
  - **config**: Section to specify custom Avro properties.
  - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
  - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
@@ -968,6 +1002,14 @@ models:
  │ names, leave empty for all models in the │
  │ dataset). │
  │ [default: None] │
+ │ --dbml-schema TEXT List of schema names to import from the DBML │
+ │ file (repeat for multiple schema names, │
+ │ leave empty for all tables in the file). │
+ │ [default: None] │
+ │ --dbml-table TEXT List of table names to import from the DBML │
+ │ file (repeat for multiple table names, leave │
+ │ empty for all tables in the file). │
+ │ [default: None] │
  │ --help Show this message and exit. │
  ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
@@ -991,6 +1033,7 @@ Available import options:
  | `dbt` | Import from dbt models | ✅ |
  | `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
  | `spark` | Import from Spark StructTypes | ✅ |
+ | `dbml` | Import from DBML models | ✅ |
  | `protobuf` | Import from Protobuf schemas | TBD |
  | Missing something? | Please create an issue on GitHub | TBD |

@@ -1078,6 +1121,38 @@ Example:
  datacontract import --format spark --source "users,orders"
  ```

+ #### DBML
+
+ Importing from DBML Documents.
+ **NOTE:** Since DBML does _not_ have strict requirements on the types of columns, this import _may_ create non-valid datacontracts, as not all types of fields can be properly mapped. In this case you will have to adapt the generated document manually.
+ We also assume, that the description for models and fields is stored in a Note within the DBML model.
+
+ You may give the `dbml-table` or `dbml-schema` parameter to enumerate the tables or schemas that should be imported.
+ If no tables are given, _all_ available tables of the source will be imported. Likewise, if no schema is given, _all_ schemas are imported.
+
+ Examples:
+
+ ```bash
+ # Example import from DBML file, importing everything
+ datacontract import --format dbml --source <file_path>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables from specific schemas
+ datacontract import --format dbml --source <file_path> --dbml-schema <schema_1> --dbml-schema <schema_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-table <table_name_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names from a specific schema
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
+ ```
+
+
  ### breaking

  ```
@@ -1427,7 +1502,7 @@ if __name__ == "__main__":
  "models": [
  {
  "name": "model1",
- "desctiption": "model description from app",
+ "description": "model description from app",
  "columns": [
  {
  "name": "columnA",
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
  <img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/datacontract-cli/ci.yaml?branch=main"></a>
  <a href="https://github.com/datacontract/datacontract-cli">
  <img alt="Stars" src="https://img.shields.io/github/stars/datacontract/datacontract-cli" /></a>
- <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://camo.githubusercontent.com/5ade1fd1e76a6ab860802cdd2941fe2501e2ca2cb534e5d8968dbf864c13d33d/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f736c61636b2d6a6f696e5f636861742d77686974652e7376673f6c6f676f3d736c61636b267374796c653d736f6369616c" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
+ <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
  </p>

  The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
@@ -190,13 +190,13 @@ A list of available extras:
  | Avro Support | `pip install datacontract-cli[avro]` |
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
- | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
  | Kafka Integration | `pip install datacontract-cli[kafka]` |
  | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
  | S3 Integration | `pip install datacontract-cli[s3]` |
  | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
  | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
  | Trino | `pip install datacontract-cli[trino]` |
+ | Dbt | `pip install datacontract-cli[dbt]` |



@@ -329,6 +329,12 @@ Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/

  Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.

+ - CSV
+ - JSON
+ - Delta
+ - Parquet
+ - Iceberg (coming soon)
+
  #### Examples

  ##### JSON
@@ -367,6 +373,32 @@ servers:



+ ### Google Cloud Storage (GCS)
+
+ The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
+ Use `https://storage.googleapis.com` as the endpoint URL.
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+   production:
+     type: s3
+     endpointUrl: https://storage.googleapis.com
+     location: s3://bucket-name/path/*/*.json # use s3:// schema instead of gs://
+     format: json
+     delimiter: new_line # new_line, array, or none
+ ```
+
+ #### Environment Variables
+
+ | Environment Variable | Example | Description |
+ |-------------------------------------|----------------|------------------------------------------------------------------------------------------|
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `GOOG1EZZZ...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Key ID |
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
+
+
  ### BigQuery

  We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
@@ -717,7 +749,7 @@ models:
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
- │ y|dbml|spark] │
+ │ y|dbml|spark|sqlalchemy] │
  │ --output PATH Specify the file path where the exported data will be │
  │ saved. If no path is provided, the output will be │
  │ printed to stdout. │
@@ -768,6 +800,7 @@ Available export options:
  | `pydantic-model` | Export to pydantic models | ✅ |
  | `DBML` | Export to a DBML Diagram description | ✅ |
  | `spark` | Export to a Spark StructType | ✅ |
+ | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
  | Missing something? | Please create an issue on GitHub | TBD |

  #### Great Expectations
@@ -841,6 +874,7 @@ models:
  description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
  type: long
  example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
+ required: true
  config:
    avroLogicalType: local-timestamp-micros
    avroDefault: 1672534861000000
@@ -855,6 +889,7 @@ models:
  - **description**: A textual description of the field.
  - **type**: The data type of the field. In this example, it is `long`.
  - **example**: An example value for the field.
+ - **required**: Is this a required field (as opposed to optional/nullable).
  - **config**: Section to specify custom Avro properties.
  - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
  - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
@@ -891,6 +926,14 @@ models:
  │ names, leave empty for all models in the │
  │ dataset). │
  │ [default: None] │
+ │ --dbml-schema TEXT List of schema names to import from the DBML │
+ │ file (repeat for multiple schema names, │
+ │ leave empty for all tables in the file). │
+ │ [default: None] │
+ │ --dbml-table TEXT List of table names to import from the DBML │
+ │ file (repeat for multiple table names, leave │
+ │ empty for all tables in the file). │
+ │ [default: None] │
  │ --help Show this message and exit. │
  ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
@@ -914,6 +957,7 @@ Available import options:
  | `dbt` | Import from dbt models | ✅ |
  | `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
  | `spark` | Import from Spark StructTypes | ✅ |
+ | `dbml` | Import from DBML models | ✅ |
  | `protobuf` | Import from Protobuf schemas | TBD |
  | Missing something? | Please create an issue on GitHub | TBD |

@@ -1001,6 +1045,38 @@ Example:
  datacontract import --format spark --source "users,orders"
  ```

+ #### DBML
+
+ Importing from DBML Documents.
+ **NOTE:** Since DBML does _not_ have strict requirements on the types of columns, this import _may_ create non-valid datacontracts, as not all types of fields can be properly mapped. In this case you will have to adapt the generated document manually.
+ We also assume, that the description for models and fields is stored in a Note within the DBML model.
+
+ You may give the `dbml-table` or `dbml-schema` parameter to enumerate the tables or schemas that should be imported.
+ If no tables are given, _all_ available tables of the source will be imported. Likewise, if no schema is given, _all_ schemas are imported.
+
+ Examples:
+
+ ```bash
+ # Example import from DBML file, importing everything
+ datacontract import --format dbml --source <file_path>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables from specific schemas
+ datacontract import --format dbml --source <file_path> --dbml-schema <schema_1> --dbml-schema <schema_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-table <table_name_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names from a specific schema
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
+ ```
+
+
  ### breaking

  ```
@@ -1350,7 +1426,7 @@ if __name__ == "__main__":
  "models": [
  {
  "name": "model1",
- "desctiption": "model description from app",
+ "description": "model description from app",
  "columns": [
  {
  "name": "columnA",
--- a/datacontract/cli.py
+++ b/datacontract/cli.py
@@ -17,7 +17,7 @@ from datacontract.catalog.catalog import create_index_html, create_data_contract
  from datacontract.data_contract import DataContract, ExportFormat
  from datacontract.imports.importer import ImportFormat
  from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
- from datacontract.publish.publish import publish_to_datamesh_manager
+ from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager

  DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"

@@ -232,6 +232,18 @@ def import_(
              help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
          ),
      ] = None,
+     dbml_schema: Annotated[
+         Optional[List[str]],
+         typer.Option(
+             help="List of schema names to import from the DBML file (repeat for multiple schema names, leave empty for all tables in the file)."
+         ),
+     ] = None,
+     dbml_table: Annotated[
+         Optional[List[str]],
+         typer.Option(
+             help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
+         ),
+     ] = None,
  ):
      """
      Create a data contract from the given source location. Prints to stdout.
@@ -245,6 +257,8 @@ def import_(
          bigquery_dataset=bigquery_dataset,
          unity_table_full_name=unity_table_full_name,
          dbt_model=dbt_model,
+         dbml_schema=dbml_schema,
+         dbml_table=dbml_table,
      )
      console.print(result.to_yaml())

@@ -261,8 +275,10 @@ def publish(
      """
      Publish the data contract to the Data Mesh Manager.
      """
-     publish_to_datamesh_manager(
-         data_contract=DataContract(data_contract_file=location, schema_location=schema),
+     publish_data_contract_to_datamesh_manager(
+         data_contract_specification=DataContract(
+             data_contract_file=location, schema_location=schema
+         ).get_data_contract_specification(),
      )

--- a/datacontract/data_contract.py
+++ b/datacontract/data_contract.py
@@ -18,8 +18,8 @@ from datacontract.export.exporter import ExportFormat
  from datacontract.export.exporter_factory import exporter_factory
  from datacontract.imports.importer_factory import importer_factory

- from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
- from datacontract.integration.publish_opentelemetry import publish_opentelemetry
+ from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
+ from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
  from datacontract.lint import resolve
  from datacontract.lint.linters.description_linter import DescriptionLinter
  from datacontract.lint.linters.example_model_linter import ExampleModelLinter
@@ -218,15 +218,10 @@ class DataContract:
          run.finish()

          if self._publish_url is not None:
-             try:
-                 publish_datamesh_manager(run, self._publish_url)
-             except Exception:
-                 run.log_error("Failed to publish to datamesh manager")
+             publish_test_results_to_datamesh_manager(run, self._publish_url)
+
          if self._publish_to_opentelemetry:
-             try:
-                 publish_opentelemetry(run)
-             except Exception:
-                 run.log_error("Failed to publish to opentelemetry")
+             publish_test_results_to_opentelemetry(run)

          return run

--- a/datacontract/engines/fastjsonschema/check_jsonschema.py
+++ b/datacontract/engines/fastjsonschema/check_jsonschema.py
@@ -158,6 +158,17 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
          process_local_file(run, server, model_name, validate)
      elif server.type == "s3":
          process_s3_file(server, model_name, validate)
+     elif server.type == "gcs":
+         run.checks.append(
+             Check(
+                 type="schema",
+                 name="Check that JSON has valid schema",
+                 model=model_name,
+                 result="info",
+                 reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
+                 engine="jsonschema",
+             )
+         )
      else:
          run.checks.append(
              Check(
--- a/datacontract/engines/fastjsonschema/s3/s3_read_files.py
+++ b/datacontract/engines/fastjsonschema/s3/s3_read_files.py
@@ -28,9 +28,11 @@ def s3_fs(s3_endpoint_url):

      aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
      aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+     aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
      return s3fs.S3FileSystem(
          key=aws_access_key_id,
          secret=aws_secret_access_key,
+         token=aws_session_token,
          anon=aws_access_key_id is None,
          client_kwargs={"endpoint_url": s3_endpoint_url},
      )
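The new `DATACONTRACT_S3_SESSION_TOKEN` variable lets the CLI authenticate with temporary credentials; a minimal sketch, assuming credentials obtained from AWS STS (all values are placeholders):

```bash
# Sketch: temporary credentials (e.g. from `aws sts assume-role`) for S3-backed tests.
export DATACONTRACT_S3_ACCESS_KEY_ID="ASIA..."           # placeholder
export DATACONTRACT_S3_SECRET_ACCESS_KEY="wJalrXUt..."   # placeholder
export DATACONTRACT_S3_SESSION_TOKEN="FwoGZXIvYXdzE..."  # placeholder session token
datacontract test datacontract.yaml
```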
--- a/datacontract/engines/soda/check_soda_execute.py
+++ b/datacontract/engines/soda/check_soda_execute.py
@@ -1,8 +1,4 @@
  import logging
- import typing
-
- if typing.TYPE_CHECKING:
-     from pyspark.sql import SparkSession

  from soda.scan import Scan

@@ -19,9 +15,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
  from datacontract.model.run import Run, Check, Log


- def check_soda_execute(
-     run: Run, data_contract: DataContractSpecification, server: Server, spark: "SparkSession", tmp_dir
- ):
+ def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
      if data_contract is None:
          run.log_warn("Cannot run engine soda-core, as data contract is invalid")
          return
@@ -29,7 +23,7 @@
      run.log_info("Running engine soda-core")
      scan = Scan()

-     if server.type in ["s3", "azure", "local"]:
+     if server.type in ["s3", "gcs", "azure", "local"]:
          if server.format in ["json", "parquet", "csv", "delta"]:
              con = get_duckdb_connection(data_contract, server, run)
              scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
--- a/datacontract/engines/soda/connections/duckdb.py
+++ b/datacontract/engines/soda/connections/duckdb.py
@@ -1,7 +1,5 @@
  import os

- from deltalake import DeltaTable
-
  import duckdb
  from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
  from datacontract.model.run import Run
@@ -15,6 +13,9 @@ def get_duckdb_connection(data_contract, server, run: Run):
      if server.type == "s3":
          path = server.location
          setup_s3_connection(con, server)
+     if server.type == "gcs":
+         path = server.location
+         setup_gcs_connection(con, server)
      if server.type == "azure":
          path = server.location
          setup_azure_connection(con, server)
@@ -49,28 +50,8 @@ def get_duckdb_connection(data_contract, server, run: Run):
              f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
          )
      elif server.format == "delta":
-         if server.type == "local":
-             delta_table_arrow = DeltaTable(model_path).to_pyarrow_dataset()
-             con.register(model_name, delta_table_arrow)
-
-         if server.type == "azure":
-             # After switching to native delta table support
-             # in https://github.com/datacontract/datacontract-cli/issues/258,
-             # azure storage should also work
-             # https://github.com/duckdb/duckdb_delta/issues/21
-             raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
-         if server.type == "s3":
-             storage_options = {
-                 "AWS_ENDPOINT_URL": server.endpointUrl,
-                 "AWS_ACCESS_KEY_ID": os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID"),
-                 "AWS_SECRET_ACCESS_KEY": os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY"),
-                 "AWS_REGION": os.getenv("DATACONTRACT_S3_REGION", "us-east-1"),
-                 "AWS_ALLOW_HTTP": "True" if server.endpointUrl.startswith("http://") else "False",
-             }
-
-             delta_table_arrow = DeltaTable(model_path, storage_options=storage_options).to_pyarrow_dataset()
-
-             con.register(model_name, delta_table_arrow)
+         con.sql("update extensions;")  # Make sure we have the latest delta extension
+         con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
      return con

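The rewritten `delta` branch drops the `deltalake` Python dependency and reads Delta tables through DuckDB's `delta` extension instead. A standalone sketch of that pattern, assuming a hypothetical local Delta table at `./orders_delta`:

```python
# Sketch: querying a Delta table via DuckDB's delta extension, as the patched code does.
import duckdb  # duckdb==1.0.0, per the updated requirements

con = duckdb.connect()
con.install_extension("delta")  # delta_scan() is provided by the delta extension
con.load_extension("delta")

# Same pattern as the patched code: expose the Delta table as a view, then query it.
con.sql("CREATE VIEW orders AS SELECT * FROM delta_scan('./orders_delta');")
print(con.sql("SELECT COUNT(*) FROM orders").fetchall())
```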
 
@@ -142,6 +123,24 @@ def setup_s3_connection(con, server):
      # print(con.sql("SELECT * FROM duckdb_settings() WHERE name like 's3%'"))


+ def setup_gcs_connection(con, server):
+     key_id = os.getenv("DATACONTRACT_GCS_KEY_ID")
+     secret = os.getenv("DATACONTRACT_GCS_SECRET")
+
+     if key_id is None:
+         raise ValueError("Error: Environment variable DATACONTRACT_GCS_KEY_ID is not set")
+     if secret is None:
+         raise ValueError("Error: Environment variable DATACONTRACT_GCS_SECRET is not set")
+
+     con.sql(f"""
+     CREATE SECRET gcs_secret (
+         TYPE GCS,
+         KEY_ID '{key_id}',
+         SECRET '{secret}'
+     );
+     """)
+
+
  def setup_azure_connection(con, server):
      tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
      client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")