datacontract-cli 0.11.2__tar.gz → 0.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213)
  1. {datacontract_cli-0.11.2/datacontract_cli.egg-info → datacontract_cli-0.11.4}/PKG-INFO +18 -16
  2. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/README.md +2 -2
  3. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/api.py +180 -38
  4. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/catalog/catalog.py +1 -1
  5. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/cli.py +5 -5
  6. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/data_contract.py +2 -3
  7. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_checks.py +1 -1
  8. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/data_contract_test.py +2 -2
  9. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/check_soda_execute.py +2 -2
  10. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/duckdb_connection.py +89 -25
  11. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/bigquery_exporter.py +2 -2
  12. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/pydantic_exporter.py +15 -4
  13. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/spark_exporter.py +32 -7
  14. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/csv_importer.py +5 -1
  15. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dcs_importer.py +67 -24
  16. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/init/init_template.py +1 -1
  17. datacontract_cli-0.11.4/datacontract/schemas/odcs-3.1.0.init.yaml +36 -0
  18. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/datacontract_odcs.html +2 -2
  19. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/index.html +27 -0
  20. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4/datacontract_cli.egg-info}/PKG-INFO +18 -16
  21. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/SOURCES.txt +2 -0
  22. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/requires.txt +16 -13
  23. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/pyproject.toml +18 -14
  24. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_download_datacontract_file.py +18 -3
  25. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_pydantic.py +17 -1
  26. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_rdf.py +3 -0
  27. datacontract_cli-0.11.4/tests/test_test_schema_evolution.py +110 -0
  28. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/LICENSE +0 -0
  29. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/MANIFEST.in +0 -0
  30. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/__init__.py +0 -0
  31. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/__init__.py +0 -0
  32. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
  33. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
  34. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/fastjsonschema/check_jsonschema.py +0 -0
  35. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +0 -0
  36. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/__init__.py +0 -0
  37. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/athena.py +0 -0
  38. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/bigquery.py +0 -0
  39. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/databricks.py +0 -0
  40. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/impala.py +0 -0
  41. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/kafka.py +0 -0
  42. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/oracle.py +0 -0
  43. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/postgres.py +0 -0
  44. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/snowflake.py +0 -0
  45. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/sqlserver.py +0 -0
  46. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/engines/soda/connections/trino.py +0 -0
  47. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/__init__.py +0 -0
  48. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/avro_exporter.py +0 -0
  49. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/avro_idl_exporter.py +0 -0
  50. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/custom_exporter.py +0 -0
  51. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/data_caterer_exporter.py +0 -0
  52. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dbml_exporter.py +0 -0
  53. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dbt_exporter.py +0 -0
  54. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dcs_exporter.py +0 -0
  55. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/dqx_exporter.py +0 -0
  56. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/duckdb_type_converter.py +0 -0
  57. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/excel_exporter.py +0 -0
  58. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/exporter.py +0 -0
  59. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/exporter_factory.py +0 -0
  60. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/go_exporter.py +0 -0
  61. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/great_expectations_exporter.py +0 -0
  62. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/html_exporter.py +0 -0
  63. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/iceberg_exporter.py +0 -0
  64. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/jsonschema_exporter.py +0 -0
  65. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/markdown_exporter.py +0 -0
  66. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/mermaid_exporter.py +0 -0
  67. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/odcs_export_helper.py +0 -0
  68. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/odcs_v3_exporter.py +0 -0
  69. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/pandas_type_converter.py +0 -0
  70. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/protobuf_exporter.py +0 -0
  71. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/rdf_exporter.py +0 -0
  72. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sodacl_exporter.py +0 -0
  73. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sql_exporter.py +0 -0
  74. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sql_type_converter.py +0 -0
  75. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/export/sqlalchemy_exporter.py +0 -0
  76. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/avro_importer.py +0 -0
  77. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/bigquery_importer.py +0 -0
  78. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dbml_importer.py +0 -0
  79. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/dbt_importer.py +0 -0
  80. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/excel_importer.py +0 -0
  81. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/glue_importer.py +0 -0
  82. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/iceberg_importer.py +0 -0
  83. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/importer.py +0 -0
  84. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/importer_factory.py +0 -0
  85. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/json_importer.py +0 -0
  86. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/jsonschema_importer.py +0 -0
  87. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/odcs_helper.py +0 -0
  88. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/odcs_importer.py +0 -0
  89. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/parquet_importer.py +0 -0
  90. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/protobuf_importer.py +0 -0
  91. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/spark_importer.py +0 -0
  92. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/sql_importer.py +0 -0
  93. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/imports/unity_importer.py +0 -0
  94. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/integration/entropy_data.py +0 -0
  95. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/files.py +0 -0
  96. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/resolve.py +0 -0
  97. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/resources.py +0 -0
  98. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/schema.py +0 -0
  99. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/lint/urls.py +0 -0
  100. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/exceptions.py +0 -0
  101. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/odcs.py +0 -0
  102. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/model/run.py +0 -0
  103. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/__init__.py +0 -0
  104. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/junit_test_results.py +0 -0
  105. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/output_format.py +0 -0
  106. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/output/test_results_writer.py +0 -0
  107. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/py.typed +0 -0
  108. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.1.0.init.yaml +0 -0
  109. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.1.0.schema.json +0 -0
  110. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.0.init.yaml +0 -0
  111. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.0.schema.json +0 -0
  112. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.1.init.yaml +0 -0
  113. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/datacontract-1.2.1.schema.json +0 -0
  114. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.0.1.schema.json +0 -0
  115. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.0.2.schema.json +0 -0
  116. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/schemas/odcs-3.1.0.schema.json +0 -0
  117. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/datacontract.html +0 -0
  118. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_information.html +0 -0
  119. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
  120. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/datacontract_terms.html +0 -0
  121. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/definition.html +0 -0
  122. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/example.html +0 -0
  123. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/model_field.html +0 -0
  124. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/quality.html +0 -0
  125. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/partials/server.html +0 -0
  126. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract/templates/style/output.css +0 -0
  127. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/dependency_links.txt +0 -0
  128. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/entry_points.txt +0 -0
  129. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/datacontract_cli.egg-info/top_level.txt +0 -0
  130. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/setup.cfg +0 -0
  131. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_api.py +0 -0
  132. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_catalog.py +0 -0
  133. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_cli.py +0 -0
  134. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_data_contract_checks.py +0 -0
  135. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_data_contract_specification.py +0 -0
  136. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_description_linter.py +0 -0
  137. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_duckdb_json.py +0 -0
  138. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_avro.py +0 -0
  139. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_avro_idl.py +0 -0
  140. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_bigquery.py +0 -0
  141. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_complex_data_contract.py +0 -0
  142. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_custom.py +0 -0
  143. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_custom_exporter.py +0 -0
  144. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_data_caterer.py +0 -0
  145. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbml.py +0 -0
  146. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_models.py +0 -0
  147. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_sources.py +0 -0
  148. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dbt_staging_sql.py +0 -0
  149. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_dqx.py +0 -0
  150. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_excel.py +0 -0
  151. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_go.py +0 -0
  152. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_great_expectations.py +0 -0
  153. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_html.py +0 -0
  154. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_iceberg.py +0 -0
  155. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_jsonschema.py +0 -0
  156. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_markdown.py +0 -0
  157. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_mermaid.py +0 -0
  158. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_odcs_v3.py +0 -0
  159. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_protobuf.py +0 -0
  160. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sodacl.py +0 -0
  161. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_spark.py +0 -0
  162. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sql.py +0 -0
  163. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sql_query.py +0 -0
  164. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_export_sqlalchemy.py +0 -0
  165. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_avro.py +0 -0
  166. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_bigquery.py +0 -0
  167. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_csv.py +0 -0
  168. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_dbml.py +0 -0
  169. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_dbt.py +0 -0
  170. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_excel.py +0 -0
  171. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_glue.py +0 -0
  172. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_iceberg.py +0 -0
  173. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_json.py +0 -0
  174. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_jsonschema.py +0 -0
  175. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_odcs_v3.py +0 -0
  176. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_parquet.py +0 -0
  177. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_protobuf.py +0 -0
  178. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_spark.py +0 -0
  179. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_oracle.py +0 -0
  180. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_postgres.py +0 -0
  181. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_sql_sqlserver.py +0 -0
  182. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_import_unity_file.py +0 -0
  183. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_integration_entropydata.py +0 -0
  184. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_lint.py +0 -0
  185. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_resolve.py +0 -0
  186. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_roundtrip_jsonschema.py +0 -0
  187. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_api.py +0 -0
  188. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_athena_iceberg.py +0 -0
  189. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_azure_remote.py +0 -0
  190. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_bigquery.py +0 -0
  191. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_databricks.py +0 -0
  192. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_dataframe.py +0 -0
  193. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_delta.py +0 -0
  194. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_gcs_csv_remote.py +0 -0
  195. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_gcs_json_remote.py +0 -0
  196. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_kafka.py +0 -0
  197. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_kafka_remote.py +0 -0
  198. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_local_json.py +0 -0
  199. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_local_json_nd.py +0 -0
  200. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_oracle.py +0 -0
  201. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_output_junit.py +0 -0
  202. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_parquet.py +0 -0
  203. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_postgres.py +0 -0
  204. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_quality.py +0 -0
  205. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_csv.py +0 -0
  206. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_delta.py +0 -0
  207. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json.py +0 -0
  208. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_complex.py +0 -0
  209. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_multiple_models.py +0 -0
  210. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_s3_json_remote.py +0 -0
  211. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_snowflake.py +0 -0
  212. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_sqlserver.py +0 -0
  213. {datacontract_cli-0.11.2 → datacontract_cli-0.11.4}/tests/test_test_trino.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacontract-cli
3
- Version: 0.11.2
3
+ Version: 0.11.4
4
4
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
5
5
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
6
6
  License-Expression: MIT
@@ -11,25 +11,22 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: <3.13,>=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: typer<0.20,>=0.15.1
14
+ Requires-Dist: typer<0.22,>=0.15.1
15
15
  Requires-Dist: pydantic<2.13.0,>=2.8.2
16
16
  Requires-Dist: pyyaml~=6.0.1
17
17
  Requires-Dist: requests<2.33,>=2.31
18
18
  Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
19
- Requires-Dist: fastparquet<2025.0.0,>=2024.5.0
20
- Requires-Dist: numpy<2.0.0,>=1.26.4
19
+ Requires-Dist: pytz>=2024.1
21
20
  Requires-Dist: python-multipart<1.0.0,>=0.0.20
22
21
  Requires-Dist: rich<15.0,>=13.7
23
22
  Requires-Dist: sqlglot<29.0.0,>=26.6.0
24
- Requires-Dist: duckdb<1.4.0,>=1.0.0
25
- Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20
26
23
  Requires-Dist: setuptools>=60
27
24
  Requires-Dist: python-dotenv<2.0.0,>=1.0.0
28
25
  Requires-Dist: boto3<2.0.0,>=1.34.41
29
26
  Requires-Dist: Jinja2<4.0.0,>=3.1.5
30
27
  Requires-Dist: jinja_partials<1.0.0,>=0.2.1
31
28
  Requires-Dist: datacontract-specification<2.0.0,>=1.2.3
32
- Requires-Dist: open-data-contract-standard<4.0.0,>=3.1.0
29
+ Requires-Dist: open-data-contract-standard<4.0.0,>=3.1.2
33
30
  Provides-Extra: avro
34
31
  Requires-Dist: avro==1.12.1; extra == "avro"
35
32
  Provides-Extra: bigquery
@@ -42,19 +39,21 @@ Provides-Extra: databricks
42
39
  Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "databricks"
43
40
  Requires-Dist: soda-core-spark[databricks]<3.6.0,>=3.3.20; extra == "databricks"
44
41
  Requires-Dist: databricks-sql-connector<4.3.0,>=3.7.0; extra == "databricks"
45
- Requires-Dist: databricks-sdk<0.74.0; extra == "databricks"
46
- Requires-Dist: pyspark<4.0.0,>=3.5.5; extra == "databricks"
42
+ Requires-Dist: databricks-sdk<0.78.0; extra == "databricks"
43
+ Requires-Dist: pyspark<5.0.0,>=3.5.0; extra == "databricks"
44
+ Requires-Dist: numpy<2.0.0,>=1.26.4; extra == "databricks"
47
45
  Provides-Extra: iceberg
48
46
  Requires-Dist: pyiceberg==0.10.0; extra == "iceberg"
49
47
  Provides-Extra: kafka
50
48
  Requires-Dist: datacontract-cli[avro]; extra == "kafka"
51
49
  Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "kafka"
52
- Requires-Dist: pyspark<4.0.0,>=3.5.5; extra == "kafka"
50
+ Requires-Dist: pyspark<5.0.0,>=3.5.0; extra == "kafka"
51
+ Requires-Dist: numpy<2.0.0,>=1.26.4; extra == "kafka"
53
52
  Provides-Extra: postgres
54
53
  Requires-Dist: soda-core-postgres<3.6.0,>=3.3.20; extra == "postgres"
55
54
  Provides-Extra: s3
56
55
  Requires-Dist: s3fs<2026.0.0,>=2025.2.0; extra == "s3"
57
- Requires-Dist: aiobotocore<2.26.0,>=2.17.0; extra == "s3"
56
+ Requires-Dist: aiobotocore<3.2.0,>=2.17.0; extra == "s3"
58
57
  Provides-Extra: snowflake
59
58
  Requires-Dist: snowflake-connector-python[pandas]<4.2,>=3.6; extra == "snowflake"
60
59
  Requires-Dist: soda-core-snowflake<3.6.0,>=3.3.20; extra == "snowflake"
@@ -70,17 +69,20 @@ Provides-Extra: dbt
70
69
  Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
71
70
  Provides-Extra: dbml
72
71
  Requires-Dist: pydbml>=1.1.1; extra == "dbml"
72
+ Provides-Extra: duckdb
73
+ Requires-Dist: duckdb<1.5.0,>=1.0.0; extra == "duckdb"
74
+ Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20; extra == "duckdb"
73
75
  Provides-Extra: parquet
74
76
  Requires-Dist: pyarrow>=18.1.0; extra == "parquet"
75
77
  Provides-Extra: rdf
76
- Requires-Dist: rdflib==7.0.0; extra == "rdf"
78
+ Requires-Dist: rdflib==7.5.0; extra == "rdf"
77
79
  Provides-Extra: api
78
80
  Requires-Dist: fastapi==0.121.2; extra == "api"
79
- Requires-Dist: uvicorn==0.38.0; extra == "api"
81
+ Requires-Dist: uvicorn==0.40.0; extra == "api"
80
82
  Provides-Extra: protobuf
81
83
  Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
82
84
  Provides-Extra: all
83
- Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,oracle,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
85
+ Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,duckdb,excel,iceberg,kafka,oracle,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
84
86
  Provides-Extra: dev
85
87
  Requires-Dist: datacontract-cli[all]; extra == "dev"
86
88
  Requires-Dist: httpx==0.28.1; extra == "dev"
@@ -335,6 +337,7 @@ A list of available extras:
335
337
  | Avro Support | `pip install datacontract-cli[avro]` |
336
338
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
337
339
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
340
+ | DuckDB (local/S3/GCS/Azure file testing) | `pip install datacontract-cli[duckdb]` |
338
341
  | Iceberg | `pip install datacontract-cli[iceberg]` |
339
342
  | Kafka Integration | `pip install datacontract-cli[kafka]` |
340
343
  | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
@@ -657,8 +660,7 @@ datacontract.yaml
657
660
  servers:
658
661
  production:
659
662
  type: azure
660
- storageAccount: datameshdatabricksdemo
661
- location: abfss://dataproducts/inventory_events/*.parquet
663
+ location: abfss://datameshdatabricksdemo.dfs.core.windows.net/inventory_events/*.parquet
662
664
  format: parquet
663
665
  ```
664
666
 
@@ -236,6 +236,7 @@ A list of available extras:
236
236
  | Avro Support | `pip install datacontract-cli[avro]` |
237
237
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
238
238
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
239
+ | DuckDB (local/S3/GCS/Azure file testing) | `pip install datacontract-cli[duckdb]` |
239
240
  | Iceberg | `pip install datacontract-cli[iceberg]` |
240
241
  | Kafka Integration | `pip install datacontract-cli[kafka]` |
241
242
  | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
@@ -558,8 +559,7 @@ datacontract.yaml
558
559
  servers:
559
560
  production:
560
561
  type: azure
561
- storageAccount: datameshdatabricksdemo
562
- location: abfss://dataproducts/inventory_events/*.parquet
562
+ location: abfss://datameshdatabricksdemo.dfs.core.windows.net/inventory_events/*.parquet
563
563
  format: parquet
564
564
  ```
565
565
 
@@ -11,50 +11,192 @@ from fastapi.security.api_key import APIKeyHeader
11
11
  from datacontract.data_contract import DataContract, ExportFormat
12
12
  from datacontract.model.run import Run
13
13
 
14
- DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.1
15
- id: urn:datacontract:checkout:orders-latest
16
- info:
17
- title: Orders Latest
18
- version: 2.0.0
19
- owner: Sales Team
20
- servers:
21
- production:
22
- type: s3
23
- location: s3://datacontract-example-orders-latest/v2/{model}/*.json
24
- format: json
25
- delimiter: new_line
26
- models:
27
- orders:
28
- description: One record per order. Includes cancelled and deleted orders.
29
- type: table
30
- fields:
31
- order_id:
32
- type: string
14
+ DATA_CONTRACT_EXAMPLE_PAYLOAD = """apiVersion: v3.1.0
15
+ kind: DataContract
16
+ id: orders
17
+ name: Orders
18
+ version: 1.0.0
19
+ status: active
20
+ description:
21
+ purpose: "Provides order and line item data for analytics and reporting"
22
+ usage: "Used by analytics team for sales analysis and business intelligence"
23
+ limitations: "Contains only the last 2 years of data"
24
+ customProperties:
25
+ - property: "sensitivity"
26
+ value: "secret"
27
+ description: "Data contains personally identifiable information"
28
+ authoritativeDefinitions:
29
+ - url: "https://entropy-data.com/policies/gdpr-compliance"
30
+ type: "businessDefinition"
31
+ description: "GDPR compliance policy for handling customer data"
32
+ schema:
33
+ - name: orders
34
+ physicalType: TABLE
35
+ description: All historic web shop orders since 2020-01-01. Includes successful and cancelled orders.
36
+ properties:
37
+ - name: order_id
38
+ logicalType: string
39
+ description: The internal order id for every orders. Do not show this to a customer.
40
+ businessName: Internal Order ID
41
+ physicalType: UUID
42
+ examples:
43
+ - 99e8bb10-3785-4634-9664-8dc79eb69d43
33
44
  primaryKey: true
34
- order_timestamp:
35
- description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.
36
- type: timestamp
45
+ classification: internal
37
46
  required: true
47
+ unique: true
48
+ - name: customer_id
49
+ logicalType: string
50
+ description: A reference to the customer number
51
+ businessName: Customer Number
52
+ physicalType: TEXT
38
53
  examples:
39
- - "2024-09-09T08:30:00Z"
40
- order_total:
41
- description: Total amount the smallest monetary unit (e.g., cents).
42
- type: long
54
+ - c123456789
43
55
  required: true
56
+ unique: false
57
+ logicalTypeOptions:
58
+ minLength: 10
59
+ maxLength: 10
60
+ authoritativeDefinitions:
61
+ - type: definition
62
+ url: https://example.com/definitions/sales/customer/customer_id
63
+ tags:
64
+ - pii:true
65
+ classification: internal
66
+ criticalDataElement: true
67
+ - name: order_total
68
+ logicalType: integer
69
+ description: The order total amount in cents, including tax, after discounts.
70
+ Includes shipping costs.
71
+ physicalType: INTEGER
44
72
  examples:
45
- - 9999
73
+ - "9999"
74
+ quality:
75
+ - type: text
76
+ description: The order_total equals the sum of all related line items.
77
+ required: true
78
+ businessName: Order Amount
79
+ - name: order_timestamp
80
+ logicalType: timestamp
81
+ description: The time including timezone when the order payment was successfully
82
+ confirmed.
83
+ physicalType: TIMESTAMPTZ
84
+ businessName: Order Date
85
+ examples:
86
+ - "2025-03-01 14:30:00+01"
87
+ - name: order_status
88
+ businessName: Status
89
+ description: The business status of the order
90
+ logicalType: string
91
+ physicalType: TEXT
92
+ examples:
93
+ - shipped
46
94
  quality:
47
- - type: sql
48
- description: 95% of all order total values are expected to be between 10 and 499 EUR.
49
- query: |
50
- SELECT quantile_cont(order_total, 0.95) AS percentile_95
51
- FROM orders
52
- mustBeBetween: [1000, 99900]
53
- customer_id:
54
- description: Unique identifier for the customer.
55
- type: text
56
- minLength: 10
57
- maxLength: 20
95
+ - type: library
96
+ description: Ensure that there are no other status values.
97
+ metric: invalidValues
98
+ arguments:
99
+ validValues:
100
+ - pending
101
+ - paid
102
+ - processing
103
+ - shipped
104
+ - delivered
105
+ - cancelled
106
+ - refunded
107
+ mustBe: 0
108
+ quality:
109
+ - type: library
110
+ metric: rowCount
111
+ mustBeGreaterThan: 100000
112
+ description: If there are less than 100k rows, something is wrong.
113
+ - name: line_items
114
+ physicalType: table
115
+ description: Details for each item in an order
116
+ properties:
117
+ - name: line_item_id
118
+ logicalType: string
119
+ description: Unique identifier for the line item
120
+ physicalType: UUID
121
+ examples:
122
+ - 12c9ba21-0c44-4e29-ba72-b8fd01c1be30
123
+ logicalTypeOptions:
124
+ format: uuid
125
+ required: true
126
+ primaryKey: true
127
+ - name: sku
128
+ logicalType: string
129
+ businessName: Stock Keeping Unit
130
+ description: Identifier for the purchased product
131
+ physicalType: TEXT
132
+ examples:
133
+ - 111222333
134
+ required: true
135
+ - name: price
136
+ logicalType: integer
137
+ description: Price in cents for this line item including tax
138
+ physicalType: INTEGER
139
+ examples:
140
+ - 9999
141
+ required: true
142
+ - name: order_id
143
+ required: false
144
+ primaryKey: false
145
+ logicalType: string
146
+ physicalType: UUID
147
+ relationships:
148
+ - type: foreignKey
149
+ to: orders.order_id
150
+ servers:
151
+ - server: production
152
+ environment: prod
153
+ type: postgres
154
+ host: aws-1-eu-central-2.pooler.supabase.com
155
+ port: 6543
156
+ database: postgres
157
+ schema: dp_orders_v1
158
+ team:
159
+ name: sales
160
+ description: This data product is owned by the "Sales" team
161
+ members:
162
+ - username: john@example.com
163
+ name: John Doe
164
+ role: Owner
165
+ authoritativeDefinitions:
166
+ - type: slack
167
+ url: https://slack.example.com/teams/sales
168
+ roles:
169
+ - role: analyst_us
170
+ description: Read access for analytics to US orders
171
+ - role: analyst_eu
172
+ description: Read access for analytics to EU orders
173
+ slaProperties:
174
+ - property: availability
175
+ value: 99.9%
176
+ description: Data platform uptime guarantee
177
+ - property: retention
178
+ value: "1"
179
+ unit: year
180
+ description: Data will be deleted after 1 year
181
+ - property: freshness
182
+ value: "24"
183
+ unit: hours
184
+ description: Within 24 hours of order placement
185
+ - property: support
186
+ value: business hours
187
+ description: Support only during business hours
188
+ price:
189
+ priceAmount: 0
190
+ priceCurrency: USD
191
+ priceUnit: monthly
192
+ tags:
193
+ - e-commerce
194
+ - transactions
195
+ - pii
196
+ customProperties:
197
+ - property: dataPlatformRole
198
+ value: role_orders_v1
199
+ contractCreatedTs: "2025-01-15T10:00:00Z"
58
200
  """
59
201
 
60
202
  app = FastAPI(
@@ -14,7 +14,7 @@ from datacontract.export.html_exporter import get_version
14
14
 
15
15
  def _get_owner(odcs: OpenDataContractStandard) -> Optional[str]:
16
16
  """Get the owner from ODCS customProperties or team."""
17
- if odcs.team and odcs.team.name:
17
+ if odcs.team and hasattr(odcs.team, "name") and odcs.team.name:
18
18
  return odcs.team.name
19
19
  if odcs.customProperties:
20
20
  for prop in odcs.customProperties:
@@ -67,7 +67,7 @@ def common(
67
67
  pass
68
68
 
69
69
 
70
- @app.command()
70
+ @app.command(name="init")
71
71
  def init(
72
72
  location: Annotated[
73
73
  str, typer.Argument(help="The location of the data contract file to create.")
@@ -90,7 +90,7 @@ def init(
90
90
  console.print("📄 data contract written to " + location)
91
91
 
92
92
 
93
- @app.command()
93
+ @app.command(name="lint")
94
94
  def lint(
95
95
  location: Annotated[
96
96
  str,
@@ -125,7 +125,7 @@ def enable_debug_logging(debug: bool):
125
125
  )
126
126
 
127
127
 
128
- @app.command()
128
+ @app.command(name="test")
129
129
  def test(
130
130
  location: Annotated[
131
131
  str,
@@ -187,7 +187,7 @@ def test(
187
187
  write_test_result(run, console, output_format, output, data_contract)
188
188
 
189
189
 
190
- @app.command()
190
+ @app.command(name="export")
191
191
  def export(
192
192
  format: Annotated[ExportFormat, typer.Option(help="The export format.")],
193
193
  output: Annotated[
@@ -467,7 +467,7 @@ def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict
467
467
  return default_args | dict(zip(trimmed_keys, context.args[1::2]))
468
468
 
469
469
 
470
- @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
470
+ @app.command(name="api", context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
471
471
  def api(
472
472
  ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
473
473
  port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
@@ -4,10 +4,9 @@ import typing
4
4
  from open_data_contract_standard.model import OpenDataContractStandard, Team
5
5
 
6
6
  if typing.TYPE_CHECKING:
7
+ from duckdb.duckdb import DuckDBPyConnection
7
8
  from pyspark.sql import SparkSession
8
9
 
9
- from duckdb.duckdb import DuckDBPyConnection
10
-
11
10
  from datacontract.engines.data_contract_test import execute_data_contract_test
12
11
  from datacontract.export.exporter import ExportFormat
13
12
  from datacontract.export.exporter_factory import exporter_factory
@@ -29,7 +28,7 @@ class DataContract:
29
28
  server: str = None,
30
29
  publish_url: str = None,
31
30
  spark: "SparkSession" = None,
32
- duckdb_connection: DuckDBPyConnection = None,
31
+ duckdb_connection: "DuckDBPyConnection" = None,
33
32
  inline_definitions: bool = True,
34
33
  ssl_verification: bool = True,
35
34
  publish_test_results: bool = False,
@@ -161,7 +161,7 @@ def to_schema_name(schema_object: SchemaObject, server_type: str) -> str:
161
161
  # Use physicalName if set (ODCS standard way to specify actual table name)
162
162
  if schema_object.physicalName:
163
163
  return schema_object.physicalName
164
-
164
+
165
165
  return schema_object.name
166
166
 
167
167
 
@@ -4,12 +4,12 @@ import tempfile
4
4
  import typing
5
5
 
6
6
  import requests
7
- from duckdb.duckdb import DuckDBPyConnection
8
7
  from open_data_contract_standard.model import OpenDataContractStandard, Server
9
8
 
10
9
  from datacontract.engines.data_contract_checks import create_checks
11
10
 
12
11
  if typing.TYPE_CHECKING:
12
+ from duckdb.duckdb import DuckDBPyConnection
13
13
  from pyspark.sql import SparkSession
14
14
 
15
15
  from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
@@ -26,7 +26,7 @@ def execute_data_contract_test(
26
26
  run: Run,
27
27
  server_name: str = None,
28
28
  spark: "SparkSession" = None,
29
- duckdb_connection: DuckDBPyConnection = None,
29
+ duckdb_connection: "DuckDBPyConnection" = None,
30
30
  ):
31
31
  if data_contract.schema_ is None or len(data_contract.schema_) == 0:
32
32
  raise DataContractException(
@@ -6,9 +6,9 @@ from datacontract.engines.soda.connections.athena import to_athena_soda_configur
6
6
  from datacontract.engines.soda.connections.oracle import initialize_client_and_create_soda_configuration
7
7
 
8
8
  if typing.TYPE_CHECKING:
9
+ from duckdb.duckdb import DuckDBPyConnection
9
10
  from pyspark.sql import SparkSession
10
11
 
11
- from duckdb.duckdb import DuckDBPyConnection
12
12
  from open_data_contract_standard.model import OpenDataContractStandard, Server
13
13
 
14
14
  from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
@@ -29,7 +29,7 @@ def check_soda_execute(
29
29
  data_contract: OpenDataContractStandard,
30
30
  server: Server,
31
31
  spark: "SparkSession" = None,
32
- duckdb_connection: DuckDBPyConnection = None,
32
+ duckdb_connection: "DuckDBPyConnection" = None,
33
33
  ):
34
34
  from soda.common.config_helper import ConfigHelper
35
35
 
@@ -1,19 +1,32 @@
1
1
  import os
2
- from typing import Any, List, Optional
2
+ import re
3
+ from typing import TYPE_CHECKING, Any, List, Optional
3
4
 
4
- import duckdb
5
5
  from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject, SchemaProperty, Server
6
6
 
7
7
  from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
8
+ from datacontract.export.sql_type_converter import convert_to_duckdb
8
9
  from datacontract.model.run import Run
9
10
 
11
+ if TYPE_CHECKING:
12
+ import duckdb
13
+
14
+
15
+ def _import_duckdb():
16
+ try:
17
+ import duckdb
18
+ return duckdb
19
+ except ImportError:
20
+ raise ImportError("duckdb is required for this server type. Install with: pip install datacontract-cli[duckdb]")
21
+
10
22
 
11
23
  def get_duckdb_connection(
12
24
  data_contract: OpenDataContractStandard,
13
25
  server: Server,
14
26
  run: Run,
15
- duckdb_connection: duckdb.DuckDBPyConnection | None = None,
16
- ) -> duckdb.DuckDBPyConnection:
27
+ duckdb_connection: "duckdb.DuckDBPyConnection | None" = None,
28
+ ) -> "duckdb.DuckDBPyConnection":
29
+ duckdb = _import_duckdb()
17
30
  if duckdb_connection is None:
18
31
  con = duckdb.connect(database=":memory:")
19
32
  else:
@@ -57,29 +70,46 @@ def get_duckdb_connection(
57
70
  )
58
71
  add_nested_views(con, model_name, schema_obj.properties)
59
72
  elif server.format == "parquet":
60
- con.sql(f"""
61
- CREATE VIEW "{model_name}" AS SELECT * FROM read_parquet('{model_path}', hive_partitioning=1);
62
- """)
73
+ create_view_with_schema_union(con, schema_obj, model_path, "read_parquet", to_parquet_types)
63
74
  elif server.format == "csv":
64
- columns = to_csv_types(schema_obj)
65
- run.log_info("Using columns: " + str(columns))
66
- if columns is None:
67
- con.sql(
68
- f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""
69
- )
70
- else:
71
- con.sql(
72
- f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
73
- )
75
+ create_view_with_schema_union(con, schema_obj, model_path, "read_csv", to_csv_types)
74
76
  elif server.format == "delta":
75
77
  con.sql("update extensions;") # Make sure we have the latest delta extension
76
78
  con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
77
- table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchdf()
78
- if table_info is not None and not table_info.empty:
79
- run.log_info(f"DuckDB Table Info: {table_info.to_string(index=False)}")
79
+ table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchall()
80
+ if table_info:
81
+ run.log_info(f"DuckDB Table Info: {table_info}")
80
82
  return con
81
83
 
82
84
 
85
def create_view_with_schema_union(con, schema_obj: SchemaObject, model_path: str, read_function: str, type_converter):
    """Expose the files at ``model_path`` under the model's name, shaped by the contract schema.

    When ``type_converter`` yields column types, an empty table with exactly
    those columns is created and then filled ``BY NAME`` with the columns that
    occur both in the contract and in the data files. When no column types are
    available, a plain view over the files is created instead.

    All identifiers (model name and column names) are emitted as quoted SQL
    identifiers, so names containing hyphens, spaces or reserved words work in
    the ``INSERT`` as well as in the ``CREATE`` statement.

    Args:
        con: Connection object exposing ``sql()``; the statements are run on it.
        schema_obj: Schema object whose ``name`` becomes the table/view name.
        model_path: Path or glob handed to ``read_function``.
        read_function: Name of the DuckDB table function, e.g. ``read_parquet`` or ``read_csv``.
        type_converter: Callable mapping ``schema_obj`` to a ``{column name: type}``
            dict, or to a falsy value when no types are known.
    """

    def quote_identifier(identifier) -> str:
        # Double embedded quotes so any name is a valid quoted SQL identifier.
        return '"' + str(identifier).replace('"', '""') + '"'

    converted_types = type_converter(schema_obj)
    model_name = schema_obj.name
    quoted_model_name = quote_identifier(model_name)
    source = f"{read_function}('{model_path}', union_by_name=true, hive_partitioning=1)"

    if not converted_types:
        # Fallback: no contract types available, expose the files as they are.
        con.sql(f"""CREATE VIEW {quoted_model_name} AS SELECT * FROM {source};""")
        return

    # Create empty table with contract schema
    columns_def = [f"{quote_identifier(col_name)} {col_type}" for col_name, col_type in converted_types.items()]
    con.sql(f"""CREATE TABLE {quoted_model_name} ({', '.join(columns_def)});""")

    # Read columns existing in both current data contract and data.
    # The model name is used as a string literal here, so escape single quotes.
    model_name_literal = str(model_name).replace("'", "''")
    intersecting_columns = con.sql(
        f"""SELECT column_name
        FROM (DESCRIBE SELECT * FROM {source})
        INTERSECT SELECT column_name
        FROM information_schema.columns
        WHERE table_name = '{model_name_literal}'"""
    ).fetchall()
    selected_columns = ", ".join(quote_identifier(column[0]) for column in intersecting_columns)

    # Insert data into table by name, but only columns existing in contract and data
    con.sql(
        f"""INSERT INTO {quoted_model_name} BY NAME
        (SELECT {selected_columns} FROM {source});"""
    )
112
+
83
113
  def to_csv_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
84
114
  if schema_obj is None:
85
115
  return None
@@ -89,6 +119,15 @@ def to_csv_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
89
119
  columns[prop.name] = convert_to_duckdb_csv_type(prop)
90
120
  return columns
91
121
 
122
def to_parquet_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
    """Map every property of a schema object to its SQL type for Parquet (preserves decimals, etc.).

    Returns ``None`` when no schema object is given; otherwise a dict keyed by
    property name, which is empty when the schema object has no properties.
    """
    if schema_obj is None:
        return None
    return {field.name: convert_to_duckdb(field) for field in schema_obj.properties or []}
92
131
 
93
132
  def to_json_types(schema_obj: SchemaObject) -> dict[Any, str | None] | None:
94
133
  if schema_obj is None:
@@ -109,7 +148,7 @@ def _get_type(prop: SchemaProperty) -> Optional[str]:
109
148
  return None
110
149
 
111
150
 
112
- def add_nested_views(con: duckdb.DuckDBPyConnection, model_name: str, properties: List[SchemaProperty] | None):
151
+ def add_nested_views(con: "duckdb.DuckDBPyConnection", model_name: str, properties: List[SchemaProperty] | None):
113
152
  model_name = model_name.strip('"')
114
153
  if properties is None:
115
154
  return
@@ -140,7 +179,7 @@ def add_nested_views(con: duckdb.DuckDBPyConnection, model_name: str, properties
140
179
  add_nested_views(con, nested_model_name, prop.properties)
141
180
 
142
181
 
143
- def setup_s3_connection(con, server):
182
+ def setup_s3_connection(con, server: Server):
144
183
  s3_region = os.getenv("DATACONTRACT_S3_REGION")
145
184
  s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
146
185
  s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
@@ -184,7 +223,7 @@ def setup_s3_connection(con, server):
184
223
  """)
185
224
 
186
225
 
187
- def setup_gcs_connection(con, server):
226
+ def setup_gcs_connection(con, server: Server):
188
227
  key_id = os.getenv("DATACONTRACT_GCS_KEY_ID")
189
228
  secret = os.getenv("DATACONTRACT_GCS_SECRET")
190
229
 
@@ -202,11 +241,14 @@ def setup_gcs_connection(con, server):
202
241
  """)
203
242
 
204
243
 
205
- def setup_azure_connection(con, server):
244
+ def setup_azure_connection(con, server: Server):
206
245
  tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
207
246
  client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
208
247
  client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
209
- storage_account = server.storageAccount
248
+ storage_account = (
249
+ to_azure_storage_account(server.location) if server.type == "azure" and "://" in server.location
250
+ else None
251
+ )
210
252
 
211
253
  if tenant_id is None:
212
254
  raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
@@ -239,3 +281,25 @@ def setup_azure_connection(con, server):
239
281
  CLIENT_SECRET '{client_secret}'
240
282
  );
241
283
  """)
284
+
285
+ def to_azure_storage_account(location: str) -> str | None:
286
+ """
287
+ Converts a storage location string to extract the storage account name.
288
+ ODCS v3.0 has no explicit field for the storage account. It uses the location field, which is a URI.
289
+ This function parses a storage location string to identify and return the
290
+ storage account name. It handles two primary patterns:
291
+ 1. Protocol://containerName@storageAccountName
292
+ 2. Protocol://storageAccountName
293
+ :param location: The storage location string to parse, typically following
294
+ the format protocol://containerName@storageAccountName. or
295
+ protocol://storageAccountName.
296
+ :return: The extracted storage account name if found, otherwise None
297
+ """
298
+ # to catch protocol://containerName@storageAccountName. pattern from location
299
+ match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
300
+ if match:
301
+ return match.group()
302
+ else:
303
+ # to catch protocol://storageAccountName. pattern from location
304
+ match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
305
+ return match.group() if match else None