datacontract-cli 0.10.8__tar.gz → 0.10.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (171) hide show
  1. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/PKG-INFO +203 -28
  2. datacontract_cli-0.10.8/datacontract_cli.egg-info/PKG-INFO → datacontract_cli-0.10.9/README.md +188 -90
  3. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/catalog/catalog.py +4 -2
  4. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/cli.py +29 -18
  5. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/data_contract.py +9 -51
  6. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/check_soda_execute.py +5 -0
  7. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/duckdb.py +28 -12
  8. datacontract_cli-0.10.9/datacontract/engines/soda/connections/trino.py +26 -0
  9. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/exporter.py +3 -2
  10. datacontract_cli-0.10.9/datacontract/export/exporter_factory.py +145 -0
  11. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/jsonschema_converter.py +7 -7
  12. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/sodacl_converter.py +16 -11
  13. datacontract_cli-0.10.9/datacontract/export/spark_converter.py +211 -0
  14. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/sql_type_converter.py +28 -0
  15. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/avro_importer.py +8 -0
  16. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/bigquery_importer.py +17 -0
  17. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/glue_importer.py +115 -32
  18. datacontract_cli-0.10.9/datacontract/imports/importer.py +29 -0
  19. datacontract_cli-0.10.9/datacontract/imports/importer_factory.py +72 -0
  20. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/jsonschema_importer.py +8 -0
  21. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/odcs_importer.py +8 -0
  22. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/sql_importer.py +8 -0
  23. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/imports/unity_importer.py +23 -9
  24. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/resolve.py +22 -1
  25. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/model/data_contract_specification.py +34 -4
  26. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/datacontract.html +17 -2
  27. datacontract_cli-0.10.9/datacontract/templates/partials/datacontract_information.html +86 -0
  28. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/datacontract_terms.html +7 -0
  29. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/definition.html +9 -1
  30. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/model_field.html +23 -6
  31. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/server.html +49 -16
  32. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/style/output.css +42 -0
  33. datacontract_cli-0.10.8/README.md → datacontract_cli-0.10.9/datacontract_cli.egg-info/PKG-INFO +265 -17
  34. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract_cli.egg-info/SOURCES.txt +6 -0
  35. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract_cli.egg-info/requires.txt +12 -7
  36. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/pyproject.toml +20 -13
  37. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_custom_exporter.py +2 -1
  38. datacontract_cli-0.10.9/tests/test_export_spark.py +142 -0
  39. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_glue.py +10 -1
  40. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_unity_file.py +1 -0
  41. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_postgres.py +23 -10
  42. datacontract_cli-0.10.9/tests/test_test_trino.py +99 -0
  43. datacontract_cli-0.10.8/datacontract/export/exporter_factory.py +0 -52
  44. datacontract_cli-0.10.8/datacontract/templates/partials/datacontract_information.html +0 -66
  45. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/LICENSE +0 -0
  46. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/MANIFEST.in +0 -0
  47. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/__init__.py +0 -0
  48. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/breaking/breaking.py +0 -0
  49. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/breaking/breaking_rules.py +0 -0
  50. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/__init__.py +0 -0
  51. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +0 -0
  52. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/datacontract/check_that_datacontract_file_exists.py +0 -0
  53. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -0
  54. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/fastjsonschema/check_jsonschema.py +0 -0
  55. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/fastjsonschema/s3/s3_read_files.py +0 -0
  56. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/__init__.py +0 -0
  57. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/bigquery.py +0 -0
  58. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/dask.py +0 -0
  59. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/databricks.py +0 -0
  60. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/kafka.py +0 -0
  61. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/postgres.py +0 -0
  62. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/snowflake.py +0 -0
  63. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/engines/soda/connections/sqlserver.py +0 -0
  64. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/__init__.py +0 -0
  65. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/avro_converter.py +0 -0
  66. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/avro_idl_converter.py +0 -0
  67. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/bigquery_converter.py +0 -0
  68. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/csv_type_converter.py +0 -0
  69. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/dbml_converter.py +0 -0
  70. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/dbt_converter.py +0 -0
  71. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/go_converter.py +0 -0
  72. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/great_expectations_converter.py +0 -0
  73. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/html_export.py +0 -0
  74. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/odcs_converter.py +0 -0
  75. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/protobuf_converter.py +0 -0
  76. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/pydantic_converter.py +0 -0
  77. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/rdf_converter.py +0 -0
  78. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/sql_converter.py +0 -0
  79. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/export/terraform_converter.py +0 -0
  80. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/init/download_datacontract_file.py +0 -0
  81. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/integration/publish_datamesh_manager.py +0 -0
  82. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/integration/publish_opentelemetry.py +0 -0
  83. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/files.py +0 -0
  84. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/lint.py +0 -0
  85. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/__init__.py +0 -0
  86. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/description_linter.py +0 -0
  87. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/example_model_linter.py +0 -0
  88. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/field_pattern_linter.py +0 -0
  89. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/field_reference_linter.py +0 -0
  90. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/notice_period_linter.py +0 -0
  91. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/quality_schema_linter.py +0 -0
  92. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/linters/valid_constraints_linter.py +0 -0
  93. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/schema.py +0 -0
  94. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/lint/urls.py +0 -0
  95. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/model/breaking_change.py +0 -0
  96. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/model/exceptions.py +0 -0
  97. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/model/run.py +0 -0
  98. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/publish/publish.py +0 -0
  99. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/py.typed +0 -0
  100. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/index.html +0 -0
  101. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/datacontract_servicelevels.html +0 -0
  102. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/templates/partials/example.html +0 -0
  103. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract/web.py +0 -0
  104. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract_cli.egg-info/dependency_links.txt +0 -0
  105. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract_cli.egg-info/entry_points.txt +0 -0
  106. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/datacontract_cli.egg-info/top_level.txt +0 -0
  107. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/setup.cfg +0 -0
  108. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_breaking.py +0 -0
  109. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_catalog.py +0 -0
  110. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_changelog.py +0 -0
  111. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_cli.py +0 -0
  112. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_description_linter.py +0 -0
  113. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_documentation_linter.py +0 -0
  114. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_download_datacontract_file.py +0 -0
  115. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_example_model_linter.py +0 -0
  116. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_avro.py +0 -0
  117. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_avro_idl.py +0 -0
  118. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_bigquery.py +0 -0
  119. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_dbml.py +0 -0
  120. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_dbt_models.py +0 -0
  121. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_dbt_sources.py +0 -0
  122. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_dbt_staging_sql.py +0 -0
  123. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_go.py +0 -0
  124. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_great_expectations.py +0 -0
  125. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_html.py +0 -0
  126. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_jsonschema.py +0 -0
  127. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_odcs.py +0 -0
  128. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_protobuf.py +0 -0
  129. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_pydantic.py +0 -0
  130. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_rdf.py +0 -0
  131. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_sodacl.py +0 -0
  132. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_sql.py +0 -0
  133. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_sql_query.py +0 -0
  134. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_export_terraform.py +0 -0
  135. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_field_constraint_linter.py +0 -0
  136. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_field_pattern_linter.py +0 -0
  137. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_field_reference_linter.py +0 -0
  138. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_avro.py +0 -0
  139. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_bigquery.py +0 -0
  140. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_jsonschema.py +0 -0
  141. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_odcs.py +0 -0
  142. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_import_sql.py +0 -0
  143. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_integration_datameshmanager.py +0 -0
  144. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_integration_opentelemetry.py +0 -0
  145. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_lint.py +0 -0
  146. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_notice_period_linter.py +0 -0
  147. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_quality_schema_linter.py +0 -0
  148. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_roundtrip_jsonschema.py +0 -0
  149. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_schema.py +0 -0
  150. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_azure_parquet_remote.py +0 -0
  151. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_bigquery.py +0 -0
  152. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_databricks.py +0 -0
  153. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_dataframe.py +0 -0
  154. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_examples_csv.py +0 -0
  155. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_examples_formats_valid.py +0 -0
  156. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_examples_inline.py +0 -0
  157. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_examples_json.py +0 -0
  158. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_examples_missing.py +0 -0
  159. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_kafka.py +0 -0
  160. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_kafka_remote.py +0 -0
  161. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_local_json.py +0 -0
  162. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_parquet.py +0 -0
  163. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_csv.py +0 -0
  164. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_delta.py +0 -0
  165. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_json.py +0 -0
  166. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_json_complex.py +0 -0
  167. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_json_multiple_models.py +0 -0
  168. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_s3_json_remote.py +0 -0
  169. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_snowflake.py +0 -0
  170. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_test_sqlserver.py +0 -0
  171. {datacontract_cli-0.10.8 → datacontract_cli-0.10.9}/tests/test_web.py +0 -0
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datacontract-cli
3
- Version: 0.10.8
4
- Summary: Test data contracts
5
- Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
3
+ Version: 0.10.9
4
+ Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
5
+ Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
6
6
  Project-URL: Homepage, https://cli.datacontract.com
7
- Project-URL: Issues, https://github.com/datacontract/cli/issues
7
+ Project-URL: Issues, https://github.com/datacontract/datacontract-cli/issues
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
@@ -28,8 +28,8 @@ Requires-Dist: python-dotenv~=1.0.0
28
28
  Requires-Dist: rdflib==7.0.0
29
29
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
30
30
  Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
31
- Requires-Dist: boto3<1.34.130,>=1.34.41
32
- Requires-Dist: botocore<1.34.128,>=1.34.41
31
+ Requires-Dist: boto3<1.34.137,>=1.34.41
32
+ Requires-Dist: botocore<1.34.137,>=1.34.41
33
33
  Requires-Dist: jinja_partials>=0.2.1
34
34
  Provides-Extra: avro
35
35
  Requires-Dist: avro==1.11.3; extra == "avro"
@@ -37,7 +37,7 @@ Provides-Extra: bigquery
37
37
  Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
38
38
  Provides-Extra: databricks
39
39
  Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
40
- Requires-Dist: databricks-sql-connector<3.2.0,>=3.1.2; extra == "databricks"
40
+ Requires-Dist: databricks-sql-connector<3.3.0,>=3.1.2; extra == "databricks"
41
41
  Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
42
42
  Provides-Extra: deltalake
43
43
  Requires-Dist: deltalake<0.19,>=0.17; extra == "deltalake"
@@ -47,14 +47,16 @@ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
47
47
  Provides-Extra: postgres
48
48
  Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
49
49
  Provides-Extra: s3
50
- Requires-Dist: s3fs==2024.6.0; extra == "s3"
50
+ Requires-Dist: s3fs==2024.6.1; extra == "s3"
51
51
  Provides-Extra: snowflake
52
- Requires-Dist: snowflake-connector-python[pandas]<3.11,>=3.6; extra == "snowflake"
52
+ Requires-Dist: snowflake-connector-python[pandas]<3.12,>=3.6; extra == "snowflake"
53
53
  Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
54
54
  Provides-Extra: sqlserver
55
55
  Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
56
+ Provides-Extra: trino
57
+ Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"
56
58
  Provides-Extra: all
57
- Requires-Dist: datacontract-cli[bigquery,databricks,deltalake,kafka,postgres,s3,snowflake,sqlserver]; extra == "all"
59
+ Requires-Dist: datacontract-cli[bigquery,databricks,deltalake,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
58
60
  Provides-Extra: dev
59
61
  Requires-Dist: datacontract-cli[all]; extra == "dev"
60
62
  Requires-Dist: httpx==0.27.0; extra == "dev"
@@ -65,7 +67,9 @@ Requires-Dist: pytest-xdist; extra == "dev"
65
67
  Requires-Dist: moto; extra == "dev"
66
68
  Requires-Dist: pymssql==2.3.0; extra == "dev"
67
69
  Requires-Dist: kafka-python; extra == "dev"
68
- Requires-Dist: testcontainers~=4.5.0; extra == "dev"
70
+ Requires-Dist: trino==0.328.0; extra == "dev"
71
+ Requires-Dist: testcontainers<4.8,>=4.5; extra == "dev"
72
+ Requires-Dist: testcontainers[core]; extra == "dev"
69
73
  Requires-Dist: testcontainers[minio]; extra == "dev"
70
74
  Requires-Dist: testcontainers[postgres]; extra == "dev"
71
75
  Requires-Dist: testcontainers[kafka]; extra == "dev"
@@ -258,17 +262,18 @@ pip install datacontract-cli[all]
258
262
 
259
263
  A list of available extras:
260
264
 
261
- | Dependency | Installation Command |
262
- |-------------------------|-------------------------------------------------------------|
263
- | Avro Support | `pip install datacontract-cli[avro]` |
264
- | Google BigQuery | `pip install datacontract-cli[bigquery]` |
265
- | Databricks Integration | `pip install datacontract-cli[databricks]` |
266
- | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
267
- | Kafka Integration | `pip install datacontract-cli[kafka]` |
268
- | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
269
- | S3 Integration | `pip install datacontract-cli[s3]` |
270
- | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
271
- | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
265
+ | Dependency | Installation Command |
266
+ |------------------------|--------------------------------------------|
267
+ | Avro Support | `pip install datacontract-cli[avro]` |
268
+ | Google BigQuery | `pip install datacontract-cli[bigquery]` |
269
+ | Databricks Integration | `pip install datacontract-cli[databricks]` |
270
+ | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
271
+ | Kafka Integration | `pip install datacontract-cli[kafka]` |
272
+ | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
273
+ | S3 Integration | `pip install datacontract-cli[s3]` |
274
+ | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
275
+ | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
276
+ | Trino | `pip install datacontract-cli[trino]` |
272
277
 
273
278
 
274
279
 
@@ -384,6 +389,7 @@ Supported server types:
384
389
  - [snowflake](#snowflake)
385
390
  - [kafka](#kafka)
386
391
  - [postgres](#postgres)
392
+ - [trino](#trino)
387
393
  - [local](#local)
388
394
 
389
395
  Supported formats:
@@ -429,11 +435,12 @@ servers:
429
435
 
430
436
  #### Environment Variables
431
437
 
432
- | Environment Variable | Example | Description |
433
- |-----------------------------------|-------------------------------|-----------------------|
434
- | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of S3 bucket |
435
- | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
436
- | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
438
+ | Environment Variable | Example | Description |
439
+ |-------------------------------------|---------------------------------|----------------------------------------|
440
+ | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of S3 bucket |
441
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
442
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
443
+ | `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
437
444
 
438
445
 
439
446
 
@@ -724,6 +731,35 @@ models:
724
731
  | `DATACONTRACT_POSTGRES_PASSWORD` | `mysecretpassword` | Password |
725
732
 
726
733
 
734
+ ### Trino
735
+
736
+ Data Contract CLI can test data in Trino.
737
+
738
+ #### Example
739
+
740
+ datacontract.yaml
741
+ ```yaml
742
+ servers:
743
+ trino:
744
+ type: trino
745
+ host: localhost
746
+ port: 8080
747
+ catalog: my_catalog
748
+ schema: my_schema
749
+ models:
750
+ my_table_1: # corresponds to a table
751
+ type: table
752
+ fields:
753
+ my_column_1: # corresponds to a column
754
+ type: varchar
755
+ ```
756
+
757
+ #### Environment Variables
758
+
759
+ | Environment Variable | Example | Description |
760
+ |-------------------------------|--------------------|-------------|
761
+ | `DATACONTRACT_TRINO_USERNAME` | `trino` | Username |
762
+ | `DATACONTRACT_TRINO_PASSWORD` | `mysecretpassword` | Password |
727
763
 
728
764
 
729
765
 
@@ -742,7 +778,7 @@ models:
742
778
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
743
779
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
744
780
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
745
- │ y|dbml] │
781
+ │ y|dbml|spark] │
746
782
  │ --output PATH Specify the file path where the exported data will be │
747
783
  │ saved. If no path is provided, the output will be │
748
784
  │ printed to stdout. │
@@ -792,6 +828,7 @@ Available export options:
792
828
  | `go` | Export to Go types | ✅ |
793
829
  | `pydantic-model` | Export to pydantic models | ✅ |
794
830
  | `DBML` | Export to a DBML Diagram description | ✅ |
831
+ | `spark` | Export to a Spark StructType | ✅ |
795
832
  | Missing something? | Please create an issue on GitHub | TBD |
796
833
 
797
834
  #### Great Expectations
@@ -838,6 +875,10 @@ The export function converts the logical data types of the datacontract into the
838
875
  if a server is selected via the `--server` option (based on the `type` of that server). If no server is selected, the
839
876
  logical data types are exported.
840
877
 
878
+ #### Spark
879
+
880
+ The export function converts the data contract specification into a StructType Spark schema. The returned value is a Python code representation of the model schemas.
881
+ A Spark DataFrame schema is defined as a StructType. For more details about Spark data types, please see [the Spark documentation](https://spark.apache.org/docs/latest/sql-ref-datatypes.html).
841
882
 
842
883
  #### Avro
843
884
 
@@ -1214,6 +1255,121 @@ Examples: Removing or renaming models and fields.
1214
1255
  $ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
1215
1256
  ```
1216
1257
 
1258
+ ## Customizing Exporters and Importers
1259
+
1260
+ ### Custom Exporter
1261
+ Use the exporter factory to add a new custom exporter:
1262
+ ```python
1263
+
1264
+ from datacontract.data_contract import DataContract
1265
+ from datacontract.export.exporter import Exporter
1266
+ from datacontract.export.exporter_factory import exporter_factory
1267
+
1268
+
1269
+ # Create a custom class that implements export method
1270
+ class CustomExporter(Exporter):
1271
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
1272
+ result = {
1273
+ "title": data_contract.info.title,
1274
+ "version": data_contract.info.version,
1275
+ "description": data_contract.info.description,
1276
+ "email": data_contract.info.contact.email,
1277
+ "url": data_contract.info.contact.url,
1278
+ "model": model,
1279
+ "model_columns": ", ".join(list(data_contract.models.get(model).fields.keys())),
1280
+ "export_args": export_args,
1281
+ "custom_args": export_args.get("custom_arg", ""),
1282
+ }
1283
+ return result
1284
+
1285
+
1286
+ # Register the new custom class into factory
1287
+ exporter_factory.register_exporter("custom", CustomExporter)
1288
+
1289
+
1290
+ if __name__ == "__main__":
1291
+ # Create a DataContract instance
1292
+ data_contract = DataContract(
1293
+ data_contract_file="/path/datacontract.yaml"
1294
+ )
1295
+ # call export
1296
+ result = data_contract.export(
1297
+ export_format="custom", model="orders", server="production", custom_arg="my_custom_arg"
1298
+ )
1299
+ print(result)
1300
+
1301
+ ```
1302
+ Output
1303
+ ```python
1304
+ {
1305
+ 'title': 'Orders Unit Test',
1306
+ 'version': '1.0.0',
1307
+ 'description': 'The orders data contract',
1308
+ 'email': 'team-orders@example.com',
1309
+ 'url': 'https://wiki.example.com/teams/checkout',
1310
+ 'model': 'orders',
1311
+ 'model_columns': 'order_id, order_total, order_status',
1312
+ 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'},
1313
+ 'custom_args': 'my_custom_arg'
1314
+ }
1315
+ ```
1316
+
1317
+ ### Custom Importer
1318
+ Use the importer factory to add a new custom importer:
1319
+ ```python
1320
+
1321
+ from datacontract.model.data_contract_specification import DataContractSpecification
1322
+ from datacontract.data_contract import DataContract
1323
+ from datacontract.imports.importer import Importer
1324
+ from datacontract.imports.importer_factory import importer_factory
1325
+ import json
1326
+
1327
+ # Create a custom class that implements import_source method
1328
+ class CustomImporter(Importer):
1329
+ def import_source(
1330
+ self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
1331
+ ) -> dict:
1332
+ source_dict = json.loads(source)
1333
+ data_contract_specification.id = source_dict.get("id_custom")
1334
+ data_contract_specification.info.title = source_dict.get("title")
1335
+ data_contract_specification.info.description = source_dict.get("description_from_app")
1336
+
1337
+ return data_contract_specification
1338
+
1339
+
1340
+ # Register the new custom class into factory
1341
+ importer_factory.register_importer("custom_company_importer", CustomImporter)
1342
+
1343
+
1344
+ if __name__ == "__main__":
1345
+ # JSON payload describing a data contract, as received from a custom application
1346
+ json_from_custom_app = '{"id_custom":"uuid-custom","version":"0.0.2", "title":"my_custom_imported_data", "description_from_app": "Custom contract description"}'
1347
+ # Create a DataContract instance
1348
+ data_contract = DataContract()
1349
+
1350
+ # call import_from
1351
+ result = data_contract.import_from_source(
1352
+ format="custom_company_importer", data_contract_specification=DataContract.init(), source=json_from_custom_app
1353
+ )
1354
+ print(dict(result))
1355
+
1356
+ ```
1357
+ Output
1358
+
1359
+ ```python
1360
+ {
1361
+ 'dataContractSpecification': '0.9.3',
1362
+ 'id': 'uuid-custom',
1363
+ 'info': Info(title='my_custom_imported_data', version='0.0.1', status=None, description='Custom contract description', owner=None, contact=None),
1364
+ 'servers': {},
1365
+ 'terms': None,
1366
+ 'models': {},
1367
+ 'definitions': {},
1368
+ 'examples': [],
1369
+ 'quality': None,
1370
+ 'servicelevels': None
1371
+ }
1372
+ ```
1217
1373
  ## Development Setup
1218
1374
 
1219
1375
  Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).
@@ -1263,7 +1419,26 @@ docker compose run --rm datacontract --version
1263
1419
 
1264
1420
  This command runs the container momentarily to check the version of the `datacontract` CLI. The `--rm` flag ensures that the container is automatically removed after the command executes, keeping your environment clean.
1265
1421
 
1422
+ ## Use with pre-commit
1423
+
1424
+ To run `datacontract-cli` as part of a [pre-commit](https://pre-commit.com/) workflow, add an entry like the following to the `repos` list in the project's `.pre-commit-config.yaml`:
1425
+
1426
+ ```yaml
1427
+ repos:
1428
+ - repo: https://github.com/datacontract/datacontract-cli
1429
+ rev: "v0.10.9"
1430
+ hooks:
1431
+ - id: datacontract-lint
1432
+ - id: datacontract-test
1433
+ args: ["--server", "production"]
1434
+ ```
1435
+
1436
+ ### Available Hook IDs
1266
1437
 
1438
+ | Hook ID | Description | Dependency |
1439
+ | ----------------- | ------------------------ | ---------- |
1440
+ | datacontract-lint | Runs the lint subcommand. | Python3 |
1441
+ | datacontract-test | Runs the test subcommand. Please look at [test](#test) section for all available arguments. | Python3 |
1267
1442
 
1268
1443
  ## Release Steps
1269
1444
 
@@ -1,76 +1,3 @@
1
- Metadata-Version: 2.1
2
- Name: datacontract-cli
3
- Version: 0.10.8
4
- Summary: Test data contracts
5
- Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
6
- Project-URL: Homepage, https://cli.datacontract.com
7
- Project-URL: Issues, https://github.com/datacontract/cli/issues
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.10
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: typer[all]<0.13,>=0.9
15
- Requires-Dist: pydantic<2.8.0,>=2.5.3
16
- Requires-Dist: pyyaml~=6.0.1
17
- Requires-Dist: requests<2.33,>=2.31
18
- Requires-Dist: fastapi==0.111.0
19
- Requires-Dist: fastparquet==2024.5.0
20
- Requires-Dist: python-multipart==0.0.9
21
- Requires-Dist: rich~=13.7.0
22
- Requires-Dist: simple-ddl-parser==1.5.1
23
- Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
24
- Requires-Dist: setuptools>=60
25
- Requires-Dist: duckdb==1.0.0
26
- Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
27
- Requires-Dist: python-dotenv~=1.0.0
28
- Requires-Dist: rdflib==7.0.0
29
- Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
30
- Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
31
- Requires-Dist: boto3<1.34.130,>=1.34.41
32
- Requires-Dist: botocore<1.34.128,>=1.34.41
33
- Requires-Dist: jinja_partials>=0.2.1
34
- Provides-Extra: avro
35
- Requires-Dist: avro==1.11.3; extra == "avro"
36
- Provides-Extra: bigquery
37
- Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
38
- Provides-Extra: databricks
39
- Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
40
- Requires-Dist: databricks-sql-connector<3.2.0,>=3.1.2; extra == "databricks"
41
- Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
42
- Provides-Extra: deltalake
43
- Requires-Dist: deltalake<0.19,>=0.17; extra == "deltalake"
44
- Provides-Extra: kafka
45
- Requires-Dist: datacontract-cli[avro]; extra == "kafka"
46
- Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
47
- Provides-Extra: postgres
48
- Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
49
- Provides-Extra: s3
50
- Requires-Dist: s3fs==2024.6.0; extra == "s3"
51
- Provides-Extra: snowflake
52
- Requires-Dist: snowflake-connector-python[pandas]<3.11,>=3.6; extra == "snowflake"
53
- Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
54
- Provides-Extra: sqlserver
55
- Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
56
- Provides-Extra: all
57
- Requires-Dist: datacontract-cli[bigquery,databricks,deltalake,kafka,postgres,s3,snowflake,sqlserver]; extra == "all"
58
- Provides-Extra: dev
59
- Requires-Dist: datacontract-cli[all]; extra == "dev"
60
- Requires-Dist: httpx==0.27.0; extra == "dev"
61
- Requires-Dist: ruff; extra == "dev"
62
- Requires-Dist: pre-commit~=3.7.1; extra == "dev"
63
- Requires-Dist: pytest; extra == "dev"
64
- Requires-Dist: pytest-xdist; extra == "dev"
65
- Requires-Dist: moto; extra == "dev"
66
- Requires-Dist: pymssql==2.3.0; extra == "dev"
67
- Requires-Dist: kafka-python; extra == "dev"
68
- Requires-Dist: testcontainers~=4.5.0; extra == "dev"
69
- Requires-Dist: testcontainers[minio]; extra == "dev"
70
- Requires-Dist: testcontainers[postgres]; extra == "dev"
71
- Requires-Dist: testcontainers[kafka]; extra == "dev"
72
- Requires-Dist: testcontainers[mssql]; extra == "dev"
73
-
74
1
  # Data Contract CLI
75
2
 
76
3
  <p>
@@ -258,17 +185,18 @@ pip install datacontract-cli[all]
258
185
 
259
186
  A list of available extras:
260
187
 
261
- | Dependency | Installation Command |
262
- |-------------------------|-------------------------------------------------------------|
263
- | Avro Support | `pip install datacontract-cli[avro]` |
264
- | Google BigQuery | `pip install datacontract-cli[bigquery]` |
265
- | Databricks Integration | `pip install datacontract-cli[databricks]` |
266
- | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
267
- | Kafka Integration | `pip install datacontract-cli[kafka]` |
268
- | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
269
- | S3 Integration | `pip install datacontract-cli[s3]` |
270
- | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
271
- | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
188
+ | Dependency | Installation Command |
189
+ |------------------------|--------------------------------------------|
190
+ | Avro Support | `pip install datacontract-cli[avro]` |
191
+ | Google BigQuery | `pip install datacontract-cli[bigquery]` |
192
+ | Databricks Integration | `pip install datacontract-cli[databricks]` |
193
+ | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
194
+ | Kafka Integration | `pip install datacontract-cli[kafka]` |
195
+ | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
196
+ | S3 Integration | `pip install datacontract-cli[s3]` |
197
+ | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
198
+ | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
199
+ | Trino | `pip install datacontract-cli[trino]` |
272
200
 
273
201
 
274
202
 
@@ -384,6 +312,7 @@ Supported server types:
384
312
  - [snowflake](#snowflake)
385
313
  - [kafka](#kafka)
386
314
  - [postgres](#postgres)
315
+ - [trino](#trino)
387
316
  - [local](#local)
388
317
 
389
318
  Supported formats:
@@ -429,11 +358,12 @@ servers:
429
358
 
430
359
  #### Environment Variables
431
360
 
432
- | Environment Variable | Example | Description |
433
- |-----------------------------------|-------------------------------|-----------------------|
434
- | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of S3 bucket |
435
- | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
436
- | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
361
+ | Environment Variable | Example | Description |
362
+ |-------------------------------------|---------------------------------|----------------------------------------|
363
+ | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of S3 bucket |
364
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
365
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
366
+ | `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
437
367
 
438
368
 
439
369
 
@@ -724,6 +654,35 @@ models:
724
654
  | `DATACONTRACT_POSTGRES_PASSWORD` | `mysecretpassword` | Password |
725
655
 
726
656
 
657
+ ### Trino
658
+
659
+ Data Contract CLI can test data in Trino.
660
+
661
+ #### Example
662
+
663
+ datacontract.yaml
664
+ ```yaml
665
+ servers:
666
+ trino:
667
+ type: trino
668
+ host: localhost
669
+ port: 8080
670
+ catalog: my_catalog
671
+ schema: my_schema
672
+ models:
673
+ my_table_1: # corresponds to a table
674
+ type: table
675
+ fields:
676
+ my_column_1: # corresponds to a column
677
+ type: varchar
678
+ ```
679
+
680
+ #### Environment Variables
681
+
682
+ | Environment Variable | Example | Description |
683
+ |-------------------------------|--------------------|-------------|
684
+ | `DATACONTRACT_TRINO_USERNAME` | `trino` | Username |
685
+ | `DATACONTRACT_TRINO_PASSWORD` | `mysecretpassword` | Password |
727
686
 
728
687
 
729
688
 
@@ -742,7 +701,7 @@ models:
742
701
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
743
702
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
744
703
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
745
- │ y|dbml] │
704
+ │ y|dbml|spark] │
746
705
  │ --output PATH Specify the file path where the exported data will be │
747
706
  │ saved. If no path is provided, the output will be │
748
707
  │ printed to stdout. │
@@ -792,6 +751,7 @@ Available export options:
792
751
  | `go` | Export to Go types | ✅ |
793
752
  | `pydantic-model` | Export to pydantic models | ✅ |
794
753
  | `DBML` | Export to a DBML Diagram description | ✅ |
754
+ | `spark` | Export to a Spark StructType | ✅ |
795
755
  | Missing something? | Please create an issue on GitHub | TBD |
796
756
 
797
757
  #### Great Expectations
@@ -838,6 +798,10 @@ The export function converts the logical data types of the datacontract into the
838
798
  if a server is selected via the `--server` option (based on the `type` of that server). If no server is selected, the
839
799
  logical data types are exported.
840
800
 
801
+ #### Spark
802
+
803
+ The export function converts the data contract specification into a Spark StructType schema. The returned value is Python code that represents the model schemas.
804
+ A Spark DataFrame schema is defined as a StructType. For more details about Spark data types, please see [the Spark documentation](https://spark.apache.org/docs/latest/sql-ref-datatypes.html).
841
805
 
842
806
  #### Avro
843
807
 
@@ -1214,6 +1178,121 @@ Examples: Removing or renaming models and fields.
1214
1178
  $ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
1215
1179
  ```
1216
1180
 
1181
+ ## Customizing Exporters and Importers
1182
+
1183
+ ### Custom Exporter
1184
+ Use the exporter factory to register a new custom exporter:
1185
+ ```python
1186
+
1187
+ from datacontract.data_contract import DataContract
1188
+ from datacontract.export.exporter import Exporter
1189
+ from datacontract.export.exporter_factory import exporter_factory
1190
+
1191
+
1192
+ # Create a custom class that implements export method
1193
+ class CustomExporter(Exporter):
1194
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
1195
+ result = {
1196
+ "title": data_contract.info.title,
1197
+ "version": data_contract.info.version,
1198
+ "description": data_contract.info.description,
1199
+ "email": data_contract.info.contact.email,
1200
+ "url": data_contract.info.contact.url,
1201
+ "model": model,
1202
+ "model_columns": ", ".join(list(data_contract.models.get(model).fields.keys())),
1203
+ "export_args": export_args,
1204
+ "custom_args": export_args.get("custom_arg", ""),
1205
+ }
1206
+ return result
1207
+
1208
+
1209
+ # Register the new custom class into factory
1210
+ exporter_factory.register_exporter("custom", CustomExporter)
1211
+
1212
+
1213
+ if __name__ == "__main__":
1214
+ # Create a DataContract instance
1215
+ data_contract = DataContract(
1216
+ data_contract_file="/path/datacontract.yaml"
1217
+ )
1218
+ # call export
1219
+ result = data_contract.export(
1220
+ export_format="custom", model="orders", server="production", custom_arg="my_custom_arg"
1221
+ )
1222
+ print(result)
1223
+
1224
+ ```
1225
+ Output
1226
+ ```python
1227
+ {
1228
+ 'title': 'Orders Unit Test',
1229
+ 'version': '1.0.0',
1230
+ 'description': 'The orders data contract',
1231
+ 'email': 'team-orders@example.com',
1232
+ 'url': 'https://wiki.example.com/teams/checkout',
1233
+ 'model': 'orders',
1234
+ 'model_columns': 'order_id, order_total, order_status',
1235
+ 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'},
1236
+ 'custom_args': 'my_custom_arg'
1237
+ }
1238
+ ```
1239
+
1240
+ ### Custom Importer
1241
+ Use the importer factory to register a new custom importer:
1242
+ ```python
1243
+
1244
+ from datacontract.model.data_contract_specification import DataContractSpecification
1245
+ from datacontract.data_contract import DataContract
1246
+ from datacontract.imports.importer import Importer
1247
+ from datacontract.imports.importer_factory import importer_factory
1248
+ import json
1249
+
1250
+ # Create a custom class that implements import_source method
1251
+ class CustomImporter(Importer):
1252
+ def import_source(
1253
+ self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
1254
+ ) -> dict:
1255
+ source_dict = json.loads(source)
1256
+ data_contract_specification.id = source_dict.get("id_custom")
1257
+ data_contract_specification.info.title = source_dict.get("title")
1258
+ data_contract_specification.info.description = source_dict.get("description_from_app")
1259
+
1260
+ return data_contract_specification
1261
+
1262
+
1263
+ # Register the new custom class into factory
1264
+ importer_factory.register_importer("custom_company_importer", CustomImporter)
1265
+
1266
+
1267
+ if __name__ == "__main__":
1268
+ # Get custom data from another application (as a JSON string)
1269
+ json_from_custom_app = '{"id_custom":"uuid-custom","version":"0.0.2", "title":"my_custom_imported_data", "description_from_app": "Custom contract description"}'
1270
+ # Create a DataContract instance
1271
+ data_contract = DataContract()
1272
+
1273
+ # call import_from_source
1274
+ result = data_contract.import_from_source(
1275
+ format="custom_company_importer", data_contract_specification=DataContract.init(), source=json_from_custom_app
1276
+ )
1277
+ print(dict(result))
1278
+
1279
+ ```
1280
+ Output
1281
+
1282
+ ```python
1283
+ {
1284
+ 'dataContractSpecification': '0.9.3',
1285
+ 'id': 'uuid-custom',
1286
+ 'info': Info(title='my_custom_imported_data', version='0.0.1', status=None, description='Custom contract description', owner=None, contact=None),
1287
+ 'servers': {},
1288
+ 'terms': None,
1289
+ 'models': {},
1290
+ 'definitions': {},
1291
+ 'examples': [],
1292
+ 'quality': None,
1293
+ 'servicelevels': None
1294
+ }
1295
+ ```
1217
1296
  ## Development Setup
1218
1297
 
1219
1298
  Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).
@@ -1263,7 +1342,26 @@ docker compose run --rm datacontract --version
1263
1342
 
1264
1343
  This command runs the container momentarily to check the version of the `datacontract` CLI. The `--rm` flag ensures that the container is automatically removed after the command executes, keeping your environment clean.
1265
1344
 
1345
+ ## Use with pre-commit
1346
+
1347
+ To run `datacontract-cli` as part of a [pre-commit](https://pre-commit.com/) workflow, add an entry like the following to the `repos` list in the project's `.pre-commit-config.yaml`:
1348
+
1349
+ ```yaml
1350
+ repos:
1351
+ - repo: https://github.com/datacontract/datacontract-cli
1352
+ rev: "v0.10.9"
1353
+ hooks:
1354
+ - id: datacontract-lint
1355
+ - id: datacontract-test
1356
+ args: ["--server", "production"]
1357
+ ```
1358
+
1359
+ ### Available Hook IDs
1266
1360
 
1361
+ | Hook ID | Description | Dependency |
1362
+ | ----------------- | ------------------------ | ---------- |
1363
+ | datacontract-lint | Runs the lint subcommand. | Python3 |
1364
+ | datacontract-test | Runs the test subcommand. Please look at [test](#test) section for all available arguments. | Python3 |
1267
1365
 
1268
1366
  ## Release Steps
1269
1367