datacontract-cli 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of datacontract-cli has been flagged as potentially problematic.

Files changed (38)
  1. datacontract/{web.py → api.py} +55 -3
  2. datacontract/breaking/breaking.py +1 -1
  3. datacontract/breaking/breaking_rules.py +1 -1
  4. datacontract/cli.py +32 -10
  5. datacontract/data_contract.py +14 -100
  6. datacontract/engines/data_contract_checks.py +735 -0
  7. datacontract/engines/data_contract_test.py +51 -0
  8. datacontract/engines/soda/check_soda_execute.py +36 -30
  9. datacontract/engines/soda/connections/kafka.py +8 -3
  10. datacontract/export/avro_converter.py +2 -0
  11. datacontract/export/custom_converter.py +40 -0
  12. datacontract/export/exporter.py +1 -2
  13. datacontract/export/exporter_factory.py +4 -12
  14. datacontract/export/sodacl_converter.py +22 -294
  15. datacontract/export/sql_type_converter.py +7 -2
  16. datacontract/imports/odcs_importer.py +6 -3
  17. datacontract/imports/odcs_v3_importer.py +2 -0
  18. datacontract/imports/sql_importer.py +229 -29
  19. datacontract/lint/urls.py +4 -4
  20. datacontract/model/data_contract_specification.py +130 -129
  21. datacontract/model/exceptions.py +4 -1
  22. datacontract/model/run.py +25 -18
  23. datacontract/templates/datacontract.html +16 -2
  24. datacontract/templates/partials/definition.html +3 -95
  25. datacontract/templates/partials/model_field.html +13 -0
  26. datacontract/templates/partials/quality.html +49 -0
  27. datacontract/templates/style/output.css +151 -152
  28. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +238 -184
  29. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +34 -34
  30. datacontract/engines/soda/connections/dask.py +0 -28
  31. datacontract/export/odcs_v2_exporter.py +0 -124
  32. datacontract/imports/odcs_v2_importer.py +0 -177
  33. datacontract/lint/linters/example_model_linter.py +0 -91
  34. /datacontract/{model → breaking}/breaking_change.py +0 -0
  35. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
  36. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
  37. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
  38. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: datacontract-cli
-Version: 0.10.20
+Version: 0.10.22
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -20,13 +20,13 @@ Requires-Dist: fastparquet==2024.11.0
 Requires-Dist: numpy<2.0.0,>=1.26.4
 Requires-Dist: python-multipart==0.0.20
 Requires-Dist: rich<13.10,>=13.7
-Requires-Dist: simple-ddl-parser==1.7.1
+Requires-Dist: sqlglot<27.0.0,>=26.6.0
 Requires-Dist: duckdb==1.1.2
-Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.20
+Requires-Dist: soda-core-duckdb<3.5.0,>=3.3.20
 Requires-Dist: setuptools>=60
 Requires-Dist: python-dotenv~=1.0.0
-Requires-Dist: rdflib==7.0.0
-Requires-Dist: boto3<1.35.98,>=1.34.41
+Requires-Dist: boto3<1.36.12,>=1.34.41
+Requires-Dist: Jinja2>=3.1.5
 Requires-Dist: jinja_partials>=0.2.1
 Provides-Extra: avro
 Requires-Dist: avro==1.12.0; extra == "avro"
@@ -49,10 +49,10 @@ Provides-Extra: postgres
 Requires-Dist: soda-core-postgres<3.4.0,>=3.3.20; extra == "postgres"
 Provides-Extra: s3
 Requires-Dist: s3fs==2024.12.0; extra == "s3"
-Requires-Dist: aiobotocore<2.18.0,>=2.17.0; extra == "s3"
+Requires-Dist: aiobotocore<2.20.0,>=2.17.0; extra == "s3"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
-Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.20; extra == "snowflake"
+Requires-Dist: snowflake-connector-python[pandas]<3.14,>=3.6; extra == "snowflake"
+Requires-Dist: soda-core-snowflake<3.5.0,>=3.3.20; extra == "snowflake"
 Provides-Extra: sqlserver
 Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.20; extra == "sqlserver"
 Provides-Extra: trino
@@ -63,16 +63,18 @@ Provides-Extra: dbml
 Requires-Dist: pydbml>=1.1.1; extra == "dbml"
 Provides-Extra: parquet
 Requires-Dist: pyarrow>=18.1.0; extra == "parquet"
-Provides-Extra: web
-Requires-Dist: fastapi==0.115.6; extra == "web"
-Requires-Dist: uvicorn==0.34.0; extra == "web"
+Provides-Extra: rdf
+Requires-Dist: rdflib==7.0.0; extra == "rdf"
+Provides-Extra: api
+Requires-Dist: fastapi==0.115.6; extra == "api"
+Requires-Dist: uvicorn==0.34.0; extra == "api"
 Provides-Extra: all
-Requires-Dist: datacontract-cli[bigquery,csv,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino,web]; extra == "all"
+Requires-Dist: datacontract-cli[api,bigquery,csv,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
 Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.26; extra == "dev"
+Requires-Dist: moto==5.0.27; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
@@ -192,17 +194,20 @@ $ datacontract init datacontract.yaml
 # lint the datacontract.yaml
 $ datacontract lint datacontract.yaml
 
-# execute schema and quality checks
+# execute schema and quality checks (define credentials as environment variables)
 $ datacontract test datacontract.yaml
 
-# execute schema and quality checks on the examples within the contract
-$ datacontract test --examples datacontract.yaml
+# export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql, sodacl, terraform, ...)
+$ datacontract export --format html datacontract.yaml --output datacontract.html
 
-# export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs_v2, odcs_v3, rdf, sql, sodacl, terraform, ...)
-$ datacontract export --format html datacontract.yaml > datacontract.html
+# export data contract to ODCS
+$ datacontract export --format odcs datacontract.yaml --output odcs.yaml
 
-# import avro (other formats: sql, glue, bigquery...)
-$ datacontract import --format avro --source avro_schema.avsc
+# import ODCS to data contract
+$ datacontract import --format odcs odcs.yaml --output datacontract.yaml
+
+# import sql (other formats: avro, glue, bigquery, jsonschema ...)
+$ datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml
 
 # find differences between two data contracts
 $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
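
Annotation: SQL imports in this release go through the new sqlglot-based parser (see the `simple-ddl-parser` → `sqlglot` swap in the METADATA hunk above) and accept a `--dialect` hint. A minimal end-to-end sketch, assuming a local DDL file — the file name and columns are illustrative:

```bash
# Illustrative input: a small PostgreSQL DDL file
cat > my-ddl.sql <<'SQL'
CREATE TABLE orders (
    order_id        VARCHAR NOT NULL,
    order_total     DECIMAL(10, 2),
    order_timestamp TIMESTAMP
);
SQL

# --dialect tells the importer which SQL flavor to parse
datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml
```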
@@ -235,12 +240,28 @@ Python 3.10, 3.11, and 3.12 are supported. We recommend to use Python 3.11.
 
 ```bash
 python3 -m pip install 'datacontract-cli[all]'
+datacontract --version
+```
+
+### pip with venv
+
+Typically it is better to install the application in a virtual environment for your projects:
+
+```bash
+cd my-project
+python3.11 -m venv venv
+source venv/bin/activate
+pip install 'datacontract-cli[all]'
+datacontract --version
 ```
 
 ### pipx
+
 pipx installs into an isolated environment.
+
 ```bash
 pipx install 'datacontract-cli[all]'
+datacontract --version
 ```
 
 ### Docker
@@ -262,7 +283,7 @@ _Note:_ The output of Docker command line messages is limited to 80 columns and
 
 
 
-## Optional Dependencies
+## Optional Dependencies (Extras)
 
 The CLI tool defines several optional dependencies (also known as extras) that can be installed for use with specific server types.
 With _all_, all server dependencies are included.
@@ -273,21 +294,23 @@ pip install datacontract-cli[all]
 
 A list of available extras:
 
-| Dependency             | Installation Command                        |
-|------------------------|---------------------------------------------|
-| Avro Support           | `pip install datacontract-cli[avro]`        |
-| Google BigQuery        | `pip install datacontract-cli[bigquery]`    |
-| Databricks Integration | `pip install datacontract-cli[databricks]`  |
-| Iceberg                | `pip install datacontract-cli[iceberg]`     |
-| Kafka Integration      | `pip install datacontract-cli[kafka]`       |
-| PostgreSQL Integration | `pip install datacontract-cli[postgres]`    |
-| S3 Integration         | `pip install datacontract-cli[s3]`          |
-| Snowflake Integration  | `pip install datacontract-cli[snowflake]`   |
-| Microsoft SQL Server   | `pip install datacontract-cli[sqlserver]`   |
-| Trino                  | `pip install datacontract-cli[trino]`       |
-| Dbt                    | `pip install datacontract-cli[dbt]`         |
-| Dbml                   | `pip install datacontract-cli[dbml]`        |
-| Parquet                | `pip install datacontract-cli[parquet]`     |
+| Dependency              | Installation Command                        |
+|-------------------------|---------------------------------------------|
+| Avro Support            | `pip install datacontract-cli[avro]`        |
+| Google BigQuery         | `pip install datacontract-cli[bigquery]`    |
+| Databricks Integration  | `pip install datacontract-cli[databricks]`  |
+| Iceberg                 | `pip install datacontract-cli[iceberg]`     |
+| Kafka Integration       | `pip install datacontract-cli[kafka]`       |
+| PostgreSQL Integration  | `pip install datacontract-cli[postgres]`    |
+| S3 Integration          | `pip install datacontract-cli[s3]`          |
+| Snowflake Integration   | `pip install datacontract-cli[snowflake]`   |
+| Microsoft SQL Server    | `pip install datacontract-cli[sqlserver]`   |
+| Trino                   | `pip install datacontract-cli[trino]`       |
+| dbt                     | `pip install datacontract-cli[dbt]`         |
+| DBML                    | `pip install datacontract-cli[dbml]`        |
+| Parquet                 | `pip install datacontract-cli[parquet]`     |
+| RDF                     | `pip install datacontract-cli[rdf]`         |
+| API (run as web server) | `pip install datacontract-cli[api]`         |
 
 
 
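Annotation: these are standard packaging extras, so they can also be combined in a single install; a hypothetical pairing for a Snowflake-plus-S3 setup:

```bash
pip install 'datacontract-cli[snowflake,s3]'
```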
@@ -305,7 +328,7 @@ Commands
 - [diff](#diff)
 - [catalog](#catalog)
 - [publish](#publish)
-- [serve](#serve)
+- [api](#api)
 
 ### init
 ```
@@ -384,11 +407,6 @@ Commands
 │                                             for all servers           │
 │                                             (default).                │
 │                                             [default: all]            │
-│ --examples    --no-examples                 Run the schema and        │
-│                                             quality tests on the      │
-│                                             example data within the   │
-│                                             data contract.            │
-│                                             [default: no-examples]    │
 │ --publish                    TEXT           The url to publish the    │
 │                                             results after the test    │
 │                                             [default: None]           │
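
Migration note for this hunk: `datacontract test --examples` is removed in 0.10.22; per the reworked workflow sections further down, schema and quality checks now always run against a server defined in the contract.

```bash
# 0.10.20
datacontract test --examples datacontract.yaml
# 0.10.22 — test against the configured server (credentials via environment variables)
datacontract test datacontract.yaml
```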
@@ -875,13 +893,13 @@ models:
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
 │ *  --format    [jsonschema|pydantic-model|     The export format.            │
 │                sodacl|dbt|dbt-sources|dbt-     [default: None]               │
-│                staging-sql|odcs|odcs_v2|od     [required]                    │
-│                cs_v3|rdf|avro|protobuf|gre                                   │
+│                staging-sql|odcs|               [required]                    │
+│                rdf|avro|protobuf|gre                                         │
 │                at-expectations|terraform|a                                   │
 │                vro-idl|sql|sql-query|html|                                   │
 │                go|bigquery|dbml|spark|sqla                                   │
 │                lchemy|data-caterer|dcs|mar                                   │
-│                kdown|iceberg]                                                │
+│                kdown|iceberg|custom]                                         │
 │    --output    PATH                            Specify the file path where   │
 │                                                the exported data will be     │
 │                                                saved. If no path is          │
@@ -904,6 +922,9 @@ models:
 │    --engine      TEXT      [engine] The engine used for                      │
 │                            great expection run.                              │
 │                            [default: None]                                   │
+│    --template    PATH      [custom] The file path of                         │
+│                            Jinja template.                                   │
+│                            [default: None]                                   │
 │    --help                  Show this message and exit.                       │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ RDF Options ────────────────────────────────────────────────────────────────╮
@@ -931,8 +952,6 @@ Available export options:
 |----------------------|---------------------------------------------------------|--------|
 | `html`               | Export to HTML                                          | ✅      |
 | `jsonschema`         | Export to JSON Schema                                   | ✅      |
-| `odcs_v2`            | Export to Open Data Contract Standard (ODCS) V2         | ✅      |
-| `odcs_v3`            | Export to Open Data Contract Standard (ODCS) V3         | ✅      |
 | `odcs`               | Export to Open Data Contract Standard (ODCS) V3         | ✅      |
 | `sodacl`             | Export to SodaCL quality checks in YAML format          | ✅      |
 | `dbt`                | Export to dbt models in YAML format                     | ✅      |
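
Migration note: the separate `odcs_v2` and `odcs_v3` export formats are dropped; `odcs` (which targets ODCS V3) remains the single option:

```bash
datacontract export --format odcs datacontract.yaml --output odcs.yaml
```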
@@ -955,6 +974,7 @@ Available export options:
 | `dcs`                | Export to Data Contract Specification in YAML format    | ✅      |
 | `markdown`           | Export to Markdown                                      | ✅      |
 | `iceberg`            | Export to an Iceberg JSON Schema Definition             | partial |
+| `custom`             | Export to Custom format with Jinja                      | ✅      |
 | Missing something?   | Please create an issue on GitHub                        | TBD     |
 
 
@@ -1136,109 +1156,152 @@ to limit your contract export to a single model.
 }
 ```
 
+#### Custom
+
+The export function converts the data contract specification into the custom format with Jinja. You can specify the path to a Jinja template with the `--template` argument, allowing you to output files in any format.
+
+```shell
+datacontract export --format custom --template template.txt datacontract.yaml
+```
+
+##### Jinja variables
+
+You can directly use the Data Contract Specification as template variables.
+
+```shell
+$ cat template.txt
+title: {{ data_contract.info.title }}
+
+$ datacontract export --format custom --template template.txt datacontract.yaml
+title: Orders Latest
+```
+
+##### Example Jinja Templates
+
+###### Customized dbt model
+
+You can export dbt models containing any logic.
+
+Below is an example of a dbt staging layer that converts a field of `type: timestamp` to a `DATETIME` type with time zone conversion.
+
+template.sql
+
+{% raw %}
+```sql
+{%- for model_name, model in data_contract.models.items() %}
+{#- Export only the first model #}
+{%- if loop.first -%}
+SELECT
+{%- for field_name, field in model.fields.items() %}
+{%- if field.type == "timestamp" %}
+    DATETIME({{ field_name }}, "Asia/Tokyo") AS {{ field_name }},
+{%- else %}
+    {{ field_name }} AS {{ field_name }},
+{%- endif %}
+{%- endfor %}
+FROM
+    {{ "{{" }} ref('{{ model_name }}') {{ "}}" }}
+{%- endif %}
+{%- endfor %}
+```
+{% endraw %}
+
+command
+
+```shell
+datacontract export --format custom --template template.sql --output output.sql datacontract.yaml
+```
+
+output.sql
+
+```sql
+SELECT
+    order_id AS order_id,
+    DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp,
+    order_total AS order_total,
+    customer_id AS customer_id,
+    customer_email_address AS customer_email_address,
+    DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp,
+FROM
+    {{ ref('orders') }}
+```
+
 ### import
 ```
-
-Usage: datacontract import [OPTIONS]
-
-Create a data contract from the given source location. Saves to file specified
-by `output` option if present, otherwise prints to stdout.
-
-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ *  --format                 [sql|avro|dbt|dbml|gl   The format of the        │
-│                             ue|jsonschema|bigquer   source file.             │
-│                             y|odcs|unity|spark|ic   [default: None]          │
-│                             eberg|parquet|csv]      [required]               │
-│    --output                 PATH                    Specify the file path    │
-│                                                     where the Data           │
-│                                                     Contract will be         │
-│                                                     saved. If no path is     │
-│                                                     provided, the output     │
-│                                                     will be printed to       │
-│                                                     stdout.                  │
-│                                                     [default: None]          │
-│    --source                 TEXT                    The path to the file     │
-│                                                     or Glue Database that    │
-│                                                     should be imported.      │
-│                                                     [default: None]          │
-│    --glue-table             TEXT                    List of table ids to     │
-│                                                     import from the Glue     │
-│                                                     Database (repeat for     │
-│                                                     multiple table ids,      │
-│                                                     leave empty for all      │
-│                                                     tables in the            │
-│                                                     dataset).                │
-│                                                     [default: None]          │
-│    --bigquery-project       TEXT                    The bigquery project     │
-│                                                     id.                      │
-│                                                     [default: None]          │
-│    --bigquery-dataset       TEXT                    The bigquery dataset     │
-│                                                     id.                      │
-│                                                     [default: None]          │
-│    --bigquery-table         TEXT                    List of table ids to     │
-│                                                     import from the          │
-│                                                     bigquery API (repeat     │
-│                                                     for multiple table       │
-│                                                     ids, leave empty for     │
-│                                                     all tables in the        │
-│                                                     dataset).                │
-│                                                     [default: None]          │
-│    --unity-table-full-n…    TEXT                    Full name of a table     │
-│                                                     in the unity catalog     │
-│                                                     [default: None]          │
-│    --dbt-model              TEXT                    List of models names     │
-│                                                     to import from the       │
-│                                                     dbt manifest file        │
-│                                                     (repeat for multiple     │
-│                                                     models names, leave      │
-│                                                     empty for all models     │
-│                                                     in the dataset).         │
-│                                                     [default: None]          │
-│    --dbml-schema            TEXT                    List of schema names     │
-│                                                     to import from the       │
-│                                                     DBML file (repeat for    │
-│                                                     multiple schema          │
-│                                                     names, leave empty       │
-│                                                     for all tables in the    │
-│                                                     file).                   │
-│                                                     [default: None]          │
-│    --dbml-table             TEXT                    List of table names      │
-│                                                     to import from the       │
-│                                                     DBML file (repeat for    │
-│                                                     multiple table names,    │
-│                                                     leave empty for all      │
-│                                                     tables in the file).     │
-│                                                     [default: None]          │
-│    --iceberg-table          TEXT                    Table name to assign     │
-│                                                     to the model created     │
-│                                                     from the Iceberg         │
-│                                                     schema.                  │
-│                                                     [default: None]          │
-│    --template               TEXT                    The location (url or     │
-│                                                     path) of the Data        │
-│                                                     Contract                 │
-│                                                     Specification            │
-│                                                     Template                 │
-│                                                     [default: None]          │
-│    --schema                 TEXT                    The location (url or     │
-│                                                     path) of the Data        │
-│                                                     Contract                 │
-│                                                     Specification JSON       │
-│                                                     Schema                   │
-│                                                     [default:                │
-│                                                     https://datacontract…    │
-│    --help                                           Show this message and    │
-│                                                     exit.                    │
-╰──────────────────────────────────────────────────────────────────────────────╯
+Usage: datacontract import [OPTIONS]
+
+Create a data contract from the given source location. Saves to file specified by `output` option if present,
+otherwise prints to stdout.
+
+╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ *  --format                   [sql|avro|dbt|dbml|glue|jsonschema|bi  The format of the source file.             │
+│                               gquery|odcs|unity|spark|iceberg|parqu  [default: None]                            │
+│                               et|csv]                                [required]                                 │
+│    --output                   PATH                                   Specify the file path where the Data       │
+│                                                                      Contract will be saved. If no path is      │
+│                                                                      provided, the output will be printed       │
+│                                                                      to stdout.                                 │
+│                                                                      [default: None]                            │
+│    --source                   TEXT                                   The path to the file or Glue Database      │
+│                                                                      that should be imported.                   │
+│                                                                      [default: None]                            │
+│    --dialect                  TEXT                                   The SQL dialect to use when importing      │
+│                                                                      SQL files, e.g., postgres, tsql,           │
+│                                                                      bigquery.                                  │
+│                                                                      [default: None]                            │
+│    --glue-table               TEXT                                   List of table ids to import from the       │
+│                                                                      Glue Database (repeat for multiple         │
+│                                                                      table ids, leave empty for all tables      │
+│                                                                      in the dataset).                           │
+│                                                                      [default: None]                            │
+│    --bigquery-project         TEXT                                   The bigquery project id.                   │
+│                                                                      [default: None]                            │
+│    --bigquery-dataset         TEXT                                   The bigquery dataset id.                   │
+│                                                                      [default: None]                            │
+│    --bigquery-table           TEXT                                   List of table ids to import from the       │
+│                                                                      bigquery API (repeat for multiple          │
+│                                                                      table ids, leave empty for all tables      │
+│                                                                      in the dataset).                           │
+│                                                                      [default: None]                            │
+│    --unity-table-full-name    TEXT                                   Full name of a table in the unity          │
+│                                                                      catalog                                    │
+│                                                                      [default: None]                            │
+│    --dbt-model                TEXT                                   List of models names to import from        │
+│                                                                      the dbt manifest file (repeat for          │
+│                                                                      multiple models names, leave empty         │
+│                                                                      for all models in the dataset).            │
+│                                                                      [default: None]                            │
+│    --dbml-schema              TEXT                                   List of schema names to import from        │
+│                                                                      the DBML file (repeat for multiple         │
+│                                                                      schema names, leave empty for all          │
+│                                                                      tables in the file).                       │
+│                                                                      [default: None]                            │
+│    --dbml-table               TEXT                                   List of table names to import from         │
+│                                                                      the DBML file (repeat for multiple         │
+│                                                                      table names, leave empty for all           │
+│                                                                      tables in the file).                       │
+│                                                                      [default: None]                            │
+│    --iceberg-table            TEXT                                   Table name to assign to the model          │
+│                                                                      created from the Iceberg schema.           │
+│                                                                      [default: None]                            │
+│    --template                 TEXT                                   The location (url or path) of the          │
+│                                                                      Data Contract Specification Template       │
+│                                                                      [default: None]                            │
+│    --schema                   TEXT                                   The location (url or path) of the          │
+│                                                                      Data Contract Specification JSON           │
+│                                                                      Schema                                     │
+│                                                                      [default: None]                            │
+│    --help                                                            Show this message and exit.                │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 
 ```
 
 Example:
 ```bash
 # Example import from SQL DDL
-datacontract import --format sql --source my_ddl.sql
+datacontract import --format sql --source my_ddl.sql --dialect postgres
 # To save to file
-datacontract import --format sql --source my_ddl.sql --output datacontract.yaml
+datacontract import --format sql --source my_ddl.sql --dialect postgres --output datacontract.yaml
 ```
 
 Available import options:
@@ -1540,18 +1603,23 @@ datacontract catalog --files "*.odcs.yaml"
 
 ```
 
-### serve
+### api
 ```
-
-Usage: datacontract serve [OPTIONS]
-
-Start the datacontract web server.
-
-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --port        INTEGER  Bind socket to this port. [default: 4242]             │
-│ --host        TEXT     Bind socket to this host. [default: 127.0.0.1]        │
-│ --help                 Show this message and exit.                           │
-╰──────────────────────────────────────────────────────────────────────────────╯
+
+Usage: datacontract api [OPTIONS]
+
+Start the datacontract CLI as server application with REST API.
+The OpenAPI documentation as Swagger UI is available on http://localhost:4242. You can execute the commands directly from the Swagger UI.
+To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key. To authenticate, requests must include the header 'x-api-key' with the
+correct API key. This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
+To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in https://cli.datacontract.com/#test
+
+╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────╮
+│ --port        INTEGER  Bind socket to this port. [default: 4242]                                    │
+│ --host        TEXT     Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0     │
+│                        [default: 127.0.0.1]                                                         │
+│ --help                 Show this message and exit.                                                  │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
 
 ```
 
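Annotation: a minimal sketch of the renamed command together with the documented API-key protection; the env var, header name, flags, and URL come from the help text above, and the key value is illustrative:

```bash
# Protect the REST API with a secret key (strongly recommended per the help text)
export DATACONTRACT_CLI_API_KEY='replace-with-a-secret'
datacontract api --host 0.0.0.0 --port 4242 &

# Authenticated request; the Swagger UI is served at the root URL
curl -H "x-api-key: $DATACONTRACT_CLI_API_KEY" http://localhost:4242/
```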
@@ -1588,24 +1656,20 @@ Create a data contract based on the actual data. This is the fastest way to get
    $ datacontract test
    ```
 
-2. Add examples to the `datacontract.yaml`. If you can, use actual data and anonymize. Make sure that the examples match the imported logical data model.
+2. Add quality checks and additional type constraints one by one to the contract and make sure the
+   data still adheres to the contract.
    ```bash
-   $ datacontract test --examples
-   ```
-
-3. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adheres to the contract. Check against examples for a very fast feedback loop.
-   ```bash
-   $ datacontract test --examples
    $ datacontract test
    ```
 
-4. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You probably forgot to document some fields and add the terms and conditions.
+3. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You
+   probably forgot to document some fields and add the terms and conditions.
    ```bash
    $ datacontract lint
    ```
 
-5. Set up a CI pipeline that executes daily for continuous quality checks. You can also report the test results to tools like [Data Mesh Manager](https://datamesh-manager.com)
+4. Set up a CI pipeline that executes daily for continuous quality checks. You can also report the
+   test results to tools like [Data Mesh Manager](https://datamesh-manager.com)
    ```bash
    $ datacontract test --publish https://api.datamesh-manager.com/api/test-results
    ```
@@ -1619,30 +1683,15 @@ Create a data contract based on the requirements from use cases.
    $ datacontract init
    ```
 
-2. Add examples to the `datacontract.yaml`. Do not start with the data model, although you are probably tempted to do that. Examples are the fastest way to get feedback from everybody and not lose someone in the discussion.
-
-3. Create the model based on the examples. Test the model against the examples to double-check whether the model matches the examples.
-   ```bash
-   $ datacontract test --examples
-   ```
-
-4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
-   ```bash
-   $ datacontract test --examples
-   ```
-
-5. Fill in the terms, descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the linter.
+2. Create the model and quality guarantees based on your business requirements. Fill in the terms,
+   descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the
+   linter.
    ```bash
    $ datacontract lint
    ```
 
-6. Set up a CI pipeline that lints and tests the examples so you make sure that any changes later do not decrease the quality of the contract.
-   ```bash
-   $ datacontract lint
-   $ datacontract test --examples
-   ```
-
-7. Use the export function to start building the providing data product as well as the integration into the consuming data products.
+3. Use the export function to start building the providing data product as well as the integration
+   into the consuming data products.
    ```bash
    # data provider
    $ datacontract export --format dbt
@@ -1651,6 +1700,11 @@ Create a data contract based on the requirements from use cases.
    $ datacontract export --format dbt-staging-sql
    ```
 
+4. Test that your data product implementation adheres to the contract.
+   ```bash
+   $ datacontract test
+   ```
+
 ### Schema Evolution
 
 #### Non-breaking Changes
@@ -1709,7 +1763,7 @@ class CustomExporter(Exporter):
 
 
 # Register the new custom class into factory
-exporter_factory.register_exporter("custom", CustomExporter)
+exporter_factory.register_exporter("custom_exporter", CustomExporter)
 
 
 if __name__ == "__main__":
@@ -1719,7 +1773,7 @@ if __name__ == "__main__":
     )
     # Call export
     result = data_contract.export(
-        export_format="custom", model="orders", server="production", custom_arg="my_custom_arg"
+        export_format="custom_exporter", model="orders", server="production", custom_arg="my_custom_arg"
     )
     print(result)