datacontract-cli 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/{web.py → api.py} +55 -3
- datacontract/breaking/breaking.py +1 -1
- datacontract/breaking/breaking_rules.py +1 -1
- datacontract/cli.py +32 -10
- datacontract/data_contract.py +14 -100
- datacontract/engines/data_contract_checks.py +735 -0
- datacontract/engines/data_contract_test.py +51 -0
- datacontract/engines/soda/check_soda_execute.py +36 -30
- datacontract/engines/soda/connections/kafka.py +8 -3
- datacontract/export/avro_converter.py +2 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/exporter.py +1 -2
- datacontract/export/exporter_factory.py +4 -12
- datacontract/export/sodacl_converter.py +22 -294
- datacontract/export/sql_type_converter.py +7 -2
- datacontract/imports/odcs_importer.py +6 -3
- datacontract/imports/odcs_v3_importer.py +2 -0
- datacontract/imports/sql_importer.py +229 -29
- datacontract/lint/urls.py +4 -4
- datacontract/model/data_contract_specification.py +130 -129
- datacontract/model/exceptions.py +4 -1
- datacontract/model/run.py +25 -18
- datacontract/templates/datacontract.html +16 -2
- datacontract/templates/partials/definition.html +3 -95
- datacontract/templates/partials/model_field.html +13 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/style/output.css +151 -152
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +238 -184
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +34 -34
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/export/odcs_v2_exporter.py +0 -124
- datacontract/imports/odcs_v2_importer.py +0 -177
- datacontract/lint/linters/example_model_linter.py +0 -91
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
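To try the newer release locally before digging into the METADATA diff below, a plain pip upgrade is enough; the `all` extra and the explicit version pin here are only illustrative, mirroring the install commands shown further down in the README changes:

```bash
# Illustrative only: upgrade an existing installation to the release being diffed.
python3 -m pip install --upgrade 'datacontract-cli[all]==0.10.22'
datacontract --version
```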
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: datacontract-cli
-Version: 0.10.20
+Version: 0.10.22
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -20,13 +20,13 @@ Requires-Dist: fastparquet==2024.11.0
 Requires-Dist: numpy<2.0.0,>=1.26.4
 Requires-Dist: python-multipart==0.0.20
 Requires-Dist: rich<13.10,>=13.7
-Requires-Dist:
+Requires-Dist: sqlglot<27.0.0,>=26.6.0
 Requires-Dist: duckdb==1.1.2
-Requires-Dist: soda-core-duckdb<3.
+Requires-Dist: soda-core-duckdb<3.5.0,>=3.3.20
 Requires-Dist: setuptools>=60
 Requires-Dist: python-dotenv~=1.0.0
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: boto3<1.36.12,>=1.34.41
+Requires-Dist: Jinja2>=3.1.5
 Requires-Dist: jinja_partials>=0.2.1
 Provides-Extra: avro
 Requires-Dist: avro==1.12.0; extra == "avro"
@@ -49,10 +49,10 @@ Provides-Extra: postgres
 Requires-Dist: soda-core-postgres<3.4.0,>=3.3.20; extra == "postgres"
 Provides-Extra: s3
 Requires-Dist: s3fs==2024.12.0; extra == "s3"
-Requires-Dist: aiobotocore<2.
+Requires-Dist: aiobotocore<2.20.0,>=2.17.0; extra == "s3"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas]<3.
-Requires-Dist: soda-core-snowflake<3.
+Requires-Dist: snowflake-connector-python[pandas]<3.14,>=3.6; extra == "snowflake"
+Requires-Dist: soda-core-snowflake<3.5.0,>=3.3.20; extra == "snowflake"
 Provides-Extra: sqlserver
 Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.20; extra == "sqlserver"
 Provides-Extra: trino
@@ -63,16 +63,18 @@ Provides-Extra: dbml
 Requires-Dist: pydbml>=1.1.1; extra == "dbml"
 Provides-Extra: parquet
 Requires-Dist: pyarrow>=18.1.0; extra == "parquet"
-Provides-Extra:
-Requires-Dist:
-
+Provides-Extra: rdf
+Requires-Dist: rdflib==7.0.0; extra == "rdf"
+Provides-Extra: api
+Requires-Dist: fastapi==0.115.6; extra == "api"
+Requires-Dist: uvicorn==0.34.0; extra == "api"
 Provides-Extra: all
-Requires-Dist: datacontract-cli[bigquery,csv,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino
+Requires-Dist: datacontract-cli[api,bigquery,csv,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
 Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.
+Requires-Dist: moto==5.0.27; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
@@ -192,17 +194,20 @@ $ datacontract init datacontract.yaml
 # lint the datacontract.yaml
 $ datacontract lint datacontract.yaml
 
-# execute schema and quality checks
+# execute schema and quality checks (define credentials as environment variables)
 $ datacontract test datacontract.yaml
 
-#
-$ datacontract
+# export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql, sodacl, terraform, ...)
+$ datacontract export --format html datacontract.yaml --output datacontract.html
 
-# export data contract
-$ datacontract export --format
+# export data contract to ODCS
+$ datacontract export --format odcs datacontract.yaml --output odcs.yaml
 
-# import
-$ datacontract import --format
+# import ODCS to data contract
+$ datacontract import --format odcs odcs.yaml --output datacontract.yaml
+
+# import sql (other formats: avro, glue, bigquery, jsonschema ...)
+$ datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml
 
 # find differences between two data contracts
 $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
@@ -235,12 +240,28 @@ Python 3.10, 3.11, and 3.12 are supported. We recommend to use Python 3.11.
 
 ```bash
 python3 -m pip install 'datacontract-cli[all]'
+datacontract --version
+```
+
+### pip with venv
+
+Typically it is better to install the application in a virtual environment for your projects:
+
+```bash
+cd my-project
+python3.11 -m venv venv
+source venv/bin/activate
+pip install 'datacontract-cli[all]'
+datacontract --version
 ```
 
 ### pipx
+
 pipx installs into an isolated environment.
+
 ```bash
 pipx install 'datacontract-cli[all]'
+datacontract --version
 ```
 
 ### Docker
@@ -262,7 +283,7 @@ _Note:_ The output of Docker command line messages is limited to 80 columns and
 
 
 
-## Optional Dependencies
+## Optional Dependencies (Extras)
 
 The CLI tool defines several optional dependencies (also known as extras) that can be installed for using with specific servers types.
 With _all_, all server dependencies are included.
@@ -273,21 +294,23 @@ pip install datacontract-cli[all]
 
 A list of available extras:
 
-| Dependency
-|
-| Avro Support
-| Google BigQuery
-| Databricks Integration
-| Iceberg
-| Kafka Integration
-| PostgreSQL Integration
-| S3 Integration
-| Snowflake Integration
-| Microsoft SQL Server
-| Trino
-|
-|
-| Parquet
+| Dependency | Installation Command |
+|-------------------------|--------------------------------------------|
+| Avro Support | `pip install datacontract-cli[avro]` |
+| Google BigQuery | `pip install datacontract-cli[bigquery]` |
+| Databricks Integration | `pip install datacontract-cli[databricks]` |
+| Iceberg | `pip install datacontract-cli[iceberg]` |
+| Kafka Integration | `pip install datacontract-cli[kafka]` |
+| PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
+| S3 Integration | `pip install datacontract-cli[s3]` |
+| Snowflake Integration | `pip install datacontract-cli[snowflake]` |
+| Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
+| Trino | `pip install datacontract-cli[trino]` |
+| dbt | `pip install datacontract-cli[dbt]` |
+| DBML | `pip install datacontract-cli[dbml]` |
+| Parquet | `pip install datacontract-cli[parquet]` |
+| RDF | `pip install datacontract-cli[rdf]` |
+| API (run as web server) | `pip install datacontract-cli[api]` |
 
 
 
@@ -305,7 +328,7 @@ Commands
 - [diff](#diff)
 - [catalog](#catalog)
 - [publish](#publish)
-- [
+- [api](#api)
 
 ### init
 ```
@@ -384,11 +407,6 @@ Commands
 │ for all servers │
 │ (default). │
 │ [default: all] │
-│ --examples --no-examples Run the schema and │
-│ quality tests on the │
-│ example data within the │
-│ data contract. │
-│ [default: no-examples] │
 │ --publish TEXT The url to publish the │
 │ results after the test │
 │ [default: None] │
@@ -875,13 +893,13 @@ models:
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
 │ * --format [jsonschema|pydantic-model| The export format. │
 │ sodacl|dbt|dbt-sources|dbt- [default: None] │
-│ staging-sql|odcs|
-│
+│ staging-sql|odcs| [required] │
+│ rdf|avro|protobuf|gre │
 │ at-expectations|terraform|a │
 │ vro-idl|sql|sql-query|html| │
 │ go|bigquery|dbml|spark|sqla │
 │ lchemy|data-caterer|dcs|mar │
-│ kdown|iceberg]
+│ kdown|iceberg|custom] │
 │ --output PATH Specify the file path where │
 │ the exported data will be │
 │ saved. If no path is │
@@ -904,6 +922,9 @@ models:
 │ --engine TEXT [engine] The engine used for │
 │ great expection run. │
 │ [default: None] │
+│ --template PATH [custom] The file path of │
+│ Jinja template. │
+│ [default: None] │
 │ --help Show this message and exit. │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ RDF Options ────────────────────────────────────────────────────────────────╮
@@ -931,8 +952,6 @@ Available export options:
 |----------------------|---------------------------------------------------------|--------|
 | `html` | Export to HTML | ✅ |
 | `jsonschema` | Export to JSON Schema | ✅ |
-| `odcs_v2` | Export to Open Data Contract Standard (ODCS) V2 | ✅ |
-| `odcs_v3` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
 | `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
 | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
 | `dbt` | Export to dbt models in YAML format | ✅ |
@@ -955,6 +974,7 @@ Available export options:
 | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
 | `markdown` | Export to Markdown | ✅ |
 | `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
+| `custom` | Export to Custom format with Jinja | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
@@ -1136,109 +1156,152 @@ to limit your contract export to a single model.
 }
 ```
 
+#### Custom
+
+The export function converts the data contract specification into the custom format with Jinja. You can specify the path to a Jinja template with the `--template` argument, allowing you to output files in any format.
+
+```shell
+datacontract export --format custom --template template.txt datacontract.yaml
+```
+
+##### Jinja variables
+
+You can directly use the Data Contract Specification as template variables.
+
+```shell
+$ cat template.txt
+title: {{ data_contract.info.title }}
+
+$ datacontract export --format custom --template template.txt datacontract.yaml
+title: Orders Latest
+```
+
+##### Example Jinja Templates
+
+###### Customized dbt model
+
+You can export the dbt models containing any logic.
+
+Below is an example of a dbt staging layer that converts a field of `type: timestamp` to a `DATETIME` type with time zone conversion.
+
+template.sql
+
+{% raw %}
+```sql
+{%- for model_name, model in data_contract.models.items() %}
+{#- Export only the first model #}
+{%- if loop.first -%}
+SELECT
+{%- for field_name, field in model.fields.items() %}
+{%- if field.type == "timestamp" %}
+DATETIME({{ field_name }}, "Asia/Tokyo") AS {{ field_name }},
+{%- else %}
+{{ field_name }} AS {{ field_name }},
+{%- endif %}
+{%- endfor %}
+FROM
+{{ "{{" }} ref('{{ model_name }}') {{ "}}" }}
+{%- endif %}
+{%- endfor %}
+```
+{% endraw %}
+
+command
+
+```shell
+datacontract export --format custom --template template.sql --output output.sql datacontract.yaml
+```
+
+output.sql
+
+```sql
+SELECT
+order_id AS order_id,
+DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp,
+order_total AS order_total,
+customer_id AS customer_id,
+customer_email_address AS customer_email_address,
+DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp,
+FROM
+{{ ref('orders') }}
+```
+
 ### import
 ```
-
-
-
-
-
-
-
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│
-│ --
-│
-│
-│
-│
-│
-│
-│
-│ --
-│
-│
-│
-│
-│
-│
-│
-
-│ to import from the │
-│ DBML file (repeat for │
-│ multiple table names, │
-│ leave empty for all │
-│ tables in the file). │
-│ [default: None] │
-│ --iceberg-table TEXT Table name to assign │
-│ to the model created │
-│ from the Iceberg │
-│ schema. │
-│ [default: None] │
-│ --template TEXT The location (url or │
-│ path) of the Data │
-│ Contract │
-│ Specification │
-│ Template │
-│ [default: None] │
-│ --schema TEXT The location (url or │
-│ path) of the Data │
-│ Contract │
-│ Specification JSON │
-│ Schema │
-│ [default: │
-│ https://datacontract… │
-│ --help Show this message and │
-│ exit. │
-╰──────────────────────────────────────────────────────────────────────────────╯
+Usage: datacontract import [OPTIONS]
+
+Create a data contract from the given source location. Saves to file specified by `output` option if present,
+otherwise prints to stdout.
+
+╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ * --format [sql|avro|dbt|dbml|glue|jsonschema|bi The format of the source file. │
+│ gquery|odcs|unity|spark|iceberg|parqu [default: None] │
+│ et|csv] [required] │
+│ --output PATH Specify the file path where the Data │
+│ Contract will be saved. If no path is │
+│ provided, the output will be printed │
+│ to stdout. │
+│ [default: None] │
+│ --source TEXT The path to the file or Glue Database │
+│ that should be imported. │
+│ [default: None] │
+│ --dialect TEXT The SQL dialect to use when importing │
+│ SQL files, e.g., postgres, tsql, │
+│ bigquery. │
+│ [default: None] │
+│ --glue-table TEXT List of table ids to import from the │
+│ Glue Database (repeat for multiple │
+│ table ids, leave empty for all tables │
+│ in the dataset). │
+│ [default: None] │
+│ --bigquery-project TEXT The bigquery project id. │
+│ [default: None] │
+│ --bigquery-dataset TEXT The bigquery dataset id. │
+│ [default: None] │
+│ --bigquery-table TEXT List of table ids to import from the │
+│ bigquery API (repeat for multiple │
+│ table ids, leave empty for all tables │
+│ in the dataset). │
+│ [default: None] │
+│ --unity-table-full-name TEXT Full name of a table in the unity │
+│ catalog │
+│ [default: None] │
+│ --dbt-model TEXT List of models names to import from │
+│ the dbt manifest file (repeat for │
+│ multiple models names, leave empty │
+│ for all models in the dataset). │
+│ [default: None] │
+│ --dbml-schema TEXT List of schema names to import from │
+│ the DBML file (repeat for multiple │
+│ schema names, leave empty for all │
+│ tables in the file). │
+│ [default: None] │
+│ --dbml-table TEXT List of table names to import from │
+│ the DBML file (repeat for multiple │
+│ table names, leave empty for all │
+│ tables in the file). │
+│ [default: None] │
+│ --iceberg-table TEXT Table name to assign to the model │
+│ created from the Iceberg schema. │
+│ [default: None] │
+│ --template TEXT The location (url or path) of the │
+│ Data Contract Specification Template │
+│ [default: None] │
+│ --schema TEXT The location (url or path) of the │
+│ Data Contract Specification JSON │
+│ Schema │
+│ [default: None] │
+│ --help Show this message and exit. │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 
 ```
 
 Example:
 ```bash
 # Example import from SQL DDL
-datacontract import --format sql --source my_ddl.sql
+datacontract import --format sql --source my_ddl.sql --dialect postgres
 # To save to file
-datacontract import --format sql --source my_ddl.sql --output datacontract.yaml
+datacontract import --format sql --source my_ddl.sql --dialect postgres --output datacontract.yaml
 ```
 
 Available import options:
@@ -1540,18 +1603,23 @@ datacontract catalog --files "*.odcs.yaml"
 
 ```
 
-###
+### api
 ```
-
-Usage: datacontract
-
-Start the datacontract
-
-
-
-
-
-
+
+Usage: datacontract api [OPTIONS]
+
+Start the datacontract CLI as server application with REST API.
+The OpenAPI documentation as Swagger UI is available on http://localhost:4242. You can execute the commands directly from the Swagger UI.
+To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key. To authenticate, requests must include the header 'x-api-key' with the
+correct API key. This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
+To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in https://cli.datacontract.com/#test
+
+╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ --port INTEGER Bind socket to this port. [default: 4242] │
+│ --host TEXT Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0 [default: 127.0.0.1] │
+│ --help Show this message and exit. │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
 
 ```
 
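The `api` hunk above documents the flags, the default port, and the `x-api-key` header, but no request example. As a hedged local smoke test (endpoint paths beyond the Swagger UI root are not shown in this diff, so only the root URL is used here):

```bash
# Sketch only: run the new REST API mode locally, using the names from the help text above.
pip install 'datacontract-cli[api]'

# Recommended: protect the API with a key; clients must then send it in the x-api-key header.
export DATACONTRACT_CLI_API_KEY=change-me

# Defaults shown explicitly; use --host 0.0.0.0 when running inside Docker.
datacontract api --port 4242 --host 127.0.0.1 &

# The Swagger UI / OpenAPI docs are served at http://localhost:4242; send the header along.
curl -H "x-api-key: change-me" http://localhost:4242/
```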
@@ -1588,24 +1656,20 @@ Create a data contract based on the actual data. This is the fastest way to get
 $ datacontract test
 ```
 
-2. Add
+2. Add quality checks and additional type constraints one by one to the contract and make sure the
+data still adheres to the contract.
 ```bash
-$ datacontract test --examples
-```
-
-
-3. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adheres to the contract. Check against examples for a very fast feedback loop.
-```bash
-$ datacontract test --examples
 $ datacontract test
 ```
 
-
+3. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You
+probably forgot to document some fields and add the terms and conditions.
 ```bash
 $ datacontract lint
 ```
 
-
+4. Set up a CI pipeline that executes daily for continuous quality checks. You can also report the
+test results to tools like [Data Mesh Manager](https://datamesh-manager.com)
 ```bash
 $ datacontract test --publish https://api.datamesh-manager.com/api/test-results
 ```
@@ -1619,30 +1683,15 @@ Create a data contract based on the requirements from use cases.
 $ datacontract init
 ```
 
-2.
-
-
-```bash
-$ datacontract test --examples
-```
-
-4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adheres to the contract. Check against examples for a very fast feedback loop.
-```bash
-$ datacontract test --examples
-```
-
-5. Fill in the terms, descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the linter.
+2. Create the model and quality guarantees based on your business requirements. Fill in the terms,
+descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the
+linter.
 ```bash
 $ datacontract lint
 ```
 
-
-
-$ datacontract lint
-$ datacontract test --examples
-```
-
-7. Use the export function to start building the providing data product as well as the integration into the consuming data products.
+3. Use the export function to start building the providing data product as well as the integration
+into the consuming data products.
 ```bash
 # data provider
 $ datacontract export --format dbt
@@ -1651,6 +1700,11 @@ Create a data contract based on the requirements from use cases.
 $ datacontract export --format dbt-staging-sql
 ```
 
+4. Test that your data product implementation adheres to the contract.
+```bash
+$ datacontract test
+```
+
 ### Schema Evolution
 
 #### Non-breaking Changes
@@ -1709,7 +1763,7 @@ class CustomExporter(Exporter):
 
 
 # Register the new custom class into factory
-exporter_factory.register_exporter("
+exporter_factory.register_exporter("custom_exporter", CustomExporter)
 
 
 if __name__ == "__main__":
@@ -1719,7 +1773,7 @@ if __name__ == "__main__":
 )
 # Call export
 result = data_contract.export(
-export_format="
+export_format="custom_exporter", model="orders", server="production", custom_arg="my_custom_arg"
 )
 print(result)
 
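The last two hunks only show fragments of the custom-exporter snippet from the README (the `exporter_factory.register_exporter(...)` call and the `data_contract.export(...)` call). A rough, self-contained sketch of how those fragments could fit together is below; the import paths and the `Exporter.export` signature are assumptions, not confirmed by this diff:

```python
from datacontract.data_contract import DataContract
from datacontract.export.exporter import Exporter  # assumed import path
from datacontract.export.exporter_factory import exporter_factory  # assumed import path


class CustomExporter(Exporter):
    # Assumed hook signature; check exporter.py of the installed version before relying on it.
    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict | str:
        # Keyword arguments passed to DataContract.export() are expected to arrive in export_args (assumption).
        return {"model": model, "custom_arg": (export_args or {}).get("custom_arg")}


# Register the new custom class into the factory, as shown in the hunk above.
exporter_factory.register_exporter("custom_exporter", CustomExporter)

if __name__ == "__main__":
    data_contract = DataContract(data_contract_file="datacontract.yaml")
    # Call export with the registered format name, mirroring the fragment in the last hunk.
    result = data_contract.export(
        export_format="custom_exporter", model="orders", server="production", custom_arg="my_custom_arg"
    )
    print(result)
```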