datacontract-cli 0.10.4__py3-none-any.whl → 0.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +3 -3
- datacontract/catalog/catalog.py +1 -2
- datacontract/cli.py +11 -5
- datacontract/data_contract.py +32 -16
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/bigquery_converter.py +17 -16
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/html_export.py +2 -3
- datacontract/export/jsonschema_converter.py +4 -5
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_type_converter.py +88 -8
- datacontract/imports/avro_importer.py +32 -18
- datacontract/imports/bigquery_importer.py +28 -16
- datacontract/imports/jsonschema_importer.py +80 -82
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +10 -0
- datacontract/py.typed +0 -0
- datacontract/templates/index.html +6 -6
- datacontract/templates/style/output.css +19 -14
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +169 -75
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +29 -26
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
<div class="min-h-full flex flex-col">
|
|
15
15
|
|
|
16
16
|
<nav class="bg-white shadow-sm">
|
|
17
|
-
<div class="mx-auto max-w-7xl px-
|
|
17
|
+
<div class="mx-auto max-w-7xl px-6 lg:px-8">
|
|
18
18
|
<div class="flex h-16 justify-between">
|
|
19
19
|
<div class="flex">
|
|
20
20
|
<div class="flex flex-shrink-0 items-center mr-6">
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
|
|
33
33
|
<main class="pb-7">
|
|
34
34
|
|
|
35
|
-
<div class="pt-5 mx-auto max-w-7xl
|
|
35
|
+
<div class="pt-5 mx-auto max-w-7xl px-6 lg:px-8">
|
|
36
36
|
<div>
|
|
37
|
-
<div class="lg:flex lg:items-center lg:justify-between px-
|
|
37
|
+
<div class="lg:flex lg:items-center lg:justify-between px-0">
|
|
38
38
|
<div class="min-w-0 flex-1">
|
|
39
39
|
<h2 class="text-2xl font-bold leading-7 text-gray-900 sm:truncate sm:text-3xl sm:tracking-tight">
|
|
40
40
|
Data Contract Catalog</h2>
|
|
@@ -54,8 +54,8 @@
|
|
|
54
54
|
|
|
55
55
|
<form aria-labelledby="filter-heading" class="pb-4" >
|
|
56
56
|
<h2 id="filter-heading" class="sr-only">Filters</h2>
|
|
57
|
-
<div class="flex items-center justify-between">
|
|
58
|
-
<div class="
|
|
57
|
+
<div class="sm:flex items-center justify-between">
|
|
58
|
+
<div class="sm:flex items-baseline space-x-8">
|
|
59
59
|
<div>
|
|
60
60
|
<div class="relative rounded-md shadow-sm">
|
|
61
61
|
<div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3">
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
<path fill-rule="evenodd" d="M9 3.5a5.5 5.5 0 100 11 5.5 5.5 0 000-11zM2 9a7 7 0 1112.452 4.391l3.328 3.329a.75.75 0 11-1.06 1.06l-3.329-3.328A7 7 0 012 9z" clip-rule="evenodd" />
|
|
64
64
|
</svg>
|
|
65
65
|
</div>
|
|
66
|
-
<input type="text" name="q" id="search" class="block w-72 rounded-md border-0 py-1.5 pl-10 text-gray-900 ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" placeholder="Search">
|
|
66
|
+
<input type="text" name="q" id="search" class="block w-full sm:w-72 rounded-md border-0 py-1.5 pl-10 text-gray-900 ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" placeholder="Search">
|
|
67
67
|
</div>
|
|
68
68
|
</div>
|
|
69
69
|
</div>
|
|
@@ -781,10 +781,6 @@ video {
|
|
|
781
781
|
width: 58.333333%;
|
|
782
782
|
}
|
|
783
783
|
|
|
784
|
-
.w-72 {
|
|
785
|
-
width: 18rem;
|
|
786
|
-
}
|
|
787
|
-
|
|
788
784
|
.w-9\/12 {
|
|
789
785
|
width: 75%;
|
|
790
786
|
}
|
|
@@ -841,6 +837,10 @@ video {
|
|
|
841
837
|
align-items: center;
|
|
842
838
|
}
|
|
843
839
|
|
|
840
|
+
.items-baseline {
|
|
841
|
+
align-items: baseline;
|
|
842
|
+
}
|
|
843
|
+
|
|
844
844
|
.justify-center {
|
|
845
845
|
justify-content: center;
|
|
846
846
|
}
|
|
@@ -888,6 +888,12 @@ video {
|
|
|
888
888
|
margin-left: calc(1.5rem * calc(1 - var(--tw-space-x-reverse)));
|
|
889
889
|
}
|
|
890
890
|
|
|
891
|
+
.space-x-8 > :not([hidden]) ~ :not([hidden]) {
|
|
892
|
+
--tw-space-x-reverse: 0;
|
|
893
|
+
margin-right: calc(2rem * var(--tw-space-x-reverse));
|
|
894
|
+
margin-left: calc(2rem * calc(1 - var(--tw-space-x-reverse)));
|
|
895
|
+
}
|
|
896
|
+
|
|
891
897
|
.space-y-6 > :not([hidden]) ~ :not([hidden]) {
|
|
892
898
|
--tw-space-y-reverse: 0;
|
|
893
899
|
margin-top: calc(1.5rem * calc(1 - var(--tw-space-y-reverse)));
|
|
@@ -1004,6 +1010,11 @@ video {
|
|
|
1004
1010
|
padding: 1.5rem;
|
|
1005
1011
|
}
|
|
1006
1012
|
|
|
1013
|
+
.px-0 {
|
|
1014
|
+
padding-left: 0px;
|
|
1015
|
+
padding-right: 0px;
|
|
1016
|
+
}
|
|
1017
|
+
|
|
1007
1018
|
.px-1 {
|
|
1008
1019
|
padding-left: 0.25rem;
|
|
1009
1020
|
padding-right: 0.25rem;
|
|
@@ -1376,6 +1387,10 @@ video {
|
|
|
1376
1387
|
width: 80%;
|
|
1377
1388
|
}
|
|
1378
1389
|
|
|
1390
|
+
.sm\:w-72 {
|
|
1391
|
+
width: 18rem;
|
|
1392
|
+
}
|
|
1393
|
+
|
|
1379
1394
|
.sm\:grid-cols-2 {
|
|
1380
1395
|
grid-template-columns: repeat(2, minmax(0, 1fr));
|
|
1381
1396
|
}
|
|
@@ -1392,22 +1407,12 @@ video {
|
|
|
1392
1407
|
align-items: center;
|
|
1393
1408
|
}
|
|
1394
1409
|
|
|
1395
|
-
.sm\:items-baseline {
|
|
1396
|
-
align-items: baseline;
|
|
1397
|
-
}
|
|
1398
|
-
|
|
1399
1410
|
.sm\:space-x-6 > :not([hidden]) ~ :not([hidden]) {
|
|
1400
1411
|
--tw-space-x-reverse: 0;
|
|
1401
1412
|
margin-right: calc(1.5rem * var(--tw-space-x-reverse));
|
|
1402
1413
|
margin-left: calc(1.5rem * calc(1 - var(--tw-space-x-reverse)));
|
|
1403
1414
|
}
|
|
1404
1415
|
|
|
1405
|
-
.sm\:space-x-8 > :not([hidden]) ~ :not([hidden]) {
|
|
1406
|
-
--tw-space-x-reverse: 0;
|
|
1407
|
-
margin-right: calc(2rem * var(--tw-space-x-reverse));
|
|
1408
|
-
margin-left: calc(2rem * calc(1 - var(--tw-space-x-reverse)));
|
|
1409
|
-
}
|
|
1410
|
-
|
|
1411
1416
|
.sm\:truncate {
|
|
1412
1417
|
overflow: hidden;
|
|
1413
1418
|
text-overflow: ellipsis;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datacontract-cli
|
|
3
|
-
Version: 0.10.
|
|
3
|
+
Version: 0.10.5
|
|
4
4
|
Summary: Test data contracts
|
|
5
5
|
Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
|
|
6
6
|
Project-URL: Homepage, https://cli.datacontract.com
|
|
@@ -14,30 +14,33 @@ License-File: LICENSE
|
|
|
14
14
|
Requires-Dist: typer[all] <0.13,>=0.9
|
|
15
15
|
Requires-Dist: pydantic <2.8.0,>=2.5.3
|
|
16
16
|
Requires-Dist: pyyaml ~=6.0.1
|
|
17
|
-
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: requests <2.33,>=2.31
|
|
18
18
|
Requires-Dist: fastapi ==0.111.0
|
|
19
|
-
Requires-Dist: fastparquet ==2024.
|
|
19
|
+
Requires-Dist: fastparquet ==2024.5.0
|
|
20
20
|
Requires-Dist: python-multipart ==0.0.9
|
|
21
21
|
Requires-Dist: rich ~=13.7.0
|
|
22
|
-
Requires-Dist: simple-ddl-parser ==1.
|
|
22
|
+
Requires-Dist: simple-ddl-parser ==1.5.1
|
|
23
23
|
Requires-Dist: soda-core-bigquery <3.4.0,>=3.3.1
|
|
24
24
|
Requires-Dist: soda-core-duckdb <3.4.0,>=3.3.1
|
|
25
|
+
Requires-Dist: soda-core-sqlserver <3.4.0,>=3.3.1
|
|
25
26
|
Requires-Dist: soda-core-postgres <3.4.0,>=3.3.1
|
|
26
27
|
Requires-Dist: soda-core-snowflake <3.4.0,>=3.3.1
|
|
27
28
|
Requires-Dist: soda-core-spark[databricks] <3.4.0,>=3.3.1
|
|
29
|
+
Requires-Dist: databricks-sql-connector <3.2.0,>=3.1.2
|
|
28
30
|
Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1
|
|
31
|
+
Requires-Dist: setuptools >=60
|
|
29
32
|
Requires-Dist: snowflake-connector-python[pandas] <3.11,>=3.6
|
|
30
|
-
Requires-Dist: duckdb ==0.10.
|
|
33
|
+
Requires-Dist: duckdb ==0.10.3
|
|
31
34
|
Requires-Dist: fastjsonschema ~=2.19.1
|
|
32
35
|
Requires-Dist: python-dotenv ~=1.0.0
|
|
33
|
-
Requires-Dist: s3fs ==2024.
|
|
36
|
+
Requires-Dist: s3fs ==2024.5.0
|
|
34
37
|
Requires-Dist: rdflib ==7.0.0
|
|
35
38
|
Requires-Dist: avro ==1.11.3
|
|
36
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16
|
|
37
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16
|
|
39
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16
|
|
40
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16
|
|
38
41
|
Requires-Dist: deltalake ~=0.17.0
|
|
39
|
-
Requires-Dist: boto3 <1.34.
|
|
40
|
-
Requires-Dist: botocore <1.34.
|
|
42
|
+
Requires-Dist: boto3 <1.34.114,>=1.34.41
|
|
43
|
+
Requires-Dist: botocore <1.34.114,>=1.34.41
|
|
41
44
|
Requires-Dist: jinja-partials >=0.2.1
|
|
42
45
|
Provides-Extra: dev
|
|
43
46
|
Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
|
|
@@ -45,10 +48,12 @@ Requires-Dist: ruff ; extra == 'dev'
|
|
|
45
48
|
Requires-Dist: pytest ; extra == 'dev'
|
|
46
49
|
Requires-Dist: pytest-xdist ; extra == 'dev'
|
|
47
50
|
Requires-Dist: moto ; extra == 'dev'
|
|
51
|
+
Requires-Dist: pymssql ==2.3.0 ; extra == 'dev'
|
|
48
52
|
Requires-Dist: testcontainers <4.0.0 ; extra == 'dev'
|
|
49
53
|
Requires-Dist: testcontainers-minio ; extra == 'dev'
|
|
50
54
|
Requires-Dist: testcontainers-postgres ; extra == 'dev'
|
|
51
55
|
Requires-Dist: testcontainers-kafka ; extra == 'dev'
|
|
56
|
+
Requires-Dist: testcontainers[mssql] ; extra == 'dev'
|
|
52
57
|
|
|
53
58
|
# Data Contract CLI
|
|
54
59
|
|
|
@@ -68,7 +73,7 @@ It uses data contract YAML files to lint the data contract, connect to data sour
|
|
|
68
73
|
|
|
69
74
|
## Getting started
|
|
70
75
|
|
|
71
|
-
Let's look at this data contract:
|
|
76
|
+
Let's look at this data contract:
|
|
72
77
|
[https://datacontract.com/examples/orders-latest/datacontract.yaml](https://datacontract.com/examples/orders-latest/datacontract.yaml)
|
|
73
78
|
|
|
74
79
|
We have a _servers_ section with endpoint details to the S3 bucket, _models_ for the structure of the data, _servicelevels_ and _quality_ attributes that describe the expected freshness and number of rows.
|
|
@@ -240,11 +245,11 @@ Commands
|
|
|
240
245
|
|
|
241
246
|
### init
|
|
242
247
|
|
|
243
|
-
```
|
|
244
|
-
Usage: datacontract init [OPTIONS] [LOCATION]
|
|
245
|
-
|
|
246
|
-
Download a datacontract.yaml template and write it to file.
|
|
247
|
-
|
|
248
|
+
```
|
|
249
|
+
Usage: datacontract init [OPTIONS] [LOCATION]
|
|
250
|
+
|
|
251
|
+
Download a datacontract.yaml template and write it to file.
|
|
252
|
+
|
|
248
253
|
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────╮
|
|
249
254
|
│ location [LOCATION] The location (url or path) of the data contract yaml to create. │
|
|
250
255
|
│ [default: datacontract.yaml] │
|
|
@@ -262,10 +267,10 @@ Commands
|
|
|
262
267
|
### lint
|
|
263
268
|
|
|
264
269
|
```
|
|
265
|
-
Usage: datacontract lint [OPTIONS] [LOCATION]
|
|
266
|
-
|
|
267
|
-
Validate that the datacontract.yaml is correctly formatted.
|
|
268
|
-
|
|
270
|
+
Usage: datacontract lint [OPTIONS] [LOCATION]
|
|
271
|
+
|
|
272
|
+
Validate that the datacontract.yaml is correctly formatted.
|
|
273
|
+
|
|
269
274
|
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
270
275
|
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
271
276
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
@@ -279,10 +284,10 @@ Commands
|
|
|
279
284
|
### test
|
|
280
285
|
|
|
281
286
|
```
|
|
282
|
-
Usage: datacontract test [OPTIONS] [LOCATION]
|
|
283
|
-
|
|
284
|
-
Run schema and quality tests on configured servers.
|
|
285
|
-
|
|
287
|
+
Usage: datacontract test [OPTIONS] [LOCATION]
|
|
288
|
+
|
|
289
|
+
Run schema and quality tests on configured servers.
|
|
290
|
+
|
|
286
291
|
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
287
292
|
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
288
293
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
@@ -315,11 +320,11 @@ Data Contract CLI connects to a data source and runs schema and quality tests to
|
|
|
315
320
|
$ datacontract test --server production datacontract.yaml
|
|
316
321
|
```
|
|
317
322
|
|
|
318
|
-
To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection.
|
|
323
|
+
To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection.
|
|
319
324
|
In addition, credentials, such as username and passwords, may be defined with environment variables.
|
|
320
325
|
|
|
321
326
|
The application uses different engines, based on the server `type`.
|
|
322
|
-
Internally, it connects with DuckDB, Spark, or a native connection and executes the most tests with _soda-core_ and _fastjsonschema_.
|
|
327
|
+
Internally, it connects with DuckDB, Spark, or a native connection and executes most tests with _soda-core_ and _fastjsonschema_.
|
|
323
328
|
|
|
324
329
|
Credentials are provided with environment variables.
|
|
325
330
|
|
|
@@ -505,7 +510,7 @@ dbutils.library.restartPython()
|
|
|
505
510
|
from datacontract.data_contract import DataContract
|
|
506
511
|
|
|
507
512
|
data_contract = DataContract(
|
|
508
|
-
data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
|
|
513
|
+
data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
|
|
509
514
|
spark=spark)
|
|
510
515
|
run = data_contract.test()
|
|
511
516
|
run.result
|
|
@@ -530,7 +535,7 @@ servers:
|
|
|
530
535
|
models:
|
|
531
536
|
my_table_1: # corresponds to a table
|
|
532
537
|
type: table
|
|
533
|
-
fields:
|
|
538
|
+
fields:
|
|
534
539
|
my_column_1: # corresponds to a column
|
|
535
540
|
type: varchar
|
|
536
541
|
```
|
|
@@ -588,7 +593,7 @@ servers:
|
|
|
588
593
|
models:
|
|
589
594
|
my_table_1: # corresponds to a table
|
|
590
595
|
type: table
|
|
591
|
-
fields:
|
|
596
|
+
fields:
|
|
592
597
|
my_column_1: # corresponds to a column
|
|
593
598
|
type: varchar
|
|
594
599
|
```
|
|
@@ -602,38 +607,78 @@ models:
|
|
|
602
607
|
|
|
603
608
|
|
|
604
609
|
|
|
610
|
+
|
|
611
|
+
### SQL Server
|
|
612
|
+
|
|
613
|
+
Data Contract CLI can test data in Microsoft SQL Server databases.
|
|
614
|
+
|
|
615
|
+
#### Example
|
|
616
|
+
|
|
617
|
+
datacontract.yaml
|
|
618
|
+
```yaml
|
|
619
|
+
servers:
|
|
620
|
+
postgres:
|
|
621
|
+
type: sqlserver
|
|
622
|
+
host: localhost
|
|
623
|
+
port: 5432
|
|
624
|
+
database: tempdb
|
|
625
|
+
schema: dbo
|
|
626
|
+
driver: ODBC Driver 18 for SQL Server
|
|
627
|
+
models:
|
|
628
|
+
my_table_1: # corresponds to a table
|
|
629
|
+
type: table
|
|
630
|
+
fields:
|
|
631
|
+
my_column_1: # corresponds to a column
|
|
632
|
+
type: varchar
|
|
633
|
+
```
|
|
634
|
+
|
|
635
|
+
#### Environment Variables
|
|
636
|
+
|
|
637
|
+
| Environment Variable | Example | Description |
|
|
638
|
+
|----------------------------------|--------------------|-------------|
|
|
639
|
+
| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
|
|
640
|
+
| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
|
|
641
|
+
| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
|
|
642
|
+
| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
|
|
643
|
+
| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
|
|
605
647
|
### export
|
|
606
648
|
|
|
607
649
|
```
|
|
650
|
+
|
|
608
651
|
Usage: datacontract export [OPTIONS] [LOCATION]
|
|
609
652
|
|
|
610
653
|
Convert data contract to a specific format. Prints to stdout or to the specified output file.
|
|
611
654
|
|
|
612
|
-
╭─ Arguments
|
|
613
|
-
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml]
|
|
614
|
-
|
|
615
|
-
╭─ Options
|
|
616
|
-
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|
|
|
617
|
-
│ -sql|odcs|rdf|avro|protobuf|great-
|
|
618
|
-
│ -idl|sql|sql-query|html|
|
|
619
|
-
│
|
|
620
|
-
│
|
|
621
|
-
│
|
|
622
|
-
│
|
|
623
|
-
│
|
|
624
|
-
│
|
|
625
|
-
│
|
|
626
|
-
│
|
|
627
|
-
│
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
│
|
|
636
|
-
|
|
655
|
+
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
656
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
657
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
658
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
659
|
+
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
|
|
660
|
+
│ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
|
|
661
|
+
│ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
|
|
662
|
+
│ y|dbml] │
|
|
663
|
+
│ --output PATH Specify the file path where the exported data will be │
|
|
664
|
+
│ saved. If no path is provided, the output will be │
|
|
665
|
+
│ printed to stdout. │
|
|
666
|
+
│ [default: None] │
|
|
667
|
+
│ --server TEXT The server name to export. [default: None] │
|
|
668
|
+
│ --model TEXT Use the key of the model in the data contract yaml │
|
|
669
|
+
│ file to refer to a model, e.g., `orders`, or `all` │
|
|
670
|
+
│ for all models (default). │
|
|
671
|
+
│ [default: all] │
|
|
672
|
+
│ --help Show this message and exit. │
|
|
673
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
674
|
+
╭─ RDF Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
675
|
+
│ --rdf-base TEXT [rdf] The base URI used to generate the RDF graph. [default: None] │
|
|
676
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
677
|
+
╭─ SQL Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
678
|
+
│ --sql-server-type TEXT [sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically │
|
|
679
|
+
│ detect the sql dialect via the specified servers in the data contract. │
|
|
680
|
+
│ [default: auto] │
|
|
681
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
637
682
|
|
|
638
683
|
```
|
|
639
684
|
|
|
@@ -646,9 +691,9 @@ Available export options:
|
|
|
646
691
|
|
|
647
692
|
| Type | Description | Status |
|
|
648
693
|
|----------------------|---------------------------------------------------------|--------|
|
|
649
|
-
| `html` | Export to HTML | ✅ |
|
|
650
|
-
| `jsonschema` | Export to JSON Schema | ✅ |
|
|
651
|
-
| `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
|
|
694
|
+
| `html` | Export to HTML | ✅ |
|
|
695
|
+
| `jsonschema` | Export to JSON Schema | ✅ |
|
|
696
|
+
| `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
|
|
652
697
|
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
|
|
653
698
|
| `dbt` | Export to dbt models in YAML format | ✅ |
|
|
654
699
|
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
|
|
@@ -663,15 +708,16 @@ Available export options:
|
|
|
663
708
|
| `bigquery` | Export to BigQuery Schemas | ✅ |
|
|
664
709
|
| `go` | Export to Go types | ✅ |
|
|
665
710
|
| `pydantic-model` | Export to pydantic models | ✅ |
|
|
711
|
+
| `dbml`               | Export to a DBML Diagram description                    | ✅      |
|
|
666
712
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
667
713
|
|
|
668
714
|
#### Great Expectations
|
|
669
715
|
|
|
670
|
-
The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
|
|
716
|
+
The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
|
|
671
717
|
If the contract includes multiple models, you need to specify the names of the model you wish to export.
|
|
672
718
|
|
|
673
719
|
```shell
|
|
674
|
-
datacontract export datacontract.yaml --format great-expectations --model orders
|
|
720
|
+
datacontract export datacontract.yaml --format great-expectations --model orders
|
|
675
721
|
```
|
|
676
722
|
|
|
677
723
|
The export creates a list of expectations by utilizing:
|
|
@@ -681,7 +727,7 @@ The export creates a list of expectations by utilizing:
|
|
|
681
727
|
|
|
682
728
|
#### RDF
|
|
683
729
|
|
|
684
|
-
The export function converts a given data contract into a RDF representation. You have the option to
|
|
730
|
+
The export function converts a given data contract into a RDF representation. You have the option to
|
|
685
731
|
add a base_url which will be used as the default prefix to resolve relative IRIs inside the document.
|
|
686
732
|
|
|
687
733
|
```shell
|
|
@@ -703,6 +749,54 @@ Having the data contract inside an RDF Graph gives us access the following use c
|
|
|
703
749
|
- Apply graph algorithms on multiple data contracts (Find similar data contracts, find "gatekeeper"
|
|
704
750
|
data products, find the true domain owner of a field attribute)
|
|
705
751
|
|
|
752
|
+
#### DBML
|
|
753
|
+
|
|
754
|
+
The export function converts the logical data types of the datacontract into the specific ones of a concrete Database
|
|
755
|
+
if a server is selected via the `--server` option (based on the `type` of that server). If no server is selected, the
|
|
756
|
+
logical data types are exported.
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
#### Avro
|
|
760
|
+
|
|
761
|
+
The export function converts the data contract specification into an avro schema. It supports specifying custom avro properties for logicalTypes and default values.
|
|
762
|
+
|
|
763
|
+
##### Custom Avro Properties
|
|
764
|
+
|
|
765
|
+
We support a **config map on field level**. A config map may include any additional key-value pairs and support multiple server type bindings.
|
|
766
|
+
|
|
767
|
+
To specify custom Avro properties in your data contract, you can define them within the `config` section of your field definition. Below is an example of how to structure your YAML configuration to include custom Avro properties, such as `avroLogicalType` and `avroDefault`.
|
|
768
|
+
|
|
769
|
+
>NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.html)
|
|
770
|
+
|
|
771
|
+
#### Example Configuration
|
|
772
|
+
|
|
773
|
+
```yaml
|
|
774
|
+
models:
|
|
775
|
+
orders:
|
|
776
|
+
fields:
|
|
777
|
+
my_field_1:
|
|
778
|
+
description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
|
|
779
|
+
type: long
|
|
780
|
+
example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
|
|
781
|
+
config:
|
|
782
|
+
avroLogicalType: local-timestamp-micros
|
|
783
|
+
avroDefault: 1672534861000000
|
|
784
|
+
```
|
|
785
|
+
|
|
786
|
+
#### Explanation
|
|
787
|
+
|
|
788
|
+
- **models**: The top-level key that contains different models (tables or objects) in your data contract.
|
|
789
|
+
- **orders**: A specific model name. Replace this with the name of your model.
|
|
790
|
+
- **fields**: The fields within the model. Each field can have various properties defined.
|
|
791
|
+
- **my_field_1**: The name of a specific field. Replace this with your field name.
|
|
792
|
+
- **description**: A textual description of the field.
|
|
793
|
+
- **type**: The data type of the field. In this example, it is `long`.
|
|
794
|
+
- **example**: An example value for the field.
|
|
795
|
+
- **config**: Section to specify custom Avro properties.
|
|
796
|
+
- **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
|
|
797
|
+
- **avroDefault**: Specifies the default value for the field in Avro. In this example, it is `1672534861000000`, which corresponds to `2023-01-01 01:01:01 UTC`.
|
|
798
|
+
|
|
799
|
+
|
|
706
800
|
### import
|
|
707
801
|
|
|
708
802
|
```
|
|
@@ -727,7 +821,7 @@ In this case there's no need to specify `source` but instead `bt-project-id`, `b
|
|
|
727
821
|
|
|
728
822
|
For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
|
|
729
823
|
|
|
730
|
-
Example:
|
|
824
|
+
Example:
|
|
731
825
|
```bash
|
|
732
826
|
# Example import from SQL DDL
|
|
733
827
|
datacontract import --format sql --source my_ddl.sql
|
|
@@ -761,10 +855,10 @@ Available import options:
|
|
|
761
855
|
### breaking
|
|
762
856
|
|
|
763
857
|
```
|
|
764
|
-
Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
765
|
-
|
|
766
|
-
Identifies breaking changes between data contracts. Prints to stdout.
|
|
767
|
-
|
|
858
|
+
Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
859
|
+
|
|
860
|
+
Identifies breaking changes between data contracts. Prints to stdout.
|
|
861
|
+
|
|
768
862
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
769
863
|
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
770
864
|
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
@@ -777,10 +871,10 @@ Available import options:
|
|
|
777
871
|
### changelog
|
|
778
872
|
|
|
779
873
|
```
|
|
780
|
-
Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
781
|
-
|
|
782
|
-
Generate a changelog between data contracts. Prints to stdout.
|
|
783
|
-
|
|
874
|
+
Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
875
|
+
|
|
876
|
+
Generate a changelog between data contracts. Prints to stdout.
|
|
877
|
+
|
|
784
878
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
785
879
|
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
786
880
|
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
@@ -793,10 +887,10 @@ Available import options:
|
|
|
793
887
|
### diff
|
|
794
888
|
|
|
795
889
|
```
|
|
796
|
-
Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
797
|
-
|
|
798
|
-
PLACEHOLDER. Currently works as 'changelog' does.
|
|
799
|
-
|
|
890
|
+
Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
891
|
+
|
|
892
|
+
PLACEHOLDER. Currently works as 'changelog' does.
|
|
893
|
+
|
|
800
894
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
801
895
|
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
802
896
|
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
@@ -928,14 +1022,14 @@ Create a data contract based on the requirements from use cases.
|
|
|
928
1022
|
```bash
|
|
929
1023
|
$ datacontract init
|
|
930
1024
|
```
|
|
931
|
-
|
|
1025
|
+
|
|
932
1026
|
2. Add examples to the `datacontract.yaml`. Do not start with the data model, although you are probably tempted to do that. Examples are the fastest way to get feedback from everybody and not lose someone in the discussion.
|
|
933
1027
|
|
|
934
1028
|
3. Create the model based on the examples. Test the model against the examples to double-check whether the model matches the examples.
|
|
935
1029
|
```bash
|
|
936
1030
|
$ datacontract test --examples
|
|
937
1031
|
```
|
|
938
|
-
|
|
1032
|
+
|
|
939
1033
|
4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
|
|
940
1034
|
```bash
|
|
941
1035
|
$ datacontract test --examples
|