datacontract-cli 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +12 -0
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +42 -8
- datacontract/data_contract.py +84 -134
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +23 -2
- datacontract/export/bigquery_converter.py +107 -0
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/go_converter.py +98 -0
- datacontract/export/html_export.py +4 -2
- datacontract/export/jsonschema_converter.py +41 -2
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_converter.py +1 -0
- datacontract/export/sql_type_converter.py +125 -4
- datacontract/imports/avro_importer.py +41 -14
- datacontract/imports/bigquery_importer.py +178 -0
- datacontract/imports/jsonschema_importer.py +148 -0
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +65 -1
- datacontract/publish/publish.py +32 -0
- datacontract/py.typed +0 -0
- datacontract/templates/datacontract.html +37 -346
- datacontract/templates/index.html +70 -5
- datacontract/templates/partials/datacontract_information.html +66 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +44 -0
- datacontract/templates/partials/definition.html +99 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +97 -0
- datacontract/templates/partials/server.html +144 -0
- datacontract/templates/style/output.css +99 -13
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +276 -139
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +41 -26
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datacontract-cli
|
|
3
|
-
Version: 0.10.3
|
|
3
|
+
Version: 0.10.5
|
|
4
4
|
Summary: Test data contracts
|
|
5
5
|
Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
|
|
6
6
|
Project-URL: Homepage, https://cli.datacontract.com
|
|
@@ -14,40 +14,46 @@ License-File: LICENSE
|
|
|
14
14
|
Requires-Dist: typer[all] <0.13,>=0.9
|
|
15
15
|
Requires-Dist: pydantic <2.8.0,>=2.5.3
|
|
16
16
|
Requires-Dist: pyyaml ~=6.0.1
|
|
17
|
-
Requires-Dist: requests
|
|
18
|
-
Requires-Dist: fastapi ==0.
|
|
19
|
-
Requires-Dist: fastparquet ==2024.
|
|
17
|
+
Requires-Dist: requests <2.33,>=2.31
|
|
18
|
+
Requires-Dist: fastapi ==0.111.0
|
|
19
|
+
Requires-Dist: fastparquet ==2024.5.0
|
|
20
20
|
Requires-Dist: python-multipart ==0.0.9
|
|
21
21
|
Requires-Dist: rich ~=13.7.0
|
|
22
|
-
Requires-Dist: simple-ddl-parser ==1.1
|
|
22
|
+
Requires-Dist: simple-ddl-parser ==1.5.1
|
|
23
23
|
Requires-Dist: soda-core-bigquery <3.4.0,>=3.3.1
|
|
24
24
|
Requires-Dist: soda-core-duckdb <3.4.0,>=3.3.1
|
|
25
|
+
Requires-Dist: soda-core-sqlserver <3.4.0,>=3.3.1
|
|
25
26
|
Requires-Dist: soda-core-postgres <3.4.0,>=3.3.1
|
|
26
27
|
Requires-Dist: soda-core-snowflake <3.4.0,>=3.3.1
|
|
27
28
|
Requires-Dist: soda-core-spark[databricks] <3.4.0,>=3.3.1
|
|
29
|
+
Requires-Dist: databricks-sql-connector <3.2.0,>=3.1.2
|
|
28
30
|
Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1
|
|
29
|
-
Requires-Dist:
|
|
30
|
-
Requires-Dist:
|
|
31
|
+
Requires-Dist: setuptools >=60
|
|
32
|
+
Requires-Dist: snowflake-connector-python[pandas] <3.11,>=3.6
|
|
33
|
+
Requires-Dist: duckdb ==0.10.3
|
|
31
34
|
Requires-Dist: fastjsonschema ~=2.19.1
|
|
32
35
|
Requires-Dist: python-dotenv ~=1.0.0
|
|
33
|
-
Requires-Dist: s3fs ==2024.
|
|
36
|
+
Requires-Dist: s3fs ==2024.5.0
|
|
34
37
|
Requires-Dist: rdflib ==7.0.0
|
|
35
38
|
Requires-Dist: avro ==1.11.3
|
|
36
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16
|
|
37
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16
|
|
39
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16
|
|
40
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16
|
|
38
41
|
Requires-Dist: deltalake ~=0.17.0
|
|
39
|
-
Requires-Dist: boto3 <1.34.
|
|
40
|
-
Requires-Dist: botocore <1.34.
|
|
42
|
+
Requires-Dist: boto3 <1.34.114,>=1.34.41
|
|
43
|
+
Requires-Dist: botocore <1.34.114,>=1.34.41
|
|
44
|
+
Requires-Dist: jinja-partials >=0.2.1
|
|
41
45
|
Provides-Extra: dev
|
|
42
46
|
Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
|
|
43
47
|
Requires-Dist: ruff ; extra == 'dev'
|
|
44
48
|
Requires-Dist: pytest ; extra == 'dev'
|
|
45
49
|
Requires-Dist: pytest-xdist ; extra == 'dev'
|
|
46
50
|
Requires-Dist: moto ; extra == 'dev'
|
|
51
|
+
Requires-Dist: pymssql ==2.3.0 ; extra == 'dev'
|
|
47
52
|
Requires-Dist: testcontainers <4.0.0 ; extra == 'dev'
|
|
48
53
|
Requires-Dist: testcontainers-minio ; extra == 'dev'
|
|
49
54
|
Requires-Dist: testcontainers-postgres ; extra == 'dev'
|
|
50
55
|
Requires-Dist: testcontainers-kafka ; extra == 'dev'
|
|
56
|
+
Requires-Dist: testcontainers[mssql] ; extra == 'dev'
|
|
51
57
|
|
|
52
58
|
# Data Contract CLI
|
|
53
59
|
|
|
@@ -67,7 +73,7 @@ It uses data contract YAML files to lint the data contract, connect to data sour
|
|
|
67
73
|
|
|
68
74
|
## Getting started
|
|
69
75
|
|
|
70
|
-
Let's look at this data contract:
|
|
76
|
+
Let's look at this data contract:
|
|
71
77
|
[https://datacontract.com/examples/orders-latest/datacontract.yaml](https://datacontract.com/examples/orders-latest/datacontract.yaml)
|
|
72
78
|
|
|
73
79
|
We have a _servers_ section with endpoint details to the S3 bucket, _models_ for the structure of the data, _servicelevels_ and _quality_ attributes that describe the expected freshness and number of rows.
|
|
@@ -166,7 +172,7 @@ $ datacontract test --examples datacontract.yaml
|
|
|
166
172
|
# export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql, sodacl, terraform, ...)
|
|
167
173
|
$ datacontract export --format html datacontract.yaml > datacontract.html
|
|
168
174
|
|
|
169
|
-
# import avro (other formats: sql, ...)
|
|
175
|
+
# import avro (other formats: sql, glue, bigquery, ...)
|
|
170
176
|
$ datacontract import --format avro --source avro_schema.avsc
|
|
171
177
|
|
|
172
178
|
# find differences between two data contracts
|
|
@@ -235,75 +241,76 @@ Commands
|
|
|
235
241
|
- [changelog](#changelog)
|
|
236
242
|
- [diff](#diff)
|
|
237
243
|
- [catalog](#catalog)
|
|
244
|
+
- [publish](#publish)
|
|
238
245
|
|
|
239
246
|
### init
|
|
240
247
|
|
|
241
|
-
```
|
|
242
|
-
Usage: datacontract init [OPTIONS] [LOCATION]
|
|
243
|
-
|
|
244
|
-
Download a datacontract.yaml template and write it to file.
|
|
245
|
-
|
|
248
|
+
```
|
|
249
|
+
Usage: datacontract init [OPTIONS] [LOCATION]
|
|
250
|
+
|
|
251
|
+
Download a datacontract.yaml template and write it to file.
|
|
252
|
+
|
|
246
253
|
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────╮
|
|
247
|
-
│ location [LOCATION] The location (url or path) of the data contract yaml to create.
|
|
248
|
-
│ [default: datacontract.yaml]
|
|
254
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml to create. │
|
|
255
|
+
│ [default: datacontract.yaml] │
|
|
249
256
|
╰──────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
250
257
|
╭─ Options ────────────────────────────────────────────────────────────────────────────────────╮
|
|
251
|
-
│ --template TEXT URL of a template or data contract
|
|
252
|
-
│ [default:
|
|
253
|
-
│ https://datacontract.com/datacontract.init.yaml]
|
|
254
|
-
│ --overwrite --no-overwrite Replace the existing datacontract.yaml
|
|
255
|
-
│ [default: no-overwrite]
|
|
256
|
-
│ --help Show this message and exit.
|
|
258
|
+
│ --template TEXT URL of a template or data contract │
|
|
259
|
+
│ [default: │
|
|
260
|
+
│ https://datacontract.com/datacontract.init.yaml] │
|
|
261
|
+
│ --overwrite --no-overwrite Replace the existing datacontract.yaml │
|
|
262
|
+
│ [default: no-overwrite] │
|
|
263
|
+
│ --help Show this message and exit. │
|
|
257
264
|
╰──────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
258
265
|
```
|
|
259
266
|
|
|
260
267
|
### lint
|
|
261
268
|
|
|
262
269
|
```
|
|
263
|
-
Usage: datacontract lint [OPTIONS] [LOCATION]
|
|
264
|
-
|
|
265
|
-
Validate that the datacontract.yaml is correctly formatted.
|
|
266
|
-
|
|
270
|
+
Usage: datacontract lint [OPTIONS] [LOCATION]
|
|
271
|
+
|
|
272
|
+
Validate that the datacontract.yaml is correctly formatted.
|
|
273
|
+
|
|
267
274
|
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
268
|
-
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml]
|
|
275
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
269
276
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
270
277
|
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
271
|
-
│ --schema TEXT The location (url or path) of the Data Contract Specification JSON Schema
|
|
272
|
-
│ [default: https://datacontract.com/datacontract.schema.json]
|
|
273
|
-
│ --help Show this message and exit.
|
|
278
|
+
│ --schema TEXT The location (url or path) of the Data Contract Specification JSON Schema │
|
|
279
|
+
│ [default: https://datacontract.com/datacontract.schema.json] │
|
|
280
|
+
│ --help Show this message and exit. │
|
|
274
281
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
275
282
|
```
|
|
276
283
|
|
|
277
284
|
### test
|
|
278
285
|
|
|
279
286
|
```
|
|
280
|
-
Usage: datacontract test [OPTIONS] [LOCATION]
|
|
281
|
-
|
|
282
|
-
Run schema and quality tests on configured servers.
|
|
283
|
-
|
|
287
|
+
Usage: datacontract test [OPTIONS] [LOCATION]
|
|
288
|
+
|
|
289
|
+
Run schema and quality tests on configured servers.
|
|
290
|
+
|
|
284
291
|
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
285
|
-
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml]
|
|
292
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
286
293
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
287
294
|
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
288
|
-
│ --schema TEXT The location (url or path) of the Data Contract
|
|
289
|
-
│ Specification JSON Schema
|
|
290
|
-
│ [default:
|
|
291
|
-
│ https://datacontract.com/datacontract.schema.json]
|
|
292
|
-
│ --server TEXT The server configuration to run the schema and quality
|
|
293
|
-
│ tests. Use the key of the server object in the data
|
|
294
|
-
│ contract yaml file to refer to a server, e.g.,
|
|
295
|
-
│ `production`, or `all` for all servers (default).
|
|
296
|
-
│ [default: all]
|
|
297
|
-
│ --examples --no-examples Run the schema and quality tests on the example data
|
|
298
|
-
│ within the data contract.
|
|
299
|
-
│ [default: no-examples]
|
|
300
|
-
│ --publish TEXT The url to publish the results after the test
|
|
301
|
-
│ [default: None]
|
|
302
|
-
│ --publish-to-opentelemetry --no-publish-to-opentelemetry Publish the results to opentelemetry. Use environment
|
|
303
|
-
│ variables to configure the OTLP endpoint, headers, etc.
|
|
304
|
-
│ [default: no-publish-to-opentelemetry]
|
|
305
|
-
│ --logs --no-logs Print logs [default: no-logs]
|
|
306
|
-
│ --help Show this message and exit.
|
|
295
|
+
│ --schema TEXT The location (url or path) of the Data Contract │
|
|
296
|
+
│ Specification JSON Schema │
|
|
297
|
+
│ [default: │
|
|
298
|
+
│ https://datacontract.com/datacontract.schema.json] │
|
|
299
|
+
│ --server TEXT The server configuration to run the schema and quality │
|
|
300
|
+
│ tests. Use the key of the server object in the data │
|
|
301
|
+
│ contract yaml file to refer to a server, e.g., │
|
|
302
|
+
│ `production`, or `all` for all servers (default). │
|
|
303
|
+
│ [default: all] │
|
|
304
|
+
│ --examples --no-examples Run the schema and quality tests on the example data │
|
|
305
|
+
│ within the data contract. │
|
|
306
|
+
│ [default: no-examples] │
|
|
307
|
+
│ --publish TEXT The url to publish the results after the test │
|
|
308
|
+
│ [default: None] │
|
|
309
|
+
│ --publish-to-opentelemetry --no-publish-to-opentelemetry Publish the results to opentelemetry. Use environment │
|
|
310
|
+
│ variables to configure the OTLP endpoint, headers, etc. │
|
|
311
|
+
│ [default: no-publish-to-opentelemetry] │
|
|
312
|
+
│ --logs --no-logs Print logs [default: no-logs] │
|
|
313
|
+
│ --help Show this message and exit. │
|
|
307
314
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
308
315
|
```
|
|
309
316
|
|
|
@@ -313,11 +320,11 @@ Data Contract CLI connects to a data source and runs schema and quality tests to
|
|
|
313
320
|
$ datacontract test --server production datacontract.yaml
|
|
314
321
|
```
|
|
315
322
|
|
|
316
|
-
To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection.
|
|
323
|
+
To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection.
|
|
317
324
|
In addition, credentials, such as username and passwords, may be defined with environment variables.
|
|
318
325
|
|
|
319
326
|
The application uses different engines, based on the server `type`.
|
|
320
|
-
Internally, it connects with DuckDB, Spark, or a native connection and executes the most tests with _soda-core_ and _fastjsonschema_.
|
|
327
|
+
Internally, it connects with DuckDB, Spark, or a native connection and executes most tests with _soda-core_ and _fastjsonschema_.
|
|
321
328
|
|
|
322
329
|
Credentials are provided with environment variables.
|
|
323
330
|
|
|
@@ -503,7 +510,7 @@ dbutils.library.restartPython()
|
|
|
503
510
|
from datacontract.data_contract import DataContract
|
|
504
511
|
|
|
505
512
|
data_contract = DataContract(
|
|
506
|
-
data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
|
|
513
|
+
data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
|
|
507
514
|
spark=spark)
|
|
508
515
|
run = data_contract.test()
|
|
509
516
|
run.result
|
|
@@ -528,7 +535,7 @@ servers:
|
|
|
528
535
|
models:
|
|
529
536
|
my_table_1: # corresponds to a table
|
|
530
537
|
type: table
|
|
531
|
-
fields:
|
|
538
|
+
fields:
|
|
532
539
|
my_column_1: # corresponds to a column
|
|
533
540
|
type: varchar
|
|
534
541
|
```
|
|
@@ -586,7 +593,7 @@ servers:
|
|
|
586
593
|
models:
|
|
587
594
|
my_table_1: # corresponds to a table
|
|
588
595
|
type: table
|
|
589
|
-
fields:
|
|
596
|
+
fields:
|
|
590
597
|
my_column_1: # corresponds to a column
|
|
591
598
|
type: varchar
|
|
592
599
|
```
|
|
@@ -600,33 +607,78 @@ models:
|
|
|
600
607
|
|
|
601
608
|
|
|
602
609
|
|
|
610
|
+
|
|
611
|
+
### SQL Server
|
|
612
|
+
|
|
613
|
+
Data Contract CLI can test data in Microsoft SQL Server databases.
|
|
614
|
+
|
|
615
|
+
#### Example
|
|
616
|
+
|
|
617
|
+
datacontract.yaml
|
|
618
|
+
```yaml
|
|
619
|
+
servers:
|
|
620
|
+
sqlserver:
|
|
621
|
+
type: sqlserver
|
|
622
|
+
host: localhost
|
|
623
|
+
port: 5432
|
|
624
|
+
database: tempdb
|
|
625
|
+
schema: dbo
|
|
626
|
+
driver: ODBC Driver 18 for SQL Server
|
|
627
|
+
models:
|
|
628
|
+
my_table_1: # corresponds to a table
|
|
629
|
+
type: table
|
|
630
|
+
fields:
|
|
631
|
+
my_column_1: # corresponds to a column
|
|
632
|
+
type: varchar
|
|
633
|
+
```
|
|
634
|
+
|
|
635
|
+
#### Environment Variables
|
|
636
|
+
|
|
637
|
+
| Environment Variable | Example | Description |
|
|
638
|
+
|----------------------------------|--------------------|-------------|
|
|
639
|
+
| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
|
|
640
|
+
| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
|
|
641
|
+
| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use Windows authentication instead of a username/password login |
|
|
642
|
+
| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
|
|
643
|
+
| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
|
|
603
647
|
### export
|
|
604
648
|
|
|
605
649
|
```
|
|
650
|
+
|
|
606
651
|
Usage: datacontract export [OPTIONS] [LOCATION]
|
|
607
652
|
|
|
608
653
|
Convert data contract to a specific format. Prints to stdout or to the specified output file.
|
|
609
654
|
|
|
610
|
-
╭─ Arguments
|
|
611
|
-
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml]
|
|
612
|
-
|
|
613
|
-
╭─ Options
|
|
614
|
-
│ * --format [
|
|
615
|
-
│
|
|
616
|
-
│
|
|
617
|
-
│
|
|
618
|
-
│
|
|
619
|
-
│
|
|
620
|
-
│
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
│
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
│
|
|
627
|
-
│
|
|
628
|
-
|
|
629
|
-
|
|
655
|
+
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
656
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
657
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
658
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
659
|
+
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
|
|
660
|
+
│ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
|
|
661
|
+
│ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
|
|
662
|
+
│ y|dbml] │
|
|
663
|
+
│ --output PATH Specify the file path where the exported data will be │
|
|
664
|
+
│ saved. If no path is provided, the output will be │
|
|
665
|
+
│ printed to stdout. │
|
|
666
|
+
│ [default: None] │
|
|
667
|
+
│ --server TEXT The server name to export. [default: None] │
|
|
668
|
+
│ --model TEXT Use the key of the model in the data contract yaml │
|
|
669
|
+
│ file to refer to a model, e.g., `orders`, or `all` │
|
|
670
|
+
│ for all models (default). │
|
|
671
|
+
│ [default: all] │
|
|
672
|
+
│ --help Show this message and exit. │
|
|
673
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
674
|
+
╭─ RDF Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
675
|
+
│ --rdf-base TEXT [rdf] The base URI used to generate the RDF graph. [default: None] │
|
|
676
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
677
|
+
╭─ SQL Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
678
|
+
│ --sql-server-type TEXT [sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically │
|
|
679
|
+
│ detect the sql dialect via the specified servers in the data contract. │
|
|
680
|
+
│ [default: auto] │
|
|
681
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
630
682
|
|
|
631
683
|
```
|
|
632
684
|
|
|
@@ -639,31 +691,33 @@ Available export options:
|
|
|
639
691
|
|
|
640
692
|
| Type | Description | Status |
|
|
641
693
|
|----------------------|---------------------------------------------------------|--------|
|
|
642
|
-
| `html` | Export to HTML | ✅
|
|
643
|
-
| `jsonschema` | Export to JSON Schema | ✅
|
|
644
|
-
| `odcs` | Export to Open Data Contract Standard (ODCS) | ✅
|
|
645
|
-
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅
|
|
646
|
-
| `dbt` | Export to dbt models in YAML format | ✅
|
|
647
|
-
| `dbt-sources` | Export to dbt sources in YAML format | ✅
|
|
648
|
-
| `dbt-staging-sql` | Export to dbt staging SQL models | ✅
|
|
649
|
-
| `rdf` | Export data contract to RDF representation in N3 format | ✅
|
|
650
|
-
| `avro` | Export to AVRO models | ✅
|
|
651
|
-
| `protobuf` | Export to Protobuf | ✅
|
|
652
|
-
| `terraform` | Export to terraform resources | ✅
|
|
653
|
-
| `sql` | Export to SQL DDL | ✅
|
|
654
|
-
| `sql-query` | Export to SQL Query | ✅
|
|
655
|
-
| `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅
|
|
656
|
-
| `bigquery` | Export to BigQuery Schemas |
|
|
657
|
-
| `
|
|
694
|
+
| `html` | Export to HTML | ✅ |
|
|
695
|
+
| `jsonschema` | Export to JSON Schema | ✅ |
|
|
696
|
+
| `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
|
|
697
|
+
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
|
|
698
|
+
| `dbt` | Export to dbt models in YAML format | ✅ |
|
|
699
|
+
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
|
|
700
|
+
| `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
|
|
701
|
+
| `rdf` | Export data contract to RDF representation in N3 format | ✅ |
|
|
702
|
+
| `avro` | Export to AVRO models | ✅ |
|
|
703
|
+
| `protobuf` | Export to Protobuf | ✅ |
|
|
704
|
+
| `terraform` | Export to terraform resources | ✅ |
|
|
705
|
+
| `sql` | Export to SQL DDL | ✅ |
|
|
706
|
+
| `sql-query` | Export to SQL Query | ✅ |
|
|
707
|
+
| `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
|
|
708
|
+
| `bigquery` | Export to BigQuery Schemas | ✅ |
|
|
709
|
+
| `go` | Export to Go types | ✅ |
|
|
710
|
+
| `pydantic-model` | Export to pydantic models | ✅ |
|
|
711
|
+
| `dbml` | Export to a DBML Diagram description | ✅ |
|
|
658
712
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
659
713
|
|
|
660
714
|
#### Great Expectations
|
|
661
715
|
|
|
662
|
-
The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
|
|
716
|
+
The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
|
|
663
717
|
If the contract includes multiple models, you need to specify the names of the model you wish to export.
|
|
664
718
|
|
|
665
719
|
```shell
|
|
666
|
-
datacontract export datacontract.yaml --format great-expectations --model orders
|
|
720
|
+
datacontract export datacontract.yaml --format great-expectations --model orders
|
|
667
721
|
```
|
|
668
722
|
|
|
669
723
|
The export creates a list of expectations by utilizing:
|
|
@@ -673,7 +727,7 @@ The export creates a list of expectations by utilizing:
|
|
|
673
727
|
|
|
674
728
|
#### RDF
|
|
675
729
|
|
|
676
|
-
The export function converts a given data contract into a RDF representation. You have the option to
|
|
730
|
+
The export function converts a given data contract into a RDF representation. You have the option to
|
|
677
731
|
add a base_url which will be used as the default prefix to resolve relative IRIs inside the document.
|
|
678
732
|
|
|
679
733
|
```shell
|
|
@@ -695,6 +749,54 @@ Having the data contract inside an RDF Graph gives us access the following use c
|
|
|
695
749
|
- Apply graph algorithms on multiple data contracts (Find similar data contracts, find "gatekeeper"
|
|
696
750
|
data products, find the true domain owner of a field attribute)
|
|
697
751
|
|
|
752
|
+
#### DBML
|
|
753
|
+
|
|
754
|
+
The export function converts the logical data types of the datacontract into the specific ones of a concrete Database
|
|
755
|
+
if a server is selected via the `--server` option (based on the `type` of that server). If no server is selected, the
|
|
756
|
+
logical data types are exported.
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
#### Avro
|
|
760
|
+
|
|
761
|
+
The export function converts the data contract specification into an avro schema. It supports specifying custom avro properties for logicalTypes and default values.
|
|
762
|
+
|
|
763
|
+
##### Custom Avro Properties
|
|
764
|
+
|
|
765
|
+
We support a **config map on field level**. A config map may include any additional key-value pairs and supports multiple server type bindings.
|
|
766
|
+
|
|
767
|
+
To specify custom Avro properties in your data contract, you can define them within the `config` section of your field definition. Below is an example of how to structure your YAML configuration to include custom Avro properties, such as `avroLogicalType` and `avroDefault`.
|
|
768
|
+
|
|
769
|
+
>NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.html).
|
|
770
|
+
|
|
771
|
+
#### Example Configuration
|
|
772
|
+
|
|
773
|
+
```yaml
|
|
774
|
+
models:
|
|
775
|
+
orders:
|
|
776
|
+
fields:
|
|
777
|
+
my_field_1:
|
|
778
|
+
description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
|
|
779
|
+
type: long
|
|
780
|
+
example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
|
|
781
|
+
config:
|
|
782
|
+
avroLogicalType: local-timestamp-micros
|
|
783
|
+
avroDefault: 1672534861000000
|
|
784
|
+
```
|
|
785
|
+
|
|
786
|
+
#### Explanation
|
|
787
|
+
|
|
788
|
+
- **models**: The top-level key that contains different models (tables or objects) in your data contract.
|
|
789
|
+
- **orders**: A specific model name. Replace this with the name of your model.
|
|
790
|
+
- **fields**: The fields within the model. Each field can have various properties defined.
|
|
791
|
+
- **my_field_1**: The name of a specific field. Replace this with your field name.
|
|
792
|
+
- **description**: A textual description of the field.
|
|
793
|
+
- **type**: The data type of the field. In this example, it is `long`.
|
|
794
|
+
- **example**: An example value for the field.
|
|
795
|
+
- **config**: Section to specify custom Avro properties.
|
|
796
|
+
- **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
|
|
797
|
+
- **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to `2023-01-01 01:01:01 UTC`.
|
|
798
|
+
|
|
799
|
+
|
|
698
800
|
### import
|
|
699
801
|
|
|
700
802
|
```
|
|
@@ -702,29 +804,49 @@ data products, find the true domain owner of a field attribute)
|
|
|
702
804
|
|
|
703
805
|
Create a data contract from the given source location. Prints to stdout.
|
|
704
806
|
|
|
705
|
-
╭─ Options
|
|
706
|
-
│ * --format
|
|
707
|
-
│
|
|
708
|
-
│ --
|
|
709
|
-
|
|
807
|
+
╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
808
|
+
│ * --format [sql|avro|glue|bigquery|jsonschema] The format of the source file. [default: None] [required] │
|
|
809
|
+
│ --source TEXT The path to the file or Glue Database that should be imported. [default: None] │
|
|
810
|
+
│ --bigquery-project TEXT The bigquery project id. [default: None] │
|
|
811
|
+
│ --bigquery-dataset TEXT The bigquery dataset id. [default: None] │
|
|
812
|
+
│ --bigquery-table TEXT List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all │
|
|
813
|
+
│ tables in the dataset). │
|
|
814
|
+
│ [default: None] │
|
|
815
|
+
│ --help Show this message and exit. │
|
|
816
|
+
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
710
817
|
```
|
|
711
818
|
|
|
712
|
-
|
|
819
|
+
As shown, some options are only relevant in certain conditions: for `format` `bigquery`, we support reading directly from the BigQuery API.
|
|
820
|
+
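In this case there's no need to specify `source`; instead, `bigquery-project` and `bigquery-dataset` must be specified, and `bigquery-table` can be repeated to select individual tables (leave it out to import all tables in the dataset).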
|
|
821
|
+
|
|
822
|
+
For providing authentication to the client, please see [the Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
|
|
823
|
+
|
|
824
|
+
Example:
|
|
713
825
|
```bash
|
|
714
826
|
# Example import from SQL DDL
|
|
715
827
|
datacontract import --format sql --source my_ddl.sql
|
|
716
828
|
```
|
|
717
829
|
|
|
830
|
+
```bash
|
|
831
|
+
# Example import from Bigquery JSON
|
|
832
|
+
datacontract import --format bigquery --source my_bigquery_table.json
|
|
833
|
+
```
|
|
834
|
+
|
|
835
|
+
```bash
|
|
836
|
+
# Example import from Bigquery API
|
|
837
|
+
datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id> --bigquery-table <tableid_1> --bigquery-table <tableid_2> --bigquery-table <tableid_3>
|
|
838
|
+
```
|
|
839
|
+
|
|
718
840
|
Available import options:
|
|
719
841
|
|
|
720
842
|
| Type | Description | Status |
|
|
721
843
|
|--------------------|------------------------------------------------|---------|
|
|
722
|
-
| `sql` | Import from SQL DDL | ✅
|
|
723
|
-
| `avro` | Import from AVRO schemas | ✅
|
|
724
|
-
| `glue` | Import from AWS Glue DataCatalog | ✅
|
|
844
|
+
| `sql` | Import from SQL DDL | ✅ |
|
|
845
|
+
| `avro` | Import from AVRO schemas | ✅ |
|
|
846
|
+
| `glue` | Import from AWS Glue DataCatalog | ✅ |
|
|
725
847
|
| `protobuf` | Import from Protobuf schemas | TBD |
|
|
726
|
-
| `jsonschema` | Import from JSON Schemas |
|
|
727
|
-
| `bigquery` | Import from BigQuery Schemas |
|
|
848
|
+
| `jsonschema` | Import from JSON Schemas | ✅ |
|
|
849
|
+
| `bigquery` | Import from BigQuery Schemas | ✅ |
|
|
728
850
|
| `dbt` | Import from dbt models | TBD |
|
|
729
851
|
| `odcs` | Import from Open Data Contract Standard (ODCS) | TBD |
|
|
730
852
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
@@ -733,45 +855,45 @@ Available import options:
|
|
|
733
855
|
### breaking
|
|
734
856
|
|
|
735
857
|
```
|
|
736
|
-
Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
737
|
-
|
|
738
|
-
Identifies breaking changes between data contracts. Prints to stdout.
|
|
739
|
-
|
|
858
|
+
Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
859
|
+
|
|
860
|
+
Identifies breaking changes between data contracts. Prints to stdout.
|
|
861
|
+
|
|
740
862
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
741
|
-
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required]
|
|
742
|
-
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required]
|
|
863
|
+
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
864
|
+
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
743
865
|
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
744
866
|
╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
745
|
-
│ --help Show this message and exit.
|
|
867
|
+
│ --help Show this message and exit. │
|
|
746
868
|
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
747
869
|
```
|
|
748
870
|
|
|
749
871
|
### changelog
|
|
750
872
|
|
|
751
873
|
```
|
|
752
|
-
Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
753
|
-
|
|
754
|
-
Generate a changelog between data contracts. Prints to stdout.
|
|
755
|
-
|
|
874
|
+
Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
875
|
+
|
|
876
|
+
Generate a changelog between data contracts. Prints to stdout.
|
|
877
|
+
|
|
756
878
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
757
|
-
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required]
|
|
758
|
-
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required]
|
|
879
|
+
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
880
|
+
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
759
881
|
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
760
882
|
╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
761
|
-
│ --help Show this message and exit.
|
|
883
|
+
│ --help Show this message and exit. │
|
|
762
884
|
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
763
885
|
```
|
|
764
886
|
|
|
765
887
|
### diff
|
|
766
888
|
|
|
767
889
|
```
|
|
768
|
-
Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
769
|
-
|
|
770
|
-
PLACEHOLDER. Currently works as 'changelog' does.
|
|
771
|
-
|
|
890
|
+
Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
|
|
891
|
+
|
|
892
|
+
PLACEHOLDER. Currently works as 'changelog' does.
|
|
893
|
+
|
|
772
894
|
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
773
|
-
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required]
|
|
774
|
-
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required]
|
|
895
|
+
│ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
|
|
896
|
+
│ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
|
|
775
897
|
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
776
898
|
╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
777
899
|
│ --help Show this message and exit. │
|
|
@@ -787,12 +909,27 @@ Available import options:
|
|
|
787
909
|
Create an html catalog of data contracts.
|
|
788
910
|
|
|
789
911
|
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
790
|
-
│ --files TEXT Glob pattern for the data contract files to include in the catalog. [default: *.yaml]
|
|
791
|
-
│ --output TEXT Output directory for the catalog html files. [default: catalog/]
|
|
792
|
-
│ --help Show this message and exit.
|
|
912
|
+
│ --files TEXT Glob pattern for the data contract files to include in the catalog. [default: *.yaml] │
|
|
913
|
+
│ --output TEXT Output directory for the catalog html files. [default: catalog/] │
|
|
914
|
+
│ --help Show this message and exit. │
|
|
793
915
|
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
794
916
|
```
|
|
795
917
|
|
|
918
|
+
### publish
|
|
919
|
+
|
|
920
|
+
```
|
|
921
|
+
|
|
922
|
+
Usage: datacontract publish [OPTIONS] [LOCATION]
|
|
923
|
+
|
|
924
|
+
Publish the data contract to the Data Mesh Manager.
|
|
925
|
+
|
|
926
|
+
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
927
|
+
│ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
|
|
928
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
929
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
930
|
+
│ --help Show this message and exit. │
|
|
931
|
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
932
|
+
```
|
|
796
933
|
|
|
797
934
|
## Integrations
|
|
798
935
|
|
|
@@ -885,14 +1022,14 @@ Create a data contract based on the requirements from use cases.
|
|
|
885
1022
|
```bash
|
|
886
1023
|
$ datacontract init
|
|
887
1024
|
```
|
|
888
|
-
|
|
1025
|
+
|
|
889
1026
|
2. Add examples to the `datacontract.yaml`. Do not start with the data model, although you are probably tempted to do that. Examples are the fastest way to get feedback from everybody and not lose anyone in the discussion.
|
|
890
1027
|
|
|
891
1028
|
3. Create the model based on the examples. Test the model against the examples to double-check whether the model matches the examples.
|
|
892
1029
|
```bash
|
|
893
1030
|
$ datacontract test --examples
|
|
894
1031
|
```
|
|
895
|
-
|
|
1032
|
+
|
|
896
1033
|
4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
|
|
897
1034
|
```bash
|
|
898
1035
|
$ datacontract test --examples
|