datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (48)
  1. datacontract/breaking/breaking_rules.py +4 -0
  2. datacontract/cli.py +49 -32
  3. datacontract/data_contract.py +14 -11
  4. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
  5. datacontract/engines/soda/check_soda_execute.py +9 -4
  6. datacontract/engines/soda/connections/databricks.py +12 -3
  7. datacontract/engines/soda/connections/duckdb.py +22 -9
  8. datacontract/export/data_caterer_converter.py +20 -7
  9. datacontract/export/dbml_converter.py +2 -2
  10. datacontract/export/dbt_converter.py +41 -16
  11. datacontract/export/exporter.py +6 -2
  12. datacontract/export/exporter_factory.py +48 -14
  13. datacontract/export/iceberg_converter.py +3 -3
  14. datacontract/export/markdown_converter.py +208 -0
  15. datacontract/export/odcs_v3_exporter.py +6 -0
  16. datacontract/export/sodacl_converter.py +22 -5
  17. datacontract/export/sql_converter.py +1 -1
  18. datacontract/export/sql_type_converter.py +28 -2
  19. datacontract/export/sqlalchemy_converter.py +3 -1
  20. datacontract/imports/csv_importer.py +89 -0
  21. datacontract/imports/dbml_importer.py +1 -1
  22. datacontract/imports/dbt_importer.py +94 -12
  23. datacontract/imports/importer.py +1 -0
  24. datacontract/imports/importer_factory.py +5 -0
  25. datacontract/imports/odcs_v2_importer.py +1 -1
  26. datacontract/imports/odcs_v3_importer.py +1 -1
  27. datacontract/imports/sql_importer.py +1 -1
  28. datacontract/init/init_template.py +20 -0
  29. datacontract/integration/datamesh_manager.py +15 -9
  30. datacontract/lint/linters/field_reference_linter.py +10 -1
  31. datacontract/lint/resolve.py +48 -14
  32. datacontract/lint/schema.py +10 -3
  33. datacontract/model/data_contract_specification.py +13 -4
  34. datacontract/model/run.py +1 -0
  35. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  36. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  37. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  38. datacontract/templates/datacontract.html +20 -1
  39. datacontract/templates/partials/definition.html +15 -5
  40. datacontract/templates/partials/model_field.html +10 -1
  41. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
  42. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
  43. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
  44. datacontract/init/download_datacontract_file.py +0 -17
  45. datacontract/integration/opentelemetry.py +0 -103
  46. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  47. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  48. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: datacontract-cli
- Version: 0.10.16
+ Version: 0.10.19
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
  Project-URL: Homepage, https://cli.datacontract.com
@@ -11,64 +11,67 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: typer<0.15,>=0.12
+ Requires-Dist: typer<0.16,>=0.15.1
  Requires-Dist: pydantic<2.11.0,>=2.8.2
  Requires-Dist: pyyaml~=6.0.1
  Requires-Dist: requests<2.33,>=2.31
  Requires-Dist: fastapi==0.115.6
- Requires-Dist: uvicorn==0.32.1
+ Requires-Dist: uvicorn==0.34.0
  Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
  Requires-Dist: fastparquet==2024.11.0
- Requires-Dist: python-multipart==0.0.19
+ Requires-Dist: numpy<2.0.0,>=1.26.4
+ Requires-Dist: python-multipart==0.0.20
  Requires-Dist: rich<13.10,>=13.7
  Requires-Dist: simple-ddl-parser==1.7.1
  Requires-Dist: duckdb==1.1.2
- Requires-Dist: soda-core-duckdb<3.5.0,>=3.3.1
+ Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.20
  Requires-Dist: setuptools>=60
  Requires-Dist: python-dotenv~=1.0.0
  Requires-Dist: rdflib==7.0.0
- Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
- Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
- Requires-Dist: boto3<1.35.69,>=1.34.41
+ Requires-Dist: boto3<1.35.98,>=1.34.41
  Requires-Dist: jinja_partials>=0.2.1
  Provides-Extra: avro
  Requires-Dist: avro==1.12.0; extra == "avro"
  Provides-Extra: bigquery
- Requires-Dist: soda-core-bigquery<3.5.0,>=3.3.1; extra == "bigquery"
+ Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.20; extra == "bigquery"
+ Provides-Extra: csv
+ Requires-Dist: clevercsv>=0.8.2; extra == "csv"
+ Requires-Dist: pandas>=2.0.0; extra == "csv"
  Provides-Extra: databricks
- Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "databricks"
- Requires-Dist: databricks-sql-connector<3.6.0,>=3.1.2; extra == "databricks"
- Requires-Dist: databricks-sdk<0.39.0,>=0.32.0; extra == "databricks"
- Requires-Dist: soda-core-spark[databricks]<3.5.0,>=3.3.1; extra == "databricks"
+ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.20; extra == "databricks"
+ Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.20; extra == "databricks"
+ Requires-Dist: databricks-sql-connector<3.8.0,>=3.7.0; extra == "databricks"
+ Requires-Dist: databricks-sdk<0.41.0; extra == "databricks"
  Provides-Extra: iceberg
  Requires-Dist: pyiceberg==0.8.1; extra == "iceberg"
  Provides-Extra: kafka
  Requires-Dist: datacontract-cli[avro]; extra == "kafka"
- Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "kafka"
+ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.20; extra == "kafka"
  Provides-Extra: postgres
- Requires-Dist: soda-core-postgres<3.5.0,>=3.3.1; extra == "postgres"
+ Requires-Dist: soda-core-postgres<3.4.0,>=3.3.20; extra == "postgres"
  Provides-Extra: s3
- Requires-Dist: s3fs==2024.10.0; extra == "s3"
+ Requires-Dist: s3fs==2024.12.0; extra == "s3"
+ Requires-Dist: aiobotocore<2.18.0,>=2.17.0; extra == "s3"
  Provides-Extra: snowflake
  Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
- Requires-Dist: soda-core-snowflake<3.5.0,>=3.3.1; extra == "snowflake"
+ Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.20; extra == "snowflake"
  Provides-Extra: sqlserver
- Requires-Dist: soda-core-sqlserver<3.5.0,>=3.3.1; extra == "sqlserver"
+ Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.20; extra == "sqlserver"
  Provides-Extra: trino
- Requires-Dist: soda-core-trino<3.5.0,>=3.3.1; extra == "trino"
+ Requires-Dist: soda-core-trino<3.4.0,>=3.3.20; extra == "trino"
  Provides-Extra: dbt
  Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
  Provides-Extra: dbml
  Requires-Dist: pydbml>=1.1.1; extra == "dbml"
  Provides-Extra: parquet
- Requires-Dist: pyarrow>=12.0.0; extra == "parquet"
+ Requires-Dist: pyarrow>=18.1.0; extra == "parquet"
  Provides-Extra: all
- Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
+ Requires-Dist: datacontract-cli[bigquery,csv,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
  Provides-Extra: dev
  Requires-Dist: datacontract-cli[all]; extra == "dev"
  Requires-Dist: httpx==0.28.1; extra == "dev"
  Requires-Dist: kafka-python; extra == "dev"
- Requires-Dist: moto==5.0.22; extra == "dev"
+ Requires-Dist: moto==5.0.26; extra == "dev"
  Requires-Dist: pandas>=2.1.0; extra == "dev"
  Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
  Requires-Dist: pytest; extra == "dev"
@@ -76,7 +79,7 @@ Requires-Dist: pytest-xdist; extra == "dev"
  Requires-Dist: pymssql==2.3.2; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.0; extra == "dev"
- Requires-Dist: trino==0.330.0; extra == "dev"
+ Requires-Dist: trino==0.332.0; extra == "dev"
 
  # Data Contract CLI
 
@@ -105,7 +108,7 @@ This data contract contains all information to connect to S3 and check that the
 
  Let's use [pip](https://pip.pypa.io/en/stable/getting-started/) to install the CLI (or use the [Docker image](#docker)),
  ```bash
- $ python3 -m pip install datacontract-cli[all]
+ $ python3 -m pip install 'datacontract-cli[all]'
  ```
 
 
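The added quotes are not cosmetic: in shells such as zsh, an unquoted `datacontract-cli[all]` is treated as a glob pattern and the install fails, so quoting the extras is the safer invocation. The same applies to any extra, including the `csv` extra that is new in this release (a sketch; the extra name is taken from the dependency changes above):

```bash
# quoting keeps the shell from glob-expanding the bracketed extras
python3 -m pip install 'datacontract-cli[csv]'
```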
@@ -230,13 +233,13 @@ Choose the most appropriate installation method for your needs:
  Python 3.10, 3.11, and 3.12 are supported. We recommend to use Python 3.11.
 
  ```bash
- python3 -m pip install datacontract-cli[all]
+ python3 -m pip install 'datacontract-cli[all]'
  ```
 
  ### pipx
  pipx installs into an isolated environment.
  ```bash
- pipx install datacontract-cli[all]
+ pipx install 'datacontract-cli[all]'
  ```
 
  ### Docker
@@ -301,76 +304,104 @@ Commands
  - [diff](#diff)
  - [catalog](#catalog)
  - [publish](#publish)
+ - [serve](#serve)
 
  ### init
-
  ```
- Usage: datacontract init [OPTIONS] [LOCATION]
-
- Download a datacontract.yaml template and write it to file.
+
+ Usage: datacontract init [OPTIONS] [LOCATION]
+
+ Download a datacontract.yaml template and write it to file.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ location [LOCATION] The location (url or path) of the data contract │
+ │ yaml to create. │
+ │ [default: datacontract.yaml] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --template TEXT URL of a template or data contract │
+ │ [default: │
+ │ https://datacontract.com/datacontrac… │
+ │ --overwrite --no-overwrite Replace the existing │
+ │ datacontract.yaml │
+ │ [default: no-overwrite] │
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- ╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────╮
- │ location [LOCATION] The location (url or path) of the data contract yaml to create. │
- │ [default: datacontract.yaml] │
- ╰──────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ────────────────────────────────────────────────────────────────────────────────────╮
- │ --template TEXT URL of a template or data contract │
- │ [default: │
- │ https://datacontract.com/datacontract.init.yaml] │
- │ --overwrite --no-overwrite Replace the existing datacontract.yaml │
- │ [default: no-overwrite] │
- │ --help Show this message and exit. │
- ╰──────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
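A quick sketch of `init` with an explicit template. The `--template` default is truncated in the new help box above; the full URL, visible in the removed lines of this hunk, is used here:

```bash
# initialize a new contract from an explicit template URL
datacontract init datacontract.yaml --template https://datacontract.com/datacontract.init.yaml
```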
  ### lint
-
  ```
- Usage: datacontract lint [OPTIONS] [LOCATION]
+
+ Usage: datacontract lint [OPTIONS] [LOCATION]
+
+ Validate that the datacontract.yaml is correctly formatted.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ location [LOCATION] The location (url or path) of the data contract │
+ │ yaml. │
+ │ [default: datacontract.yaml] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --schema TEXT The location (url or path) of the Data Contract │
+ │ Specification JSON Schema │
+ │ [default: │
+ │ https://datacontract.com/datacontract.schema.json] │
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- Validate that the datacontract.yaml is correctly formatted.
-
- ╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --schema TEXT The location (url or path) of the Data Contract Specification JSON Schema │
- │ [default: https://datacontract.com/datacontract.schema.json] │
- │ --help Show this message and exit. │
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
  ### test
-
  ```
- Usage: datacontract test [OPTIONS] [LOCATION]
-
- Run schema and quality tests on configured servers.
-
- ╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- --schema TEXT The location (url or path) of the Data Contract
- │ Specification JSON Schema │
- │ [default: │
- https://datacontract.com/datacontract.schema.json]
- --server TEXT The server configuration to run the schema and quality
- tests. Use the key of the server object in the data
- contract yaml file to refer to a server, e.g.,
- `production`, or `all` for all servers (default).
- [default: all]
- │ --examples --no-examples Run the schema and quality tests on the example data
- within the data contract.
- [default: no-examples]
- --publish TEXT The url to publish the results after the test
- [default: None]
- --publish-to-opentelemetry --no-publish-to-opentelemetry Publish the results to opentelemetry. Use environment
- variables to configure the OTLP endpoint, headers, etc.
- [default: no-publish-to-opentelemetry]
- --logs --no-logs Print logs [default: no-logs]
- --help Show this message and exit.
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
+ Usage: datacontract test [OPTIONS] [LOCATION]
+
+ Run schema and quality tests on configured servers.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ location [LOCATION] The location (url or path) of the data contract │
+ │ yaml. │
+ [default: datacontract.yaml]
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ --schema TEXT The location (url or
+ path) of the Data
+ Contract Specification
+ JSON Schema
+ [default:
+ https://datacontract.c…
+ │ --server TEXT The server
+ configuration to run
+ the schema and quality
+ tests. Use the key of
+ the server object in
+ the data contract yaml
+ file to refer to a
+ server, e.g.,
+ `production`, or `all`
+ for all servers
+ │ (default). │
+ │ [default: all] │
+ │ --examples --no-examples Run the schema and │
+ │ quality tests on the │
+ │ example data within the │
+ │ data contract. │
+ │ [default: no-examples] │
+ │ --publish TEXT The url to publish the │
+ │ results after the test │
+ │ [default: None] │
+ │ --logs --no-logs Print logs │
+ │ [default: no-logs] │
+ │ --ssl-verification --no-ssl-verification SSL verification when │
+ │ publishing the data │
+ │ contract. │
+ │ [default: │
+ │ ssl-verification] │
+ │ --help Show this message and │
+ │ exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+
  ```
 
  Data Contract CLI connects to a data source and runs schema and quality tests to verify that the data contract is valid.
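A sketch of a `test` invocation combining the options above, including the `--ssl-verification` toggle that is new in this release (server name and publish URL are placeholders):

```bash
# run all tests against the 'production' server and publish the results;
# use --no-ssl-verification only for endpoints with self-signed certificates
datacontract test datacontract.yaml --server production \
  --publish https://api.datamesh-manager.com/api/test-results \
  --no-ssl-verification
```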
@@ -412,7 +443,7 @@ Supported formats:
 
  Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/issues), if you need support for an additional type and formats.
 
- ### S3
+ #### S3
 
  Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.
 
@@ -422,9 +453,9 @@ Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant
  - Parquet
  - Iceberg (coming soon)
 
- #### Examples
+ ##### Examples
 
- ##### JSON
+ ###### JSON
 
  datacontract.yaml
  ```yaml
@@ -437,7 +468,7 @@ servers:
  delimiter: new_line # new_line, array, or none
  ```
 
- ##### Delta Tables
+ ###### Delta Tables
 
  datacontract.yaml
  ```yaml
@@ -449,7 +480,7 @@ servers:
  format: delta
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |-------------------------------------|---------------------------------|----------------------------------------|
@@ -460,12 +491,12 @@ servers:
 
 
 
- ### Google Cloud Storage (GCS)
+ #### Google Cloud Storage (GCS)
 
  The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
  Use `https://storage.googleapis.com` as the endpoint URL.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -478,7 +509,7 @@ servers:
  delimiter: new_line # new_line, array, or none
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |-------------------------------------|----------------|------------------------------------------------------------------------------------------|
@@ -486,14 +517,14 @@ servers:
  | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
 
 
- ### BigQuery
+ #### BigQuery
 
  We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
  * BigQuery Job User
  * BigQuery Data Viewer
 
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -508,29 +539,30 @@ models:
  fields: ...
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |----------------------------------------------|---------------------------|---------------------------------------------------------|
  | `DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH` | `~/service-access-key.json` | Service Access key as saved on key creation by BigQuery. If this environment variable isn't set, the cli tries to use `GOOGLE_APPLICATION_CREDENTIALS` as a fallback, so if you have that set for using their Python library anyway, it should work seamlessly. |
 
 
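A sketch of running tests against the BigQuery server defined above, with the service account key wired in through the environment variable from the table (path and server name are placeholders):

```bash
export DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH=~/service-access-key.json
datacontract test datacontract.yaml --server production
```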
- ### Azure
+ #### Azure
 
  Data Contract CLI can test data that is stored in Azure Blob storage or Azure Data Lake Storage (Gen2) (ADLS) in various formats.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
  servers:
  production:
  type: azure
- location: abfss://datameshdatabricksdemo.dfs.core.windows.net/dataproducts/inventory_events/*.parquet
+ storageAccount: datameshdatabricksdemo
+ location: abfss://dataproducts/inventory_events/*.parquet
  format: parquet
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  Authentication works with an Azure Service Principal (SPN) aka App Registration with a secret.
 
@@ -542,11 +574,11 @@ Authentication works with an Azure Service Principal (SPN) aka App Registration
 
 
 
- ### Sqlserver
+ #### Sqlserver
 
  Data Contract CLI can test data in MS SQL Server (including Azure SQL, Synapse Analytics SQL Pool).
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -566,7 +598,7 @@ models:
  type: varchar
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example| Description |
  |---------------------------------------------------|--------|----------------------------------------------|
@@ -579,20 +611,19 @@ models:
 
 
 
- ### Databricks
+ #### Databricks
 
  Works with Unity Catalog and Hive metastore.
 
  Needs a running SQL warehouse or compute cluster.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
  servers:
  production:
  type: databricks
- host: dbc-abcdefgh-1234.cloud.databricks.com
  catalog: acme_catalog_prod
  schema: orders_latest
  models:
@@ -601,15 +632,16 @@ models:
  fields: ...
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
- | Environment Variable | Example | Description |
- |----------------------------------------------|--------------------------------------|-------------------------------------------------------|
- | `DATACONTRACT_DATABRICKS_TOKEN` | `dapia00000000000000000000000000000` | The personal access token to authenticate |
- | `DATACONTRACT_DATABRICKS_HTTP_PATH` | `/sql/1.0/warehouses/b053a3ffffffff` | The HTTP path to the SQL warehouse or compute cluster |
+ | Environment Variable | Example | Description |
+ |-------------------------------------------|--------------------------------------|-----------------------------------------------------------|
+ | `DATACONTRACT_DATABRICKS_TOKEN` | `dapia00000000000000000000000000000` | The personal access token to authenticate |
+ | `DATACONTRACT_DATABRICKS_HTTP_PATH` | `/sql/1.0/warehouses/b053a3ffffffff` | The HTTP path to the SQL warehouse or compute cluster |
+ | `DATACONTRACT_DATABRICKS_SERVER_HOSTNAME` | `dbc-abcdefgh-1234.cloud.databricks.com` | The host name of the SQL warehouse or compute cluster |
 
 
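With `host` removed from the server block, the hostname now comes from the new `DATACONTRACT_DATABRICKS_SERVER_HOSTNAME` variable. A sketch wiring up all three variables, using the placeholder values from the table:

```bash
export DATACONTRACT_DATABRICKS_SERVER_HOSTNAME=dbc-abcdefgh-1234.cloud.databricks.com
export DATACONTRACT_DATABRICKS_HTTP_PATH=/sql/1.0/warehouses/b053a3ffffffff
export DATACONTRACT_DATABRICKS_TOKEN=dapia00000000000000000000000000000
datacontract test datacontract.yaml --server production
```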
- ### Databricks (programmatic)
+ #### Databricks (programmatic)
  Works with Unity Catalog and Hive metastore.
  When running in a notebook or pipeline, the provided `spark` session can be used.
  An additional authentication is not required.
@@ -617,7 +649,7 @@ An additional authentication is not required.
 
  Requires a Databricks Runtime with Python >= 3.10.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -647,7 +679,7 @@ run = data_contract.test()
  run.result
  ```
 
- ### Dataframe (programmatic)
+ #### Dataframe (programmatic)
 
  Works with Spark DataFrames.
  DataFrames need to be created as named temporary views.
@@ -655,7 +687,7 @@ Multiple temporary views are supported if your data contract contains multiple m
 
  Testing DataFrames is useful to test your datasets in a pipeline before writing them to a data source.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -683,11 +715,11 @@ assert run.result == "passed"
  ```
 
 
- ### Snowflake
+ #### Snowflake
 
  Data Contract CLI can test data in Snowflake.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -706,11 +738,11 @@ models:
  type: varchar
  ```
 
- #### Environment Variables
- All [parameters supported by Soda](https://docs.soda.io/soda/connect-snowflake.html), uppercased and prepended by `DATACONTRACT_SNOWFLAKE_` prefix.
+ ##### Environment Variables
+ All [parameters supported by Soda](https://docs.soda.io/soda/connect-snowflake.html), uppercased and prepended by `DATACONTRACT_SNOWFLAKE_` prefix.
  For example:
 
- | Soda parameter | Environment Variable |
+ | Soda parameter | Environment Variable |
  |----------------------|---------------------------------------------|
  | `username` | `DATACONTRACT_SNOWFLAKE_USERNAME` |
  | `password` | `DATACONTRACT_SNOWFLAKE_PASSWORD` |
@@ -723,7 +755,7 @@ Beware, that parameters:
  * `database`
  * `schema`
 
- are obtained from the `servers` section of the YAML-file.
+ are obtained from the `servers` section of the YAML-file.
  E.g. from the example above:
  ```yaml
  servers:
@@ -734,11 +766,11 @@ servers:
  ```
 
 
- ### Kafka
+ #### Kafka
 
  Kafka support is currently considered experimental.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -750,7 +782,7 @@ servers:
  format: json
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |-------------------------------------|---------|----------------------------------------------------------------------------------|
@@ -759,11 +791,11 @@ servers:
  | `DATACONTRACT_KAFKA_SASL_MECHANISM` | `PLAIN` | Default `PLAIN`. Other supported mechanisms: `SCRAM-SHA-256` and `SCRAM-SHA-512` |
 
 
- ### Postgres
+ #### Postgres
 
  Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -782,7 +814,7 @@ models:
  type: varchar
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |----------------------------------|--------------------|-------------|
@@ -790,11 +822,11 @@ models:
  | `DATACONTRACT_POSTGRES_PASSWORD` | `mysecretpassword` | Password |
 
 
- ### Trino
+ #### Trino
 
  Data Contract CLI can test data in Trino.
 
- #### Example
+ ##### Example
 
  datacontract.yaml
  ```yaml
@@ -817,7 +849,7 @@ models:
  trinoType: row(en_us varchar, pt_br varchar)
  ```
 
- #### Environment Variables
+ ##### Environment Variables
 
  | Environment Variable | Example | Description |
  |-------------------------------|--------------------|-------------|
@@ -827,46 +859,64 @@ models:
 
 
  ### export
-
  ```
- Usage: datacontract export [OPTIONS] [LOCATION]
-
- Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
-
- ╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required]
- t-staging-sql|odcs|odcs_v2|odcs_v3|rdf|avro|protobuf
- │ |great-expectations|terraform|avro-idl|sql|sql-query │
- │ |html|go|bigquery|dbml|spark|sqlalchemy|data-caterer │
- |dcs|iceberg]
- --output PATH Specify the file path where the exported data will be
- saved. If no path is provided, the output will be
- printed to stdout.
- [default: None]
- --server TEXT The server name to export. [default: None]
- --model TEXT Use the key of the model in the data contract yaml
- file to refer to a model, e.g., `orders`, or `all`
- for all models (default).
- [default: all]
- --schema TEXT The location (url or path) of the Data Contract
- Specification JSON Schema
- [default:
- https://datacontract.com/datacontract.schema.json]
- --engine TEXT [engine] The engine used for great expection run.
- [default: None]
- --help Show this message and exit.
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ RDF Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- --rdf-base TEXT [rdf] The base URI used to generate the RDF graph. [default: None]
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ SQL Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- --sql-server-type TEXT [sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically
- detect the sql dialect via the specified servers in the data contract.
- [default: auto]
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
+ Usage: datacontract export [OPTIONS] [LOCATION]
+
+ Convert data contract to a specific format. Saves to file specified by
+ `output` option if present, otherwise prints to stdout.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ location [LOCATION] The location (url or path) of the data contract │
+ yaml.
+ [default: datacontract.yaml]
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ * --format [jsonschema|pydantic-model| The export format.
+ sodacl|dbt|dbt-sources|dbt- [default: None]
+ staging-sql|odcs|odcs_v2|od [required]
+ cs_v3|rdf|avro|protobuf|gre
+ at-expectations|terraform|a
+ vro-idl|sql|sql-query|html|
+ go|bigquery|dbml|spark|sqla
+ lchemy|data-caterer|dcs|mar
+ kdown|iceberg]
+ --output PATH Specify the file path where
+ the exported data will be
+ saved. If no path is
+ provided, the output will be
+ printed to stdout.
+ [default: None]
+ --server TEXT The server name to export.
+ [default: None]
+ │ --model TEXT Use the key of the model in │
+ │ the data contract yaml file │
+ to refer to a model, e.g.,
+ │ `orders`, or `all` for all │
+ │ models (default). │
+ [default: all]
+ --schema TEXT The location (url or path)
+ of the Data Contract
+ │ Specification JSON Schema │
+ │ [default: │
+ │ https://datacontract.com/da… │
+ │ --engine TEXT [engine] The engine used for │
+ │ great expection run. │
+ │ [default: None] │
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ RDF Options ────────────────────────────────────────────────────────────────╮
+ │ --rdf-base TEXT [rdf] The base URI used to generate the RDF graph. │
+ │ [default: None] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ SQL Options ────────────────────────────────────────────────────────────────╮
+ │ --sql-server-type TEXT [sql] The server type to determine the sql │
+ │ dialect. By default, it uses 'auto' to │
+ │ automatically detect the sql dialect via the │
+ │ specified servers in the data contract. │
+ │ [default: auto] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+
  ```
 
  ```bash
@@ -902,6 +952,7 @@ Available export options:
  | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
  | `data-caterer` | Export to Data Caterer in YAML format | ✅ |
  | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
+ | `markdown` | Export to Markdown | ✅ |
  | `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
  | Missing something? | Please create an issue on GitHub | TBD |
 
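Since the `markdown` exporter is new in this release, a usage sketch (the output file name is arbitrary):

```bash
# render a human-readable Markdown summary of the contract
datacontract export datacontract.yaml --format markdown --output datacontract.md
```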
@@ -920,7 +971,7 @@ The export creates a list of expectations by utilizing:
  - The data from the Model definition with a fixed mapping
  - The expectations provided in the quality field for each model (find here the expectations gallery: [Great Expectations Gallery](https://greatexpectations.io/expectations/))
 
- ### Additional Arguments
+ ##### Additional Arguments
 
  To further customize the export, the following optional arguments are available:
 
@@ -967,8 +1018,8 @@ logical data types are exported.
 
  #### Spark
 
- The export function converts the data contract specification into a StructType Spark schema. The returned value is a Python code picture of the model schemas.
- Spark DataFrame schema is defined as StructType. For more details about Spark Data Types please see [the spark documentation](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
+ The export function converts the data contract specification into a StructType Spark schema. The returned value is a Python code picture of the model schemas.
+ Spark DataFrame schema is defined as StructType. For more details about Spark Data Types please see [the spark documentation](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
 
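A sketch of producing that StructType definition from the CLI; `spark` is one of the `--format` values listed in the export help above, and without `--output` the result goes to stdout:

```bash
# print the Python StructType schema for all models to stdout
datacontract export datacontract.yaml --format spark
```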
  #### Avro
 
@@ -982,20 +1033,6 @@ To specify custom Avro properties in your data contract, you can define them wit
 
  >NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.htm)
 
- #### Data Caterer
-
- The export function converts the data contract to a data generation task in YAML format that can be
- ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
- ability to generate production-like data in any environment based off your data contract.
-
- ```shell
- datacontract export datacontract.yaml --format data-caterer --model orders
- ```
-
- You can further customise the way data is generated via adding
- [additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
- to suit your needs.
-
  #### Example Configuration
 
  ```yaml
@@ -1026,6 +1063,20 @@ models:
  - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
  - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
 
+ #### Data Caterer
+
+ The export function converts the data contract to a data generation task in YAML format that can be
+ ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
+ ability to generate production-like data in any environment based off your data contract.
+
+ ```shell
+ datacontract export datacontract.yaml --format data-caterer --model orders
+ ```
+
+ You can further customise the way data is generated via adding
+ [additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
+ to suit your needs.
+
  #### Iceberg
 
  Exports to an [Iceberg Table Json Schema Definition](https://iceberg.apache.org/spec/#appendix-c-json-serialization).
@@ -1085,55 +1136,100 @@ to limit your contract export to a single model.
  ```
 
  ### import
-
  ```
- Usage: datacontract import [OPTIONS]
-
- Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise
- prints to stdout.
-
- ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- * --format [sql|avro|dbt|dbml|glue|jsonschema|bigquery The format of the source file. │
- |odcs|unity|spark|iceberg|parquet] [default: None]
- [required]
- --output PATH Specify the file path where the Data
- Contract will be saved. If no path is
- provided, the output will be printed to
- stdout.
- [default: None]
- --source TEXT The path to the file or Glue Database that
- should be imported.
- [default: None]
- --glue-table TEXT List of table ids to import from the Glue
- Database (repeat for multiple table ids,
- leave empty for all tables in the dataset).
- [default: None]
- --bigquery-project TEXT The bigquery project id. [default: None]
- --bigquery-dataset TEXT The bigquery dataset id. [default: None]
- │ --bigquery-table TEXT List of table ids to import from the
- bigquery API (repeat for multiple table ids,
- leave empty for all tables in the dataset).
- [default: None]
- --unity-table-full-name TEXT Full name of a table in the unity catalog
- [default: None]
- --dbt-model TEXT List of models names to import from the dbt
- manifest file (repeat for multiple models
- names, leave empty for all models in the
- dataset).
- [default: None]
- │ --dbml-schema TEXT List of schema names to import from the DBML
- file (repeat for multiple schema names,
- leave empty for all tables in the file).
- [default: None]
- --dbml-table TEXT List of table names to import from the DBML
- file (repeat for multiple table names, leave
- empty for all tables in the file).
- [default: None]
- --iceberg-table TEXT Table name to assign to the model created
- from the Iceberg schema.
- [default: None]
- │ --help Show this message and exit.
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
+ Usage: datacontract import [OPTIONS]
+
+ Create a data contract from the given source location. Saves to file specified
+ by `output` option if present, otherwise prints to stdout.
+
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ * --format [sql|avro|dbt|dbml|gl The format of the
+ ue|jsonschema|bigquer source file.
+ y|odcs|unity|spark|ic [default: None]
+ eberg|parquet|csv] [required]
+ --output PATH Specify the file path
+ where the Data
+ Contract will be
+ saved. If no path is
+ provided, the output
+ will be printed to
+ stdout.
+ [default: None]
+ --source TEXT The path to the file
+ or Glue Database that
+ should be imported.
+ [default: None]
+ │ --glue-table TEXT List of table ids to
+ import from the Glue
+ Database (repeat for │
+ multiple table ids,
+ leave empty for all
+ tables in the
+ dataset).
+ [default: None]
+ --bigquery-project TEXT The bigquery project
+ id.
+ [default: None]
+ │ --bigquery-dataset TEXT The bigquery dataset
+ id.
+ [default: None]
+ --bigquery-table TEXT List of table ids to
+ import from the
+ bigquery API (repeat
+ for multiple table
+ ids, leave empty for
+ all tables in the
+ dataset).
+ [default: None]
+ │ --unity-table-full-n… TEXT Full name of a table
+ │ in the unity catalog │
+ │ [default: None] │
+ │ --dbt-model TEXT List of models names │
+ │ to import from the │
+ │ dbt manifest file │
+ │ (repeat for multiple │
+ │ models names, leave │
+ │ empty for all models │
+ │ in the dataset). │
+ │ [default: None] │
+ │ --dbml-schema TEXT List of schema names │
+ │ to import from the │
+ │ DBML file (repeat for │
+ │ multiple schema │
+ │ names, leave empty │
+ │ for all tables in the │
+ │ file). │
+ │ [default: None] │
+ │ --dbml-table TEXT List of table names │
+ │ to import from the │
+ │ DBML file (repeat for │
+ │ multiple table names, │
+ │ leave empty for all │
+ │ tables in the file). │
+ │ [default: None] │
+ │ --iceberg-table TEXT Table name to assign │
+ │ to the model created │
+ │ from the Iceberg │
+ │ schema. │
+ │ [default: None] │
+ │ --template TEXT The location (url or │
+ │ path) of the Data │
+ │ Contract │
+ │ Specification │
+ │ Template │
+ │ [default: None] │
+ │ --schema TEXT The location (url or │
+ │ path) of the Data │
+ │ Contract │
+ │ Specification JSON │
+ │ Schema │
+ │ [default: │
+ │ https://datacontract… │
+ │ --help Show this message and │
+ │ exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+
  ```
 
  Example:
@@ -1158,6 +1254,7 @@ Available import options:
  | `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
  | `spark` | Import from Spark StructTypes | ✅ |
  | `dbml` | Import from DBML models | ✅ |
+ | `csv` | Import from CSV File | ✅ |
  | `protobuf` | Import from Protobuf schemas | TBD |
  | `iceberg` | Import from an Iceberg JSON Schema Definition | partial |
  | `parquet` | Import from Parquet File Metadta | ✅ |
@@ -1300,67 +1397,106 @@ Examples:
  datacontract import --format iceberg --source ./tests/fixtures/iceberg/simple_schema.json --iceberg-table test-table
  ```
 
- ### breaking
+ #### CSV
 
- ```
- Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
+ Importing from CSV File. Specify file in `source` parameter. It does autodetection for encoding and csv dialect
 
- Identifies breaking changes between data contracts. Prints to stdout.
+ Example:
 
- ╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
- │ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --help Show this message and exit. │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ ```bash
+ datacontract import --format csv --source "test.csv"
  ```
 
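A variation that writes the generated contract to a file instead of stdout, using the `--output` option from the import help above (file names are placeholders):

```bash
datacontract import --format csv --source test.csv --output datacontract.yaml
```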
- ### changelog
 
+ ### breaking
  ```
- Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
-
- Generate a changelog between data contracts. Prints to stdout.
+
+ Usage: datacontract breaking [OPTIONS] LOCATION_OLD LOCATION_NEW
+
+ Identifies breaking changes between data contracts. Prints to stdout.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ * location_old TEXT The location (url or path) of the old data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ │ * location_new TEXT The location (url or path) of the new data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- ╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
- │ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --help Show this message and exit. │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
- ### diff
+ ### changelog
+ ```
+
+ Usage: datacontract changelog [OPTIONS] LOCATION_OLD LOCATION_NEW
+
+ Generate a changelog between data contracts. Prints to stdout.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ * location_old TEXT The location (url or path) of the old data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ │ * location_new TEXT The location (url or path) of the new data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
  ```
- Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
 
- PLACEHOLDER. Currently works as 'changelog' does.
+ ### diff
+ ```
+
+ Usage: datacontract diff [OPTIONS] LOCATION_OLD LOCATION_NEW
+
+ PLACEHOLDER. Currently works as 'changelog' does.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ * location_old TEXT The location (url or path) of the old data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ │ * location_new TEXT The location (url or path) of the new data │
+ │ contract yaml. │
+ │ [default: None] │
+ │ [required] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- ╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ * location_old TEXT The location (url or path) of the old data contract yaml. [default: None] [required] │
- │ * location_new TEXT The location (url or path) of the new data contract yaml. [default: None] [required] │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --help Show this message and exit. │
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
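`breaking`, `changelog`, and `diff` all take the same pair of positional arguments. A sketch comparing two versions of a contract (file names are placeholders):

```bash
# report only breaking changes, or the full changelog, between two versions
datacontract breaking datacontract-v1.yaml datacontract-v2.yaml
datacontract changelog datacontract-v1.yaml datacontract-v2.yaml
```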
  ### catalog
-
  ```
+
+ Usage: datacontract catalog [OPTIONS]
+
+ Create an html catalog of data contracts.
+
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --files TEXT Glob pattern for the data contract files to include in │
+ │ the catalog. Applies recursively to any subfolders. │
+ │ [default: *.yaml] │
+ │ --output TEXT Output directory for the catalog html files. │
+ │ [default: catalog/] │
+ │ --schema TEXT The location (url or path) of the Data Contract │
+ │ Specification JSON Schema │
+ │ [default: │
+ │ https://datacontract.com/datacontract.schema.json] │
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- Usage: datacontract catalog [OPTIONS]
-
- Create an html catalog of data contracts.
-
- ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --files TEXT Glob pattern for the data contract files to include in the catalog. [default: *.yaml] │
- │ --output TEXT Output directory for the catalog html files. [default: catalog/] │
- │ --help Show this message and exit. │
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
  Examples:
@@ -1373,20 +1509,49 @@ datacontract catalog --output "."
  datacontract catalog --files "*.odcs.yaml"
  ```
 
- ### Publish
-
- ```
-
- Usage: datacontract publish [OPTIONS] [LOCATION]
-
- Publish the data contract to the Data Mesh Manager.
+ ### publish
+ ```
+
+ Usage: datacontract publish [OPTIONS] [LOCATION]
+
+ Publish the data contract to the Data Mesh Manager.
+
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+ │ location [LOCATION] The location (url or path) of the data contract │
+ │ yaml. │
+ │ [default: datacontract.yaml] │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --schema TEXT The location (url or │
+ │ path) of the Data │
+ │ Contract Specification │
+ │ JSON Schema │
+ │ [default: │
+ │ https://datacontract.c… │
+ │ --ssl-verification --no-ssl-verification SSL verification when │
+ │ publishing the data │
+ │ contract. │
+ │ [default: │
+ │ ssl-verification] │
+ │ --help Show this message and │
+ │ exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
+
+ ```
+
+ ### serve
+ ```
+
+ Usage: datacontract serve [OPTIONS]
+
+ Start the datacontract web server.
+
+ ╭─ Options ────────────────────────────────────────────────────────────────────╮
+ │ --port INTEGER Bind socket to this port. [default: 4242] │
+ │ --host TEXT Bind socket to this host. [default: 127.0.0.1] │
+ │ --help Show this message and exit. │
+ ╰──────────────────────────────────────────────────────────────────────────────╯
 
- ╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ --help Show this message and exit. │
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
 
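Quick sketches of the two commands above: publishing a contract (the API key variable is the one used in the Data Mesh Manager integration below) and starting the built-in web server that is new in this release (the port value is arbitrary; the defaults are 4242 on 127.0.0.1):

```bash
export DATAMESH_MANAGER_API_KEY=xxx
datacontract publish datacontract.yaml  # add --no-ssl-verification only for self-signed endpoints

# start the datacontract web server locally
datacontract serve --port 8080
```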
  ## Integrations
@@ -1395,7 +1560,6 @@ datacontract catalog --files "*.odcs.yaml"
  |-----------------------|------------------------------|---------------------------------------------------------------------------------------------------------------|
  | Data Mesh Manager | `--publish` | Push full results to the [Data Mesh Manager API](https://api.datamesh-manager.com/swagger/index.html) |
  | Data Contract Manager | `--publish` | Push full results to the [Data Contract Manager API](https://api.datacontract-manager.com/swagger/index.html) |
- | OpenTelemetry | `--publish-to-opentelemetry` | Push result as gauge metrics |
 
  ### Integration with Data Mesh Manager
 
@@ -1404,41 +1568,11 @@ If you use [Data Mesh Manager](https://datamesh-manager.com/) or [Data Contract
 
  # Fetch current data contract, execute tests on production, and publish result to data mesh manager
  $ EXPORT DATAMESH_MANAGER_API_KEY=xxx
- $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml \
+ $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml \
  --server production \
  --publish https://api.datamesh-manager.com/api/test-results
  ```
 
- ### Integration with OpenTelemetry
-
- If you use OpenTelemetry, you can use the data contract URL and append the `--publish-to-opentelemetry` option to send the test results to your OLTP-compatible instance, e.g., Prometheus.
-
- The metric name is "datacontract.cli.test.result" and it uses the following encoding for the result:
-
- | datacontract.cli.test.result | Description |
- |------------------------------|---------------------------------------|
- | 0 | test run passed, no warnings |
- | 1 | test run has warnings |
- | 2 | test run failed |
- | 3 | test run not possible due to an error |
- | 4 | test status unknown |
-
-
- ```bash
- # Fetch current data contract, execute tests on production, and publish result to open telemetry
- $ EXPORT OTEL_SERVICE_NAME=datacontract-cli
- $ EXPORT OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
- $ EXPORT OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret # Optional, when using SaaS Products
- $ EXPORT OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf # Optional, default is http/protobuf - use value grpc to use the gRPC protocol instead
- # Send to OpenTelemetry
- $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish-to-opentelemetry
- ```
-
- Current limitations:
- - currently, only ConsoleExporter and OTLP Exporter
- - Metrics only, no logs yet (but loosely planned)
-
-
  ## Best Practices
 
  We share best practices in using the Data Contract CLI.
@@ -1470,7 +1604,7 @@ Create a data contract based on the actual data. This is the fastest way to get
  $ datacontract lint
  ```
 
- 5. Set up a CI pipeline that executes daily and reports the results to the [Data Mesh Manager](https://datamesh-manager.com). Or to some place else. You can even publish to any opentelemetry compatible system.
+ 5. Set up a CI pipeline that executes daily for continuous quality checks. You can also report the test results to tools like [Data Mesh Manager](https://datamesh-manager.com)
  ```bash
  $ datacontract test --publish https://api.datamesh-manager.com/api/test-results
  ```
@@ -1546,7 +1680,7 @@ Examples: Removing or renaming models and fields.
  ```
 
  ## Customizing Exporters and Importers
-
+
  ### Custom Exporter
  Using the exporter factory to add a new custom exporter
  ```python
@@ -1592,18 +1726,18 @@ if __name__ == "__main__":
  Output
  ```python
  {
- 'title': 'Orders Unit Test',
- 'version': '1.0.0',
- 'description': 'The orders data contract',
- 'email': 'team-orders@example.com',
- 'url': 'https://wiki.example.com/teams/checkout',
- 'model': 'orders',
- 'model_columns': 'order_id, order_total, order_status',
- 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'},
+ 'title': 'Orders Unit Test',
+ 'version': '1.0.0',
+ 'description': 'The orders data contract',
+ 'email': 'team-orders@example.com',
+ 'url': 'https://wiki.example.com/teams/checkout',
+ 'model': 'orders',
+ 'model_columns': 'order_id, order_total, order_status',
+ 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'},
  'custom_args': 'my_custom_arg'
  }
  ```
-
+
  ### Custom Importer
  Using the importer factory to add a new custom importer
  ```python
@@ -1625,31 +1759,31 @@ class CustomImporter(Importer):
  data_contract_specification.info.title = source_dict.get("title")
  data_contract_specification.info.version = source_dict.get("version")
  data_contract_specification.info.description = source_dict.get("description_from_app")
-
+
  for model in source_dict.get("models", []):
  fields = {}
  for column in model.get('columns'):
  field = Field(
- description=column.get('column_description'),
- type=column.get('type')
+ description=column.get('column_description'),
+ type=column.get('type')
  )
- fields[column.get('name')] = field
-
+ fields[column.get('name')] = field
+
  dc_model = Model(
- description=model.get('description'),
+ description=model.get('description'),
  fields= fields
  )
 
  data_contract_specification.models[model.get('name')] = dc_model
  return data_contract_specification
-
+
 
  # Register the new custom class into factory
  importer_factory.register_importer("custom_company_importer", CustomImporter)
 
 
  if __name__ == "__main__":
- # Get a custom data from other app
+ # Get a custom data from other app
  json_from_custom_app = '''
  {
  "id_custom": "uuid-custom",
@@ -1681,14 +1815,14 @@ if __name__ == "__main__":
 
  # Call import_from_source
  result = data_contract.import_from_source(
- format="custom_company_importer",
- data_contract_specification=DataContract.init(),
+ format="custom_company_importer",
+ data_contract_specification=DataContract.init(),
  source=json_from_custom_app
- )
+ )
  print(result.to_yaml() )
  ```
  Output
-
+
  ```yaml
  dataContractSpecification: 1.1.0
  id: uuid-custom