datacontract-cli 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl

This diff shows the changes between these publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



Files changed (39)
  1. datacontract/cli.py +19 -3
  2. datacontract/data_contract.py +17 -17
  3. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -1
  4. datacontract/engines/fastjsonschema/s3/s3_read_files.py +2 -0
  5. datacontract/engines/soda/check_soda_execute.py +2 -8
  6. datacontract/engines/soda/connections/duckdb.py +23 -20
  7. datacontract/engines/soda/connections/kafka.py +81 -23
  8. datacontract/engines/soda/connections/snowflake.py +8 -5
  9. datacontract/export/avro_converter.py +12 -2
  10. datacontract/export/dbml_converter.py +42 -19
  11. datacontract/export/exporter.py +2 -1
  12. datacontract/export/exporter_factory.py +6 -0
  13. datacontract/export/jsonschema_converter.py +1 -4
  14. datacontract/export/spark_converter.py +4 -0
  15. datacontract/export/sql_type_converter.py +64 -29
  16. datacontract/export/sqlalchemy_converter.py +169 -0
  17. datacontract/imports/avro_importer.py +1 -0
  18. datacontract/imports/bigquery_importer.py +2 -2
  19. datacontract/imports/dbml_importer.py +112 -0
  20. datacontract/imports/dbt_importer.py +67 -91
  21. datacontract/imports/glue_importer.py +64 -54
  22. datacontract/imports/importer.py +3 -2
  23. datacontract/imports/importer_factory.py +5 -0
  24. datacontract/imports/jsonschema_importer.py +106 -120
  25. datacontract/imports/odcs_importer.py +1 -1
  26. datacontract/imports/spark_importer.py +29 -10
  27. datacontract/imports/sql_importer.py +5 -1
  28. datacontract/imports/unity_importer.py +1 -1
  29. datacontract/integration/{publish_datamesh_manager.py → datamesh_manager.py} +33 -5
  30. datacontract/integration/{publish_opentelemetry.py → opentelemetry.py} +1 -1
  31. datacontract/model/data_contract_specification.py +6 -2
  32. datacontract/templates/partials/model_field.html +10 -2
  33. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/METADATA +283 -113
  34. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/RECORD +38 -37
  35. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/WHEEL +1 -1
  36. datacontract/publish/publish.py +0 -32
  37. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/LICENSE +0 -0
  38. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/entry_points.txt +0 -0
  39. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/top_level.txt +0 -0
@@ -73,7 +73,7 @@ class Definition(pyd.BaseModel):
  exclusiveMaximum: int = None
  pii: bool = None
  classification: str = None
- fields: Dict[str, "Definition"] = {}
+ fields: Dict[str, "Field"] = {}
  tags: List[str] = []
  links: Dict[str, str] = {}
  example: str = None
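For context on the corrected type: a definition in a data contract can carry nested `fields`, which is why the dict now holds `Field` objects. A hedged `datacontract.yaml` sketch (the definition and field names are illustrative, not taken from this diff):

```yaml
definitions:
  address:
    type: object
    description: A reusable postal address definition
    fields:
      street:
        type: string
      city:
        type: string
```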
@@ -239,4 +239,8 @@ class DataContractSpecification(pyd.BaseModel):
  return DataContractSpecification(**data)

  def to_yaml(self):
- return yaml.dump(self.model_dump(exclude_defaults=True, exclude_none=True), sort_keys=False, allow_unicode=True)
+ return yaml.dump(
+ self.model_dump(exclude_defaults=True, exclude_none=True, by_alias=True),
+ sort_keys=False,
+ allow_unicode=True,
+ )
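The practical effect of the added `by_alias=True` is that pydantic fields declared with an alias are serialized under that alias instead of the attribute name. A minimal standalone sketch, not code from this package; the class and alias are made up:

```python
import yaml
import pydantic as pyd


class ExampleInfo(pyd.BaseModel):
    # "contractTitle" is a hypothetical alias, used only to show what by_alias=True changes
    model_config = pyd.ConfigDict(populate_by_name=True)
    title: str = pyd.Field(alias="contractTitle")


info = ExampleInfo(title="Orders")

# Attribute names as keys (the previous to_yaml behavior):
print(yaml.dump(info.model_dump(exclude_none=True), sort_keys=False))                 # title: Orders
# Declared aliases as keys (what by_alias=True adds):
print(yaml.dump(info.model_dump(exclude_none=True, by_alias=True), sort_keys=False))  # contractTitle: Orders
```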
@@ -110,5 +110,13 @@
  {% endif %}

  {% if field.items %}
- {{ render_nested_partial("item", field.items, level) }}
- {% endif %}
+ {{ render_nested_partial("items", field.items, level) }}
+ {% endif %}
+
+ {% if field.keys %}
+ {{ render_nested_partial("keys", field.keys, level) }}
+ {% endif %}
+
+ {% if field.values %}
+ {{ render_nested_partial("values", field.values, level) }}
+ {% endif %}
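The new `keys` and `values` branches cover map-typed fields in the HTML export. A hedged sketch of such a field in `datacontract.yaml` (the model and field names are made up):

```yaml
models:
  orders:
    fields:
      labels:
        type: map
        keys:
          type: string
        values:
          type: string
```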
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datacontract-cli
- Version: 0.10.10
+ Version: 0.10.12
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
  Project-URL: Homepage, https://cli.datacontract.com
@@ -11,69 +11,68 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: typer[all] <0.13,>=0.9
- Requires-Dist: pydantic <2.9.0,>=2.8.2
- Requires-Dist: pyyaml ~=6.0.1
- Requires-Dist: requests <2.33,>=2.31
- Requires-Dist: fastapi ==0.111.1
- Requires-Dist: fastparquet ==2024.5.0
- Requires-Dist: python-multipart ==0.0.9
- Requires-Dist: rich ~=13.7.0
- Requires-Dist: simple-ddl-parser ==1.5.1
- Requires-Dist: soda-core-duckdb <3.4.0,>=3.3.1
- Requires-Dist: setuptools >=60
- Requires-Dist: duckdb ==1.0.0
- Requires-Dist: fastjsonschema <2.21.0,>=2.19.1
- Requires-Dist: python-dotenv ~=1.0.0
- Requires-Dist: rdflib ==7.0.0
- Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16
- Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16
- Requires-Dist: boto3 <1.34.137,>=1.34.41
- Requires-Dist: botocore <1.34.137,>=1.34.41
- Requires-Dist: jinja-partials >=0.2.1
+ Requires-Dist: typer<0.13,>=0.12
+ Requires-Dist: pydantic<2.9.0,>=2.8.2
+ Requires-Dist: pyyaml~=6.0.1
+ Requires-Dist: requests<2.33,>=2.31
+ Requires-Dist: fastapi==0.112.0
+ Requires-Dist: uvicorn==0.30.5
+ Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
+ Requires-Dist: fastparquet==2024.5.0
+ Requires-Dist: python-multipart==0.0.9
+ Requires-Dist: rich~=13.7.0
+ Requires-Dist: simple-ddl-parser==1.6.0
+ Requires-Dist: duckdb==1.0.0
+ Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
+ Requires-Dist: setuptools>=60
+ Requires-Dist: python-dotenv~=1.0.0
+ Requires-Dist: rdflib==7.0.0
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
+ Requires-Dist: boto3<1.35.6,>=1.34.41
+ Requires-Dist: jinja-partials>=0.2.1
  Provides-Extra: all
- Requires-Dist: datacontract-cli[bigquery,databricks,deltalake,kafka,postgres,s3,snowflake,sqlserver,trino] ; extra == 'all'
+ Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
  Provides-Extra: avro
- Requires-Dist: avro ==1.11.3 ; extra == 'avro'
+ Requires-Dist: avro==1.12.0; extra == "avro"
  Provides-Extra: bigquery
- Requires-Dist: soda-core-bigquery <3.4.0,>=3.3.1 ; extra == 'bigquery'
+ Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
  Provides-Extra: databricks
- Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1 ; extra == 'databricks'
- Requires-Dist: databricks-sql-connector <3.3.0,>=3.1.2 ; extra == 'databricks'
- Requires-Dist: soda-core-spark[databricks] <3.4.0,>=3.3.1 ; extra == 'databricks'
- Provides-Extra: deltalake
- Requires-Dist: deltalake <0.19,>=0.17 ; extra == 'deltalake'
+ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
+ Requires-Dist: databricks-sql-connector<3.4.0,>=3.1.2; extra == "databricks"
+ Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
+ Provides-Extra: dbml
+ Requires-Dist: pydbml>=1.1.1; extra == "dbml"
+ Provides-Extra: dbt
+ Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
  Provides-Extra: dev
- Requires-Dist: datacontract-cli[all] ; extra == 'dev'
- Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
- Requires-Dist: ruff ; extra == 'dev'
- Requires-Dist: pre-commit ~=3.7.1 ; extra == 'dev'
- Requires-Dist: pytest ; extra == 'dev'
- Requires-Dist: pytest-xdist ; extra == 'dev'
- Requires-Dist: moto ==5.0.11 ; extra == 'dev'
- Requires-Dist: pymssql ==2.3.0 ; extra == 'dev'
- Requires-Dist: kafka-python ; extra == 'dev'
- Requires-Dist: trino ==0.329.0 ; extra == 'dev'
- Requires-Dist: testcontainers <4.8,>=4.5 ; extra == 'dev'
- Requires-Dist: testcontainers[core] ; extra == 'dev'
- Requires-Dist: testcontainers[minio] ; extra == 'dev'
- Requires-Dist: testcontainers[postgres] ; extra == 'dev'
- Requires-Dist: testcontainers[kafka] ; extra == 'dev'
- Requires-Dist: testcontainers[mssql] ; extra == 'dev'
+ Requires-Dist: datacontract-cli[all]; extra == "dev"
+ Requires-Dist: httpx==0.27.2; extra == "dev"
+ Requires-Dist: kafka-python; extra == "dev"
+ Requires-Dist: moto==5.0.13; extra == "dev"
+ Requires-Dist: pandas>=2.1.0; extra == "dev"
+ Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
+ Requires-Dist: pyarrow>=12.0.0; extra == "dev"
+ Requires-Dist: pytest; extra == "dev"
+ Requires-Dist: pytest-xdist; extra == "dev"
+ Requires-Dist: pymssql==2.3.1; extra == "dev"
+ Requires-Dist: ruff; extra == "dev"
+ Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.1; extra == "dev"
+ Requires-Dist: trino==0.329.0; extra == "dev"
  Provides-Extra: kafka
- Requires-Dist: datacontract-cli[avro] ; extra == 'kafka'
- Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1 ; extra == 'kafka'
+ Requires-Dist: datacontract-cli[avro]; extra == "kafka"
+ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
  Provides-Extra: postgres
- Requires-Dist: soda-core-postgres <3.4.0,>=3.3.1 ; extra == 'postgres'
+ Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
  Provides-Extra: s3
- Requires-Dist: s3fs ==2024.6.1 ; extra == 's3'
+ Requires-Dist: s3fs==2024.6.1; extra == "s3"
  Provides-Extra: snowflake
- Requires-Dist: snowflake-connector-python[pandas] <3.12,>=3.6 ; extra == 'snowflake'
- Requires-Dist: soda-core-snowflake <3.4.0,>=3.3.1 ; extra == 'snowflake'
+ Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
+ Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
  Provides-Extra: sqlserver
- Requires-Dist: soda-core-sqlserver <3.4.0,>=3.3.1 ; extra == 'sqlserver'
+ Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
  Provides-Extra: trino
- Requires-Dist: soda-core-trino <3.4.0,>=3.3.1 ; extra == 'trino'
+ Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"

  # Data Contract CLI

@@ -82,7 +81,7 @@ Requires-Dist: soda-core-trino <3.4.0,>=3.3.1 ; extra == 'trino'
  <img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/datacontract-cli/ci.yaml?branch=main"></a>
  <a href="https://github.com/datacontract/datacontract-cli">
  <img alt="Stars" src="https://img.shields.io/github/stars/datacontract/datacontract-cli" /></a>
- <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://camo.githubusercontent.com/5ade1fd1e76a6ab860802cdd2941fe2501e2ca2cb534e5d8968dbf864c13d33d/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f736c61636b2d6a6f696e5f636861742d77686974652e7376673f6c6f676f3d736c61636b267374796c653d736f6369616c" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
+ <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
  </p>

  The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
@@ -197,10 +196,10 @@ $ datacontract export --format html datacontract.yaml > datacontract.html
  # import avro (other formats: sql, glue, bigquery...)
  $ datacontract import --format avro --source avro_schema.avsc

- # find differences between to data contracts
+ # find differences between two data contracts
  $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml

- # find differences between to data contracts categorized into error, warning, and info.
+ # find differences between two data contracts categorized into error, warning, and info.
  $ datacontract changelog datacontract-v1.yaml datacontract-v2.yaml

  # fail pipeline on breaking changes. Uses changelog internally and showing only error and warning.
@@ -267,13 +266,13 @@ A list of available extras:
  | Avro Support | `pip install datacontract-cli[avro]` |
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
- | Deltalake Integration | `pip install datacontract-cli[deltalake]` |
  | Kafka Integration | `pip install datacontract-cli[kafka]` |
  | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
  | S3 Integration | `pip install datacontract-cli[s3]` |
  | Snowflake Integration | `pip install datacontract-cli[snowflake]` |
  | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
  | Trino | `pip install datacontract-cli[trino]` |
+ | Dbt | `pip install datacontract-cli[dbt]` |



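Extras can also be combined in a single install command. A small sketch (pick the extras that match your data platform):

```bash
# Install the CLI with the Snowflake extra plus the newly added dbt extra
pip install "datacontract-cli[snowflake,dbt]"
```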
@@ -385,7 +384,7 @@ Supported server types:
  - [sqlserver](#sqlserver)
  - [databricks](#databricks)
  - [databricks (programmatic)](#databricks-programmatic)
- - [dataframr (programmatic)](#dataframe-programmatic)
+ - [dataframe (programmatic)](#dataframe-programmatic)
  - [snowflake](#snowflake)
  - [kafka](#kafka)
  - [postgres](#postgres)
@@ -406,6 +405,12 @@ Feel free to create an [issue](https://github.com/datacontract/datacontract-cli/

  Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.

+ - CSV
+ - JSON
+ - Delta
+ - Parquet
+ - Iceberg (coming soon)
+
  #### Examples

  ##### JSON
@@ -444,6 +449,32 @@ servers:



+ ### Google Cloud Storage (GCS)
+
+ The [S3](#S3) integration also works with files on Google Cloud Storage through its [interoperability](https://cloud.google.com/storage/docs/interoperability).
+ Use `https://storage.googleapis.com` as the endpoint URL.
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+ production:
+ type: s3
+ endpointUrl: https://storage.googleapis.com
+ location: s3://bucket-name/path/*/*.json # use s3:// schema instead of gs://
+ format: json
+ delimiter: new_line # new_line, array, or none
+ ```
+
+ #### Environment Variables
+
+ | Environment Variable | Example | Description |
+ |-------------------------------------|----------------|------------------------------------------------------------------------------------------|
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `GOOG1EZZZ...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Key ID |
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `PDWWpb...` | The GCS [HMAC Key](https://cloud.google.com/storage/docs/authentication/hmackeys) Secret |
+
+
  ### BigQuery

  We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
@@ -665,14 +696,31 @@ models:
  ```

  #### Environment Variables
-
- | Environment Variable | Example | Description |
- |------------------------------------|--------------------|-----------------------------------------------------|
- | `DATACONTRACT_SNOWFLAKE_USERNAME` | `datacontract` | Username |
- | `DATACONTRACT_SNOWFLAKE_PASSWORD` | `mysecretpassword` | Password |
- | `DATACONTRACT_SNOWFLAKE_ROLE` | `DATAVALIDATION` | The snowflake role to use. |
- | `DATACONTRACT_SNOWFLAKE_WAREHOUSE` | `COMPUTE_WH` | The Snowflake Warehouse to use executing the tests. |
-
+ All [parameters supported by Soda](https://docs.soda.io/soda/connect-snowflake.html), uppercased and prepended by `DATACONTRACT_SNOWFLAKE_` prefix.
+ For example:
+
+ | Soda parameter | Environment Variable |
+ |----------------------|---------------------------------------------|
+ | `username` | `DATACONTRACT_SNOWFLAKE_USERNAME` |
+ | `password` | `DATACONTRACT_SNOWFLAKE_PASSWORD` |
+ | `warehouse` | `DATACONTRACT_SNOWFLAKE_WAREHOUSE` |
+ | `role` | `DATACONTRACT_SNOWFLAKE_ROLE` |
+ | `connection_timeout` | `DATACONTRACT_SNOWFLAKE_CONNECTION_TIMEOUT` |
+
+ Beware, that parameters:
+ * `account`
+ * `database`
+ * `schema`
+
+ are obtained from the `servers` section of the YAML-file.
+ E.g. from the example above:
+ ```yaml
+ servers:
+ snowflake:
+ account: abcdefg-xn12345
+ database: ORDER_DB
+ schema: ORDERS_PII_V2
+ ```


  ### Kafka
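As an illustration of the naming convention described in the Snowflake hunk above (Soda parameter name, uppercased, prefixed with `DATACONTRACT_SNOWFLAKE_`), a hedged shell sketch; all values are placeholders:

```bash
# Soda connection parameters become environment variables:
# uppercase the parameter name and add the DATACONTRACT_SNOWFLAKE_ prefix.
export DATACONTRACT_SNOWFLAKE_USERNAME=datacontract
export DATACONTRACT_SNOWFLAKE_PASSWORD=mysecretpassword
export DATACONTRACT_SNOWFLAKE_WAREHOUSE=COMPUTE_WH
export DATACONTRACT_SNOWFLAKE_ROLE=DATAVALIDATION
export DATACONTRACT_SNOWFLAKE_CONNECTION_TIMEOUT=300

# account, database, and schema are read from the servers section of the data contract
datacontract test datacontract.yaml
```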
@@ -777,7 +825,7 @@ models:
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
- │ y|dbml|spark]
+ │ y|dbml|spark|sqlalchemy]
  │ --output PATH Specify the file path where the exported data will be │
  │ saved. If no path is provided, the output will be │
  │ printed to stdout. │
@@ -828,6 +876,7 @@ Available export options:
  | `pydantic-model` | Export to pydantic models | ✅ |
  | `DBML` | Export to a DBML Diagram description | ✅ |
  | `spark` | Export to a Spark StructType | ✅ |
+ | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
  | Missing something? | Please create an issue on GitHub | TBD |

  #### Great Expectations
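The `sqlalchemy` export format added above is invoked like the other exporters; a minimal sketch (file names are placeholders):

```bash
# Print SQLAlchemy model classes for the data contract to stdout
datacontract export --format sqlalchemy datacontract.yaml

# Or write them to a file via --output
datacontract export --format sqlalchemy --output models.py datacontract.yaml
```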
@@ -901,6 +950,7 @@ models:
  description: Example for AVRO with Timestamp (microsecond precision) https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29
  type: long
  example: 1672534861000000 # Equivalent to 2023-01-01 01:01:01 in microseconds
+ required: true
  config:
  avroLogicalType: local-timestamp-micros
  avroDefault: 1672534861000000
@@ -915,6 +965,7 @@ models:
  - **description**: A textual description of the field.
  - **type**: The data type of the field. In this example, it is `long`.
  - **example**: An example value for the field.
+ - **required**: Is this a required field (as opposed to optional/nullable).
  - **config**: Section to specify custom Avro properties.
  - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
  - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
@@ -925,23 +976,42 @@ models:
  ```
  Usage: datacontract import [OPTIONS]

- Create a data contract from the given source location. Prints to stdout.
-
- ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
- │ * --format [sql|avro|glue|bigquery|jsonschema| The format of the source file. [default: None] [required] |
- unity|spark] |
- --source TEXT The path to the file or Glue Database that should be imported.
- [default: None]
- --glue-table TEXT List of table ids to import from the Glue Database (repeat for
- multiple table ids, leave empty for all tables in the dataset).
- [default: None]
- --bigquery-project TEXT The bigquery project id. [default: None]
- --bigquery-dataset TEXT The bigquery dataset id. [default: None]
- --bigquery-table TEXT List of table ids to import from the bigquery API (repeat for
- multiple table ids, leave empty for all tables in the dataset).
- [default: None]
- │ --help Show this message and exit.
- ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ Create a data contract from the given source location. Prints to stdout.
+
+ ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+ │ * --format [sql|avro|dbt|glue|jsonschema|bigquery|odcs The format of the source file.
+ |unity|spark] [default: None] │
+ [required]
+ --source TEXT The path to the file or Glue Database that
+ should be imported.
+ [default: None]
+ --glue-table TEXT List of table ids to import from the Glue
+ Database (repeat for multiple table ids,
+ leave empty for all tables in the dataset).
+ [default: None]
+ --bigquery-project TEXT The bigquery project id. [default: None]
+ --bigquery-dataset TEXT The bigquery dataset id. [default: None]
+ │ --bigquery-table TEXT List of table ids to import from the
+ │ bigquery API (repeat for multiple table ids, │
+ │ leave empty for all tables in the dataset). │
+ │ [default: None] │
+ │ --unity-table-full-name TEXT Full name of a table in the unity catalog │
+ │ [default: None] │
+ │ --dbt-model TEXT List of models names to import from the dbt │
+ │ manifest file (repeat for multiple models │
+ │ names, leave empty for all models in the │
+ │ dataset). │
+ │ [default: None] │
+ │ --dbml-schema TEXT List of schema names to import from the DBML │
+ │ file (repeat for multiple schema names, │
+ │ leave empty for all tables in the file). │
+ │ [default: None] │
+ │ --dbml-table TEXT List of table names to import from the DBML │
+ │ file (repeat for multiple table names, leave │
+ │ empty for all tables in the file). │
+ │ [default: None] │
+ │ --help Show this message and exit. │
+ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```

  Example:
@@ -952,18 +1022,20 @@ datacontract import --format sql --source my_ddl.sql

  Available import options:

- | Type | Description | Status |
- |--------------------|------------------------------------------------|---------|
+ | Type | Description | Status |
+ |--------------------|------------------------------------------------|--------|
  | `sql` | Import from SQL DDL | ✅ |
  | `avro` | Import from AVRO schemas | ✅ |
  | `glue` | Import from AWS Glue DataCatalog | ✅ |
- | `protobuf` | Import from Protobuf schemas | TBD |
  | `jsonschema` | Import from JSON Schemas | ✅ |
  | `bigquery` | Import from BigQuery Schemas | ✅ |
  | `unity` | Import from Databricks Unity Catalog | partial |
- | `dbt` | Import from dbt models | TBD |
+ | `dbt` | Import from dbt models | |
  | `odcs` | Import from Open Data Contract Standard (ODCS) | ✅ |
- | Missing something? | Please create an issue on GitHub | TBD |
+ | `spark` | Import from Spark StructTypes | |
+ | `dbml` | Import from DBML models | ✅ |
+ | `protobuf` | Import from Protobuf schemas | TBD |
+ | Missing something? | Please create an issue on GitHub | TBD |


  #### BigQuery
@@ -1005,6 +1077,23 @@ export DATABRICKS_IMPORT_ACCESS_TOKEN=<token>
  datacontract import --format unity --unity-table-full-name <table_full_name>
  ```

+ #### dbt
+
+ Importing from dbt manifest file.
+ You may give the `dbt-model` parameter to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the database will be imported.
+
+ Examples:
+
+ ```bash
+ # Example import from dbt manifest with specifying the tables to import
+ datacontract import --format dbt --source <manifest_path> --dbt-model <model_name_1> --dbt-model <model_name_2> --dbt-model <model_name_3>
+ ```
+
+ ```bash
+ # Example import from dbt manifest importing all tables in the database
+ datacontract import --format dbt --source <manifest_path>
+ ```
+
  #### Glue

  Importing from Glue reads the necessary Data directly off of the AWS API.
@@ -1032,6 +1121,38 @@ Example:
  datacontract import --format spark --source "users,orders"
  ```

+ #### DBML
+
+ Importing from DBML Documents.
+ **NOTE:** Since DBML does _not_ have strict requirements on the types of columns, this import _may_ create non-valid datacontracts, as not all types of fields can be properly mapped. In this case you will have to adapt the generated document manually.
+ We also assume, that the description for models and fields is stored in a Note within the DBML model.
+
+ You may give the `dbml-table` or `dbml-schema` parameter to enumerate the tables or schemas that should be imported.
+ If no tables are given, _all_ available tables of the source will be imported. Likewise, if no schema is given, _all_ schemas are imported.
+
+ Examples:
+
+ ```bash
+ # Example import from DBML file, importing everything
+ datacontract import --format dbml --source <file_path>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables from specific schemas
+ datacontract import --format dbml --source <file_path> --dbml-schema <schema_1> --dbml-schema <schema_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-table <table_name_2>
+ ```
+
+ ```bash
+ # Example import from DBML file, filtering for tables with specific names from a specific schema
+ datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
+ ```
+
+
  ### breaking

  ```
@@ -1304,7 +1425,7 @@ if __name__ == "__main__":
  data_contract = DataContract(
  data_contract_file="/path/datacontract.yaml"
  )
- # call export
+ # Call export
  result = data_contract.export(
  export_format="custom", model="orders", server="production", custom_arg="my_custom_arg"
  )
@@ -1330,10 +1451,11 @@ Output
  Using the importer factory to add a new custom importer
  ```python

- from datacontract.model.data_contract_specification import DataContractSpecification
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
  from datacontract.data_contract import DataContract
  from datacontract.imports.importer import Importer
  from datacontract.imports.importer_factory import importer_factory
+
  import json

  # Create a custom class that implements import_source method
@@ -1344,43 +1466,89 @@ class CustomImporter(Importer):
  source_dict = json.loads(source)
  data_contract_specification.id = source_dict.get("id_custom")
  data_contract_specification.info.title = source_dict.get("title")
+ data_contract_specification.info.version = source_dict.get("version")
  data_contract_specification.info.description = source_dict.get("description_from_app")
-
+
+ for model in source_dict.get("models", []):
+ fields = {}
+ for column in model.get('columns'):
+ field = Field(
+ description=column.get('column_description'),
+ type=column.get('type')
+ )
+ fields[column.get('name')] = field
+
+ dc_model = Model(
+ description=model.get('description'),
+ fields= fields
+ )
+
+ data_contract_specification.models[model.get('name')] = dc_model
  return data_contract_specification
-
+

  # Register the new custom class into factory
  importer_factory.register_importer("custom_company_importer", CustomImporter)


  if __name__ == "__main__":
- # get a custom da
- json_from_custom_app = '{"id_custom":"uuid-custom","version":"0.0.2", "title":"my_custom_imported_data", "description_from_app": "Custom contract description"}'
+ # Get a custom data from other app
+ json_from_custom_app = '''
+ {
+ "id_custom": "uuid-custom",
+ "version": "0.0.2",
+ "title": "my_custom_imported_data",
+ "description_from_app": "Custom contract description",
+ "models": [
+ {
+ "name": "model1",
+ "description": "model description from app",
+ "columns": [
+ {
+ "name": "columnA",
+ "type": "varchar",
+ "column_description": "my_column description"
+ },
+ {
+ "name": "columnB",
+ "type": "varchar",
+ "column_description": "my_columnB description"
+ }
+ ]
+ }
+ ]
+ }
+ '''
  # Create a DataContract instance
  data_contract = DataContract()

- # call import_from
+ # Call import_from_source
  result = data_contract.import_from_source(
- format="custom_company_importer", data_contract_specification=DataContract.init(), source=json_from_custom_app
- )
- print(dict(result))
-
+ format="custom_company_importer",
+ data_contract_specification=DataContract.init(),
+ source=json_from_custom_app
+ )
+ print(result.to_yaml() )
  ```
  Output
+
+ ```yaml
+ dataContractSpecification: 0.9.3
+ id: uuid-custom
+ info:
+ title: my_custom_imported_data
+ version: 0.0.2
+ description: Custom contract description
+ models:
+ model1:
+ fields:
+ columnA:
+ type: varchar
+ description: my_column description
+ columnB:
+ type: varchar
+ description: my_columnB description

- ```python
- {
- 'dataContractSpecification': '0.9.3',
- 'id': 'uuid-custom',
- 'info': Info(title='my_custom_imported_data', version='0.0.1', status=None, description='Custom contract description', owner=None, contact=None),
- 'servers': {},
- 'terms': None,
- 'models': {},
- 'definitions': {},
- 'examples': [],
- 'quality': None,
- 'servicelevels': None
- }
  ```
  ## Development Setup

@@ -1469,6 +1637,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
  ## Companies using this tool

  - [INNOQ](https://innoq.com)
+ - [Data Catering](https://data.catering/)
  - And many more. To add your company, please create a pull request.

  ## Related Tools
@@ -1476,6 +1645,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
  - [Data Contract Manager](https://www.datacontract-manager.com/) is a commercial tool to manage data contracts. It contains a web UI, access management, and data governance for a full enterprise data marketplace.
  - [Data Contract GPT](https://gpt.datacontract.com) is a custom GPT that can help you write data contracts.
  - [Data Contract Editor](https://editor.datacontract.com) is an editor for Data Contracts, including a live html preview.
+ - [Data Contract Playground](https://data-catering.github.io/data-contract-playground/) allows you to validate and export your data contract to different formats within your browser.

  ## License