datacontract-cli 0.10.12__py3-none-any.whl → 0.10.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic.

Files changed (37)
  1. datacontract/cli.py +5 -0
  2. datacontract/data_contract.py +9 -1
  3. datacontract/engines/soda/connections/kafka.py +28 -6
  4. datacontract/export/avro_converter.py +8 -1
  5. datacontract/export/avro_idl_converter.py +1 -0
  6. datacontract/export/bigquery_converter.py +30 -23
  7. datacontract/export/data_caterer_converter.py +148 -0
  8. datacontract/export/dcs_exporter.py +6 -0
  9. datacontract/export/exporter.py +5 -1
  10. datacontract/export/exporter_factory.py +19 -1
  11. datacontract/export/jsonschema_converter.py +13 -2
  12. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
  13. datacontract/export/odcs_v3_exporter.py +294 -0
  14. datacontract/export/sodacl_converter.py +82 -2
  15. datacontract/export/spark_converter.py +3 -1
  16. datacontract/export/sql_type_converter.py +56 -21
  17. datacontract/imports/iceberg_importer.py +162 -0
  18. datacontract/imports/importer.py +1 -0
  19. datacontract/imports/importer_factory.py +5 -0
  20. datacontract/imports/odcs_importer.py +25 -168
  21. datacontract/imports/odcs_v2_importer.py +177 -0
  22. datacontract/imports/odcs_v3_importer.py +309 -0
  23. datacontract/imports/spark_importer.py +5 -1
  24. datacontract/imports/unity_importer.py +105 -84
  25. datacontract/integration/datamesh_manager.py +1 -1
  26. datacontract/lint/resolve.py +24 -10
  27. datacontract/lint/resources.py +21 -0
  28. datacontract/lint/urls.py +29 -13
  29. datacontract/model/data_contract_specification.py +72 -8
  30. datacontract/model/odcs.py +11 -0
  31. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +106 -52
  32. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +36 -29
  33. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
  34. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  35. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
  36. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
  37. {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
datacontract/lint/resources.py ADDED
@@ -0,0 +1,21 @@
+ from datacontract.lint.files import read_file
+ from datacontract.lint.urls import fetch_resource
+
+
+ def read_resource(location: str) -> str:
+     """
+     Read a resource from a given location.
+
+     If the location is a URL, fetch the resource from the web. API-Keys are supported.
+     Otherwise, read the resource from a local file.
+
+     Args:
+         location (str): The location of the resource, either a URL or a file path.
+
+     Returns:
+         str: The content of the resource.
+     """
+     if location.startswith("http://") or location.startswith("https://"):
+         return fetch_resource(location)
+     else:
+         return read_file(location)
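For orientation, a minimal usage sketch of the new `read_resource` helper added above (the file path and URL are illustrative placeholders, not part of the release):

```python
# Illustrative sketch only: the path and URL below are placeholders.
from datacontract.lint.resources import read_resource

# A local file path is read via read_file
local_yaml = read_resource("datacontract.yaml")

# An http(s) URL is fetched via fetch_resource, which can attach an API key
# (see the DATAMESH_MANAGER_API_KEY / DATACONTRACT_MANAGER_API_KEY handling in urls.py below)
remote_yaml = read_resource("https://datacontract.com/examples/orders-latest/datacontract.yaml")
```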
datacontract/lint/urls.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ from urllib.parse import urlparse
 
  import requests
 
@@ -25,16 +26,31 @@ def fetch_resource(url: str):
 
 
  def _set_api_key(headers, url):
-     if ".datamesh-manager.com/" not in url:
-         return
-     datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
-     if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
-         print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
-         raise DataContractException(
-             type="lint",
-             name=f"Reading data contract from {url}",
-             reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
-             engine="datacontract",
-             result="error",
-         )
-     headers["x-api-key"] = datamesh_manager_api_key
+     hostname = urlparse(url).hostname
+     if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
+         datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+         if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
+             print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
+             raise DataContractException(
+                 type="lint",
+                 name=f"Reading data contract from {url}",
+                 reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
+                 engine="datacontract",
+                 result="error",
+             )
+         headers["x-api-key"] = datamesh_manager_api_key
+     elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
+         datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
+         if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
+             print("Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
+             raise DataContractException(
+                 type="lint",
+                 name=f"Reading data contract from {url}",
+                 reason="Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
+                 engine="datacontract",
+                 result="error",
+             )
+         headers["x-api-key"] = datacontract_manager_api_key
+     else:
+         # do nothing
+         pass
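To illustrate the effect of the new hostname-based check (it matches the domain and its subdomains rather than a substring of the whole URL), here is a small hedged sketch; the URL and key value are placeholders, and the real logic lives in `_set_api_key` above:

```python
# Placeholder URL and key; mirrors the hostname matching used by _set_api_key above.
import os
from urllib.parse import urlparse

os.environ["DATACONTRACT_MANAGER_API_KEY"] = "dummy-key"

url = "https://app.datacontract-manager.com/api/datacontracts/orders-latest"
hostname = urlparse(url).hostname  # "app.datacontract-manager.com"

headers = {}
if hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
    headers["x-api-key"] = os.environ["DATACONTRACT_MANAGER_API_KEY"]

print(headers)  # {'x-api-key': 'dummy-key'}
```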
datacontract/model/data_contract_specification.py CHANGED
@@ -4,6 +4,32 @@ from typing import List, Dict, Optional, Any
  import pydantic as pyd
  import yaml
 
+ DATACONTRACT_TYPES = [
+     "string",
+     "text",
+     "varchar",
+     "number",
+     "decimal",
+     "numeric",
+     "int",
+     "integer",
+     "long",
+     "bigint",
+     "float",
+     "double",
+     "boolean",
+     "timestamp",
+     "timestamp_tz",
+     "timestamp_ntz",
+     "date",
+     "array",
+     "bytes",
+     "object",
+     "record",
+     "struct",
+     "null",
+ ]
+
 
  class Contact(pyd.BaseModel):
      name: str = None
@@ -15,6 +41,14 @@ class Contact(pyd.BaseModel):
      )
 
 
+ class ServerRole(pyd.BaseModel):
+     name: str = None
+     description: str = None
+     model_config = pyd.ConfigDict(
+         extra="allow",
+     )
+
+
  class Server(pyd.BaseModel):
      type: str = None
      description: str = None
@@ -38,6 +72,7 @@ class Server(pyd.BaseModel):
      dataProductId: str = None
      outputPortId: str = None
      driver: str = None
+     roles: List[ServerRole] = None
 
      model_config = pyd.ConfigDict(
          extra="allow",
@@ -83,19 +118,40 @@ class Definition(pyd.BaseModel):
      )
 
 
+ class Quality(pyd.BaseModel):
+     type: str = None
+     description: str = None
+     query: str = None
+     dialect: str = None
+     mustBe: int = None
+     mustNotBe: int = None
+     mustBeGreaterThan: int = None
+     mustBeGreaterThanOrEqualTo: int = None
+     mustBeLessThan: int = None
+     mustBeLessThanOrEqualTo: int = None
+     mustBeBetween: List[int] = None
+     mustNotBeBetween: List[int] = None
+     engine: str = None
+     implementation: str | Dict[str, Any] = None
+
+     model_config = pyd.ConfigDict(
+         extra="allow",
+     )
+
+
  class Field(pyd.BaseModel):
      ref: str = pyd.Field(default=None, alias="$ref")
      ref_obj: Definition = pyd.Field(default=None, exclude=True)
-     title: str = None
+     title: str | None = None
      type: str = None
      format: str = None
      required: bool = None
      primary: bool = None
-     unique: bool = None
+     unique: bool | None = None
      references: str = None
-     description: str = None
-     pii: bool = None
-     classification: str = None
+     description: str | None = None
+     pii: bool | None = None
+     classification: str | None = None
      pattern: str = None
      minLength: int = None
      maxLength: int = None
@@ -103,8 +159,8 @@ class Field(pyd.BaseModel):
      exclusiveMinimum: int = None
      maximum: int = None
      exclusiveMaximum: int = None
-     enum: List[str] = []
-     tags: List[str] = []
+     enum: List[str] | None = []
+     tags: List[str] | None = []
      links: Dict[str, str] = {}
      fields: Dict[str, "Field"] = {}
      items: "Field" = None
@@ -113,7 +169,9 @@ class Field(pyd.BaseModel):
      precision: int = None
      scale: int = None
      example: str = None
-     config: Dict[str, Any] = None
+     examples: List[Any] | None = None
+     quality: List[Quality] | None = []
+     config: Dict[str, Any] | None = None
 
      model_config = pyd.ConfigDict(
          extra="allow",
@@ -126,7 +184,13 @@ class Model(pyd.BaseModel):
      namespace: Optional[str] = None
      title: Optional[str] = None
      fields: Dict[str, Field] = {}
+     quality: List[Quality] | None = []
      config: Dict[str, Any] = None
+     tags: List[str] | None = None
+
+     model_config = pyd.ConfigDict(
+         extra="allow",
+     )
 
 
  class Info(pyd.BaseModel):
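For context, a hedged sketch of how the extended specification models can be instantiated as Python objects; class and field names follow the diff above, while all values are made up:

```python
# Values are made up; class and field names follow the diff above.
from datacontract.model.data_contract_specification import (
    Field,
    Model,
    Quality,
    Server,
    ServerRole,
)

model = Model(
    title="orders",
    tags=["checkout"],
    quality=[Quality(type="sql", query="SELECT COUNT(*) FROM orders", mustBeGreaterThan=0)],
    fields={
        "order_id": Field(type="string", required=True, examples=["1001"]),
    },
)

server = Server(
    type="snowflake",
    roles=[ServerRole(name="analyst", description="Read-only access")],
)
```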
datacontract/model/odcs.py ADDED
@@ -0,0 +1,11 @@
+ def is_open_data_contract_standard(odcs: dict) -> bool:
+     """
+     Check if the given dictionary is an OpenDataContractStandard.
+
+     Args:
+         odcs (dict): The dictionary to check.
+
+     Returns:
+         bool: True if the dictionary is an OpenDataContractStandard, False otherwise.
+     """
+     return odcs.get("kind") == "DataContract" and odcs.get("apiVersion", "").startswith("v3")
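A quick sketch of the new ODCS v3 detection helper in action; the YAML snippet is a minimal made-up document header:

```python
# Minimal made-up ODCS v3 document header.
import yaml

from datacontract.model.odcs import is_open_data_contract_standard

odcs_document = yaml.safe_load("""
apiVersion: v3.0.0
kind: DataContract
id: orders
""")

print(is_open_data_contract_standard(odcs_document))  # True
```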
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datacontract-cli
- Version: 0.10.12
+ Version: 0.10.14
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
  Project-URL: Homepage, https://cli.datacontract.com
@@ -12,34 +12,35 @@ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: typer<0.13,>=0.12
- Requires-Dist: pydantic<2.9.0,>=2.8.2
+ Requires-Dist: pydantic<2.10.0,>=2.8.2
  Requires-Dist: pyyaml~=6.0.1
  Requires-Dist: requests<2.33,>=2.31
- Requires-Dist: fastapi==0.112.0
- Requires-Dist: uvicorn==0.30.5
+ Requires-Dist: fastapi==0.115.2
+ Requires-Dist: uvicorn==0.30.6
  Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
  Requires-Dist: fastparquet==2024.5.0
- Requires-Dist: python-multipart==0.0.9
- Requires-Dist: rich~=13.7.0
- Requires-Dist: simple-ddl-parser==1.6.0
- Requires-Dist: duckdb==1.0.0
+ Requires-Dist: python-multipart==0.0.12
+ Requires-Dist: rich<13.10,>=13.7
+ Requires-Dist: simple-ddl-parser==1.7.1
+ Requires-Dist: duckdb==1.1.2
  Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
  Requires-Dist: setuptools>=60
  Requires-Dist: python-dotenv~=1.0.0
  Requires-Dist: rdflib==7.0.0
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
  Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
- Requires-Dist: boto3<1.35.6,>=1.34.41
+ Requires-Dist: boto3<1.35.45,>=1.34.41
  Requires-Dist: jinja-partials>=0.2.1
  Provides-Extra: all
- Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
+ Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
  Provides-Extra: avro
  Requires-Dist: avro==1.12.0; extra == "avro"
  Provides-Extra: bigquery
  Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
  Provides-Extra: databricks
  Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
- Requires-Dist: databricks-sql-connector<3.4.0,>=3.1.2; extra == "databricks"
+ Requires-Dist: databricks-sql-connector<3.6.0,>=3.1.2; extra == "databricks"
+ Requires-Dist: databricks-sdk<0.36.0,>=0.32.0; extra == "databricks"
  Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
  Provides-Extra: dbml
  Requires-Dist: pydbml>=1.1.1; extra == "dbml"
@@ -49,7 +50,7 @@ Provides-Extra: dev
  Requires-Dist: datacontract-cli[all]; extra == "dev"
  Requires-Dist: httpx==0.27.2; extra == "dev"
  Requires-Dist: kafka-python; extra == "dev"
- Requires-Dist: moto==5.0.13; extra == "dev"
+ Requires-Dist: moto==5.0.18; extra == "dev"
  Requires-Dist: pandas>=2.1.0; extra == "dev"
  Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
  Requires-Dist: pyarrow>=12.0.0; extra == "dev"
@@ -57,15 +58,17 @@ Requires-Dist: pytest; extra == "dev"
  Requires-Dist: pytest-xdist; extra == "dev"
  Requires-Dist: pymssql==2.3.1; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
- Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.1; extra == "dev"
- Requires-Dist: trino==0.329.0; extra == "dev"
+ Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.2; extra == "dev"
+ Requires-Dist: trino==0.330.0; extra == "dev"
+ Provides-Extra: iceberg
+ Requires-Dist: pyiceberg==0.7.1; extra == "iceberg"
  Provides-Extra: kafka
  Requires-Dist: datacontract-cli[avro]; extra == "kafka"
  Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
  Provides-Extra: postgres
  Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
  Provides-Extra: s3
- Requires-Dist: s3fs==2024.6.1; extra == "s3"
+ Requires-Dist: s3fs==2024.9.0; extra == "s3"
  Provides-Extra: snowflake
  Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
  Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
@@ -84,8 +87,8 @@ Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"
  <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
  </p>
 
- The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
- It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
+ The `datacontract` CLI is an open-source command-line tool for working with data contracts.
+ It uses data contract YAML files as [Data Contract Specification](https://datacontract.com/) or [ODCS](https://bitol-io.github.io/open-data-contract-standard/latest/) to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 
  ![Main features of the Data Contract CLI](datacontractcli.png)
 
@@ -97,15 +100,15 @@ Let's look at this data contract:
 
  We have a _servers_ section with endpoint details to the S3 bucket, _models_ for the structure of the data, _servicelevels_ and _quality_ attributes that describe the expected freshness and number of rows.
 
- This data contract contains all information to connect to S3 and check that the actual data meets the defined schema and quality requirements. We can use this information to test if the actual data set in S3 is compliant to the data contract.
+ This data contract contains all information to connect to S3 and check that the actual data meets the defined schema and quality requirements. We can use this information to test if the actual data product in S3 is compliant to the data contract.
 
- Let's use [pip](https://pip.pypa.io/en/stable/getting-started/) to install the CLI (or use the [Docker image](#docker), if you prefer).
+ Let's use [pip](https://pip.pypa.io/en/stable/getting-started/) to install the CLI (or use the [Docker image](#docker)),
  ```bash
  $ python3 -m pip install datacontract-cli[all]
  ```
 
 
- We run the tests:
+ now, let's run the tests:
 
  ```bash
  $ datacontract test https://datacontract.com/examples/orders-latest/datacontract.yaml
@@ -143,7 +146,7 @@ Testing https://datacontract.com/examples/orders-latest/datacontract.yaml
 
  Voilà, the CLI tested that the _datacontract.yaml_ itself is valid, all records comply with the schema, and all quality attributes are met.
 
- We can also use the datacontract.yaml to export in many [formats](#format), e.g., to SQL:
+ We can also use the datacontract.yaml to export in many [formats](#format), e.g., to generate a SQL DDL:
 
  ```bash
  $ datacontract export --format sql https://datacontract.com/examples/orders-latest/datacontract.yaml
@@ -190,7 +193,7 @@ $ datacontract test datacontract.yaml
  # execute schema and quality checks on the examples within the contract
  $ datacontract test --examples datacontract.yaml
 
- # export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql, sodacl, terraform, ...)
+ # export data contract as html (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs_v2, odcs_v3, rdf, sql, sodacl, terraform, ...)
  $ datacontract export --format html datacontract.yaml > datacontract.html
 
  # import avro (other formats: sql, glue, bigquery...)
@@ -223,8 +226,7 @@ if not run.has_passed():
  Choose the most appropriate installation method for your needs:
 
  ### pip
- Python 3.11 recommended.
- Python 3.12 available as pre-release release candidate for 0.9.3
+ Python 3.10, 3.11, and 3.12 are supported. We recommend to use Python 3.11.
 
  ```bash
  python3 -m pip install datacontract-cli[all]
@@ -238,17 +240,22 @@ pipx install datacontract-cli[all]
 
  ### Docker
 
+ You can also use our Docker image to run the CLI tool. It is also convenient for CI/CD pipelines.
+
  ```bash
  docker pull datacontract/cli
  docker run --rm -v ${PWD}:/home/datacontract datacontract/cli
  ```
 
- Or via an alias that automatically uses the latest version:
+ You can create an alias for the Docker command to make it easier to use:
 
  ```bash
  alias datacontract='docker run --rm -v "${PWD}:/home/datacontract" datacontract/cli:latest'
  ```
 
+ _Note:_ The output of Docker command line messages is limited to 80 columns and may include line breaks. Don't pipe docker output to files if you want to export code. Use the `--output` option instead.
+
+
 
  ## Optional Dependencies
 
@@ -741,10 +748,11 @@ servers:
 
  #### Environment Variables
 
- | Environment Variable | Example | Description |
- |------------------------------------|---------|-----------------------------|
- | `DATACONTRACT_KAFKA_SASL_USERNAME` | `xxx` | The SASL username (key). |
- | `DATACONTRACT_KAFKA_SASL_PASSWORD` | `xxx` | The SASL password (secret). |
+ | Environment Variable | Example | Description |
+ |-------------------------------------|---------|----------------------------------------------------------------------------------|
+ | `DATACONTRACT_KAFKA_SASL_USERNAME` | `xxx` | The SASL username (key). |
+ | `DATACONTRACT_KAFKA_SASL_PASSWORD` | `xxx` | The SASL password (secret). |
+ | `DATACONTRACT_KAFKA_SASL_MECHANISM` | `PLAIN` | Default `PLAIN`. Other supported mechanisms: `SCRAM-SHA-256` and `SCRAM-SHA-512` |
 
 
  ### Postgres
@@ -799,6 +807,10 @@ models:
  fields:
    my_column_1: # corresponds to a column
      type: varchar
+   my_column_2: # corresponds to a column with custom trino type
+     type: object
+     config:
+       trinoType: row(en_us varchar, pt_br varchar)
  ```
 
  #### Environment Variables
@@ -825,7 +837,7 @@ models:
  │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
  │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
  │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
- │ y|dbml|spark|sqlalchemy] │
+ │ y|dbml|spark|sqlalchemy|data-caterer|dcs] │
  │ --output PATH Specify the file path where the exported data will be │
  │ saved. If no path is provided, the output will be │
  │ printed to stdout. │
@@ -857,26 +869,30 @@ Available export options:
 
  | Type | Description | Status |
  |----------------------|---------------------------------------------------------|--------|
- | `html` | Export to HTML | ✅ |
- | `jsonschema` | Export to JSON Schema | ✅ |
- | `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
- | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
- | `dbt` | Export to dbt models in YAML format | ✅ |
- | `dbt-sources` | Export to dbt sources in YAML format | ✅ |
- | `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
- | `rdf` | Export data contract to RDF representation in N3 format | ✅ |
- | `avro` | Export to AVRO models | ✅ |
- | `protobuf` | Export to Protobuf | ✅ |
- | `terraform` | Export to terraform resources | ✅ |
- | `sql` | Export to SQL DDL | ✅ |
- | `sql-query` | Export to SQL Query | ✅ |
- | `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
- | `bigquery` | Export to BigQuery Schemas | ✅ |
- | `go` | Export to Go types | ✅ |
- | `pydantic-model` | Export to pydantic models | ✅ |
- | `DBML` | Export to a DBML Diagram description | ✅ |
- | `spark` | Export to a Spark StructType | ✅ |
- | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
+ | `html` | Export to HTML | ✅ |
+ | `jsonschema` | Export to JSON Schema | ✅ |
+ | `odcs_v2` | Export to Open Data Contract Standard (ODCS) V2 | ✅ |
+ | `odcs_v3` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
+ | `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
+ | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
+ | `dbt` | Export to dbt models in YAML format | ✅ |
+ | `dbt-sources` | Export to dbt sources in YAML format | ✅ |
+ | `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
+ | `rdf` | Export data contract to RDF representation in N3 format | ✅ |
+ | `avro` | Export to AVRO models | ✅ |
+ | `protobuf` | Export to Protobuf | ✅ |
+ | `terraform` | Export to terraform resources | ✅ |
+ | `sql` | Export to SQL DDL | ✅ |
+ | `sql-query` | Export to SQL Query | ✅ |
+ | `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
+ | `bigquery` | Export to BigQuery Schemas | ✅ |
+ | `go` | Export to Go types | ✅ |
+ | `pydantic-model` | Export to pydantic models | ✅ |
+ | `DBML` | Export to a DBML Diagram description | ✅ |
+ | `spark` | Export to a Spark StructType | ✅ |
+ | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
+ | `data-caterer` | Export to Data Caterer in YAML format | ✅ |
+ | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
  | Missing something? | Please create an issue on GitHub | TBD |
 
  #### Great Expectations
@@ -940,6 +956,20 @@ To specify custom Avro properties in your data contract, you can define them wit
 
  >NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.htm)
 
+ #### Data Caterer
+
+ The export function converts the data contract to a data generation task in YAML format that can be
+ ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
+ ability to generate production-like data in any environment based off your data contract.
+
+ ```shell
+ datacontract export datacontract.yaml --format data-caterer --model orders
+ ```
+
+ You can further customise the way data is generated via adding
+ [additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
+ to suit your needs.
+
  #### Example Configuration
 
  ```yaml
@@ -980,7 +1010,7 @@ models:
 
  ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
  │ * --format [sql|avro|dbt|glue|jsonschema|bigquery|odcs The format of the source file. │
- │ |unity|spark] [default: None] │
+ │ |unity|spark|iceberg] [default: None] │
  │ [required] │
  │ --source TEXT The path to the file or Glue Database that │
  │ should be imported. │
@@ -1010,6 +1040,8 @@ models:
  │ file (repeat for multiple table names, leave │
  │ empty for all tables in the file). │
  │ [default: None] │
+ │ --iceberg-table TEXT Table name to assign to the model created │
+ │ from the Iceberg schema. [default: None] │
  │ --help Show this message and exit. │
  ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  ```
@@ -1035,12 +1067,25 @@ Available import options:
  | `spark` | Import from Spark StructTypes | ✅ |
  | `dbml` | Import from DBML models | ✅ |
  | `protobuf` | Import from Protobuf schemas | TBD |
+ | `iceberg` | Import from an Iceberg JSON Schema Definition | partial |
  | Missing something? | Please create an issue on GitHub | TBD |
 
 
+ #### ODCS
+
+ Import from Open Data Contract Standard (ODCS) v2 or v3.
+ The importer automatically detects the ODCS version and imports the data contract.
+
+ Examples:
+
+ ```bash
+ # Example import from ODCS
+ datacontract import --format odcs --source my_data_contract.odcs.yaml
+ ```
+
 
  #### BigQuery
 
- Bigquery data can either be imported off of JSON Files generated from the table descriptions or directly from the Bigquery API. In case you want to use JSON Files, specify the `source` parameter with a path to the JSON File.
+ BigQuery data can either be imported off of JSON Files generated from the table descriptions or directly from the Bigquery API. In case you want to use JSON Files, specify the `source` parameter with a path to the JSON File.
 
  To import from the Bigquery API, you have to _omit_ `source` and instead need to provide `bigquery-project` and `bigquery-dataset`. Additionally you may specify `bigquery-table` to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the dataset will be imported.
 
@@ -1152,6 +1197,15 @@ datacontract import --format dbml --source <file_path> --dbml-table <table_name_
  datacontract import --format dbml --source <file_path> --dbml-table <table_name_1> --dbml-schema <schema_1>
  ```
 
+ #### Iceberg
+
+ Importing from an [Iceberg Table Json Schema Definition](https://iceberg.apache.org/spec/#appendix-c-json-serialization). Specify location of json files using the `source` parameter.
+
+ Examples:
+
+ ```bash
+ datacontract import --format iceberg --source ./tests/fixtures/iceberg/simple_schema.json --iceberg-table test-table
+ ```
 
  ### breaking