datacontract-cli 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

datacontract/cli.py CHANGED
@@ -87,14 +87,16 @@ def test(
          "Use the key of the server object in the data contract yaml file "
          "to refer to a server, e.g., `production`, or `all` for all "
          "servers (default).")] = "all",
+     examples: Annotated[bool, typer.Option(
+         help="Run the schema and quality tests on the example data within the data contract.")] = None,
      publish: Annotated[str, typer.Option(
-         help="")] = None,
+         help="The url to publish the results after the test")] = None,
  ):
      """
      Run schema and quality tests on configured servers.
      """
      print(f"Testing {location}")
-     run = DataContract(data_contract_file=location, publish_url=publish).test()
+     run = DataContract(data_contract_file=location, publish_url=publish, examples=examples).test()
      _handle_result(run)
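
The new `--examples` option is passed straight through to `DataContract`. A minimal sketch of the programmatic equivalent, assuming an installed package and a contract file in the working directory (`datacontract.yaml` is a placeholder path):

```python
# Sketch: programmatic equivalent of `datacontract test --examples datacontract.yaml`.
# examples=True runs the schema and quality checks against the inline example
# data in the contract instead of a configured server.
from datacontract.data_contract import DataContract

run = DataContract(data_contract_file="datacontract.yaml", examples=True).test()
print(run.result)
```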

datacontract/data_contract.py CHANGED
@@ -1,5 +1,9 @@
  import json
  import logging
+ import tempfile
+ from typing import List
+
+ import yaml

  from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import \
      check_that_datacontract_contains_valid_server_configuration
@@ -13,7 +17,7 @@ from datacontract.integration.publish_datamesh_manager import \
  from datacontract.lint import resolve
  from datacontract.lint.linters.example_model_linter import ExampleModelLinter
  from datacontract.model.data_contract_specification import \
-     DataContractSpecification
+     DataContractSpecification, Server
  from datacontract.model.exceptions import DataContractException
  from datacontract.model.run import \
      Run, Check
@@ -26,13 +30,17 @@ class DataContract:
          data_contract_str: str = None,
          data_contract: DataContractSpecification = None,
          server: str = None,
+         examples: bool = False,
          publish_url: str = None,
+         spark: str = None,
      ):
          self._data_contract_file = data_contract_file
          self._data_contract_str = data_contract_str
          self._data_contract = data_contract
          self._server = server
+         self._examples = examples
          self._publish_url = publish_url
+         self._spark = spark

      def lint(self):
          run = Run.create_run()
@@ -80,20 +88,27 @@ class DataContract:

              check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
              # TODO check yaml contains models
-             server_name = list(data_contract.servers.keys())[0]
-             server = data_contract.servers.get(server_name)
-             run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
-             run.dataContractId = data_contract.id
-             run.dataContractVersion = data_contract.info.version
-             run.dataProductId = server.dataProductId
-             run.outputPortId = server.outputPortId
-             run.server = server_name

-             # 5. check server is supported type
-             # 6. check server credentials are complete
-             if server.format == "json":
-                 check_jsonschema(run, data_contract, server)
-             check_soda_execute(run, data_contract, server)
+             with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
+                 if self._examples:
+                     server_name = "examples"
+                     server = self._get_examples_server(data_contract, run, tmp_dir)
+                 else:
+                     server_name = list(data_contract.servers.keys())[0]
+                     server = data_contract.servers.get(server_name)
+
+                 run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
+                 run.dataContractId = data_contract.id
+                 run.dataContractVersion = data_contract.info.version
+                 run.dataProductId = server.dataProductId
+                 run.outputPortId = server.outputPortId
+                 run.server = server_name
+
+                 # 5. check server is supported type
+                 # 6. check server credentials are complete
+                 if server.format == "json":
+                     check_jsonschema(run, data_contract, server)
+                 check_soda_execute(run, data_contract, server, self._spark)

          except DataContractException as e:
              run.checks.append(Check(
@@ -123,6 +138,7 @@ class DataContract:

          return run

+
      def diff(self, other):
          pass

@@ -138,3 +154,35 @@ class DataContract:
          else:
              print(f"Export format {export_format} not supported.")
              return ""
+
+     def _get_examples_server(self, data_contract, run, tmp_dir):
+         run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+         format = "json"
+         for example in data_contract.examples:
+             format = example.type
+             p = f"{tmp_dir}/{example.model}.{format}"
+             run.log_info(f"Creating example file {p}")
+             with open(p, "w") as f:
+                 content = ""
+                 if format == "json" and type(example.data) is list:
+                     content = json.dumps(example.data)
+                 elif format == "json" and type(example.data) is str:
+                     content = example.data
+                 elif format == "yaml" and type(example.data) is list:
+                     content = yaml.dump(example.data)
+                 elif format == "yaml" and type(example.data) is str:
+                     content = example.data
+                 elif format == "csv":
+                     content = example.data
+                 logging.debug(f"Content of example file {p}: {content}")
+                 f.write(content)
+         path = f"{tmp_dir}" + "/{model}." + format
+         delimiter = "array"
+         server = Server(
+             type="local",
+             path=path,
+             format=format,
+             delimiter=delimiter,
+         )
+         run.log_info(f"Using {server} for testing the examples")
+         return server
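
To make the branching above concrete, here is a minimal standalone sketch of the serialization rules `_get_examples_server` applies; the `example` dict is a hypothetical stand-in for one parsed entry of `data_contract.examples`:

```python
import json

import yaml

# Hypothetical stand-in for one entry of the contract's `examples` section.
example = {"type": "json", "model": "orders", "data": [{"order_id": "1001"}]}

fmt = example["type"]
if fmt == "json" and isinstance(example["data"], list):
    content = json.dumps(example["data"])  # written to <tmp_dir>/orders.json
elif fmt == "yaml" and isinstance(example["data"], list):
    content = yaml.dump(example["data"])   # written to <tmp_dir>/orders.yaml
else:
    content = example["data"]              # csv payloads and inline strings pass through
print(content)  # [{"order_id": "1001"}]
```

The resulting server is of type `local`, pointing at `<tmp_dir>/{model}.<format>` with an `array` delimiter, so the regular local-file test path picks the example files up.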

datacontract/engines/fastjsonschema/check_jsonschema.py CHANGED
@@ -29,29 +29,40 @@ def validate_json_stream(model_name, validate, json_stream):
      )


- def read_json_lines(file_content: str):
+ def read_json_lines(file):
+     file_content = file.read()
      for line in file_content.splitlines():
          yield json.loads(line)


- def read_json_lines_from_file(file):
-     for line in file:
+ def read_json_lines_content(file_content: str):
+     for line in file_content.splitlines():
          yield json.loads(line)


  def read_json_array(file):
-     data = json.loads(file)
+     data = json.load(file)
+     for item in data:
+         yield item
+
+
+ def read_json_array_content(file_content: str):
+     data = json.loads(file_content)
      for item in data:
          yield item


  def read_json_file(file):
-     yield json.loads(file)
+     yield json.load(file)
+
+
+ def read_json_file_content(file_content: str):
+     yield json.loads(file_content)


  def process_json_file(run, model_name, validate, file, delimiter):
      if delimiter == "new_line":
-         json_stream = read_json_lines_from_file(file)
+         json_stream = read_json_lines(file)
      elif delimiter == "array":
          json_stream = read_json_array(file)
      else:
@@ -62,11 +73,12 @@ def process_json_file(run, model_name, validate, file, delimiter):
  def process_local_file(run, server, model_name, validate):
      path = server.path
      if "{model}" in path:
-         path = path.format(model = model_name)
+         path = path.format(model=model_name)

      if os.path.isdir(path):
          return process_directory(run, path, server, model_name, validate)
      else:
+         logging.info(f"Processing file {path}")
          with open(path, 'r') as file:
              process_json_file(run, model_name, validate, file, server.delimiter)

@@ -87,16 +99,16 @@ def process_s3_file(server, model_name, validate):
      s3_endpoint_url = server.endpointUrl
      s3_location = server.location
      if "{model}" in s3_location:
-         s3_location = s3_location.format(model = model_name)
+         s3_location = s3_location.format(model=model_name)
      json_stream = None

      for file_content in yield_s3_files(s3_endpoint_url, s3_location):
          if server.delimiter == "new_line":
-             json_stream = read_json_lines(file_content)
+             json_stream = read_json_lines_content(file_content)
          elif server.delimiter == "array":
-             json_stream = read_json_array(file_content)
+             json_stream = read_json_array_content(file_content)
          else:
-             json_stream = read_json_file(file_content)
+             json_stream = read_json_file_content(file_content)

      if json_stream is None:
          raise DataContractException(
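
The refactoring splits each reader into a file-object variant (local files) and a `_content` variant that takes an already-downloaded string (S3). Note also that `read_json_array` and `read_json_file` now use `json.load(file)` instead of `json.loads`, matching their file-object arguments. A minimal sketch of the contract between the two variants, with the functions re-declared locally for illustration:

```python
import io
import json


def read_json_lines(file):  # file-object variant, as in the diff above
    for line in file.read().splitlines():
        yield json.loads(line)


def read_json_lines_content(file_content: str):  # string variant for S3 payloads
    for line in file_content.splitlines():
        yield json.loads(line)


payload = '{"id": 1}\n{"id": 2}'
assert list(read_json_lines(io.StringIO(payload))) == [{"id": 1}, {"id": 2}]
assert list(read_json_lines_content(payload)) == [{"id": 1}, {"id": 2}]
```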

datacontract/engines/fastjsonschema/s3/s3_read_files.py CHANGED
@@ -9,7 +9,7 @@ def yield_s3_files(s3_endpoint_url, s3_location):
      files = fs.glob(s3_location)
      for file in files:
          with fs.open(file) as f:
-             logging.info(f"Reading file {file}")
+             logging.info(f"Downloading file {file}")
              yield f.read()


datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -2,7 +2,13 @@ import logging

  from soda.scan import Scan

+ from datacontract.engines.soda.connections.bigquery import \
+     to_bigquery_soda_configuration
+ from datacontract.engines.soda.connections.databricks import \
+     to_databricks_soda_configuration
  from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
+ from datacontract.engines.soda.connections.postgres import \
+     to_postgres_soda_configuration
  from datacontract.engines.soda.connections.snowflake import \
      to_snowflake_soda_configuration
  from datacontract.export.sodacl_converter import to_sodacl
@@ -10,10 +16,9 @@ from datacontract.model.data_contract_specification import \
      DataContractSpecification, Server
  from datacontract.model.run import \
      Run, Check, Log
- from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration


- def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server):
+ def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark):
      if data_contract is None:
          run.log_warn("Cannot run engine soda-core, as data contract is invalid")
          return
@@ -44,6 +49,20 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
          soda_configuration_str = to_bigquery_soda_configuration(server)
          scan.add_configuration_yaml_str(soda_configuration_str)
          scan.set_data_source_name(server.type)
+     elif server.type == "postgres":
+         soda_configuration_str = to_postgres_soda_configuration(server)
+         scan.add_configuration_yaml_str(soda_configuration_str)
+         scan.set_data_source_name(server.type)
+     elif server.type == "databricks":
+         if spark is not None:
+             logging.info("Use Spark to connect to data source")
+             scan.add_spark_session(spark, data_source_name=server.type)
+             scan.set_data_source_name(server.type)
+             spark.sql(f"USE {server.catalog}.{server.schema_}")
+         else:
+             soda_configuration_str = to_databricks_soda_configuration(server)
+             scan.add_configuration_yaml_str(soda_configuration_str)
+             scan.set_data_source_name(server.type)
      else:
          run.checks.append(Check(
              type="general",

datacontract/engines/soda/connections/databricks.py ADDED
@@ -0,0 +1,20 @@
+ import os
+
+ import yaml
+
+
+ def to_databricks_soda_configuration(server):
+     soda_configuration = {
+         f"data_source {server.type}": {
+             "type": "spark",
+             "method": "databricks",
+             "host": server.host,
+             "catalog": server.catalog,
+             "schema": server.schema_,
+             "http_path": os.getenv('DATACONTRACT_DATABRICKS_HTTP_PATH'),
+             "token": os.getenv('DATACONTRACT_DATABRICKS_TOKEN'),
+         }
+     }
+
+     soda_configuration_str = yaml.dump(soda_configuration)
+     return soda_configuration_str
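
A hedged usage sketch for the new helper, assuming a `Server` populated with the new `host`/`catalog` fields (see the model change below); the host and catalog values are placeholders:

```python
from datacontract.engines.soda.connections.databricks import \
    to_databricks_soda_configuration
from datacontract.model.data_contract_specification import Server

server = Server(type="databricks",
                host="dbc-abcdefgh-1234.cloud.databricks.com",
                catalog="acme_catalog_prod",
                schema="orders_latest")  # fills schema_ via its 'schema' alias
print(to_databricks_soda_configuration(server))
# -> a YAML mapping under "data_source databricks" with type: spark,
#    method: databricks, plus the host, catalog, schema, http_path and token keys
```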

datacontract/engines/soda/connections/postgres.py ADDED
@@ -0,0 +1,21 @@
+ import os
+
+ import yaml
+
+
+ def to_postgres_soda_configuration(server):
+     # with service account key, using an external json file
+     soda_configuration = {
+         f"data_source {server.type}": {
+             "type": "postgres",
+             "host": server.host,
+             "port": str(server.port),
+             "username": os.getenv('DATACONTRACT_POSTGRES_USERNAME'),
+             "password": os.getenv('DATACONTRACT_POSTGRES_PASSWORD'),
+             "database": server.database,
+             "schema": server.schema_,
+         }
+     }
+
+     soda_configuration_str = yaml.dump(soda_configuration)
+     return soda_configuration_str
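
The Postgres variant follows the same pattern; a sketch assuming the credentials are exported as environment variables (placeholder values shown):

```python
import os

from datacontract.engines.soda.connections.postgres import \
    to_postgres_soda_configuration
from datacontract.model.data_contract_specification import Server

os.environ["DATACONTRACT_POSTGRES_USERNAME"] = "postgres"          # placeholder
os.environ["DATACONTRACT_POSTGRES_PASSWORD"] = "mysecretpassword"  # placeholder

server = Server(type="postgres", host="localhost", port=5432,
                database="postgres", schema="public")
print(to_postgres_soda_configuration(server))
```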

datacontract/model/data_contract_specification.py CHANGED
@@ -23,6 +23,11 @@ class Server(BaseModel):
      account: str = None
      database: str = None
      schema_: str = pydantic.fields.Field(default=None, alias='schema')
+     host: str = None
+     port: int = None
+     catalog: str = None
+     http_path: str = None  # Use ENV variable
+     token: str = None  # Use ENV variable
      dataProductId: str = None
      outputPortId: str = None


datacontract_cli-0.9.3.dist-info/METADATA → datacontract_cli-0.9.4.dist-info/METADATA RENAMED
@@ -1,14 +1,14 @@
  Metadata-Version: 2.1
  Name: datacontract-cli
- Version: 0.9.3
- Summary: Validate data contracts
+ Version: 0.9.4
+ Summary: Test data contracts
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
  Project-URL: Homepage, https://cli.datacontract.com
  Project-URL: Issues, https://github.com/datacontract/cli/issues
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
- Requires-Python: >=3.11
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: typer[all] ~=0.9.0
@@ -18,15 +18,19 @@ Requires-Dist: requests ~=2.31.0
  Requires-Dist: fastparquet ==2023.10.1
  Requires-Dist: soda-core-bigquery ~=3.1.5
  Requires-Dist: soda-core-duckdb ~=3.1.5
+ Requires-Dist: soda-core-postgres ~=3.1.5
  Requires-Dist: soda-core-snowflake ~=3.1.5
+ Requires-Dist: soda-core-spark[databricks] ~=3.1.5
+ Requires-Dist: soda-core-spark-df ~=3.1.5
  Requires-Dist: snowflake-connector-python[pandas] <3.8,>=3.6
- Requires-Dist: duckdb >=0.9.3.dev3920
+ Requires-Dist: duckdb ==0.10.0
  Requires-Dist: fastjsonschema ~=2.19.1
  Requires-Dist: python-dotenv ~=1.0.0
  Requires-Dist: s3fs ==2024.2.0
  Provides-Extra: dev
  Requires-Dist: pytest ; extra == 'dev'
  Requires-Dist: testcontainers-minio ; extra == 'dev'
+ Requires-Dist: testcontainers-postgres ; extra == 'dev'

  # Data Contract CLI

@@ -78,6 +82,9 @@ $ datacontract lint datacontract.yaml
  # execute schema and quality checks
  $ datacontract test datacontract.yaml

+ # execute schema and quality checks on the examples within the contract
+ $ datacontract test --examples datacontract.yaml
+
  # find differences between two data contracts (Coming Soon)
  $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml

@@ -137,17 +144,17 @@ pipx installs into an isolated environment.
  pipx install datacontract-cli
  ```

- ### Homebrew (coming soon)
+ ### Docker

  ```bash
- brew install datacontract/brew/datacontract
+ docker pull --platform linux/amd64 datacontract/cli
+ docker run --rm --platform linux/amd64 -v ${PWD}:/home/datacontract datacontract/cli
  ```

- ### Docker (coming soon)
+ Or via an alias that automatically uses the latest version:

  ```bash
- docker pull datacontract/cli
- docker run --rm -v ${PWD}:/datacontract datacontract/cli
+ alias datacontract='docker run --rm -v "${PWD}:/home/datacontract" --platform linux/amd64 datacontract/cli:latest'
  ```

  ## Documentation
@@ -170,11 +177,12 @@ The application uses different engines, based on the server `type`.
  | `s3`         | `json`     | Support for `new_line` delimited JSON files and one JSON record per file. | ✅ | fastjsonschema<br> soda-core-duckdb |
  | `s3`         | `csv`      | | ✅ | soda-core-duckdb |
  | `s3`         | `delta`    | | Coming soon | TBD |
- | `postgres`   | n/a        | | Coming soon | TBD |
- | `snowflake`  | n/a        | | ✅ | soda-core-snowflake |
- | `bigquery`   | n/a        | | ✅ | soda-core-bigquery |
+ | `postgres`   | n/a        | | | soda-core-postgres |
+ | `snowflake`  | n/a        | | ✅ | soda-core-snowflake |
+ | `bigquery`   | n/a        | | ✅ | soda-core-bigquery |
  | `redshift`   | n/a        | | Coming soon | TBD |
- | `databricks` | n/a        | | Coming soon | TBD |
+ | `databricks` | n/a        | Support for Databricks SQL with Unity catalog and Hive metastore. | | soda-core-spark |
+ | `databricks` | n/a        | Support for Spark for programmatic use in Notebooks. | ✅ | soda-core-spark-df |
  | `kafka`      | `json`     | | Coming soon | TBD |
  | `kafka`      | `avro`     | | Coming soon | TBD |
  | `kafka`      | `protobuf` | | Coming soon | TBD |
@@ -184,9 +192,11 @@ The application uses different engines, based on the server `type`.

  Feel free to create an issue, if you need support for an additional type.

- ### Server Type S3
+ ### S3
+
+ Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.

- Example:
+ #### Example

  datacontract.yaml
  ```yaml
@@ -195,24 +205,58 @@ servers:
      type: s3
      endpointUrl: https://minio.example.com # not needed with AWS S3
      location: s3://bucket-name/path/*/*.json
-     delimiter: new_line # new_line, array, or none
      format: json
+     delimiter: new_line # new_line, array, or none
  ```

- Environment variables
- ```bash
- export DATACONTRACT_S3_REGION=eu-central-1
- export DATACONTRACT_S3_ACCESS_KEY_ID=AKIAXV5Q5QABCDEFGH
- export DATACONTRACT_S3_SECRET_ACCESS_KEY=93S7LRrJcqLkdb2/XXXXXXXXXXXXX
+ #### Environment Variables
+
+ | Environment Variable                | Example                         | Description           |
+ |-------------------------------------|---------------------------------|-----------------------|
+ | `DATACONTRACT_S3_REGION`            | `eu-central-1`                  | Region of S3 bucket   |
+ | `DATACONTRACT_S3_ACCESS_KEY_ID`     | `AKIAXV5Q5QABCDEFGH`            | AWS Access Key ID     |
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
+
+
+ ### Postgres
+
+ Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+   postgres:
+     type: postgres
+     host: localhost
+     port: 5432
+     database: postgres
+     schema: public
+ models:
+   my_table_1: # corresponds to a table
+     type: table
+     fields:
+       my_column_1: # corresponds to a column
+         type: varchar
  ```

- ### Server Type BigQuery
+ #### Environment Variables
+
+ | Environment Variable             | Example            | Description |
+ |----------------------------------|--------------------|-------------|
+ | `DATACONTRACT_POSTGRES_USERNAME` | `postgres`         | Username    |
+ | `DATACONTRACT_POSTGRES_PASSWORD` | `mysecretpassword` | Password    |
+
+
+ ### BigQuery

  We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
  * BigQuery Job User
  * BigQuery Data Viewer

- Example:
+
+ #### Example

  datacontract.yaml
  ```yaml
@@ -227,15 +271,99 @@ models:
      fields: ...
  ```

- Required environment variable:
- ```bash
- export DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH=~/service-access-key.json # as saved on key creation by BigQuery
+ #### Environment Variables
+
+ | Environment Variable                           | Example                     | Description                                              |
+ |------------------------------------------------|-----------------------------|----------------------------------------------------------|
+ | `DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH` | `~/service-access-key.json` | Service Access key as saved on key creation by BigQuery  |
+
+
+ ### Databricks
+
+ Works with Unity Catalog and Hive metastore.
+
+ Needs a running SQL warehouse or compute cluster.
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+   production:
+     type: databricks
+     host: dbc-abcdefgh-1234.cloud.databricks.com
+     catalog: acme_catalog_prod
+     schema: orders_latest
+ models:
+   orders: # corresponds to a table
+     type: table
+     fields: ...
+ ```
+
+ #### Environment Variables
+
+ | Environment Variable                | Example                              | Description                                           |
+ |-------------------------------------|--------------------------------------|-------------------------------------------------------|
+ | `DATACONTRACT_DATABRICKS_TOKEN`     | `dapia00000000000000000000000000000` | The personal access token to authenticate             |
+ | `DATACONTRACT_DATABRICKS_HTTP_PATH` | `/sql/1.0/warehouses/b053a3ffffffff` | The HTTP path to the SQL warehouse or compute cluster |
+
+
+ ### Databricks (programmatic)
+
+ Works with Unity Catalog and Hive metastore.
+ When running in a notebook or pipeline, the provided `spark` session can be used.
+ Additional authentication is not required.
+
+ Requires a Databricks Runtime with Python >= 3.10.
+
+ #### Example
+
+ datacontract.yaml
+ ```yaml
+ servers:
+   production:
+     type: databricks
+     host: dbc-abcdefgh-1234.cloud.databricks.com # ignored, always use current host
+     catalog: acme_catalog_prod
+     schema: orders_latest
+ models:
+   orders: # corresponds to a table
+     type: table
+     fields: ...
+ ```
+
+ Notebook
+ ```python
+ %pip install git+https://github.com/datacontract/cli.git
+ dbutils.library.restartPython()
+
+ from datacontract.data_contract import DataContract
+
+ data_contract = DataContract(
+     data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
+     spark=spark)
+ run = data_contract.test()
+ run.result
  ```

+
+ ### Exports
+
+ Available export options:
+
+ | Type         | Description                                    | Status |
+ |--------------|------------------------------------------------|--------|
+ | `jsonschema` | Export to JSON Schema                          | ✅      |
+ | `sodacl`     | Export to SodaCL quality checks in YAML format | ✅      |
+ | `dbt`        | Export to dbt model in YAML format             | TBD    |
+ | `avro`       | Export to AVRO models                          | TBD    |
+ | `pydantic`   | Export to pydantic models                      | TBD    |
+ | `sql`        | Export to SQL DDL                              | TBD    |
+ | `protobuf`   | Export to Protobuf                             | TBD    |
+
  ## Development Setup

- Python base interpreter should be 3.11.x (unless
- working on 3.12 release candidate).
+ Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).

  ```bash
  # create venv
@@ -265,7 +393,7 @@ Docker Build

  ```
  docker build -t datacontract/cli .
- docker run --rm -v ${PWD}:/datacontract datacontract/cli
+ docker run --rm -v ${PWD}:/home/datacontract datacontract/cli
  ```

  ## Contribution
@@ -279,3 +407,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
  ## Credits

  Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/) and [Jochen Christ](https://www.linkedin.com/in/jochenchrist/).
+
+
+
+ <a href="https://github.com/datacontract/cli" class="github-corner" aria-label="View source on GitHub"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>

datacontract_cli-0.9.3.dist-info/RECORD → datacontract_cli-0.9.4.dist-info/RECORD RENAMED
@@ -1,17 +1,19 @@
  datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datacontract/cli.py,sha256=CJFoNxU8xmvMekaYJaDvniuPn0Jx2qdi8ZCSGtWebi8,4137
- datacontract/data_contract.py,sha256=6iZ_Kxz9r0q2Yx5OhEwtMSSrKPl3tzsuNIuEU_B7Q_Y,5446
+ datacontract/cli.py,sha256=CaU0B68__T6t_JzcfE4cQ54CCwKkvHnkatl_zVFIFQg,4349
+ datacontract/data_contract.py,sha256=BOJ8UuT75o-nQwuE-oHxHosSn6JC1F74OHQcmjiaoCs,7371
  datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py,sha256=Tj_REcEYl2BtIR_W9k0pjdjE4CvBE-4vpFrGAvvrde4,1557
  datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=V_YJyt1rKkkKhghU359vaAGtC8leIGmwqR4MlrLgCJ4,620
  datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py,sha256=bRoJp8a-Hvdc2OkbcTcS0tr8M7XxNzWbJAUFrc-ceiA,1393
- datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=eVISybDFXlrvU33UE1TJF9M_5lu4ba9tPsm6_KqV9R8,5279
- datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=5cjJf2zONs18FYUesV1io9fqg5kcDYvZ8RwhyGjNJA8,709
+ datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=QuvFkeA-cE2nsHL33wQLb7QuhBQg4AQDQ3pav-iw9uE,5596
+ datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=sCe028D8q04c2pYlzJuEXWmMZOQJLiaObyLXLe4UzUs,713
  datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datacontract/engines/soda/check_soda_execute.py,sha256=J0DiCc2WlJy6y43F1FVSwjeUPx3Tlf_L0aEIh3XZLnc,4614
+ datacontract/engines/soda/check_soda_execute.py,sha256=WPvan3olUY7tao_75Uun7khwYLBTSRX9wtE1vCRmWJA,5572
  datacontract/engines/soda/connections/bigquery.py,sha256=_hNd7Lmo6DjLb3nqVx_pfePwSYp3_3T_hwivVlATEyI,658
  datacontract/engines/soda/connections/dask.py,sha256=iQfu4swHN_QfY9l0TdSbqAQXJvfKMIxGoZ4xiNpi4eY,1497
+ datacontract/engines/soda/connections/databricks.py,sha256=tCVE2Q2BXjuxS5ZmDyH_qN6jigStBsfIikRYMQ5LKVs,561
  datacontract/engines/soda/connections/duckdb.py,sha256=_Tpfo5D1ahOUPHbnEZ1WloeCecQ2LYDUebIU3hnnBDg,2342
+ datacontract/engines/soda/connections/postgres.py,sha256=ow21gzxiV2_FyOXrFYeSRefLKwRQR5_qxtOR2T1rdTI,625
  datacontract/engines/soda/connections/snowflake.py,sha256=H941nOQULZKznmarVvZcvJhseMOUwfnMsv1r_P0MMb0,719
  datacontract/export/jsonschema_converter.py,sha256=gceZ-_euhedZzPfpVG8xYI16-ro9wLwAnqfkwDNWDTE,2977
  datacontract/export/sodacl_converter.py,sha256=7P6be3GAsUaLchqP6GNibKvpXmpo24D6z1NvOyJCjcI,2836
@@ -24,12 +26,12 @@ datacontract/lint/schema.py,sha256=9UipDhpY6jQEtC6vKZ44-NcVMbpPXBvs9HZYGQ0gsAM,1
  datacontract/lint/urls.py,sha256=LXg_yzAmG71fJPc_0QeWJ0cKEqkhtZhlZZf1hWMTFNE,1408
  datacontract/lint/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datacontract/lint/linters/example_model_linter.py,sha256=XGXDbNc_LLzwiWstXloJ8dpabxd3xV9IMq5XO3VjKw8,2898
- datacontract/model/data_contract_specification.py,sha256=PMtXLSWsQKDVldf0HEhe-7RexQdLqbB0ARzVI7-oxS0,2011
+ datacontract/model/data_contract_specification.py,sha256=pTLQH6YoiVrL7L1CrgIh0d0cBPRFV6SUPxGy1Va1TS8,2167
  datacontract/model/exceptions.py,sha256=zhhXnKWTzEyG54N9QDVpE5F986cKuHEXN0OcR5Zy8oc,1090
  datacontract/model/run.py,sha256=AejMAlTex2oh-zQQw6fifWntPnBSLLOB-7VaexG6Ef0,2484
- datacontract_cli-0.9.3.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
- datacontract_cli-0.9.3.dist-info/METADATA,sha256=dzHbL3yJPwfrlXjt9PLQ4ajSrH-sZFOPdcTtHtO9G2o,10864
- datacontract_cli-0.9.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- datacontract_cli-0.9.3.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
- datacontract_cli-0.9.3.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
- datacontract_cli-0.9.3.dist-info/RECORD,,
+ datacontract_cli-0.9.4.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+ datacontract_cli-0.9.4.dist-info/METADATA,sha256=Ks35GB42Js-cwZx-r2x5QfPZLM7etMTKprkc0Ui_Sso,17052
+ datacontract_cli-0.9.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ datacontract_cli-0.9.4.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+ datacontract_cli-0.9.4.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+ datacontract_cli-0.9.4.dist-info/RECORD,,