datacontract-cli 0.10.14__py3-none-any.whl → 0.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datacontract-cli might be problematic.

Files changed (69)
  1. datacontract/breaking/breaking.py +229 -11
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +100 -33
  5. datacontract/data_contract.py +26 -4
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +7 -5
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +12 -12
  11. datacontract/export/avro_idl_converter.py +1 -2
  12. datacontract/export/bigquery_converter.py +4 -3
  13. datacontract/export/data_caterer_converter.py +1 -1
  14. datacontract/export/dbml_converter.py +2 -4
  15. datacontract/export/dbt_converter.py +45 -39
  16. datacontract/export/exporter.py +2 -1
  17. datacontract/export/exporter_factory.py +7 -2
  18. datacontract/export/go_converter.py +3 -2
  19. datacontract/export/great_expectations_converter.py +202 -40
  20. datacontract/export/html_export.py +1 -1
  21. datacontract/export/iceberg_converter.py +188 -0
  22. datacontract/export/jsonschema_converter.py +3 -2
  23. datacontract/export/odcs_v2_exporter.py +1 -1
  24. datacontract/export/odcs_v3_exporter.py +44 -30
  25. datacontract/export/pandas_type_converter.py +40 -0
  26. datacontract/export/protobuf_converter.py +1 -1
  27. datacontract/export/rdf_converter.py +4 -5
  28. datacontract/export/sodacl_converter.py +9 -4
  29. datacontract/export/spark_converter.py +7 -6
  30. datacontract/export/sql_converter.py +1 -2
  31. datacontract/export/sqlalchemy_converter.py +1 -2
  32. datacontract/export/terraform_converter.py +1 -1
  33. datacontract/imports/avro_importer.py +1 -1
  34. datacontract/imports/bigquery_importer.py +1 -1
  35. datacontract/imports/dbml_importer.py +2 -2
  36. datacontract/imports/dbt_importer.py +80 -15
  37. datacontract/imports/glue_importer.py +5 -3
  38. datacontract/imports/iceberg_importer.py +17 -7
  39. datacontract/imports/importer.py +1 -0
  40. datacontract/imports/importer_factory.py +7 -1
  41. datacontract/imports/jsonschema_importer.py +3 -2
  42. datacontract/imports/odcs_v2_importer.py +2 -2
  43. datacontract/imports/odcs_v3_importer.py +7 -2
  44. datacontract/imports/parquet_importer.py +81 -0
  45. datacontract/imports/spark_importer.py +2 -1
  46. datacontract/imports/sql_importer.py +1 -1
  47. datacontract/imports/unity_importer.py +3 -3
  48. datacontract/integration/opentelemetry.py +0 -1
  49. datacontract/lint/lint.py +2 -1
  50. datacontract/lint/linters/description_linter.py +1 -0
  51. datacontract/lint/linters/example_model_linter.py +1 -0
  52. datacontract/lint/linters/field_pattern_linter.py +1 -0
  53. datacontract/lint/linters/field_reference_linter.py +1 -0
  54. datacontract/lint/linters/notice_period_linter.py +1 -0
  55. datacontract/lint/linters/quality_schema_linter.py +1 -0
  56. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  57. datacontract/lint/resolve.py +7 -3
  58. datacontract/lint/schema.py +1 -1
  59. datacontract/model/data_contract_specification.py +13 -6
  60. datacontract/model/run.py +21 -12
  61. datacontract/templates/index.html +6 -6
  62. datacontract/web.py +2 -3
  63. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA +163 -60
  64. datacontract_cli-0.10.16.dist-info/RECORD +106 -0
  65. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL +1 -1
  66. datacontract_cli-0.10.14.dist-info/RECORD +0 -103
  67. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE +0 -0
  68. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt +0 -0
  69. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ import json
 import yaml
 
 from datacontract.model.data_contract_specification import DataContractSpecification, Example
+
 from ..lint import Linter, LinterResult
 
 
@@ -1,6 +1,7 @@
 import re
 
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
@@ -1,4 +1,5 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
@@ -1,6 +1,7 @@
 import re
 
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
@@ -1,6 +1,7 @@
 import yaml
 
 from datacontract.model.data_contract_specification import DataContractSpecification, Model
+
 from ..lint import Linter, LinterResult
 
 
@@ -1,4 +1,5 @@
 from datacontract.model.data_contract_specification import DataContractSpecification, Field
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/resolve.py CHANGED
@@ -9,7 +9,11 @@ from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
 from datacontract.lint.resources import read_resource
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
-from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    DeprecatedQuality,
+)
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.odcs import is_open_data_contract_standard
 
@@ -156,7 +160,7 @@ def _fetch_file(path) -> str:
         return file.read()
 
 
-def _resolve_quality_ref(quality: Quality):
+def _resolve_quality_ref(quality: DeprecatedQuality):
     """
     Return the content of a ref file path
     @param quality data contract quality specification
@@ -199,7 +203,7 @@ def _resolve_data_contract_from_str(
 
     if is_open_data_contract_standard(yaml_dict):
         # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
-        data_contract_specification = DataContractSpecification(dataContractSpecification="0.9.3")
+        data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
         return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
 
     _validate_data_contract_specification_schema(yaml_dict, schema_location)
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Dict, Any
+from typing import Any, Dict
 
 import requests
 
datacontract/model/data_contract_specification.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import List, Dict, Optional, Any
+from typing import Any, Dict, List, Optional
 
 import pydantic as pyd
 import yaml
@@ -147,6 +147,7 @@ class Field(pyd.BaseModel):
     format: str = None
     required: bool = None
     primary: bool = None
+    primaryKey: bool = None
     unique: bool | None = None
     references: str = None
     description: str | None = None
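For orientation: Data Contract Specification v1.1.0 spells this flag `primaryKey` at the field level, and the model now carries both attributes so older contracts that use `primary` keep validating. A minimal, hypothetical contract snippet using the new name:

```yaml
models:
  orders:
    fields:
      order_id:
        type: string
        primaryKey: true   # preferred spelling; the older `primary: true` still parses
```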
@@ -213,7 +214,8 @@ class Example(pyd.BaseModel):
     data: str | object = None
 
 
-class Quality(pyd.BaseModel):
+# Deprecated Quality class
+class DeprecatedQuality(pyd.BaseModel):
     type: str = None
     specification: str | object = None
 
@@ -282,9 +284,14 @@ class DataContractSpecification(pyd.BaseModel):
     terms: Terms = None
     models: Dict[str, Model] = {}
     definitions: Dict[str, Definition] = {}
-    # schema: Dict[str, str]
-    examples: List[Example] = []
-    quality: Quality = None
+    examples: List[Example] = pyd.Field(
+        default_factory=list,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use models.examples instead.",
+    )
+    quality: DeprecatedQuality = pyd.Field(
+        default=None,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use model-level and field-level quality instead.",
+    )
     servicelevels: Optional[ServiceLevel] = None
     links: Dict[str, str] = {}
     tags: List[str] = []
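Both fields now use pydantic's `deprecated` field metadata (available since pydantic 2.7): old documents still validate, but reading either attribute emits a `DeprecationWarning`. A minimal sketch of that mechanism, independent of the real model:

```python
import warnings

import pydantic as pyd


class Spec(pyd.BaseModel):
    # Hypothetical stand-in for the deprecated top-level `quality` field.
    quality: str | None = pyd.Field(default=None, deprecated="Use model-level quality instead.")


spec = Spec(quality="legacy")  # validation itself stays silent
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = spec.quality  # attribute access triggers the warning
print(caught[0].message)  # Use model-level quality instead.
```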
@@ -292,7 +299,7 @@ class DataContractSpecification(pyd.BaseModel):
     @classmethod
     def from_file(cls, file):
         if not os.path.exists(file):
-            raise (f"The file '{file}' does not exist.")
+            raise FileNotFoundError(f"The file '{file}' does not exist.")
         with open(file, "r") as file:
             file_content = file.read()
         return DataContractSpecification.from_string(file_content)
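The old line never produced the intended error: in Python 3, raising a plain string is itself a `TypeError`, so callers saw an unrelated exception instead of a missing-file message:

```python
try:
    raise "The file 'datacontract.yaml' does not exist."  # the old, buggy pattern
except TypeError as e:
    print(e)  # exceptions must derive from BaseException
```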
datacontract/model/run.py CHANGED
@@ -1,15 +1,24 @@
 import logging
 from datetime import datetime, timezone
+from enum import Enum
 from typing import List, Optional
 from uuid import UUID, uuid4
 
 from pydantic import BaseModel
 
 
+class ResultEnum(str, Enum):
+    passed = "passed"
+    warning = "warning"
+    failed = "failed"
+    error = "error"
+    unknown = "unknown"
+
+
 class Check(BaseModel):
     type: str
     name: Optional[str]
-    result: str  # passed, failed, warning, unknown
+    result: ResultEnum
     engine: str
     reason: Optional[str] = None
     model: Optional[str] = None
@@ -33,29 +42,29 @@ class Run(BaseModel):
     server: Optional[str] = None
     timestampStart: datetime
     timestampEnd: datetime
-    result: str = "unknown"  # passed, warning, failed, error, unknown
+    result: ResultEnum = ResultEnum.unknown
     checks: List[Check]
     logs: List[Log]
 
     def has_passed(self):
         self.calculate_result()
-        return self.result == "passed"
+        return self.result == ResultEnum.passed
 
     def finish(self):
         self.timestampEnd = datetime.now(timezone.utc)
         self.calculate_result()
 
     def calculate_result(self):
-        if any(check.result == "error" for check in self.checks):
-            self.result = "error"
-        elif any(check.result == "failed" for check in self.checks):
-            self.result = "failed"
-        elif any(check.result == "warning" for check in self.checks):
-            self.result = "warning"
-        elif any(check.result == "passed" for check in self.checks):
-            self.result = "passed"
+        if any(check.result == ResultEnum.error for check in self.checks):
+            self.result = ResultEnum.error
+        elif any(check.result == ResultEnum.failed for check in self.checks):
+            self.result = ResultEnum.failed
+        elif any(check.result == ResultEnum.warning for check in self.checks):
+            self.result = ResultEnum.warning
+        elif any(check.result == ResultEnum.passed for check in self.checks):
+            self.result = ResultEnum.passed
         else:
-            self.result = "unknown"
+            self.result = ResultEnum.unknown
 
     def log_info(self, message: str):
         logging.info(message)
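Because `ResultEnum` mixes in `str`, pydantic coerces the plain strings that engines already emit into enum members, and comparisons against the old string values still hold. A small sketch of the coercion and of the severity precedence applied by `calculate_result`, assuming only the fields visible in the hunks above (check names and values are illustrative):

```python
from datacontract.model.run import Check, ResultEnum

checks = [
    Check(type="schema", name="column types", result="passed", engine="soda"),
    Check(type="quality", name="row count", result="failed", engine="soda"),
]
assert checks[1].result is ResultEnum.failed  # string input coerced to the enum member

# Same precedence as calculate_result: error > failed > warning > passed > unknown
severity = [ResultEnum.error, ResultEnum.failed, ResultEnum.warning, ResultEnum.passed]
overall = next((s for s in severity if any(c.result == s for c in checks)), ResultEnum.unknown)
print(overall.value)  # failed
```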
datacontract/templates/index.html CHANGED
@@ -78,17 +78,17 @@
 
 <li class="col-span-1 rounded-lg bg-white shadow hover:bg-gray-50"
     data-search="{{
-      contract.spec.info.title|lower }} {{
-      contract.spec.info.owner|lower if contract.spec.info.owner else '' }} {{
-      contract.spec.info.description|lower }} {%
+      contract.spec.info.title|lower|e }} {{
+      contract.spec.info.owner|lower|e if contract.spec.info.owner else '' }} {{
+      contract.spec.info.description|lower|e }} {%
       for model_name, model in contract.spec.models.items() %}
-      {{ model.description|lower }} {%
+      {{ model.description|lower|e }} {%
       for field_name, field in model.fields.items() %}
-      {{ field_name|lower }} {{ field.description|lower if field.description else '' }} {%
+      {{ field_name|lower|e }} {{ field.description|lower|e if field.description else '' }} {%
       endfor %}
       {% endfor %}
       ">
-  <a href="{{contract.html_link}}" >
+  <a href="{{contract.html_link|e}}" >
   <div class="flex w-full justify-between space-x-1 p-6 pb-4">
     <div class="flex-1 truncate">
       <div class="flex items-center space-x-3">
datacontract/web.py CHANGED
@@ -1,11 +1,10 @@
-from typing import Annotated, Union, Optional
+from typing import Annotated, Optional, Union
 
 import typer
 from fastapi import FastAPI, File
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, PlainTextResponse
 
 from datacontract.data_contract import DataContract, ExportFormat
-from fastapi.responses import PlainTextResponse
 
 app = FastAPI()
 
{datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.14
+Version: 0.10.16
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -11,71 +11,72 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: typer<0.13,>=0.12
-Requires-Dist: pydantic<2.10.0,>=2.8.2
+Requires-Dist: typer<0.15,>=0.12
+Requires-Dist: pydantic<2.11.0,>=2.8.2
 Requires-Dist: pyyaml~=6.0.1
 Requires-Dist: requests<2.33,>=2.31
-Requires-Dist: fastapi==0.115.2
-Requires-Dist: uvicorn==0.30.6
-Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
-Requires-Dist: fastparquet==2024.5.0
-Requires-Dist: python-multipart==0.0.12
+Requires-Dist: fastapi==0.115.6
+Requires-Dist: uvicorn==0.32.1
+Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
+Requires-Dist: fastparquet==2024.11.0
+Requires-Dist: python-multipart==0.0.19
 Requires-Dist: rich<13.10,>=13.7
 Requires-Dist: simple-ddl-parser==1.7.1
 Requires-Dist: duckdb==1.1.2
-Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
+Requires-Dist: soda-core-duckdb<3.5.0,>=3.3.1
 Requires-Dist: setuptools>=60
 Requires-Dist: python-dotenv~=1.0.0
 Requires-Dist: rdflib==7.0.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
 Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
-Requires-Dist: boto3<1.35.45,>=1.34.41
-Requires-Dist: jinja-partials>=0.2.1
-Provides-Extra: all
-Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
+Requires-Dist: boto3<1.35.69,>=1.34.41
+Requires-Dist: jinja_partials>=0.2.1
 Provides-Extra: avro
 Requires-Dist: avro==1.12.0; extra == "avro"
 Provides-Extra: bigquery
-Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
+Requires-Dist: soda-core-bigquery<3.5.0,>=3.3.1; extra == "bigquery"
 Provides-Extra: databricks
-Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
+Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "databricks"
 Requires-Dist: databricks-sql-connector<3.6.0,>=3.1.2; extra == "databricks"
-Requires-Dist: databricks-sdk<0.36.0,>=0.32.0; extra == "databricks"
-Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
-Provides-Extra: dbml
-Requires-Dist: pydbml>=1.1.1; extra == "dbml"
+Requires-Dist: databricks-sdk<0.39.0,>=0.32.0; extra == "databricks"
+Requires-Dist: soda-core-spark[databricks]<3.5.0,>=3.3.1; extra == "databricks"
+Provides-Extra: iceberg
+Requires-Dist: pyiceberg==0.8.1; extra == "iceberg"
+Provides-Extra: kafka
+Requires-Dist: datacontract-cli[avro]; extra == "kafka"
+Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "kafka"
+Provides-Extra: postgres
+Requires-Dist: soda-core-postgres<3.5.0,>=3.3.1; extra == "postgres"
+Provides-Extra: s3
+Requires-Dist: s3fs==2024.10.0; extra == "s3"
+Provides-Extra: snowflake
+Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
+Requires-Dist: soda-core-snowflake<3.5.0,>=3.3.1; extra == "snowflake"
+Provides-Extra: sqlserver
+Requires-Dist: soda-core-sqlserver<3.5.0,>=3.3.1; extra == "sqlserver"
+Provides-Extra: trino
+Requires-Dist: soda-core-trino<3.5.0,>=3.3.1; extra == "trino"
 Provides-Extra: dbt
 Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
+Provides-Extra: dbml
+Requires-Dist: pydbml>=1.1.1; extra == "dbml"
+Provides-Extra: parquet
+Requires-Dist: pyarrow>=12.0.0; extra == "parquet"
+Provides-Extra: all
+Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
-Requires-Dist: httpx==0.27.2; extra == "dev"
+Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.18; extra == "dev"
+Requires-Dist: moto==5.0.22; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
-Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
-Requires-Dist: pyarrow>=12.0.0; extra == "dev"
+Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pymssql==2.3.1; extra == "dev"
+Requires-Dist: pymssql==2.3.2; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
-Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.8.2; extra == "dev"
+Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.0; extra == "dev"
 Requires-Dist: trino==0.330.0; extra == "dev"
-Provides-Extra: iceberg
-Requires-Dist: pyiceberg==0.7.1; extra == "iceberg"
-Provides-Extra: kafka
-Requires-Dist: datacontract-cli[avro]; extra == "kafka"
-Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
-Provides-Extra: postgres
-Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
-Provides-Extra: s3
-Requires-Dist: s3fs==2024.9.0; extra == "s3"
-Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
-Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
-Provides-Extra: sqlserver
-Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
-Provides-Extra: trino
-Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"
 
 # Data Contract CLI
 
@@ -273,6 +274,7 @@ A list of available extras:
 | Avro Support | `pip install datacontract-cli[avro]` |
 | Google BigQuery | `pip install datacontract-cli[bigquery]` |
 | Databricks Integration | `pip install datacontract-cli[databricks]` |
+| Iceberg | `pip install datacontract-cli[iceberg]` |
 | Kafka Integration | `pip install datacontract-cli[kafka]` |
 | PostgreSQL Integration | `pip install datacontract-cli[postgres]` |
 | S3 Integration | `pip install datacontract-cli[s3]` |
@@ -280,6 +282,8 @@ A list of available extras:
 | Microsoft SQL Server | `pip install datacontract-cli[sqlserver]` |
 | Trino | `pip install datacontract-cli[trino]` |
 | Dbt | `pip install datacontract-cli[dbt]` |
+| Dbml | `pip install datacontract-cli[dbml]` |
+| Parquet | `pip install datacontract-cli[parquet]` |
 
 
 
@@ -825,19 +829,19 @@ models:
 ### export
 
 ```
-
-Usage: datacontract export [OPTIONS] [LOCATION]
-
-Convert data contract to a specific format. Prints to stdout or to the specified output file.
-
+Usage: datacontract export [OPTIONS] [LOCATION]
+
+Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
+
 ╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────╮
 │ location [LOCATION] The location (url or path) of the data contract yaml. [default: datacontract.yaml] │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮
 │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
-│             t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
-│             ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
-│             y|dbml|spark|sqlalchemy|data-caterer|dcs] │
+│             t-staging-sql|odcs|odcs_v2|odcs_v3|rdf|avro|protobuf │
+│             |great-expectations|terraform|avro-idl|sql|sql-query │
+│             |html|go|bigquery|dbml|spark|sqlalchemy|data-caterer │
+│             |dcs|iceberg] │
 │   --output PATH Specify the file path where the exported data will be │
 │                 saved. If no path is provided, the output will be │
 │                 printed to stdout. │
@@ -847,6 +851,12 @@ models:
 │                 file to refer to a model, e.g., `orders`, or `all` │
 │                 for all models (default). │
 │                 [default: all] │
+│   --schema TEXT The location (url or path) of the Data Contract │
+│                 Specification JSON Schema │
+│                 [default: │
+│                 https://datacontract.com/datacontract.schema.json] │
+│   --engine TEXT [engine] The engine used for the Great Expectations run. │
+│                 [default: None] │
 │   --help        Show this message and exit. │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ RDF Options ──────────────────────────────────────────────────────────────────────────────────╮
@@ -857,12 +867,11 @@ models:
 │    detect the sql dialect via the specified servers in the data contract. │
 │    [default: auto] │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────╯
-
 ```
 
 ```bash
 # Example export data contract as HTML
-datacontract export --format html > datacontract.html
+datacontract export --format html --output datacontract.html
 ```
 
 Available export options:
@@ -893,11 +902,13 @@ Available export options:
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
 | `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
+| `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
 | Missing something? | Please create an issue on GitHub | TBD |
 
+
 #### Great Expectations
 
-The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
+The `export` function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
 If the contract includes multiple models, you need to specify the name of the model you wish to export.
 
 ```shell
@@ -907,7 +918,22 @@ datacontract export datacontract.yaml --format great-expectations --model order
 ```
 
 The export creates a list of expectations by utilizing:
 
 - The data from the Model definition with a fixed mapping
-- The expectations provided in the quality field for each model (find here the expectations gallery https://greatexpectations.io/expectations/)
+- The expectations provided in the quality field for each model (see the [Great Expectations Gallery](https://greatexpectations.io/expectations/))
+
+### Additional Arguments
+
+To further customize the export, the following optional arguments are available:
+
+- **`suite_name`**: The name of the expectation suite. This suite groups all generated expectations and provides a convenient identifier within Great Expectations. If not provided, a default suite name will be generated based on the model name(s).
+
+- **`engine`**: Specifies the engine used to run Great Expectations checks. Accepted values are:
+  - `pandas` — Use this when working with in-memory data frames through the Pandas library.
+  - `spark` — Use this for working with Spark dataframes.
+  - `sql` — Use this for working with SQL databases.
+
+- **`sql_server_type`**: Specifies the type of SQL server to connect with when `engine` is set to `sql`.
+
+Providing `sql_server_type` ensures that the appropriate SQL dialect and connection settings are applied during the expectation validation.
 
 #### RDF
 
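Taken together with the `--engine` option shown in the `export` help above, a pandas-engine run would look roughly like this (contract location and model name reuse the example above):

```bash
datacontract export datacontract.yaml --format great-expectations --model orders --engine pandas
```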
@@ -1000,18 +1026,81 @@ models:
 - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
 - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.
 
+#### Iceberg
+
+Exports to an [Iceberg Table Json Schema Definition](https://iceberg.apache.org/spec/#appendix-c-json-serialization).
 
-### import
+This export only supports a single model at a time, because an Iceberg schema definition describes a single table and the exporter maps one model to one table; use the `--model` flag
+to limit your contract export to a single model.
 
+```bash
+$ datacontract export --format iceberg --model orders https://datacontract.com/examples/orders-latest/datacontract.yaml --output /tmp/orders_iceberg.json
+
+$ cat /tmp/orders_iceberg.json | jq '.'
+{
+  "type": "struct",
+  "fields": [
+    {
+      "id": 1,
+      "name": "order_id",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 2,
+      "name": "order_timestamp",
+      "type": "timestamptz",
+      "required": true
+    },
+    {
+      "id": 3,
+      "name": "order_total",
+      "type": "long",
+      "required": true
+    },
+    {
+      "id": 4,
+      "name": "customer_id",
+      "type": "string",
+      "required": false
+    },
+    {
+      "id": 5,
+      "name": "customer_email_address",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 6,
+      "name": "processed_timestamp",
+      "type": "timestamptz",
+      "required": true
+    }
+  ],
+  "schema-id": 0,
+  "identifier-field-ids": [
+    1
+  ]
+}
 ```
-Usage: datacontract import [OPTIONS]
 
-Create a data contract from the given source location. Prints to stdout.
+### import
+
+```
+Usage: datacontract import [OPTIONS]
+
+Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise
+prints to stdout.
 
 ╭─ Options ───────────────────────────────────────────────────────────────────────────────────╮
-│ * --format [sql|avro|dbt|glue|jsonschema|bigquery|odcs The format of the source file. │
-│             |unity|spark|iceberg] [default: None] │
+│ * --format [sql|avro|dbt|dbml|glue|jsonschema|bigquery The format of the source file. │
+│             |odcs|unity|spark|iceberg|parquet] [default: None] │
 │             [required] │
+│   --output PATH Specify the file path where the Data │
+│                 Contract will be saved. If no path is │
+│                 provided, the output will be printed to │
+│                 stdout. │
+│                 [default: None] │
 │   --source TEXT The path to the file or Glue Database that │
 │                 should be imported. │
 │                 [default: None] │
@@ -1041,7 +1130,8 @@ models:
 │                        empty for all tables in the file). │
 │                        [default: None] │
 │   --iceberg-table TEXT Table name to assign to the model created │
-│                        from the Iceberg schema. [default: None]
+│                        from the Iceberg schema. │
+│                        [default: None] │
 │   --help               Show this message and exit. │
 ╰─────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
@@ -1050,6 +1140,8 @@ Example:
 ```bash
 # Example import from SQL DDL
 datacontract import --format sql --source my_ddl.sql
+# To save to file
+datacontract import --format sql --source my_ddl.sql --output datacontract.yaml
 ```
 
 Available import options:
@@ -1068,6 +1160,7 @@ Available import options:
 | `dbml` | Import from DBML models | ✅ |
 | `protobuf` | Import from Protobuf schemas | TBD |
 | `iceberg` | Import from an Iceberg JSON Schema Definition | partial |
+| `parquet` | Import from Parquet File Metadata | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
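Analogous to the SQL example above, a Parquet import reads the schema from the file's metadata; a sketch with an illustrative file name:

```bash
datacontract import --format parquet --source ./data/orders.parquet --output datacontract.yaml
```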
@@ -1270,6 +1363,16 @@ datacontract import --format iceberg --source ./tests/fixtures/iceberg/simple_sc
 ╰──────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
+Examples:
+
+```
+# Create a catalog right in the current folder
+datacontract catalog --output "."
+
+# Create a catalog based on a filename convention
+datacontract catalog --files "*.odcs.yaml"
+```
+
 ### Publish
 
 ```
@@ -1587,7 +1690,7 @@ if __name__ == "__main__":
 Output
 
 ```yaml
-dataContractSpecification: 0.9.3
+dataContractSpecification: 1.1.0
 id: uuid-custom
 info:
   title: my_custom_imported_data