datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (60) hide show
  1. datacontract/breaking/breaking.py +139 -63
  2. datacontract/breaking/breaking_rules.py +71 -54
  3. datacontract/cli.py +138 -45
  4. datacontract/data_contract.py +316 -78
  5. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  7. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  10. datacontract/engines/soda/check_soda_execute.py +46 -35
  11. datacontract/engines/soda/connections/bigquery.py +5 -3
  12. datacontract/engines/soda/connections/dask.py +0 -1
  13. datacontract/engines/soda/connections/databricks.py +2 -2
  14. datacontract/engines/soda/connections/duckdb.py +4 -4
  15. datacontract/engines/soda/connections/kafka.py +36 -17
  16. datacontract/engines/soda/connections/postgres.py +3 -3
  17. datacontract/engines/soda/connections/snowflake.py +4 -4
  18. datacontract/export/avro_converter.py +3 -7
  19. datacontract/export/avro_idl_converter.py +280 -0
  20. datacontract/export/dbt_converter.py +55 -80
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/jsonschema_converter.py +3 -1
  23. datacontract/export/odcs_converter.py +10 -12
  24. datacontract/export/protobuf_converter.py +99 -0
  25. datacontract/export/pydantic_converter.py +140 -0
  26. datacontract/export/rdf_converter.py +35 -12
  27. datacontract/export/sodacl_converter.py +24 -24
  28. datacontract/export/sql_converter.py +93 -0
  29. datacontract/export/sql_type_converter.py +131 -0
  30. datacontract/export/terraform_converter.py +71 -0
  31. datacontract/imports/avro_importer.py +106 -0
  32. datacontract/imports/sql_importer.py +0 -2
  33. datacontract/init/download_datacontract_file.py +2 -2
  34. datacontract/integration/publish_datamesh_manager.py +4 -9
  35. datacontract/integration/publish_opentelemetry.py +107 -0
  36. datacontract/lint/files.py +2 -2
  37. datacontract/lint/lint.py +46 -31
  38. datacontract/lint/linters/description_linter.py +34 -0
  39. datacontract/lint/linters/example_model_linter.py +67 -43
  40. datacontract/lint/linters/field_pattern_linter.py +34 -0
  41. datacontract/lint/linters/field_reference_linter.py +38 -0
  42. datacontract/lint/linters/notice_period_linter.py +55 -0
  43. datacontract/lint/linters/primary_field_linter.py +28 -0
  44. datacontract/lint/linters/quality_schema_linter.py +52 -0
  45. datacontract/lint/linters/valid_constraints_linter.py +99 -0
  46. datacontract/lint/resolve.py +53 -8
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +27 -5
  50. datacontract/model/data_contract_specification.py +45 -25
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +1 -1
  53. datacontract/web.py +5 -8
  54. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
  55. datacontract_cli-0.9.8.dist-info/RECORD +63 -0
  56. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
  57. datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
  58. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
  59. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
  60. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,17 @@
1
1
  import os
2
2
  from typing import List, Dict
3
3
 
4
- import pydantic
4
+ import pydantic as pyd
5
5
  import yaml
6
- from pydantic import BaseModel
7
6
 
8
7
 
9
- class Contact(BaseModel):
8
+ class Contact(pyd.BaseModel):
10
9
  name: str = None
11
10
  url: str = None
12
11
  email: str = None
13
12
 
14
13
 
15
- class Server(BaseModel):
14
+ class Server(pyd.BaseModel):
16
15
  type: str = None
17
16
  format: str = None
18
17
  project: str = None
@@ -23,31 +22,53 @@ class Server(BaseModel):
23
22
  location: str = None
24
23
  account: str = None
25
24
  database: str = None
26
- schema_: str = pydantic.fields.Field(default=None, alias='schema')
25
+ schema_: str = pyd.Field(default=None, alias="schema")
27
26
  host: str = None
28
27
  port: int = None
29
28
  catalog: str = None
30
29
  topic: str = None
31
- http_path: str = None # Use ENV variable
32
- token: str = None # Use ENV variable
30
+ http_path: str = None # Use ENV variable
31
+ token: str = None # Use ENV variable
33
32
  dataProductId: str = None
34
33
  outputPortId: str = None
35
34
 
36
35
 
37
- class Terms(BaseModel):
36
+ class Terms(pyd.BaseModel):
38
37
  usage: str = None
39
38
  limitations: str = None
40
39
  billing: str = None
41
40
  noticePeriod: str = None
42
41
 
43
42
 
44
- class Field(BaseModel):
45
- ref: str = None
43
+ class Definition(pyd.BaseModel):
44
+ domain: str = None
45
+ name: str = None
46
+ title: str = None
47
+ description: str = None
48
+ type: str = None
49
+ enum: List[str] = []
50
+ format: str = None
51
+ minLength: int = None
52
+ maxLength: int = None
53
+ pattern: str = None
54
+ minimum: int = None
55
+ exclusiveMinimum: int = None
56
+ maximum: int = None
57
+ exclusiveMaximum: int = None
58
+ pii: bool = None
59
+ classification: str = None
60
+ tags: List[str] = []
61
+
62
+
63
+ class Field(pyd.BaseModel):
64
+ ref: str = pyd.Field(default=None, alias="$ref")
65
+ ref_obj: Definition = pyd.Field(default=None, exclude=True)
46
66
  type: str = None
47
67
  format: str = None
48
68
  required: bool = None
49
69
  primary: bool = None
50
70
  unique: bool = None
71
+ references: str = None
51
72
  description: str = None
52
73
  pii: bool = None
53
74
  classification: str = None
@@ -55,25 +76,23 @@ class Field(BaseModel):
55
76
  minLength: int = None
56
77
  maxLength: int = None
57
78
  minimum: int = None
58
- minimumExclusive: int = None
79
+ exclusiveMinimum: int = None
59
80
  maximum: int = None
60
- maximumExclusive: int = None
81
+ exclusiveMaximum: int = None
61
82
  enum: List[str] = []
62
83
  tags: List[str] = []
63
- fields: Dict[str, 'Field'] = {}
64
-
65
- @property
66
- def ref(self):
67
- return self.schema.get("$ref")
84
+ fields: Dict[str, "Field"] = {}
85
+ items: "Field" = None
68
86
 
69
87
 
70
- class Model(BaseModel):
88
+ class Model(pyd.BaseModel):
71
89
  description: str = None
72
90
  type: str = None
91
+ namespace: str = None
73
92
  fields: Dict[str, Field] = {}
74
93
 
75
94
 
76
- class Info(BaseModel):
95
+ class Info(pyd.BaseModel):
77
96
  title: str = None
78
97
  version: str = None
79
98
  description: str = None
@@ -81,25 +100,26 @@ class Info(BaseModel):
81
100
  contact: Contact = None
82
101
 
83
102
 
84
- class Example(BaseModel):
103
+ class Example(pyd.BaseModel):
85
104
  type: str = None
86
105
  description: str = None
87
106
  model: str = None
88
107
  data: str | object = None
89
108
 
90
109
 
91
- class Quality(BaseModel):
110
+ class Quality(pyd.BaseModel):
92
111
  type: str = None
93
112
  specification: str | object = None
94
113
 
95
114
 
96
- class DataContractSpecification(BaseModel):
115
+ class DataContractSpecification(pyd.BaseModel):
97
116
  dataContractSpecification: str = None
98
117
  id: str = None
99
118
  info: Info = None
100
119
  servers: Dict[str, Server] = {}
101
120
  terms: Terms = None
102
121
  models: Dict[str, Model] = {}
122
+ definitions: Dict[str, Definition] = {}
103
123
  # schema: Dict[str, str]
104
124
  examples: List[Example] = []
105
125
  quality: Quality = None
@@ -107,8 +127,8 @@ class DataContractSpecification(BaseModel):
107
127
  @classmethod
108
128
  def from_file(cls, file):
109
129
  if not os.path.exists(file):
110
- raise(f"The file '{file}' does not exist.")
111
- with open(file, 'r') as file:
130
+ raise (f"The file '{file}' does not exist.")
131
+ with open(file, "r") as file:
112
132
  file_content = file.read()
113
133
  return DataContractSpecification.from_string(file_content)
114
134
 
@@ -118,4 +138,4 @@ class DataContractSpecification(BaseModel):
118
138
  return DataContractSpecification(**data)
119
139
 
120
140
  def to_yaml(self):
121
- return yaml.dump(self.model_dump(exclude_defaults=True, exclude_none=True), sort_keys=False)
141
+ return yaml.dump(self.model_dump(exclude_defaults=True, exclude_none=True), sort_keys=False, allow_unicode=True)
@@ -11,7 +11,17 @@ class DataContractException(Exception):
11
11
  message (str): General message for the error.
12
12
  """
13
13
 
14
- def __init__(self, type, name, reason, engine="datacontract", model=None, original_exception=None, result: str = "failed", message="Run operation failed"):
14
+ def __init__(
15
+ self,
16
+ type,
17
+ name,
18
+ reason,
19
+ engine="datacontract",
20
+ model=None,
21
+ original_exception=None,
22
+ result: str = "failed",
23
+ message="Run operation failed",
24
+ ):
15
25
  self.type = type
16
26
  self.name = name
17
27
  self.model = model
@@ -21,4 +31,5 @@ class DataContractException(Exception):
21
31
  self.original_exception = original_exception
22
32
  self.message = message
23
33
  super().__init__(
24
- f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}")
34
+ f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
35
+ )
datacontract/model/run.py CHANGED
@@ -32,7 +32,7 @@ class Run(BaseModel):
32
32
  server: Optional[str] = None
33
33
  timestampStart: datetime
34
34
  timestampEnd: datetime
35
- result: str = "unknown"
35
+ result: str = "unknown" # passed, warning, failed, error, unknown
36
36
  checks: List[Check]
37
37
  logs: List[Log]
38
38
 
datacontract/web.py CHANGED
@@ -1,6 +1,6 @@
1
- from typing import Annotated
1
+ from typing import Annotated, Union
2
2
 
3
- from fastapi import FastAPI, File, UploadFile
3
+ from fastapi import FastAPI, File
4
4
 
5
5
  from datacontract.data_contract import DataContract
6
6
 
@@ -8,10 +8,7 @@ app = FastAPI()
8
8
 
9
9
 
10
10
  @app.post("/lint")
11
- def lint(file: Annotated[bytes, File()]):
11
+ def lint(file: Annotated[bytes, File()], linters: Union[str, set[str]] = "all"):
12
12
  data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"))
13
- lint_result = data_contract.lint()
14
- return {
15
- "result": lint_result.result,
16
- "checks": lint_result.checks
17
- }
13
+ lint_result = data_contract.lint(enabled_linters=linters)
14
+ return {"result": lint_result.result, "checks": lint_result.checks}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datacontract-cli
3
- Version: 0.9.6.post2
3
+ Version: 0.9.8
4
4
  Summary: Test data contracts
5
5
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
6
6
  Project-URL: Homepage, https://cli.datacontract.com
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: typer[all] ~=0.9.0
14
+ Requires-Dist: typer[all] <0.13,>=0.9
15
15
  Requires-Dist: pydantic <2.7.0,>=2.5.3
16
16
  Requires-Dist: pyyaml ~=6.0.1
17
17
  Requires-Dist: requests ~=2.31.0
@@ -19,22 +19,27 @@ Requires-Dist: fastapi ==0.110.0
19
19
  Requires-Dist: fastparquet ==2024.2.0
20
20
  Requires-Dist: python-multipart ==0.0.9
21
21
  Requires-Dist: rich ~=13.7.0
22
- Requires-Dist: simple-ddl-parser ==1.0.3
23
- Requires-Dist: soda-core-bigquery ~=3.2.1
24
- Requires-Dist: soda-core-duckdb ~=3.2.1
25
- Requires-Dist: soda-core-postgres ~=3.2.1
26
- Requires-Dist: soda-core-snowflake ~=3.2.1
27
- Requires-Dist: soda-core-spark[databricks] ~=3.2.1
28
- Requires-Dist: soda-core-spark-df ~=3.2.1
22
+ Requires-Dist: simple-ddl-parser ==1.0.4
23
+ Requires-Dist: soda-core-bigquery <3.4.0,>=3.3.1
24
+ Requires-Dist: soda-core-duckdb <3.4.0,>=3.3.1
25
+ Requires-Dist: soda-core-postgres <3.4.0,>=3.3.1
26
+ Requires-Dist: soda-core-snowflake <3.4.0,>=3.3.1
27
+ Requires-Dist: soda-core-spark[databricks] <3.4.0,>=3.3.1
28
+ Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1
29
29
  Requires-Dist: snowflake-connector-python[pandas] <3.8,>=3.6
30
- Requires-Dist: duckdb ==0.10.0
30
+ Requires-Dist: duckdb ==0.10.1
31
31
  Requires-Dist: fastjsonschema ~=2.19.1
32
32
  Requires-Dist: python-dotenv ~=1.0.0
33
- Requires-Dist: s3fs ==2024.2.0
33
+ Requires-Dist: s3fs ==2024.3.1
34
34
  Requires-Dist: rdflib ==7.0.0
35
+ Requires-Dist: avro ==1.11.3
36
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16.0
37
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16.0
35
38
  Provides-Extra: dev
36
39
  Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
40
+ Requires-Dist: ruff ; extra == 'dev'
37
41
  Requires-Dist: pytest ; extra == 'dev'
42
+ Requires-Dist: testcontainers <4.0 ; extra == 'dev'
38
43
  Requires-Dist: testcontainers-minio ; extra == 'dev'
39
44
  Requires-Dist: testcontainers-postgres ; extra == 'dev'
40
45
  Requires-Dist: testcontainers-kafka ; extra == 'dev'
@@ -52,6 +57,16 @@ Requires-Dist: testcontainers-kafka ; extra == 'dev'
52
57
  The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
53
58
  It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
54
59
 
60
+ ![Main features of the Data Contract CLI](datacontractcli.png)
61
+
62
+ <div align="center">
63
+ <a href="https://www.youtube.com/watch?v=B1dixhgO2vQ">
64
+ <img
65
+ src="https://img.youtube.com/vi/B1dixhgO2vQ/0.jpg"
66
+ alt="Demo of Data Contract CLI"
67
+ style="width:100%;">
68
+ </a>
69
+ </div>
55
70
 
56
71
  ## Getting started
57
72
 
@@ -123,20 +138,20 @@ $ datacontract test --examples datacontract.yaml
123
138
  # find differences between to data contracts (Coming Soon)
124
139
  $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
125
140
 
126
- # fail pipeline on breaking changes (Coming Soon)
141
+ # find differences between two data contracts categorized into error, warning, and info.
142
+ $ datacontract changelog datacontract-v1.yaml datacontract-v2.yaml
143
+
144
+ # fail pipeline on breaking changes. Uses changelog internally and shows only errors and warnings.
127
145
  $ datacontract breaking datacontract-v1.yaml datacontract-v2.yaml
128
146
 
129
- # export model as jsonschema
147
+ # export model as jsonschema (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql (coming soon), sodacl, terraform)
130
148
  $ datacontract export --format jsonschema datacontract.yaml
131
149
 
132
- # export model as dbt
133
- $ datacontract export --format dbt datacontract.yaml
134
-
135
150
  # import sql
136
151
  $ datacontract import --format sql --source my_ddl.sql
137
152
 
138
- # import protobuf as model (Coming Soon)
139
- $ datacontract import --format protobuf --source my_protobuf_file.proto datacontract.yaml
153
+ # import avro
154
+ $ datacontract import --format avro --source avro_schema.avsc
140
155
  ```
141
156
 
142
157
  ## Programmatic (Python)
@@ -150,7 +165,15 @@ if not run.has_passed():
150
165
  # Abort pipeline, alert, or take corrective actions...
151
166
  ```
152
167
 
153
- ## Scenario: Integration with Data Mesh Manager
168
+ ## Integrations
169
+
170
+
171
+ | Integration | Option | Description |
172
+ |-------------------|------------------------------|-------------------------------------------------------------------------------------------------------|
173
+ | Data Mesh Manager | `--publish` | Push full results to the [Data Mesh Manager API](https://api.datamesh-manager.com/swagger/index.html) |
174
+ | OpenTelemetry | `--publish-to-opentelemetry` | Push result as gauge metrics (logs are planned) |
175
+
176
+ ### Integration with Data Mesh Manager
154
177
 
155
178
  If you use [Data Mesh Manager](https://datamesh-manager.com/), you can use the data contract URL and append the `--publish` option to send and display the test results. Set an environment variable for your API key.
156
179
 
@@ -160,9 +183,34 @@ $ EXPORT DATAMESH_MANAGER_API_KEY=xxx
160
183
  $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish
161
184
  ```
162
185
 
186
+ ### Integration with OpenTelemetry
163
187
 
188
+ If you use OpenTelemetry, you can use the data contract URL and append the `--publish-to-opentelemetry` option to send the test results to your OTLP-compatible instance, e.g., Prometheus.
164
189
 
190
+ The metric name is "datacontract.cli.test.result" and it uses the following encoding for the result:
165
191
 
192
+ | datacontract.cli.test.result | Description |
193
+ |-------|---------------------------------------|
194
+ | 0 | test run passed, no warnings |
195
+ | 1 | test run has warnings |
196
+ | 2 | test run failed |
197
+ | 3 | test run not possible due to an error |
198
+ | 4 | test status unknown |
199
+
200
+
201
+ ```bash
202
+ # Fetch current data contract, execute tests on production, and publish result to open telemetry
203
+ $ export OTEL_SERVICE_NAME=datacontract-cli
204
+ $ export OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
205
+ $ export OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret # Optional, when using SaaS Products
206
+ $ export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf # Optional, default is http/protobuf - use value grpc to use the gRPC protocol instead
207
+ # Send to OpenTelemetry
208
+ $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish-to-opentelemetry
209
+ ```
210
+
211
+ Current limitations:
212
+ - Currently, only the ConsoleExporter and the OTLP Exporter are supported
213
+ - Metrics only, no logs yet (but loosely planned)
166
214
 
167
215
  ## Installation
168
216
 
@@ -451,20 +499,36 @@ datacontract export --format dbt
451
499
 
452
500
  Available export options:
453
501
 
454
- | Type | Description | Status |
455
- |--------------------|---------------------------------------------------------|--------|
456
- | `jsonschema` | Export to JSON Schema | ✅ |
457
- | `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
458
- | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
459
- | `dbt` | Export to dbt models in YAML format | ✅ |
460
- | `dbt-sources` | Export to dbt sources in YAML format | ✅ |
461
- | `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
462
- | `rdf` | Export data contract to RDF representation in N3 format | ✅ |
463
- | `avro` | Export to AVRO models | ✅ |
464
- | `pydantic` | Export to pydantic models | TBD |
465
- | `sql` | Export to SQL DDL | TBD |
466
- | `protobuf` | Export to Protobuf | TBD |
467
- | Missing something? | Please create an issue on GitHub | TBD |
502
+ | Type | Description | Status |
503
+ |----------------------|---------------------------------------------------------|--------|
504
+ | `jsonschema` | Export to JSON Schema | ✅ |
505
+ | `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
506
+ | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
507
+ | `dbt` | Export to dbt models in YAML format | ✅ |
508
+ | `dbt-sources` | Export to dbt sources in YAML format | ✅ |
509
+ | `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
510
+ | `rdf` | Export data contract to RDF representation in N3 format | ✅ |
511
+ | `avro` | Export to AVRO models | ✅ |
512
+ | `protobuf` | Export to Protobuf | |
513
+ | `terraform` | Export to terraform resources | |
514
+ | `sql` | Export to SQL DDL | |
515
+ | `sql-query` | Export to SQL Query | |
516
+ | `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
517
+ | `bigquery` | Export to BigQuery Schemas | TBD |
518
+ | `pydantic` | Export to pydantic models | TBD |
519
+ | `html` | Export to HTML page | TBD |
520
+ | Missing something? | Please create an issue on GitHub | TBD |
521
+
522
+ #### Great Expectations
523
+ The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
524
+ If the contract includes multiple models, you need to specify the name of the model you wish to export.
525
+ ```shell
526
+ datacontract export datacontract.yaml --format great-expectations --model orders
527
+ ```
528
+ The export creates a list of expectations by utilizing:
529
+
530
+ - The data from the Model definition with a fixed mapping
531
+ - The expectations provided in the quality field for each model (find here the expectations gallery https://greatexpectations.io/expectations/)
468
532
 
469
533
  #### RDF
470
534
 
@@ -502,13 +566,120 @@ Available import options:
502
566
  | Type | Description | Status |
503
567
  |--------------------|------------------------------------------------|---------|
504
568
  | `sql` | Import from SQL DDL | ✅ |
569
+ | `avro` | Import from AVRO schemas | ✅ |
505
570
  | `protobuf` | Import from Protobuf schemas | TBD |
506
- | `avro` | Import from AVRO schemas | TBD |
507
571
  | `jsonschema` | Import from JSON Schemas | TBD |
572
+ | `bigquery` | Import from BigQuery Schemas | TBD |
508
573
  | `dbt` | Import from dbt models | TBD |
509
574
  | `odcs` | Import from Open Data Contract Standard (ODCS) | TBD |
510
575
  | Missing something? | Please create an issue on GitHub | TBD |
511
576
 
577
+ ## Best Practices
578
+
579
+ We share best practices in using the Data Contract CLI.
580
+
581
+ ### Data-first Approach
582
+
583
+ Create a data contract based on the actual data. This is the fastest way to get started and to get feedback from the data consumers.
584
+
585
+ 1. Use an existing physical schema (e.g., SQL DDL) as a starting point to define your logical data model in the contract. Double check right after the import whether the actual data meets the imported logical data model. Just to be sure.
586
+ ```bash
587
+ $ datacontract import --format sql ddl.sql
588
+ $ datacontract test
589
+ ```
590
+
591
+ 2. Add examples to the `datacontract.yaml`. If you can, use actual data and anonymize. Make sure that the examples match the imported logical data model.
592
+ ```bash
593
+ $ datacontract test --examples
594
+ ```
595
+
596
+
597
+ 3. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
598
+ ```bash
599
+ $ datacontract test --examples
600
+ $ datacontract test
601
+ ```
602
+
603
+ 4. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You probably forgot to document some fields and add the terms and conditions.
604
+ ```bash
605
+ $ datacontract lint
606
+ ```
607
+
608
+ 5. Set up a CI pipeline that executes daily and reports the results to the [Data Mesh Manager](https://datamesh-manager.com). Or to some place else. You can even publish to any opentelemetry compatible system.
609
+ ```bash
610
+ $ datacontract test --publish https://api.datamesh-manager.com/api/runs
611
+ ```
612
+
613
+ ### Contract-First
614
+
615
+ Create a data contract based on the requirements from use cases.
616
+
617
+ 1. Start with a `datacontract.yaml` template.
618
+ ```bash
619
+ $ datacontract init
620
+ ```
621
+
622
+ 2. Add examples to the `datacontract.yaml`. Do not start with the data model, although you are probably tempted to do that. Examples are the fastest way to get feedback from everybody and not lose anyone in the discussion.
623
+
624
+ 3. Create the model based on the examples. Test the model against the examples to double-check whether the model matches the examples.
625
+ ```bash
626
+ $ datacontract test --examples
627
+ ```
628
+
629
+ 4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
630
+ ```bash
631
+ $ datacontract test --examples
632
+ ```
633
+
634
+ 5. Fill in the terms, descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the linter.
635
+ ```bash
636
+ $ datacontract lint
637
+ ```
638
+
639
+ 6. Set up a CI pipeline that lints and tests the examples so you make sure that any changes later do not decrease the quality of the contract.
640
+ ```bash
641
+ $ datacontract lint
642
+ $ datacontract test --examples
643
+ ```
644
+
645
+ 7. Use the export function to start building the providing data product as well as the integration into the consuming data products.
646
+ ```bash
647
+ # data provider
648
+ $ datacontract export --format dbt
649
+ # data consumer
650
+ $ datacontract export --format dbt-sources
651
+ $ datacontract export --format dbt-staging-sql
652
+ ```
653
+
654
+ ### Schema Evolution
655
+
656
+ #### Non-breaking Changes
657
+ Examples: adding models or fields
658
+
659
+ - Add the models or fields in the datacontract.yaml
660
+ - Increment the minor version of the datacontract.yaml on any change. Simply edit the datacontract.yaml for this.
661
+ - You need a policy that these changes are non-breaking. That means that one cannot use the star expression in SQL to query a table under contract. Make the consequences known.
662
+ - Fail the build in the Pull Request if a datacontract.yaml accidentally adds a breaking change despite only a minor version change
663
+ ```bash
664
+ $ datacontract breaking datacontract-from-pr.yaml datacontract-from-main.yaml
665
+ ```
666
+ - Create a changelog of this minor change.
667
+ ```bash
668
+ $ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
669
+ ```
670
+ #### Breaking Changes
671
+ Examples: Removing or renaming models and fields.
672
+
673
+ - Remove or rename models and fields in the datacontract.yaml, and any other change that might be part of this new major version of this data contract.
674
+ - Increment the major version of the datacontract.yaml for this and create a new file for the major version. The reason being, that one needs to offer an upgrade path for the data consumers from the old to the new major version.
675
+ - As data consumers need to migrate, try to reduce the frequency of major versions by making multiple breaking changes together if possible.
676
+ - Be aware of the notice period in the data contract as this is the minimum amount of time you have to offer both the old and the new version for a migration path.
677
+ - Do not fear making breaking changes with data contracts. It's okay to do them in this controlled way. Really!
678
+ - Create a changelog of this major change.
679
+ ```bash
680
+ $ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
681
+ ```
682
+
512
683
  ## Development Setup
513
684
 
514
685
  Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).
@@ -521,7 +692,8 @@ source venv/bin/activate
521
692
  # Install Requirements
522
693
  pip install --upgrade pip setuptools wheel
523
694
  pip install -e '.[dev]'
524
- cd tests/
695
+ ruff check --fix
696
+ ruff format --check
525
697
  pytest
526
698
  ```
527
699
 
@@ -0,0 +1,63 @@
1
+ datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ datacontract/cli.py,sha256=NTM6W9J8WvnzheQnaL5W1JukEdXhvfPOtyv2T8eFnkE,10515
3
+ datacontract/data_contract.py,sha256=HMKFAKCN5rlrSwPzrrnGiybttS_-W9ipctsulTlqocE,21656
4
+ datacontract/web.py,sha256=SWglmbqy3NV5h4VDsG0OpwhASJT9uve7w6FPwkBjIYM,457
5
+ datacontract/breaking/breaking.py,sha256=Abw59Xd_CypS6Us-hvsGnPvCUo-a-cl8RzDCmjG4yfk,11719
6
+ datacontract/breaking/breaking_rules.py,sha256=PzjCojAwteiIP3AKHfzeHAZG5b8WNKTeFl8FbFAWOzo,2913
7
+ datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py,sha256=TXO47ON3NjwYI4Y2eBYklMOCo7vAtYzqLPAhZhii6dg,1565
9
+ datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=XHvxX6BrTdZk2wN55zd6ER4k7ILDyGzjbCbZxQAJ2iE,665
10
+ datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py,sha256=CsxFGyInjpsylmoofByV1b-wpFhG1jtRWxSUpp-BXk8,1525
11
+ datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=YkVC0KrJlSMXTvrgopGseRBtJKZf-T9Nxk5oKZwLYWk,5754
12
+ datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=iupiyqBa1dzgT2BtVGna-BjC5rqe6MTLs2QRp8GTs7M,665
13
+ datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ datacontract/engines/soda/check_soda_execute.py,sha256=4zd-E1-3RpqXHLdAVodVZm-tYMBfZORAyPDTObIfodo,6507
15
+ datacontract/engines/soda/connections/bigquery.py,sha256=Ao0KaJe4R28auU_4umxvVaLB6ZHEbKaNoYZ-RfAUmeo,662
16
+ datacontract/engines/soda/connections/dask.py,sha256=Yy6Et2n_vDVsdjtqyBWDSZt7mnjPzPk_MZ-92VZHfnY,1496
17
+ datacontract/engines/soda/connections/databricks.py,sha256=lpMju-o_TzLZeF0EEVwePPr8JahqvFnj5xRYjF15fc8,561
18
+ datacontract/engines/soda/connections/duckdb.py,sha256=xKMs_EVqzG4ynachRw9Xx_syA27wDCBLC15ar48k0aY,2340
19
+ datacontract/engines/soda/connections/kafka.py,sha256=AgAQxm_NgPUXQShqLS81PAjcQ9hJI1fy8CDFdGa_luI,5971
20
+ datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
21
+ datacontract/engines/soda/connections/snowflake.py,sha256=y1t2a1DWY4_tr5k-X5_nhLE6v1rfCwTahzhtHR91x9A,719
22
+ datacontract/export/avro_converter.py,sha256=hbw3d9FalqsjalXK2XqZbL9ecqnbCMs6o-kdDyeG0ZU,2202
23
+ datacontract/export/avro_idl_converter.py,sha256=pbDel_DdvakGOtxFzP-WKnHO1sshCCaLZeNkz3Dyvh8,9582
24
+ datacontract/export/dbt_converter.py,sha256=QF1PYh-UAZEnFYq2D70iT7KUIWCMd1tOVt8lfXmb0Ho,8549
25
+ datacontract/export/great_expectations_converter.py,sha256=yxYSyYOKow_5P6zqzcYlt3zoCAFMoDgEpFvrzXr_2G8,4908
26
+ datacontract/export/jsonschema_converter.py,sha256=6Y0vsIx8GtmrZt7tjivru0Qc6IAZ5IApmDwav-VJinQ,3153
27
+ datacontract/export/odcs_converter.py,sha256=l4fgXUHewtxfJWqIxMNU4y7apwvl_SYpb479IeZPI1A,3732
28
+ datacontract/export/protobuf_converter.py,sha256=K40yEdfRLYDLUx8bn3_an-cwdHfWv8_SBgzGLJT6mX4,2974
29
+ datacontract/export/pydantic_converter.py,sha256=dES_NpeXTMBPX-GASZmsRO9pONVVVKQVEQAM5maTfJ8,5670
30
+ datacontract/export/rdf_converter.py,sha256=dfMicvoOt2bloJZMjkIY6wPR9ymBa9jAlDDwQpiHfAc,6108
31
+ datacontract/export/sodacl_converter.py,sha256=a4CYzTRbWaVP4pom2JkH9A1VkVWsrWzewEamj1e_5fA,3197
32
+ datacontract/export/sql_converter.py,sha256=qmvyVLtFk5MVtNUDF5YmZbrbRz7fsQ4v3kdJHvrq_PI,3343
33
+ datacontract/export/sql_type_converter.py,sha256=kiFFX54dfSgXXqbUCtFz0ujEt0Ie5JnuuWbcvJc0kos,4627
34
+ datacontract/export/terraform_converter.py,sha256=3gwfKHp1QtF2bg_iXmWuV2QSNeZNy5i_AQVe1REXSbc,1949
35
+ datacontract/imports/avro_importer.py,sha256=rQ9lHrTV8_dgjRB2acDDdnWKH8y9DxC4TYv9Rx_CeEQ,3823
36
+ datacontract/imports/sql_importer.py,sha256=kaQMKQLuHFo5uSl8j_g9PBXPdP4KruMh_O_egkbcqxM,2027
37
+ datacontract/init/download_datacontract_file.py,sha256=pj_4mhWKlEtfueWohDgkb1nyuG5ERDipUDszxKwpZUs,413
38
+ datacontract/integration/publish_datamesh_manager.py,sha256=Ul6enuo5l3sFoNIFU0XRg6etzu8a15IQp8jUovB5ynY,1316
39
+ datacontract/integration/publish_opentelemetry.py,sha256=eyR2VQOUii-IvcaBMyru7IDrqU8OuxYvbB24poX4t58,3830
40
+ datacontract/lint/files.py,sha256=tg0vq_w4LQsEr_8A5qr4hUJmHeGalUpsXJXC1t-OGC0,471
41
+ datacontract/lint/lint.py,sha256=GhgBwfDgihXfWIu1WUfYeJoxfYacVJrwq3L2KO4Z2Io,5076
42
+ datacontract/lint/resolve.py,sha256=7EhNiG-040NXKJqliWs3u5vJg6KzfgAfgnq_s9dXGhc,4863
43
+ datacontract/lint/schema.py,sha256=FIzubF1C9TnhEQBeoDsLuhc6HymCMx_v435Hjbcz4U8,838
44
+ datacontract/lint/urls.py,sha256=NHTbwadnm1VCxsiF7clfDvsxJ8-D3Mi5AzDj029E6wQ,1325
45
+ datacontract/lint/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ datacontract/lint/linters/description_linter.py,sha256=Cm99NTWo75kDRIW63qz5uEa8xsug_8dFURPRC1twodw,1554
47
+ datacontract/lint/linters/example_model_linter.py,sha256=YpPWymPIKvcL6jsA8rM1nVMOHaMifmrqlLf65Dtglr8,3978
48
+ datacontract/lint/linters/field_pattern_linter.py,sha256=hdzcY_MM6MsJeag0HQ1YwgzXlgK2IQ2wpDKNDkNwkvk,1089
49
+ datacontract/lint/linters/field_reference_linter.py,sha256=fRNI483JpWV_ewWoNqfuVkNqYwQ-7z68q_xhHQZPO7g,1550
50
+ datacontract/lint/linters/notice_period_linter.py,sha256=_w3lp9qfEkbMMSTV38IlTVWnG-kyJY-S-7dex_JSpwc,2135
51
+ datacontract/lint/linters/primary_field_linter.py,sha256=HoNhk-wmoR0JERZB6e8jZnIx7Sl5-N8QdU4Ctz6scqU,1128
52
+ datacontract/lint/linters/quality_schema_linter.py,sha256=fwqOoT15kDXLw_I7LMDWh4Y-Z9xA83FHPooIz7cnDac,2187
53
+ datacontract/lint/linters/valid_constraints_linter.py,sha256=GeeE2hojMt24EoHuSHE1lURA2v2p5Dmxc7Qq8IvZQH0,4915
54
+ datacontract/model/breaking_change.py,sha256=BIDEUo1U2CQLVT2-I5PyFttxAj6zQPI1UUkEoOOQXMY,2249
55
+ datacontract/model/data_contract_specification.py,sha256=Oo-hL_FVjsN0xX8vi4qZ5gjxNYsqRmUMg0i7fbqSk-o,3480
56
+ datacontract/model/exceptions.py,sha256=zW9NoyzwsND-c9UqgyTVuezUVGEc6KK1Uc2zl12loyo,1178
57
+ datacontract/model/run.py,sha256=mm1cZxjJVg0w1qzN_WV9TcWrAXsACBDSChW39Kpa-K4,2563
58
+ datacontract_cli-0.9.8.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
59
+ datacontract_cli-0.9.8.dist-info/METADATA,sha256=P_OZ5eN2G4_4lXzHnQFfNYB70IzwOd-BHALFLQ47Gr8,35543
60
+ datacontract_cli-0.9.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
61
+ datacontract_cli-0.9.8.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
62
+ datacontract_cli-0.9.8.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
63
+ datacontract_cli-0.9.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5