datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (62) hide show
  1. datacontract/breaking/breaking.py +48 -57
  2. datacontract/cli.py +100 -80
  3. datacontract/data_contract.py +178 -128
  4. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  5. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  6. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  8. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  9. datacontract/engines/soda/check_soda_execute.py +58 -36
  10. datacontract/engines/soda/connections/bigquery.py +5 -3
  11. datacontract/engines/soda/connections/dask.py +0 -1
  12. datacontract/engines/soda/connections/databricks.py +2 -2
  13. datacontract/engines/soda/connections/duckdb.py +25 -8
  14. datacontract/engines/soda/connections/kafka.py +36 -17
  15. datacontract/engines/soda/connections/postgres.py +3 -3
  16. datacontract/engines/soda/connections/snowflake.py +4 -4
  17. datacontract/export/avro_converter.py +9 -11
  18. datacontract/export/avro_idl_converter.py +65 -42
  19. datacontract/export/csv_type_converter.py +36 -0
  20. datacontract/export/dbt_converter.py +43 -32
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/html_export.py +46 -0
  23. datacontract/export/jsonschema_converter.py +3 -1
  24. datacontract/export/odcs_converter.py +5 -7
  25. datacontract/export/protobuf_converter.py +12 -10
  26. datacontract/export/pydantic_converter.py +131 -0
  27. datacontract/export/rdf_converter.py +34 -11
  28. datacontract/export/sodacl_converter.py +118 -21
  29. datacontract/export/sql_converter.py +30 -8
  30. datacontract/export/sql_type_converter.py +44 -4
  31. datacontract/export/terraform_converter.py +4 -3
  32. datacontract/imports/avro_importer.py +65 -18
  33. datacontract/imports/sql_importer.py +0 -2
  34. datacontract/init/download_datacontract_file.py +2 -2
  35. datacontract/integration/publish_datamesh_manager.py +6 -12
  36. datacontract/integration/publish_opentelemetry.py +30 -16
  37. datacontract/lint/files.py +2 -2
  38. datacontract/lint/lint.py +26 -31
  39. datacontract/lint/linters/description_linter.py +12 -21
  40. datacontract/lint/linters/example_model_linter.py +28 -29
  41. datacontract/lint/linters/field_pattern_linter.py +8 -8
  42. datacontract/lint/linters/field_reference_linter.py +11 -10
  43. datacontract/lint/linters/notice_period_linter.py +18 -22
  44. datacontract/lint/linters/quality_schema_linter.py +16 -20
  45. datacontract/lint/linters/valid_constraints_linter.py +42 -37
  46. datacontract/lint/resolve.py +50 -14
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +2 -1
  50. datacontract/model/data_contract_specification.py +8 -7
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +3 -2
  53. datacontract/web.py +3 -7
  54. datacontract_cli-0.9.9.dist-info/METADATA +951 -0
  55. datacontract_cli-0.9.9.dist-info/RECORD +64 -0
  56. datacontract/lint/linters/primary_field_linter.py +0 -30
  57. datacontract_cli-0.9.7.dist-info/METADATA +0 -603
  58. datacontract_cli-0.9.7.dist-info/RECORD +0 -61
  59. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
  60. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
  61. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
  62. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
@@ -1,603 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: datacontract-cli
3
- Version: 0.9.7
4
- Summary: Test data contracts
5
- Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
6
- Project-URL: Homepage, https://cli.datacontract.com
7
- Project-URL: Issues, https://github.com/datacontract/cli/issues
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.10
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: typer[all] ~=0.9.0
15
- Requires-Dist: pydantic <2.7.0,>=2.5.3
16
- Requires-Dist: pyyaml ~=6.0.1
17
- Requires-Dist: requests ~=2.31.0
18
- Requires-Dist: fastapi ==0.110.0
19
- Requires-Dist: fastparquet ==2024.2.0
20
- Requires-Dist: python-multipart ==0.0.9
21
- Requires-Dist: rich ~=13.7.0
22
- Requires-Dist: simple-ddl-parser ==1.0.3
23
- Requires-Dist: soda-core-bigquery ~=3.2.1
24
- Requires-Dist: soda-core-duckdb ~=3.2.1
25
- Requires-Dist: soda-core-postgres ~=3.2.1
26
- Requires-Dist: soda-core-snowflake ~=3.2.1
27
- Requires-Dist: soda-core-spark[databricks] ~=3.2.1
28
- Requires-Dist: soda-core-spark-df ~=3.2.1
29
- Requires-Dist: snowflake-connector-python[pandas] <3.8,>=3.6
30
- Requires-Dist: duckdb ==0.10.0
31
- Requires-Dist: fastjsonschema ~=2.19.1
32
- Requires-Dist: python-dotenv ~=1.0.0
33
- Requires-Dist: s3fs ==2024.2.0
34
- Requires-Dist: rdflib ==7.0.0
35
- Requires-Dist: avro ==1.11.3
36
- Provides-Extra: dev
37
- Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
38
- Requires-Dist: pytest ; extra == 'dev'
39
- Requires-Dist: testcontainers-minio ; extra == 'dev'
40
- Requires-Dist: testcontainers-postgres ; extra == 'dev'
41
- Requires-Dist: testcontainers-kafka ; extra == 'dev'
42
-
43
- # Data Contract CLI
44
-
45
- <p>
46
- <a href="https://github.com/datacontract/cli/actions/workflows/ci.yaml?query=branch%3Amain">
47
- <img alt="Test Workflow" src="https://img.shields.io/github/actions/workflow/status/datacontract/cli/ci.yaml?branch=main"></a>
48
- <a href="https://github.com/datacontract/cli">
49
- <img alt="Stars" src="https://img.shields.io/github/stars/datacontract/cli" /></a>
50
- <a href="https://datacontract.com/slack" rel="nofollow"><img src="https://camo.githubusercontent.com/5ade1fd1e76a6ab860802cdd2941fe2501e2ca2cb534e5d8968dbf864c13d33d/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f736c61636b2d6a6f696e5f636861742d77686974652e7376673f6c6f676f3d736c61636b267374796c653d736f6369616c" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
51
- </p>
52
-
53
- The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
54
- It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
55
-
56
- ![Main features of the Data Contract CLI](datacontractcli.png)
57
-
58
- ## Getting started
59
-
60
- Let's look at this data contract:
61
- [https://datacontract.com/examples/orders-latest/datacontract.yaml](https://datacontract.com/examples/orders-latest/datacontract.yaml)
62
-
63
- We have a _servers_ section with endpoint details to the S3 bucket, _models_ for the structure of the data, and _quality_ attributes that describe the expected freshness and number of rows.
64
-
65
- This data contract contains all information to connect to S3 and check that the actual data meets the defined schema and quality requirements. We can use this information to test if the actual data set in S3 is compliant with the data contract.
66
-
67
- Let's use [pip](https://pip.pypa.io/en/stable/getting-started/) to install the CLI (or use the [Docker image](#docker), if you prefer).
68
- ```bash
69
- $ python3 -m pip install datacontract-cli
70
- ```
71
-
72
- We run the tests:
73
-
74
- ```bash
75
- $ datacontract test https://datacontract.com/examples/orders-latest/datacontract.yaml
76
-
77
- # returns:
78
- Testing https://datacontract.com/examples/orders-latest/datacontract.yaml
79
- ╭────────┬─────────────────────────────────────────────────────────────────────┬───────────────────────────────┬─────────╮
80
- │ Result │ Check │ Field │ Details │
81
- ├────────┼─────────────────────────────────────────────────────────────────────┼───────────────────────────────┼─────────┤
82
- │ passed │ Check that JSON has valid schema │ orders │ │
83
- │ passed │ Check that JSON has valid schema │ line_items │ │
84
- │ passed │ Check that field order_id is present │ orders │ │
85
- │ passed │ Check that field order_timestamp is present │ orders │ │
86
- │ passed │ Check that field order_total is present │ orders │ │
87
- │ passed │ Check that field customer_id is present │ orders │ │
88
- │ passed │ Check that field customer_email_address is present │ orders │ │
89
- │ passed │ row_count >= 5000 │ orders │ │
90
- │ passed │ Check that required field order_id has no null values │ orders.order_id │ │
91
- │ passed │ Check that unique field order_id has no duplicate values │ orders.order_id │ │
92
- │ passed │ duplicate_count(order_id) = 0 │ orders.order_id │ │
93
- │ passed │ Check that required field order_timestamp has no null values │ orders.order_timestamp │ │
94
- │ passed │ freshness(order_timestamp) < 24h │ orders.order_timestamp │ │
95
- │ passed │ Check that required field order_total has no null values │ orders.order_total │ │
96
- │ passed │ Check that required field customer_email_address has no null values │ orders.customer_email_address │ │
97
- │ passed │ Check that field lines_item_id is present │ line_items │ │
98
- │ passed │ Check that field order_id is present │ line_items │ │
99
- │ passed │ Check that field sku is present │ line_items │ │
100
- │ passed │ values in (order_id) must exist in orders (order_id) │ line_items.order_id │ │
101
- │ passed │ row_count >= 5000 │ line_items │ │
102
- │ passed │ Check that required field lines_item_id has no null values │ line_items.lines_item_id │ │
103
- │ passed │ Check that unique field lines_item_id has no duplicate values │ line_items.lines_item_id │ │
104
- ╰────────┴─────────────────────────────────────────────────────────────────────┴───────────────────────────────┴─────────╯
105
- 🟢 data contract is valid. Run 22 checks. Took 6.739514 seconds.
106
- ```
107
-
108
- Voilà, the CLI tested that the _datacontract.yaml_ itself is valid, all records comply with the schema, and all quality attributes are met.
109
-
110
- ## Usage
111
-
112
- ```bash
113
- # create a new data contract from example and write it to datacontract.yaml
114
- $ datacontract init datacontract.yaml
115
-
116
- # lint the datacontract.yaml
117
- $ datacontract lint datacontract.yaml
118
-
119
- # execute schema and quality checks
120
- $ datacontract test datacontract.yaml
121
-
122
- # execute schema and quality checks on the examples within the contract
123
- $ datacontract test --examples datacontract.yaml
124
-
125
- # find differences between two data contracts (Coming Soon)
126
- $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
127
-
128
- # find differences between two data contracts categorized into error, warning, and info.
129
- $ datacontract changelog datacontract-v1.yaml datacontract-v2.yaml
130
-
131
- # fail pipeline on breaking changes. Uses changelog internally and shows only error and warning.
132
- $ datacontract breaking datacontract-v1.yaml datacontract-v2.yaml
133
-
134
- # export model as jsonschema (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql (coming soon), sodacl, terraform)
135
- $ datacontract export --format jsonschema datacontract.yaml
136
-
137
- # import sql
138
- $ datacontract import --format sql --source my_ddl.sql
139
-
140
- # import avro
141
- $ datacontract import --format avro --source avro_schema.avsc
142
-
143
- # import protobuf as model (Coming Soon)
144
- $ datacontract import --format protobuf --source my_protobuf_file.proto datacontract.yaml
145
- ```
146
-
147
- ## Programmatic (Python)
148
- ```python
149
- from datacontract.data_contract import DataContract
150
-
151
- data_contract = DataContract(data_contract_file="datacontract.yaml")
152
- run = data_contract.test()
153
- if not run.has_passed():
154
- print("Data quality validation failed.")
155
- # Abort pipeline, alert, or take corrective actions...
156
- ```
157
-
158
- ## Scenario: Integration with Data Mesh Manager
159
-
160
- If you use [Data Mesh Manager](https://datamesh-manager.com/), you can use the data contract URL and append the `--publish` option to send and display the test results. Set an environment variable for your API key.
161
-
162
- ```bash
163
- # Fetch current data contract, execute tests on production, and publish result to data mesh manager
164
- $ export DATAMESH_MANAGER_API_KEY=xxx
165
- $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish
166
- ```
167
-
168
- ## Scenario: Integration with OpenTelemetry
169
-
170
- If you use OpenTelemetry, you can use the data contract URL and append the `--publish-to-opentelemetry` option to send the test results to your OTLP-compatible instance, e.g., Prometheus.
171
-
172
- The metric name is "datacontract.cli.test.result" and it uses the following encoding for the result:
173
-
174
- | datacontract.cli.test.result | Description |
175
- |-------|---------------------------------------|
176
- | 0 | test run passed, no warnings |
177
- | 1 | test run has warnings |
178
- | 2 | test run failed |
179
- | 3 | test run not possible due to an error |
180
- | 4 | test status unknown |
181
-
182
-
183
- ```bash
184
- # Fetch current data contract, execute tests on production, and publish result to open telemetry
185
- $ export OTEL_SERVICE_NAME=datacontract-cli
186
- $ export OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
187
- $ export OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret (Optional, when using SaaS Products)
188
- $ export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf (Optional, because it is the default value)
189
- # Send to OpenTelemetry
190
- $ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish-to-opentelemetry
191
- ```
192
-
193
- Current limitations:
194
- - no gRPC support
195
- - currently, only ConsoleExporter and OTLP Exporter
196
- - Metrics only, no logs yet (but loosely planned)
197
-
198
- ## Installation
199
-
200
- Choose the most appropriate installation method for your needs:
201
-
202
- ### pip
203
- Python 3.11 recommended.
204
- Python 3.12 available as a pre-release candidate for 0.9.3
205
-
206
- ```bash
207
- python3 -m pip install datacontract-cli
208
- ```
209
-
210
- ### pipx
211
- pipx installs into an isolated environment.
212
- ```bash
213
- pipx install datacontract-cli
214
- ```
215
-
216
- ### Docker
217
-
218
- ```bash
219
- docker pull datacontract/cli
220
- docker run --rm -v ${PWD}:/home/datacontract datacontract/cli
221
- ```
222
-
223
- Or via an alias that automatically uses the latest version:
224
-
225
- ```bash
226
- alias datacontract='docker run --rm -v "${PWD}:/home/datacontract" datacontract/cli:latest'
227
- ```
228
-
229
- ## Documentation
230
-
231
- ### Tests
232
-
233
- Data Contract CLI can connect to data sources and run schema and quality tests to verify that the data contract is valid.
234
-
235
- ```bash
236
- $ datacontract test --server production datacontract.yaml
237
- ```
238
-
239
- To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection. In addition, credentials, such as username and passwords, may be defined with environment variables.
240
-
241
- The application uses different engines, based on the server `type`.
242
-
243
- | Type | Format | Description | Status | Engines |
244
- |--------------|------------|---------------------------------------------------------------------------|-------------|-------------------------------------|
245
- | `s3`         | `parquet`  | Works for any S3-compliant endpoint, e.g., AWS S3, GCS, MinIO, Ceph, ...  | ✅ | soda-core-duckdb |
246
- | `s3` | `json` | Support for `new_line` delimited JSON files and one JSON record per file. | ✅ | fastjsonschema<br> soda-core-duckdb |
247
- | `s3` | `csv` | | ✅ | soda-core-duckdb |
248
- | `s3` | `delta` | | Coming soon | TBD |
249
- | `postgres` | n/a | | ✅ | soda-core-postgres |
250
- | `snowflake` | n/a | | ✅ | soda-core-snowflake |
251
- | `bigquery` | n/a | | ✅ | soda-core-bigquery |
252
- | `redshift` | n/a | | Coming soon | TBD |
253
- | `databricks` | n/a | Support for Databricks SQL with Unity catalog and Hive metastore. | ✅ | soda-core-spark |
254
- | `databricks` | n/a | Support for Spark for programmatic use in Notebooks. | ✅ | soda-core-spark-df |
255
- | `kafka` | `json` | Experimental. | ✅ | pyspark<br>soda-core-spark-df |
256
- | `kafka` | `avro` | | Coming soon | TBD |
257
- | `kafka` | `protobuf` | | Coming soon | TBD |
258
- | `local` | `parquet` | | ✅ | soda-core-duckdb |
259
- | `local` | `json` | Support for `new_line` delimited JSON files and one JSON record per file. | ✅ | fastjsonschema<br> soda-core-duckdb |
260
- | `local` | `csv` | | ✅ | soda-core-duckdb |
261
-
262
- Feel free to create an issue, if you need support for an additional type.
263
-
264
- ### S3
265
-
266
- Data Contract CLI can test data that is stored in S3 buckets or any S3-compliant endpoints in various formats.
267
-
268
- #### Example
269
-
270
- datacontract.yaml
271
- ```yaml
272
- servers:
273
- production:
274
- type: s3
275
- endpointUrl: https://minio.example.com # not needed with AWS S3
276
- location: s3://bucket-name/path/*/*.json
277
- format: json
278
- delimiter: new_line # new_line, array, or none
279
- ```
280
-
281
- #### Environment Variables
282
-
283
- | Environment Variable | Example | Description |
284
- |-----------------------------------|-------------------------------|-----------------------|
285
- | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of S3 bucket |
286
- | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
287
- | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
288
-
289
-
290
- ### Postgres
291
-
292
- Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
293
-
294
- #### Example
295
-
296
- datacontract.yaml
297
- ```yaml
298
- servers:
299
- postgres:
300
- type: postgres
301
- host: localhost
302
- port: 5432
303
- database: postgres
304
- schema: public
305
- models:
306
- my_table_1: # corresponds to a table
307
- type: table
308
- fields:
309
- my_column_1: # corresponds to a column
310
- type: varchar
311
- ```
312
-
313
- #### Environment Variables
314
-
315
- | Environment Variable | Example | Description |
316
- |----------------------------------|--------------------|-------------|
317
- | `DATACONTRACT_POSTGRES_USERNAME` | `postgres` | Username |
318
- | `DATACONTRACT_POSTGRES_PASSWORD` | `mysecretpassword` | Password |
319
-
320
-
321
- ### Snowflake
322
-
323
- Data Contract CLI can test data in Snowflake.
324
-
325
- #### Example
326
-
327
- datacontract.yaml
328
- ```yaml
329
-
330
- servers:
331
- snowflake:
332
- type: snowflake
333
- account: abcdefg-xn12345
334
- database: ORDER_DB
335
- schema: ORDERS_PII_V2
336
- models:
337
- my_table_1: # corresponds to a table
338
- type: table
339
- fields:
340
- my_column_1: # corresponds to a column
341
- type: varchar
342
- ```
343
-
344
- #### Environment Variables
345
-
346
- | Environment Variable | Example | Description |
347
- |------------------------------------|--------------------|-----------------------------------------------------|
348
- | `DATACONTRACT_SNOWFLAKE_USERNAME` | `datacontract` | Username |
349
- | `DATACONTRACT_SNOWFLAKE_PASSWORD` | `mysecretpassword` | Password |
350
- | `DATACONTRACT_SNOWFLAKE_ROLE` | `DATAVALIDATION` | The snowflake role to use. |
351
- | `DATACONTRACT_SNOWFLAKE_WAREHOUSE` | `COMPUTE_WH`       | The Snowflake Warehouse to use when executing the tests. |
352
-
353
-
354
- ### BigQuery
355
-
356
- We support authentication to BigQuery using Service Account Key. The used Service Account should include the roles:
357
- * BigQuery Job User
358
- * BigQuery Data Viewer
359
-
360
-
361
- #### Example
362
-
363
- datacontract.yaml
364
- ```yaml
365
- servers:
366
- production:
367
- type: bigquery
368
- project: datameshexample-product
369
- dataset: datacontract_cli_test_dataset
370
- models:
371
- datacontract_cli_test_table: # corresponds to a BigQuery table
372
- type: table
373
- fields: ...
374
- ```
375
-
376
- #### Environment Variables
377
-
378
- | Environment Variable | Example | Description |
379
- |----------------------------------------------|---------------------------|---------------------------------------------------------|
380
- | `DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH` | `~/service-access-key.json` | Service Access key as saved on key creation by BigQuery |
381
-
382
-
383
- ### Databricks
384
-
385
- Works with Unity Catalog and Hive metastore.
386
-
387
- Needs a running SQL warehouse or compute cluster.
388
-
389
- #### Example
390
-
391
- datacontract.yaml
392
- ```yaml
393
- servers:
394
- production:
395
- type: databricks
396
- host: dbc-abcdefgh-1234.cloud.databricks.com
397
- catalog: acme_catalog_prod
398
- schema: orders_latest
399
- models:
400
- orders: # corresponds to a table
401
- type: table
402
- fields: ...
403
- ```
404
-
405
- #### Environment Variables
406
-
407
- | Environment Variable | Example | Description |
408
- |----------------------------------------------|--------------------------------------|-------------------------------------------------------|
409
- | `DATACONTRACT_DATABRICKS_TOKEN` | `dapia00000000000000000000000000000` | The personal access token to authenticate |
410
- | `DATACONTRACT_DATABRICKS_HTTP_PATH` | `/sql/1.0/warehouses/b053a3ffffffff` | The HTTP path to the SQL warehouse or compute cluster |
411
-
412
-
413
- ### Databricks (programmatic)
414
-
415
- Works with Unity Catalog and Hive metastore.
416
- When running in a notebook or pipeline, the provided `spark` session can be used.
417
- An additional authentication is not required.
418
-
419
- Requires a Databricks Runtime with Python >= 3.10.
420
-
421
- #### Example
422
-
423
- datacontract.yaml
424
- ```yaml
425
- servers:
426
- production:
427
- type: databricks
428
- host: dbc-abcdefgh-1234.cloud.databricks.com # ignored, always use current host
429
- catalog: acme_catalog_prod
430
- schema: orders_latest
431
- models:
432
- orders: # corresponds to a table
433
- type: table
434
- fields: ...
435
- ```
436
-
437
- Notebook
438
- ```python
439
- %pip install datacontract-cli
440
- dbutils.library.restartPython()
441
-
442
- from datacontract.data_contract import DataContract
443
-
444
- data_contract = DataContract(
445
- data_contract_file="/Volumes/acme_catalog_prod/orders_latest/datacontract/datacontract.yaml",
446
- spark=spark)
447
- run = data_contract.test()
448
- run.result
449
- ```
450
-
451
- ### Kafka
452
-
453
- Kafka support is currently considered experimental.
454
-
455
- #### Example
456
-
457
- datacontract.yaml
458
- ```yaml
459
- servers:
460
- production:
461
- type: kafka
462
- host: abc-12345.eu-central-1.aws.confluent.cloud:9092
463
- topic: my-topic-name
464
- format: json
465
- ```
466
-
467
- #### Environment Variables
468
-
469
- | Environment Variable | Example | Description |
470
- |------------------------------------|---------|-----------------------------|
471
- | `DATACONTRACT_KAFKA_SASL_USERNAME` | `xxx` | The SASL username (key). |
472
- | `DATACONTRACT_KAFKA_SASL_PASSWORD` | `xxx` | The SASL password (secret). |
473
-
474
-
475
-
476
- ### Exports
477
-
478
- ```bash
479
- # Example export to dbt model
480
- datacontract export --format dbt
481
- ```
482
-
483
- Available export options:
484
-
485
- | Type | Description | Status |
486
- |--------------------|---------------------------------------------------------|--------|
487
- | `jsonschema` | Export to JSON Schema | ✅ |
488
- | `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
489
- | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
490
- | `dbt` | Export to dbt models in YAML format | ✅ |
491
- | `dbt-sources` | Export to dbt sources in YAML format | ✅ |
492
- | `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
493
- | `rdf` | Export data contract to RDF representation in N3 format | ✅ |
494
- | `avro` | Export to AVRO models | ✅ |
495
- | `protobuf` | Export to Protobuf | ✅ |
496
- | `terraform` | Export to terraform resources | ✅ |
497
- | `sql` | Export to SQL DDL | ✅ |
498
- | `sql-query` | Export to SQL Query | ✅ |
499
- | `pydantic` | Export to pydantic models | TBD |
500
- | Missing something? | Please create an issue on GitHub | TBD |
501
-
502
- #### RDF
503
-
504
- The export function converts a given data contract into an RDF representation. You have the option to
505
- add a base_url which will be used as the default prefix to resolve relative IRIs inside the document.
506
-
507
- ```shell
508
- datacontract export --format rdf --rdf-base https://www.example.com/ datacontract.yaml
509
- ```
510
-
511
- The data contract is mapped onto the following concepts of a yet to be defined Data Contract
512
- Ontology named https://datacontract.com/DataContractSpecification/ :
513
- - DataContract
514
- - Server
515
- - Model
516
-
517
- Having the data contract inside an RDF Graph gives us access to the following use cases:
518
- - Interoperability with other data contract specification formats
519
- - Store data contracts inside a knowledge graph
520
- - Enhance a semantic search to find and retrieve data contracts
521
- - Linking model elements to already established ontologies and knowledge
522
- - Using full power of OWL to reason about the graph structure of data contracts
523
- - Apply graph algorithms on multiple data contracts (Find similar data contracts, find "gatekeeper"
524
- data products, find the true domain owner of a field attribute)
525
-
526
- ### Imports
527
-
528
- ```bash
529
- # Example import from SQL DDL
530
- datacontract import --format sql --source my_ddl.sql
531
- ```
532
-
533
- Available import options:
534
-
535
- | Type | Description | Status |
536
- |--------------------|------------------------------------------------|---------|
537
- | `sql` | Import from SQL DDL | ✅ |
538
- | `protobuf` | Import from Protobuf schemas | TBD |
539
- | `avro` | Import from AVRO schemas | ✅ |
540
- | `jsonschema` | Import from JSON Schemas | TBD |
541
- | `dbt` | Import from dbt models | TBD |
542
- | `odcs` | Import from Open Data Contract Standard (ODCS) | TBD |
543
- | Missing something? | Please create an issue on GitHub | TBD |
544
-
545
- ## Development Setup
546
-
547
- Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).
548
-
549
- ```bash
550
- # create venv
551
- python3 -m venv venv
552
- source venv/bin/activate
553
-
554
- # Install Requirements
555
- pip install --upgrade pip setuptools wheel
556
- pip install -e '.[dev]'
557
- cd tests/
558
- pytest
559
- ```
560
-
561
- Release
562
-
563
- ```bash
564
- git tag v0.9.0
565
- git push origin v0.9.0
566
- python3 -m pip install --upgrade build twine
567
- rm -r dist/
568
- python3 -m build
569
- # for now only test.pypi.org
570
- python3 -m twine upload --repository testpypi dist/*
571
- ```
572
-
573
- Docker Build
574
-
575
- ```bash
576
- docker build -t datacontract/cli .
577
- docker run --rm -v ${PWD}:/home/datacontract datacontract/cli
578
- ```
579
-
580
- ## Release Steps
581
-
582
- 1. Update the version in `pyproject.toml`
583
- 2. Have a look at the `CHANGELOG.md`
584
- 3. Create release commit manually
585
- 4. Execute `./release`
586
- 5. Wait until GitHub Release is created
587
- 6. Add the release notes to the GitHub Release
588
-
589
- ## Contribution
590
-
591
- We are happy to receive your contributions. Propose your change in an issue or directly create a pull request with your improvements.
592
-
593
- ## License
594
-
595
- [MIT License](LICENSE)
596
-
597
- ## Credits
598
-
599
- Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/) and [Jochen Christ](https://www.linkedin.com/in/jochenchrist/).
600
-
601
-
602
-
603
- <a href="https://github.com/datacontract/cli" class="github-corner" aria-label="View source on GitHub"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
@@ -1,61 +0,0 @@
1
- datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- datacontract/cli.py,sha256=sCXanc95IOLlpNUUEJsy2PWynjak7xHKOoZKxauw2lU,10332
3
- datacontract/data_contract.py,sha256=6uzBt5v9sRQL-t6WF4N-xCNhUU1qYODfxYedgP6HAwQ,19880
4
- datacontract/web.py,sha256=dBSC56EzELzBGfykC3myAsmEYyqL7AM1RGsUZYWyTpA,498
5
- datacontract/breaking/breaking.py,sha256=VJO5bzu_5dCUCtweV8sSCUDwnMHg9bsU9TVhV5hh3QY,12111
6
- datacontract/breaking/breaking_rules.py,sha256=PzjCojAwteiIP3AKHfzeHAZG5b8WNKTeFl8FbFAWOzo,2913
7
- datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py,sha256=Tj_REcEYl2BtIR_W9k0pjdjE4CvBE-4vpFrGAvvrde4,1557
9
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=V_YJyt1rKkkKhghU359vaAGtC8leIGmwqR4MlrLgCJ4,620
10
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py,sha256=bRoJp8a-Hvdc2OkbcTcS0tr8M7XxNzWbJAUFrc-ceiA,1393
11
- datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=rI_nLJg2REGlooPWGLvmUVLT5MQTscGivY1bSrl4Tqg,5599
12
- datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=sCe028D8q04c2pYlzJuEXWmMZOQJLiaObyLXLe4UzUs,713
13
- datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- datacontract/engines/soda/check_soda_execute.py,sha256=z-bvCB7wzJMipkdTSUfM84stxR531IUyWD5C_QwYjS0,6256
15
- datacontract/engines/soda/connections/bigquery.py,sha256=_hNd7Lmo6DjLb3nqVx_pfePwSYp3_3T_hwivVlATEyI,658
16
- datacontract/engines/soda/connections/dask.py,sha256=iQfu4swHN_QfY9l0TdSbqAQXJvfKMIxGoZ4xiNpi4eY,1497
17
- datacontract/engines/soda/connections/databricks.py,sha256=tCVE2Q2BXjuxS5ZmDyH_qN6jigStBsfIikRYMQ5LKVs,561
18
- datacontract/engines/soda/connections/duckdb.py,sha256=_Tpfo5D1ahOUPHbnEZ1WloeCecQ2LYDUebIU3hnnBDg,2342
19
- datacontract/engines/soda/connections/kafka.py,sha256=ybqTe9Z40gU-nuZJDXI9erQJ1W2WqX7dt_4E8pDSe5k,5715
20
- datacontract/engines/soda/connections/postgres.py,sha256=ow21gzxiV2_FyOXrFYeSRefLKwRQR5_qxtOR2T1rdTI,625
21
- datacontract/engines/soda/connections/snowflake.py,sha256=H941nOQULZKznmarVvZcvJhseMOUwfnMsv1r_P0MMb0,719
22
- datacontract/export/avro_converter.py,sha256=IPhM-MhgOUVXQfycH4HHYrsKDwwWjquVAZFoUScfSzU,2237
23
- datacontract/export/avro_idl_converter.py,sha256=wK84nKurXKXD39ba5VqdSHNUcv_G7NSxlbezZytjh00,9833
24
- datacontract/export/dbt_converter.py,sha256=loVODuEdSYtNFNjpPMWoWrbE097-0tqHPzQ_-WzylFE,8296
25
- datacontract/export/jsonschema_converter.py,sha256=S2yl5VmNNnVo7J4QrouANGup8MSuihrTZ5JpJs3YOBs,3150
26
- datacontract/export/odcs_converter.py,sha256=Aqrx0tdPoZjsoV0gLTYsnfzu5fSrgOEUckG3mjXrNeM,3723
27
- datacontract/export/protobuf_converter.py,sha256=ropJgwVvq8cHUFmK7_f1FT4dwD05NBdqyaAAAifUEq4,2889
28
- datacontract/export/rdf_converter.py,sha256=Zbm_Jo8JyeanutcvDrpGw4ruWrXiyJuBw4ywDYxFZQY,6032
29
- datacontract/export/sodacl_converter.py,sha256=UxjZghF-C8jMOUvTmXJzJoeVjemcdOuBYgdMKz4cZ6A,2858
30
- datacontract/export/sql_converter.py,sha256=ewn7tM5EpgEqXeIqS7wO6BCSsdBgQcTY7_DhzPCbN40,2750
31
- datacontract/export/sql_type_converter.py,sha256=BSIRfk6DvJY0xemcdbVkIJyFCmHIfLlvWOAlimkIePU,3327
32
- datacontract/export/terraform_converter.py,sha256=HYfX1269GYMT9H64PtXryO2D991eQe3ncA-qed5NwCk,1949
33
- datacontract/imports/avro_importer.py,sha256=KQwifIy5IdjfK-cTUQdKmp-dmBoGuLa27SA56Xu0OG4,2879
34
- datacontract/imports/sql_importer.py,sha256=tGjy0D5oLA96C6LDxuNqR2CBombJJ-Sz1PIeCl637mE,2029
35
- datacontract/init/download_datacontract_file.py,sha256=H_234IfZ3xezjgcZ4sb7wSCEZCDUjM1uYsUibHHj4Ow,412
36
- datacontract/integration/publish_datamesh_manager.py,sha256=_qn4lyUkyrO0IKFzfzWCNBqEH5Ur20M_cpieIPtgRwc,1358
37
- datacontract/integration/publish_opentelemetry.py,sha256=04qxsyNZTCzDtpjnyPkrrEi0gwAQC_9KUUKCktiJXow,3314
38
- datacontract/lint/files.py,sha256=DIUetslLuBvvddgza4vEvvUBMSVeJ4I1LHFID0mmMfU,470
39
- datacontract/lint/lint.py,sha256=_Bcfq9O0K-khEkyV-C_UmHaazkEbTQONvCGrAe4npm8,5202
40
- datacontract/lint/resolve.py,sha256=JP9ssoSwc3oyt8Vxbu1shu1sJMxZAShUvz9_GlSp2OA,4920
41
- datacontract/lint/schema.py,sha256=wlv-gf19HN1eAug0uq6getQ3FPHYDHbpukfoEPycAhg,838
42
- datacontract/lint/urls.py,sha256=-Vr1LweSpTh6f9w6F5mQ3XOOgL3FQ72H2pQvTqJSqoY,1325
43
- datacontract/lint/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- datacontract/lint/linters/description_linter.py,sha256=aMV5n2ZyRddyWV5p6pVbgXVLHg5juKse2zAvDLWegAo,1697
45
- datacontract/lint/linters/example_model_linter.py,sha256=BRxui6RdcXTY68iLmdjuUt1QKJkI27IK4cqJt_gFiCI,4081
46
- datacontract/lint/linters/field_pattern_linter.py,sha256=bZhchpN4afbmXvQxjd19X5q-kBpj0xnyXJiEh2fhCuw,1109
47
- datacontract/lint/linters/field_reference_linter.py,sha256=CacXAsstWA0zQs2qjuHYvLfDqn7isc1IQ1v_nN_2AuI,1522
48
- datacontract/lint/linters/notice_period_linter.py,sha256=gugRlb8OrIHFcRn8KoapedFxNXeW0NMMIpv9WjjyRy0,2214
49
- datacontract/lint/linters/primary_field_linter.py,sha256=Xqa-Xw90j2Ecm8f9HGVKAATbRtmOM7XIoFjlYNVV3_M,1183
50
- datacontract/lint/linters/quality_schema_linter.py,sha256=ndvaBk-ErAcRaKL09H_MhoXjvipbC1Gu7ug5KxoA8ME,2010
51
- datacontract/lint/linters/valid_constraints_linter.py,sha256=09n3CRgPyYdBAkkX5UDaEcKvKn4aMkbKoXLD3owafJg,4912
52
- datacontract/model/breaking_change.py,sha256=CC1UD0m6lPKM5Eo055KUsgNGbeZe1KDXME--cDaBI2A,2248
53
- datacontract/model/data_contract_specification.py,sha256=d_Va3qadTaDUeWBkvhou0LdhjG3nCShC0k2KpcgIyXc,3455
54
- datacontract/model/exceptions.py,sha256=zhhXnKWTzEyG54N9QDVpE5F986cKuHEXN0OcR5Zy8oc,1090
55
- datacontract/model/run.py,sha256=nmVT-gapqVhpNMJXPQM09abBc7GzuszvWK_de5L3fj0,2562
56
- datacontract_cli-0.9.7.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
57
- datacontract_cli-0.9.7.dist-info/METADATA,sha256=itGMGn_qAD9wy5vkTIoyrA9onWG_L78zi_7J6LDYumI,28550
58
- datacontract_cli-0.9.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
59
- datacontract_cli-0.9.7.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
60
- datacontract_cli-0.9.7.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
61
- datacontract_cli-0.9.7.dist-info/RECORD,,