datacontract-cli 0.10.25__py3-none-any.whl → 0.10.26__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/engines/soda/connections/kafka.py +2 -1
- datacontract/export/odcs_v3_exporter.py +22 -4
- datacontract/export/sql_converter.py +4 -0
- datacontract/export/sql_type_converter.py +2 -0
- datacontract/imports/spark_importer.py +2 -0
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/METADATA +27 -14
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/RECORD +11 -11
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/top_level.txt +0 -0
`datacontract/engines/soda/connections/kafka.py` (+2 -1):

```diff
@@ -27,6 +27,7 @@ def create_spark_session():
     tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
     atexit.register(tmp_dir.cleanup)
 
+    pyspark_version = "3.5.5"  # MUST be the same as in the pyproject.toml
     spark = (
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
@@ -34,7 +35,7 @@ def create_spark_session():
         .config("spark.ui.enabled", "false")
         .config(
             "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:
+            f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark_version},org.apache.spark:spark-avro_2.12:{pyspark_version}",
         )
         .getOrCreate()
     )
```
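The new `pyspark_version` variable keeps the Kafka and Avro connector jars in lock-step with the pinned PySpark dependency. A minimal sketch of the same idea that derives the coordinates from the installed `pyspark` at runtime instead of hardcoding the version; the `SCALA_VERSION` constant is an assumption mirroring the `_2.12` artifact suffix in the diff:

```python
# Sketch: build the spark.jars.packages string from the installed PySpark
# version instead of a hardcoded "3.5.5". SCALA_VERSION is an assumption
# that mirrors the _2.12 artifact suffix used in the diff.
import pyspark

SCALA_VERSION = "2.12"

def kafka_avro_packages() -> str:
    v = pyspark.__version__
    return (
        f"org.apache.spark:spark-sql-kafka-0-10_{SCALA_VERSION}:{v},"
        f"org.apache.spark:spark-avro_{SCALA_VERSION}:{v}"
    )

print(kafka_avro_packages())
# e.g. org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.5,org.apache.spark:spark-avro_2.12:3.5.5
```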
`datacontract/export/odcs_v3_exporter.py` (+22 -4):

```diff
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict
 
 from open_data_contract_standard.model import (
     CustomProperty,
@@ -202,13 +202,31 @@ def to_logical_type(type: str) -> str | None:
         return "array"
     if type.lower() in ["array"]:
         return "array"
+    if type.lower() in ["variant"]:
+        return "variant"
     if type.lower() in ["null"]:
         return None
     return None
 
 
-def to_physical_type(
-
+def to_physical_type(config: Dict[str, Any]) -> str | None:
+    if config is None:
+        return None
+    if "postgresType" in config:
+        return config["postgresType"]
+    elif "bigqueryType" in config:
+        return config["bigqueryType"]
+    elif "snowflakeType" in config:
+        return config["snowflakeType"]
+    elif "redshiftType" in config:
+        return config["redshiftType"]
+    elif "sqlserverType" in config:
+        return config["sqlserverType"]
+    elif "databricksType" in config:
+        return config["databricksType"]
+    elif "physicalType" in config:
+        return config["physicalType"]
+    return None
 
 
 def to_property(field_name: str, field: Field) -> SchemaProperty:
@@ -231,7 +249,7 @@ def to_property(field_name: str, field: Field) -> SchemaProperty:
 
     if field.type is not None:
         property.logicalType = to_logical_type(field.type)
-        property.physicalType = to_physical_type(field.
+        property.physicalType = to_physical_type(field.config)
 
     if field.description is not None:
         property.description = field.description
```
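The added `to_physical_type` returns the first server-specific type key found in a field's config, falling back to the generic `physicalType`. A quick illustration of that precedence; the config values below are hypothetical:

```python
# Hypothetical configs showing to_physical_type's key precedence:
# postgresType, bigqueryType, snowflakeType, redshiftType, sqlserverType and
# databricksType are checked before the generic physicalType fallback.
from datacontract.export.odcs_v3_exporter import to_physical_type

assert to_physical_type({"snowflakeType": "NUMBER(38,0)", "physicalType": "long"}) == "NUMBER(38,0)"
assert to_physical_type({"physicalType": "long"}) == "long"
assert to_physical_type({"somethingElse": 1}) is None
assert to_physical_type(None) is None
```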
`datacontract/export/sql_converter.py` (+4 -0):

```diff
@@ -117,6 +117,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
+        if server_type == "snowflake" and field.description is not None:
+            result += f" COMMENT '{_escape(field.description)}'"
         if current_field_index < fields:
             result += ","
         result += "\n"
@@ -124,6 +126,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
     result += ")"
     if server_type == "databricks" and model.description is not None:
         result += f' COMMENT "{_escape(model.description)}"'
+    if server_type == "snowflake" and model.description is not None:
+        result += f" COMMENT='{_escape(model.description)}'"
     result += ";\n"
     return result
```
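With these two branches, Snowflake DDL export now carries descriptions as comments: `COMMENT '<text>'` per column and a trailing `COMMENT='<text>'` on the table. A sketch of the shape of the emitted DDL for a hypothetical `orders` model; names and types are made up, only the COMMENT placement follows the diff:

```python
# Hypothetical output shape for a Snowflake export where both a field and
# the model carry descriptions. Table and column names here are invented.
expected = (
    "CREATE TABLE orders (\n"
    "  order_id TEXT COMMENT 'Unique order identifier'\n"
    ") COMMENT='All orders placed in the webshop';\n"
)
```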
`datacontract/export/sql_type_converter.py` (+2 -0):

```diff
@@ -197,6 +197,8 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["array"]:
         item_type = convert_to_databricks(field.items)
         return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
```
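The Databricks type mapping now recognizes the contract-level `variant` type. A minimal sketch, assuming the `Field` model from `datacontract.model.data_contract_specification`:

```python
# Sketch: a contract field declared as "variant" should now map to the
# Databricks VARIANT type (assumes Field from the package's model module).
from datacontract.export.sql_type_converter import convert_to_databricks
from datacontract.model.data_contract_specification import Field

assert convert_to_databricks(Field(type="variant")) == "VARIANT"
```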
`datacontract/imports/spark_importer.py` (+2 -0):

```diff
@@ -154,5 +154,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
         return "null"
     elif isinstance(spark_type, types.VarcharType):
         return "varchar"
+    elif isinstance(spark_type, types.VariantType):
+        return "variant"
     else:
         raise ValueError(f"Unsupported Spark type: {spark_type}")
```
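`_data_type_from_spark` now folds Spark's `VariantType` into the contract's `variant` type instead of raising. A hedged sketch; note that `pyspark.sql.types.VariantType` only exists in sufficiently recent PySpark releases, so the import below fails on older 3.x builds:

```python
# Sketch: VariantType -> "variant". Requires a PySpark build that ships
# pyspark.sql.types.VariantType; older 3.x releases lack it.
from pyspark.sql import types
from datacontract.imports.spark_importer import _data_type_from_spark

assert _data_type_from_spark(types.VariantType()) == "variant"
```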
`{datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/METADATA` (+27 -14):

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datacontract-cli
-Version: 0.10.
+Version: 0.10.26
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 License-Expression: MIT
@@ -70,7 +70,7 @@ Provides-Extra: rdf
 Requires-Dist: rdflib==7.0.0; extra == "rdf"
 Provides-Extra: api
 Requires-Dist: fastapi==0.115.12; extra == "api"
-Requires-Dist: uvicorn==0.34.
+Requires-Dist: uvicorn==0.34.2; extra == "api"
 Provides-Extra: protobuf
 Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
 Provides-Extra: all
@@ -84,7 +84,7 @@ Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.3.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pymssql==2.3.
+Requires-Dist: pymssql==2.3.4; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.2; extra == "dev"
 Requires-Dist: trino==0.333.0; extra == "dev"
@@ -1397,20 +1397,21 @@ Available import options:
 
 | Type               | Description                                    | Status |
 |--------------------|------------------------------------------------|--------|
-| `sql`              | Import from SQL DDL                            | ✅ |
 | `avro`             | Import from AVRO schemas                       | ✅ |
-| `glue`             | Import from AWS Glue DataCatalog               | ✅ |
-| `jsonschema`       | Import from JSON Schemas                       | ✅ |
 | `bigquery`         | Import from BigQuery Schemas                   | ✅ |
-| `
+| `csv`              | Import from CSV File                           | ✅ |
+| `dbml`             | Import from DBML models                        | ✅ |
 | `dbt`              | Import from dbt models                         | ✅ |
+| `excel`            | Import from ODCS Excel Template                | ✅ |
+| `glue`             | Import from AWS Glue DataCatalog               | ✅ |
+| `iceberg`          | Import from an Iceberg JSON Schema Definition  | partial |
+| `jsonschema`       | Import from JSON Schemas                       | ✅ |
 | `odcs`             | Import from Open Data Contract Standard (ODCS) | ✅ |
-| `
-| `dbml`             | Import from DBML models                        | ✅ |
-| `csv`              | Import from CSV File                           | ✅ |
+| `parquet`          | Import from Parquet File Metadata              | ✅ |
 | `protobuf`         | Import from Protobuf schemas                   | ✅ |
-| `
-| `
+| `spark`            | Import from Spark StructTypes                  | ✅ |
+| `sql`              | Import from SQL DDL                            | ✅ |
+| `unity`            | Import from Databricks Unity Catalog           | partial |
 | Missing something? | Please create an issue on GitHub               | TBD |
 
 
@@ -1460,8 +1461,9 @@ datacontract import --format unity --source my_unity_table.json
 
 ```bash
 # Example import single table from Unity Catalog via HTTP endpoint
-export 
-export 
+export DATACONTRACT_DATABRICKS_SERVER_HOSTNAME="https://xyz.cloud.databricks.com"
+export DATACONTRACT_DATABRICKS_HTTP_PATH="/sql/1.0/warehouses/b053a331fa014fb4"
+export DATACONTRACT_DATABRICKS_TOKEN=<token>
 datacontract import --format unity --unity-table-full-name <table_full_name>
 ```
 
@@ -1482,6 +1484,17 @@ datacontract import --format dbt --source <manifest_path> --dbt-model <model_nam
 datacontract import --format dbt --source <manifest_path>
 ```
 
+### Excel
+
+Importing from [ODCS Excel Template](https://github.com/datacontract/open-data-contract-standard-excel-template).
+
+Examples:
+
+```bash
+# Example import from ODCS Excel Template
+datacontract import --format excel --source odcs.xlsx
+```
+
 #### Glue
 
 Importing from Glue reads the necessary Data directly off of the AWS API.
````
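Beyond the one-off shell command above, the new `excel` importer can be scripted. A small sketch that batch-converts ODCS Excel templates by shelling out to the documented CLI; the `templates/` directory layout and the assumption that the imported contract is printed to stdout are mine, not taken from the release:

```python
# Sketch: batch-import ODCS Excel templates via the documented CLI call
# `datacontract import --format excel --source <file>`. Assumes the
# resulting data contract YAML is printed to stdout (not verified here).
import subprocess
from pathlib import Path

for xlsx in Path("templates").glob("*.xlsx"):
    result = subprocess.run(
        ["datacontract", "import", "--format", "excel", "--source", str(xlsx)],
        capture_output=True, text=True, check=True,
    )
    xlsx.with_suffix(".yaml").write_text(result.stdout)
```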
`{datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/RECORD` (+11 -11):

```diff
@@ -19,7 +19,7 @@ datacontract/engines/soda/check_soda_execute.py,sha256=SYJdPpkozOA62yTM7s6cfwLfg
 datacontract/engines/soda/connections/bigquery.py,sha256=C-8kxmzpYe88bJp80ObHFLMh4rpnIjnUQ7XOj0Ke7lk,903
 datacontract/engines/soda/connections/databricks.py,sha256=cMRasuO0MrSKVgHPB-9uFTGTZPFg6z9Kpk3tJ0SdR0s,943
 datacontract/engines/soda/connections/duckdb_connection.py,sha256=hy2HxktSSt1tFB7rVI7tDOe6WeTNvHSKqsEk9uIkntU,9114
-datacontract/engines/soda/connections/kafka.py,sha256=
+datacontract/engines/soda/connections/kafka.py,sha256=lnj_-3-CnJ6stetGqm6HOzN1Qatlw7xoCQU2zKBIXxU,8725
 datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
 datacontract/engines/soda/connections/snowflake.py,sha256=rfG2ysuqNM6TkvyqQKcGHFsTGJ6AROmud5VleUDRrb0,749
 datacontract/engines/soda/connections/sqlserver.py,sha256=RzGLbCUdRyfmDcqtM_AB9WZ-Xk-XYX91nkXpVNpYbvc,1440
@@ -42,15 +42,15 @@ datacontract/export/html_export.py,sha256=ojazWrb0AwSc7Vr72M_otMo-3PA8mfi8tfIy9B
 datacontract/export/iceberg_converter.py,sha256=ArcQ_Y3z_W4_kGDU_8jPRx2-pHpP3Nhx1zYoETOL3c4,6804
 datacontract/export/jsonschema_converter.py,sha256=2MT82MurcQQbrVDRj1kFsxnmFd9scNSfYI1upQSecl4,5631
 datacontract/export/markdown_converter.py,sha256=chtaZX4vXTee7JCMYmWiDQ9m55gwJjHPw6SEM3UOwpQ,6467
-datacontract/export/odcs_v3_exporter.py,sha256=
+datacontract/export/odcs_v3_exporter.py,sha256=ta5NMqZ-9k-w9okdkYrPbMtd4b0ldHGL0_w70EdA5ho,13769
 datacontract/export/pandas_type_converter.py,sha256=464pQ3JQKFQa1TO0HBNcEoZvQye_yUbY6jQtiBaphSc,1117
 datacontract/export/protobuf_converter.py,sha256=DHLl8BW26xqltBsd7Qhz0RhTl9YZQKCbkmjNpECgubg,7928
 datacontract/export/pydantic_converter.py,sha256=1Lt9F8i6zyQYb44MyQtsXwCWWXYxZ47SmzArr_uPqsU,5579
 datacontract/export/rdf_converter.py,sha256=4gnKus37Geth4MJ3Ruc8AbnpD_Ll9OCx8oTIEKScvh8,6435
 datacontract/export/sodacl_converter.py,sha256=lQCOcNiT7i6KGaJ1Ua4MYBYGm-EyktTGrL4FLZDi14c,1102
 datacontract/export/spark_converter.py,sha256=LCue-rLan3ki7HgzUFyBaO8YUlc6CrDNBZD-QVgUv-U,7190
-datacontract/export/sql_converter.py,sha256=
-datacontract/export/sql_type_converter.py,sha256
+datacontract/export/sql_converter.py,sha256=KA5PNmKxUWF_8QXRX8aGvMGf7pX0rZgkLe6mbK8Q7Qk,5089
+datacontract/export/sql_type_converter.py,sha256=-ZHDihXWd5Gr9XG5FyE5-NLfB5q-HTdjx6P6TdsckDA,13497
 datacontract/export/sqlalchemy_converter.py,sha256=0DMncvA811lTtd5q4ZORREQ9YH1vQm1lJeqMWsFvloE,6463
 datacontract/export/terraform_converter.py,sha256=ExFoEvErVk-gBnWJiqC38SxDUmUEydpACWc917l5RyM,2163
 datacontract/imports/avro_importer.py,sha256=ryu4iUCSPJEV1uaE3AKdxD7fUxmRJ-ta936xurbgtHc,10922
@@ -68,7 +68,7 @@ datacontract/imports/odcs_importer.py,sha256=vv2dHLGL0Cdivv1CdKn5euJwGNKmiZmXCox
 datacontract/imports/odcs_v3_importer.py,sha256=sZVBENcPMl6rt0bbT_b1lnTFs3KOe1cZ2hwWaJBQhgY,16924
 datacontract/imports/parquet_importer.py,sha256=W_0_16mX4stwDUt4GM2L7dnGmTpAySab5k13-OlTCCc,3095
 datacontract/imports/protobuf_importer.py,sha256=rlUIskv9PNi5rFQ4Hobt9zlnKpahGsb4dy5G5UJoVAw,10840
-datacontract/imports/spark_importer.py,sha256=
+datacontract/imports/spark_importer.py,sha256=8sPI6tcH0aMlaUYdc2P_abrJfOBp7vWFgvYE5jRNwLU,5177
 datacontract/imports/sql_importer.py,sha256=ElFS2LILDOvWzW-X4emSIKltFV42i78TEoyg0bvn3II,9322
 datacontract/imports/unity_importer.py,sha256=UcPYABhLZaWNl5IkCazwAuMoVDdujsu_QteuV_Q9hgI,6737
 datacontract/init/init_template.py,sha256=BMawR-AF_vXyn_-Co-XoT8Dxj9b55V8xKk2KkJK-c1o,721
@@ -107,9 +107,9 @@ datacontract/templates/partials/model_field.html,sha256=2YBF95ypNCPFYuYKoeilRnDG
 datacontract/templates/partials/quality.html,sha256=ynEDWRn8I90Uje-xhGYgFcfwOgKI1R-CDki-EvTsauQ,1785
 datacontract/templates/partials/server.html,sha256=WkWFbz1ZvhIAUQQhH5Lkwb0HZRW907ehEnFmJSkpquQ,6235
 datacontract/templates/style/output.css,sha256=V1k6smSvlz07W2UNOkhcDFUb0HLmoas7DnNg_o8XUcA,25759
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
+datacontract_cli-0.10.26.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+datacontract_cli-0.10.26.dist-info/METADATA,sha256=wK_1AwnrBPOBTCRKORH63413TPMUkiVJGBSEs6GmQtQ,104876
+datacontract_cli-0.10.26.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+datacontract_cli-0.10.26.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+datacontract_cli-0.10.26.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+datacontract_cli-0.10.26.dist-info/RECORD,,
```
File without changes: `{datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/entry_points.txt`
File without changes: `{datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/licenses/LICENSE`
File without changes: `{datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.26.dist-info}/top_level.txt`