datacontract-cli 0.10.25__py3-none-any.whl → 0.10.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -27,6 +27,7 @@ def create_spark_session():
     tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
     atexit.register(tmp_dir.cleanup)
 
+    pyspark_version = "3.5.5"  # MUST be the same as in the pyproject.toml
     spark = (
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
@@ -34,7 +35,7 @@ def create_spark_session():
         .config("spark.ui.enabled", "false")
         .config(
             "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.5,org.apache.spark:spark-avro_2.12:3.5.5",
+            f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark_version},org.apache.spark:spark-avro_2.12:{pyspark_version}",
         )
         .getOrCreate()
     )
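The single `pyspark_version` constant ensures the Kafka and Avro Spark packages are resolved for the same Spark release that the pinned `pyspark` dependency provides. A minimal sketch of a sanity check under that assumption (illustration only, not part of the package):

```python
# Sketch: verify the hard-coded constant matches the installed pyspark,
# since a mismatch would pull spark-sql-kafka/spark-avro jars built for a
# different Spark release.
import pyspark

PYSPARK_VERSION = "3.5.5"  # mirrors pyspark_version in create_spark_session()

if pyspark.__version__ != PYSPARK_VERSION:
    raise RuntimeError(
        f"pyspark {pyspark.__version__} is installed, but Spark packages "
        f"are pinned to {PYSPARK_VERSION}"
    )
```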
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict
 
 from open_data_contract_standard.model import (
     CustomProperty,
@@ -202,13 +202,31 @@ def to_logical_type(type: str) -> str | None:
         return "array"
     if type.lower() in ["array"]:
         return "array"
+    if type.lower() in ["variant"]:
+        return "variant"
     if type.lower() in ["null"]:
         return None
     return None
 
 
-def to_physical_type(type: str) -> str | None:
-    return type
+def to_physical_type(config: Dict[str, Any]) -> str | None:
+    if config is None:
+        return None
+    if "postgresType" in config:
+        return config["postgresType"]
+    elif "bigqueryType" in config:
+        return config["bigqueryType"]
+    elif "snowflakeType" in config:
+        return config["snowflakeType"]
+    elif "redshiftType" in config:
+        return config["redshiftType"]
+    elif "sqlserverType" in config:
+        return config["sqlserverType"]
+    elif "databricksType" in config:
+        return config["databricksType"]
+    elif "physicalType" in config:
+        return config["physicalType"]
+    return None
 
 
 def to_property(field_name: str, field: Field) -> SchemaProperty:
@@ -231,7 +249,7 @@ def to_property(field_name: str, field: Field) -> SchemaProperty:
 
     if field.type is not None:
         property.logicalType = to_logical_type(field.type)
-        property.physicalType = to_physical_type(field.type)
+        property.physicalType = to_physical_type(field.config)
 
     if field.description is not None:
         property.description = field.description
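`to_physical_type` no longer echoes the logical type; it now derives the physical type from the field's `config` map, checking vendor-specific keys before the generic `physicalType` fallback. A self-contained sketch of that resolution order (a replica of the hunk above for illustration, not an import from the package):

```python
from typing import Any, Dict, Optional

# First match wins, in the same order as the exporter above.
_TYPE_KEYS = (
    "postgresType", "bigqueryType", "snowflakeType", "redshiftType",
    "sqlserverType", "databricksType", "physicalType",
)

def resolve_physical_type(config: Optional[Dict[str, Any]]) -> Optional[str]:
    if config is None:
        return None
    for key in _TYPE_KEYS:
        if key in config:
            return config[key]
    return None

print(resolve_physical_type({"snowflakeType": "NUMBER(38,0)"}))  # NUMBER(38,0)
print(resolve_physical_type({"physicalType": "bigint"}))         # bigint
print(resolve_physical_type(None))                               # None
```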
@@ -117,6 +117,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
+        if server_type == "snowflake" and field.description is not None:
+            result += f" COMMENT '{_escape(field.description)}'"
         if current_field_index < fields:
             result += ","
         result += "\n"
@@ -124,6 +126,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
     result += ")"
     if server_type == "databricks" and model.description is not None:
         result += f' COMMENT "{_escape(model.description)}"'
+    if server_type == "snowflake" and model.description is not None:
+        result += f" COMMENT='{_escape(model.description)}'"
     result += ";\n"
     return result
 
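Together, these two hunks make the Snowflake DDL export carry descriptions: column comments use `COMMENT '...'` and the table-level comment uses the `COMMENT='...'` form. A hand-written illustration of the expected shape, assuming a model named `orders` with one described field (not actual tool output):

```python
# Hypothetical output shape for server_type="snowflake"; the exact DDL the
# exporter emits depends on the model definition.
expected_ddl = (
    "CREATE TABLE orders (\n"
    "  order_id TEXT COMMENT 'Order identifier'\n"
    ") COMMENT='All customer orders';\n"
)
print(expected_ddl)
```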
@@ -197,6 +197,8 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["array"]:
         item_type = convert_to_databricks(field.items)
         return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
 
 
@@ -154,5 +154,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
         return "null"
     elif isinstance(spark_type, types.VarcharType):
         return "varchar"
+    elif isinstance(spark_type, types.VariantType):
+        return "variant"
     else:
         raise ValueError(f"Unsupported Spark type: {spark_type}")
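With the exporter and importer hunks above, `variant` now round-trips: Spark's `VariantType` is imported as the logical type `variant`, and `variant` fields export to Databricks SQL as `VARIANT`. A sketch of the import side, assuming a PySpark build that actually provides `types.VariantType` (it is not present in every release):

```python
from pyspark.sql import types

def data_type_from_spark(spark_type: types.DataType) -> str:
    # Mirrors the new branch in _data_type_from_spark above.
    if isinstance(spark_type, types.VariantType):
        return "variant"
    raise ValueError(f"Unsupported Spark type: {spark_type}")

print(data_type_from_spark(types.VariantType()))  # -> "variant"
```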
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datacontract-cli
-Version: 0.10.25
+Version: 0.10.26
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 License-Expression: MIT
@@ -70,7 +70,7 @@ Provides-Extra: rdf
 Requires-Dist: rdflib==7.0.0; extra == "rdf"
 Provides-Extra: api
 Requires-Dist: fastapi==0.115.12; extra == "api"
-Requires-Dist: uvicorn==0.34.0; extra == "api"
+Requires-Dist: uvicorn==0.34.2; extra == "api"
 Provides-Extra: protobuf
 Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
 Provides-Extra: all
@@ -84,7 +84,7 @@ Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.3.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pymssql==2.3.2; extra == "dev"
+Requires-Dist: pymssql==2.3.4; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.2; extra == "dev"
 Requires-Dist: trino==0.333.0; extra == "dev"
@@ -1397,20 +1397,21 @@ Available import options:
 
 | Type               | Description                                    | Status  |
 |--------------------|------------------------------------------------|---------|
-| `sql`              | Import from SQL DDL                            | ✅      |
 | `avro`             | Import from AVRO schemas                       | ✅      |
-| `glue`             | Import from AWS Glue DataCatalog               | ✅      |
-| `jsonschema`       | Import from JSON Schemas                       | ✅      |
 | `bigquery`         | Import from BigQuery Schemas                   | ✅      |
-| `unity`            | Import from Databricks Unity Catalog           | partial |
+| `csv`              | Import from CSV File                           | ✅      |
+| `dbml`             | Import from DBML models                        | ✅      |
 | `dbt`              | Import from dbt models                         | ✅      |
+| `excel`            | Import from ODCS Excel Template                | ✅      |
+| `glue`             | Import from AWS Glue DataCatalog               | ✅      |
+| `iceberg`          | Import from an Iceberg JSON Schema Definition  | partial |
+| `jsonschema`       | Import from JSON Schemas                       | ✅      |
 | `odcs`             | Import from Open Data Contract Standard (ODCS) | ✅      |
-| `spark`            | Import from Spark StructTypes                  | ✅      |
-| `dbml`             | Import from DBML models                        | ✅      |
-| `csv`              | Import from CSV File                           | ✅      |
+| `parquet`          | Import from Parquet File Metadata              | ✅      |
 | `protobuf`         | Import from Protobuf schemas                   | ✅      |
-| `iceberg`          | Import from an Iceberg JSON Schema Definition  | partial |
-| `parquet`          | Import from Parquet File Metadta               | ✅      |
+| `spark`            | Import from Spark StructTypes                  | ✅      |
+| `sql`              | Import from SQL DDL                            | ✅      |
+| `unity`            | Import from Databricks Unity Catalog           | partial |
 | Missing something? | Please create an issue on GitHub               | TBD     |
 
 
@@ -1460,8 +1461,9 @@ datacontract import --format unity --source my_unity_table.json
 
 ```bash
 # Example import single table from Unity Catalog via HTTP endpoint
-export DATABRICKS_IMPORT_INSTANCE="https://xyz.cloud.databricks.com"
-export DATABRICKS_IMPORT_ACCESS_TOKEN=<token>
+export DATACONTRACT_DATABRICKS_SERVER_HOSTNAME="https://xyz.cloud.databricks.com"
+export DATACONTRACT_DATABRICKS_HTTP_PATH="/sql/1.0/warehouses/b053a331fa014fb4"
+export DATACONTRACT_DATABRICKS_TOKEN=<token>
 datacontract import --format unity --unity-table-full-name <table_full_name>
 ```
 
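The Unity importer's environment variables were renamed, and a SQL-warehouse HTTP path is now required alongside the hostname and token. A sketch of how a wrapper script might check the new variables before invoking the import (names from the hunk above; nothing here is package API):

```python
import os

# Variable names taken from the README hunk above.
REQUIRED = (
    "DATACONTRACT_DATABRICKS_SERVER_HOSTNAME",
    "DATACONTRACT_DATABRICKS_HTTP_PATH",
    "DATACONTRACT_DATABRICKS_TOKEN",
)

missing = [name for name in REQUIRED if name not in os.environ]
if missing:
    raise SystemExit(f"Missing Databricks settings: {', '.join(missing)}")
```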
@@ -1482,6 +1484,17 @@ datacontract import --format dbt --source <manifest_path> --dbt-model <model_nam
 datacontract import --format dbt --source <manifest_path>
 ```
 
+### Excel
+
+Importing from [ODCS Excel Template](https://github.com/datacontract/open-data-contract-standard-excel-template).
+
+Examples:
+
+```bash
+# Example import from ODCS Excel Template
+datacontract import --format excel --source odcs.xlsx
+```
+
 #### Glue
 
 Importing from Glue reads the necessary Data directly off of the AWS API.
@@ -19,7 +19,7 @@ datacontract/engines/soda/check_soda_execute.py,sha256=SYJdPpkozOA62yTM7s6cfwLfg
 datacontract/engines/soda/connections/bigquery.py,sha256=C-8kxmzpYe88bJp80ObHFLMh4rpnIjnUQ7XOj0Ke7lk,903
 datacontract/engines/soda/connections/databricks.py,sha256=cMRasuO0MrSKVgHPB-9uFTGTZPFg6z9Kpk3tJ0SdR0s,943
 datacontract/engines/soda/connections/duckdb_connection.py,sha256=hy2HxktSSt1tFB7rVI7tDOe6WeTNvHSKqsEk9uIkntU,9114
-datacontract/engines/soda/connections/kafka.py,sha256=icUcsGy9lFQRTz_mXD35tahFKIIxK5aS3RCzu_YfBaQ,8625
+datacontract/engines/soda/connections/kafka.py,sha256=lnj_-3-CnJ6stetGqm6HOzN1Qatlw7xoCQU2zKBIXxU,8725
 datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
 datacontract/engines/soda/connections/snowflake.py,sha256=rfG2ysuqNM6TkvyqQKcGHFsTGJ6AROmud5VleUDRrb0,749
 datacontract/engines/soda/connections/sqlserver.py,sha256=RzGLbCUdRyfmDcqtM_AB9WZ-Xk-XYX91nkXpVNpYbvc,1440
@@ -42,15 +42,15 @@ datacontract/export/html_export.py,sha256=ojazWrb0AwSc7Vr72M_otMo-3PA8mfi8tfIy9B
 datacontract/export/iceberg_converter.py,sha256=ArcQ_Y3z_W4_kGDU_8jPRx2-pHpP3Nhx1zYoETOL3c4,6804
 datacontract/export/jsonschema_converter.py,sha256=2MT82MurcQQbrVDRj1kFsxnmFd9scNSfYI1upQSecl4,5631
 datacontract/export/markdown_converter.py,sha256=chtaZX4vXTee7JCMYmWiDQ9m55gwJjHPw6SEM3UOwpQ,6467
-datacontract/export/odcs_v3_exporter.py,sha256=4iMPsREiVwODx29KETRUMcd1RRLRYLQy4mdwzsi0qbU,13128
+datacontract/export/odcs_v3_exporter.py,sha256=ta5NMqZ-9k-w9okdkYrPbMtd4b0ldHGL0_w70EdA5ho,13769
 datacontract/export/pandas_type_converter.py,sha256=464pQ3JQKFQa1TO0HBNcEoZvQye_yUbY6jQtiBaphSc,1117
 datacontract/export/protobuf_converter.py,sha256=DHLl8BW26xqltBsd7Qhz0RhTl9YZQKCbkmjNpECgubg,7928
 datacontract/export/pydantic_converter.py,sha256=1Lt9F8i6zyQYb44MyQtsXwCWWXYxZ47SmzArr_uPqsU,5579
 datacontract/export/rdf_converter.py,sha256=4gnKus37Geth4MJ3Ruc8AbnpD_Ll9OCx8oTIEKScvh8,6435
 datacontract/export/sodacl_converter.py,sha256=lQCOcNiT7i6KGaJ1Ua4MYBYGm-EyktTGrL4FLZDi14c,1102
 datacontract/export/spark_converter.py,sha256=LCue-rLan3ki7HgzUFyBaO8YUlc6CrDNBZD-QVgUv-U,7190
-datacontract/export/sql_converter.py,sha256=BGjmOAlzB5QfzJiXP61ajV0wj4M5oJrmNZZe_4Lo1Ik,4821
-datacontract/export/sql_type_converter.py,sha256=qjm8Fdyihq3VBL4x2D7RHdWoOm6HWIJe28U4XboYCk8,13436
+datacontract/export/sql_converter.py,sha256=KA5PNmKxUWF_8QXRX8aGvMGf7pX0rZgkLe6mbK8Q7Qk,5089
+datacontract/export/sql_type_converter.py,sha256=-ZHDihXWd5Gr9XG5FyE5-NLfB5q-HTdjx6P6TdsckDA,13497
 datacontract/export/sqlalchemy_converter.py,sha256=0DMncvA811lTtd5q4ZORREQ9YH1vQm1lJeqMWsFvloE,6463
 datacontract/export/terraform_converter.py,sha256=ExFoEvErVk-gBnWJiqC38SxDUmUEydpACWc917l5RyM,2163
 datacontract/imports/avro_importer.py,sha256=ryu4iUCSPJEV1uaE3AKdxD7fUxmRJ-ta936xurbgtHc,10922
@@ -68,7 +68,7 @@ datacontract/imports/odcs_importer.py,sha256=vv2dHLGL0Cdivv1CdKn5euJwGNKmiZmXCox
 datacontract/imports/odcs_v3_importer.py,sha256=sZVBENcPMl6rt0bbT_b1lnTFs3KOe1cZ2hwWaJBQhgY,16924
 datacontract/imports/parquet_importer.py,sha256=W_0_16mX4stwDUt4GM2L7dnGmTpAySab5k13-OlTCCc,3095
 datacontract/imports/protobuf_importer.py,sha256=rlUIskv9PNi5rFQ4Hobt9zlnKpahGsb4dy5G5UJoVAw,10840
-datacontract/imports/spark_importer.py,sha256=h2na1YtdJYu9Oz07tSvwx8L4RX6aLCCDVkAv-RTKyVA,5100
+datacontract/imports/spark_importer.py,sha256=8sPI6tcH0aMlaUYdc2P_abrJfOBp7vWFgvYE5jRNwLU,5177
 datacontract/imports/sql_importer.py,sha256=ElFS2LILDOvWzW-X4emSIKltFV42i78TEoyg0bvn3II,9322
 datacontract/imports/unity_importer.py,sha256=UcPYABhLZaWNl5IkCazwAuMoVDdujsu_QteuV_Q9hgI,6737
 datacontract/init/init_template.py,sha256=BMawR-AF_vXyn_-Co-XoT8Dxj9b55V8xKk2KkJK-c1o,721
@@ -107,9 +107,9 @@ datacontract/templates/partials/model_field.html,sha256=2YBF95ypNCPFYuYKoeilRnDG
 datacontract/templates/partials/quality.html,sha256=ynEDWRn8I90Uje-xhGYgFcfwOgKI1R-CDki-EvTsauQ,1785
 datacontract/templates/partials/server.html,sha256=WkWFbz1ZvhIAUQQhH5Lkwb0HZRW907ehEnFmJSkpquQ,6235
 datacontract/templates/style/output.css,sha256=V1k6smSvlz07W2UNOkhcDFUb0HLmoas7DnNg_o8XUcA,25759
-datacontract_cli-0.10.25.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
-datacontract_cli-0.10.25.dist-info/METADATA,sha256=Kc_fZ_wuwVoDjKluxLUkH0-chM_b_lMcsCUnm9tNlyo,104455
-datacontract_cli-0.10.25.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-datacontract_cli-0.10.25.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
-datacontract_cli-0.10.25.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
-datacontract_cli-0.10.25.dist-info/RECORD,,
+datacontract_cli-0.10.26.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+datacontract_cli-0.10.26.dist-info/METADATA,sha256=wK_1AwnrBPOBTCRKORH63413TPMUkiVJGBSEs6GmQtQ,104876
+datacontract_cli-0.10.26.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+datacontract_cli-0.10.26.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+datacontract_cli-0.10.26.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+datacontract_cli-0.10.26.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.7.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 