datacontract-cli 0.9.8__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.
- datacontract/cli.py +2 -0
- datacontract/data_contract.py +27 -27
- datacontract/engines/soda/check_soda_execute.py +17 -6
- datacontract/engines/soda/connections/duckdb.py +21 -4
- datacontract/export/avro_converter.py +6 -4
- datacontract/export/csv_type_converter.py +36 -0
- datacontract/export/great_expectations_converter.py +1 -1
- datacontract/export/html_export.py +66 -0
- datacontract/export/pydantic_converter.py +51 -60
- datacontract/export/sodacl_converter.py +104 -7
- datacontract/export/sql_converter.py +12 -1
- datacontract/imports/avro_importer.py +37 -12
- datacontract/integration/publish_datamesh_manager.py +2 -3
- datacontract/lint/resolve.py +45 -6
- datacontract/model/run.py +2 -1
- datacontract/templates/datacontract.html +502 -0
- datacontract/templates/style/output.css +1332 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/METADATA +314 -105
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/RECORD +23 -20
- datacontract/lint/linters/primary_field_linter.py +0 -28
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -16,6 +16,7 @@ from datacontract.init.download_datacontract_file import \
 
 console = Console()
 
+
 class OrderedCommands(TyperGroup):
     def list_commands(self, ctx: Context) -> Iterable[str]:
         return self.commands.keys()
@@ -153,6 +154,7 @@ class ExportFormat(str, Enum):
     avro_idl = "avro-idl"
     sql = "sql"
     sql_query = "sql-query"
+    html = "html"
 
 
 @app.command()
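The new `html` member extends the `str`-based `ExportFormat` enum that backs the CLI's `--format` option. A minimal standalone sketch (abbreviated enum, not the package's full class) of how such a value resolves:

```python
from enum import Enum


class ExportFormat(str, Enum):
    # abbreviated copy for illustration; the real enum lists all export formats
    sql = "sql"
    sql_query = "sql-query"
    html = "html"


# Because the enum derives from str, the CLI string maps directly to a member.
assert ExportFormat("html") is ExportFormat.html
assert ExportFormat.html == "html"
```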
datacontract/data_contract.py
CHANGED
@@ -4,6 +4,7 @@ import tempfile
 import typing
 
 import yaml
+from pyspark.sql import SparkSession
 
 from datacontract.breaking.breaking import models_breaking_changes, \
     quality_breaking_changes
@@ -19,6 +20,7 @@ from datacontract.export.dbt_converter import to_dbt_models_yaml, \
     to_dbt_sources_yaml, to_dbt_staging_sql
 from datacontract.export.great_expectations_converter import \
     to_great_expectations
+from datacontract.export.html_export import to_html
 from datacontract.export.jsonschema_converter import to_jsonschema_json
 from datacontract.export.odcs_converter import to_odcs_yaml
 from datacontract.export.protobuf_converter import to_protobuf
@@ -39,8 +41,6 @@ from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
 from datacontract.lint.linters.field_reference_linter import \
     FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.primary_field_linter import \
-    PrimaryFieldUniqueRequired
 from datacontract.lint.linters.quality_schema_linter import \
     QualityUsesSchemaLinter
 from datacontract.lint.linters.valid_constraints_linter import \
@@ -53,25 +53,6 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
 
 
-def _determine_sql_server_type(data_contract, sql_server_type):
-    if sql_server_type == "auto":
-        if data_contract.servers is None or len(data_contract.servers) == 0:
-            raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-        server_types = set([server.type for server in data_contract.servers.values()])
-        if "snowflake" in server_types:
-            return "snowflake"
-        elif "postgres" in server_types:
-            return "postgres"
-        elif "databricks" in server_types:
-            return "databricks"
-        else:
-            # default to snowflake dialect
-            return "snowflake"
-    else:
-        return sql_server_type
-
-
 class DataContract:
     def __init__(
         self,
@@ -83,7 +64,7 @@ class DataContract:
         examples: bool = False,
         publish_url: str = None,
         publish_to_opentelemetry: bool = False,
-        spark:
+        spark: SparkSession = None,
         inline_definitions: bool = False,
     ):
         self._data_contract_file = data_contract_file
@@ -102,7 +83,6 @@ class DataContract:
             FieldPatternLinter(),
             FieldReferenceLinter(),
             NoticePeriodLinter(),
-            PrimaryFieldUniqueRequired(),
             ValidFieldConstraintsLinter(),
             DescriptionLinter(),
         }
@@ -246,12 +226,12 @@ class DataContract:
             try:
                 publish_datamesh_manager(run, self._publish_url)
             except Exception:
-
+                run.log_error("Failed to publish to datamesh manager")
         if self._publish_to_opentelemetry:
             try:
                 publish_opentelemetry(run)
             except Exception:
-
+                run.log_error("Failed to publish to opentelemetry")
 
         return run
 
@@ -385,13 +365,13 @@ class DataContract:
         if export_format == "terraform":
             return to_terraform(data_contract)
         if export_format == "sql":
-            server_type = _determine_sql_server_type(data_contract, sql_server_type)
+            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
             return to_sql_ddl(data_contract, server_type=server_type)
         if export_format == "sql-query":
             if data_contract.models is None:
                 raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
 
-            server_type = _determine_sql_server_type(data_contract, sql_server_type)
+            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
 
             model_names = list(data_contract.models.keys())
 
@@ -439,10 +419,30 @@ class DataContract:
             return to_great_expectations(data_contract, model_name)
         if export_format == "pydantic-model":
             return to_pydantic_model_str(data_contract)
+        if export_format == "html":
+            return to_html(data_contract)
         else:
             print(f"Export format {export_format} not supported.")
             return ""
 
+    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
+        if sql_server_type == "auto":
+            if data_contract.servers is None or len(data_contract.servers) == 0:
+                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
+
+            server_types = set([server.type for server in data_contract.servers.values()])
+            if "snowflake" in server_types:
+                return "snowflake"
+            elif "postgres" in server_types:
+                return "postgres"
+            elif "databricks" in server_types:
+                return "databricks"
+            else:
+                # default to snowflake dialect
+                return "snowflake"
+        else:
+            return sql_server_type
+
     def _get_examples_server(self, data_contract, run, tmp_dir):
         run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
         format = "json"
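The dialect auto-detection that moved into the `_determine_sql_server_type` method keeps its original precedence. A standalone sketch of that precedence with hypothetical server-type sets, assuming only the logic shown in the hunk above:

```python
def determine_sql_server_type(server_types: set[str], sql_server_type: str = "auto") -> str:
    # Mirrors the method added above: an explicit value wins; otherwise pick a dialect
    # from the declared server types, falling back to the snowflake dialect.
    if sql_server_type != "auto":
        return sql_server_type
    if not server_types:
        raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
    for candidate in ("snowflake", "postgres", "databricks"):
        if candidate in server_types:
            return candidate
    return "snowflake"  # default dialect


assert determine_sql_server_type({"postgres", "kafka"}) == "postgres"
assert determine_sql_server_type({"s3"}) == "snowflake"  # fallback
assert determine_sql_server_type({"postgres"}, "databricks") == "databricks"  # explicit override
```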
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 
+from pyspark.sql import SparkSession
 from soda.scan import Scan
 
 from datacontract.engines.soda.connections.bigquery import \
@@ -19,7 +20,9 @@ from datacontract.model.data_contract_specification import \
 from datacontract.model.run import Run, Check, Log
 
 
-def check_soda_execute(
+def check_soda_execute(
+    run: Run, data_contract: DataContractSpecification, server: Server, spark: SparkSession, tmp_dir
+):
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
         return
@@ -91,6 +94,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
     # Don't check types for csv format, as they are hard to detect
     server_type = server.type
     check_types = server.format != "json" and server.format != "csv" and server.format != "avro"
+
     sodacl_yaml_str = to_sodacl_yaml(data_contract, server_type, check_types)
     # print("sodacl_yaml_str:\n" + sodacl_yaml_str)
     scan.add_sodacl_yaml_str(sodacl_yaml_str)
@@ -106,16 +110,13 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         for c in scan_results.get("checks"):
             check = Check(
                 type="schema",
-                result=
-                if c.get("outcome") == "pass"
-                else "failed"
-                if c.get("outcome") == "fail"
-                else c.get("outcome"),
+                result=to_result(c),
                 reason=", ".join(c.get("outcomeReasons")),
                 name=c.get("name"),
                 model=c.get("table"),
                 field=c.get("column"),
                 engine="soda-core",
+                diagnostics=c.get("diagnostics"),
             )
             update_reason(check, c)
             run.checks.append(check)
@@ -143,6 +144,16 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         return
 
 
+def to_result(c) -> str:
+    soda_outcome = c.get("outcome")
+    if soda_outcome == "pass":
+        return "passed"
+    elif soda_outcome == "fail":
+        return "failed"
+    else:
+        return soda_outcome
+
+
 def update_reason(check, c):
     """Try to find a reason in diagnostics"""
     if check.result == "passed":
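The inlined conditional expression for `result` is replaced by the `to_result` helper, which maps Soda outcomes onto the run model's vocabulary. A quick standalone illustration of the mapping, using hypothetical check dicts:

```python
def to_result(c) -> str:
    # Same mapping as the helper added above: Soda reports "pass"/"fail",
    # the run model stores "passed"/"failed"; anything else is passed through.
    soda_outcome = c.get("outcome")
    if soda_outcome == "pass":
        return "passed"
    elif soda_outcome == "fail":
        return "failed"
    return soda_outcome


assert to_result({"outcome": "pass"}) == "passed"
assert to_result({"outcome": "fail"}) == "failed"
assert to_result({"outcome": "warn"}) == "warn"  # hypothetical pass-through value
```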
datacontract/engines/soda/connections/duckdb.py
CHANGED
@@ -2,6 +2,7 @@ import logging
 import os
 
 import duckdb
+from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
 
 
 def get_duckdb_connection(data_contract, server):
@@ -12,7 +13,7 @@ def get_duckdb_connection(data_contract, server):
     if server.type == "s3":
         path = server.location
         setup_s3_connection(con, server)
-    for model_name in data_contract.models:
+    for model_name, model in data_contract.models.items():
         model_path = path
         if "{model}" in model_path:
             model_path = model_path.format(model=model_name)
@@ -32,12 +33,28 @@ def get_duckdb_connection(data_contract, server):
                 CREATE VIEW "{model_name}" AS SELECT * FROM read_parquet('{model_path}', hive_partitioning=1);
                 """)
         elif server.format == "csv":
-
-
-
+            columns = to_csv_types(model)
+            if columns is None:
+                con.sql(
+                    f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""
+                )
+            else:
+                con.sql(
+                    f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
+                )
     return con
 
 
+def to_csv_types(model) -> dict:
+    if model is None:
+        return None
+    columns = {}
+    # ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+    for field_name, field in model.fields.items():
+        columns[field_name] = convert_to_duckdb_csv_type(field)
+    return columns
+
+
 def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
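With the model now available in the loop, CSV views are created with explicit column types instead of relying on DuckDB's sniffer. A sketch of the statement this produces for a hypothetical model, built with plain string formatting (no DuckDB connection needed; names and path are made up):

```python
# Hypothetical column mapping, shaped like the dict to_csv_types() builds from the model's fields.
columns = {"order_id": "VARCHAR", "order_total": "BIGINT", "ordered_at": "TIMESTAMP"}
model_name = "orders"
model_path = "s3://example-bucket/orders/*.csv"  # hypothetical location

sql = (
    f'CREATE VIEW "{model_name}" AS '
    f"SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"
)
print(sql)
# -> CREATE VIEW "orders" AS SELECT * FROM read_csv('s3://example-bucket/orders/*.csv', ...)
```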
datacontract/export/avro_converter.py
CHANGED
@@ -4,7 +4,7 @@ from datacontract.model.data_contract_specification import Field
 
 
 def to_avro_schema(model_name, model) -> dict:
-    return to_avro_record(model_name, model.fields, model.description)
+    return to_avro_record(model_name, model.fields, model.description, model.namespace)
 
 
 def to_avro_schema_json(model_name, model) -> str:
@@ -12,10 +12,12 @@ def to_avro_schema_json(model_name, model) -> str:
     return json.dumps(schema, indent=2, sort_keys=False)
 
 
-def to_avro_record(name, fields, description) -> dict:
+def to_avro_record(name, fields, description, namespace) -> dict:
     schema = {"type": "record", "name": name}
     if description is not None:
         schema["doc"] = description
+    if namespace is not None:
+        schema["namespace"] = namespace
     schema["fields"] = to_avro_fields(fields)
     return schema
 
@@ -35,7 +37,7 @@ def to_avro_field(field, field_name):
     return avro_field
 
 
-def to_avro_type(field: Field, field_name: str):
+def to_avro_type(field: Field, field_name: str) -> str | dict:
     if field.type is None:
         return "null"
     if field.type in ["string", "varchar", "text"]:
@@ -60,7 +62,7 @@ def to_avro_type(field: Field, field_name: str):
     elif field.type in ["time"]:
         return "long"
     elif field.type in ["object", "record", "struct"]:
-        return to_avro_record(field_name, field.fields, field.description)
+        return to_avro_record(field_name, field.fields, field.description, None)
     elif field.type in ["binary"]:
         return "bytes"
     elif field.type in ["array"]:
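With the extra `namespace` parameter, a model that declares a namespace now yields an Avro record schema carrying a `namespace` attribute. A hand-built sketch of the expected shape (hypothetical model name, namespace, and field; field conversion abbreviated):

```python
import json

# Rough shape of the dict to_avro_record("orders", fields, "All orders", "com.example.checkout")
# would now produce for a simple model.
expected_schema = {
    "type": "record",
    "name": "orders",
    "doc": "All orders",
    "namespace": "com.example.checkout",  # new in 0.10.0: only set when the model defines one
    "fields": [
        {"name": "order_id", "type": "string"},
    ],
}
print(json.dumps(expected_schema, indent=2, sort_keys=False))
```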
datacontract/export/csv_type_converter.py
ADDED
@@ -0,0 +1,36 @@
+# https://duckdb.org/docs/data/csv/overview.html
+# ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+def convert_to_duckdb_csv_type(field) -> None | str:
+    type = field.type
+    if type is None:
+        return "VARCHAR"
+    if type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "TIME"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "VARCHAR"
+    if type.lower() in ["float", "double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int", "long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if type.lower() in ["bytes"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if type.lower() in ["array"]:
+        return "VARCHAR"
+    if type.lower() in ["null"]:
+        return "SQLNULL"
+    return "VARCHAR"
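The converter only reads `field.type`, so its behaviour is easy to probe with a stub object. A short sketch of the mapping on a few representative contract types (stub fields, not the package's `Field` model):

```python
from types import SimpleNamespace

from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type

# Stub objects are enough here because the converter only looks at .type.
assert convert_to_duckdb_csv_type(SimpleNamespace(type="text")) == "VARCHAR"
assert convert_to_duckdb_csv_type(SimpleNamespace(type="timestamp_ntz")) == "TIMESTAMP"
assert convert_to_duckdb_csv_type(SimpleNamespace(type="decimal")) == "VARCHAR"  # precision/scale not carried over
assert convert_to_duckdb_csv_type(SimpleNamespace(type="int")) == "BIGINT"
assert convert_to_duckdb_csv_type(SimpleNamespace(type=None)) == "VARCHAR"
```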
datacontract/export/great_expectations_converter.py
CHANGED
@@ -58,7 +58,7 @@ def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
 def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     if field.type is not None:
         expectations.append(to_column_types_exp(field_name, field.type))
-    if field.unique
+    if field.unique:
         expectations.append(to_column_unique_exp(field_name))
     if field.maxLength is not None or field.minLength is not None:
         expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
datacontract/export/html_export.py
ADDED
@@ -0,0 +1,66 @@
+import datetime
+import logging
+from importlib.metadata import version
+
+import pytz
+import yaml
+from jinja2 import Environment, PackageLoader, select_autoescape
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification
+
+
+def to_html(data_contract_spec: DataContractSpecification) -> str:
+    # Load templates from templates folder
+    package_loader = PackageLoader("datacontract", "templates")
+    env = Environment(
+        loader=package_loader,
+        autoescape=select_autoescape(
+            enabled_extensions="html",
+            default_for_string=True,
+        ),
+    )
+
+    # Load the required template
+    template = env.get_template("datacontract.html")
+
+    if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
+        quality_specification = data_contract_spec.quality.specification
+    elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
+        if data_contract_spec.quality.type == "great-expectations":
+            quality_specification = yaml.dump(
+                data_contract_spec.quality.specification, sort_keys=False, default_style="|"
+            )
+        else:
+            quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
+    else:
+        quality_specification = None
+
+    style_content, _, _ = package_loader.get_source(env, "style/output.css")
+
+    datacontract_yaml = data_contract_spec.to_yaml()
+
+    tz = pytz.timezone('UTC')
+    now = datetime.datetime.now(tz)
+    formatted_date = now.strftime('%d %b %Y %H:%M:%S UTC')
+    datacontract_cli_version = get_version()
+
+    # Render the template with necessary data
+    html_string = template.render(
+        datacontract=data_contract_spec,
+        quality_specification=quality_specification,
+        style=style_content,
+        datacontract_yaml=datacontract_yaml,
+        formatted_date=formatted_date,
+        datacontract_cli_version=datacontract_cli_version,
+    )
+
+    return html_string
+
+
+def get_version() -> str:
+    try:
+        return version("datacontract_cli")
+    except Exception as e:
+        logging.debug("Ignoring exception", e)
+        return ""
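End to end, the new exporter is reached through the `html` export format. A minimal usage sketch, assuming a `datacontract.yaml` in the working directory and the `DataContract.export()` entry point shown in the data_contract.py hunks above:

```python
from pathlib import Path

from datacontract.data_contract import DataContract

# Renders the contract into a standalone HTML page (template and CSS ship with the wheel).
html = DataContract(data_contract_file="datacontract.yaml").export("html")
Path("datacontract.html").write_text(html)
```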
datacontract/export/pydantic_converter.py
CHANGED
@@ -1,51 +1,56 @@
-import datacontract.model.data_contract_specification as spec
-import typing
 import ast
+import typing
+
+import datacontract.model.data_contract_specification as spec
+
 
 def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
     classdefs = [generate_model_class(model_name, model) for (model_name, model) in contract.models.items()]
-    documentation =
-    contract.info and contract.info.description) else []
-
-
-
-
-
-
-
-
+    documentation = (
+        [ast.Expr(ast.Constant(contract.info.description))] if (contract.info and contract.info.description) else []
+    )
+    result = ast.Module(
+        body=[
+            ast.Import(
+                names=[
+                    ast.Name("datetime", ctx=ast.Load()),
+                    ast.Name("typing", ctx=ast.Load()),
+                    ast.Name("pydantic", ctx=ast.Load()),
+                ]
+            ),
+            *documentation,
+            *classdefs,
+        ],
+        type_ignores=[],
+    )
     return ast.unparse(result)
 
+
 def optional_of(node) -> ast.Subscript:
     return ast.Subscript(
-        value=ast.Attribute(
-
-
-            ctx=ast.Load()),
-        slice=node)
+        value=ast.Attribute(ast.Name(id="typing", ctx=ast.Load()), attr="Optional", ctx=ast.Load()), slice=node
+    )
+
 
 def list_of(node) -> ast.Subscript:
-    return ast.Subscript(
-
-        slice=node)
+    return ast.Subscript(value=ast.Name(id="list", ctx=ast.Load()), slice=node)
+
 
 def product_of(nodes: list[typing.Any]) -> ast.Subscript:
     return ast.Subscript(
-        value=ast.Attribute(
-
-            attr="Product",
-            ctx=ast.Load()),
-        slice=ast.Tuple(nodes, ctx=ast.Load())
+        value=ast.Attribute(value=ast.Name(id="typing", ctx=ast.Load()), attr="Product", ctx=ast.Load()),
+        slice=ast.Tuple(nodes, ctx=ast.Load()),
     )
 
 
 type_annotation_type = typing.Union[ast.Name, ast.Attribute, ast.Constant, ast.Subscript]
 
-
-
-
+
+def constant_field_annotation(
+    field_name: str, field: spec.Field
+) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
     match field.type:
-        case "string"|"text"|"varchar":
+        case "string" | "text" | "varchar":
             return (ast.Name("str", ctx=ast.Load()), None)
         case "number", "decimal", "numeric":
             # Either integer or float in specification,
@@ -58,13 +63,9 @@ def constant_field_annotation(field_name: str, field: spec.Field)\
         case "boolean":
             return (ast.Name("bool", ctx=ast.Load()), None)
         case "timestamp" | "timestamp_tz" | "timestamp_ntz":
-            return (ast.Attribute(
-                value=ast.Name(id="datetime", ctx=ast.Load()),
-                attr="datetime"), None)
+            return (ast.Attribute(value=ast.Name(id="datetime", ctx=ast.Load()), attr="datetime"), None)
         case "date":
-            return (ast.Attribute(
-                value=ast.Name(id="datetime", ctx=ast.Load()),
-                attr="date"), None)
+            return (ast.Attribute(value=ast.Name(id="datetime", ctx=ast.Load()), attr="date"), None)
         case "bytes":
             return (ast.Name("bytes", ctx=ast.Load()), None)
         case "null":
@@ -86,44 +87,35 @@ def type_annotation(field_name: str, field: spec.Field) -> tuple[type_annotation
     (annotated_type, new_classes) = constant_field_annotation(field_name, field)
     return (optional_of(annotated_type), new_classes)
 
+
 def is_simple_field(field: spec.Field) -> bool:
     return field.type not in set(["object", "record", "struct"])
 
-
-
-    list[ast.ClassDef]]:
+
+def field_definitions(fields: dict[str, spec.Field]) -> tuple[list[ast.Expr], list[ast.ClassDef]]:
     annotations = []
     classes = []
-    for
+    for field_name, field in fields.items():
         (ann, new_class) = type_annotation(field_name, field)
-        annotations.append(
-            ast.AnnAssign(
-                target=ast.Name(id=field_name, ctx=ast.Store()),
-                annotation=ann,
-                simple=1))
+        annotations.append(ast.AnnAssign(target=ast.Name(id=field_name, ctx=ast.Store()), annotation=ann, simple=1))
         if field.description and is_simple_field(field):
-            annotations.append(
-                ast.Expr(ast.Constant(field.description)))
+            annotations.append(ast.Expr(ast.Constant(field.description)))
         if new_class:
             classes.append(new_class)
     return (annotations, classes)
 
+
 def generate_field_class(field_name: str, field: spec.Field) -> ast.ClassDef:
-    assert
+    assert field.type in set(["object", "record", "struct"])
     (annotated_type, new_classes) = field_definitions(field.fields)
     documentation = [ast.Expr(ast.Constant(field.description))] if field.description else []
     return ast.ClassDef(
         name=field_name,
-        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
-
-                             ctx=ast.Load())],
-        body=[
-            *documentation,
-            *new_classes,
-            *annotated_type
-        ],
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()), attr="BaseModel", ctx=ast.Load())],
+        body=[*documentation, *new_classes, *annotated_type],
         keywords=[],
-        decorator_list=[]
+        decorator_list=[],
+    )
 
 
 def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDef:
@@ -131,10 +123,9 @@ def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDe
     documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
     result = ast.ClassDef(
         name=name.capitalize(),
-        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
-                             attr="BaseModel",
-                             ctx=ast.Load())],
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()), attr="BaseModel", ctx=ast.Load())],
         body=[*documentation, *nested_classes, *field_assignments],
         keywords=[],
-        decorator_list=[]
+        decorator_list=[],
+    )
     return result