datacontract-cli 0.10.21__py3-none-any.whl → 0.10.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking.py +1 -1
- datacontract/breaking/breaking_rules.py +1 -1
- datacontract/cli.py +25 -77
- datacontract/data_contract.py +14 -100
- datacontract/engines/data_contract_checks.py +735 -0
- datacontract/engines/data_contract_test.py +67 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/soda/check_soda_execute.py +37 -31
- datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +6 -5
- datacontract/engines/soda/connections/kafka.py +8 -3
- datacontract/export/avro_converter.py +2 -0
- datacontract/export/dbt_converter.py +13 -10
- datacontract/export/exporter.py +0 -2
- datacontract/export/exporter_factory.py +0 -12
- datacontract/export/odcs_v3_exporter.py +22 -3
- datacontract/export/sodacl_converter.py +22 -294
- datacontract/export/sql_type_converter.py +7 -2
- datacontract/imports/odcs_importer.py +6 -3
- datacontract/imports/odcs_v3_importer.py +3 -1
- datacontract/imports/sql_importer.py +229 -29
- datacontract/lint/resolve.py +17 -4
- datacontract/model/exceptions.py +4 -1
- datacontract/model/run.py +11 -4
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/METADATA +192 -215
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/RECORD +33 -32
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/WHEEL +1 -1
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/export/odcs_v2_exporter.py +0 -124
- datacontract/imports/odcs_v2_importer.py +0 -177
- datacontract/lint/linters/example_model_linter.py +0 -91
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/top_level.txt +0 -0

datacontract/engines/data_contract_test.py
ADDED

@@ -0,0 +1,67 @@
+import typing
+
+from datacontract.engines.data_contract_checks import create_checks
+
+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession
+
+from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
+    check_that_datacontract_contains_valid_server_configuration,
+)
+from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
+from datacontract.engines.soda.check_soda_execute import check_soda_execute
+from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.model.exceptions import DataContractException
+from datacontract.model.run import ResultEnum, Run
+
+
+def execute_data_contract_test(
+    data_contract_specification: DataContractSpecification,
+    run: Run,
+    server_name: str = None,
+    spark: "SparkSession" = None,
+):
+    if data_contract_specification.models is None or len(data_contract_specification.models) == 0:
+        raise DataContractException(
+            type="lint",
+            name="Check that data contract contains models",
+            result=ResultEnum.warning,
+            reason="Models block is missing. Skip executing tests.",
+            engine="datacontract",
+        )
+    server = get_server(data_contract_specification, server_name)
+    run.log_info(f"Running tests for data contract {data_contract_specification.id} with server {server_name}")
+    run.dataContractId = data_contract_specification.id
+    run.dataContractVersion = data_contract_specification.info.version
+    run.dataProductId = server.dataProductId
+    run.outputPortId = server.outputPortId
+    run.server = server_name
+
+    run.checks.extend(create_checks(data_contract_specification, server))
+
+    # TODO check server is supported type for nicer error messages
+    # TODO check server credentials are complete for nicer error messages
+    if server.format == "json" and server.type != "kafka":
+        check_jsonschema(run, data_contract_specification, server)
+    check_soda_execute(run, data_contract_specification, server, spark)
+
+
+def get_server(data_contract_specification: DataContractSpecification, server_name: str = None):
+    """Get the server configuration from the data contract specification.
+
+    Args:
+        data_contract_specification: The data contract specification
+        server_name: Optional name of the server to use. If not provided, uses the first server.
+
+    Returns:
+        The selected server configuration
+    """
+
+    check_that_datacontract_contains_valid_server_configuration(data_contract_specification, server_name)
+
+    if server_name:
+        server = data_contract_specification.servers.get(server_name)
+    else:
+        server_name = list(data_contract_specification.servers.keys())[0]
+        server = data_contract_specification.servers.get(server_name)
+    return server

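The test orchestration moves into this new module. A minimal, hypothetical driver for it; load_spec() below is a stand-in for however the DataContractSpecification gets resolved and is not part of this diff:

# Hypothetical driver, not taken from the package; load_spec() is a stand-in
# for however the DataContractSpecification gets resolved from YAML.
from datacontract.engines.data_contract_test import execute_data_contract_test
from datacontract.model.run import Run

spec = load_spec("datacontract.yaml")  # stand-in helper, not part of datacontract-cli
run = Run.create_run()  # assumed Run factory in datacontract.model.run
execute_data_contract_test(spec, run, server_name="production")  # server name invented
for check in run.checks:
    print(check.name, check.result)
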
datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py
CHANGED
@@ -1,12 +1,11 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.exceptions import DataContractException
-from datacontract.model.run import Run
 
 
 def check_that_datacontract_contains_valid_server_configuration(
-
+    data_contract: DataContractSpecification, server_name: str | None
 ):
-    if data_contract.servers is None:
+    if data_contract.servers is None or len(data_contract.servers) == 0:
         raise DataContractException(
             type="lint",
             name="Check that data contract contains valid server configuration",

datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -1,8 +1,9 @@
 import logging
+import uuid
 
 from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
 from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
-from datacontract.engines.soda.connections.
+from datacontract.engines.soda.connections.duckdb_connection import get_duckdb_connection
 from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
@@ -13,7 +14,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 from datacontract.model.run import Check, Log, ResultEnum, Run
 
 
-def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark
+def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark):
     from soda.common.config_helper import ConfigHelper
 
     ConfigHelper.get_instance().upsert_value("send_anonymous_usage_stats", False)
@@ -80,8 +81,8 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         scan.set_data_source_name("datacontract-cli")
     elif server.type == "kafka":
         if spark is None:
-            spark = create_spark_session(
-        read_kafka_topic(spark, data_contract, server
+            spark = create_spark_session()
+        read_kafka_topic(spark, data_contract, server)
         scan.add_spark_session(spark, data_source_name=server.type)
         scan.set_data_source_name(server.type)
     elif server.type == "sqlserver":
@@ -106,37 +107,34 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         run.log_warn(f"Server type {server.type} not yet supported by datacontract CLI")
         return
 
-
-    # Don't check types for avro format, as they are checked with avro schema
-    # Don't check types for csv format, as they are hard to detect
-    server_type = server.type
-    check_types = server.format != "json" and server.format != "csv" and server.format != "avro"
-
-    sodacl_yaml_str = to_sodacl_yaml(data_contract, server_type, check_types)
+    sodacl_yaml_str = to_sodacl_yaml(run)
     # print("sodacl_yaml_str:\n" + sodacl_yaml_str)
     scan.add_sodacl_yaml_str(sodacl_yaml_str)
 
     # Execute the scan
-    logging.info("Starting soda scan")
+    logging.info("Starting soda scan with checks:\n" + sodacl_yaml_str)
    scan.execute()
     logging.info("Finished soda scan")
 
     # pprint.PrettyPrinter(indent=2).pprint(scan.build_scan_results())
 
     scan_results = scan.get_scan_results()
-    for
-
-
-
-
-
-
-
-
-
-
-
-
+    for scan_result in scan_results.get("checks"):
+        name = scan_result.get("name")
+        check = get_check(run, scan_result)
+        if check is None:
+            check = Check(
+                id=str(uuid.uuid4()),
+                category="custom",
+                type="custom",
+                name=name,
+                engine="soda-core",
+            )
+            run.checks.append(check)
+        check.result = to_result(scan_result)
+        check.reason = ", ".join(scan_result.get("outcomeReasons"))
+        check.diagnostics = scan_result.get("diagnostics")
+        update_reason(check, scan_result)
 
     for log in scan_results.get("logs"):
         run.logs.append(
@@ -152,8 +150,8 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         run.checks.append(
             Check(
                 type="general",
-                name="
-                result=
+                name="Data Contract Tests",
+                result=ResultEnum.warning,
                 reason="Engine soda-core has errors. See the logs for details.",
                 engine="soda-core",
             )
@@ -161,14 +159,22 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         return
 
 
-def
+def get_check(run, scan_result) -> Check | None:
+    check_by_name = next((c for c in run.checks if c.key == scan_result.get("name")), None)
+    if check_by_name is not None:
+        return check_by_name
+
+    return None
+
+
+def to_result(c) -> ResultEnum:
     soda_outcome = c.get("outcome")
     if soda_outcome == "pass":
-        return
+        return ResultEnum.passed
     elif soda_outcome == "fail":
-        return
+        return ResultEnum.failed
     else:
-        return
+        return ResultEnum.unknown
 
 
 def update_reason(check, c):

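After the scan, each Soda result is matched back to a pre-created check by its key; unmatched results become synthetic "custom" checks, and to_result folds the Soda outcome into the shared ResultEnum. The mapping in isolation (the scan_result dict is a fabricated example, simplified to plain strings):

# Fabricated example of one Soda-style result dict, showing the outcome mapping above
# with plain strings instead of ResultEnum members.
scan_result = {"name": "orders_row_count", "outcome": "fail", "outcomeReasons": [], "diagnostics": {"value": 0}}

result = {"pass": "passed", "fail": "failed"}.get(scan_result.get("outcome"), "unknown")
print(result)  # -> failed
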
datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py}
CHANGED

@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 import duckdb
 
@@ -27,13 +28,13 @@ def get_duckdb_connection(data_contract, server, run: Run):
         run.log_info(f"Creating table {model_name} for {model_path}")
 
         if server.format == "json":
-
+            json_format = "auto"
             if server.delimiter == "new_line":
-
+                json_format = "newline_delimited"
             elif server.delimiter == "array":
-
+                json_format = "array"
             con.sql(f"""
-                CREATE VIEW "{model_name}" AS SELECT * FROM read_json_auto('{model_path}', format='{
+                CREATE VIEW "{model_name}" AS SELECT * FROM read_json_auto('{model_path}', format='{json_format}', hive_partitioning=1);
             """)
         elif server.format == "parquet":
             con.sql(f"""
@@ -56,7 +57,7 @@ def get_duckdb_connection(data_contract, server, run: Run):
     return con
 
 
-def to_csv_types(model) -> dict:
+def to_csv_types(model) -> dict[Any, str | None] | None:
     if model is None:
         return None
     columns = {}

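The JSON branch now always passes an explicit format to DuckDB's read_json_auto and keeps Hive partitioning enabled. Stripped of the surrounding loop, the generated statement behaves roughly like this (path and view name are placeholders):

# Rough standalone equivalent of the generated view; path and view name are placeholders.
import duckdb

con = duckdb.connect()
con.sql(
    "CREATE VIEW orders AS "
    "SELECT * FROM read_json_auto('data/orders/*.json', format='newline_delimited', hive_partitioning=1)"
)
print(con.sql("SELECT count(*) FROM orders").fetchone())
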
datacontract/engines/soda/connections/kafka.py
CHANGED

@@ -1,12 +1,14 @@
+import atexit
 import logging
 import os
+import tempfile
 
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.model.data_contract_specification import DataContractSpecification, Field, Server
 from datacontract.model.exceptions import DataContractException
 
 
-def create_spark_session(
+def create_spark_session():
     """Create and configure a Spark session."""
 
     try:
@@ -21,6 +23,9 @@ def create_spark_session(tmp_dir: str):
             original_exception=e,
         )
 
+    tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
+    atexit.register(tmp_dir.cleanup)
+
     spark = (
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
@@ -37,7 +42,7 @@ def create_spark_session(tmp_dir: str):
     return spark
 
 
-def read_kafka_topic(spark, data_contract: DataContractSpecification, server: Server
+def read_kafka_topic(spark, data_contract: DataContractSpecification, server: Server):
     """Read and process data from a Kafka topic based on the server configuration."""
 
     logging.info("Reading data from Kafka server %s topic %s", server.host, server.topic)
@@ -62,7 +67,7 @@ def read_kafka_topic(spark, data_contract: DataContractSpecification, server: Se
             type="test",
             name="Configuring Kafka checks",
             result="warning",
-            reason=f"Kafka format '{server.format}' is not supported.
+            reason=f"Kafka format '{server.format}' is not supported. Skip executing tests.",
             engine="datacontract",
         )
 

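create_spark_session no longer takes a tmp_dir argument; it allocates its own temporary warehouse directory and registers cleanup for process exit. The underlying idiom, separated from Spark (the prefix is arbitrary):

# Minimal sketch of the tempfile + atexit idiom used above; the prefix is arbitrary.
import atexit
import tempfile

tmp_dir = tempfile.TemporaryDirectory(prefix="demo-scratch-")
atexit.register(tmp_dir.cleanup)  # directory removed when the interpreter exits

print(tmp_dir.name)  # scratch path, e.g. usable as a Spark warehouse dir
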
datacontract/export/avro_converter.py
CHANGED

@@ -108,6 +108,8 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     elif field.type in ["time"]:
         return "long"
     elif field.type in ["object", "record", "struct"]:
+        if field.config is not None and "namespace" in field.config:
+            return to_avro_record(field_name, field.fields, field.description, field.config["namespace"])
         return to_avro_record(field_name, field.fields, field.description, None)
     elif field.type in ["binary"]:
         return "bytes"

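A nested object/record/struct field can now carry its own Avro namespace through its config block; without one, the previous behavior (namespace None) is kept. A hypothetical field config and the record header it would lead to (names invented):

# Hypothetical illustration; the field name and namespace are invented.
field_config = {"namespace": "com.example.checkout"}

# With the namespace present, to_avro_record receives it, so the nested field
# is emitted roughly as this Avro record header:
nested_record = {
    "type": "record",
    "name": "shipping_address",
    "namespace": "com.example.checkout",
    "fields": [],
}
print(nested_record["namespace"])
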
datacontract/export/dbt_converter.py
CHANGED

@@ -9,7 +9,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 
 class DbtExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
-        return to_dbt_models_yaml(data_contract)
+        return to_dbt_models_yaml(data_contract, server)
 
 
 class DbtSourceExporter(Exporter):
@@ -27,15 +27,16 @@ class DbtStageExporter(Exporter):
         )
 
 
-def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
+def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None):
     dbt = {
         "version": 2,
         "models": [],
     }
+
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec)
+        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec, adapter_type=server)
         dbt["models"].append(dbt_model)
-    return yaml.
+    return yaml.safe_dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
@@ -60,7 +61,7 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
     if data_contract_spec.info.owner is not None:
         source["meta"] = {"owner": data_contract_spec.info.owner}
     if data_contract_spec.info.description is not None:
-        source["description"] = data_contract_spec.info.description
+        source["description"] = data_contract_spec.info.description.strip().replace("\n", " ")
     found_server = data_contract_spec.servers.get(server)
     adapter_type = None
     if found_server is not None:
@@ -87,14 +88,16 @@ def _to_dbt_source_table(
     }
 
     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
     columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
 
 
-def _to_dbt_model(
+def _to_dbt_model(
+    model_key, model_value: Model, data_contract_spec: DataContractSpecification, adapter_type: Optional[str]
+) -> dict:
     dbt_model = {
         "name": model_key,
     }
@@ -108,8 +111,8 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
     if _supports_constraints(model_type):
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
-    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type),
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
+    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
@@ -171,7 +174,7 @@ def _to_column(
             {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
         )
     if field.description is not None:
-        column["description"] = field.description
+        column["description"] = field.description.strip().replace("\n", " ")
     if field.required:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})

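Every description written into the dbt YAML is now flattened to a single line first. For example (sample text invented):

# Invented sample showing the normalization applied to descriptions.
description = "Orders placed in the webshop.\nIncludes cancelled orders.\n"
print(description.strip().replace("\n", " "))
# -> Orders placed in the webshop. Includes cancelled orders.
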
datacontract/export/exporter_factory.py
CHANGED
@@ -107,18 +107,6 @@ exporter_factory.register_lazy_exporter(
     class_name="JsonSchemaExporter",
 )
 
-exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v2,
-    module_path="datacontract.export.odcs_v2_exporter",
-    class_name="OdcsV2Exporter",
-)
-
-exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v3,
-    module_path="datacontract.export.odcs_v3_exporter",
-    class_name="OdcsV3Exporter",
-)
-
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.odcs,
     module_path="datacontract.export.odcs_v3_exporter",

datacontract/export/odcs_v3_exporter.py
CHANGED

@@ -19,7 +19,7 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
         "name": data_contract_spec.info.title,
         "version": data_contract_spec.info.version,
         "domain": data_contract_spec.info.owner,
-        "status": data_contract_spec.info.status,
+        "status": to_status(data_contract_spec.info.status),
     }
 
     if data_contract_spec.terms is not None:
@@ -217,9 +217,9 @@ def to_property(field_name: str, field: Field) -> dict:
     if field.description is not None:
         property["description"] = field.description
     if field.required is not None:
-        property["
+        property["nullable"] = not field.required
     if field.unique is not None:
-        property["
+        property["unique"] = field.unique
     if field.classification is not None:
         property["classification"] = field.classification
     if field.examples is not None:
@@ -312,3 +312,22 @@ def to_odcs_quality(quality):
     if quality.implementation is not None:
         quality_dict["implementation"] = quality.implementation
     return quality_dict
+
+
+def to_status(status):
+    """Convert the data contract status to ODCS v3 format."""
+    if status is None:
+        return "draft"  # Default to draft if no status is provided
+
+    # Valid status values according to ODCS v3.0.1 spec
+    valid_statuses = ["proposed", "draft", "active", "deprecated", "retired"]
+
+    # Convert to lowercase for comparison
+    status_lower = status.lower()
+
+    # If status is already valid, return it as is
+    if status_lower in valid_statuses:
+        return status_lower
+
+    # Default to "draft" for any non-standard status
+    return "draft"

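to_status lower-cases the incoming value and falls back to "draft" for anything outside the ODCS v3 vocabulary, so for example:

# Expected behaviour of to_status as added above.
from datacontract.export.odcs_v3_exporter import to_status

print(to_status("Active"))          # -> active
print(to_status("in development"))  # -> draft (not an ODCS v3 status)
print(to_status(None))              # -> draft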