datacontract-cli 0.10.9__py3-none-any.whl → 0.10.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/cli.py +7 -0
- datacontract/data_contract.py +4 -2
- datacontract/engines/soda/check_soda_execute.py +5 -2
- datacontract/engines/soda/connections/duckdb.py +4 -0
- datacontract/export/avro_converter.py +1 -1
- datacontract/export/sodacl_converter.py +1 -1
- datacontract/imports/avro_importer.py +142 -8
- datacontract/imports/dbt_importer.py +117 -0
- datacontract/imports/glue_importer.py +2 -2
- datacontract/imports/importer.py +6 -1
- datacontract/imports/importer_factory.py +24 -6
- datacontract/imports/jsonschema_importer.py +6 -3
- datacontract/imports/spark_importer.py +134 -0
- datacontract/integration/publish_datamesh_manager.py +10 -5
- datacontract/lint/resolve.py +72 -27
- datacontract/lint/schema.py +24 -4
- datacontract/model/data_contract_specification.py +3 -0
- datacontract/templates/datacontract.html +1 -1
- datacontract/templates/index.html +1 -1
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/METADATA +114 -101
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/RECORD +25 -23
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.10.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -226,6 +226,12 @@ def import_(
     unity_table_full_name: Annotated[
         Optional[str], typer.Option(help="Full name of a table in the unity catalog")
     ] = None,
+    dbt_model: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
+        ),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
@@ -238,6 +244,7 @@ def import_(
         bigquery_project=bigquery_project,
         bigquery_dataset=bigquery_dataset,
         unity_table_full_name=unity_table_full_name,
+        dbt_model=dbt_model,
     )
     console.print(result.to_yaml())
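The new `dbt_model` option is repeatable. Assuming typer's default flag naming for `dbt_model`, an import of selected models would look roughly like `datacontract import --format dbt --source target/manifest.json --dbt-model orders --dbt-model customers`; leaving the flag off imports every model in the manifest.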
datacontract/data_contract.py
CHANGED
@@ -4,7 +4,9 @@ import tempfile
 import typing
 
 import yaml
-from pyspark.sql import SparkSession
+
+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession
 
 from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
 from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
@@ -43,7 +45,7 @@ class DataContract:
         examples: bool = False,
         publish_url: str = None,
         publish_to_opentelemetry: bool = False,
-        spark: SparkSession = None,
+        spark: "SparkSession" = None,
         inline_definitions: bool = False,
         inline_quality: bool = False,
     ):
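Both this file and check_soda_execute.py (next) adopt the same pattern: the pyspark import moves behind `typing.TYPE_CHECKING` and the annotation becomes a string, so importing datacontract no longer requires pyspark at runtime. A minimal, generic sketch of that pattern (not the library's code):

    import typing

    if typing.TYPE_CHECKING:
        # evaluated only by static type checkers, never at runtime
        from pyspark.sql import SparkSession


    def run_checks(spark: "SparkSession" = None) -> None:
        # the quoted annotation avoids a runtime NameError when pyspark is absent;
        # pyspark is only touched if a session is actually passed in
        if spark is not None:
            spark.sql("SELECT 1").show()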
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -1,6 +1,9 @@
 import logging
+import typing
+
+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession
 
-from pyspark.sql import SparkSession
 from soda.scan import Scan
 
 from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
@@ -17,7 +20,7 @@ from datacontract.model.run import Run, Check, Log
 
 
 def check_soda_execute(
-    run: Run, data_contract: DataContractSpecification, server: Server, spark: SparkSession, tmp_dir
+    run: Run, data_contract: DataContractSpecification, server: Server, spark: "SparkSession", tmp_dir
 ):
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
datacontract/engines/soda/connections/duckdb.py
CHANGED
@@ -50,6 +50,10 @@ def get_duckdb_connection(data_contract, server, run: Run):
         )
     elif server.format == "delta":
         if server.type == "azure":
+            # After switching to native delta table support
+            # in https://github.com/datacontract/datacontract-cli/issues/258,
+            # azure storage should also work
+            # https://github.com/duckdb/duckdb_delta/issues/21
            raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
 
            storage_options = {
datacontract/export/avro_converter.py
CHANGED
@@ -65,7 +65,7 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         if field.config["avroLogicalType"] in ["time-millis", "date"]:
             return {"type": "int", "logicalType": field.config["avroLogicalType"]}
         if "avroType" in field.config:
-            return field.config["
+            return field.config["avroType"]
 
     if field.type is None:
         return "null"
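With the completed return statement above, a field that pins its Avro type via `config["avroType"]` is exported verbatim. A hedged sketch, assuming `Field` can be constructed directly like this:

    from datacontract.export.avro_converter import to_avro_type
    from datacontract.model.data_contract_specification import Field

    # illustrative field; "checksum" is just an example field name
    field = Field(type="bytes", config={"avroType": "fixed"})
    print(to_avro_type(field, "checksum"))  # expected to print: fixed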
datacontract/export/sodacl_converter.py
CHANGED
@@ -131,7 +131,7 @@ def check_field_minimum(field_name, minimum, quote_field_name: bool = False):
         field_name = f'"{field_name}"'
     return {
         f"invalid_count({field_name}) = 0": {
-            "name": f"Check that field {field_name} has a minimum of {
+            "name": f"Check that field {field_name} has a minimum of {minimum}",
             "valid min": minimum,
         }
     }
datacontract/imports/avro_importer.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Dict, List
+
 import avro.schema
 
 from datacontract.imports.importer import Importer
@@ -6,13 +8,39 @@ from datacontract.model.exceptions import DataContractException
 
 
 class AvroImporter(Importer):
+    """Class to import Avro Schema file"""
+
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-    ) ->
+    ) -> DataContractSpecification:
+        """
+        Import Avro schema from a source file.
+
+        Args:
+            data_contract_specification: The data contract specification to update.
+            source: The path to the Avro schema file.
+            import_args: Additional import arguments.
+
+        Returns:
+            The updated data contract specification.
+        """
         return import_avro(data_contract_specification, source)
 
 
 def import_avro(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    """
+    Import an Avro schema from a file and update the data contract specification.
+
+    Args:
+        data_contract_specification: The data contract specification to update.
+        source: The path to the Avro schema file.
+
+    Returns:
+        DataContractSpecification: The updated data contract specification.
+
+    Raises:
+        DataContractException: If there's an error parsing the Avro schema.
+    """
     if data_contract_specification.models is None:
         data_contract_specification.models = {}
 
@@ -45,7 +73,14 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     return data_contract_specification
 
 
-def handle_config_avro_custom_properties(field, imported_field):
+def handle_config_avro_custom_properties(field: avro.schema.Field, imported_field: Field) -> None:
+    """
+    Handle custom Avro properties and add them to the imported field's config.
+
+    Args:
+        field: The Avro field.
+        imported_field: The imported field to update.
+    """
     if field.get_prop("logicalType") is not None:
         if imported_field.config is None:
             imported_field.config = {}
@@ -57,7 +92,16 @@ def handle_config_avro_custom_properties(field, imported_field):
         imported_field.config["avroDefault"] = field.default
 
 
-def import_record_fields(record_fields):
+def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Field]:
+    """
+    Import Avro record fields and convert them to data contract fields.
+
+    Args:
+        record_fields: List of Avro record fields.
+
+    Returns:
+        A dictionary of imported fields.
+    """
     imported_fields = {}
     for field in record_fields:
         imported_field = Field()
@@ -83,6 +127,15 @@ def import_record_fields(record_fields):
         elif field.type.type == "array":
             imported_field.type = "array"
             imported_field.items = import_avro_array_items(field.type)
+        elif field.type.type == "map":
+            imported_field.type = "map"
+            imported_field.values = import_avro_map_values(field.type)
+        elif field.type.type == "enum":
+            imported_field.type = "string"
+            imported_field.enum = field.type.symbols
+            if not imported_field.config:
+                imported_field.config = {}
+            imported_field.config["avroType"] = "enum"
         else:  # primitive type
             imported_field.type = map_type_from_avro(field.type.type)
 
@@ -91,7 +144,16 @@ def import_record_fields(record_fields):
     return imported_fields
 
 
-def import_avro_array_items(array_schema):
+def import_avro_array_items(array_schema: avro.schema.ArraySchema) -> Field:
+    """
+    Import Avro array items and convert them to a data contract field.
+
+    Args:
+        array_schema: The Avro array schema.
+
+    Returns:
+        Field: The imported field representing the array items.
+    """
     items = Field()
     for prop in array_schema.other_props:
         items.__setattr__(prop, array_schema.other_props[prop])
@@ -108,7 +170,45 @@ def import_avro_array_items(array_schema):
     return items
 
 
-def import_type_of_optional_field(field):
+def import_avro_map_values(map_schema: avro.schema.MapSchema) -> Field:
+    """
+    Import Avro map values and convert them to a data contract field.
+
+    Args:
+        map_schema: The Avro map schema.
+
+    Returns:
+        Field: The imported field representing the map values.
+    """
+    values = Field()
+    for prop in map_schema.other_props:
+        values.__setattr__(prop, map_schema.other_props[prop])
+
+    if map_schema.values.type == "record":
+        values.type = "object"
+        values.fields = import_record_fields(map_schema.values.fields)
+    elif map_schema.values.type == "array":
+        values.type = "array"
+        values.items = import_avro_array_items(map_schema.values)
+    else:  # primitive type
+        values.type = map_type_from_avro(map_schema.values.type)
+
+    return values
+
+
+def import_type_of_optional_field(field: avro.schema.Field) -> str:
+    """
+    Determine the type of optional field in an Avro union.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        str: The mapped type of the non-null field in the union.
+
+    Raises:
+        DataContractException: If no non-null type is found in the union.
+    """
     for field_type in field.type.schemas:
         if field_type.type != "null":
             return map_type_from_avro(field_type.type)
@@ -121,21 +221,51 @@ def import_type_of_optional_field(field):
     )
 
 
-def get_record_from_union_field(field):
+def get_record_from_union_field(field: avro.schema.Field) -> avro.schema.RecordSchema | None:
+    """
+    Get the record schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The record schema if found, None otherwise.
+    """
     for field_type in field.type.schemas:
         if field_type.type == "record":
             return field_type
     return None
 
 
-def get_array_from_union_field(field):
+def get_array_from_union_field(field: avro.schema.Field) -> avro.schema.ArraySchema | None:
+    """
+    Get the array schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The array schema if found, None otherwise.
+    """
     for field_type in field.type.schemas:
         if field_type.type == "array":
             return field_type
     return None
 
 
-def map_type_from_avro(avro_type_str: str):
+def map_type_from_avro(avro_type_str: str) -> str:
+    """
+    Map Avro type strings to data contract type strings.
+
+    Args:
+        avro_type_str (str): The Avro type string.
+
+    Returns:
+        str: The corresponding data contract type string.
+
+    Raises:
+        DataContractException: If the Avro type is unsupported.
+    """
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":
         return "null"
@@ -155,6 +285,10 @@ def map_type_from_avro(avro_type_str: str):
         return "record"
     elif avro_type_str == "array":
         return "array"
+    elif avro_type_str == "map":
+        return "map"
+    elif avro_type_str == "enum":
+        return "string"
     else:
         raise DataContractException(
             type="schema",
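The new `map` and `enum` branches can be exercised directly with the `avro` library; a small sketch (schema and field names are illustrative), assuming `import_record_fields` is importable as shown:

    import avro.schema

    from datacontract.imports.avro_importer import import_record_fields

    schema = avro.schema.parse("""
    {
      "type": "record",
      "name": "Order",
      "fields": [
        {"name": "labels", "type": {"type": "map", "values": "string"}},
        {"name": "status", "type": {"type": "enum", "name": "Status", "symbols": ["OPEN", "SHIPPED"]}}
      ]
    }
    """)

    fields = import_record_fields(schema.fields)
    # per the new branches above:
    #   fields["labels"].type == "map"    and fields["labels"].values.type == "string"
    #   fields["status"].type == "string" and fields["status"].enum == ["OPEN", "SHIPPED"]
    #   fields["status"].config["avroType"] == "enum"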
datacontract/imports/dbt_importer.py
ADDED
@@ -0,0 +1,117 @@
+import json
+
+from typing import (
+    List,
+)
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+
+
+class DbtManifestImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> dict:
+        data = read_dbt_manifest(manifest_path=source)
+        return import_dbt_manifest(
+            data_contract_specification, manifest_dict=data, dbt_models=import_args.get("dbt_model")
+        )
+
+
+def import_dbt_manifest(
+    data_contract_specification: DataContractSpecification, manifest_dict: dict, dbt_models: List[str]
+):
+    data_contract_specification.info.title = manifest_dict.get("info").get("project_name")
+    data_contract_specification.info.dbt_version = manifest_dict.get("info").get("dbt_version")
+
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    for model in manifest_dict.get("models", []):
+        if dbt_models and model.name not in dbt_models:
+            continue
+
+        dc_model = Model(
+            description=model.description,
+            tags=model.tags,
+            fields=create_fields(model.columns),
+        )
+
+        data_contract_specification.models[model.name] = dc_model
+
+    return data_contract_specification
+
+
+def create_fields(columns: List):
+    fields = {}
+    for column in columns:
+        field = Field(
+            description=column.description, type=column.data_type if column.data_type else "", tags=column.tags
+        )
+        fields[column.name] = field
+
+    return fields
+
+
+def read_dbt_manifest(manifest_path: str):
+    with open(manifest_path, "r", encoding="utf-8") as f:
+        manifest = json.load(f)
+        return {"info": manifest.get("metadata"), "models": create_manifest_models(manifest)}
+
+
+def create_manifest_models(manifest: dict) -> List:
+    models = []
+    nodes = manifest.get("nodes")
+
+    for node in nodes.values():
+        if node["resource_type"] != "model":
+            continue
+
+        models.append(DbtModel(node))
+    return models
+
+
+class DbtColumn:
+    name: str
+    description: str
+    data_type: str
+    meta: dict
+    tags: List
+
+    def __init__(self, node_column: dict):
+        self.name = node_column.get("name")
+        self.description = node_column.get("description")
+        self.data_type = node_column.get("data_type")
+        self.meta = node_column.get("meta", {})
+        self.tags = node_column.get("tags", [])
+
+    def __repr__(self) -> str:
+        return self.name
+
+
+class DbtModel:
+    name: str
+    database: str
+    schema: str
+    description: str
+    unique_id: str
+    tags: List
+
+    def __init__(self, node: dict):
+        self.name = node.get("name")
+        self.database = node.get("database")
+        self.schema = node.get("schema")
+        self.description = node.get("description")
+        self.display_name = node.get("display_name")
+        self.unique_id = node.get("unique_id")
+        self.columns = []
+        self.tags = node.get("tags")
+        if node.get("columns"):
+            self.add_columns(node.get("columns").values())
+
+    def add_columns(self, model_columns: List):
+        for column in model_columns:
+            self.columns.append(DbtColumn(column))
+
+    def __repr__(self) -> str:
+        return self.name
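A small sketch of the new importer's pure helpers, using a made-up two-node manifest (node and column names are illustrative); `import_dbt_manifest` would then attach each selected model to the contract's `models`, as the code above shows:

    from datacontract.imports.dbt_importer import create_fields, create_manifest_models

    manifest = {
        "metadata": {"project_name": "jaffle_shop", "dbt_version": "1.8.0"},
        "nodes": {
            "model.jaffle_shop.orders": {
                "resource_type": "model",
                "name": "orders",
                "description": "One row per order",
                "tags": ["core"],
                "columns": {
                    "order_id": {"name": "order_id", "description": "Primary key", "data_type": "int", "tags": []}
                },
            },
            "seed.jaffle_shop.countries": {"resource_type": "seed", "name": "countries"},
        },
    }

    models = create_manifest_models(manifest)  # only "model" nodes survive -> [orders]
    fields = create_fields(models[0].columns)  # {"order_id": Field(type="int", ...)}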
datacontract/imports/glue_importer.py
CHANGED
@@ -14,7 +14,7 @@ class GlueImporter:
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
     ) -> dict:
-        return import_glue(data_contract_specification, source, import_args.get("
+        return import_glue(data_contract_specification, source, import_args.get("glue_table"))
 
 
 def get_glue_database(database_name: str):
@@ -154,7 +154,7 @@ def import_glue(
     for column in table_schema:
         field = create_typed_field(column["Type"])
 
-        # hive
+        # hive partitions are required, but are not primary keys
         if column.get("Hive"):
             field.required = True
 
datacontract/imports/importer.py
CHANGED
@@ -10,7 +10,10 @@ class Importer(ABC):
 
     @abstractmethod
     def import_source(
-        self,
+        self,
+        data_contract_specification: DataContractSpecification,
+        source: str,
+        import_args: dict,
     ) -> dict:
         pass
 
@@ -18,11 +21,13 @@ class Importer(ABC):
 class ImportFormat(str, Enum):
     sql = "sql"
     avro = "avro"
+    dbt = "dbt"
     glue = "glue"
     jsonschema = "jsonschema"
     bigquery = "bigquery"
     odcs = "odcs"
     unity = "unity"
+    spark = "spark"
 
     @classmethod
     def get_suported_formats(cls):
datacontract/imports/importer_factory.py
CHANGED
@@ -18,7 +18,7 @@ class ImporterFactory:
         importers = self.dict_importer.copy()
         importers.update(self.dict_lazy_importer.copy())
         if name not in importers.keys():
-            raise ValueError(f"The '{name}' format is not
+            raise ValueError(f"The '{name}' format is not supported.")
         importer_class = importers[name]
         if type(importers[name]) is tuple:
             importer_class = load_module_class(module_path=importers[name][0], class_name=importers[name][1])
@@ -46,7 +46,9 @@ def load_module_class(module_path, class_name):
 
 importer_factory = ImporterFactory()
 importer_factory.register_lazy_importer(
-    name=ImportFormat.avro,
+    name=ImportFormat.avro,
+    module_path="datacontract.imports.avro_importer",
+    class_name="AvroImporter",
 )
 importer_factory.register_lazy_importer(
     name=ImportFormat.bigquery,
@@ -54,7 +56,9 @@ importer_factory.register_lazy_importer(
     class_name="BigQueryImporter",
 )
 importer_factory.register_lazy_importer(
-    name=ImportFormat.glue,
+    name=ImportFormat.glue,
+    module_path="datacontract.imports.glue_importer",
+    class_name="GlueImporter",
 )
 importer_factory.register_lazy_importer(
     name=ImportFormat.jsonschema,
@@ -62,11 +66,25 @@ importer_factory.register_lazy_importer(
     class_name="JsonSchemaImporter",
 )
 importer_factory.register_lazy_importer(
-    name=ImportFormat.odcs,
+    name=ImportFormat.odcs,
+    module_path="datacontract.imports.odcs_importer",
+    class_name="OdcsImporter",
 )
 importer_factory.register_lazy_importer(
-    name=ImportFormat.sql,
+    name=ImportFormat.sql,
+    module_path="datacontract.imports.sql_importer",
+    class_name="SqlImporter",
 )
 importer_factory.register_lazy_importer(
-    name=ImportFormat.unity,
+    name=ImportFormat.unity,
+    module_path="datacontract.imports.unity_importer",
+    class_name="UnityImporter",
+)
+importer_factory.register_lazy_importer(
+    name=ImportFormat.spark,
+    module_path="datacontract.imports.spark_importer",
+    class_name="SparkImporter",
+)
+importer_factory.register_lazy_importer(
+    name=ImportFormat.dbt, module_path="datacontract.imports.dbt_importer", class_name="DbtManifestImporter"
 )
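The factory now registers every importer lazily: it stores a `(module_path, class_name)` tuple and only imports the module when that format is actually requested. A minimal, generic sketch of the idea (not the factory's exact implementation):

    import importlib


    def load_class(module_path: str, class_name: str):
        # the module is imported only at this point, so optional dependencies
        # (pyspark, dbt, ...) are not needed just to import the package
        module = importlib.import_module(module_path)
        return getattr(module, class_name)


    lazy_importers = {"dbt": ("datacontract.imports.dbt_importer", "DbtManifestImporter")}

    importer_cls = load_class(*lazy_importers["dbt"])  # deferred import happens here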
datacontract/imports/jsonschema_importer.py
CHANGED
@@ -64,11 +64,14 @@ def convert_json_schema_properties(properties, is_definition=False):
            case "tags":
                field_kwargs["tags"] = value
            case "properties":
-                field_kwargs["fields"] = convert_json_schema_properties(value)
+                field_kwargs["fields"] = convert_json_schema_properties(value, is_definition=is_definition)
            case "items":
-                field_kwargs["items"] = convert_json_schema_properties(value)
+                field_kwargs["items"] = convert_json_schema_properties(value, is_definition=is_definition)
 
-        field = Field(**field_kwargs)
+        if is_definition:
+            field = Definition(**field_kwargs)
+        else:
+            field = Field(**field_kwargs)
         fields[field_name] = field
 
     return fields