datacontract-cli 0.10.8__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (36) hide show
  1. datacontract/catalog/catalog.py +4 -2
  2. datacontract/cli.py +29 -18
  3. datacontract/data_contract.py +9 -51
  4. datacontract/engines/soda/check_soda_execute.py +5 -0
  5. datacontract/engines/soda/connections/duckdb.py +28 -12
  6. datacontract/engines/soda/connections/trino.py +26 -0
  7. datacontract/export/exporter.py +3 -2
  8. datacontract/export/exporter_factory.py +132 -39
  9. datacontract/export/jsonschema_converter.py +7 -7
  10. datacontract/export/sodacl_converter.py +16 -11
  11. datacontract/export/spark_converter.py +211 -0
  12. datacontract/export/sql_type_converter.py +28 -0
  13. datacontract/imports/avro_importer.py +8 -0
  14. datacontract/imports/bigquery_importer.py +17 -0
  15. datacontract/imports/glue_importer.py +115 -32
  16. datacontract/imports/importer.py +29 -0
  17. datacontract/imports/importer_factory.py +72 -0
  18. datacontract/imports/jsonschema_importer.py +8 -0
  19. datacontract/imports/odcs_importer.py +8 -0
  20. datacontract/imports/sql_importer.py +8 -0
  21. datacontract/imports/unity_importer.py +23 -9
  22. datacontract/lint/resolve.py +22 -1
  23. datacontract/model/data_contract_specification.py +34 -4
  24. datacontract/templates/datacontract.html +17 -2
  25. datacontract/templates/partials/datacontract_information.html +20 -0
  26. datacontract/templates/partials/datacontract_terms.html +7 -0
  27. datacontract/templates/partials/definition.html +9 -1
  28. datacontract/templates/partials/model_field.html +23 -6
  29. datacontract/templates/partials/server.html +49 -16
  30. datacontract/templates/style/output.css +42 -0
  31. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/METADATA +203 -28
  32. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/RECORD +36 -32
  33. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/WHEEL +1 -1
  34. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/LICENSE +0 -0
  35. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/entry_points.txt +0 -0
  36. {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ from pyspark.sql import types
2
+ from datacontract.model.data_contract_specification import (
3
+ DataContractSpecification,
4
+ Model,
5
+ Field,
6
+ )
7
+ from datacontract.export.exporter import Exporter
8
+
9
+
10
class SparkExporter(Exporter):
    """
    Exporter class for exporting data contracts as PySpark schema code.
    """

    def export(
        self,
        data_contract: DataContractSpecification,
        model,
        server,
        sql_server_type,
        export_args,
    ) -> str:
        """
        Export the given data contract as PySpark schema code.

        Args:
            data_contract (DataContractSpecification): The data contract specification.
            model: Not used in this implementation.
            server: Not used in this implementation.
            sql_server_type: Not used in this implementation.
            export_args: Not used in this implementation.

        Returns:
            str: The result of ``to_spark``: one ``<model name> = <schema code>``
                entry per model, separated by blank lines. Use ``to_spark_dict``
                to obtain the ``StructType`` objects themselves.
        """
        return to_spark(data_contract)
37
+
38
+
39
def to_spark(contract: DataContractSpecification) -> str:
    """
    Render every model of a data contract as a PySpark schema assignment.

    Args:
        contract (DataContractSpecification): The data contract specification containing models.

    Returns:
        str: One ``<model name> = <schema code>`` entry per model, joined by a blank line.
    """
    assignments = []
    for model_name, model in contract.models.items():
        schema_code = print_schema(to_spark_schema(model))
        assignments.append(f"{model_name} = {schema_code}")
    return "\n\n".join(assignments)
52
+
53
+
54
def to_spark_dict(contract: DataContractSpecification) -> dict[str, types.StructType]:
    """
    Build the Spark schema of every model in a data contract.

    Args:
        contract (DataContractSpecification): The data contract specification.

    Returns:
        dict[str, types.StructType]: Spark schemas keyed by model name.
    """
    schemas = {}
    for model_name, model in contract.models.items():
        schemas[model_name] = to_spark_schema(model)
    return schemas
65
+
66
+
67
def to_spark_schema(model: Model) -> types.StructType:
    """
    Build the Spark schema for a single model.

    Args:
        model (Model): The model whose fields are converted.

    Returns:
        types.StructType: The Spark schema built from ``model.fields``.
    """
    model_fields = model.fields
    return to_struct_type(model_fields)
78
+
79
+
80
def to_struct_type(fields: dict[str, Field]) -> types.StructType:
    """
    Build a Spark StructType from a mapping of field names to fields.

    Args:
        fields (dict[str, Field]): The fields to convert, keyed by field name.

    Returns:
        types.StructType: A struct with one StructField per entry, in mapping order.
    """
    columns = []
    for column_name, column in fields.items():
        columns.append(to_struct_field(column, column_name))
    return types.StructType(columns)
92
+
93
+
94
def to_struct_field(field: Field, field_name: str) -> types.StructField:
    """
    Build a Spark StructField for one field of a model.

    Args:
        field (Field): The field to convert.
        field_name (str): The name given to the resulting StructField.

    Returns:
        types.StructField: The Spark field; it is nullable unless ``field.required`` is truthy.
    """
    spark_type = to_data_type(field)
    is_nullable = not field.required
    return types.StructField(name=field_name, dataType=spark_type, nullable=is_nullable)
107
+
108
+
109
def to_data_type(field: Field) -> types.DataType:
    """
    Convert a field to a Spark DataType.

    Args:
        field (Field): The field to convert. ``field.type`` selects the mapping;
            ``field.items`` is read for arrays and ``field.fields`` for objects.

    Returns:
        types.DataType: The corresponding Spark DataType. ``bytes`` and every
            type without an explicit mapping are returned as ``BinaryType``.
    """
    field_type = field.type
    if field_type is None or field_type == "null":
        return types.NullType()
    if field_type == "array":
        return types.ArrayType(to_data_type(field.items))
    if field_type in ("object", "record", "struct"):
        # to_struct_type already returns a StructType. Wrapping it in another
        # StructType(...) would store a StructType where a list of StructField
        # is expected, so the result is returned as-is.
        return to_struct_type(field.fields)
    if field_type in ("string", "varchar", "text"):
        return types.StringType()
    if field_type in ("number", "decimal", "numeric"):
        return types.DecimalType()
    if field_type in ("integer", "int"):
        return types.IntegerType()
    if field_type == "long":
        return types.LongType()
    if field_type == "float":
        return types.FloatType()
    if field_type == "double":
        return types.DoubleType()
    if field_type == "boolean":
        return types.BooleanType()
    if field_type in ("timestamp", "timestamp_tz"):
        return types.TimestampType()
    if field_type == "timestamp_ntz":
        return types.TimestampNTZType()
    if field_type == "date":
        return types.DateType()
    # "bytes" and any unrecognised type are represented as binary data.
    return types.BinaryType()
149
+
150
+
151
def print_schema(dtype: types.DataType) -> str:
    """
    Render a PySpark DataType as the Python code that would construct it.

    Args:
        dtype (types.DataType): The PySpark DataType to render.

    Returns:
        str: The code representation of the PySpark DataType.
    """

    def indent(text: str, level: int) -> str:
        """
        Prefix every line of ``text`` with ``level`` indentation units.

        Args:
            text (str): The text to be indented.
            level (int): The number of indentation units.

        Returns:
            str: The indented text.
        """
        # NOTE(review): the indentation unit is reproduced as it appears in the
        # reviewed view; whitespace may have been collapsed there - confirm
        # against the original file.
        prefix = " " * level
        return "\n".join(f"{prefix}{line}" for line in text.split("\n"))

    def repr_column(column: types.StructField) -> str:
        """
        Render one StructField as code, with its type and nullability on separate lines.

        Args:
            column (types.StructField): The StructField to render.

        Returns:
            str: The code representation of the StructField.
        """
        type_code = indent(print_schema(column.dataType), 1)
        nullable_code = indent(str(column.nullable), 1)
        return "\n".join([f'StructField("{column.name}",', f"{type_code},", nullable_code, ")"])

    def format_struct_type(struct_type: types.StructType) -> str:
        """
        Render a StructType as code, one indented StructField per entry.

        Args:
            struct_type (types.StructType): The StructType to render.

        Returns:
            str: The code representation of the StructType.
        """
        rendered_fields = [indent(repr_column(member), 1) for member in struct_type.fields]
        body = ",\n".join(rendered_fields)
        return f"StructType([\n{body}\n])"

    if isinstance(dtype, types.StructType):
        return format_struct_type(dtype)
    if isinstance(dtype, types.ArrayType):
        element_code = print_schema(dtype.elementType)
        return f"ArrayType({element_code})"
    if isinstance(dtype, types.DecimalType):
        return f"DecimalType({dtype.precision}, {dtype.scale})"

    # Simple types: make sure the textual form ends with call parentheses.
    type_name = str(dtype)
    if type_name.endswith("()"):
        return type_name
    return f"{type_name}()"
@@ -15,6 +15,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
15
15
  return convert_type_to_sqlserver(field)
16
16
  elif server_type == "bigquery":
17
17
  return convert_type_to_bigquery(field)
18
+ elif server_type == "trino":
19
+ return convert_type_to_trino(field)
18
20
  return field.type
19
21
 
20
22
 
@@ -249,3 +251,29 @@ def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
249
251
  if not field.config:
250
252
  return None
251
253
  return field.config.get(config_attr, None)
254
+
255
+
256
def convert_type_to_trino(field: Field) -> None | str:
    """Convert from supported datacontract types to equivalent trino types.

    Args:
        field (Field): The field whose ``type`` is mapped (case-insensitively).

    Returns:
        None | str: The Trino type name, or None when the field has no type or
            the type has no mapping here.
    """
    if field.type is None:
        # A field without a declared type cannot be mapped.
        return None
    field_type = field.type.lower()

    if field_type in ("string", "text", "varchar"):
        return "varchar"
    # tinyint, smallint not supported by data contract
    if field_type in ("number", "decimal", "numeric"):
        # precision and scale not supported by data contract
        return "decimal"
    if field_type in ("int", "integer"):
        return "integer"
    if field_type in ("long", "bigint"):
        return "bigint"
    if field_type == "float":
        return "real"
    if field_type in ("timestamp", "timestamp_tz"):
        return "timestamp(3) with time zone"
    if field_type == "timestamp_ntz":
        return "timestamp(3)"
    if field_type == "bytes":
        return "varbinary"
    if field_type in ("object", "record", "struct"):
        return "json"
    return None
@@ -1,9 +1,17 @@
1
1
  import avro.schema
2
2
 
3
+ from datacontract.imports.importer import Importer
3
4
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
4
5
  from datacontract.model.exceptions import DataContractException
5
6
 
6
7
 
8
class AvroImporter(Importer):
    """Importer that delegates to ``import_avro``."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import ``source`` into the given specification via ``import_avro``.

        Args:
            data_contract_specification (DataContractSpecification): The specification to update.
            source (str): Passed through to ``import_avro``.
            import_args (dict): Not used by this importer.

        Returns:
            DataContractSpecification: The result of ``import_avro``.
        """
        return import_avro(data_contract_specification, source)
13
+
14
+
7
15
  def import_avro(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
8
16
  if data_contract_specification.models is None:
9
17
  data_contract_specification.models = {}
@@ -2,10 +2,27 @@ import json
2
2
  import logging
3
3
  from typing import List
4
4
 
5
+ from datacontract.imports.importer import Importer
5
6
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
6
7
  from datacontract.model.exceptions import DataContractException
7
8
 
8
9
 
10
class BigQueryImporter(Importer):
    """Importer that reads BigQuery table definitions from a JSON source or from the API."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import BigQuery tables into the given specification.

        Args:
            data_contract_specification (DataContractSpecification): The specification to update.
            source (str): When not None, passed to ``import_bigquery_from_json``.
            import_args (dict): Read only when ``source`` is None; the keys
                ``bigquery_tables``, ``bigquery_project`` and ``bigquery_dataset``
                are forwarded to ``import_bigquery_from_api``.

        Returns:
            DataContractSpecification: The result of the selected import function.
        """
        if source is not None:
            return import_bigquery_from_json(data_contract_specification, source)
        return import_bigquery_from_api(
            data_contract_specification,
            import_args.get("bigquery_tables"),
            import_args.get("bigquery_project"),
            import_args.get("bigquery_dataset"),
        )
24
+
25
+
9
26
  def import_bigquery_from_json(
10
27
  data_contract_specification: DataContractSpecification, source: str
11
28
  ) -> DataContractSpecification:
@@ -1,6 +1,7 @@
1
1
  import boto3
2
2
  from typing import List
3
3
 
4
+ from datacontract.imports.importer import Importer
4
5
  from datacontract.model.data_contract_specification import (
5
6
  DataContractSpecification,
6
7
  Model,
@@ -9,7 +10,14 @@ from datacontract.model.data_contract_specification import (
9
10
  )
10
11
 
11
12
 
12
- def get_glue_database(datebase_name: str):
13
class GlueImporter(Importer):
    """Importer that delegates to ``import_glue``."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import a Glue database into the given specification via ``import_glue``.

        Args:
            data_contract_specification (DataContractSpecification): The specification to update.
            source (str): The name of the Glue database.
            import_args (dict): The ``glue_tables`` entry (None when absent) is
                forwarded as the list of table names.

        Returns:
            DataContractSpecification: The result of ``import_glue``.
        """
        return import_glue(data_contract_specification, source, import_args.get("glue_tables"))
18
+
19
+
20
+ def get_glue_database(database_name: str):
13
21
  """Get the details of a Glue database.
14
22
 
15
23
  Args:
@@ -18,31 +26,32 @@ def get_glue_database(datebase_name: str):
18
26
  Returns:
19
27
  tuple: catalogid and locationUri
20
28
  """
21
-
22
29
  glue = boto3.client("glue")
23
30
  try:
24
- response = glue.get_database(Name=datebase_name)
31
+ response = glue.get_database(Name=database_name)
25
32
  except glue.exceptions.EntityNotFoundException:
26
- print(f"Database not found {datebase_name}.")
33
+ print(f"Database not found {database_name}.")
27
34
  return (None, None)
28
35
  except Exception as e:
29
36
  # todo catch all
30
37
  print(f"Error: {e}")
31
38
  return (None, None)
32
39
 
33
- return (response["Database"]["CatalogId"], response["Database"].get("LocationUri", "None"))
40
+ return (
41
+ response["Database"]["CatalogId"],
42
+ response["Database"].get("LocationUri", "None"),
43
+ )
34
44
 
35
45
 
36
46
  def get_glue_tables(database_name: str) -> List[str]:
37
47
  """Get the list of tables in a Glue database.
38
48
 
39
49
  Args:
40
- database_name (str): glue database to request.
50
+ database_name (str): Glue database to request.
41
51
 
42
52
  Returns:
43
- List[string]: List of table names
53
+ List[str]: List of table names
44
54
  """
45
-
46
55
  glue = boto3.client("glue")
47
56
 
48
57
  # Set the paginator
@@ -107,9 +116,21 @@ def get_glue_table_schema(database_name: str, table_name: str):
107
116
  return table_schema
108
117
 
109
118
 
110
- def import_glue(data_contract_specification: DataContractSpecification, source: str, table_names: List[str]):
111
- """Import the schema of a Glue database."""
119
+ def import_glue(
120
+ data_contract_specification: DataContractSpecification,
121
+ source: str,
122
+ table_names: List[str],
123
+ ):
124
+ """Import the schema of a Glue database.
112
125
 
126
+ Args:
127
+ data_contract_specification (DataContractSpecification): The data contract specification to update.
128
+ source (str): The name of the Glue database.
129
+ table_names (List[str]): List of table names to import. If None, all tables in the database are imported.
130
+
131
+ Returns:
132
+ DataContractSpecification: The updated data contract specification.
133
+ """
113
134
  catalogid, location_uri = get_glue_database(source)
114
135
 
115
136
  # something went wrong
@@ -131,21 +152,18 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
131
152
 
132
153
  fields = {}
133
154
  for column in table_schema:
134
- field = Field()
135
- field.type = map_type_from_sql(column["Type"])
155
+ field = create_typed_field(column["Type"])
136
156
 
137
157
  # hive partitions are required, but are not primary keys
138
158
  if column.get("Hive"):
139
159
  field.required = True
140
160
 
141
161
  field.description = column.get("Comment")
142
-
143
162
  fields[column["Name"]] = field
144
163
 
145
164
  if "decimal" in column["Type"]:
146
165
  # Extract precision and scale from the string
147
166
  perc_scale = column["Type"][8:-1].split(",")
148
- print(perc_scale)
149
167
  field.precision = int(perc_scale[0])
150
168
  field.scale = int(perc_scale[1])
151
169
 
@@ -157,37 +175,102 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
157
175
  return data_contract_specification
158
176
 
159
177
 
160
- def map_type_from_sql(sql_type: str):
178
def create_typed_field(dtype: str) -> Field:
    """Build a Field for a type string, recursing into arrays and structs.

    Args:
        dtype (str): The data type of the field, e.g. ``array<...>``,
            ``struct<name:type,...>`` or a plain SQL type.

    Returns:
        Field: The created field with the appropriate type.
    """
    result = Field()
    # Normalise: trim, lower-case and drop all inner spaces.
    normalized = dtype.strip().lower().replace(" ", "")

    if normalized.startswith("array"):
        result.type = "array"
        # Strip the leading "array<" and the trailing ">".
        result.items = create_typed_field(normalized[6:-1])
        return result

    if normalized.startswith("struct"):
        result.type = "struct"
        # Strip the leading "struct<" and the trailing ">".
        for member in split_struct(normalized[7:-1]):
            parts = member.split(":", 1)
            member_field = create_typed_field(parts[1])
            result.fields[parts[0].strip()] = member_field
        return result

    result.type = map_type_from_sql(normalized)
    return result
201
+
202
+
203
def split_fields(s: str):
    """Split a string on top-level commas, ignoring commas inside ``<>`` or ``()``.

    Args:
        s (str): The string to split.

    Yields:
        str: The next field in the string (not stripped).
    """
    depth = 0
    start = 0
    for position, char in enumerate(s):
        if char in "<(":
            depth += 1
        elif char in ">)":
            depth -= 1
        elif char == "," and depth == 0:
            yield s[start:position]
            start = position + 1
    # Whatever follows the last top-level comma (possibly the whole string).
    yield s[start:]
223
+
224
+
225
def split_struct(s: str) -> List[str]:
    """Collect the fields produced by ``split_fields`` into a list.

    Args:
        s (str): The struct string to split.

    Returns:
        List[str]: List of individual fields in the struct.
    """
    return [*split_fields(s=s)]
235
+
236
+
237
def map_type_from_sql(sql_type: str) -> str:
    """Map an SQL type to a field type by case-insensitive prefix.

    Args:
        sql_type (str): The SQL type to map.

    Returns:
        str: The corresponding field type, ``"variant"`` when no prefix
            matches; None is returned when ``sql_type`` is None.
    """
    if sql_type is None:
        return None

    # Checked in order; the first matching prefix wins.
    prefix_table = (
        ("varchar", "varchar"),
        ("string", "string"),
        ("text", "text"),
        ("byte", "byte"),
        ("short", "short"),
        (("integer", "int"), "integer"),
        (("long", "bigint"), "long"),
        ("float", "float"),
        ("double", "double"),
        ("boolean", "boolean"),
        ("timestamp", "timestamp"),
        ("date", "date"),
        ("decimal", "decimal"),
    )
    lowered = sql_type.lower()
    for prefixes, mapped_type in prefix_table:
        if lowered.startswith(prefixes):
            return mapped_type
    return "variant"
@@ -0,0 +1,29 @@
1
+ from abc import ABC, abstractmethod
2
+ from enum import Enum
3
+
4
+ from datacontract.model.data_contract_specification import DataContractSpecification
5
+
6
+
7
class Importer(ABC):
    """Abstract base class for importers; subclasses implement ``import_source``."""

    def __init__(self, import_format) -> None:
        """Remember the format this importer was created for.

        Args:
            import_format: The import format identifier, stored as ``self.import_format``.
        """
        self.import_format = import_format

    @abstractmethod
    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import ``source`` into the given data contract specification.

        Args:
            data_contract_specification (DataContractSpecification): The specification to update.
            source (str): The source to import from; its meaning depends on the importer.
            import_args (dict): Additional importer-specific arguments.

        Returns:
            DataContractSpecification: The updated data contract specification.
        """
        pass
16
+
17
+
18
class ImportFormat(str, Enum):
    """The import formats known to the importer factory."""

    sql = "sql"
    avro = "avro"
    glue = "glue"
    jsonschema = "jsonschema"
    bigquery = "bigquery"
    odcs = "odcs"
    unity = "unity"

    @classmethod
    def get_supported_formats(cls):
        """Return the values of all formats, in declaration order."""
        return [member.value for member in cls]

    @classmethod
    def get_suported_formats(cls):
        """Misspelled alias of ``get_supported_formats``, kept for existing callers."""
        return cls.get_supported_formats()
@@ -0,0 +1,72 @@
1
+ import importlib.util
2
+ import sys
3
+ from datacontract.imports.importer import ImportFormat, Importer
4
+
5
+
6
class ImporterFactory:
    """Registry that creates importers by name, loading lazily registered ones on demand."""

    def __init__(self):
        # name -> importer class (or any callable taking the name)
        self.dict_importer = {}
        # name -> (module_path, class_name), resolved in create()
        self.dict_lazy_importer = {}

    def register_importer(self, name, importer: Importer):
        """Register an importer class under ``name``."""
        self.dict_importer[name] = importer

    def register_lazy_importer(self, name: str, module_path: str, class_name: str):
        """Register an importer by module path and class name; the module is imported in ``create``."""
        self.dict_lazy_importer[name] = (module_path, class_name)

    def create(self, name) -> Importer:
        """Create the importer registered under ``name``.

        Args:
            name: The name an importer was registered under.

        Returns:
            Importer: The registered class called with ``name``.

        Raises:
            ValueError: If nothing is registered under ``name`` or the lazily
                registered module could not be loaded.
        """
        # Lazy registrations take precedence over eager ones with the same name.
        importers = {**self.dict_importer, **self.dict_lazy_importer}
        if name not in importers:
            raise ValueError(f"The '{name}' format is not supported.")
        importer_class = importers[name]
        if isinstance(importer_class, tuple):
            module_path, class_name = importer_class
            importer_class = load_module_class(module_path=module_path, class_name=class_name)
        if not importer_class:
            raise ValueError(f"Module {name} could not be loaded.")
        return importer_class(name)
28
+
29
+
30
def import_module(module_path):
    """Import a module by dotted path, returning None when it is not available.

    Args:
        module_path: The dotted module path to import.

    Returns:
        The imported module, or None when the module (or, for a dotted path,
        one of its parent packages) cannot be found or raises
        ModuleNotFoundError while being imported.
    """
    try:
        # find_spec imports parent packages of a dotted path and raises
        # ModuleNotFoundError when one is missing, so it belongs inside the try.
        if importlib.util.find_spec(module_path) is None:
            return None
        # importlib.import_module registers the module in sys.modules itself.
        return importlib.import_module(module_path)
    except ModuleNotFoundError:
        return None
38
+
39
+
40
def load_module_class(module_path, class_name):
    """Return attribute ``class_name`` of the module at ``module_path``, or None if the module is unavailable."""
    loaded = import_module(module_path)
    return getattr(loaded, class_name) if loaded else None
45
+
46
+
47
# Module-level factory with every built-in importer registered lazily, so an
# importer's module is only imported when that format is requested.
importer_factory = ImporterFactory()
for _format, _module_path, _class_name in (
    (ImportFormat.avro, "datacontract.imports.avro_importer", "AvroImporter"),
    (ImportFormat.bigquery, "datacontract.imports.bigquery_importer", "BigQueryImporter"),
    (ImportFormat.glue, "datacontract.imports.glue_importer", "GlueImporter"),
    (ImportFormat.jsonschema, "datacontract.imports.jsonschema_importer", "JsonSchemaImporter"),
    (ImportFormat.odcs, "datacontract.imports.odcs_importer", "OdcsImporter"),
    (ImportFormat.sql, "datacontract.imports.sql_importer", "SqlImporter"),
    (ImportFormat.unity, "datacontract.imports.unity_importer", "UnityImporter"),
):
    importer_factory.register_lazy_importer(name=_format, module_path=_module_path, class_name=_class_name)
# Do not leave the loop variables behind as module attributes.
del _format, _module_path, _class_name
@@ -2,10 +2,18 @@ import json
2
2
 
3
3
  import fastjsonschema
4
4
 
5
+ from datacontract.imports.importer import Importer
5
6
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Definition
6
7
  from datacontract.model.exceptions import DataContractException
7
8
 
8
9
 
10
class JsonSchemaImporter(Importer):
    """Importer that delegates to ``import_jsonschema``."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> dict:
        """Import ``source`` via ``import_jsonschema``; ``import_args`` is not used."""
        imported = import_jsonschema(data_contract_specification, source)
        return imported
15
+
16
+
9
17
  def convert_json_schema_properties(properties, is_definition=False):
10
18
  fields = {}
11
19
  for field_name, field_schema in properties.items():
@@ -2,6 +2,7 @@ import datetime
2
2
  import logging
3
3
  from typing import Any, Dict, List
4
4
  import yaml
5
+ from datacontract.imports.importer import Importer
5
6
  from datacontract.model.data_contract_specification import (
6
7
  Availability,
7
8
  Contact,
@@ -42,6 +43,13 @@ DATACONTRACT_TYPES = [
42
43
  ]
43
44
 
44
45
 
46
class OdcsImporter(Importer):
    """Importer that delegates to ``import_odcs``."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import ``source`` into the given specification via ``import_odcs``.

        Args:
            data_contract_specification (DataContractSpecification): The specification to update.
            source (str): Passed through to ``import_odcs``.
            import_args (dict): Not used by this importer.

        Returns:
            DataContractSpecification: The result of ``import_odcs``.
        """
        return import_odcs(data_contract_specification, source)
51
+
52
+
45
53
  def import_odcs(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
46
54
  try:
47
55
  with open(source, "r") as file:
@@ -1,8 +1,16 @@
1
1
  from simple_ddl_parser import parse_from_file
2
2
 
3
+ from datacontract.imports.importer import Importer
3
4
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
4
5
 
5
6
 
7
class SqlImporter(Importer):
    """Importer that delegates to ``import_sql``."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> dict:
        """Import ``source`` via ``import_sql``, passing this importer's format; ``import_args`` is not used."""
        sql_format = self.import_format
        return import_sql(data_contract_specification, sql_format, source)
12
+
13
+
6
14
  def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
7
15
  ddl = parse_from_file(source, group_by_type=True)
8
16
  tables = ddl["tables"]