datacontract-cli 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datacontract-cli has been flagged as possibly problematic.
Files changed (39)
  1. datacontract/cli.py +19 -3
  2. datacontract/data_contract.py +17 -17
  3. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -1
  4. datacontract/engines/fastjsonschema/s3/s3_read_files.py +2 -0
  5. datacontract/engines/soda/check_soda_execute.py +2 -8
  6. datacontract/engines/soda/connections/duckdb.py +23 -20
  7. datacontract/engines/soda/connections/kafka.py +81 -23
  8. datacontract/engines/soda/connections/snowflake.py +8 -5
  9. datacontract/export/avro_converter.py +12 -2
  10. datacontract/export/dbml_converter.py +42 -19
  11. datacontract/export/exporter.py +2 -1
  12. datacontract/export/exporter_factory.py +6 -0
  13. datacontract/export/jsonschema_converter.py +1 -4
  14. datacontract/export/spark_converter.py +4 -0
  15. datacontract/export/sql_type_converter.py +64 -29
  16. datacontract/export/sqlalchemy_converter.py +169 -0
  17. datacontract/imports/avro_importer.py +1 -0
  18. datacontract/imports/bigquery_importer.py +2 -2
  19. datacontract/imports/dbml_importer.py +112 -0
  20. datacontract/imports/dbt_importer.py +67 -91
  21. datacontract/imports/glue_importer.py +64 -54
  22. datacontract/imports/importer.py +3 -2
  23. datacontract/imports/importer_factory.py +5 -0
  24. datacontract/imports/jsonschema_importer.py +106 -120
  25. datacontract/imports/odcs_importer.py +1 -1
  26. datacontract/imports/spark_importer.py +29 -10
  27. datacontract/imports/sql_importer.py +5 -1
  28. datacontract/imports/unity_importer.py +1 -1
  29. datacontract/integration/{publish_datamesh_manager.py → datamesh_manager.py} +33 -5
  30. datacontract/integration/{publish_opentelemetry.py → opentelemetry.py} +1 -1
  31. datacontract/model/data_contract_specification.py +6 -2
  32. datacontract/templates/partials/model_field.html +10 -2
  33. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/METADATA +283 -113
  34. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/RECORD +38 -37
  35. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/WHEEL +1 -1
  36. datacontract/publish/publish.py +0 -32
  37. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/LICENSE +0 -0
  38. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/entry_points.txt +0 -0
  39. {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/top_level.txt +0 -0
datacontract/imports/glue_importer.py
@@ -1,6 +1,6 @@
  import boto3
- from typing import List
-
+ from typing import List, Dict, Generator
+ import re
  from datacontract.imports.importer import Importer
  from datacontract.model.data_contract_specification import (
      DataContractSpecification,
@@ -13,7 +13,7 @@ from datacontract.model.data_contract_specification import (
  class GlueImporter(Importer):
      def import_source(
          self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> dict:
+     ) -> DataContractSpecification:
          return import_glue(data_contract_specification, source, import_args.get("glue_table"))


@@ -39,7 +39,7 @@ def get_glue_database(database_name: str):

      return (
          response["Database"]["CatalogId"],
-         response["Database"].get("LocationUri", "None"),
+         response["Database"].get("LocationUri"),
      )


@@ -75,7 +75,7 @@ def get_glue_tables(database_name: str) -> List[str]:
      return table_names


- def get_glue_table_schema(database_name: str, table_name: str):
+ def get_glue_table_schema(database_name: str, table_name: str) -> List[Dict]:
      """Get the schema of a Glue table.

      Args:
@@ -93,11 +93,11 @@ def get_glue_table_schema(database_name: str, table_name: str):
          response = glue.get_table(DatabaseName=database_name, Name=table_name)
      except glue.exceptions.EntityNotFoundException:
          print(f"Table {table_name} not found in database {database_name}.")
-         return {}
+         return []
      except Exception as e:
          # todo catch all
          print(f"Error: {e}")
-         return {}
+         return []

      table_schema = response["Table"]["StorageDescriptor"]["Columns"]

@@ -109,10 +109,9 @@ def get_glue_table_schema(database_name: str, table_name: str):
                  "Name": pk["Name"],
                  "Type": pk["Type"],
                  "Hive": True,
-                 "Comment": "Partition Key",
+                 "Comment": pk.get("Comment"),
              }
          )
-
      return table_schema


@@ -120,7 +119,7 @@ def import_glue(
      data_contract_specification: DataContractSpecification,
      source: str,
      table_names: List[str],
- ):
+ ) -> DataContractSpecification:
      """Import the schema of a Glue database.

      Args:
@@ -140,8 +139,13 @@ def import_glue(
      if table_names is None:
          table_names = get_glue_tables(source)

+     server_kwargs = {"type": "glue", "account": catalogid, "database": source}
+
+     if location_uri:
+         server_kwargs["location"] = location_uri
+
      data_contract_specification.servers = {
-         "production": Server(type="glue", account=catalogid, database=source, location=location_uri),
+         "production": Server(**server_kwargs),
      }

      for table_name in table_names:
@@ -161,12 +165,6 @@ def import_glue(
              field.description = column.get("Comment")
              fields[column["Name"]] = field

-             if "decimal" in column["Type"]:
-                 # Extract precision and scale from the string
-                 perc_scale = column["Type"][8:-1].split(",")
-                 field.precision = int(perc_scale[0])
-                 field.scale = int(perc_scale[1])
-
          data_contract_specification.models[table_name] = Model(
              type="table",
              fields=fields,
@@ -186,21 +184,43 @@ def create_typed_field(dtype: str) -> Field:
      """
      field = Field()
      dtype = dtype.strip().lower().replace(" ", "")
-     if dtype.startswith(("array", "struct")):
-         orig_dtype: str = dtype
-         if dtype.startswith("array"):
-             field.type = "array"
-             field.items = create_typed_field(orig_dtype[6:-1])
-         elif dtype.startswith("struct"):
-             field.type = "struct"
-             for f in split_struct(orig_dtype[7:-1]):
-                 field.fields[f.split(":", 1)[0].strip()] = create_typed_field(f.split(":", 1)[1])
+     # Example: array<string>
+     if dtype.startswith("array"):
+         field.type = "array"
+         field.items = create_typed_field(dtype[6:-1])
+     # Example: struct<field1:float,field2:string>
+     elif dtype.startswith("struct"):
+         field.type = "struct"
+         for f in split_struct(dtype[7:-1]):
+             field_name, field_key = f.split(":", 1)
+             field.fields[field_name] = create_typed_field(field_key)
+     # Example: map<string,int>
+     elif dtype.startswith("map"):
+         field.type = "map"
+         map_match = re.match(r"map<(.+?),\s*(.+)>", dtype)
+         if map_match:
+             key_type = map_match.group(1)
+             value_type = map_match.group(2)
+             field.keys = create_typed_field(key_type)
+             field.values = create_typed_field(value_type)
+     # Example: decimal(38, 6) or decimal
+     elif dtype.startswith("decimal"):
+         field.type = "decimal"
+         decimal_match = re.match(r"decimal\((\d+),\s*(\d+)\)", dtype)
+         if decimal_match:  # if precision specified
+             field.precision = int(decimal_match.group(1))
+             field.scale = int(decimal_match.group(2))
+     # Example: varchar(255) or varchar
+     elif dtype.startswith("varchar"):
+         field.type = "varchar"
+         if len(dtype) > 7:
+             field.maxLength = int(dtype[8:-1])
      else:
          field.type = map_type_from_sql(dtype)
      return field


- def split_fields(s: str):
+ def split_fields(s: str) -> Generator[str, None, None]:
      """Split a string of fields considering nested structures.

      Args:
@@ -247,30 +267,20 @@ def map_type_from_sql(sql_type: str) -> str:
          return None

      sql_type = sql_type.lower()
-     if sql_type.startswith("varchar"):
-         return "varchar"
-     if sql_type.startswith("string"):
-         return "string"
-     if sql_type.startswith("text"):
-         return "text"
-     if sql_type.startswith("byte"):
-         return "byte"
-     if sql_type.startswith("short"):
-         return "short"
-     if sql_type.startswith("integer") or sql_type.startswith("int"):
-         return "integer"
-     if sql_type.startswith("long") or sql_type.startswith("bigint"):
-         return "long"
-     if sql_type.startswith("float"):
-         return "float"
-     if sql_type.startswith("double"):
-         return "double"
-     if sql_type.startswith("boolean"):
-         return "boolean"
-     if sql_type.startswith("timestamp"):
-         return "timestamp"
-     if sql_type.startswith("date"):
-         return "date"
-     if sql_type.startswith("decimal"):
-         return "decimal"
-     return "variant"
+
+     type_mapping = {
+         "string": "string",
+         "int": "int",
+         "bigint": "bigint",
+         "float": "float",
+         "double": "double",
+         "boolean": "boolean",
+         "timestamp": "timestamp",
+         "date": "date",
+     }
+
+     for prefix, mapped_type in type_mapping.items():
+         if sql_type.startswith(prefix):
+             return mapped_type
+
+     return "unknown"
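A rough sketch of how the reworked create_typed_field handles the Glue types added above. The inputs are illustrative, and importing the module requires boto3 to be available:

    from datacontract.imports.glue_importer import create_typed_field, map_type_from_sql

    map_field = create_typed_field("map<string,int>")
    # map_field.type == "map", map_field.keys.type == "string", map_field.values.type == "int"

    decimal_field = create_typed_field("decimal(38,6)")
    # decimal_field.type == "decimal", precision 38, scale 6

    varchar_field = create_typed_field("varchar(255)")
    # varchar_field.type == "varchar", maxLength 255

    # Anything that matches no branch falls through to map_type_from_sql,
    # which now returns "unknown" rather than the old "variant" fallback.
    assert map_type_from_sql("binary") == "unknown"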
datacontract/imports/importer.py
@@ -14,7 +14,7 @@ class Importer(ABC):
          data_contract_specification: DataContractSpecification,
          source: str,
          import_args: dict,
-     ) -> dict:
+     ) -> DataContractSpecification:
          pass


@@ -22,6 +22,7 @@ class ImportFormat(str, Enum):
      sql = "sql"
      avro = "avro"
      dbt = "dbt"
+     dbml = "dbml"
      glue = "glue"
      jsonschema = "jsonschema"
      bigquery = "bigquery"
@@ -30,5 +31,5 @@ class ImportFormat(str, Enum):
      spark = "spark"

      @classmethod
-     def get_suported_formats(cls):
+     def get_supported_formats(cls):
          return list(map(lambda c: c.value, cls))
datacontract/imports/importer_factory.py
@@ -88,3 +88,8 @@ importer_factory.register_lazy_importer(
  importer_factory.register_lazy_importer(
      name=ImportFormat.dbt, module_path="datacontract.imports.dbt_importer", class_name="DbtManifestImporter"
  )
+ importer_factory.register_lazy_importer(
+     name=ImportFormat.dbml,
+     module_path="datacontract.imports.dbml_importer",
+     class_name="DBMLImporter",
+ )
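With the DBML importer registered lazily above and the enum member added to ImportFormat, the new format shows up in the supported-format listing; a small, illustrative sanity check:

    from datacontract.imports.importer import ImportFormat

    # get_suported_formats was renamed to get_supported_formats in this release
    assert "dbml" in ImportFormat.get_supported_formats()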
datacontract/imports/jsonschema_importer.py
@@ -10,140 +10,49 @@ from datacontract.model.exceptions import DataContractException
  class JsonSchemaImporter(Importer):
      def import_source(
          self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> dict:
+     ) -> DataContractSpecification:
          return import_jsonschema(data_contract_specification, source)


- def convert_json_schema_properties(properties, is_definition=False):
-     fields = {}
-     for field_name, field_schema in properties.items():
-         field_kwargs = {}
-         field_type = field_schema.get("type")
-
-         # Determine if the field is required and set the type to the non-null option if applicable
-         if isinstance(field_type, list) and "null" in field_type:
-             field_kwargs["required"] = False
-             non_null_types = [t for t in field_type if t != "null"]
-             if non_null_types:
-                 field_type = non_null_types[0]
-             else:
-                 field_type = None
-         else:
-             field_kwargs["required"] = True
-
-         # Set the non-null type
-         if field_type:
-             field_kwargs["type"] = field_type
-
-         for key, value in field_schema.items():
-             match key:
-                 case "title":
-                     field_kwargs["title"] = value
-                 case "type":
-                     pass  # type is already handled above
-                 case "format":
-                     field_kwargs["format"] = value
-                 case "description":
-                     field_kwargs["description"] = value
-                 case "pattern":
-                     field_kwargs["pattern"] = value
-                 case "minLength":
-                     field_kwargs["minLength"] = value
-                 case "maxLength":
-                     field_kwargs["maxLength"] = value
-                 case "minimum":
-                     field_kwargs["minimum"] = value
-                 case "exclusiveMinimum":
-                     field_kwargs["exclusiveMinimum"] = value
-                 case "maximum":
-                     field_kwargs["maximum"] = value
-                 case "exclusiveMaximum":
-                     field_kwargs["exclusiveMaximum"] = value
-                 case "enum":
-                     field_kwargs["enum"] = value
-                 case "tags":
-                     field_kwargs["tags"] = value
-                 case "properties":
-                     field_kwargs["fields"] = convert_json_schema_properties(value, is_definition=is_definition)
-                 case "items":
-                     field_kwargs["items"] = convert_json_schema_properties(value, is_definition=is_definition)
-
-         if is_definition:
-             field = Definition(**field_kwargs)
-         else:
-             field = Field(**field_kwargs)
-         fields[field_name] = field
-
-     return fields
-
-
  def import_jsonschema(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
      if data_contract_specification.models is None:
          data_contract_specification.models = {}

+     json_schema = load_and_validate_json_schema(source)
+
+     title = json_schema.get("title", "default_model")
+     description = json_schema.get("description")
+     type_ = json_schema.get("type")
+     properties = json_schema.get("properties", {})
+     required_properties = json_schema.get("required", [])
+
+     fields_kwargs = jsonschema_to_args(properties, required_properties)
+     fields = {name: Field(**kwargs) for name, kwargs in fields_kwargs.items()}
+
+     model = Model(description=description, type=type_, title=title, fields=fields)
+     data_contract_specification.models[title] = model
+
+     definitions = json_schema.get("definitions", {})
+     for name, schema in definitions.items():
+         kwargs = schema_to_args(schema)
+         data_contract_specification.definitions[name] = Definition(name=name, **kwargs)
+
+     return data_contract_specification
+
+
+ def load_and_validate_json_schema(source):
      try:
          with open(source, "r") as file:
              json_schema = json.loads(file.read())
-             validator = fastjsonschema.compile({})
-             validator(json_schema)
-
-             model = Model(
-                 description=json_schema.get("description"),
-                 type=json_schema.get("type"),
-                 title=json_schema.get("title"),
-                 fields=convert_json_schema_properties(json_schema.get("properties", {})),
-             )
-             data_contract_specification.models[json_schema.get("title", "default_model")] = model
-
-             if "definitions" in json_schema:
-                 for def_name, def_schema in json_schema["definitions"].items():
-                     definition_kwargs = {}
-
-                     for key, value in def_schema.items():
-                         match key:
-                             case "domain":
-                                 definition_kwargs["domain"] = value
-                             case "title":
-                                 definition_kwargs["title"] = value
-                             case "description":
-                                 definition_kwargs["description"] = value
-                             case "type":
-                                 definition_kwargs["type"] = value
-                             case "enum":
-                                 definition_kwargs["enum"] = value
-                             case "format":
-                                 definition_kwargs["format"] = value
-                             case "minLength":
-                                 definition_kwargs["minLength"] = value
-                             case "maxLength":
-                                 definition_kwargs["maxLength"] = value
-                             case "pattern":
-                                 definition_kwargs["pattern"] = value
-                             case "minimum":
-                                 definition_kwargs["minimum"] = value
-                             case "exclusiveMinimum":
-                                 definition_kwargs["exclusiveMinimum"] = value
-                             case "maximum":
-                                 definition_kwargs["maximum"] = value
-                             case "exclusiveMaximum":
-                                 definition_kwargs["exclusiveMaximum"] = value
-                             case "pii":
-                                 definition_kwargs["pii"] = value
-                             case "classification":
-                                 definition_kwargs["classification"] = value
-                             case "tags":
-                                 definition_kwargs["tags"] = value
-                             case "properties":
-                                 definition_kwargs["fields"] = convert_json_schema_properties(value, is_definition=True)
-
-                     definition = Definition(name=def_name, **definition_kwargs)
-                     data_contract_specification.definitions[def_name] = definition
+
+         validator = fastjsonschema.compile({})
+         validator(json_schema)

      except fastjsonschema.JsonSchemaException as e:
          raise DataContractException(
              type="schema",
              name="Parse json schema",
-             reason=f"Failed to parse json schema from {source}: {e}",
+             reason=f"Failed to validate json schema from {source}: {e}",
              engine="datacontract",
          )

@@ -155,5 +64,82 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
              engine="datacontract",
              original_exception=e,
          )
+     return json_schema

-     return data_contract_specification
+
+ def jsonschema_to_args(properties, required_properties):
+     args = {}
+     for property, property_schema in properties.items():
+         is_required = property in required_properties
+         args[property] = schema_to_args(property_schema, is_required)
+
+     return args
+
+
+ def schema_to_args(property_schema, is_required: bool = None) -> dict:
+     direct_mappings = {
+         "title",
+         "description",
+         "format",
+         "pattern",
+         "enum",
+         "tags",
+         "pii",
+         "minLength",
+         "maxLength",
+         "minimum",
+         "exclusiveMinimum",
+         "maximum",
+         "exclusiveMaximum",
+     }
+
+     field_kwargs = {key: value for key, value in property_schema.items() if key in direct_mappings}
+
+     if is_required is not None:
+         field_kwargs["required"] = is_required
+
+     property_type = determine_type(property_schema)
+     if property_type is not None:
+         field_kwargs["type"] = property_type
+
+     if property_type == "array":
+         nested_item_type, nested_items = determine_nested_item_type(property_schema)
+
+         if nested_items is not None:
+             field_kwargs["items"] = schema_to_args(nested_item_type)
+
+     nested_properties = property_schema.get("properties")
+     if nested_properties is not None:
+         # recursive call for complex nested properties
+         field_kwargs["fields"] = jsonschema_to_args(nested_properties, property_schema["required"])
+
+     return field_kwargs
+
+
+ def determine_nested_item_type(property_schema):
+     nested_items = property_schema.get("items")
+     nested_items_is_list = isinstance(nested_items, list)
+     if nested_items_is_list and len(nested_items) != 1:
+         raise DataContractException(
+             type="schema",
+             name="Parse json schema",
+             reason=f"Union types for arrays are currently not supported ({nested_items})",
+             engine="datacontract",
+         )
+     if nested_items_is_list and len(nested_items) == 1:
+         nested_item_type = nested_items[0]
+     elif not nested_items_is_list and nested_items is not None:
+         nested_item_type = nested_items
+     return nested_item_type, nested_items
+
+
+ def determine_type(property_schema):
+     property_type = property_schema.get("type")
+     type_is_list = isinstance(property_type, list)
+     if type_is_list:
+         non_null_types = [t for t in property_type if t != "null"]
+         if non_null_types:
+             property_type = non_null_types[0]
+         else:
+             property_type = None
+     return property_type
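To make the refactor concrete, here is roughly how the new helpers treat a nullable union type; the input is illustrative and the expected output follows directly from the code above:

    from datacontract.imports.jsonschema_importer import schema_to_args

    kwargs = schema_to_args({"type": ["string", "null"], "maxLength": 10}, is_required=False)
    # determine_type() drops the "null" entry and keeps the first non-null type:
    # kwargs == {"maxLength": 10, "required": False, "type": "string"}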
datacontract/imports/odcs_importer.py
@@ -46,7 +46,7 @@ DATACONTRACT_TYPES = [
  class OdcsImporter(Importer):
      def import_source(
          self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> dict:
+     ) -> DataContractSpecification:
          return import_odcs(data_contract_specification, source)


datacontract/imports/spark_importer.py
@@ -14,7 +14,7 @@ class SparkImporter(Importer):
          data_contract_specification: DataContractSpecification,
          source: str,
          import_args: dict,
-     ) -> dict:
+     ) -> DataContractSpecification:
          """
          Imports data from a Spark source into the data contract specification.

@@ -63,12 +63,12 @@ def import_from_spark_df(df: DataFrame) -> Model:
      schema = df.schema

      for field in schema:
-         model.fields[field.name] = _field_from_spark(field)
+         model.fields[field.name] = _field_from_struct_type(field)

      return model


- def _field_from_spark(spark_field: types.StructField) -> Field:
+ def _field_from_struct_type(spark_field: types.StructField) -> Field:
      """
      Converts a Spark StructField into a Field object for the data contract.

@@ -76,18 +76,35 @@ def _field_from_spark(spark_field: types.StructField) -> Field:
          spark_field: The Spark StructField to convert.

      Returns:
-         Field: The corresponding Field object.
+         Field: The generated Field object.
      """
-     field_type = _data_type_from_spark(spark_field.dataType)
      field = Field()
-     field.type = field_type
      field.required = not spark_field.nullable
+     return _type_from_data_type(field, spark_field.dataType)

-     if field_type == "array":
-         field.items = _field_from_spark(spark_field.dataType.elementType)

-     if field_type == "struct":
-         field.fields = {sf.name: _field_from_spark(sf) for sf in spark_field.dataType.fields}
+ def _type_from_data_type(field: Field, spark_type: types.DataType) -> Field:
+     """
+     Maps Spark data types to the Data Contract type system and updates the field.
+
+     Args:
+         field: The Field object to update.
+         spark_type: The Spark data type to map.
+
+     Returns:
+         Field: The updated Field object.
+     """
+     field.type = _data_type_from_spark(spark_type)
+
+     if field.type == "array":
+         field.items = _type_from_data_type(Field(required=not spark_type.containsNull), spark_type.elementType)
+
+     elif field.type == "map":
+         field.keys = _type_from_data_type(Field(required=True), spark_type.keyType)
+         field.values = _type_from_data_type(Field(required=not spark_type.valueContainsNull), spark_type.valueType)
+
+     elif field.type == "struct":
+         field.fields = {sf.name: _field_from_struct_type(sf) for sf in spark_type.fields}

      return field

@@ -116,6 +133,8 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
          return "struct"
      elif isinstance(spark_type, types.ArrayType):
          return "array"
+     elif isinstance(spark_type, types.MapType):
+         return "map"
      elif isinstance(spark_type, types.TimestampType):
          return "timestamp"
      elif isinstance(spark_type, types.TimestampNTZType):
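A quick illustration of the new map support in the Spark importer. This requires a local pyspark installation; the mapped names "string" and "integer" for the key and value types are the usual Spark mappings and are not shown in this hunk, so treat them as assumptions:

    from pyspark.sql import types
    from datacontract.imports.spark_importer import _field_from_struct_type

    struct_field = types.StructField(
        "tags",
        types.MapType(types.StringType(), types.IntegerType(), valueContainsNull=True),
        nullable=True,
    )
    field = _field_from_struct_type(struct_field)
    # field.type == "map"
    # field.keys.required is True (map keys are always required)
    # field.values.required is False, because valueContainsNull=True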
datacontract/imports/sql_importer.py
@@ -7,7 +7,7 @@ from datacontract.model.data_contract_specification import DataContractSpecification
  class SqlImporter(Importer):
      def import_source(
          self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> dict:
+     ) -> DataContractSpecification:
          return import_sql(data_contract_specification, self.import_format, source)


@@ -64,6 +64,10 @@ def map_type_from_sql(sql_type: str):
          return "integer"
      elif sql_type_normed.startswith("float"):
          return "float"
+     elif sql_type_normed.startswith("decimal"):
+         return "decimal"
+     elif sql_type_normed.startswith("numeric"):
+         return "numeric"
      elif sql_type_normed.startswith("bool"):
          return "boolean"
      elif sql_type_normed.startswith("timestamp"):
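With the two new branches, decimal and numeric columns in imported DDL keep their type instead of falling through to the default; illustrative calls (lower-cased input, so nothing is assumed about the normalization step):

    from datacontract.imports.sql_importer import map_type_from_sql

    assert map_type_from_sql("decimal(10,2)") == "decimal"
    assert map_type_from_sql("numeric") == "numeric"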
datacontract/imports/unity_importer.py
@@ -11,7 +11,7 @@ from datacontract.model.exceptions import DataContractException
  class UnityImporter(Importer):
      def import_source(
          self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> dict:
+     ) -> DataContractSpecification:
          if source is not None:
              data_contract_specification = import_unity_from_json(data_contract_specification, source)
          else:
datacontract/integration/{publish_datamesh_manager.py → datamesh_manager.py}
@@ -2,28 +2,29 @@ import os

  import requests

+ from datacontract.model.data_contract_specification import DataContractSpecification
  from datacontract.model.run import Run


- def publish_datamesh_manager(run: Run, publish_url: str):
+ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
      try:
          if publish_url is None:
              # this url supports Data Mesh Manager and Data Contract Manager
              url = "https://api.datamesh-manager.com/api/test-results"
          else:
              url = publish_url
+
          api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
          if api_key is None:
              api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
-
-         if run.dataContractId is None:
-             raise Exception("Cannot publish run results, as data contract ID is unknown")
-
          if api_key is None:
              raise Exception(
                  "Cannot publish run results, as DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY are not set"
              )

+         if run.dataContractId is None:
+             raise Exception("Cannot publish run results, as data contract ID is unknown")
+
          headers = {"Content-Type": "application/json", "x-api-key": api_key}
          request_body = run.model_dump_json()
          # print("Request Body:", request_body)
@@ -36,3 +37,30 @@ def publish_datamesh_manager(run: Run, publish_url: str):
          run.log_info(f"Published test results to {url}")
      except Exception as e:
          run.log_error(f"Failed publishing test results. Error: {str(e)}")
+
+
+ def publish_data_contract_to_datamesh_manager(data_contract_specification: DataContractSpecification):
+     try:
+         api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+         if api_key is None:
+             api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
+         if api_key is None:
+             raise Exception(
+                 "Cannot publish data contract, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
+             )
+         headers = {"Content-Type": "application/json", "x-api-key": api_key}
+         spec = data_contract_specification
+         id = spec.id
+         url = "https://api.datamesh-manager.com/api/datacontracts/{0}".format(id)
+         request_body = spec.model_dump_json().encode("utf-8")
+         response = requests.put(
+             url=url,
+             data=request_body,
+             headers=headers,
+         )
+         if response.status_code != 200:
+             print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
+             exit(1)
+         print(f"Published data contract to {url}")
+     except Exception as e:
+         print(f"Failed publishing data contract. Error: {str(e)}")
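The new publish_data_contract_to_datamesh_manager helper pushes a whole contract (not just test results) to the Data Mesh Manager API. A hedged sketch of calling it from Python; the minimal specification object and its id are illustrative, and a real one would come from parsing a datacontract.yaml:

    import os
    from datacontract.model.data_contract_specification import DataContractSpecification
    from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager

    os.environ.setdefault("DATAMESH_MANAGER_API_KEY", "<your-api-key>")  # or DATACONTRACT_MANAGER_API_KEY

    spec = DataContractSpecification(id="orders-latest")  # illustrative, normally loaded from YAML
    publish_data_contract_to_datamesh_manager(spec)
    # PUTs the spec JSON to https://api.datamesh-manager.com/api/datacontracts/orders-latest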
datacontract/integration/{publish_opentelemetry.py → opentelemetry.py}
@@ -34,7 +34,7 @@ from datacontract.model.run import Run
  # - Metrics only, no logs yet (but loosely planned)


- def publish_opentelemetry(run: Run):
+ def publish_test_results_to_opentelemetry(run: Run):
      try:
          if run.dataContractId is None:
              raise Exception("Cannot publish run results, as data contract ID is unknown")