datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/export/sql_type_converter.py (+269 -8)

@@ -1,19 +1,37 @@
+from datacontract.export.bigquery_converter import map_type_to_bigquery
 from datacontract.model.data_contract_specification import Field
 
 
 def convert_to_sql_type(field: Field, server_type: str) -> str:
+    if field.config and "physicalType" in field.config:
+        return field.config["physicalType"]
+
     if server_type == "snowflake":
         return convert_to_snowflake(field)
-    if server_type == "postgres":
+    elif server_type == "postgres":
         return convert_type_to_postgres(field)
-    if server_type == "databricks":
+    elif server_type == "dataframe":
+        return convert_to_dataframe(field)
+    elif server_type == "databricks":
         return convert_to_databricks(field)
+    elif server_type == "local" or server_type == "s3":
+        return convert_to_duckdb(field)
+    elif server_type == "sqlserver":
+        return convert_type_to_sqlserver(field)
+    elif server_type == "bigquery":
+        return convert_type_to_bigquery(field)
+    elif server_type == "trino":
+        return convert_type_to_trino(field)
+
     return field.type
 
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
-def convert_to_snowflake(field) -> None | str:
+def convert_to_snowflake(field: Field) -> None | str:
+    if field.config and "snowflakeType" in field.config:
+        return field.config["snowflakeType"]
+
     type = field.type
     # currently optimized for snowflake
     # LEARNING: data contract has no direct support for CHAR,CHARACTER
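
The rewritten dispatcher adds dataframe, DuckDB (`local`/`s3`), SQL Server, BigQuery, and Trino branches, and honors a generic `physicalType` config override before any of them. A minimal sketch of the new behavior, assuming the pydantic `Field` model accepts these keyword arguments (an assumption, though it is constructed this way elsewhere in the package):

```python
from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

# A physicalType entry in config wins over every server-specific mapping.
field = Field(type="string", config={"physicalType": "CHAR(2)"})
assert convert_to_sql_type(field, "postgres") == "CHAR(2)"

# Without an override, server_type selects the dialect converter.
amount = Field(type="decimal", precision=10, scale=2)
print(convert_to_sql_type(amount, "local"))  # DECIMAL(10,2), via the DuckDB mapping
```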
@@ -54,6 +72,9 @@ def convert_to_snowflake(field) -> None | str:
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
 def convert_type_to_postgres(field: Field) -> None | str:
+    if field.config and "postgresType" in field.config:
+        return field.config["postgresType"]
+
     type = field.type
     if type is None:
         return None
@@ -93,10 +114,62 @@ def convert_type_to_postgres(field: Field) -> None | str:
     return None
 
 
+# dataframe data types:
+# https://spark.apache.org/docs/latest/sql-ref-datatypes.html
+def convert_to_dataframe(field: Field) -> None | str:
+    if field.config and "dataframeType" in field.config:
+        return field.config["dataframeType"]
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP_NTZ"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "STRING"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_dataframe(nested_field)
+            nested_fields.append(f"{nested_field_name}:{nested_field_type}")
+        return f"STRUCT<{','.join(nested_fields)}>"
+    if type.lower() in ["bytes"]:
+        return "BINARY"
+    if type.lower() in ["array"]:
+        item_type = convert_to_dataframe(field.items)
+        return f"ARRAY<{item_type}>"
+    return None
+
+
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
-def convert_to_databricks(field) -> None | str:
+def convert_to_databricks(field: Field) -> None | str:
     type = field.type
+    if (
+        field.config
+        and "databricksType" in field.config
+        and type.lower() not in ["array", "object", "record", "struct"]
+    ):
+        return field.config["databricksType"]
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
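
`convert_to_dataframe` (and, in the hunks below, the reworked Databricks mapping) now recurses into nested fields instead of emitting a bare type name. A sketch with hypothetical field names, under the same `Field` keyword-argument assumption as above:

```python
from datacontract.export.sql_type_converter import convert_to_dataframe
from datacontract.model.data_contract_specification import Field

address = Field(
    type="struct",
    fields={
        "city": Field(type="string"),
        "zip": Field(type="int"),
    },
)
print(convert_to_dataframe(address))  # STRUCT<city:STRING,zip:INT>

tags = Field(type="array", items=Field(type="string"))
print(convert_to_dataframe(tags))  # ARRAY<STRING>
```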
@@ -110,8 +183,9 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
@@ -123,9 +197,196 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-        return "STRUCT"
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name}:{nested_field_type}")
+        return f"STRUCT<{','.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-        return "ARRAY"
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
+
+
+def convert_to_duckdb(field: Field) -> None | str:
+    """
+    Convert a data contract field to the corresponding DuckDB SQL type.
+
+    Parameters:
+        field (Field): The data contract field to convert.
+
+    Returns:
+        str: The corresponding DuckDB SQL type.
+    """
+    # Check
+    if field is None or field.type is None:
+        return None
+
+    # Get
+    type_lower = field.type.lower()
+
+    # Prepare
+    type_mapping = {
+        "varchar": "VARCHAR",
+        "string": "VARCHAR",
+        "text": "VARCHAR",
+        "binary": "BLOB",
+        "bytes": "BLOB",
+        "blob": "BLOB",
+        "boolean": "BOOLEAN",
+        "float": "FLOAT",
+        "double": "DOUBLE",
+        "int": "INTEGER",
+        "int32": "INTEGER",
+        "integer": "INTEGER",
+        "int64": "BIGINT",
+        "long": "BIGINT",
+        "bigint": "BIGINT",
+        "date": "DATE",
+        "time": "TIME",
+        "timestamp": "TIMESTAMP WITH TIME ZONE",
+        "timestamp_tz": "TIMESTAMP WITH TIME ZONE",
+        "timestamp_ntz": "TIMESTAMP",
+    }
+
+    # Convert simple mappings
+    if type_lower in type_mapping:
+        return type_mapping[type_lower]
+
+    # convert decimal numbers with precision and scale
+    if type_lower == "decimal" or type_lower == "number" or type_lower == "numeric":
+        return f"DECIMAL({field.precision},{field.scale})"
+
+    # Check list and map
+    if type_lower == "list" or type_lower == "array":
+        item_type = convert_to_duckdb(field.items)
+        return f"{item_type}[]"
+    if type_lower == "map":
+        key_type = convert_to_duckdb(field.keys)
+        value_type = convert_to_duckdb(field.values)
+        return f"MAP({key_type}, {value_type})"
+    if type_lower == "struct" or type_lower == "object" or type_lower == "record":
+        structure_field = "STRUCT("
+        field_strings = []
+        for fieldKey, fieldValue in field.fields.items():
+            field_strings.append(f"{fieldKey} {convert_to_duckdb(fieldValue)}")
+        structure_field += ", ".join(field_strings)
+        structure_field += ")"
+        return structure_field
+
+    # Return none
+    return None
+
+
+def convert_type_to_sqlserver(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent sqlserver types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if sqlserver_type := get_type_config(field, "sqlserverType"):
+        return sqlserver_type
+
+    field_type = field_type.lower()
+    if field_type in ["string", "varchar", "text"]:
+        if field.format == "uuid":
+            return "uniqueidentifier"
+        return "varchar"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "datetimeoffset"
+    if field_type in ["timestamp_ntz"]:
+        if field.format == "datetime":
+            return "datetime"
+        return "datetime2"
+    if field_type in ["date"]:
+        return "date"
+    if field_type in ["time"]:
+        return "time"
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        if field_type == "number":
+            return "numeric"
+        return field_type
+    if field_type in ["float"]:
+        return "float"
+    if field_type in ["double"]:
+        return "double precision"
+    if field_type in ["integer", "int", "bigint"]:
+        return field_type
+    if field_type in ["long"]:
+        return "bigint"
+    if field_type in ["boolean"]:
+        return "bit"
+    if field_type in ["object", "record", "struct"]:
+        return "jsonb"
+    if field_type in ["bytes"]:
+        return "binary"
+    if field_type in ["array"]:
+        raise NotImplementedError("SQLServer does not support array types.")
+    return None
+
+
+def convert_type_to_bigquery(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent bigquery types"""
+
+    # BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["array"]:
+        item_type = convert_type_to_bigquery(field.items)
+        return f"ARRAY<{item_type}>"
+
+    if field_type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_type_to_bigquery(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
+
+    return map_type_to_bigquery(field)
+
+
+def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
+    """Retrieve type configuration if provided in datacontract."""
+    if not field.config:
+        return None
+    return field.config.get(config_attr, None)
+
+
+def convert_type_to_trino(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent trino types"""
+    if field.config and "trinoType" in field.config:
+        return field.config["trinoType"]
+
+    field_type = field.type.lower()
+    if field_type in ["string", "text", "varchar"]:
+        return "varchar"
+    # tinyint, smallint not supported by data contract
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "decimal"
+    if field_type in ["int", "integer"]:
+        return "integer"
+    if field_type in ["long", "bigint"]:
+        return "bigint"
+    if field_type in ["float"]:
+        return "real"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "timestamp(3) with time zone"
+    if field_type in ["timestamp_ntz"]:
+        return "timestamp(3)"
+    if field_type in ["bytes"]:
+        return "varbinary"
+    if field_type in ["object", "record", "struct"]:
+        return "json"
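
Among the new converters, only DuckDB maps `map` types and uses the `TYPE[]` array syntax. A sketch with hypothetical fields; it assumes `Field` exposes `keys`/`values` attributes for maps, as `convert_to_duckdb` above expects:

```python
from datacontract.export.sql_type_converter import convert_to_duckdb
from datacontract.model.data_contract_specification import Field

scores = Field(type="map", keys=Field(type="string"), values=Field(type="double"))
print(convert_to_duckdb(scores))  # MAP(VARCHAR, DOUBLE)

ids = Field(type="array", items=Field(type="long"))
print(convert_to_duckdb(ids))  # BIGINT[]
```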
datacontract/export/sqlalchemy_converter.py (+170 -0)

@@ -0,0 +1,170 @@
+import ast
+import typing
+
+import datacontract.model.data_contract_specification as spec
+from datacontract.export.exporter import Exporter, _determine_sql_server_type
+
+
+class SQLAlchemyExporter(Exporter):
+    def export(
+        self, data_contract: spec.DataContractSpecification, model, server, sql_server_type, export_args
+    ) -> dict:
+        sql_server_type = _determine_sql_server_type(data_contract, sql_server_type, server)
+        return to_sqlalchemy_model_str(data_contract, sql_server_type, server)
+
+
+DECLARATIVE_BASE = "Base"
+
+
+def to_sqlalchemy_model_str(contract: spec.DataContractSpecification, sql_server_type: str = "", server=None) -> str:
+    server_obj = contract.servers.get(server)
+    classdefs = [
+        generate_model_class(model_name, model, server_obj, sql_server_type)
+        for (model_name, model) in contract.models.items()
+    ]
+    documentation = (
+        [ast.Expr(ast.Constant(contract.info.description))] if (contract.info and contract.info.description) else []
+    )
+
+    declarative_base = ast.ClassDef(
+        name=DECLARATIVE_BASE,
+        bases=[ast.Name(id="DeclarativeBase", ctx=ast.Load())],
+        body=[ast.Pass()],
+        keywords=[],
+        decorator_list=[],
+    )
+
+    databricks_timestamp = ast.ImportFrom(
+        module="databricks.sqlalchemy", names=[ast.alias("TIMESTAMP"), ast.alias("TIMESTAMP_NTZ")]
+    )
+    timestamp = ast.ImportFrom(module="sqlalchemy", names=[ast.alias(name="TIMESTAMP")])
+    result = ast.Module(
+        body=[
+            ast.ImportFrom(module="sqlalchemy.orm", names=[ast.alias(name="DeclarativeBase")]),
+            ast.ImportFrom(
+                module="sqlalchemy",
+                names=[
+                    ast.alias("Column"),
+                    ast.alias("Date"),
+                    ast.alias("Integer"),
+                    ast.alias("Numeric"),
+                    ast.alias("String"),
+                    ast.alias("Text"),
+                    ast.alias("VARCHAR"),
+                    ast.alias("BigInteger"),
+                    ast.alias("Float"),
+                    ast.alias("Double"),
+                    ast.alias("Boolean"),
+                    ast.alias("Date"),
+                    ast.alias("ARRAY"),
+                    ast.alias("LargeBinary"),
+                ],
+            ),
+            databricks_timestamp if sql_server_type == "databricks" else timestamp,
+            *documentation,
+            declarative_base,
+            *classdefs,
+        ],
+        type_ignores=[],
+    )
+    return ast.unparse(result)
+
+
+def Call(name, *args, **kwargs) -> ast.Call:
+    return ast.Call(
+        ast.Name(name),
+        args=[v for v in args],
+        keywords=[ast.keyword(arg=f"{k}", value=ast.Constant(v)) for (k, v) in kwargs.items()],
+    )
+
+
+def Column(predicate, **kwargs) -> ast.Call:
+    return Call("Column", predicate, **kwargs)
+
+
+def sqlalchemy_primitive(field: spec.Field):
+    sqlalchemy_name = {
+        "string": Call("String", ast.Constant(field.maxLength)),
+        "text": Call("Text", ast.Constant(field.maxLength)),
+        "varchar": Call("VARCHAR", ast.Constant(field.maxLength)),
+        "number": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "decimal": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "numeric": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "int": ast.Name("Integer"),
+        "integer": ast.Name("Integer"),
+        "long": ast.Name("BigInteger"),
+        "bigint": ast.Name("BigInteger"),
+        "float": ast.Name("Float"),
+        "double": ast.Name("Double"),
+        "boolean": ast.Name("Boolean"),
+        "timestamp": ast.Name("TIMESTAMP"),
+        "timestamp_tz": Call("TIMESTAMP", ast.Constant(True)),
+        "timestamp_ntz": ast.Name("TIMESTAMP_NTZ"),
+        "date": ast.Name("Date"),
+        "bytes": Call("LargeBinary", ast.Constant(field.maxLength)),
+    }
+    return sqlalchemy_name.get(field.type)
+
+
+def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
+    new_type = sqlalchemy_primitive(field)
+    match field.type:
+        case "array":
+            new_type = Call("ARRAY", sqlalchemy_primitive(field.items))
+    if new_type is None:
+        raise RuntimeError(f"Unsupported field type {field.type}.")
+
+    return Column(
+        new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
+    ), None
+
+
+def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
+    return constant_field_value(field_name, field)
+
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+
+def field_definitions(fields: dict[str, spec.Field]) -> tuple[list[ast.Expr], list[ast.ClassDef]]:
+    annotations: list[ast.Expr] = []
+    classes: list[typing.Any] = []
+    for field_name, field in fields.items():
+        (ann, new_class) = column_assignment(field_name, field)
+        annotations.append(ast.Assign(targets=[ast.Name(id=field_name, ctx=ast.Store())], value=ann, lineno=0))
+    return (annotations, classes)
+
+
+def generate_model_class(
+    name: str, model_definition: spec.Model, server=None, sql_server_type: str = ""
+) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+
+    schema = None if server is None else server.schema_
+    table_name = ast.Constant(name)
+    if sql_server_type == "databricks":
+        table_name = ast.Constant(name.lower())
+
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Name(id=DECLARATIVE_BASE, ctx=ast.Load())],
+        body=[
+            *documentation,
+            ast.Assign(targets=[ast.Name("__tablename__")], value=table_name, lineno=0),
+            ast.Assign(
+                targets=[ast.Name("__table_args__")],
+                value=ast.Dict(
+                    keys=[ast.Constant("comment"), ast.Constant("schema")],
+                    values=[ast.Constant(model_definition.description), ast.Constant(schema)],
+                ),
+                lineno=0,
+            ),
+            *nested_classes,
+            *field_assignments,
+        ],
+        keywords=[],
+        decorator_list=[],
+    )
+    return result
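
The new SQLAlchemy exporter assembles a Python AST and `ast.unparse`s it, so the export is importable source rather than a template render. A sketch of driving it directly on a hypothetical one-model contract (keyword construction of the spec models is assumed; the printed output below is abbreviated):

```python
import datacontract.model.data_contract_specification as spec
from datacontract.export.sqlalchemy_converter import to_sqlalchemy_model_str

contract = spec.DataContractSpecification(
    models={
        "orders": spec.Model(
            description="Order facts",
            fields={"order_id": spec.Field(type="string", required=True, primaryKey=True)},
        )
    }
)

print(to_sqlalchemy_model_str(contract))
# from sqlalchemy.orm import DeclarativeBase
# from sqlalchemy import Column, Date, Integer, ...
# class Base(DeclarativeBase):
#     pass
# class Orders(Base):
#     __tablename__ = 'orders'
#     ...
```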
datacontract/export/terraform_converter.py (+7 -2)

@@ -1,7 +1,12 @@
 import re
 
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
+
+
+class TerraformExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_terraform(data_contract)
 
 
 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
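
The Terraform change shows the release-wide pattern: each format gains an `Exporter` subclass, registered through the new `exporter_factory`, whose `export` method simply delegates to the existing conversion function. Since the wrapper adds no logic, calling the function directly stays equivalent; a sketch assuming keyword construction of the spec model:

```python
import datacontract.model.data_contract_specification as spec
from datacontract.export.terraform_converter import to_terraform

# Hypothetical minimal contract; TerraformExporter.export() ignores its
# model/server/sql_server_type/export_args arguments and delegates here.
contract = spec.DataContractSpecification(id="orders", servers={})
print(to_terraform(contract))  # with no servers there should be nothing to emit
```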