datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/imports/avro_importer.py
@@ -1,11 +1,46 @@
+from typing import Dict, List
+
 import avro.schema
 
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
+class AvroImporter(Importer):
+    """Class to import Avro Schema file"""
+
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        """
+        Import Avro schema from a source file.
+
+        Args:
+            data_contract_specification: The data contract specification to update.
+            source: The path to the Avro schema file.
+            import_args: Additional import arguments.
+
+        Returns:
+            The updated data contract specification.
+        """
+        return import_avro(data_contract_specification, source)
+
+
 def import_avro(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    """
+    Import an Avro schema from a file and update the data contract specification.
+
+    Args:
+        data_contract_specification: The data contract specification to update.
+        source: The path to the Avro schema file.
+
+    Returns:
+        DataContractSpecification: The updated data contract specification.
+
+    Raises:
+        DataContractException: If there's an error parsing the Avro schema.
+    """
     if data_contract_specification.models is None:
         data_contract_specification.models = {}
 
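The importer is now class-based: each format implements the shared Importer interface from datacontract/imports/importer.py (new in this release, see the file list above), and AvroImporter.import_source simply delegates to the existing import_avro function. A minimal sketch of a third-party importer following the same protocol, assuming import_source is the only method that must be overridden (the CsvHeaderImporter name and its logic are hypothetical, not part of the package):

import csv

from datacontract.imports.importer import Importer
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model


class CsvHeaderImporter(Importer):  # hypothetical example, not shipped with datacontract-cli
    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        # Register every CSV header column as a required string field of one table model.
        with open(source, newline="") as f:
            header = next(csv.reader(f))
        fields = {name: Field(type="string", required=True) for name in header}
        data_contract_specification.models["imported_table"] = Model(type="table", fields=fields)
        return data_contract_specification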
@@ -20,7 +55,6 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
             engine="datacontract",
             original_exception=e,
         )
-
     # type record is being used for both the table and the object types in data contract
     # -> CONSTRAINT: one table per .avsc input, all nested records are interpreted as objects
     fields = import_record_fields(avro_schema.fields)
@@ -38,35 +72,118 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     return data_contract_specification
 
 
-def import_record_fields(record_fields):
+def handle_config_avro_custom_properties(field: avro.schema.Field, imported_field: Field) -> None:
+    """
+    Handle custom Avro properties and add them to the imported field's config.
+
+    Args:
+        field: The Avro field.
+        imported_field: The imported field to update.
+    """
+    if field.get_prop("logicalType") is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroLogicalType"] = field.get_prop("logicalType")
+
+    if field.default is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroDefault"] = field.default
+
+
+LOGICAL_TYPE_MAPPING = {
+    "decimal": "decimal",
+    "date": "date",
+    "time-millis": "time",
+    "time-micros": "time",
+    "timestamp-millis": "timestamp_tz",
+    "timestamp-micros": "timestamp_tz",
+    "local-timestamp-micros": "timestamp_ntz",
+    "local-timestamp-millis": "timestamp_ntz",
+    "duration": "string",
+    "uuid": "string",
+}
+
+
+def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Field]:
+    """
+    Import Avro record fields and convert them to data contract fields.
+
+    Args:
+        record_fields: List of Avro record fields.
+
+    Returns:
+        A dictionary of imported fields.
+    """
     imported_fields = {}
     for field in record_fields:
-        imported_fields[field.name] = Field()
-        imported_fields[field.name].required = True
-        imported_fields[field.name].description = field.doc
-        for prop in field.other_props:
-            imported_fields[field.name].__setattr__(prop, field.other_props[prop])
+        imported_field = Field()
+        imported_field.required = True
+        imported_field.description = field.doc
+
+        handle_config_avro_custom_properties(field, imported_field)
 
+        # Determine field type and handle nested structures
         if field.type.type == "record":
-            imported_fields[field.name].type = "object"
-            imported_fields[field.name].description = field.type.doc
-            imported_fields[field.name].fields = import_record_fields(field.type.fields)
+            imported_field.type = "object"
+            imported_field.description = field.type.doc
+            imported_field.fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
-            imported_fields[field.name].required = False
-            type = import_type_of_optional_field(field)
-            imported_fields[field.name].type = type
-            if type == "record":
-                imported_fields[field.name].fields = import_record_fields(get_record_from_union_field(field).fields)
+            imported_field.required = False
+            # Check for enum in union first, since it needs special handling
+            enum_schema = get_enum_from_union_field(field)
+            if enum_schema:
+                imported_field.type = "string"
+                imported_field.enum = enum_schema.symbols
+                imported_field.title = enum_schema.name
+                if not imported_field.config:
+                    imported_field.config = {}
+                imported_field.config["avroType"] = "enum"
+            else:
+                type = import_type_of_optional_field(field)
+                imported_field.type = type
+                if type == "record":
+                    imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
+                elif type == "array":
+                    imported_field.type = "array"
+                    imported_field.items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
-            imported_fields[field.name].type = "array"
-            imported_fields[field.name].items = import_avro_array_items(field.type)
-        else:  # primitive type
-            imported_fields[field.name].type = map_type_from_avro(field.type.type)
+            imported_field.type = "array"
+            imported_field.items = import_avro_array_items(field.type)
+        elif field.type.type == "map":
+            imported_field.type = "map"
+            imported_field.values = import_avro_map_values(field.type)
+        elif field.type.type == "enum":
+            imported_field.type = "string"
+            imported_field.enum = field.type.symbols
+            imported_field.title = field.type.name
+            if not imported_field.config:
+                imported_field.config = {}
+            imported_field.config["avroType"] = "enum"
+        else:
+            logical_type = field.type.get_prop("logicalType")
+            if logical_type in LOGICAL_TYPE_MAPPING:
+                imported_field.type = LOGICAL_TYPE_MAPPING[logical_type]
+                if logical_type == "decimal":
+                    imported_field.precision = field.type.precision
+                    imported_field.scale = field.type.scale
+            else:
+                imported_field.type = map_type_from_avro(field.type.type)
+        imported_fields[field.name] = imported_field
 
     return imported_fields
 
 
-def import_avro_array_items(array_schema):
+def import_avro_array_items(array_schema: avro.schema.ArraySchema) -> Field:
+    """
+    Import Avro array items and convert them to a data contract field.
+
+    Args:
+        array_schema: The Avro array schema.
+
+    Returns:
+        Field: The imported field representing the array items.
+    """
     items = Field()
     for prop in array_schema.other_props:
        items.__setattr__(prop, array_schema.other_props[prop])
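The biggest behavioral change in this hunk is union handling: an enum inside a ["null", ...] union is no longer pushed through import_type_of_optional_field, but imported as an optional string field with enum, title, and config.avroType set. A hedged sketch of the expected round trip (the schema text and the avro.schema.parse entry point are illustrative assumptions):

import avro.schema

from datacontract.imports.avro_importer import import_record_fields

schema = avro.schema.parse("""
{
  "type": "record",
  "name": "Order",
  "fields": [
    {"name": "status",
     "type": ["null", {"type": "enum", "name": "Status", "symbols": ["OPEN", "SHIPPED"]}]}
  ]
}
""")

status = import_record_fields(schema.fields)["status"]
print(status.required)  # False, since the union contains "null"
print(status.type)      # "string", enums are imported as strings
print(status.enum)      # ["OPEN", "SHIPPED"]
print(status.title)     # "Status"
print(status.config)    # {"avroType": "enum"}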
@@ -83,10 +200,52 @@ def import_avro_array_items(array_schema):
     return items
 
 
-def import_type_of_optional_field(field):
+def import_avro_map_values(map_schema: avro.schema.MapSchema) -> Field:
+    """
+    Import Avro map values and convert them to a data contract field.
+
+    Args:
+        map_schema: The Avro map schema.
+
+    Returns:
+        Field: The imported field representing the map values.
+    """
+    values = Field()
+    for prop in map_schema.other_props:
+        values.__setattr__(prop, map_schema.other_props[prop])
+
+    if map_schema.values.type == "record":
+        values.type = "object"
+        values.fields = import_record_fields(map_schema.values.fields)
+    elif map_schema.values.type == "array":
+        values.type = "array"
+        values.items = import_avro_array_items(map_schema.values)
+    else:  # primitive type
+        values.type = map_type_from_avro(map_schema.values.type)
+
+    return values
+
+
+def import_type_of_optional_field(field: avro.schema.Field) -> str:
+    """
+    Determine the type of an optional field in an Avro union.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        str: The mapped type of the non-null field in the union.
+
+    Raises:
+        DataContractException: If no non-null type is found in the union.
+    """
     for field_type in field.type.schemas:
         if field_type.type != "null":
-            return map_type_from_avro(field_type.type)
+            logical_type = field_type.get_prop("logicalType")
+            if logical_type and logical_type in LOGICAL_TYPE_MAPPING:
+                return LOGICAL_TYPE_MAPPING[logical_type]
+            else:
+                return map_type_from_avro(field_type.type)
     raise DataContractException(
         type="schema",
         result="failed",
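import_type_of_optional_field now consults LOGICAL_TYPE_MAPPING before falling back to the raw Avro type, so a nullable timestamp-millis column maps to timestamp_tz instead of long. A small sketch under the same assumptions as the previous example:

import avro.schema

from datacontract.imports.avro_importer import import_type_of_optional_field

schema = avro.schema.parse("""
{
  "type": "record",
  "name": "Event",
  "fields": [
    {"name": "occurred_at",
     "type": ["null", {"type": "long", "logicalType": "timestamp-millis"}]}
  ]
}
""")

# Resolves via LOGICAL_TYPE_MAPPING rather than map_type_from_avro("long").
print(import_type_of_optional_field(schema.fields[0]))  # "timestamp_tz"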
@@ -96,14 +255,67 @@ def import_type_of_optional_field(field):
     )
 
 
-def get_record_from_union_field(field):
+def get_record_from_union_field(field: avro.schema.Field) -> avro.schema.RecordSchema | None:
+    """
+    Get the record schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The record schema if found, None otherwise.
+    """
     for field_type in field.type.schemas:
         if field_type.type == "record":
             return field_type
     return None
 
 
-def map_type_from_avro(avro_type_str: str):
+def get_array_from_union_field(field: avro.schema.Field) -> avro.schema.ArraySchema | None:
+    """
+    Get the array schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The array schema if found, None otherwise.
+    """
+    for field_type in field.type.schemas:
+        if field_type.type == "array":
+            return field_type
+    return None
+
+
+def get_enum_from_union_field(field: avro.schema.Field) -> avro.schema.EnumSchema | None:
+    """
+    Get the enum schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The enum schema if found, None otherwise.
+    """
+    for field_type in field.type.schemas:
+        if field_type.type == "enum":
+            return field_type
+    return None
+
+
+def map_type_from_avro(avro_type_str: str) -> str:
+    """
+    Map Avro type strings to data contract type strings.
+
+    Args:
+        avro_type_str (str): The Avro type string.
+
+    Returns:
+        str: The corresponding data contract type string.
+
+    Raises:
+        DataContractException: If the Avro type is unsupported.
+    """
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":
         return "null"
@@ -113,6 +325,8 @@ def map_type_from_avro(avro_type_str: str):
         return "binary"
     elif avro_type_str == "double":
         return "double"
+    elif avro_type_str == "float":
+        return "float"
     elif avro_type_str == "int":
         return "int"
     elif avro_type_str == "long":
@@ -121,6 +335,12 @@ def map_type_from_avro(avro_type_str: str):
         return "boolean"
     elif avro_type_str == "record":
         return "record"
+    elif avro_type_str == "array":
+        return "array"
+    elif avro_type_str == "map":
+        return "map"
+    elif avro_type_str == "enum":
+        return "string"
     else:
         raise DataContractException(
             type="schema",
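Taken together, the importer can be driven either through the new AvroImporter class or by calling import_avro directly, as both functions appear above. A minimal end-to-end sketch (the orders.avsc path is a placeholder):

from datacontract.imports.avro_importer import import_avro
from datacontract.model.data_contract_specification import DataContractSpecification

spec = import_avro(DataContractSpecification(), "orders.avsc")  # placeholder path
for model_name, model in spec.models.items():
    print(model_name, sorted(model.fields))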
datacontract/imports/bigquery_importer.py
@@ -0,0 +1,221 @@
+import json
+import logging
+from typing import List
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+from datacontract.model.exceptions import DataContractException
+
+
+class BigQueryImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        if source is not None:
+            data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
+        else:
+            data_contract_specification = import_bigquery_from_api(
+                data_contract_specification,
+                import_args.get("bigquery_table"),
+                import_args.get("bigquery_project"),
+                import_args.get("bigquery_dataset"),
+            )
+        return data_contract_specification
+
+
+def import_bigquery_from_json(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
+    try:
+        with open(source, "r") as file:
+            bigquery_schema = json.loads(file.read())
+    except json.JSONDecodeError as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse bigquery schema",
+            reason=f"Failed to parse bigquery schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    return convert_bigquery_schema(data_contract_specification, bigquery_schema)
+
+
+def import_bigquery_from_api(
+    data_contract_specification: DataContractSpecification,
+    bigquery_tables: List[str],
+    bigquery_project: str,
+    bigquery_dataset: str,
+) -> DataContractSpecification:
+    try:
+        from google.cloud import bigquery
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="bigquery extra missing",
+            reason="Install the extra datacontract-cli[bigquery] to use bigquery",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+    client = bigquery.Client(project=bigquery_project)
+
+    if bigquery_tables is None:
+        bigquery_tables = fetch_table_names(client, bigquery_dataset)
+
+    for table in bigquery_tables:
+        try:
+            api_table = client.get_table("{}.{}.{}".format(bigquery_project, bigquery_dataset, table))
+
+        except ValueError as e:
+            raise DataContractException(
+                type="schema",
+                result="failed",
+                name="Invalid table name for bigquery API",
+                reason=f"Tablename {table} is invalid for the bigquery API",
+                original_exception=e,
+                engine="datacontract",
+            )
+
+        if api_table is None:
+            raise DataContractException(
+                type="request",
+                result="failed",
+                name="Query BigQuery schema from API",
+                reason=f"Table {table} not found in BigQuery project {bigquery_project}, dataset {bigquery_dataset}.",
+                engine="datacontract",
+            )
+
+        convert_bigquery_schema(data_contract_specification, api_table.to_api_repr())
+
+    return data_contract_specification
+
+
+def fetch_table_names(client, dataset: str) -> List[str]:
+    table_names = []
+    api_tables = client.list_tables(dataset)
+    for api_table in api_tables:
+        table_names.append(api_table.table_id)
+
+    return table_names
+
+
+def convert_bigquery_schema(
+    data_contract_specification: DataContractSpecification, bigquery_schema: dict
+) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    fields = import_table_fields(bigquery_schema.get("schema").get("fields"))
+
+    # Looking at actual export data, I guess this is always set and friendlyName isn't, though I couldn't say
+    # what exactly leads to friendlyName being set
+    table_id = bigquery_schema.get("tableReference").get("tableId")
+
+    data_contract_specification.models[table_id] = Model(
+        fields=fields, type=map_bigquery_type(bigquery_schema.get("type"))
+    )
+
+    # Copy the description, if it exists
+    if bigquery_schema.get("description") is not None:
+        data_contract_specification.models[table_id].description = bigquery_schema.get("description")
+
+    # Set the title from friendlyName if it exists
+    if bigquery_schema.get("friendlyName") is not None:
+        data_contract_specification.models[table_id].title = bigquery_schema.get("friendlyName")
+
+    return data_contract_specification
+
+
+def import_table_fields(table_fields):
+    imported_fields = {}
+    for field in table_fields:
+        field_name = field.get("name")
+        imported_fields[field_name] = Field()
+        imported_fields[field_name].required = field.get("mode") == "REQUIRED"
+        imported_fields[field_name].description = field.get("description")
+
+        if field.get("type") == "RECORD":
+            imported_fields[field_name].type = "object"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "STRUCT":
+            imported_fields[field_name].type = "struct"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "RANGE":
+            # This is a range of date/datetime/timestamp but multiple values
+            # So we map it to an array
+            imported_fields[field_name].type = "array"
+            imported_fields[field_name].items = Field(
+                type=map_type_from_bigquery(field["rangeElementType"].get("type"))
+            )
+        else:  # primitive type
+            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"))
+
+        if field.get("type") == "STRING":
+            # in bigquery both string and bytes have maxLength but in the datacontracts
+            # spec it is only valid for strings
+            if field.get("maxLength") is not None:
+                imported_fields[field_name].maxLength = int(field.get("maxLength"))
+
+        if field.get("type") == "NUMERIC" or field.get("type") == "BIGNUMERIC":
+            if field.get("precision") is not None:
+                imported_fields[field_name].precision = int(field.get("precision"))
+
+            if field.get("scale") is not None:
+                imported_fields[field_name].scale = int(field.get("scale"))
+
+    return imported_fields
+
+
+def map_type_from_bigquery(bigquery_type_str: str):
+    if bigquery_type_str == "STRING":
+        return "string"
+    elif bigquery_type_str == "BYTES":
+        return "bytes"
+    elif bigquery_type_str == "INTEGER":
+        return "int"
+    elif bigquery_type_str == "INT64":
+        return "bigint"
+    elif bigquery_type_str == "FLOAT":
+        return "float"
+    elif bigquery_type_str == "FLOAT64":
+        return "double"
+    elif bigquery_type_str == "BOOLEAN" or bigquery_type_str == "BOOL":
+        return "boolean"
+    elif bigquery_type_str == "TIMESTAMP":
+        return "timestamp"
+    elif bigquery_type_str == "DATE":
+        return "date"
+    elif bigquery_type_str == "TIME":
+        return "timestamp_ntz"
+    elif bigquery_type_str == "DATETIME":
+        return "timestamp"
+    elif bigquery_type_str == "NUMERIC":
+        return "numeric"
+    elif bigquery_type_str == "BIGNUMERIC":
+        return "double"
+    elif bigquery_type_str == "GEOGRAPHY":
+        return "object"
+    elif bigquery_type_str == "JSON":
+        return "object"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map bigquery type to data contract type",
+            reason=f"Unsupported type {bigquery_type_str} in bigquery json definition.",
+            engine="datacontract",
+        )
+
+
+def map_bigquery_type(bigquery_type: str) -> str:
+    if bigquery_type == "TABLE" or bigquery_type == "EXTERNAL" or bigquery_type == "SNAPSHOT":
+        return "table"
+    elif bigquery_type == "VIEW" or bigquery_type == "MATERIALIZED_VIEW":
+        return "view"
+    else:
+        logger = logging.getLogger(__name__)
+        logger.info(
+            f"Can't properly map bigquery table type '{bigquery_type}' to datacontracts model types. Mapping it to table."
+        )
+        return "table"
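For the JSON path, the importer expects the table representation returned by the BigQuery API (the same shape as bq show --format=prettyjson output, as far as I can tell). A hedged sketch with an illustrative schema file; the project, dataset, and field names are made up:

import json

from datacontract.imports.bigquery_importer import import_bigquery_from_json
from datacontract.model.data_contract_specification import DataContractSpecification

bigquery_schema = {
    "type": "TABLE",
    "tableReference": {"projectId": "my-project", "datasetId": "sales", "tableId": "orders"},
    "description": "All orders",
    "schema": {
        "fields": [
            {"name": "order_id", "type": "STRING", "mode": "REQUIRED", "maxLength": "36"},
            {"name": "amount", "type": "NUMERIC", "precision": "10", "scale": "2"},
        ]
    },
}

with open("orders.json", "w") as f:
    json.dump(bigquery_schema, f)

spec = import_bigquery_from_json(DataContractSpecification(), "orders.json")
orders = spec.models["orders"]
print(orders.type)                          # "table", via map_bigquery_type("TABLE")
print(orders.fields["order_id"].maxLength)  # 36, coerced to int
print(orders.fields["amount"].precision)    # 10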