datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their respective public registries; it is provided for informational purposes only.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ datacontract/imports/csv_importer.py
@@ -0,0 +1,143 @@
+ import os
+ from typing import Any, Dict, List
+
+ import duckdb
+
+ from datacontract.imports.importer import Importer
+ from datacontract.model.data_contract_specification import DataContractSpecification, Model, Server
+
+
+ class CsvImporter(Importer):
+     def import_source(
+         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+     ) -> DataContractSpecification:
+         return import_csv(data_contract_specification, source)
+
+
+ def import_csv(
+     data_contract_specification: DataContractSpecification, source: str, include_examples: bool = False
+ ) -> DataContractSpecification:
+     # use the file name as the table name
+     table_name = os.path.splitext(os.path.basename(source))[0]
+
+     # use duckdb to auto-detect the format, columns, etc.
+     con = duckdb.connect(database=":memory:")
+     con.sql(
+         f"""CREATE VIEW "{table_name}" AS SELECT * FROM read_csv_auto('{source}', hive_partitioning=1, auto_type_candidates = ['BOOLEAN', 'INTEGER', 'BIGINT', 'DOUBLE', 'VARCHAR']);"""
+     )
+     dialect = con.sql(f"SELECT * FROM sniff_csv('{source}', sample_size = 1000);").fetchnumpy()
+     tbl = con.table(table_name)
+
+     if data_contract_specification.servers is None:
+         data_contract_specification.servers = {}
+
+     delimiter = None if dialect is None else dialect["Delimiter"][0]
+
+     if dialect is not None:
+         dc_types = [map_type_from_duckdb(x["type"]) for x in dialect["Columns"][0]]
+     else:
+         dc_types = [map_type_from_duckdb(str(x)) for x in tbl.dtypes]
+
+     data_contract_specification.servers["production"] = Server(
+         type="local", path=source, format="csv", delimiter=delimiter
+     )
+
+     rowcount = tbl.shape[0]
+
+     tallies = dict()
+     for row in tbl.describe().fetchall():
+         if row[0] not in ["count", "max", "min"]:
+             continue
+         for i in range(tbl.shape[1]):
+             tallies[(row[0], tbl.columns[i])] = row[i + 1] if row[0] != "count" else int(row[i + 1])
+
+     samples: Dict[str, List] = dict()
+     for i in range(tbl.shape[1]):
+         field_name = tbl.columns[i]
+         if tallies[("count", field_name)] > 0 and tbl.dtypes[i] not in ["BOOLEAN", "BLOB"]:
+             sql = f"""SELECT DISTINCT "{field_name}" FROM "{table_name}" WHERE "{field_name}" IS NOT NULL USING SAMPLE 5 ROWS;"""
+             samples[field_name] = [x[0] for x in con.sql(sql).fetchall()]
+
+     formats: Dict[str, str] = dict()
+     for i in range(tbl.shape[1]):
+         field_name = tbl.columns[i]
+         if tallies[("count", field_name)] > 0 and tbl.dtypes[i] == "VARCHAR":
+             sql = f"""SELECT
+                 count_if("{field_name}" IS NOT NULL) as count,
+                 count_if(regexp_matches("{field_name}", '^[\\w-\\.]+@([\\w-]+\\.)+[\\w-]{{2,4}}$')) as email,
+                 count_if(regexp_matches("{field_name}", '^[a-z0-9]{{8}}-?[a-z0-9]{{4}}-?[a-z0-9]{{4}}-?[a-z0-9]{{4}}-?[a-z0-9]{{12}}$')) as uuid
+                 FROM "{table_name}";
+             """
+             res = con.sql(sql).fetchone()
+             if res[1] == res[0]:
+                 formats[field_name] = "email"
+             elif res[2] == res[0]:
+                 formats[field_name] = "uuid"
+
+     fields = {}
+     for i in range(tbl.shape[1]):
+         field_name = tbl.columns[i]
+         dc_type = dc_types[i]
+
+         # specifying "integer" rather than "bigint" looks nicer
+         if (
+             dc_type == "bigint"
+             and tallies[("max", field_name)] <= 2147483647
+             and tallies[("min", field_name)] >= -2147483648
+         ):
+             dc_type = "integer"
+
+         field: Dict[str, Any] = {"type": dc_type, "format": formats.get(field_name, None)}
+
+         if tallies[("count", field_name)] == rowcount:
+             field["required"] = True
+         if dc_type not in ["boolean", "bytes"]:
+             distinct_values = tbl.count(f'DISTINCT "{field_name}"').fetchone()[0]  # type: ignore
+             if distinct_values > 0 and distinct_values == tallies[("count", field_name)]:
+                 field["unique"] = True
+         s = samples.get(field_name, None)
+         if s is not None:
+             field["examples"] = s
+         if dc_type in ["integer", "bigint", "float", "double"]:
+             field["minimum"] = tallies[("min", field_name)]
+             field["maximum"] = tallies[("max", field_name)]
+
+         fields[field_name] = field
+
+     model_examples = None
+     if include_examples:
+         model_examples = con.sql(f"""SELECT DISTINCT * FROM "{table_name}" USING SAMPLE 5 ROWS;""").fetchall()
+
+     data_contract_specification.models[table_name] = Model(
+         type="table", description="Generated model of " + source, fields=fields, examples=model_examples
+     )
+
+     return data_contract_specification
+
+
+ _duck_db_types = {
+     "BOOLEAN": "boolean",
+     "BLOB": "bytes",
+     "TINYINT": "integer",
+     "SMALLINT": "integer",
+     "INTEGER": "integer",
+     "BIGINT": "bigint",
+     "UTINYINT": "integer",
+     "USMALLINT": "integer",
+     "UINTEGER": "integer",
+     "UBIGINT": "bigint",
+     "FLOAT": "float",
+     "DOUBLE": "double",
+     "VARCHAR": "string",
+     "TIMESTAMP": "timestamp",
+     "DATE": "date",
+     # TODO: Add support for NULL
+ }
+
+
+ def map_type_from_duckdb(sql_type: None | str):
+     if sql_type is None:
+         return None
+
+     sql_type_normed = sql_type.upper().strip()
+     return _duck_db_types.get(sql_type_normed, "string")
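
For orientation, a minimal usage sketch of the new CSV importer (not part of the diff). The file name "orders.csv" is an illustrative assumption, and the sketch assumes DataContractSpecification() constructs with usable defaults:

    from datacontract.imports.csv_importer import import_csv
    from datacontract.model.data_contract_specification import DataContractSpecification

    spec = DataContractSpecification()     # assumption: defaults suffice for a scratch spec
    spec = import_csv(spec, "orders.csv")  # hypothetical input file

    # The importer registers a "production" server pointing at the file and a
    # model named after the file stem, with inferred types, detected email/uuid
    # formats, required/unique flags, and numeric min/max bounds.
    print(spec.servers["production"].delimiter)
    print(spec.models["orders"].fields)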
--- /dev/null
+++ datacontract/imports/dbml_importer.py
@@ -0,0 +1,112 @@
+ from typing import List
+
+ from pydbml import Database, PyDBML
+ from pyparsing import ParseException
+
+ from datacontract.imports.importer import Importer
+ from datacontract.imports.sql_importer import map_type_from_sql
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+ from datacontract.model.exceptions import DataContractException
+
+
+ class DBMLImporter(Importer):
+     def import_source(
+         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+     ) -> DataContractSpecification:
+         data_contract_specification = import_dbml_from_source(
+             data_contract_specification,
+             source,
+             import_args.get("dbml_schema"),
+             import_args.get("dbml_table"),
+         )
+         return data_contract_specification
+
+
+ def import_dbml_from_source(
+     data_contract_specification: DataContractSpecification,
+     source: str,
+     import_schemas: List[str],
+     import_tables: List[str],
+ ) -> DataContractSpecification:
+     try:
+         with open(source, "r") as file:
+             dbml_schema = PyDBML(file)
+     except ParseException as e:
+         raise DataContractException(
+             type="schema",
+             name="Parse DBML schema",
+             reason=f"Failed to parse DBML schema from {source}",
+             engine="datacontract",
+             original_exception=e,
+         )
+
+     return convert_dbml(data_contract_specification, dbml_schema, import_schemas, import_tables)
+
+
+ def convert_dbml(
+     data_contract_specification: DataContractSpecification,
+     dbml_schema: Database,
+     import_schemas: List[str],
+     import_tables: List[str],
+ ) -> DataContractSpecification:
+     if dbml_schema.project is not None:
+         data_contract_specification.info.title = dbml_schema.project.name
+
+     if data_contract_specification.models is None:
+         data_contract_specification.models = {}
+
+     for table in dbml_schema.tables:
+         schema_name = table.schema
+         table_name = table.name
+
+         # Skip the table if schema or table filters are defined and the
+         # current table doesn't match; empty filters disable filtering.
+         if import_schemas and schema_name not in import_schemas:
+             continue
+
+         if import_tables and table_name not in import_tables:
+             continue
+
+         fields = import_table_fields(table, dbml_schema.refs)
+
+         data_contract_specification.models[table_name] = Model(
+             fields=fields, namespace=schema_name, description=table.note.text
+         )
+
+     return data_contract_specification
+
+
+ def import_table_fields(table, references) -> dict[str, Field]:
+     imported_fields = {}
+     for field in table.columns:
+         field_name = field.name
+         imported_fields[field_name] = Field()
+         imported_fields[field_name].required = field.not_null
+         imported_fields[field_name].description = field.note.text
+         imported_fields[field_name].primaryKey = field.pk
+         imported_fields[field_name].unique = field.unique
+         # Assume the column type is a valid SQL type; DBML doesn't enforce
+         # anything other than 'no spaces' in column types.
+         imported_fields[field_name].type = map_type_from_sql(field.type)
+
+         ref = get_reference(field, references)
+         if ref is not None:
+             imported_fields[field_name].references = ref
+
+     return imported_fields
+
+
+ def get_reference(field, references):
+     result = None
+     for ref in references:
+         ref_table_name = ref.col1[0].table.name
+         ref_col_name = ref.col1[0].name
+         field_table_name = field.table.name
+         field_name = field.name
+
+         if ref_table_name == field_table_name and ref_col_name == field_name:
+             result = f"{ref.col2[0].table.name}.{ref.col2[0].name}"
+             return result
+
+     return result
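
A similar sketch for the DBML importer, calling the module-level function directly. The file name, schema, and table filters are illustrative assumptions:

    from datacontract.imports.dbml_importer import import_dbml_from_source
    from datacontract.model.data_contract_specification import DataContractSpecification

    spec = DataContractSpecification()
    # Import only the "orders" table from the "ecommerce" schema; per
    # convert_dbml above, empty or None filter lists disable filtering.
    spec = import_dbml_from_source(spec, "schema.dbml", ["ecommerce"], ["orders"])
    print(spec.models["orders"].namespace)  # "ecommerce"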
--- /dev/null
+++ datacontract/imports/dbt_importer.py
@@ -0,0 +1,240 @@
+ import json
+ from typing import TypedDict
+
+ from dbt.artifacts.resources.v1.components import ColumnInfo
+ from dbt.contracts.graph.manifest import Manifest
+ from dbt.contracts.graph.nodes import GenericTestNode, ManifestNode, ModelNode
+ from dbt_common.contracts.constraints import ConstraintType
+
+ from datacontract.imports.bigquery_importer import map_type_from_bigquery
+ from datacontract.imports.importer import Importer
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+
+
+ class DBTImportArgs(TypedDict, total=False):
+     """
+     A dictionary of arguments for importing dbt models.
+     Makes the dbt importer more customizable by allowing flexible filtering
+     of models and their properties, through wrapping or extending.
+
+     Attributes:
+         dbt_nodes: The names of the nodes to include in the contract. Defaults to all.
+         resource_types: Only nodes of these resource types are imported. Defaults to ["model"].
+     """
+
+     dbt_nodes: list[str]
+     resource_types: list[str]
+
+
+ class DbtManifestImporter(Importer):
+     def import_source(
+         self,
+         data_contract_specification: DataContractSpecification,
+         source: str,
+         import_args: DBTImportArgs,
+     ) -> DataContractSpecification:
+         manifest = read_dbt_manifest(manifest_path=source)
+         return import_dbt_manifest(
+             data_contract_specification=data_contract_specification,
+             manifest=manifest,
+             dbt_nodes=import_args.get("dbt_nodes", []),
+             resource_types=import_args.get("resource_types", ["model"]),
+         )
+
+
+ def read_dbt_manifest(manifest_path: str) -> Manifest:
+     """Read a dbt manifest from a file."""
+     with open(file=manifest_path, mode="r", encoding="utf-8") as f:
+         manifest_dict: dict = json.load(f)
+     manifest = Manifest.from_dict(manifest_dict)
+     manifest.build_parent_and_child_maps()
+     return manifest
+
+
+ def _get_primary_keys(manifest: Manifest, node: ManifestNode) -> list[str]:
+     node_unique_id = node.unique_id
+     if isinstance(node, ModelNode):
+         test_nodes = []
+         for node_id in manifest.child_map.get(node_unique_id, []):
+             test_node = manifest.nodes.get(node_id)
+             if not test_node or test_node.resource_type != "test":
+                 continue
+             if not isinstance(test_node, GenericTestNode):
+                 continue
+             if test_node.config.where is not None:
+                 continue
+             test_nodes.append(test_node)
+         return node.infer_primary_key(test_nodes)
+     return []
+
+
+ def _get_references(manifest: Manifest, node: ManifestNode) -> dict[str, str]:
+     node_unique_id = node.unique_id
+     references = {}
+     for node_id in manifest.child_map.get(node_unique_id, []):
+         test_node = manifest.nodes.get(node_id)
+         if not test_node or test_node.resource_type != "test":
+             continue
+         if not isinstance(test_node, GenericTestNode):
+             continue
+         if test_node.test_metadata.name != "relationships":
+             continue
+         if test_node.config.where is not None:
+             continue
+         if test_node.attached_node != node_unique_id:
+             continue
+         relationship_target_node_id = [n for n in test_node.depends_on.nodes if n != node_unique_id][0]
+         relationship_target_node = manifest.nodes.get(relationship_target_node_id)
+         references[f"{node.name}.{test_node.column_name}"] = (
+             f"""{relationship_target_node.name}.{test_node.test_metadata.kwargs["field"]}"""
+         )
+     return references
+
+
+ def import_dbt_manifest(
+     data_contract_specification: DataContractSpecification,
+     manifest: Manifest,
+     dbt_nodes: list[str],
+     resource_types: list[str],
+ ) -> DataContractSpecification:
+     """
+     Extract all relevant information from the manifest
+     and put it into a data contract specification.
+     """
+     data_contract_specification.info.title = manifest.metadata.project_name
+     data_contract_specification.info.dbt_version = manifest.metadata.dbt_version
+     adapter_type = manifest.metadata.adapter_type
+     data_contract_specification.models = data_contract_specification.models or {}
+     for node in manifest.nodes.values():
+         # Only process nodes of the requested resource types (models by default).
+         if node.resource_type not in resource_types:
+             continue
+
+         # dbt_nodes filters the relevant nodes; if empty, all nodes are imported.
+         if dbt_nodes and node.name not in dbt_nodes:
+             continue
+
+         model_unique_id = node.unique_id
+         primary_keys = _get_primary_keys(manifest, node)
+         references = _get_references(manifest, node)
+
+         primary_key = None
+         if len(primary_keys) == 1:
+             primary_key = primary_keys[0]
+
+         dc_model = Model(
+             description=node.description,
+             tags=node.tags,
+             fields=create_fields(
+                 manifest,
+                 model_unique_id=model_unique_id,
+                 columns=node.columns,
+                 primary_key_name=primary_key,
+                 references=references,
+                 adapter_type=adapter_type,
+             ),
+         )
+         if len(primary_keys) > 1:
+             dc_model.primaryKey = primary_keys
+
+         data_contract_specification.models[node.name] = dc_model
+
+     return data_contract_specification
+
+
+ def convert_data_type_by_adapter_type(data_type: str, adapter_type: str) -> str:
+     if adapter_type == "bigquery":
+         return map_type_from_bigquery(data_type)
+     return data_type
+
+
+ def create_fields(
+     manifest: Manifest,
+     model_unique_id: str,
+     columns: dict[str, ColumnInfo],
+     primary_key_name: str,
+     references: dict[str, str],
+     adapter_type: str,
+ ) -> dict[str, Field]:
+     fields = {
+         column.name: create_field(manifest, model_unique_id, column, primary_key_name, references, adapter_type)
+         for column in columns.values()
+     }
+     return fields
+
+
+ def get_column_tests(manifest: Manifest, model_name: str, column_name: str) -> list[dict[str, str]]:
+     column_tests = []
+     model_node = manifest.nodes.get(model_name)
+     if not model_node:
+         raise ValueError(f"Model {model_name} not found in manifest.")
+
+     model_unique_id = model_node.unique_id
+     test_ids = manifest.child_map.get(model_unique_id, [])
+
+     for test_id in test_ids:
+         test_node = manifest.nodes.get(test_id)
+         if not test_node or test_node.resource_type != "test":
+             continue
+
+         if not isinstance(test_node, GenericTestNode):
+             continue
+
+         if test_node.column_name != column_name:
+             continue
+
+         if test_node.config.where is not None:
+             continue
+
+         column_tests.append(
+             {
+                 "test_name": test_node.name,
+                 "test_type": test_node.test_metadata.name,
+                 "column": test_node.column_name,
+             }
+         )
+     return column_tests
+
+
+ def create_field(
+     manifest: Manifest,
+     model_unique_id: str,
+     column: ColumnInfo,
+     primary_key_name: str,
+     references: dict[str, str],
+     adapter_type: str,
+ ) -> Field:
+     column_type = convert_data_type_by_adapter_type(column.data_type, adapter_type) if column.data_type else ""
+     field = Field(
+         description=column.description,
+         type=column_type,
+         tags=column.tags,
+     )
+
+     all_tests = get_column_tests(manifest, model_unique_id, column.name)
+
+     required = False
+     if any(constraint.type == ConstraintType.not_null for constraint in column.constraints):
+         required = True
+     if [test for test in all_tests if test["test_type"] == "not_null"]:
+         required = True
+     if required:
+         field.required = required
+
+     unique = False
+     if any(constraint.type == ConstraintType.unique for constraint in column.constraints):
+         unique = True
+     if [test for test in all_tests if test["test_type"] == "unique"]:
+         unique = True
+     if unique:
+         field.unique = unique
+
+     if column.name == primary_key_name:
+         field.primaryKey = True
+
+     references_key = f"{manifest.nodes[model_unique_id].name}.{column.name}"
+     if references_key in references:
+         field.references = references[references_key]
+
+     return field
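
Finally, a sketch of the dbt manifest flow, using only the module-level functions above. The manifest path and node name are illustrative assumptions:

    from datacontract.imports.dbt_importer import import_dbt_manifest, read_dbt_manifest
    from datacontract.model.data_contract_specification import DataContractSpecification

    spec = DataContractSpecification()
    manifest = read_dbt_manifest("target/manifest.json")  # hypothetical path
    spec = import_dbt_manifest(
        spec,
        manifest,
        dbt_nodes=["orders"],      # import only this node; [] imports all
        resource_types=["model"],  # skip seeds, snapshots, tests, etc.
    )
    print(spec.models["orders"].description)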