datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of datacontract-cli has been flagged as potentially problematic.

Files changed (48)
  1. datacontract/breaking/breaking_rules.py +4 -0
  2. datacontract/cli.py +49 -32
  3. datacontract/data_contract.py +14 -11
  4. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
  5. datacontract/engines/soda/check_soda_execute.py +9 -4
  6. datacontract/engines/soda/connections/databricks.py +12 -3
  7. datacontract/engines/soda/connections/duckdb.py +22 -9
  8. datacontract/export/data_caterer_converter.py +20 -7
  9. datacontract/export/dbml_converter.py +2 -2
  10. datacontract/export/dbt_converter.py +41 -16
  11. datacontract/export/exporter.py +6 -2
  12. datacontract/export/exporter_factory.py +48 -14
  13. datacontract/export/iceberg_converter.py +3 -3
  14. datacontract/export/markdown_converter.py +208 -0
  15. datacontract/export/odcs_v3_exporter.py +6 -0
  16. datacontract/export/sodacl_converter.py +22 -5
  17. datacontract/export/sql_converter.py +1 -1
  18. datacontract/export/sql_type_converter.py +28 -2
  19. datacontract/export/sqlalchemy_converter.py +3 -1
  20. datacontract/imports/csv_importer.py +89 -0
  21. datacontract/imports/dbml_importer.py +1 -1
  22. datacontract/imports/dbt_importer.py +94 -12
  23. datacontract/imports/importer.py +1 -0
  24. datacontract/imports/importer_factory.py +5 -0
  25. datacontract/imports/odcs_v2_importer.py +1 -1
  26. datacontract/imports/odcs_v3_importer.py +1 -1
  27. datacontract/imports/sql_importer.py +1 -1
  28. datacontract/init/init_template.py +20 -0
  29. datacontract/integration/datamesh_manager.py +15 -9
  30. datacontract/lint/linters/field_reference_linter.py +10 -1
  31. datacontract/lint/resolve.py +48 -14
  32. datacontract/lint/schema.py +10 -3
  33. datacontract/model/data_contract_specification.py +13 -4
  34. datacontract/model/run.py +1 -0
  35. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  36. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  37. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  38. datacontract/templates/datacontract.html +20 -1
  39. datacontract/templates/partials/definition.html +15 -5
  40. datacontract/templates/partials/model_field.html +10 -1
  41. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
  42. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
  43. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
  44. datacontract/init/download_datacontract_file.py +0 -17
  45. datacontract/integration/opentelemetry.py +0 -103
  46. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  47. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  48. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0

datacontract/export/exporter_factory.py

@@ -48,7 +48,9 @@ def load_module_class(module_path, class_name):
 exporter_factory = ExporterFactory()
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.avro, module_path="datacontract.export.avro_converter", class_name="AvroExporter"
+    name=ExportFormat.avro,
+    module_path="datacontract.export.avro_converter",
+    class_name="AvroExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -70,15 +72,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbml, module_path="datacontract.export.dbml_converter", class_name="DbmlExporter"
+    name=ExportFormat.dbml,
+    module_path="datacontract.export.dbml_converter",
+    class_name="DbmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.rdf, module_path="datacontract.export.rdf_converter", class_name="RdfExporter"
+    name=ExportFormat.rdf,
+    module_path="datacontract.export.rdf_converter",
+    class_name="RdfExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbt, module_path="datacontract.export.dbt_converter", class_name="DbtExporter"
+    name=ExportFormat.dbt,
+    module_path="datacontract.export.dbt_converter",
+    class_name="DbtExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -100,19 +108,27 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v2, module_path="datacontract.export.odcs_v2_exporter", class_name="OdcsV2Exporter"
+    name=ExportFormat.odcs_v2,
+    module_path="datacontract.export.odcs_v2_exporter",
+    class_name="OdcsV2Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v3, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
+    name=ExportFormat.odcs_v3,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
+    name=ExportFormat.odcs,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.go, module_path="datacontract.export.go_converter", class_name="GoExporter"
+    name=ExportFormat.go,
+    module_path="datacontract.export.go_converter",
+    class_name="GoExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -122,7 +138,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.html, module_path="datacontract.export.html_export", class_name="HtmlExporter"
+    name=ExportFormat.html,
+    module_path="datacontract.export.html_export",
+    class_name="HtmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -138,15 +156,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sodacl, module_path="datacontract.export.sodacl_converter", class_name="SodaExporter"
+    name=ExportFormat.sodacl,
+    module_path="datacontract.export.sodacl_converter",
+    class_name="SodaExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql, module_path="datacontract.export.sql_converter", class_name="SqlExporter"
+    name=ExportFormat.sql,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql_query, module_path="datacontract.export.sql_converter", class_name="SqlQueryExporter"
+    name=ExportFormat.sql_query,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlQueryExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -156,7 +180,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.spark, module_path="datacontract.export.spark_converter", class_name="SparkExporter"
+    name=ExportFormat.spark,
+    module_path="datacontract.export.spark_converter",
+    class_name="SparkExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -166,7 +192,15 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
+    name=ExportFormat.dcs,
+    module_path="datacontract.export.dcs_exporter",
+    class_name="DcsExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.markdown,
+    module_path="datacontract.export.markdown_converter",
+    class_name="MarkdownExporter",
 )
 
 exporter_factory.register_lazy_exporter(
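
The registration calls above only store the format name, module path, and class name; the converter module is not imported until that export format is actually requested. Below is a minimal sketch of how such lazy loading can work. `ExporterFactorySketch` and its internals are illustrative stand-ins, not the package's actual `ExporterFactory`; only the `register_lazy_exporter(name=..., module_path=..., class_name=...)` signature is taken from the diff.

    import importlib


    class ExporterFactorySketch:
        """Illustrative lazy registry: stores import targets as strings only."""

        def __init__(self):
            self._lazy = {}

        def register_lazy_exporter(self, name, module_path, class_name):
            # Nothing is imported here; exporter dependencies stay unloaded.
            self._lazy[name] = (module_path, class_name)

        def resolve(self, name):
            # The import happens on first use of the format.
            module_path, class_name = self._lazy[name]
            module = importlib.import_module(module_path)
            return getattr(module, class_name)


    factory = ExporterFactorySketch()
    factory.register_lazy_exporter(
        name="markdown",  # the real code uses the ExportFormat.markdown enum member
        module_path="datacontract.export.markdown_converter",
        class_name="MarkdownExporter",
    )
    exporter_class = factory.resolve("markdown")  # triggers the import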

datacontract/export/iceberg_converter.py

@@ -105,14 +105,14 @@ def make_field(field_name, field):
     # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
     # also, the Iceberg sdk catalog code will re-set the fieldIDs prior to executing any table operations on the schema
     # ref: https://github.com/apache/iceberg-python/pull/1072
-    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required)
+    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required is True)
 
 
 def make_list(item):
     field_type = get_field_type(item)
 
     # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
-    return types.ListType(element_id=0, element_type=field_type, element_required=item.required)
+    return types.ListType(element_id=0, element_type=field_type, element_required=item.required is True)
 
 
 def make_map(field):
@@ -121,7 +121,7 @@ def make_map(field):
 
     # key_id and value_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
     return types.MapType(
-        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required
+        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required is True
     )
 
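
The `is True` comparisons above matter because `required` is optional in the contract model and can be unset (`None`), while the Iceberg types expect a strict boolean. A small illustration of the coercion (treating `None` as the unset default is an assumption about the pydantic model):

    # Possible values of field.required as parsed from a contract.
    for required in (True, False, None):
        print(required, "->", required is True)
    # True -> True
    # False -> False
    # None -> False  (previously None would have been passed through unchanged)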

datacontract/export/markdown_converter.py (new file)

@@ -0,0 +1,208 @@
+from typing import Dict
+
+from pydantic import BaseModel
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    Field,
+    Model,
+    Server,
+    ServiceLevel,
+)
+
+
+class MarkdownExporter(Exporter):
+    """Exporter implementation for converting data contracts to Markdown."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: dict,
+    ) -> str:
+        """Exports a data contract to Markdown format."""
+        return to_markdown(data_contract)
+
+
+def to_markdown(data_contract: DataContractSpecification) -> str:
+    """
+    Convert a data contract to its Markdown representation.
+
+    Args:
+        data_contract (DataContractSpecification): The data contract to convert.
+
+    Returns:
+        str: The Markdown representation of the data contract.
+    """
+    markdown_parts = [
+        f"# {data_contract.id}",
+        "## Info",
+        obj_attributes_to_markdown(data_contract.info),
+        "",
+        "## Servers",
+        servers_to_markdown(data_contract.servers),
+        "",
+        "## Terms",
+        obj_attributes_to_markdown(data_contract.terms),
+        "",
+        "## Models",
+        models_to_markdown(data_contract.models),
+        "",
+        "## Definitions",
+        definitions_to_markdown(data_contract.definitions),
+        "",
+        "## Service levels",
+        service_level_to_markdown(data_contract.servicelevels),
+    ]
+    return "\n".join(markdown_parts)
+
+
+def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_in_table_cell: bool = False) -> str:
+    if not obj:
+        return ""
+    if is_in_table_cell:
+        bullet_char = "•"
+        newline_char = "<br>"
+    else:
+        bullet_char = "-"
+        newline_char = "\n"
+    obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
+    description_value = obj_model.pop("description", None)
+    attributes = [
+        (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
+        for attr, value in obj_model.items()
+        if value
+    ]
+    description = f"*{description_to_markdown(description_value)}*"
+    return newline_char.join([description] + attributes)
+
+
+def servers_to_markdown(servers: Dict[str, Server]) -> str:
+    if not servers:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Attributes |",
+        "| ---- | ---- | ---------- |",
+    ]
+    for server_name, server in servers.items():
+        markdown_parts.append(
+            f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |"
+        )
+    return "\n".join(markdown_parts)
+
+
+def models_to_markdown(models: Dict[str, Model]) -> str:
+    return "\n".join(model_to_markdown(model_name, model) for model_name, model in models.items())
+
+
+def model_to_markdown(model_name: str, model: Model) -> str:
+    """
+    Generate Markdown representation for a specific model.
+
+    Args:
+        model_name (str): The name of the model.
+        model (Model): The model object.
+
+    Returns:
+        str: The Markdown representation of the model.
+    """
+    parts = [
+        f"### {model_name}",
+        f"*{description_to_markdown(model.description)}*",
+        "",
+        "| Field | Type | Attributes |",
+        "| ----- | ---- | ---------- |",
+    ]
+
+    # Append generated field rows
+    parts.append(fields_to_markdown(model.fields))
+    return "\n".join(parts)
+
+
+def fields_to_markdown(
+    fields: Dict[str, Field],
+    level: int = 0,
+) -> str:
+    """
+    Generate Markdown table rows for all fields in a model.
+
+    Args:
+        fields (Dict[str, Field]): The fields to process.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the fields.
+    """
+
+    return "\n".join(field_to_markdown(field_name, field, level) for field_name, field in fields.items())
+
+
+def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
+    """
+    Generate Markdown table rows for a single field, including nested structures.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the field.
+    """
+    tabs = "&numsp;" * level
+    arrow = "&#x21b3;" if level > 0 else ""
+    column_name = f"{tabs}{arrow} {field_name}"
+
+    attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
+
+    rows = [f"| {column_name} | {field.type} | {attributes} |"]
+
+    # Recursively handle nested fields, array, map
+    if field.fields:
+        rows.append(fields_to_markdown(field.fields, level + 1))
+    if field.items:
+        rows.append(field_to_markdown("items", field.items, level + 1))
+    if field.keys:
+        rows.append(field_to_markdown("keys", field.keys, level + 1))
+    if field.values:
+        rows.append(field_to_markdown("values", field.values, level + 1))
+
+    return "\n".join(rows)
+
+
+def definitions_to_markdown(definitions: Dict[str, Definition]) -> str:
+    if not definitions:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Domain | Attributes |",
+        "| ---- | ---- | ------ | ---------- |",
+    ]
+    for definition_name, definition in definitions.items():
+        markdown_parts.append(
+            f"| {definition_name} | {definition.type or ''} | {definition.domain or ''} | {obj_attributes_to_markdown(definition, {'name', 'type', 'domain'}, True)} |",
+        )
+    return "\n".join(markdown_parts)
+
+
+def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
+    if not service_level:
+        return ""
+    sections = {
+        "Availability": service_level.availability,
+        "Retention": service_level.retention,
+        "Latency": service_level.latency,
+        "Freshness": service_level.freshness,
+        "Frequency": service_level.frequency,
+        "Support": service_level.support,
+        "Backup": service_level.backup,
+    }
+    result = [f"### {name}\n{obj_attributes_to_markdown(attr)}\n" for name, attr in sections.items() if attr]
+    return "\n".join(result)
+
+
+def description_to_markdown(description: str | None) -> str:
+    return (description or "No description.").replace("\n", "<br>")
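
A hedged usage sketch for the new Markdown exporter: constructing a tiny contract in code and rendering it with `to_markdown`. The keyword construction of the pydantic models (including the `Info` class and its fields) is assumed here; only the attributes that `to_markdown` reads above are guaranteed by this diff.

    from datacontract.export.markdown_converter import to_markdown
    from datacontract.model.data_contract_specification import (
        DataContractSpecification,
        Field,
        Info,
        Model,
    )

    # Minimal, illustrative contract object.
    contract = DataContractSpecification(
        id="orders-v1",
        info=Info(title="Orders", version="1.0.0"),
        models={
            "orders": Model(
                description="One row per order.",
                fields={"order_id": Field(type="string", required=True)},
            )
        },
    )

    # Produces "# orders-v1" followed by "## Info", "## Servers", "## Terms",
    # "## Models", "## Definitions", and "## Service levels" sections.
    print(to_markdown(contract))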

datacontract/export/odcs_v3_exporter.py

@@ -226,6 +226,12 @@ def to_property(field_name: str, field: Field) -> dict:
         property["examples"] = field.examples
     if field.example is not None:
         property["examples"] = [field.example]
+    if field.primaryKey is not None and field.primaryKey:
+        property["primaryKey"] = field.primaryKey
+        property["primaryKeyPosition"] = 1
+    if field.primary is not None and field.primary:
+        property["primaryKey"] = field.primary
+        property["primaryKeyPosition"] = 1
 
     property["customProperties"] = []
     if field.model_extra is not None:
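
The added branches mean that a field flagged as a primary key, via either the newer `primaryKey` attribute or the legacy `primary` attribute, now carries a key marker in the exported ODCS property. A sketch of the resulting fragment (note that the position is hard-coded to 1; composite key ordering is not computed here):

    # Illustrative ODCS property fragment for a field with primaryKey: true.
    property_fragment = {
        "primaryKey": True,
        "primaryKeyPosition": 1,
    }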

datacontract/export/sodacl_converter.py

@@ -30,9 +30,10 @@ def to_sodacl_yaml(
 
 def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
+    model_name = to_model_name(model_key, model_value, server_type)
     fields = model_value.fields
 
-    quote_field_name = server_type in ["postgres"]
+    quote_field_name = server_type in ["postgres", "sqlserver"]
 
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
@@ -62,25 +63,41 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
         if field.enum is not None and len(field.enum) > 0:
            checks.append(check_field_enum(field_name, field.enum, quote_field_name))
         if field.quality is not None and len(field.quality) > 0:
-            quality_list = check_quality_list(model_key, field_name, field.quality)
+            quality_list = check_quality_list(model_name, field_name, field.quality)
             if (quality_list is not None) and len(quality_list) > 0:
                 checks.append(quality_list)
         # TODO references: str = None
         # TODO format
 
     if model_value.quality is not None and len(model_value.quality) > 0:
-        quality_list = check_quality_list(model_key, None, model_value.quality)
+        quality_list = check_quality_list(model_name, None, model_value.quality)
         if (quality_list is not None) and len(quality_list) > 0:
             checks.append(quality_list)
 
-    checks_for_model_key = f"checks for {model_key}"
+    checks_for_model_key = f"checks for {model_name}"
 
     if quote_field_name:
-        checks_for_model_key = f'checks for "{model_key}"'
+        checks_for_model_key = f'checks for "{model_name}"'
 
     return checks_for_model_key, checks
 
 
+def to_model_name(model_key, model_value, server_type):
+    if server_type == "databricks":
+        if model_value.config is not None and "databricksTable" in model_value.config:
+            return model_value.config["databricksTable"]
+    if server_type == "snowflake":
+        if model_value.config is not None and "snowflakeTable" in model_value.config:
+            return model_value.config["snowflakeTable"]
+    if server_type == "sqlserver":
+        if model_value.config is not None and "sqlserverTable" in model_value.config:
+            return model_value.config["sqlserverTable"]
+    if server_type == "postgres" or server_type == "postgresql":
+        if model_value.config is not None and "postgresTable" in model_value.config:
+            return model_value.config["postgresTable"]
+    return model_key
+
+
 def check_field_is_present(field_name):
     return {
         "schema": {
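
The new `to_model_name` helper lets a model override its physical table name per server type through its `config` block, and the SodaCL `checks for ...` header now uses that resolved name. A hedged usage sketch with a stand-in model object (only the `config` attribute is read by the helper):

    from types import SimpleNamespace

    from datacontract.export.sodacl_converter import to_model_name

    # Stand-in for a Model; to_model_name only looks at .config.
    orders = SimpleNamespace(config={"databricksTable": "fact_orders"})

    print(to_model_name("orders", orders, "databricks"))  # fact_orders
    print(to_model_name("orders", orders, "postgres"))    # orders (no postgresTable override)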

datacontract/export/sql_converter.py

@@ -113,7 +113,7 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
         result += f" {field_name} {type}"
         if field.required:
             result += " not null"
-        if field.primary:
+        if field.primaryKey or field.primary:
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'

datacontract/export/sql_type_converter.py

@@ -182,11 +182,16 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-        return "STRUCT"
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-        return "ARRAY"
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
     return None
 
 
@@ -311,6 +316,27 @@ def convert_type_to_sqlserver(field: Field) -> None | str:
 
 def convert_type_to_bigquery(field: Field) -> None | str:
     """Convert from supported datacontract types to equivalent bigquery types"""
+
+    # BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["array"]:
+        item_type = convert_type_to_bigquery(field.items)
+        return f"ARRAY<{item_type}>"
+
+    if field_type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_type_to_bigquery(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
+
     return map_type_to_bigquery(field)
 
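
With these changes, nested and repeated fields are rendered as parameterized complex types instead of the bare `STRUCT`/`ARRAY` keywords. A hedged sketch of the Databricks output for a nested field (keyword construction of `Field` and the exact scalar type names are assumptions):

    from datacontract.export.sql_type_converter import convert_to_databricks
    from datacontract.model.data_contract_specification import Field

    address = Field(
        type="struct",
        fields={
            "street": Field(type="string"),
            "zip": Field(type="string"),
        },
    )
    tags = Field(type="array", items=Field(type="string"))

    print(convert_to_databricks(address))  # e.g. STRUCT<street STRING, zip STRING>
    print(convert_to_databricks(tags))     # e.g. ARRAY<STRING>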

datacontract/export/sqlalchemy_converter.py

@@ -114,7 +114,9 @@ def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call,
     if new_type is None:
         raise RuntimeError(f"Unsupported field type {field.type}.")
 
-    return Column(new_type, nullable=not field.required, comment=field.description, primary_key=field.primary), None
+    return Column(
+        new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
+    ), None
 
 
 def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:

datacontract/imports/csv_importer.py (new file)

@@ -0,0 +1,89 @@
+import os
+
+import clevercsv
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Example, Field, Model, Server
+
+
+class CsvImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_csv(data_contract_specification, self.import_format, source)
+
+
+def import_csv(data_contract_specification: DataContractSpecification, format: str, source: str):
+    include_example = False
+
+    # detect encoding and dialect
+    encoding = clevercsv.encoding.get_encoding(source)
+    with open(source, "r", newline="") as fp:
+        dialect = clevercsv.Sniffer().sniff(fp.read(10000))
+
+    # using auto detecting of the format and encoding
+    df = clevercsv.read_dataframe(source)
+
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    # use the file name as table name
+    table_name = os.path.splitext(os.path.basename(source))[0]
+
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    data_contract_specification.servers["production"] = Server(
+        type="local", path=source, format="csv", delimiter=dialect.delimiter
+    )
+
+    fields = {}
+    for column, dtype in df.dtypes.items():
+        field = Field()
+        field.type = map_type_from_pandas(dtype.name)
+        fields[column] = field
+
+    data_contract_specification.models[table_name] = Model(
+        type="table",
+        description=f"Csv file with encoding {encoding}",
+        fields=fields,
+    )
+
+    # multiline data is not correctly handled by yaml dump
+    if include_example:
+        if data_contract_specification.examples is None:
+            data_contract_specification.examples = []
+
+        # read first 10 lines with the detected encoding
+        with open(source, "r", encoding=encoding) as csvfile:
+            lines = csvfile.readlines()[:10]
+
+        data_contract_specification.examples.append(Example(type="csv", model=table_name, data="".join(lines)))
+
+    return data_contract_specification
+
+
+def map_type_from_pandas(sql_type: str):
+    if sql_type is None:
+        return None
+
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed == "object":
+        return "string"
+    elif sql_type_normed.startswith("str"):
+        return "string"
+    elif sql_type_normed.startswith("int"):
+        return "integer"
+    elif sql_type_normed.startswith("float"):
+        return "float"
+    elif sql_type_normed.startswith("bool"):
+        return "boolean"
+    elif sql_type_normed.startswith("timestamp"):
+        return "timestamp"
+    elif sql_type_normed == "datetime64":
+        return "date"
+    elif sql_type_normed == "timedelta[ns]":
+        return "timestamp_ntz"
+    else:
+        return "variant"
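
A hedged usage sketch for the new CSV importer: calling `import_csv` directly on a local file. Only the function and attributes visible in the diff above are assumed; the starting specification is constructed with keyword arguments for illustration, the file name `orders.csv` is hypothetical, and the importer relies on the `clevercsv` dependency.

    from datacontract.imports.csv_importer import import_csv
    from datacontract.model.data_contract_specification import DataContractSpecification

    spec = DataContractSpecification(id="orders-csv", models={}, servers={})

    # Detects encoding and delimiter, infers column types from pandas dtypes,
    # adds a "production" local server, and names the model after the file.
    spec = import_csv(spec, "csv", "orders.csv")

    print(spec.models["orders"].fields)
    print(spec.servers["production"].delimiter)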

datacontract/imports/dbml_importer.py

@@ -84,7 +84,7 @@ def import_table_fields(table, references) -> dict[str, Field]:
         imported_fields[field_name] = Field()
         imported_fields[field_name].required = field.not_null
         imported_fields[field_name].description = field.note.text
-        imported_fields[field_name].primary = field.pk
+        imported_fields[field_name].primaryKey = field.pk
         imported_fields[field_name].unique = field.unique
         # This is an assumption, that these might be valid SQL Types, since
         # DBML doesn't really enforce anything other than 'no spaces' in column types