datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (48)
  1. datacontract/breaking/breaking_rules.py +4 -0
  2. datacontract/cli.py +49 -32
  3. datacontract/data_contract.py +14 -11
  4. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
  5. datacontract/engines/soda/check_soda_execute.py +9 -4
  6. datacontract/engines/soda/connections/databricks.py +12 -3
  7. datacontract/engines/soda/connections/duckdb.py +22 -9
  8. datacontract/export/data_caterer_converter.py +20 -7
  9. datacontract/export/dbml_converter.py +2 -2
  10. datacontract/export/dbt_converter.py +41 -16
  11. datacontract/export/exporter.py +6 -2
  12. datacontract/export/exporter_factory.py +48 -14
  13. datacontract/export/iceberg_converter.py +3 -3
  14. datacontract/export/markdown_converter.py +208 -0
  15. datacontract/export/odcs_v3_exporter.py +6 -0
  16. datacontract/export/sodacl_converter.py +22 -5
  17. datacontract/export/sql_converter.py +1 -1
  18. datacontract/export/sql_type_converter.py +28 -2
  19. datacontract/export/sqlalchemy_converter.py +3 -1
  20. datacontract/imports/csv_importer.py +89 -0
  21. datacontract/imports/dbml_importer.py +1 -1
  22. datacontract/imports/dbt_importer.py +94 -12
  23. datacontract/imports/importer.py +1 -0
  24. datacontract/imports/importer_factory.py +5 -0
  25. datacontract/imports/odcs_v2_importer.py +1 -1
  26. datacontract/imports/odcs_v3_importer.py +1 -1
  27. datacontract/imports/sql_importer.py +1 -1
  28. datacontract/init/init_template.py +20 -0
  29. datacontract/integration/datamesh_manager.py +15 -9
  30. datacontract/lint/linters/field_reference_linter.py +10 -1
  31. datacontract/lint/resolve.py +48 -14
  32. datacontract/lint/schema.py +10 -3
  33. datacontract/model/data_contract_specification.py +13 -4
  34. datacontract/model/run.py +1 -0
  35. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  36. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  37. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  38. datacontract/templates/datacontract.html +20 -1
  39. datacontract/templates/partials/definition.html +15 -5
  40. datacontract/templates/partials/model_field.html +10 -1
  41. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
  42. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
  43. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
  44. datacontract/init/download_datacontract_file.py +0 -17
  45. datacontract/integration/opentelemetry.py +0 -103
  46. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  47. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  48. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from typing import TypedDict
3
3
 
4
4
  from dbt.artifacts.resources.v1.components import ColumnInfo
5
5
  from dbt.contracts.graph.manifest import Manifest
6
- from dbt.contracts.graph.nodes import GenericTestNode
6
+ from dbt.contracts.graph.nodes import GenericTestNode, ManifestNode, ModelNode
7
7
  from dbt_common.contracts.constraints import ConstraintType
8
8
 
9
9
  from datacontract.imports.bigquery_importer import map_type_from_bigquery
@@ -51,6 +51,46 @@ def read_dbt_manifest(manifest_path: str) -> Manifest:
51
51
  return manifest
52
52
 
53
53
 
54
+ def _get_primary_keys(manifest: Manifest, node: ManifestNode) -> list[str]:
55
+ node_unique_id = node.unique_id
56
+ if isinstance(node, ModelNode):
57
+ test_nodes = []
58
+ for node_id in manifest.child_map.get(node_unique_id, []):
59
+ test_node = manifest.nodes.get(node_id)
60
+ if not test_node or test_node.resource_type != "test":
61
+ continue
62
+ if not isinstance(test_node, GenericTestNode):
63
+ continue
64
+ if test_node.config.where is not None:
65
+ continue
66
+ test_nodes.append(test_node)
67
+ return node.infer_primary_key(test_nodes)
68
+ return []
69
+
70
+
71
+ def _get_references(manifest: Manifest, node: ManifestNode) -> dict[str, str]:
72
+ node_unique_id = node.unique_id
73
+ references = {}
74
+ for node_id in manifest.child_map.get(node_unique_id, []):
75
+ test_node = manifest.nodes.get(node_id)
76
+ if not test_node or test_node.resource_type != "test":
77
+ continue
78
+ if not isinstance(test_node, GenericTestNode):
79
+ continue
80
+ if test_node.test_metadata.name != "relationships":
81
+ continue
82
+ if test_node.config.where is not None:
83
+ continue
84
+ if test_node.attached_node != node_unique_id:
85
+ continue
86
+ relationship_target_node_id = [n for n in test_node.depends_on.nodes if n != node_unique_id][0]
87
+ relationship_target_node = manifest.nodes.get(relationship_target_node_id)
88
+ references[f"{node.name}.{test_node.column_name}"] = (
89
+ f"""{relationship_target_node.name}.{test_node.test_metadata.kwargs["field"]}"""
90
+ )
91
+ return references
92
+
93
+
54
94
  def import_dbt_manifest(
55
95
  data_contract_specification: DataContractSpecification,
56
96
  manifest: Manifest,
@@ -65,28 +105,40 @@ def import_dbt_manifest(
65
105
  data_contract_specification.info.dbt_version = manifest.metadata.dbt_version
66
106
  adapter_type = manifest.metadata.adapter_type
67
107
  data_contract_specification.models = data_contract_specification.models or {}
68
- for model_contents in manifest.nodes.values():
108
+ for node in manifest.nodes.values():
69
109
  # Only intressted in processing models.
70
- if model_contents.resource_type not in resource_types:
110
+ if node.resource_type not in resource_types:
71
111
  continue
72
112
 
73
113
  # To allow args stored in dbt_models to filter relevant models.
74
114
  # If dbt_models is empty, use all models.
75
- if dbt_nodes and model_contents.name not in dbt_nodes:
115
+ if dbt_nodes and node.name not in dbt_nodes:
76
116
  continue
77
117
 
118
+ model_unique_id = node.unique_id
119
+ primary_keys = _get_primary_keys(manifest, node)
120
+ references = _get_references(manifest, node)
121
+
122
+ primary_key = None
123
+ if len(primary_keys) == 1:
124
+ primary_key = primary_keys[0]
125
+
78
126
  dc_model = Model(
79
- description=model_contents.description,
80
- tags=model_contents.tags,
127
+ description=node.description,
128
+ tags=node.tags,
81
129
  fields=create_fields(
82
130
  manifest,
83
- model_unique_id=model_contents.unique_id,
84
- columns=model_contents.columns,
131
+ model_unique_id=model_unique_id,
132
+ columns=node.columns,
133
+ primary_key_name=primary_key,
134
+ references=references,
85
135
  adapter_type=adapter_type,
86
136
  ),
87
137
  )
138
+ if len(primary_keys) > 1:
139
+ dc_model.primaryKey = primary_keys
88
140
 
89
- data_contract_specification.models[model_contents.name] = dc_model
141
+ data_contract_specification.models[node.name] = dc_model
90
142
 
91
143
  return data_contract_specification
92
144
 
@@ -98,9 +150,17 @@ def convert_data_type_by_adapter_type(data_type: str, adapter_type: str) -> str:
98
150
 
99
151
 
100
152
  def create_fields(
101
- manifest: Manifest, model_unique_id: str, columns: dict[str, ColumnInfo], adapter_type: str
153
+ manifest: Manifest,
154
+ model_unique_id: str,
155
+ columns: dict[str, ColumnInfo],
156
+ primary_key_name: str,
157
+ references: dict[str, str],
158
+ adapter_type: str,
102
159
  ) -> dict[str, Field]:
103
- fields = {column.name: create_field(manifest, model_unique_id, column, adapter_type) for column in columns.values()}
160
+ fields = {
161
+ column.name: create_field(manifest, model_unique_id, column, primary_key_name, references, adapter_type)
162
+ for column in columns.values()
163
+ }
104
164
  return fields
105
165
 
106
166
 
@@ -137,7 +197,14 @@ def get_column_tests(manifest: Manifest, model_name: str, column_name: str) -> l
137
197
  return column_tests
138
198
 
139
199
 
140
- def create_field(manifest: Manifest, model_unique_id: str, column: ColumnInfo, adapter_type: str) -> Field:
200
+ def create_field(
201
+ manifest: Manifest,
202
+ model_unique_id: str,
203
+ column: ColumnInfo,
204
+ primary_key_name: str,
205
+ references: dict[str, str],
206
+ adapter_type: str,
207
+ ) -> Field:
141
208
  column_type = convert_data_type_by_adapter_type(column.data_type, adapter_type) if column.data_type else ""
142
209
  field = Field(
143
210
  description=column.description,
@@ -155,4 +222,19 @@ def create_field(manifest: Manifest, model_unique_id: str, column: ColumnInfo, a
155
222
  if required:
156
223
  field.required = required
157
224
 
225
+ unique = False
226
+ if any(constraint.type == ConstraintType.unique for constraint in column.constraints):
227
+ unique = True
228
+ if [test for test in all_tests if test["test_type"] == "unique"]:
229
+ unique = True
230
+ if unique:
231
+ field.unique = unique
232
+
233
+ if column.name == primary_key_name:
234
+ field.primaryKey = True
235
+
236
+ references_key = f"{manifest.nodes[model_unique_id].name}.{column.name}"
237
+ if references_key in references:
238
+ field.references = references[references_key]
239
+
158
240
  return field
@@ -31,6 +31,7 @@ class ImportFormat(str, Enum):
31
31
  spark = "spark"
32
32
  iceberg = "iceberg"
33
33
  parquet = "parquet"
34
+ csv = "csv"
34
35
 
35
36
  @classmethod
36
37
  def get_supported_formats(cls):
@@ -104,3 +104,8 @@ importer_factory.register_lazy_importer(
104
104
  module_path="datacontract.imports.parquet_importer",
105
105
  class_name="ParquetImporter",
106
106
  )
107
+ importer_factory.register_lazy_importer(
108
+ name=ImportFormat.csv,
109
+ module_path="datacontract.imports.csv_importer",
110
+ class_name="CsvImporter",
111
+ )
@@ -141,7 +141,7 @@ def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str,
141
141
  type=mapped_type,
142
142
  title=column.get("businessName") if column.get("businessName") is not None else "",
143
143
  required=not column.get("isNullable") if column.get("isNullable") is not None else False,
144
- primary=column.get("isPrimary") if column.get("isPrimary") is not None else False,
144
+ primaryKey=column.get("isPrimary") if column.get("isPrimary") is not None else False,
145
145
  unique=column.get("isUnique") if column.get("isUnique") is not None else False,
146
146
  classification=column.get("classification") if column.get("classification") is not None else "",
147
147
  tags=column.get("tags") if column.get("tags") is not None else [],
@@ -265,7 +265,7 @@ def import_fields(
265
265
  type=mapped_type,
266
266
  title=odcs_property.get("businessName"),
267
267
  required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
268
- primary=odcs_property.get("primaryKey")
268
+ primaryKey=odcs_property.get("primaryKey")
269
269
  if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
270
270
  else False,
271
271
  unique=odcs_property.get("unique"),
@@ -38,7 +38,7 @@ def import_sql(data_contract_specification: DataContractSpecification, format: s
38
38
  if primary_key in fields:
39
39
  fields[primary_key].unique = True
40
40
  fields[primary_key].required = True
41
- fields[primary_key].primary = True
41
+ fields[primary_key].primaryKey = True
42
42
 
43
43
  data_contract_specification.models[table_name] = Model(
44
44
  type="table",
@@ -0,0 +1,20 @@
1
+ import importlib.resources as resources
2
+ import logging
3
+
4
+ import requests
5
+
6
+ DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml"
7
+
8
+
9
+ def get_init_template(location: str = None) -> str:
10
+ if location is None:
11
+ logging.info("Use default bundled template " + DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
12
+ schemas = resources.files("datacontract")
13
+ template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
14
+ with template.open("r") as file:
15
+ return file.read()
16
+ elif location.startswith("http://") or location.startswith("https://"):
17
+ return requests.get(location).text
18
+ else:
19
+ with open(location, "r") as file:
20
+ return file.read()
@@ -2,11 +2,10 @@ import os
2
2
 
3
3
  import requests
4
4
 
5
- from datacontract.model.data_contract_specification import DataContractSpecification
6
5
  from datacontract.model.run import Run
7
6
 
8
7
 
9
- def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
8
+ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_verification: bool):
10
9
  try:
11
10
  if publish_url is None:
12
11
  # this url supports Data Mesh Manager and Data Contract Manager
@@ -28,7 +27,12 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
28
27
  headers = {"Content-Type": "application/json", "x-api-key": api_key}
29
28
  request_body = run.model_dump_json()
30
29
  # print("Request Body:", request_body)
31
- response = requests.post(url, data=request_body, headers=headers)
30
+ response = requests.post(
31
+ url,
32
+ data=request_body,
33
+ headers=headers,
34
+ verify=ssl_verification,
35
+ )
32
36
  # print("Status Code:", response.status_code)
33
37
  # print("Response Body:", response.text)
34
38
  if response.status_code != 200:
@@ -39,9 +43,12 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
39
43
  run.log_error(f"Failed publishing test results. Error: {str(e)}")
40
44
 
41
45
 
42
- def publish_data_contract_to_datamesh_manager(data_contract_specification: DataContractSpecification):
46
+ def publish_data_contract_to_datamesh_manager(data_contract_dict: dict, ssl_verification: bool):
43
47
  try:
44
48
  api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
49
+ host = "https://api.datamesh-manager.com"
50
+ if os.getenv("DATAMESH_MANAGER_HOST") is not None:
51
+ host = os.getenv("DATAMESH_MANAGER_HOST")
45
52
  if api_key is None:
46
53
  api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
47
54
  if api_key is None:
@@ -49,14 +56,13 @@ def publish_data_contract_to_datamesh_manager(data_contract_specification: DataC
49
56
  "Cannot publish data contract, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
50
57
  )
51
58
  headers = {"Content-Type": "application/json", "x-api-key": api_key}
52
- spec = data_contract_specification
53
- id = spec.id
54
- url = "https://api.datamesh-manager.com/api/datacontracts/{0}".format(id)
55
- request_body = spec.model_dump_json().encode("utf-8")
59
+ id = data_contract_dict["id"]
60
+ url = f"{host}/api/datacontracts/{id}"
56
61
  response = requests.put(
57
62
  url=url,
58
- data=request_body,
63
+ json=data_contract_dict,
59
64
  headers=headers,
65
+ verify=ssl_verification,
60
66
  )
61
67
  if response.status_code != 200:
62
68
  print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
@@ -22,7 +22,16 @@ class FieldReferenceLinter(Linter):
22
22
  for model_name, model in contract.models.items():
23
23
  for field_name, field in model.fields.items():
24
24
  if field.references:
25
- (ref_model, ref_field) = field.references.split(".", maxsplit=2)
25
+ reference_hierarchy = field.references.split(".")
26
+ if len(reference_hierarchy) != 2:
27
+ result = result.with_error(
28
+ f"Field '{field_name}' in model '{model_name}'"
29
+ f" references must follow the model.field syntax and refer to a field in a model in this data contract."
30
+ )
31
+ continue
32
+ ref_model = reference_hierarchy[0]
33
+ ref_field = reference_hierarchy[1]
34
+
26
35
  if ref_model not in contract.models:
27
36
  result = result.with_error(
28
37
  f"Field '{field_name}' in model '{model_name}'"
@@ -44,6 +44,27 @@ def resolve_data_contract(
44
44
  )
45
45
 
46
46
 
47
+ def resolve_data_contract_dict(
48
+ data_contract_location: str = None,
49
+ data_contract_str: str = None,
50
+ data_contract: DataContractSpecification = None,
51
+ ) -> dict:
52
+ if data_contract_location is not None:
53
+ return _to_yaml(read_resource(data_contract_location))
54
+ elif data_contract_str is not None:
55
+ return _to_yaml(data_contract_str)
56
+ elif data_contract is not None:
57
+ return data_contract.model_dump()
58
+ else:
59
+ raise DataContractException(
60
+ type="lint",
61
+ result="failed",
62
+ name="Check that data contract YAML is valid",
63
+ reason="Data contract needs to be provided",
64
+ engine="datacontract",
65
+ )
66
+
67
+
47
68
  def resolve_data_contract_from_location(
48
69
  location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
49
70
  ) -> DataContractSpecification:
@@ -54,20 +75,30 @@ def resolve_data_contract_from_location(
54
75
  def inline_definitions_into_data_contract(spec: DataContractSpecification):
55
76
  for model in spec.models.values():
56
77
  for field in model.fields.values():
57
- # If ref_obj is not empty, we've already inlined definitions.
58
- if not field.ref and not field.ref_obj:
59
- continue
78
+ inline_definition_into_field(field, spec)
79
+
80
+
81
+ def inline_definition_into_field(field, spec):
82
+ # iterate recursively over arrays
83
+ if field.items is not None:
84
+ inline_definition_into_field(field.items, spec)
60
85
 
61
- definition = _resolve_definition_ref(field.ref, spec)
62
- field.ref_obj = definition
86
+ # iterate recursively over nested fields
87
+ if field.fields is not None:
88
+ for nested_field_name, nested_field in field.fields.items():
89
+ inline_definition_into_field(nested_field, spec)
63
90
 
64
- for field_name in field.model_fields.keys():
65
- if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
66
- setattr(field, field_name, getattr(definition, field_name))
67
- # extras
68
- for extra_field_name, extra_field_value in definition.model_extra.items():
69
- if extra_field_name not in field.model_extra.keys():
70
- setattr(field, extra_field_name, extra_field_value)
91
+ if not field.ref:
92
+ return
93
+
94
+ definition = _resolve_definition_ref(field.ref, spec)
95
+ for field_name in field.model_fields.keys():
96
+ if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
97
+ setattr(field, field_name, getattr(definition, field_name))
98
+ # extras
99
+ for extra_field_name, extra_field_value in definition.model_extra.items():
100
+ if extra_field_name not in field.model_extra.keys():
101
+ setattr(field, extra_field_name, extra_field_value)
71
102
 
72
103
 
73
104
  def _resolve_definition_ref(ref, spec) -> Definition:
@@ -202,9 +233,12 @@ def _resolve_data_contract_from_str(
202
233
  yaml_dict = _to_yaml(data_contract_str)
203
234
 
204
235
  if is_open_data_contract_standard(yaml_dict):
236
+ logging.info("Importing ODCS v3")
205
237
  # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
206
238
  data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
207
239
  return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
240
+ else:
241
+ logging.info("Importing DCS")
208
242
 
209
243
  _validate_data_contract_specification_schema(yaml_dict, schema_location)
210
244
  data_contract_specification = yaml_dict
@@ -218,7 +252,7 @@ def _resolve_data_contract_from_str(
218
252
  return spec
219
253
 
220
254
 
221
- def _to_yaml(data_contract_str):
255
+ def _to_yaml(data_contract_str) -> dict:
222
256
  try:
223
257
  yaml_dict = yaml.safe_load(data_contract_str)
224
258
  return yaml_dict
@@ -236,7 +270,7 @@ def _to_yaml(data_contract_str):
236
270
  def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
237
271
  schema = fetch_schema(schema_location)
238
272
  try:
239
- fastjsonschema.validate(schema, data_contract_yaml)
273
+ fastjsonschema.validate(schema, data_contract_yaml, use_default=False)
240
274
  logging.debug("YAML data is valid.")
241
275
  except JsonSchemaValueException as e:
242
276
  logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
@@ -1,4 +1,6 @@
1
+ import importlib.resources as resources
1
2
  import json
3
+ import logging
2
4
  import os
3
5
  from typing import Any, Dict
4
6
 
@@ -6,6 +8,8 @@ import requests
6
8
 
7
9
  from datacontract.model.exceptions import DataContractException
8
10
 
11
+ DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.1.0.schema.json"
12
+
9
13
 
10
14
  def fetch_schema(location: str = None) -> Dict[str, Any]:
11
15
  """
@@ -27,9 +31,12 @@ def fetch_schema(location: str = None) -> Dict[str, Any]:
27
31
 
28
32
  """
29
33
  if location is None:
30
- location = "https://datacontract.com/datacontract.schema.json"
31
-
32
- if location.startswith("http://") or location.startswith("https://"):
34
+ logging.info("Use default bundled schema " + DEFAULT_DATA_CONTRACT_SCHEMA)
35
+ schemas = resources.files("datacontract")
36
+ schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
37
+ with schema_file.open("r") as file:
38
+ schema = json.load(file)
39
+ elif location.startswith("http://") or location.startswith("https://"):
33
40
  response = requests.get(location)
34
41
  schema = response.json()
35
42
  else:
@@ -72,6 +72,7 @@ class Server(pyd.BaseModel):
72
72
  dataProductId: str = None
73
73
  outputPortId: str = None
74
74
  driver: str = None
75
+ storageAccount: str = None
75
76
  roles: List[ServerRole] = None
76
77
 
77
78
  model_config = pyd.ConfigDict(
@@ -112,6 +113,7 @@ class Definition(pyd.BaseModel):
112
113
  tags: List[str] = []
113
114
  links: Dict[str, str] = {}
114
115
  example: str = None
116
+ examples: List[Any] | None = None
115
117
 
116
118
  model_config = pyd.ConfigDict(
117
119
  extra="allow",
@@ -141,13 +143,15 @@ class Quality(pyd.BaseModel):
141
143
 
142
144
  class Field(pyd.BaseModel):
143
145
  ref: str = pyd.Field(default=None, alias="$ref")
144
- ref_obj: Definition = pyd.Field(default=None, exclude=True)
145
146
  title: str | None = None
146
147
  type: str = None
147
148
  format: str = None
148
149
  required: bool = None
149
- primary: bool = None
150
- primaryKey: bool = None
150
+ primary: bool = pyd.Field(
151
+ default=None,
152
+ deprecated="Removed in Data Contract Specification v1.1.0. Use primaryKey instead.",
153
+ )
154
+ primaryKey: bool | None = None
151
155
  unique: bool | None = None
152
156
  references: str = None
153
157
  description: str | None = None
@@ -169,7 +173,10 @@ class Field(pyd.BaseModel):
169
173
  values: "Field" = None
170
174
  precision: int = None
171
175
  scale: int = None
172
- example: str = None
176
+ example: str = pyd.Field(
177
+ default=None,
178
+ deprecated="Removed in Data Contract Specification v1.1.0. Use " "examples instead.",
179
+ )
173
180
  examples: List[Any] | None = None
174
181
  quality: List[Quality] | None = []
175
182
  config: Dict[str, Any] | None = None
@@ -186,6 +193,8 @@ class Model(pyd.BaseModel):
186
193
  title: Optional[str] = None
187
194
  fields: Dict[str, Field] = {}
188
195
  quality: List[Quality] | None = []
196
+ primaryKey: List[str] | None = []
197
+ examples: List[Any] | None = None
189
198
  config: Dict[str, Any] = None
190
199
  tags: List[str] | None = None
191
200
 
datacontract/model/run.py CHANGED
@@ -12,6 +12,7 @@ class ResultEnum(str, Enum):
12
12
  warning = "warning"
13
13
  failed = "failed"
14
14
  error = "error"
15
+ info = "info"
15
16
  unknown = "unknown"
16
17
 
17
18
 
@@ -0,0 +1,91 @@
1
+ dataContractSpecification: 1.1.0
2
+ id: my-data-contract-id
3
+ info:
4
+ title: My Data Contract
5
+ version: 0.0.1
6
+ # description:
7
+ # owner:
8
+ # contact:
9
+ # name:
10
+ # url:
11
+ # email:
12
+
13
+
14
+ ### servers
15
+
16
+ #servers:
17
+ # production:
18
+ # type: s3
19
+ # location: s3://
20
+ # format: parquet
21
+ # delimiter: new_line
22
+
23
+ ### terms
24
+
25
+ #terms:
26
+ # usage:
27
+ # limitations:
28
+ # billing:
29
+ # noticePeriod:
30
+
31
+
32
+ ### models
33
+
34
+ # models:
35
+ # my_model:
36
+ # description:
37
+ # type:
38
+ # fields:
39
+ # my_field:
40
+ # type:
41
+ # description:
42
+
43
+
44
+ ### definitions
45
+
46
+ # definitions:
47
+ # my_field:
48
+ # domain:
49
+ # name:
50
+ # title:
51
+ # type:
52
+ # description:
53
+ # example:
54
+ # pii:
55
+ # classification:
56
+
57
+
58
+ ### servicelevels
59
+
60
+ #servicelevels:
61
+ # availability:
62
+ # description: The server is available during support hours
63
+ # percentage: 99.9%
64
+ # retention:
65
+ # description: Data is retained for one year because!
66
+ # period: P1Y
67
+ # unlimited: false
68
+ # latency:
69
+ # description: Data is available within 25 hours after the order was placed
70
+ # threshold: 25h
71
+ # sourceTimestampField: orders.order_timestamp
72
+ # processedTimestampField: orders.processed_timestamp
73
+ # freshness:
74
+ # description: The age of the youngest row in a table.
75
+ # threshold: 25h
76
+ # timestampField: orders.order_timestamp
77
+ # frequency:
78
+ # description: Data is delivered once a day
79
+ # type: batch # or streaming
80
+ # interval: daily # for batch, either or cron
81
+ # cron: 0 0 * * * # for batch, either or interval
82
+ # support:
83
+ # description: The data is available during typical business hours at headquarters
84
+ # time: 9am to 5pm in EST on business days
85
+ # responseTime: 1h
86
+ # backup:
87
+ # description: Data is backed up once a week, every Sunday at 0:00 UTC.
88
+ # interval: weekly
89
+ # cron: 0 0 * * 0
90
+ # recoveryTime: 24 hours
91
+ # recoveryPoint: 1 week