datacontract-cli 0.10.18__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (28)
  1. datacontract/cli.py +20 -27
  2. datacontract/data_contract.py +7 -8
  3. datacontract/engines/soda/connections/duckdb.py +22 -9
  4. datacontract/export/data_caterer_converter.py +20 -7
  5. datacontract/export/sodacl_converter.py +21 -4
  6. datacontract/export/sql_type_converter.py +7 -2
  7. datacontract/imports/csv_importer.py +89 -0
  8. datacontract/imports/importer.py +1 -0
  9. datacontract/imports/importer_factory.py +5 -0
  10. datacontract/init/init_template.py +20 -0
  11. datacontract/integration/datamesh_manager.py +5 -10
  12. datacontract/lint/linters/field_reference_linter.py +10 -1
  13. datacontract/lint/resolve.py +22 -1
  14. datacontract/lint/schema.py +10 -3
  15. datacontract/model/data_contract_specification.py +2 -0
  16. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  17. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  18. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  19. datacontract/templates/datacontract.html +20 -1
  20. datacontract/templates/partials/definition.html +15 -5
  21. datacontract/templates/partials/model_field.html +9 -0
  22. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +445 -295
  23. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +27 -23
  24. datacontract/init/download_datacontract_file.py +0 -17
  25. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  26. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +0 -0
  27. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  28. {datacontract_cli-0.10.18.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
datacontract/cli.py CHANGED
@@ -1,3 +1,4 @@
+import os
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, List, Optional
@@ -15,15 +16,11 @@ from datacontract import web
 from datacontract.catalog.catalog import create_data_contract_html, create_index_html
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.imports.importer import ImportFormat
-from datacontract.init.download_datacontract_file import (
-    FileExistsException,
-    download_datacontract_file,
-)
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import (
    publish_data_contract_to_datamesh_manager,
 )
-
-DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+from datacontract.lint.resolve import resolve_data_contract_dict
 
 console = Console()
 
@@ -70,24 +67,21 @@ def common(
 @app.command()
 def init(
     location: Annotated[
-        str,
-        typer.Argument(help="The location (url or path) of the data contract yaml to create."),
+        str, typer.Argument(help="The location of the data contract file to create.")
     ] = "datacontract.yaml",
-    template: Annotated[
-        str, typer.Option(help="URL of a template or data contract")
-    ] = "https://datacontract.com/datacontract.init.yaml",
+    template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
     overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
 ):
     """
-    Download a datacontract.yaml template and write it to file.
+    Create an empty data contract.
     """
-    try:
-        download_datacontract_file(location, template, overwrite)
-    except FileExistsException:
+    if not overwrite and os.path.exists(location):
         console.print("File already exists, use --overwrite to overwrite")
         raise typer.Exit(code=1)
-    else:
-        console.print("📄 data contract written to " + location)
+    template_str = get_init_template(template)
+    with open(location, "w") as f:
+        f.write(template_str)
+    console.print("📄 data contract written to " + location)
 
 
 @app.command()
@@ -99,7 +93,7 @@ def lint(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -117,7 +111,7 @@ def test(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     server: Annotated[
         str,
         typer.Option(
@@ -135,7 +129,7 @@ def test(
     logs: Annotated[bool, typer.Option(help="Print logs")] = False,
     ssl_verification: Annotated[
         bool,
-        typer.Option(help="SSL verification when publishing the test results."),
+        typer.Option(help="SSL verification when publishing the data contract."),
     ] = True,
 ):
     """
@@ -150,6 +144,7 @@ def test(
         publish_url=publish,
         server=server,
         examples=examples,
+        ssl_verification=ssl_verification,
     ).test()
     if logs:
         _print_logs(run)
@@ -197,7 +192,7 @@ def export(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     # TODO: this should be a subcommand
     engine: Annotated[
         Optional[str],
@@ -284,7 +279,7 @@ def import_(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -321,7 +316,7 @@ def publish(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
     ssl_verification: Annotated[
         bool,
         typer.Option(help="SSL verification when publishing the data contract."),
@@ -331,9 +326,7 @@ def publish(
     Publish the data contract to the Data Mesh Manager.
     """
     publish_data_contract_to_datamesh_manager(
-        data_contract_specification=DataContract(
-            data_contract_file=location, schema_location=schema
-        ).get_data_contract_specification(),
+        data_contract_dict=resolve_data_contract_dict(location),
         ssl_verification=ssl_verification,
     )
 
@@ -350,7 +343,7 @@ def catalog(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+    ] = None,
 ):
     """
     Create an html catalog of data contracts.
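
The schema option of every command now defaults to None, falling back to the JSON Schema bundled with the wheel (see datacontract/lint/schema.py below), and init writes a bundled template instead of downloading one. A minimal sketch of the new init flow; the printed attribute is only assumed to be set by the bundled template:

    from datacontract.data_contract import DataContract

    spec = DataContract.init(template=None)  # parses the bundled datacontract-1.1.0.init.yaml
    print(spec.id)  # whatever id the bundled template declares, if any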

datacontract/data_contract.py CHANGED
@@ -22,6 +22,7 @@ from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.exporter import ExportFormat
 from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.importer_factory import importer_factory
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
@@ -36,8 +37,6 @@ from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Check, Run
 
-DEFAULT_DATA_CONTRACT_TEMPLATE_URL = "https://datacontract.com/datacontract.init.yaml"
-
 
 class DataContract:
     def __init__(
@@ -52,6 +51,7 @@ class DataContract:
         spark: "SparkSession" = None,
         inline_definitions: bool = True,
         inline_quality: bool = True,
+        ssl_verification: bool = True,
     ):
         self._data_contract_file = data_contract_file
         self._data_contract_str = data_contract_str
@@ -63,6 +63,7 @@ class DataContract:
         self._spark = spark
         self._inline_definitions = inline_definitions
         self._inline_quality = inline_quality
+        self._ssl_verification = ssl_verification
         self.all_linters = {
             ExampleModelLinter(),
             QualityUsesSchemaLinter(),
@@ -74,10 +75,9 @@ class DataContract:
         }
 
     @classmethod
-    def init(
-        cls, template: str = DEFAULT_DATA_CONTRACT_TEMPLATE_URL, schema: typing.Optional[str] = None
-    ) -> DataContractSpecification:
-        return resolve.resolve_data_contract(data_contract_location=template, schema_location=schema)
+    def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
+        template_str = get_init_template(template)
+        return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
 
     def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
         """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
@@ -231,7 +231,7 @@ class DataContract:
         run.finish()
 
         if self._publish_url is not None:
-            publish_test_results_to_datamesh_manager(run, self._publish_url)
+            publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
 
         return run
 
@@ -352,7 +352,6 @@ class DataContract:
         schema: typing.Optional[str] = None,
         **kwargs,
     ) -> DataContractSpecification:
-        template = DEFAULT_DATA_CONTRACT_TEMPLATE_URL if template is None else template
         data_contract_specification_initial = DataContract.init(template=template, schema=schema)
 
         return importer_factory.create(format).import_source(
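
The ssl_verification flag added to the constructor is now forwarded when publishing test results, replacing the hardcoded verify=False in the integration module. A hedged usage sketch; the file path and publish URL are placeholders:

    from datacontract.data_contract import DataContract

    run = DataContract(
        data_contract_file="datacontract.yaml",               # placeholder path
        publish_url="https://dmm.internal.example/api/runs",  # placeholder URL
        ssl_verification=False,                               # forwarded to requests' verify=
    ).test()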

datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -146,6 +146,7 @@ def setup_azure_connection(con, server):
     tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
     client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
     client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
+    storage_account = server.storageAccount
 
     if tenant_id is None:
         raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
@@ -157,12 +158,24 @@ def setup_azure_connection(con, server):
     con.install_extension("azure")
     con.load_extension("azure")
 
-    con.sql(f"""
-    CREATE SECRET azure_spn (
-        TYPE AZURE,
-        PROVIDER SERVICE_PRINCIPAL,
-        TENANT_ID '{tenant_id}',
-        CLIENT_ID '{client_id}',
-        CLIENT_SECRET '{client_secret}'
-    );
-    """)
+    if storage_account is not None:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}',
+            ACCOUNT_NAME '{storage_account}'
+        );
+        """)
+    else:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}'
+        );
+        """)

datacontract/export/data_caterer_converter.py CHANGED
@@ -42,11 +42,11 @@ def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server
         "name": model_key,
         "type": _to_step_type(server),
         "options": _to_data_source_options(model_key, server),
-        "schema": [],
+        "fields": [],
     }
     fields = _to_fields(model_value.fields)
     if fields:
-        step["schema"] = fields
+        step["fields"] = fields
     return step
 
 
@@ -97,16 +97,29 @@ def _to_field(field_name: str, field: Field) -> dict:
     if new_type == "object" or new_type == "record" or new_type == "struct":
         # need to get nested field definitions
         nested_fields = _to_fields(field.fields)
-        dc_field["schema"] = {"fields": nested_fields}
+        dc_field["fields"] = nested_fields
+    elif new_type == "array":
+        if field.items is not None and field.items.type is not None:
+            dc_generator_opts["arrayType"] = _to_data_type(field.items.type)
+        else:
+            dc_generator_opts["arrayType"] = "string"
 
     if field.enum is not None and len(field.enum) > 0:
         dc_generator_opts["oneOf"] = field.enum
     if field.unique is not None and field.unique:
         dc_generator_opts["isUnique"] = field.unique
+    if field.primaryKey is not None and field.primaryKey:
+        dc_generator_opts["isPrimaryKey"] = field.primaryKey
     if field.minLength is not None:
-        dc_generator_opts["minLength"] = field.minLength
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMinLen"] = field.minLength
+        else:
+            dc_generator_opts["minLen"] = field.minLength
     if field.maxLength is not None:
-        dc_generator_opts["maxLength"] = field.maxLength
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMaxLen"] = field.maxLength
+        else:
+            dc_generator_opts["maxLen"] = field.maxLength
     if field.pattern is not None:
         dc_generator_opts["regex"] = field.pattern
     if field.minimum is not None:
@@ -115,7 +128,7 @@ def _to_field(field_name: str, field: Field) -> dict:
         dc_generator_opts["max"] = field.maximum
 
     if len(dc_generator_opts.keys()) > 0:
-        dc_field["generator"] = {"options": dc_generator_opts}
+        dc_field["options"] = dc_generator_opts
     return dc_field
 
 
@@ -124,7 +137,7 @@ def _to_data_type(data_type):
         return "double"
     elif data_type == "decimal" or data_type == "bigint":
         return "decimal"
-    elif data_type == "int":
+    elif data_type == "int" or data_type == "integer":
         return "integer"
     elif data_type == "long":
         return "long"

datacontract/export/sodacl_converter.py CHANGED
@@ -30,6 +30,7 @@ def to_sodacl_yaml(
 
 def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
+    model_name = to_model_name(model_key, model_value, server_type)
     fields = model_value.fields
 
     quote_field_name = server_type in ["postgres", "sqlserver"]
@@ -62,25 +63,41 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
         if field.enum is not None and len(field.enum) > 0:
             checks.append(check_field_enum(field_name, field.enum, quote_field_name))
         if field.quality is not None and len(field.quality) > 0:
-            quality_list = check_quality_list(model_key, field_name, field.quality)
+            quality_list = check_quality_list(model_name, field_name, field.quality)
             if (quality_list is not None) and len(quality_list) > 0:
                 checks.append(quality_list)
         # TODO references: str = None
         # TODO format
 
     if model_value.quality is not None and len(model_value.quality) > 0:
-        quality_list = check_quality_list(model_key, None, model_value.quality)
+        quality_list = check_quality_list(model_name, None, model_value.quality)
         if (quality_list is not None) and len(quality_list) > 0:
             checks.append(quality_list)
 
-    checks_for_model_key = f"checks for {model_key}"
+    checks_for_model_key = f"checks for {model_name}"
 
     if quote_field_name:
-        checks_for_model_key = f'checks for "{model_key}"'
+        checks_for_model_key = f'checks for "{model_name}"'
 
     return checks_for_model_key, checks
 
 
+def to_model_name(model_key, model_value, server_type):
+    if server_type == "databricks":
+        if model_value.config is not None and "databricksTable" in model_value.config:
+            return model_value.config["databricksTable"]
+    if server_type == "snowflake":
+        if model_value.config is not None and "snowflakeTable" in model_value.config:
+            return model_value.config["snowflakeTable"]
+    if server_type == "sqlserver":
+        if model_value.config is not None and "sqlserverTable" in model_value.config:
+            return model_value.config["sqlserverTable"]
+    if server_type == "postgres" or server_type == "postgresql":
+        if model_value.config is not None and "postgresTable" in model_value.config:
+            return model_value.config["postgresTable"]
+    return model_key
+
+
 def check_field_is_present(field_name):
     return {
         "schema": {

datacontract/export/sql_type_converter.py CHANGED
@@ -182,11 +182,16 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-        return "STRUCT"
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-        return "ARRAY"
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
     return None
 
 
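convert_to_databricks now expands nested fields and array items recursively instead of returning bare STRUCT or ARRAY. A sketch; the Field construction and the STRING/INT leaf mappings are assumed from unchanged branches of the converter:

    from datacontract.model.data_contract_specification import Field

    address = Field(type="struct", fields={"city": Field(type="string"), "zip": Field(type="int")})
    tags = Field(type="array", items=Field(type="string"))

    convert_to_databricks(address)  # e.g. "STRUCT<city STRING, zip INT>"
    convert_to_databricks(tags)     # e.g. "ARRAY<STRING>"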

datacontract/imports/csv_importer.py ADDED
@@ -0,0 +1,89 @@
+import os
+
+import clevercsv
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Example, Field, Model, Server
+
+
+class CsvImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_csv(data_contract_specification, self.import_format, source)
+
+
+def import_csv(data_contract_specification: DataContractSpecification, format: str, source: str):
+    include_example = False
+
+    # detect encoding and dialect
+    encoding = clevercsv.encoding.get_encoding(source)
+    with open(source, "r", newline="") as fp:
+        dialect = clevercsv.Sniffer().sniff(fp.read(10000))
+
+    # using auto detecting of the format and encoding
+    df = clevercsv.read_dataframe(source)
+
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    # use the file name as table name
+    table_name = os.path.splitext(os.path.basename(source))[0]
+
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    data_contract_specification.servers["production"] = Server(
+        type="local", path=source, format="csv", delimiter=dialect.delimiter
+    )
+
+    fields = {}
+    for column, dtype in df.dtypes.items():
+        field = Field()
+        field.type = map_type_from_pandas(dtype.name)
+        fields[column] = field
+
+    data_contract_specification.models[table_name] = Model(
+        type="table",
+        description=f"Csv file with encoding {encoding}",
+        fields=fields,
+    )
+
+    # multiline data is not correctly handled by yaml dump
+    if include_example:
+        if data_contract_specification.examples is None:
+            data_contract_specification.examples = []
+
+        # read first 10 lines with the detected encoding
+        with open(source, "r", encoding=encoding) as csvfile:
+            lines = csvfile.readlines()[:10]
+
+        data_contract_specification.examples.append(Example(type="csv", model=table_name, data="".join(lines)))
+
+    return data_contract_specification
+
+
+def map_type_from_pandas(sql_type: str):
+    if sql_type is None:
+        return None
+
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed == "object":
+        return "string"
+    elif sql_type_normed.startswith("str"):
+        return "string"
+    elif sql_type_normed.startswith("int"):
+        return "integer"
+    elif sql_type_normed.startswith("float"):
+        return "float"
+    elif sql_type_normed.startswith("bool"):
+        return "boolean"
+    elif sql_type_normed.startswith("timestamp"):
+        return "timestamp"
+    elif sql_type_normed == "datetime64":
+        return "date"
+    elif sql_type_normed == "timedelta[ns]":
+        return "timestamp_ntz"
+    else:
+        return "variant"

datacontract/imports/importer.py CHANGED
@@ -31,6 +31,7 @@ class ImportFormat(str, Enum):
     spark = "spark"
     iceberg = "iceberg"
     parquet = "parquet"
+    csv = "csv"
 
     @classmethod
     def get_supported_formats(cls):

datacontract/imports/importer_factory.py CHANGED
@@ -104,3 +104,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.parquet_importer",
     class_name="ParquetImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.csv,
+    module_path="datacontract.imports.csv_importer",
+    class_name="CsvImporter",
+)

datacontract/init/init_template.py ADDED
@@ -0,0 +1,20 @@
+import importlib.resources as resources
+import logging
+
+import requests
+
+DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml"
+
+
+def get_init_template(location: str = None) -> str:
+    if location is None:
+        logging.info("Use default bundled template " + DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        schemas = resources.files("datacontract")
+        template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        with template.open("r") as file:
+            return file.read()
+    elif location.startswith("http://") or location.startswith("https://"):
+        return requests.get(location).text
+    else:
+        with open(location, "r") as file:
+            return file.read()
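
get_init_template resolves a template from one of three sources: the bundled package resource, an HTTP(S) URL, or a local path. A sketch; the URL and path are placeholders:

    get_init_template()                                        # bundled datacontract-1.1.0.init.yaml
    get_init_template("https://example.com/my-template.yaml")  # fetched with requests
    get_init_template("templates/my-template.yaml")            # read from the local filesystem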

datacontract/integration/datamesh_manager.py CHANGED
@@ -2,11 +2,10 @@ import os
 
 import requests
 
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.run import Run
 
 
-def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
+def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_verification: bool):
     try:
         if publish_url is None:
             # this url supports Data Mesh Manager and Data Contract Manager
@@ -32,7 +31,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
             url,
             data=request_body,
             headers=headers,
-            verify=False,
+            verify=ssl_verification,
         )
         # print("Status Code:", response.status_code)
         # print("Response Body:", response.text)
@@ -44,9 +43,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
         run.log_error(f"Failed publishing test results. Error: {str(e)}")
 
 
-def publish_data_contract_to_datamesh_manager(
-    data_contract_specification: DataContractSpecification, ssl_verification: bool
-):
+def publish_data_contract_to_datamesh_manager(data_contract_dict: dict, ssl_verification: bool):
     try:
         api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
         host = "https://api.datamesh-manager.com"
@@ -59,13 +56,11 @@ def publish_data_contract_to_datamesh_manager(
                 "Cannot publish data contract, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
             )
         headers = {"Content-Type": "application/json", "x-api-key": api_key}
-        spec = data_contract_specification
-        id = spec.id
+        id = data_contract_dict["id"]
         url = f"{host}/api/datacontracts/{id}"
-        request_body = spec.model_dump_json().encode("utf-8")
         response = requests.put(
             url=url,
-            data=request_body,
+            json=data_contract_dict,
             headers=headers,
             verify=ssl_verification,
         )
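
publish_data_contract_to_datamesh_manager now receives the parsed YAML as a dict and sends it through requests' json= parameter, so the payload matches the contract as authored rather than a pydantic re-serialization. A hedged sketch; the file path is a placeholder and DATAMESH_MANAGER_API_KEY must be set:

    from datacontract.lint.resolve import resolve_data_contract_dict

    contract_dict = resolve_data_contract_dict(data_contract_location="datacontract.yaml")
    publish_data_contract_to_datamesh_manager(contract_dict, ssl_verification=True)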

datacontract/lint/linters/field_reference_linter.py CHANGED
@@ -22,7 +22,16 @@ class FieldReferenceLinter(Linter):
         for model_name, model in contract.models.items():
             for field_name, field in model.fields.items():
                 if field.references:
-                    (ref_model, ref_field) = field.references.split(".", maxsplit=2)
+                    reference_hierarchy = field.references.split(".")
+                    if len(reference_hierarchy) != 2:
+                        result = result.with_error(
+                            f"Field '{field_name}' in model '{model_name}'"
+                            f" references must follow the model.field syntax and refer to a field in a model in this data contract."
+                        )
+                        continue
+                    ref_model = reference_hierarchy[0]
+                    ref_field = reference_hierarchy[1]
+
                     if ref_model not in contract.models:
                         result = result.with_error(
                             f"Field '{field_name}' in model '{model_name}'"

datacontract/lint/resolve.py CHANGED
@@ -44,6 +44,27 @@ def resolve_data_contract(
     )
 
 
+def resolve_data_contract_dict(
+    data_contract_location: str = None,
+    data_contract_str: str = None,
+    data_contract: DataContractSpecification = None,
+) -> dict:
+    if data_contract_location is not None:
+        return _to_yaml(read_resource(data_contract_location))
+    elif data_contract_str is not None:
+        return _to_yaml(data_contract_str)
+    elif data_contract is not None:
+        return data_contract.model_dump()
+    else:
+        raise DataContractException(
+            type="lint",
+            result="failed",
+            name="Check that data contract YAML is valid",
+            reason="Data contract needs to be provided",
+            engine="datacontract",
+        )
+
+
 def resolve_data_contract_from_location(
     location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:
@@ -231,7 +252,7 @@ def _resolve_data_contract_from_str(
     return spec
 
 
-def _to_yaml(data_contract_str):
+def _to_yaml(data_contract_str) -> dict:
     try:
         yaml_dict = yaml.safe_load(data_contract_str)
         return yaml_dict
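
resolve_data_contract_dict takes the first non-None of three inputs and returns a plain dict, which publish uses to ship the contract verbatim. A sketch; spec stands for any existing DataContractSpecification instance:

    resolve_data_contract_dict(data_contract_location="datacontract.yaml")  # read, then YAML-parse
    resolve_data_contract_dict(data_contract_str="id: my-contract\n")       # YAML-parse the string
    resolve_data_contract_dict(data_contract=spec)                          # spec.model_dump()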

datacontract/lint/schema.py CHANGED
@@ -1,4 +1,6 @@
+import importlib.resources as resources
 import json
+import logging
 import os
 from typing import Any, Dict
 
@@ -6,6 +8,8 @@ import requests
 
 from datacontract.model.exceptions import DataContractException
 
+DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.1.0.schema.json"
+
 
 def fetch_schema(location: str = None) -> Dict[str, Any]:
     """
@@ -27,9 +31,12 @@ def fetch_schema(location: str = None) -> Dict[str, Any]:
 
     """
     if location is None:
-        location = "https://datacontract.com/datacontract.schema.json"
-
-    if location.startswith("http://") or location.startswith("https://"):
+        logging.info("Use default bundled schema " + DEFAULT_DATA_CONTRACT_SCHEMA)
+        schemas = resources.files("datacontract")
+        schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
+        with schema_file.open("r") as file:
+            schema = json.load(file)
+    elif location.startswith("http://") or location.startswith("https://"):
         response = requests.get(location)
         schema = response.json()
     else:
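
fetch_schema now defaults to the JSON Schema bundled with the wheel, so lint and test work offline; an explicit URL or path still overrides it. A sketch; the local path is a placeholder:

    fetch_schema()                                                     # bundled datacontract-1.1.0.schema.json
    fetch_schema("https://datacontract.com/datacontract.schema.json")  # remote, as before
    fetch_schema("schemas/custom.schema.json")                         # local file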

datacontract/model/data_contract_specification.py CHANGED
@@ -72,6 +72,7 @@ class Server(pyd.BaseModel):
     dataProductId: str = None
     outputPortId: str = None
     driver: str = None
+    storageAccount: str = None
     roles: List[ServerRole] = None
 
     model_config = pyd.ConfigDict(
@@ -112,6 +113,7 @@ class Definition(pyd.BaseModel):
     tags: List[str] = []
     links: Dict[str, str] = {}
     example: str = None
+    examples: List[Any] | None = None
 
     model_config = pyd.ConfigDict(
         extra="allow",