datacontract-cli 0.10.7__py3-none-any.whl → 0.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. datacontract/cli.py +19 -1
  2. datacontract/data_contract.py +60 -172
  3. datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
  4. datacontract/engines/soda/check_soda_execute.py +4 -2
  5. datacontract/engines/soda/connections/bigquery.py +8 -1
  6. datacontract/export/__init__.py +0 -0
  7. datacontract/export/avro_converter.py +15 -3
  8. datacontract/export/avro_idl_converter.py +29 -22
  9. datacontract/export/bigquery_converter.py +15 -0
  10. datacontract/export/dbml_converter.py +9 -0
  11. datacontract/export/dbt_converter.py +26 -1
  12. datacontract/export/exporter.py +87 -0
  13. datacontract/export/exporter_factory.py +52 -0
  14. datacontract/export/go_converter.py +6 -0
  15. datacontract/export/great_expectations_converter.py +10 -0
  16. datacontract/export/html_export.py +6 -0
  17. datacontract/export/jsonschema_converter.py +24 -16
  18. datacontract/export/odcs_converter.py +24 -1
  19. datacontract/export/protobuf_converter.py +6 -0
  20. datacontract/export/pydantic_converter.py +6 -0
  21. datacontract/export/rdf_converter.py +9 -0
  22. datacontract/export/sodacl_converter.py +7 -1
  23. datacontract/export/sql_converter.py +32 -2
  24. datacontract/export/sql_type_converter.py +4 -5
  25. datacontract/export/terraform_converter.py +6 -0
  26. datacontract/imports/bigquery_importer.py +30 -4
  27. datacontract/imports/glue_importer.py +9 -0
  28. datacontract/imports/odcs_importer.py +192 -0
  29. datacontract/imports/unity_importer.py +138 -0
  30. datacontract/model/data_contract_specification.py +2 -0
  31. datacontract/templates/partials/server.html +64 -32
  32. datacontract/templates/style/output.css +9 -0
  33. datacontract/web.py +17 -0
  34. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/METADATA +113 -49
  35. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/RECORD +39 -34
  36. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/LICENSE +0 -0
  37. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/WHEEL +0 -0
  38. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/entry_points.txt +0 -0
  39. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/top_level.txt +0 -0
datacontract/cli.py CHANGED
@@ -5,6 +5,7 @@ from typing import Iterable, Optional
  from typing import List

  import typer
+ import uvicorn
  from click import Context
  from rich import box
  from rich.console import Console
@@ -12,6 +13,7 @@ from rich.table import Table
  from typer.core import TyperGroup
  from typing_extensions import Annotated

+ from datacontract import web
  from datacontract.catalog.catalog import create_index_html, create_data_contract_html
  from datacontract.data_contract import DataContract, ExportFormat
  from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
@@ -183,6 +185,7 @@ def export(
      result = DataContract(data_contract_file=location, server=server).export(
          export_format=format,
          model=model,
+         server=server,
          rdf_base=rdf_base,
          sql_server_type=sql_server_type,
      )
@@ -201,6 +204,8 @@ class ImportFormat(str, Enum):
      glue = "glue"
      bigquery = "bigquery"
      jsonschema = "jsonschema"
+     odcs="odcs"
+     unity = "unity"


  @app.command(name="import")
@@ -223,11 +228,12 @@ def import_(
              help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
          ),
      ] = None,
+     unity_table_full_name: Annotated[Optional[str], typer.Option(help="Full name of a table in the unity catalog")] = None,
  ):
      """
      Create a data contract from the given source location. Prints to stdout.
      """
-     result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
+     result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset, unity_table_full_name)
      console.print(result.to_yaml())


@@ -323,6 +329,18 @@ def diff(
      console.print(result.changelog_str())


+ @app.command()
+ def serve(
+     port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+     host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+ ):
+     """
+     Start the datacontract web server.
+     """
+
+     uvicorn.run(web.app, port=port, host=host)
+
+
  def _handle_result(run):
      _print_table(run)
      if run.result == "passed":
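Note: taken together, the cli.py changes wire up three features of this release: a serve command that runs the new datacontract.web app through uvicorn (defaulting to 127.0.0.1:4242), an export --server option passed through to the exporters, and odcs/unity as import formats. Assuming typer's default option naming (the flag spellings are derived from the parameter names and are not shown in this diff), the new commands would be invoked roughly as datacontract serve --port 4242 or datacontract import --format unity --unity-table-full-name my_catalog.my_schema.my_table.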
datacontract/data_contract.py CHANGED
@@ -2,7 +2,6 @@ import json
  import logging
  import tempfile
  import typing
- from enum import Enum

  import yaml
  from pyspark.sql import SparkSession
@@ -13,27 +12,15 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
  )
  from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
  from datacontract.engines.soda.check_soda_execute import check_soda_execute
- from datacontract.export.avro_converter import to_avro_schema_json
- from datacontract.export.avro_idl_converter import to_avro_idl
- from datacontract.export.bigquery_converter import to_bigquery_json
- from datacontract.export.dbml_converter import to_dbml_diagram
- from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
- from datacontract.export.go_converter import to_go_types
- from datacontract.export.great_expectations_converter import to_great_expectations
- from datacontract.export.html_export import to_html
- from datacontract.export.jsonschema_converter import to_jsonschema_json
- from datacontract.export.odcs_converter import to_odcs_yaml
- from datacontract.export.protobuf_converter import to_protobuf
- from datacontract.export.pydantic_converter import to_pydantic_model_str
- from datacontract.export.rdf_converter import to_rdf_n3
- from datacontract.export.sodacl_converter import to_sodacl_yaml
- from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
- from datacontract.export.terraform_converter import to_terraform
+ from datacontract.export.exporter import ExportFormat
+ from datacontract.export.exporter_factory import exporter_factory
  from datacontract.imports.avro_importer import import_avro
  from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
  from datacontract.imports.glue_importer import import_glue
  from datacontract.imports.jsonschema_importer import import_jsonschema
+ from datacontract.imports.odcs_importer import import_odcs
  from datacontract.imports.sql_importer import import_sql
+ from datacontract.imports.unity_importer import import_unity_from_json, import_unity_from_api
  from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
  from datacontract.integration.publish_opentelemetry import publish_opentelemetry
  from datacontract.lint import resolve
@@ -50,28 +37,6 @@ from datacontract.model.exceptions import DataContractException
  from datacontract.model.run import Run, Check


- class ExportFormat(str, Enum):
-     jsonschema = "jsonschema"
-     pydantic_model = "pydantic-model"
-     sodacl = "sodacl"
-     dbt = "dbt"
-     dbt_sources = "dbt-sources"
-     dbt_staging_sql = "dbt-staging-sql"
-     odcs = "odcs"
-     rdf = "rdf"
-     avro = "avro"
-     protobuf = "protobuf"
-     great_expectations = "great-expectations"
-     terraform = "terraform"
-     avro_idl = "avro-idl"
-     sql = "sql"
-     sql_query = "sql-query"
-     html = "html"
-     go = "go"
-     bigquery = "bigquery"
-     dbml = "dbml"
-
-
  class DataContract:
      def __init__(
          self,
@@ -207,6 +172,9 @@ class DataContract:
              if self._examples:
                  server_name = "examples"
                  server = self._get_examples_server(data_contract, run, tmp_dir)
+             elif self._server:
+                 server_name = self._server
+                 server = data_contract.servers.get(server_name)
              else:
                  server_name = list(data_contract.servers.keys())[0]
                  server = data_contract.servers.get(server_name)
@@ -260,6 +228,38 @@

          return run

+     def _get_examples_server(self, data_contract, run, tmp_dir):
+         run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+         format = "json"
+         for example in data_contract.examples:
+             format = example.type
+             p = f"{tmp_dir}/{example.model}.{format}"
+             run.log_info(f"Creating example file {p}")
+             with open(p, "w") as f:
+                 content = ""
+                 if format == "json" and isinstance(example.data, list):
+                     content = json.dumps(example.data)
+                 elif format == "json" and isinstance(example.data, str):
+                     content = example.data
+                 elif format == "yaml" and isinstance(example.data, list):
+                     content = yaml.dump(example.data, allow_unicode=True)
+                 elif format == "yaml" and isinstance(example.data, str):
+                     content = example.data
+                 elif format == "csv":
+                     content = example.data
+                 logging.debug(f"Content of example file {p}: {content}")
+                 f.write(content)
+         path = f"{tmp_dir}" + "/{model}." + format
+         delimiter = "array"
+         server = Server(
+             type="local",
+             path=path,
+             format=format,
+             delimiter=delimiter,
+         )
+         run.log_info(f"Using {server} for testing the examples")
+         return server
+
      def breaking(self, other: "DataContract") -> BreakingChanges:
          return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])

@@ -302,7 +302,11 @@ class DataContract:
          )

      def export(
-         self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
+         self,
+         export_format: ExportFormat,
+         model: str = "all",
+         sql_server_type: str = "auto",
+         **kwargs,
      ) -> str:
          data_contract = resolve.resolve_data_contract(
              self._data_contract_file,
@@ -311,140 +315,14 @@ class DataContract:
              inline_definitions=True,
              inline_quality=True,
          )
-         if export_format == "jsonschema":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             return to_jsonschema_json(model_name, model_value)
-         if export_format == "sodacl":
-             return to_sodacl_yaml(data_contract)
-         if export_format == "dbt":
-             return to_dbt_models_yaml(data_contract)
-         if export_format == "dbt-sources":
-             return to_dbt_sources_yaml(data_contract, self._server)
-         if export_format == "dbt-staging-sql":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             return to_dbt_staging_sql(data_contract, model_name, model_value)
-         if export_format == "odcs":
-             return to_odcs_yaml(data_contract)
-         if export_format == "rdf":
-             return to_rdf_n3(data_contract, rdf_base)
-         if export_format == "protobuf":
-             return to_protobuf(data_contract)
-         if export_format == "avro":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             return to_avro_schema_json(model_name, model_value)
-         if export_format == "avro-idl":
-             return to_avro_idl(data_contract)
-         if export_format == "terraform":
-             return to_terraform(data_contract)
-         if export_format == "sql":
-             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-             return to_sql_ddl(data_contract, server_type=server_type)
-         if export_format == "sql-query":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-             return to_sql_query(data_contract, model_name, model_value, server_type)
-         if export_format == "great-expectations":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             return to_great_expectations(data_contract, model_name)
-         if export_format == "pydantic-model":
-             return to_pydantic_model_str(data_contract)
-         if export_format == "html":
-             return to_html(data_contract)
-         if export_format == "go":
-             return to_go_types(data_contract)
-         if export_format == "bigquery":
-             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-             found_server = data_contract.servers.get(self._server)
-             if found_server is None:
-                 raise RuntimeError(
-                     f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                 )
-             if found_server.type != "bigquery":
-                 raise RuntimeError(
-                     f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                 )
-             return to_bigquery_json(model_name, model_value, found_server)
-         if export_format == "dbml":
-             found_server = data_contract.servers.get(self._server)
-             return to_dbml_diagram(data_contract, found_server)
-         else:
-             print(f"Export format {export_format} not supported.")
-             return ""
-
-     def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-         if sql_server_type == "auto":
-             if data_contract.servers is None or len(data_contract.servers) == 0:
-                 raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-             server_types = set([server.type for server in data_contract.servers.values()])
-             if "snowflake" in server_types:
-                 return "snowflake"
-             elif "postgres" in server_types:
-                 return "postgres"
-             elif "databricks" in server_types:
-                 return "databricks"
-             else:
-                 # default to snowflake dialect
-                 return "snowflake"
-         else:
-             return sql_server_type

-     def _get_examples_server(self, data_contract, run, tmp_dir):
-         run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
-         format = "json"
-         for example in data_contract.examples:
-             format = example.type
-             p = f"{tmp_dir}/{example.model}.{format}"
-             run.log_info(f"Creating example file {p}")
-             with open(p, "w") as f:
-                 content = ""
-                 if format == "json" and isinstance(example.data, list):
-                     content = json.dumps(example.data)
-                 elif format == "json" and isinstance(example.data, str):
-                     content = example.data
-                 elif format == "yaml" and isinstance(example.data, list):
-                     content = yaml.dump(example.data, allow_unicode=True)
-                 elif format == "yaml" and isinstance(example.data, str):
-                     content = example.data
-                 elif format == "csv":
-                     content = example.data
-                 logging.debug(f"Content of example file {p}: {content}")
-                 f.write(content)
-         path = f"{tmp_dir}" + "/{model}." + format
-         delimiter = "array"
-         server = Server(
-             type="local",
-             path=path,
-             format=format,
-             delimiter=delimiter,
+         return exporter_factory.create(export_format).export(
+             data_contract=data_contract,
+             model=model,
+             server=self._server,
+             sql_server_type=sql_server_type,
+             export_args=kwargs,
          )
-         run.log_info(f"Using {server} for testing the examples")
-         return server
-
-     def _check_models_for_export(
-         self, data_contract: DataContractSpecification, model: str, export_format: str
-     ) -> typing.Tuple[str, str]:
-         if data_contract.models is None:
-             raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-         model_names = list(data_contract.models.keys())
-
-         if model == "all":
-             if len(data_contract.models.items()) != 1:
-                 raise RuntimeError(
-                     f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                 )
-
-             model_name, model_value = next(iter(data_contract.models.items()))
-         else:
-             model_name = model
-             model_value = data_contract.models.get(model_name)
-             if model_value is None:
-                 raise RuntimeError(
-                     f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                 )
-
-         return model_name, model_value

      def import_from_source(
          self,
@@ -454,6 +332,7 @@ class DataContract:
          bigquery_tables: typing.Optional[typing.List[str]] = None,
          bigquery_project: typing.Optional[str] = None,
          bigquery_dataset: typing.Optional[str] = None,
+         unity_table_full_name: typing.Optional[str] = None
      ) -> DataContractSpecification:
          data_contract_specification = DataContract.init()

@@ -472,6 +351,15 @@ class DataContract:
              data_contract_specification = import_bigquery_from_api(
                  data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
              )
+         elif format == "odcs":
+             data_contract_specification = import_odcs(data_contract_specification, source)
+         elif format == "unity":
+             if source is not None:
+                 data_contract_specification = import_unity_from_json(data_contract_specification, source)
+             else:
+                 data_contract_specification = import_unity_from_api(
+                     data_contract_specification, unity_table_full_name
+                 )
          else:
              print(f"Import format {format} not supported.")

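Note: this is the heart of the 0.10.8 refactor: the long if/elif export chain, _determine_sql_server_type, _check_models_for_export, and the ExportFormat enum all move out of data_contract.py, and dispatch now goes through exporter_factory. The new exporter.py and exporter_factory.py modules are not shown in this excerpt, so the following is only an illustrative sketch of the registry pattern implied by the call site above and by the Exporter subclasses added in the converter modules below; every name other than exporter_factory.create and Exporter.export is an assumption.

    # Illustrative sketch, not the actual exporter_factory.py from the wheel.
    from abc import ABC, abstractmethod


    class Exporter(ABC):
        def __init__(self, export_format: str):
            # stored so subclasses can pass it to _check_models_for_export
            self.export_format = export_format

        @abstractmethod
        def export(self, data_contract, model, server, sql_server_type, export_args):
            ...


    class ExporterFactory:
        def __init__(self):
            self.exporters = {}  # maps an ExportFormat value to an Exporter subclass

        def register(self, name: str, exporter_class) -> None:
            self.exporters[name] = exporter_class

        def create(self, name: str) -> Exporter:
            if name not in self.exporters:
                raise ValueError(f"Export format {name} not supported.")
            return self.exporters[name](name)


    exporter_factory = ExporterFactory()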
datacontract/engines/fastjsonschema/s3/s3_read_files.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  import os

- import s3fs
+ from datacontract.model.exceptions import DataContractException


  def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):


  def s3_fs(s3_endpoint_url):
+     try:
+         import s3fs
+     except ImportError as e:
+         raise DataContractException(
+             type="schema",
+             result="failed",
+             name="s3 extra missing",
+             reason="Install the extra datacontract-cli\[s3] to use s3",
+             engine="datacontract",
+             original_exception=e,
+         )
+
      aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
      aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
      return s3fs.S3FileSystem(
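Note: moving the s3fs import from module level into s3_fs() makes the dependency optional: a missing package now surfaces as a DataContractException pointing the user at the s3 extra (i.e. something like pip install 'datacontract-cli[s3]', spelling inferred from the error message) rather than an ImportError the moment the module is imported. The backslash in datacontract-cli\[s3] presumably escapes the brackets for rich console output.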
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -66,8 +66,10 @@ def check_soda_execute(
          scan.set_data_source_name(server.type)
      elif server.type == "dataframe":
          if spark is None:
-             run.log_warn("Server type dataframe only works with the Python library and requires a Spark session, "
-                          "please provide one with the DataContract class")
+             run.log_warn(
+                 "Server type dataframe only works with the Python library and requires a Spark session, "
+                 "please provide one with the DataContract class"
+             )
              return
      else:
          logging.info("Use Spark to connect to data source")
datacontract/engines/soda/connections/bigquery.py CHANGED
@@ -6,10 +6,17 @@
  # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
  def to_bigquery_soda_configuration(server):
      # with service account key, using an external json file
+
+     # check for our own environment variable first
+     account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+     if account_info is None:
+         # but as a fallback look for the default google one
+         account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
      soda_configuration = {
          f"data_source {server.type}": {
              "type": "bigquery",
-             "account_info_json_path": os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"),
+             "account_info_json_path": account_info,
              "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
              "project_id": server.project,
              "dataset": server.dataset,
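Note: the credential lookup added here checks the datacontract-specific DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH variable first and only then falls back to Google's standard GOOGLE_APPLICATION_CREDENTIALS. A compact equivalent of the added lines (modulo the fact that "or" would also skip an empty string, while the diff only checks for None):

    import os

    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH") or os.getenv(
        "GOOGLE_APPLICATION_CREDENTIALS"
    )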
datacontract/export/__init__.py
File without changes
datacontract/export/avro_converter.py CHANGED
@@ -1,8 +1,15 @@
  import json

+ from datacontract.export.exporter import Exporter, _check_models_for_export
  from datacontract.model.data_contract_specification import Field


+ class AvroExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+         return to_avro_schema_json(model_name, model_value)
+
+
  def to_avro_schema(model_name, model) -> dict:
      return to_avro_record(model_name, model.fields, model.description, model.namespace)

@@ -47,7 +54,13 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
      if "avroLogicalType" in field.config and "avroType" in field.config:
          return {"type": field.config["avroType"], "logicalType": field.config["avroLogicalType"]}
      if "avroLogicalType" in field.config:
-         if field.config["avroLogicalType"] in ["timestamp-millis", "timestamp-micros", "local-timestamp-millis", "local-timestamp-micros", "time-micros"]:
+         if field.config["avroLogicalType"] in [
+             "timestamp-millis",
+             "timestamp-micros",
+             "local-timestamp-millis",
+             "local-timestamp-micros",
+             "time-micros",
+         ]:
              return {"type": "long", "logicalType": field.config["avroLogicalType"]}
          if field.config["avroLogicalType"] in ["time-millis", "date"]:
              return {"type": "int", "logicalType": field.config["avroLogicalType"]}
@@ -82,8 +95,7 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
      elif field.type in ["binary"]:
          return "bytes"
      elif field.type in ["array"]:
-         # TODO support array structs
-         return "array"
+         return {"type": "array", "items": to_avro_type(field.items, field_name)}
      elif field.type in ["null"]:
          return "null"
      else:
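Note: the array change above replaces the old "array" placeholder (which was not a valid Avro schema) with a proper array type built recursively from the field's items. A quick illustration, assuming default Field construction (the model's defaults are not shown in this diff):

    from datacontract.export.avro_converter import to_avro_type
    from datacontract.model.data_contract_specification import Field

    # hypothetical field: an array of strings
    tags = Field(type="array", items=Field(type="string"))

    print(to_avro_type(tags, "tags"))
    # 0.10.7 returned the bare string "array"; 0.10.8 yields
    # {'type': 'array', 'items': 'string'}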
datacontract/export/avro_idl_converter.py CHANGED
@@ -7,28 +7,7 @@ from datacontract.lint.resolve import inline_definitions_into_data_contract
  from datacontract.model.data_contract_specification import DataContractSpecification, Field
  from datacontract.model.exceptions import DataContractException

-
- def to_avro_idl(contract: DataContractSpecification) -> str:
-     """Serialize the provided data contract specification into an Avro IDL string.
-
-     The data contract will be serialized as a protocol, with one record type
-     for each contained model. Model fields are mapped one-to-one to Avro IDL
-     record fields.
-     """
-     stream = StringIO()
-     to_avro_idl_stream(contract, stream)
-     return stream.getvalue()
-
-
- def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
-     """Serialize the provided data contract specification into Avro IDL."""
-     ir = _contract_to_avro_idl_ir(contract)
-     if ir.description:
-         stream.write(f"/** {contract.info.description} */\n")
-     stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
-     for model_type in ir.model_types:
-         _write_model_type(model_type, stream)
-     stream.write("}\n")
+ from datacontract.export.exporter import Exporter


  class AvroPrimitiveType(Enum):
@@ -107,6 +86,34 @@ avro_primitive_types = set(
  )


+ class AvroIdlExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         return to_avro_idl(data_contract)
+
+
+ def to_avro_idl(contract: DataContractSpecification) -> str:
+     """Serialize the provided data contract specification into an Avro IDL string.
+
+     The data contract will be serialized as a protocol, with one record type
+     for each contained model. Model fields are mapped one-to-one to Avro IDL
+     record fields.
+     """
+     stream = StringIO()
+     to_avro_idl_stream(contract, stream)
+     return stream.getvalue()
+
+
+ def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
+     """Serialize the provided data contract specification into Avro IDL."""
+     ir = _contract_to_avro_idl_ir(contract)
+     if ir.description:
+         stream.write(f"/** {contract.info.description} */\n")
+     stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
+     for model_type in ir.model_types:
+         _write_model_type(model_type, stream)
+     stream.write("}\n")
+
+
  def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
      result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
      match field.type:
datacontract/export/bigquery_converter.py CHANGED
@@ -5,6 +5,21 @@ from typing import Dict, List
  from datacontract.model.data_contract_specification import Model, Field, Server
  from datacontract.model.exceptions import DataContractException

+ from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+ class BigQueryExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         self.dict_args = export_args
+         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+         found_server = data_contract.servers.get(server)
+         if found_server is None:
+             raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+         if found_server.type != "bigquery":
+             raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+
+         return to_bigquery_json(model_name, model_value, found_server)
+

  def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
      bigquery_table = to_bigquery_schema(model_name, model_value, server)
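Note: BigQueryExporter carries over the checks from the removed branch in data_contract.py unchanged: the export still only works when a server of type bigquery is selected from the contract (via the server argument threaded through DataContract.export, i.e. the CLI's --server option, exact flag spelling assumed), and it raises a RuntimeError otherwise.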
datacontract/export/dbml_converter.py CHANGED
@@ -8,6 +8,15 @@ import datacontract.model.data_contract_specification as spec
  from datacontract.export.sql_type_converter import convert_to_sql_type


+ from datacontract.export.exporter import Exporter
+
+
+ class DbmlExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         found_server = data_contract.servers.get(server)
+         return to_dbml_diagram(data_contract, found_server)
+
+
  def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
      result = ""
      result += add_generated_info(contract, server) + "\n"
datacontract/export/dbt_converter.py CHANGED
@@ -5,6 +5,28 @@ import yaml
  from datacontract.export.sql_type_converter import convert_to_sql_type
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field

+ from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+ class DbtExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         return to_dbt_models_yaml(data_contract)
+
+
+ class DbtSourceExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         return to_dbt_sources_yaml(data_contract, server)
+
+
+ class DbtStageExporter(Exporter):
+     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+         return to_dbt_staging_sql(
+             data_contract,
+             model_name,
+             model_value,
+         )
+

  def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
      dbt = {
@@ -19,7 +41,10 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):

  def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
      if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-         print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
+         print(
+             "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
+             "Please specify the model name."
+         )
          return ""

      id = data_contract_spec.id