datacontract-cli 0.10.7__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/cli.py +19 -1
- datacontract/data_contract.py +60 -172
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
- datacontract/engines/soda/check_soda_execute.py +4 -2
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/export/__init__.py +0 -0
- datacontract/export/avro_converter.py +15 -3
- datacontract/export/avro_idl_converter.py +29 -22
- datacontract/export/bigquery_converter.py +15 -0
- datacontract/export/dbml_converter.py +9 -0
- datacontract/export/dbt_converter.py +26 -1
- datacontract/export/exporter.py +87 -0
- datacontract/export/exporter_factory.py +52 -0
- datacontract/export/go_converter.py +6 -0
- datacontract/export/great_expectations_converter.py +10 -0
- datacontract/export/html_export.py +6 -0
- datacontract/export/jsonschema_converter.py +24 -16
- datacontract/export/odcs_converter.py +24 -1
- datacontract/export/protobuf_converter.py +6 -0
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +9 -0
- datacontract/export/sodacl_converter.py +7 -1
- datacontract/export/sql_converter.py +32 -2
- datacontract/export/sql_type_converter.py +4 -5
- datacontract/export/terraform_converter.py +6 -0
- datacontract/imports/bigquery_importer.py +30 -4
- datacontract/imports/glue_importer.py +9 -0
- datacontract/imports/odcs_importer.py +192 -0
- datacontract/imports/unity_importer.py +138 -0
- datacontract/model/data_contract_specification.py +2 -0
- datacontract/templates/partials/server.html +64 -32
- datacontract/templates/style/output.css +9 -0
- datacontract/web.py +17 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/METADATA +113 -49
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/RECORD +39 -34
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.8.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Iterable, Optional
 from typing import List
 
 import typer
+import uvicorn
 from click import Context
 from rich import box
 from rich.console import Console
@@ -12,6 +13,7 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated
 
+from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
@@ -183,6 +185,7 @@ def export(
     result = DataContract(data_contract_file=location, server=server).export(
         export_format=format,
         model=model,
+        server=server,
         rdf_base=rdf_base,
         sql_server_type=sql_server_type,
     )
@@ -201,6 +204,8 @@ class ImportFormat(str, Enum):
     glue = "glue"
     bigquery = "bigquery"
     jsonschema = "jsonschema"
+    odcs="odcs"
+    unity = "unity"
 
 
 @app.command(name="import")
@@ -223,11 +228,12 @@ def import_(
             help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
         ),
     ] = None,
+    unity_table_full_name: Annotated[Optional[str], typer.Option(help="Full name of a table in the unity catalog")] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset, unity_table_full_name)
     console.print(result.to_yaml())
 
 
@@ -323,6 +329,18 @@ def diff(
     console.print(result.changelog_str())
 
 
+@app.command()
+def serve(
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+):
+    """
+    Start the datacontract web server.
+    """
+
+    uvicorn.run(web.app, port=port, host=host)
+
+
 def _handle_result(run):
     _print_table(run)
     if run.result == "passed":
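The new serve command starts the app from datacontract/web.py via uvicorn (datacontract serve --port 4242). The extended import path can also be driven from Python; a minimal sketch mirroring the CLI's positional call above, where "catalog.schema.table" is a placeholder Unity table name, not a real one:

from datacontract.data_contract import DataContract

# format="unity" with source=None falls through to the new API-based importer;
# the other source-specific arguments are passed positionally as None, as the CLI does.
spec = DataContract().import_from_source(
    "unity", None, None, None, None, None, "catalog.schema.table"
)
print(spec.to_yaml())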
datacontract/data_contract.py
CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import tempfile
 import typing
-from enum import Enum
 
 import yaml
 from pyspark.sql import SparkSession
@@ -13,27 +12,15 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.avro_converter import to_avro_schema_json
-from datacontract.export.avro_idl_converter import to_avro_idl
-from datacontract.export.bigquery_converter import to_bigquery_json
-from datacontract.export.dbml_converter import to_dbml_diagram
-from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.go_converter import to_go_types
-from datacontract.export.great_expectations_converter import to_great_expectations
-from datacontract.export.html_export import to_html
-from datacontract.export.jsonschema_converter import to_jsonschema_json
-from datacontract.export.odcs_converter import to_odcs_yaml
-from datacontract.export.protobuf_converter import to_protobuf
-from datacontract.export.pydantic_converter import to_pydantic_model_str
-from datacontract.export.rdf_converter import to_rdf_n3
-from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
-from datacontract.export.terraform_converter import to_terraform
+from datacontract.export.exporter import ExportFormat
+from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.avro_importer import import_avro
 from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
 from datacontract.imports.glue_importer import import_glue
 from datacontract.imports.jsonschema_importer import import_jsonschema
+from datacontract.imports.odcs_importer import import_odcs
 from datacontract.imports.sql_importer import import_sql
+from datacontract.imports.unity_importer import import_unity_from_json, import_unity_from_api
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -50,28 +37,6 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
 
 
-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 class DataContract:
     def __init__(
         self,
@@ -207,6 +172,9 @@ class DataContract:
         if self._examples:
             server_name = "examples"
            server = self._get_examples_server(data_contract, run, tmp_dir)
+        elif self._server:
+            server_name = self._server
+            server = data_contract.servers.get(server_name)
         else:
             server_name = list(data_contract.servers.keys())[0]
             server = data_contract.servers.get(server_name)
@@ -260,6 +228,38 @@ class DataContract:
 
         return run
 
+    def _get_examples_server(self, data_contract, run, tmp_dir):
+        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+        format = "json"
+        for example in data_contract.examples:
+            format = example.type
+            p = f"{tmp_dir}/{example.model}.{format}"
+            run.log_info(f"Creating example file {p}")
+            with open(p, "w") as f:
+                content = ""
+                if format == "json" and isinstance(example.data, list):
+                    content = json.dumps(example.data)
+                elif format == "json" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "yaml" and isinstance(example.data, list):
+                    content = yaml.dump(example.data, allow_unicode=True)
+                elif format == "yaml" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "csv":
+                    content = example.data
+                logging.debug(f"Content of example file {p}: {content}")
+                f.write(content)
+        path = f"{tmp_dir}" + "/{model}." + format
+        delimiter = "array"
+        server = Server(
+            type="local",
+            path=path,
+            format=format,
+            delimiter=delimiter,
+        )
+        run.log_info(f"Using {server} for testing the examples")
+        return server
+
     def breaking(self, other: "DataContract") -> BreakingChanges:
         return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
 
@@ -302,7 +302,11 @@
         )
 
     def export(
-        self,
+        self,
+        export_format: ExportFormat,
+        model: str = "all",
+        sql_server_type: str = "auto",
+        **kwargs,
     ) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
@@ -311,140 +315,14 @@
             inline_definitions=True,
             inline_quality=True,
         )
-        if export_format == "jsonschema":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_jsonschema_json(model_name, model_value)
-        if export_format == "sodacl":
-            return to_sodacl_yaml(data_contract)
-        if export_format == "dbt":
-            return to_dbt_models_yaml(data_contract)
-        if export_format == "dbt-sources":
-            return to_dbt_sources_yaml(data_contract, self._server)
-        if export_format == "dbt-staging-sql":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_dbt_staging_sql(data_contract, model_name, model_value)
-        if export_format == "odcs":
-            return to_odcs_yaml(data_contract)
-        if export_format == "rdf":
-            return to_rdf_n3(data_contract, rdf_base)
-        if export_format == "protobuf":
-            return to_protobuf(data_contract)
-        if export_format == "avro":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_avro_schema_json(model_name, model_value)
-        if export_format == "avro-idl":
-            return to_avro_idl(data_contract)
-        if export_format == "terraform":
-            return to_terraform(data_contract)
-        if export_format == "sql":
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_ddl(data_contract, server_type=server_type)
-        if export_format == "sql-query":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_query(data_contract, model_name, model_value, server_type)
-        if export_format == "great-expectations":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_great_expectations(data_contract, model_name)
-        if export_format == "pydantic-model":
-            return to_pydantic_model_str(data_contract)
-        if export_format == "html":
-            return to_html(data_contract)
-        if export_format == "go":
-            return to_go_types(data_contract)
-        if export_format == "bigquery":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            found_server = data_contract.servers.get(self._server)
-            if found_server is None:
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            if found_server.type != "bigquery":
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            return to_bigquery_json(model_name, model_value, found_server)
-        if export_format == "dbml":
-            found_server = data_contract.servers.get(self._server)
-            return to_dbml_diagram(data_contract, found_server)
-        else:
-            print(f"Export format {export_format} not supported.")
-            return ""
-
-    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-        if sql_server_type == "auto":
-            if data_contract.servers is None or len(data_contract.servers) == 0:
-                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-            server_types = set([server.type for server in data_contract.servers.values()])
-            if "snowflake" in server_types:
-                return "snowflake"
-            elif "postgres" in server_types:
-                return "postgres"
-            elif "databricks" in server_types:
-                return "databricks"
-            else:
-                # default to snowflake dialect
-                return "snowflake"
-        else:
-            return sql_server_type
 
-
-
-
-
-
-
-            run.log_info(f"Creating example file {p}")
-            with open(p, "w") as f:
-                content = ""
-                if format == "json" and isinstance(example.data, list):
-                    content = json.dumps(example.data)
-                elif format == "json" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "yaml" and isinstance(example.data, list):
-                    content = yaml.dump(example.data, allow_unicode=True)
-                elif format == "yaml" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "csv":
-                    content = example.data
-                logging.debug(f"Content of example file {p}: {content}")
-                f.write(content)
-        path = f"{tmp_dir}" + "/{model}." + format
-        delimiter = "array"
-        server = Server(
-            type="local",
-            path=path,
-            format=format,
-            delimiter=delimiter,
+        return exporter_factory.create(export_format).export(
+            data_contract=data_contract,
+            model=model,
+            server=self._server,
+            sql_server_type=sql_server_type,
+            export_args=kwargs,
         )
-        run.log_info(f"Using {server} for testing the examples")
-        return server
-
-    def _check_models_for_export(
-        self, data_contract: DataContractSpecification, model: str, export_format: str
-    ) -> typing.Tuple[str, str]:
-        if data_contract.models is None:
-            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-        model_names = list(data_contract.models.keys())
-
-        if model == "all":
-            if len(data_contract.models.items()) != 1:
-                raise RuntimeError(
-                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                )
-
-            model_name, model_value = next(iter(data_contract.models.items()))
-        else:
-            model_name = model
-            model_value = data_contract.models.get(model_name)
-            if model_value is None:
-                raise RuntimeError(
-                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                )
-
-        return model_name, model_value
 
     def import_from_source(
         self,
@@ -454,6 +332,7 @@
         bigquery_tables: typing.Optional[typing.List[str]] = None,
         bigquery_project: typing.Optional[str] = None,
         bigquery_dataset: typing.Optional[str] = None,
+        unity_table_full_name: typing.Optional[str] = None
     ) -> DataContractSpecification:
         data_contract_specification = DataContract.init()
 
@@ -472,6 +351,15 @@
             data_contract_specification = import_bigquery_from_api(
                 data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
             )
+        elif format == "odcs":
+            data_contract_specification = import_odcs(data_contract_specification, source)
+        elif format == "unity":
+            if source is not None:
+                data_contract_specification = import_unity_from_json(data_contract_specification, source)
+            else:
+                data_contract_specification = import_unity_from_api(
+                    data_contract_specification, unity_table_full_name
+                )
         else:
             print(f"Import format {format} not supported.")
 
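The per-format if/elif chain is replaced by a lookup through exporter_factory; the factory itself lives in the new datacontract/export/exporter_factory.py, whose contents are not shown in this diff. A hypothetical sketch of such a registry-based factory, with illustrative names only (the real implementation in 0.10.8 may differ):

# Hypothetical sketch; not the actual exporter_factory.py shipped in 0.10.8.
class Exporter:
    # Base class as subclassed by the converter modules below: it stores the
    # requested format so subclasses can reference self.export_format.
    def __init__(self, export_format):
        self.export_format = export_format

    def export(self, data_contract, model, server, sql_server_type, export_args):
        raise NotImplementedError


class ExporterFactory:
    def __init__(self):
        self.registry = {}  # maps an ExportFormat value to an Exporter subclass

    def register_exporter(self, name, exporter_class):
        self.registry[name] = exporter_class

    def create(self, name) -> Exporter:
        if name not in self.registry:
            raise ValueError(f"Export format {name} not supported.")
        return self.registry[name](export_format=name)


exporter_factory = ExporterFactory()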
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os
 
-import s3fs
+from datacontract.model.exceptions import DataContractException
 
 
 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):
 
 
 def s3_fs(s3_endpoint_url):
+    try:
+        import s3fs
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="s3 extra missing",
+            reason="Install the extra datacontract-cli\[s3] to use s3",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     return s3fs.S3FileSystem(
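Moving import s3fs out of module scope means the module now loads without the optional dependency; a missing extra only costs something when s3_fs() is actually called. A sketch of the resulting behavior on an environment without s3fs installed (assuming DataContractException exposes reason as an attribute):

from datacontract.engines.fastjsonschema.s3.s3_read_files import s3_fs
from datacontract.model.exceptions import DataContractException

try:
    fs = s3_fs(s3_endpoint_url=None)  # endpoint irrelevant here; the import fails first
except DataContractException as e:
    print(e.reason)  # points to the datacontract-cli[s3] extra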
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -66,8 +66,10 @@ def check_soda_execute(
         scan.set_data_source_name(server.type)
     elif server.type == "dataframe":
         if spark is None:
-            run.log_warn(
-
+            run.log_warn(
+                "Server type dataframe only works with the Python library and requires a Spark session, "
+                "please provide one with the DataContract class"
+            )
             return
         else:
             logging.info("Use Spark to connect to data source")
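The reworked warning spells out the remedy: for a "dataframe" server, tests must run through the Python API with a Spark session supplied to the DataContract class. A sketch under the assumption that the class accepts a spark keyword and that the temp view name must match the contract's model name:

from pyspark.sql import SparkSession
from datacontract.data_contract import DataContract

spark = SparkSession.builder.appName("datacontract-test").getOrCreate()
df = spark.createDataFrame([(1, "alice")], ["id", "name"])
df.createOrReplaceTempView("my_model")  # hypothetical model name from the contract

run = DataContract(data_contract_file="datacontract.yaml", spark=spark).test()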
datacontract/engines/soda/connections/bigquery.py
CHANGED
@@ -6,10 +6,17 @@ import yaml
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
+
+    # check for our own environment variable first
+    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+    if account_info is None:
+        # but as a fallback look for the default google one
+        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path":
+            "account_info_json_path": account_info,
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
             "dataset": server.dataset,
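The lookup order added here, demonstrated in isolation (both paths are placeholders): the datacontract-specific variable wins, and GOOGLE_APPLICATION_CREDENTIALS is only consulted as a fallback.

import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/keys/default-sa.json"            # placeholder
os.environ["DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"] = "/keys/dc-sa.json"   # placeholder

account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
if account_info is None:
    account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
print(account_info)  # /keys/dc-sa.json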
datacontract/export/__init__.py
File without changes
datacontract/export/avro_converter.py
CHANGED
@@ -1,8 +1,15 @@
 import json
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
 from datacontract.model.data_contract_specification import Field
 
 
+class AvroExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_avro_schema_json(model_name, model_value)
+
+
 def to_avro_schema(model_name, model) -> dict:
     return to_avro_record(model_name, model.fields, model.description, model.namespace)
 
@@ -47,7 +54,13 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     if "avroLogicalType" in field.config and "avroType" in field.config:
         return {"type": field.config["avroType"], "logicalType": field.config["avroLogicalType"]}
     if "avroLogicalType" in field.config:
-        if field.config["avroLogicalType"] in [
+        if field.config["avroLogicalType"] in [
+            "timestamp-millis",
+            "timestamp-micros",
+            "local-timestamp-millis",
+            "local-timestamp-micros",
+            "time-micros",
+        ]:
             return {"type": "long", "logicalType": field.config["avroLogicalType"]}
         if field.config["avroLogicalType"] in ["time-millis", "date"]:
             return {"type": "int", "logicalType": field.config["avroLogicalType"]}
@@ -82,8 +95,7 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     elif field.type in ["binary"]:
         return "bytes"
     elif field.type in ["array"]:
-
-        return "array"
+        return {"type": "array", "items": to_avro_type(field.items, field_name)}
     elif field.type in ["null"]:
         return "null"
     else:
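The array branch previously returned the bare string "array", which is not a valid Avro schema; it now recurses into the element type. A sketch of the expected result for an array-of-strings field, assuming the spec model's Field accepts items and defaults config to an empty dict:

from datacontract.export.avro_converter import to_avro_type
from datacontract.model.data_contract_specification import Field

tags = Field(type="array", items=Field(type="string"))
print(to_avro_type(tags, "tags"))
# expected: {'type': 'array', 'items': 'string'}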
datacontract/export/avro_idl_converter.py
CHANGED
@@ -7,28 +7,7 @@ from datacontract.lint.resolve import inline_definitions_into_data_contract
 from datacontract.model.data_contract_specification import DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException
 
-
-def to_avro_idl(contract: DataContractSpecification) -> str:
-    """Serialize the provided data contract specification into an Avro IDL string.
-
-    The data contract will be serialized as a protocol, with one record type
-    for each contained model. Model fields are mapped one-to-one to Avro IDL
-    record fields.
-    """
-    stream = StringIO()
-    to_avro_idl_stream(contract, stream)
-    return stream.getvalue()
-
-
-def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
-    """Serialize the provided data contract specification into Avro IDL."""
-    ir = _contract_to_avro_idl_ir(contract)
-    if ir.description:
-        stream.write(f"/** {contract.info.description} */\n")
-    stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
-    for model_type in ir.model_types:
-        _write_model_type(model_type, stream)
-    stream.write("}\n")
+from datacontract.export.exporter import Exporter
 
 
 class AvroPrimitiveType(Enum):
@@ -107,6 +86,34 @@ avro_primitive_types = set(
 )
 
 
+class AvroIdlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_avro_idl(data_contract)
+
+
+def to_avro_idl(contract: DataContractSpecification) -> str:
+    """Serialize the provided data contract specification into an Avro IDL string.
+
+    The data contract will be serialized as a protocol, with one record type
+    for each contained model. Model fields are mapped one-to-one to Avro IDL
+    record fields.
+    """
+    stream = StringIO()
+    to_avro_idl_stream(contract, stream)
+    return stream.getvalue()
+
+
+def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
+    """Serialize the provided data contract specification into Avro IDL."""
+    ir = _contract_to_avro_idl_ir(contract)
+    if ir.description:
+        stream.write(f"/** {contract.info.description} */\n")
+    stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
+    for model_type in ir.model_types:
+        _write_model_type(model_type, stream)
+    stream.write("}\n")
+
+
 def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
     result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
     match field.type:
datacontract/export/bigquery_converter.py
CHANGED
@@ -5,6 +5,21 @@ from typing import Dict, List
 from datacontract.model.data_contract_specification import Model, Field, Server
 from datacontract.model.exceptions import DataContractException
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class BigQueryExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        self.dict_args = export_args
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        found_server = data_contract.servers.get(server)
+        if found_server is None:
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+        if found_server.type != "bigquery":
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+
+        return to_bigquery_json(model_name, model_value, found_server)
+
 
 def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
     bigquery_table = to_bigquery_schema(model_name, model_value, server)
datacontract/export/dbml_converter.py
CHANGED
@@ -8,6 +8,15 @@ import datacontract.model.data_contract_specification as spec
 from datacontract.export.sql_type_converter import convert_to_sql_type
 
 
+from datacontract.export.exporter import Exporter
+
+
+class DbmlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        found_server = data_contract.servers.get(server)
+        return to_dbml_diagram(data_contract, found_server)
+
+
 def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
     result = ""
     result += add_generated_info(contract, server) + "\n"
datacontract/export/dbt_converter.py
CHANGED
@@ -5,6 +5,28 @@ import yaml
 from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class DbtExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_models_yaml(data_contract)
+
+
+class DbtSourceExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_sources_yaml(data_contract, server)
+
+
+class DbtStageExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_dbt_staging_sql(
+            data_contract,
+            model_name,
+            model_value,
+        )
+
 
 def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
     dbt = {
@@ -19,7 +41,10 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print(
+        print(
+            "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
+            "Please specify the model name."
+        )
         return ""
 
     id = data_contract_spec.id