datacontract-cli 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/catalog/catalog.py +4 -2
- datacontract/cli.py +44 -15
- datacontract/data_contract.py +52 -206
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
- datacontract/engines/soda/check_soda_execute.py +9 -2
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/duckdb.py +28 -12
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/__init__.py +0 -0
- datacontract/export/avro_converter.py +15 -3
- datacontract/export/avro_idl_converter.py +29 -22
- datacontract/export/bigquery_converter.py +15 -0
- datacontract/export/dbml_converter.py +9 -0
- datacontract/export/dbt_converter.py +26 -1
- datacontract/export/exporter.py +88 -0
- datacontract/export/exporter_factory.py +145 -0
- datacontract/export/go_converter.py +6 -0
- datacontract/export/great_expectations_converter.py +10 -0
- datacontract/export/html_export.py +6 -0
- datacontract/export/jsonschema_converter.py +31 -23
- datacontract/export/odcs_converter.py +24 -1
- datacontract/export/protobuf_converter.py +6 -0
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +9 -0
- datacontract/export/sodacl_converter.py +23 -12
- datacontract/export/spark_converter.py +211 -0
- datacontract/export/sql_converter.py +32 -2
- datacontract/export/sql_type_converter.py +32 -5
- datacontract/export/terraform_converter.py +6 -0
- datacontract/imports/avro_importer.py +8 -0
- datacontract/imports/bigquery_importer.py +47 -4
- datacontract/imports/glue_importer.py +122 -30
- datacontract/imports/importer.py +29 -0
- datacontract/imports/importer_factory.py +72 -0
- datacontract/imports/jsonschema_importer.py +8 -0
- datacontract/imports/odcs_importer.py +200 -0
- datacontract/imports/sql_importer.py +8 -0
- datacontract/imports/unity_importer.py +152 -0
- datacontract/lint/resolve.py +22 -1
- datacontract/model/data_contract_specification.py +36 -4
- datacontract/templates/datacontract.html +17 -2
- datacontract/templates/partials/datacontract_information.html +20 -0
- datacontract/templates/partials/datacontract_terms.html +7 -0
- datacontract/templates/partials/definition.html +9 -1
- datacontract/templates/partials/model_field.html +23 -6
- datacontract/templates/partials/server.html +113 -48
- datacontract/templates/style/output.css +51 -0
- datacontract/web.py +17 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/METADATA +298 -59
- datacontract_cli-0.10.9.dist-info/RECORD +93 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/WHEEL +1 -1
- datacontract_cli-0.10.7.dist-info/RECORD +0 -84
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/top_level.txt +0 -0
datacontract/catalog/catalog.py
CHANGED
@@ -10,8 +10,10 @@ from datacontract.export.html_export import get_version
 from datacontract.model.data_contract_specification import DataContractSpecification


-def create_data_contract_html(contracts, file: Path, path: Path):
-    data_contract = DataContract(
+def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
+    data_contract = DataContract(
+        data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema
+    )
     html = data_contract.export(export_format="html")
     spec = data_contract.get_data_contract_specification()
     file_without_suffix = file.with_suffix(".html")
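For orientation, the catalog rendering now passes the schema location through to each contract; a minimal sketch of the equivalent direct call (the file name is illustrative, and the schema URL shown is the CLI default introduced below):

from pathlib import Path
from datacontract.data_contract import DataContract

# illustrative contract file; schema may be a URL or a local path
file = Path("orders.datacontract.yaml")
data_contract = DataContract(
    data_contract_file=f"{file.absolute()}",
    inline_definitions=True,
    inline_quality=True,
    schema_location="https://datacontract.com/datacontract.schema.json",
)
html = data_contract.export(export_format="html")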
datacontract/cli.py
CHANGED
@@ -1,10 +1,10 @@
-from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
 from typing import List

 import typer
+import uvicorn
 from click import Context
 from rich import box
 from rich.console import Console
@@ -12,11 +12,15 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

+from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract, ExportFormat
+from datacontract.imports.importer import ImportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager

+DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+
 console = Console()
@@ -84,7 +88,7 @@ def lint(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] =
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -100,7 +104,7 @@ def test(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] =
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
     server: Annotated[
         str,
         typer.Option(
@@ -175,14 +179,18 @@ def export(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Convert data contract to a specific format. console.prints to stdout.
     """
     # TODO exception handling
-    result = DataContract(data_contract_file=location, server=server).export(
+    result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,
         model=model,
+        server=server,
         rdf_base=rdf_base,
         sql_server_type=sql_server_type,
     )
@@ -195,14 +203,6 @@ def export(
     console.print(f"Written result to {output}")


-class ImportFormat(str, Enum):
-    sql = "sql"
-    avro = "avro"
-    glue = "glue"
-    bigquery = "bigquery"
-    jsonschema = "jsonschema"
-
-
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
@@ -223,11 +223,22 @@ def import_(
             help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
         ),
     ] = None,
+    unity_table_full_name: Annotated[
+        Optional[str], typer.Option(help="Full name of a table in the unity catalog")
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(
+    result = DataContract().import_from_source(
+        format=format,
+        source=source,
+        glue_table=glue_table,
+        bigquery_table=bigquery_table,
+        bigquery_project=bigquery_project,
+        bigquery_dataset=bigquery_dataset,
+        unity_table_full_name=unity_table_full_name,
+    )
     console.print(result.to_yaml())

@@ -236,12 +247,15 @@ def publish(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Publish the data contract to the Data Mesh Manager.
     """
     publish_to_datamesh_manager(
-        data_contract=DataContract(data_contract_file=location),
+        data_contract=DataContract(data_contract_file=location, schema_location=schema),
     )

@@ -251,6 +265,9 @@ def catalog(
         Optional[str], typer.Option(help="Glob pattern for the data contract files to include in the catalog.")
     ] = "*.yaml",
     output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Create an html catalog of data contracts.
@@ -262,7 +279,7 @@ def catalog(
     contracts = []
     for file in Path().glob(files):
         try:
-            create_data_contract_html(contracts, file, path)
+            create_data_contract_html(contracts, file, path, schema)
         except Exception as e:
             console.print(f"Skipped {file} due to error: {e}")
@@ -323,6 +340,18 @@ def diff(
     console.print(result.changelog_str())


+@app.command()
+def serve(
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+):
+    """
+    Start the datacontract web server.
+    """
+
+    uvicorn.run(web.app, port=port, host=host)
+
+
 def _handle_result(run):
     _print_table(run)
     if run.result == "passed":
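The new serve command is a thin wrapper around uvicorn; a minimal sketch of the programmatic equivalent, using the defaults shown above:

import uvicorn
from datacontract import web

# equivalent of `datacontract serve` with the default host and port
uvicorn.run(web.app, port=4242, host="127.0.0.1")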
datacontract/data_contract.py
CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import tempfile
 import typing
-from enum import Enum

 import yaml
 from pyspark.sql import SparkSession
@@ -13,27 +12,10 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.
-from datacontract.export.
-from datacontract.
-
-from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.go_converter import to_go_types
-from datacontract.export.great_expectations_converter import to_great_expectations
-from datacontract.export.html_export import to_html
-from datacontract.export.jsonschema_converter import to_jsonschema_json
-from datacontract.export.odcs_converter import to_odcs_yaml
-from datacontract.export.protobuf_converter import to_protobuf
-from datacontract.export.pydantic_converter import to_pydantic_model_str
-from datacontract.export.rdf_converter import to_rdf_n3
-from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
-from datacontract.export.terraform_converter import to_terraform
-from datacontract.imports.avro_importer import import_avro
-from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
-from datacontract.imports.glue_importer import import_glue
-from datacontract.imports.jsonschema_importer import import_jsonschema
-from datacontract.imports.sql_importer import import_sql
+from datacontract.export.exporter import ExportFormat
+from datacontract.export.exporter_factory import exporter_factory
+from datacontract.imports.importer_factory import importer_factory
+
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -50,28 +32,6 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check


-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 class DataContract:
     def __init__(
         self,
@@ -207,6 +167,9 @@ class DataContract:
         if self._examples:
             server_name = "examples"
             server = self._get_examples_server(data_contract, run, tmp_dir)
+        elif self._server:
+            server_name = self._server
+            server = data_contract.servers.get(server_name)
         else:
             server_name = list(data_contract.servers.keys())[0]
             server = data_contract.servers.get(server_name)
@@ -260,6 +223,38 @@ class DataContract:

         return run

+    def _get_examples_server(self, data_contract, run, tmp_dir):
+        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+        format = "json"
+        for example in data_contract.examples:
+            format = example.type
+            p = f"{tmp_dir}/{example.model}.{format}"
+            run.log_info(f"Creating example file {p}")
+            with open(p, "w") as f:
+                content = ""
+                if format == "json" and isinstance(example.data, list):
+                    content = json.dumps(example.data)
+                elif format == "json" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "yaml" and isinstance(example.data, list):
+                    content = yaml.dump(example.data, allow_unicode=True)
+                elif format == "yaml" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "csv":
+                    content = example.data
+                logging.debug(f"Content of example file {p}: {content}")
+                f.write(content)
+        path = f"{tmp_dir}" + "/{model}." + format
+        delimiter = "array"
+        server = Server(
+            type="local",
+            path=path,
+            format=format,
+            delimiter=delimiter,
+        )
+        run.log_info(f"Using {server} for testing the examples")
+        return server
+
     def breaking(self, other: "DataContract") -> BreakingChanges:
         return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
@@ -301,178 +296,29 @@ class DataContract:
             inline_quality=self._inline_quality,
         )

-    def export(
-        self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
-    ) -> str:
+    def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
             self._data_contract,
+            schema_location=self._schema_location,
             inline_definitions=True,
             inline_quality=True,
         )
-        if export_format == "jsonschema":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_jsonschema_json(model_name, model_value)
-        if export_format == "sodacl":
-            return to_sodacl_yaml(data_contract)
-        if export_format == "dbt":
-            return to_dbt_models_yaml(data_contract)
-        if export_format == "dbt-sources":
-            return to_dbt_sources_yaml(data_contract, self._server)
-        if export_format == "dbt-staging-sql":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_dbt_staging_sql(data_contract, model_name, model_value)
-        if export_format == "odcs":
-            return to_odcs_yaml(data_contract)
-        if export_format == "rdf":
-            return to_rdf_n3(data_contract, rdf_base)
-        if export_format == "protobuf":
-            return to_protobuf(data_contract)
-        if export_format == "avro":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_avro_schema_json(model_name, model_value)
-        if export_format == "avro-idl":
-            return to_avro_idl(data_contract)
-        if export_format == "terraform":
-            return to_terraform(data_contract)
-        if export_format == "sql":
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_ddl(data_contract, server_type=server_type)
-        if export_format == "sql-query":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_query(data_contract, model_name, model_value, server_type)
-        if export_format == "great-expectations":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_great_expectations(data_contract, model_name)
-        if export_format == "pydantic-model":
-            return to_pydantic_model_str(data_contract)
-        if export_format == "html":
-            return to_html(data_contract)
-        if export_format == "go":
-            return to_go_types(data_contract)
-        if export_format == "bigquery":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            found_server = data_contract.servers.get(self._server)
-            if found_server is None:
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            if found_server.type != "bigquery":
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            return to_bigquery_json(model_name, model_value, found_server)
-        if export_format == "dbml":
-            found_server = data_contract.servers.get(self._server)
-            return to_dbml_diagram(data_contract, found_server)
-        else:
-            print(f"Export format {export_format} not supported.")
-            return ""
-
-    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-        if sql_server_type == "auto":
-            if data_contract.servers is None or len(data_contract.servers) == 0:
-                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-            server_types = set([server.type for server in data_contract.servers.values()])
-            if "snowflake" in server_types:
-                return "snowflake"
-            elif "postgres" in server_types:
-                return "postgres"
-            elif "databricks" in server_types:
-                return "databricks"
-            else:
-                # default to snowflake dialect
-                return "snowflake"
-        else:
-            return sql_server_type

-
-
-
-
-
-
-            run.log_info(f"Creating example file {p}")
-            with open(p, "w") as f:
-                content = ""
-                if format == "json" and isinstance(example.data, list):
-                    content = json.dumps(example.data)
-                elif format == "json" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "yaml" and isinstance(example.data, list):
-                    content = yaml.dump(example.data, allow_unicode=True)
-                elif format == "yaml" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "csv":
-                    content = example.data
-                logging.debug(f"Content of example file {p}: {content}")
-                f.write(content)
-        path = f"{tmp_dir}" + "/{model}." + format
-        delimiter = "array"
-        server = Server(
-            type="local",
-            path=path,
-            format=format,
-            delimiter=delimiter,
+        return exporter_factory.create(export_format).export(
+            data_contract=data_contract,
+            model=model,
+            server=self._server,
+            sql_server_type=sql_server_type,
+            export_args=kwargs,
         )
-        run.log_info(f"Using {server} for testing the examples")
-        return server
-
-    def _check_models_for_export(
-        self, data_contract: DataContractSpecification, model: str, export_format: str
-    ) -> typing.Tuple[str, str]:
-        if data_contract.models is None:
-            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-        model_names = list(data_contract.models.keys())
-
-        if model == "all":
-            if len(data_contract.models.items()) != 1:
-                raise RuntimeError(
-                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                )
-
-            model_name, model_value = next(iter(data_contract.models.items()))
-        else:
-            model_name = model
-            model_value = data_contract.models.get(model_name)
-            if model_value is None:
-                raise RuntimeError(
-                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                )
-
-        return model_name, model_value

     def import_from_source(
-        self,
-        format: str,
-        source: typing.Optional[str] = None,
-        glue_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_project: typing.Optional[str] = None,
-        bigquery_dataset: typing.Optional[str] = None,
+        self, format: str, source: typing.Optional[str] = None, **kwargs
     ) -> DataContractSpecification:
-
-
-        if format == "sql":
-            data_contract_specification = import_sql(data_contract_specification, format, source)
-        elif format == "avro":
-            data_contract_specification = import_avro(data_contract_specification, source)
-        elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
-        elif format == "jsonschema":
-            data_contract_specification = import_jsonschema(data_contract_specification, source)
-        elif format == "bigquery":
-            if source is not None:
-                data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
-            else:
-                data_contract_specification = import_bigquery_from_api(
-                    data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
-                )
-        else:
-            print(f"Import format {format} not supported.")
+        data_contract_specification_initial = DataContract.init()

-        return
+        return importer_factory.create(format).import_source(
+            data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
+        )
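With this refactor, export and import are dispatched through the new exporter and importer factories; a minimal usage sketch of the new signatures (the contract and source file names are illustrative):

from datacontract.data_contract import DataContract
from datacontract.export.exporter import ExportFormat

# export: extra keyword arguments are passed through to the exporter as export_args
html = DataContract(data_contract_file="datacontract.yaml").export(export_format=ExportFormat.html)

# import: extra keyword arguments are passed through to the importer as import_args
spec = DataContract().import_from_source(format="sql", source="tables.sql")
print(spec.to_yaml())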
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os

-import
+from datacontract.model.exceptions import DataContractException


 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):


 def s3_fs(s3_endpoint_url):
+    try:
+        import s3fs
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="s3 extra missing",
+            reason="Install the extra datacontract-cli\[s3] to use s3",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     return s3fs.S3FileSystem(
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -10,6 +10,7 @@ from datacontract.engines.soda.connections.kafka import create_spark_session, re
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
 from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
+from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log
@@ -66,8 +67,10 @@ def check_soda_execute(
         scan.set_data_source_name(server.type)
     elif server.type == "dataframe":
         if spark is None:
-            run.log_warn(
-
+            run.log_warn(
+                "Server type dataframe only works with the Python library and requires a Spark session, "
+                "please provide one with the DataContract class"
+            )
             return
         else:
             logging.info("Use Spark to connect to data source")
@@ -83,6 +86,10 @@ def check_soda_execute(
         soda_configuration_str = to_sqlserver_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "trino":
+        soda_configuration_str = to_trino_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

     else:
         run.checks.append(
datacontract/engines/soda/connections/bigquery.py
CHANGED
@@ -6,10 +6,17 @@ import yaml
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
+
+    # check for our own environment variable first
+    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+    if account_info is None:
+        # but as a fallback look for the default google one
+        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path":
+            "account_info_json_path": account_info,
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
             "dataset": server.dataset,
datacontract/engines/soda/connections/duckdb.py
CHANGED
@@ -80,6 +80,7 @@ def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
     s3_endpoint = "s3.amazonaws.com"
     use_ssl = "true"
     url_style = "vhost"
@@ -90,18 +91,33 @@ def setup_s3_connection(con, server):
         url_style = "path"

     if s3_access_key_id is not None:
-
-
-
-
-
-
-
-
-
-
-
-
+        if s3_session_token is not None:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                SESSION_TOKEN '{s3_session_token}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)
+        else:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)

     # con.sql(f"""
     #     SET s3_region = '{s3_region}';
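A short sketch of the environment variables this DuckDB S3 connection reads, with placeholder values; DATACONTRACT_S3_SESSION_TOKEN is the variable added here for temporary credentials:

import os

os.environ["DATACONTRACT_S3_REGION"] = "eu-central-1"
os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "AKIA..."
os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "..."
# new: session token for temporary (STS) credentials
os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "..."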
datacontract/engines/soda/connections/trino.py
ADDED
@@ -0,0 +1,26 @@
+import os
+
+import yaml
+
+
+def to_trino_soda_configuration(server):
+    password = os.getenv("DATACONTRACT_TRINO_PASSWORD")
+    username = os.getenv("DATACONTRACT_TRINO_USERNAME")
+
+    data_source = {
+        "type": "trino",
+        "host": server.host,
+        "port": str(server.port),
+        "username": username,
+        "password": password,
+        "catalog": server.catalog,
+        "schema": server.schema_,
+    }
+
+    if password is None or password == "":
+        data_source["auth_type"] = "NoAuthentication"  # default is BasicAuthentication
+
+    soda_configuration = {f"data_source {server.type}": data_source}
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
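A minimal, hedged usage sketch of the new Trino helper; the contract file name and the "my-trino-server" key are illustrative, while the username/password environment variables are the ones read above:

import os
from datacontract.data_contract import DataContract
from datacontract.engines.soda.connections.trino import to_trino_soda_configuration

os.environ["DATACONTRACT_TRINO_USERNAME"] = "analyst"
os.environ["DATACONTRACT_TRINO_PASSWORD"] = ""  # empty or unset password switches to NoAuthentication

# "datacontract.yaml" must define a server of type "trino" under the illustrative key used here
spec = DataContract(data_contract_file="datacontract.yaml").get_data_contract_specification()
print(to_trino_soda_configuration(spec.servers["my-trino-server"]))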
File without changes