datacontract-cli 0.10.8__py3-none-any.whl → 0.10.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/catalog/catalog.py +4 -2
- datacontract/cli.py +36 -18
- datacontract/data_contract.py +13 -53
- datacontract/engines/soda/check_soda_execute.py +10 -2
- datacontract/engines/soda/connections/duckdb.py +32 -12
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +1 -1
- datacontract/export/exporter.py +3 -2
- datacontract/export/exporter_factory.py +132 -39
- datacontract/export/jsonschema_converter.py +7 -7
- datacontract/export/sodacl_converter.py +17 -12
- datacontract/export/spark_converter.py +211 -0
- datacontract/export/sql_type_converter.py +28 -0
- datacontract/imports/avro_importer.py +149 -7
- datacontract/imports/bigquery_importer.py +17 -0
- datacontract/imports/dbt_importer.py +117 -0
- datacontract/imports/glue_importer.py +116 -33
- datacontract/imports/importer.py +34 -0
- datacontract/imports/importer_factory.py +90 -0
- datacontract/imports/jsonschema_importer.py +14 -3
- datacontract/imports/odcs_importer.py +8 -0
- datacontract/imports/spark_importer.py +134 -0
- datacontract/imports/sql_importer.py +8 -0
- datacontract/imports/unity_importer.py +23 -9
- datacontract/integration/publish_datamesh_manager.py +10 -5
- datacontract/lint/resolve.py +87 -21
- datacontract/lint/schema.py +24 -4
- datacontract/model/data_contract_specification.py +37 -4
- datacontract/templates/datacontract.html +18 -3
- datacontract/templates/index.html +1 -1
- datacontract/templates/partials/datacontract_information.html +20 -0
- datacontract/templates/partials/datacontract_terms.html +7 -0
- datacontract/templates/partials/definition.html +9 -1
- datacontract/templates/partials/model_field.html +23 -6
- datacontract/templates/partials/server.html +49 -16
- datacontract/templates/style/output.css +42 -0
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/METADATA +310 -122
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/RECORD +42 -36
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.8.dist-info → datacontract_cli-0.10.10.dist-info}/top_level.txt +0 -0
datacontract/catalog/catalog.py
CHANGED
@@ -10,8 +10,10 @@ from datacontract.export.html_export import get_version
 from datacontract.model.data_contract_specification import DataContractSpecification


-def create_data_contract_html(contracts, file: Path, path: Path):
-    data_contract = DataContract(
+def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
+    data_contract = DataContract(
+        data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema
+    )
     html = data_contract.export(export_format="html")
     spec = data_contract.get_data_contract_specification()
     file_without_suffix = file.with_suffix(".html")
datacontract/cli.py
CHANGED
@@ -1,4 +1,3 @@
-from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
@@ -16,9 +15,12 @@ from typing_extensions import Annotated
 from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract, ExportFormat
+from datacontract.imports.importer import ImportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager

+DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+
 console = Console()


@@ -86,7 +88,7 @@ def lint(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -102,7 +104,7 @@ def test(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
     server: Annotated[
         str,
         typer.Option(
@@ -177,12 +179,15 @@ def export(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Convert data contract to a specific format. console.prints to stdout.
     """
     # TODO exception handling
-    result = DataContract(data_contract_file=location, server=server).export(
+    result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,
         model=model,
         server=server,
@@ -198,16 +203,6 @@ def export(
     console.print(f"Written result to {output}")


-class ImportFormat(str, Enum):
-    sql = "sql"
-    avro = "avro"
-    glue = "glue"
-    bigquery = "bigquery"
-    jsonschema = "jsonschema"
-    odcs="odcs"
-    unity = "unity"
-
-
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
@@ -228,12 +223,29 @@ def import_(
             help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
         ),
     ] = None,
-    unity_table_full_name: Annotated[
+    unity_table_full_name: Annotated[
+        Optional[str], typer.Option(help="Full name of a table in the unity catalog")
+    ] = None,
+    dbt_model: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
+        ),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(
+    result = DataContract().import_from_source(
+        format=format,
+        source=source,
+        glue_table=glue_table,
+        bigquery_table=bigquery_table,
+        bigquery_project=bigquery_project,
+        bigquery_dataset=bigquery_dataset,
+        unity_table_full_name=unity_table_full_name,
+        dbt_model=dbt_model,
+    )
     console.print(result.to_yaml())


@@ -242,12 +254,15 @@ def publish(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Publish the data contract to the Data Mesh Manager.
     """
     publish_to_datamesh_manager(
-        data_contract=DataContract(data_contract_file=location),
+        data_contract=DataContract(data_contract_file=location, schema_location=schema),
     )


@@ -257,6 +272,9 @@ def catalog(
         Optional[str], typer.Option(help="Glob pattern for the data contract files to include in the catalog.")
     ] = "*.yaml",
     output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Create an html catalog of data contracts.
@@ -268,7 +286,7 @@ def catalog(
     contracts = []
     for file in Path().glob(files):
         try:
-            create_data_contract_html(contracts, file, path)
+            create_data_contract_html(contracts, file, path, schema)
         except Exception as e:
             console.print(f"Skipped {file} due to error: {e}")

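For orientation, the new --schema option added to lint, test, export, publish, and catalog feeds the same schema_location parameter of DataContract. A rough sketch of what an export invocation resolves to (illustrative only; the URL is the DEFAULT_DATA_CONTRACT_SCHEMA_URL default defined above):

    # Hypothetical sketch, not code shipped in the release
    from datacontract.data_contract import DataContract

    schema_url = "https://datacontract.com/datacontract.schema.json"  # default from cli.py
    contract = DataContract(data_contract_file="datacontract.yaml", schema_location=schema_url)
    print(contract.export(export_format="html"))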
datacontract/data_contract.py
CHANGED
@@ -4,7 +4,9 @@ import tempfile
 import typing

 import yaml
-from pyspark.sql import SparkSession
+
+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession

 from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
 from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
@@ -14,13 +16,8 @@ from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.exporter import ExportFormat
 from datacontract.export.exporter_factory import exporter_factory
-from datacontract.imports.avro_importer import import_avro
-from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
-from datacontract.imports.glue_importer import import_glue
-from datacontract.imports.jsonschema_importer import import_jsonschema
-from datacontract.imports.odcs_importer import import_odcs
-from datacontract.imports.sql_importer import import_sql
-from datacontract.imports.unity_importer import import_unity_from_json, import_unity_from_api
+from datacontract.imports.importer_factory import importer_factory
+
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -48,7 +45,7 @@ class DataContract:
         examples: bool = False,
         publish_url: str = None,
         publish_to_opentelemetry: bool = False,
-        spark: SparkSession = None,
+        spark: "SparkSession" = None,
         inline_definitions: bool = False,
         inline_quality: bool = False,
     ):
@@ -301,17 +298,12 @@
             inline_quality=self._inline_quality,
         )

-    def export(
-        self,
-        export_format: ExportFormat,
-        model: str = "all",
-        sql_server_type: str = "auto",
-        **kwargs,
-    ) -> str:
+    def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
             self._data_contract,
+            schema_location=self._schema_location,
             inline_definitions=True,
             inline_quality=True,
         )
@@ -325,42 +317,10 @@
         )

     def import_from_source(
-        self,
-        format: str,
-        source: typing.Optional[str] = None,
-        glue_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_project: typing.Optional[str] = None,
-        bigquery_dataset: typing.Optional[str] = None,
-        unity_table_full_name: typing.Optional[str] = None
+        self, format: str, source: typing.Optional[str] = None, **kwargs
     ) -> DataContractSpecification:
-        data_contract_specification = DataContract.init()
-
-        if format == "sql":
-            data_contract_specification = import_sql(data_contract_specification, format, source)
-        elif format == "avro":
-            data_contract_specification = import_avro(data_contract_specification, source)
-        elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
-        elif format == "jsonschema":
-            data_contract_specification = import_jsonschema(data_contract_specification, source)
-        elif format == "bigquery":
-            if source is not None:
-                data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
-            else:
-                data_contract_specification = import_bigquery_from_api(
-                    data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
-                )
-        elif format == "odcs":
-            data_contract_specification = import_odcs(data_contract_specification, source)
-        elif format == "unity":
-            if source is not None:
-                data_contract_specification = import_unity_from_json(data_contract_specification, source)
-            else:
-                data_contract_specification = import_unity_from_api(
-                    data_contract_specification, unity_table_full_name
-                )
-        else:
-            print(f"Import format {format} not supported.")
+        data_contract_specification_initial = DataContract.init()

-        return data_contract_specification
+        return importer_factory.create(format).import_source(
+            data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
+        )
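With the if/elif chain gone, import_from_source only builds an initial specification and delegates to the importer registered for the requested format; any extra keyword arguments travel through as import_args. A minimal sketch of a call (the Avro file name is a placeholder):

    from datacontract.data_contract import DataContract

    # extra options such as bigquery_table or dbt_model would be forwarded via **kwargs
    spec = DataContract().import_from_source(format="avro", source="orders.avsc")
    print(spec.to_yaml())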
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -1,6 +1,9 @@
 import logging
+import typing
+
+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession

-from pyspark.sql import SparkSession
 from soda.scan import Scan

 from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
@@ -10,13 +13,14 @@ from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
 from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
+from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log


 def check_soda_execute(
-    run: Run, data_contract: DataContractSpecification, server: Server, spark: SparkSession, tmp_dir
+    run: Run, data_contract: DataContractSpecification, server: Server, spark: "SparkSession", tmp_dir
 ):
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
@@ -85,6 +89,10 @@
         soda_configuration_str = to_sqlserver_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "trino":
+        soda_configuration_str = to_trino_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

     else:
         run.checks.append(
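Both this file and data_contract.py now import pyspark only under typing.TYPE_CHECKING and quote the SparkSession annotation, so pyspark stays an optional runtime dependency. The pattern in isolation (a generic sketch, not code from the package):

    import typing

    if typing.TYPE_CHECKING:
        # evaluated only by static type checkers; pyspark need not be installed to run this
        from pyspark.sql import SparkSession

    def run_with(spark: "SparkSession" = None) -> None:
        # the quoted annotation is never resolved at runtime
        print(spark is None)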
|
@@ -50,6 +50,10 @@ def get_duckdb_connection(data_contract, server, run: Run):
|
|
|
50
50
|
)
|
|
51
51
|
elif server.format == "delta":
|
|
52
52
|
if server.type == "azure":
|
|
53
|
+
# After switching to native delta table support
|
|
54
|
+
# in https://github.com/datacontract/datacontract-cli/issues/258,
|
|
55
|
+
# azure storage should also work
|
|
56
|
+
# https://github.com/duckdb/duckdb_delta/issues/21
|
|
53
57
|
raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
|
|
54
58
|
|
|
55
59
|
storage_options = {
|
|
@@ -80,6 +84,7 @@ def setup_s3_connection(con, server):
|
|
|
80
84
|
s3_region = os.getenv("DATACONTRACT_S3_REGION")
|
|
81
85
|
s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
|
|
82
86
|
s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
|
|
87
|
+
s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
|
|
83
88
|
s3_endpoint = "s3.amazonaws.com"
|
|
84
89
|
use_ssl = "true"
|
|
85
90
|
url_style = "vhost"
|
|
@@ -90,18 +95,33 @@ def setup_s3_connection(con, server):
|
|
|
90
95
|
url_style = "path"
|
|
91
96
|
|
|
92
97
|
if s3_access_key_id is not None:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
98
|
+
if s3_session_token is not None:
|
|
99
|
+
con.sql(f"""
|
|
100
|
+
CREATE OR REPLACE SECRET s3_secret (
|
|
101
|
+
TYPE S3,
|
|
102
|
+
PROVIDER CREDENTIAL_CHAIN,
|
|
103
|
+
REGION '{s3_region}',
|
|
104
|
+
KEY_ID '{s3_access_key_id}',
|
|
105
|
+
SECRET '{s3_secret_access_key}',
|
|
106
|
+
SESSION_TOKEN '{s3_session_token}',
|
|
107
|
+
ENDPOINT '{s3_endpoint}',
|
|
108
|
+
USE_SSL '{use_ssl}',
|
|
109
|
+
URL_STYLE '{url_style}'
|
|
110
|
+
);
|
|
111
|
+
""")
|
|
112
|
+
else:
|
|
113
|
+
con.sql(f"""
|
|
114
|
+
CREATE OR REPLACE SECRET s3_secret (
|
|
115
|
+
TYPE S3,
|
|
116
|
+
PROVIDER CREDENTIAL_CHAIN,
|
|
117
|
+
REGION '{s3_region}',
|
|
118
|
+
KEY_ID '{s3_access_key_id}',
|
|
119
|
+
SECRET '{s3_secret_access_key}',
|
|
120
|
+
ENDPOINT '{s3_endpoint}',
|
|
121
|
+
USE_SSL '{use_ssl}',
|
|
122
|
+
URL_STYLE '{url_style}'
|
|
123
|
+
);
|
|
124
|
+
""")
|
|
105
125
|
|
|
106
126
|
# con.sql(f"""
|
|
107
127
|
# SET s3_region = '{s3_region}';
|
|
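Credentials for the generated DuckDB S3 secret come from environment variables; the new DATACONTRACT_S3_SESSION_TOKEN selects the branch that adds SESSION_TOKEN to the secret. A sketch of a test run with temporary credentials (all values are placeholders; DataContract.test() is the package's existing test entry point):

    import os
    from datacontract.data_contract import DataContract

    os.environ["DATACONTRACT_S3_REGION"] = "eu-central-1"       # placeholder
    os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "AKIA-example"  # placeholder
    os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "secret"  # placeholder
    os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "token"       # optional; triggers the SESSION_TOKEN branch

    run = DataContract(data_contract_file="datacontract.yaml").test()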
datacontract/engines/soda/connections/trino.py
ADDED
@@ -0,0 +1,26 @@
+import os
+
+import yaml
+
+
+def to_trino_soda_configuration(server):
+    password = os.getenv("DATACONTRACT_TRINO_PASSWORD")
+    username = os.getenv("DATACONTRACT_TRINO_USERNAME")
+
+    data_source = {
+        "type": "trino",
+        "host": server.host,
+        "port": str(server.port),
+        "username": username,
+        "password": password,
+        "catalog": server.catalog,
+        "schema": server.schema_,
+    }
+
+    if password is None or password == "":
+        data_source["auth_type"] = "NoAuthentication"  # default is BasicAuthentication
+
+    soda_configuration = {f"data_source {server.type}": data_source}
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
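The helper only needs host, port, catalog, and schema_ from the server entry plus the two environment variables. A quick way to see the YAML it produces (SimpleNamespace stands in for the Server model that normally comes from the data contract's servers section; all values are placeholders):

    import os
    from types import SimpleNamespace
    from datacontract.engines.soda.connections.trino import to_trino_soda_configuration

    os.environ["DATACONTRACT_TRINO_USERNAME"] = "trino"  # placeholder
    # leaving DATACONTRACT_TRINO_PASSWORD unset yields auth_type NoAuthentication

    server = SimpleNamespace(type="trino", host="localhost", port=8080, catalog="hive", schema_="demo")
    print(to_trino_soda_configuration(server))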
datacontract/export/avro_converter.py
CHANGED
@@ -65,7 +65,7 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
        if field.config["avroLogicalType"] in ["time-millis", "date"]:
            return {"type": "int", "logicalType": field.config["avroLogicalType"]}
    if "avroType" in field.config:
-        return field.config["
+        return field.config["avroType"]

    if field.type is None:
        return "null"
datacontract/export/exporter.py
CHANGED
@@ -34,10 +34,11 @@ class ExportFormat(str, Enum):
     go = "go"
     bigquery = "bigquery"
     dbml = "dbml"
+    spark = "spark"

     @classmethod
-    def
-        return cls
+    def get_suported_formats(cls):
+        return list(map(lambda c: c.value, cls))


 def _check_models_for_export(
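Because ExportFormat is a str Enum, the new helper (the spelling get_suported_formats is taken over from the diff) simply returns the plain string values, which the CLI can use for validation and help output:

    from datacontract.export.exporter import ExportFormat

    # returns the enum values in definition order, e.g. [..., 'bigquery', 'dbml', 'spark']
    print(ExportFormat.get_suported_formats())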
datacontract/export/exporter_factory.py
CHANGED
@@ -1,52 +1,145 @@
-
-
-from datacontract.export.dbml_converter import DbmlExporter
-from datacontract.export.dbt_converter import DbtExporter, DbtSourceExporter, DbtStageExporter
-from datacontract.export.avro_converter import AvroExporter
+import importlib
+import sys
 from datacontract.export.exporter import ExportFormat, Exporter
-from datacontract.export.go_converter import GoExporter
-from datacontract.export.great_expectations_converter import GreateExpectationsExporter
-from datacontract.export.html_export import HtmlExporter
-from datacontract.export.jsonschema_converter import JsonSchemaExporter
-from datacontract.export.odcs_converter import OdcsExporter
-from datacontract.export.protobuf_converter import ProtoBufExporter
-from datacontract.export.pydantic_converter import PydanticExporter
-from datacontract.export.rdf_converter import RdfExporter
-from datacontract.export.sodacl_converter import SodaExporter
-from datacontract.export.sql_converter import SqlExporter, SqlQueryExporter
-from datacontract.export.terraform_converter import TerraformExporter


 class ExporterFactory:
     def __init__(self):
         self.dict_exporter = {}
+        self.dict_lazy_exporter = {}

-    def register_exporter(self, name, exporter):
+    def register_exporter(self, name: str, exporter: Exporter):
         self.dict_exporter.update({name: exporter})

+    def register_lazy_exporter(self, name: str, module_path: str, class_name: str):
+        self.dict_lazy_exporter.update({name: (module_path, class_name)})
+
     def create(self, name) -> Exporter:
-
-
-
+        exporters = self.dict_exporter.copy()
+        exporters.update(self.dict_lazy_exporter.copy())
+        if name not in exporters.keys():
+            raise ValueError(f"The '{name}' format is not supported.")
+        exporter_class = exporters[name]
+        if type(exporters[name]) is tuple:
+            exporter_class = load_module_class(module_path=exporters[name][0], class_name=exporters[name][1])
+        if not exporter_class:
+            raise ValueError(f"Module {name} could not be loaded.")
+        return exporter_class(name)
+
+
+def import_module(module_path):
+    if importlib.util.find_spec(module_path) is not None:
+        try:
+            module = importlib.import_module(module_path)
+        except ModuleNotFoundError:
+            return None
+        sys.modules[module_path] = module
+        return module
+
+
+def load_module_class(module_path, class_name):
+    module = import_module(module_path)
+    if not module:
+        return None
+    return getattr(module, class_name)


 exporter_factory = ExporterFactory()
-
-exporter_factory.
-
-
-
-exporter_factory.
-
-
-
-
-
-exporter_factory.
-
-
-
-
-
-exporter_factory.
-
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.avro, module_path="datacontract.export.avro_converter", class_name="AvroExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.avro_idl,
+    module_path="datacontract.export.avro_idl_converter",
+    class_name="AvroIdlExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.bigquery,
+    module_path="datacontract.export.bigquery_converter",
+    class_name="BigQueryExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dbml, module_path="datacontract.export.dbml_converter", class_name="DbmlExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.rdf, module_path="datacontract.export.rdf_converter", class_name="RdfExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dbt, module_path="datacontract.export.dbt_converter", class_name="DbtExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dbt_sources,
+    module_path="datacontract.export.dbt_converter",
+    class_name="DbtSourceExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dbt_staging_sql,
+    module_path="datacontract.export.dbt_converter",
+    class_name="DbtStageExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.jsonschema,
+    module_path="datacontract.export.jsonschema_converter",
+    class_name="JsonSchemaExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.odcs, module_path="datacontract.export.odcs_converter", class_name="OdcsExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.go, module_path="datacontract.export.go_converter", class_name="GoExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.great_expectations,
+    module_path="datacontract.export.great_expectations_converter",
+    class_name="GreateExpectationsExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.html, module_path="datacontract.export.html_export", class_name="HtmlExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.protobuf,
+    module_path="datacontract.export.protobuf_converter",
+    class_name="ProtoBufExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.pydantic_model,
+    module_path="datacontract.export.pydantic_converter",
+    class_name="PydanticExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.sodacl, module_path="datacontract.export.sodacl_converter", class_name="SodaExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.sql, module_path="datacontract.export.sql_converter", class_name="SqlExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.sql_query, module_path="datacontract.export.sql_converter", class_name="SqlQueryExporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.terraform,
+    module_path="datacontract.export.terraform_converter",
+    class_name="TerraformExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.spark, module_path="datacontract.export.spark_converter", class_name="SparkExporter"
+)
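The effect of the lazy registration is that a converter module (and any optional dependency it pulls in) is imported only when its format is actually requested; create() raises a ValueError if the module cannot be loaded. Roughly:

    from datacontract.export.exporter_factory import exporter_factory

    # datacontract.export.avro_converter is imported only at this point,
    # not when the factory module itself is loaded
    exporter = exporter_factory.create("avro")
    print(type(exporter).__name__)  # AvroExporter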
datacontract/export/jsonschema_converter.py
CHANGED
@@ -58,27 +58,27 @@ def to_property(field: Field) -> dict:
         property["pattern"] = field.pattern
     if field.enum:
         property["enum"] = field.enum
-    if field.minLength:
+    if field.minLength is not None:
         property["minLength"] = field.minLength
-    if field.maxLength:
+    if field.maxLength is not None:
         property["maxLength"] = field.maxLength
     if field.title:
         property["title"] = field.title
     if field.description:
         property["description"] = field.description
-    if field.exclusiveMinimum:
+    if field.exclusiveMinimum is not None:
         property["exclusiveMinimum"] = field.exclusiveMinimum
-    if field.exclusiveMaximum:
+    if field.exclusiveMaximum is not None:
         property["exclusiveMaximum"] = field.exclusiveMaximum
-    if field.minimum:
+    if field.minimum is not None:
         property["minimum"] = field.minimum
-    if field.maximum:
+    if field.maximum is not None:
         property["maximum"] = field.maximum
     if field.tags:
         property["tags"] = field.tags
     if field.pii:
         property["pii"] = field.pii
-    if field.classification:
+    if field.classification is not None:
         property["classification"] = field.classification

     # TODO: all constraints