datacontract-cli 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +12 -0
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +42 -8
- datacontract/data_contract.py +84 -134
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +23 -2
- datacontract/export/bigquery_converter.py +107 -0
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/go_converter.py +98 -0
- datacontract/export/html_export.py +4 -2
- datacontract/export/jsonschema_converter.py +41 -2
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_converter.py +1 -0
- datacontract/export/sql_type_converter.py +125 -4
- datacontract/imports/avro_importer.py +41 -14
- datacontract/imports/bigquery_importer.py +178 -0
- datacontract/imports/jsonschema_importer.py +148 -0
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +65 -1
- datacontract/publish/publish.py +32 -0
- datacontract/py.typed +0 -0
- datacontract/templates/datacontract.html +37 -346
- datacontract/templates/index.html +70 -5
- datacontract/templates/partials/datacontract_information.html +66 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +44 -0
- datacontract/templates/partials/definition.html +99 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +97 -0
- datacontract/templates/partials/server.html +144 -0
- datacontract/templates/style/output.css +99 -13
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +276 -139
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +41 -26
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py
CHANGED

@@ -257,6 +257,18 @@ def field_breaking_changes(
             )
             continue

+        if field_definition_field == "items" and old_field.type == "array" and new_field.type == "array":
+            results.extend(
+                field_breaking_changes(
+                    old_field=old_value,
+                    new_field=new_value,
+                    composition=composition + ["items"],
+                    new_path=new_path,
+                    include_severities=include_severities,
+                )
+            )
+            continue
+
         rule_name = None
         description = None

datacontract/breaking/breaking_rules.py
CHANGED

@@ -90,6 +90,10 @@ class BreakingRules:
     field_tags_removed = Severity.INFO
     field_tags_updated = Severity.INFO

+    field_example_added = Severity.INFO
+    field_example_updated = Severity.INFO
+    field_example_removed = Severity.INFO
+
     # quality Rules
     quality_added = Severity.INFO
     quality_removed = Severity.WARNING
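The new entries follow the existing `field_<property>_<change>` naming scheme, so the severity of a detected change can be resolved by name against this rule table. A rough sketch of such a lookup (illustrative only, not the project's exact helper):

    from datacontract.breaking.breaking_rules import BreakingRules

    # Illustrative only: breaking.py composes rule names such as
    # "field_example_updated" and resolves their severity against the rule table.
    severity = getattr(BreakingRules, "field_example_updated")
    print(severity)  # Severity.INFO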
datacontract/catalog/catalog.py
CHANGED

@@ -7,8 +7,7 @@ from jinja2 import PackageLoader, Environment, select_autoescape

 from datacontract.data_contract import DataContract
 from datacontract.export.html_export import get_version
-from datacontract.model.data_contract_specification import
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification


 def create_data_contract_html(contracts, file: Path, path: Path):

@@ -71,6 +70,7 @@ def create_index_html(contracts, path):
         datacontract_cli_version=datacontract_cli_version,
         contracts=contracts,
         contracts_size=len(contracts),
+        owners=sorted(set(dc.spec.info.owner for dc in contracts if dc.spec.info.owner)),
     )
     f.write(html_string)
     print(f"Created {index_filepath}")
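The new `owners` template variable is the sorted, de-duplicated list of owners across all contracts. A quick illustration of the expression, with plain strings standing in for `dc.spec.info.owner`:

    owners = ["team-orders", "team-checkout", None, "team-orders"]
    print(sorted(set(o for o in owners if o)))  # ['team-checkout', 'team-orders']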
datacontract/cli.py
CHANGED

@@ -2,6 +2,7 @@ from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
+from typing import List

 import typer
 from click import Context

@@ -11,11 +12,10 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

-from datacontract.catalog.catalog import create_index_html,
-    create_data_contract_html
+from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract
-from datacontract.init.download_datacontract_file import
-    download_datacontract_file, FileExistsException
+from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
+from datacontract.publish.publish import publish_to_datamesh_manager

 console = Console()

@@ -158,12 +158,20 @@ class ExportFormat(str, Enum):
     sql = "sql"
     sql_query = "sql-query"
     html = "html"
+    go = "go"
+    bigquery = "bigquery"
+    dbml = "dbml"


 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
-    output: Annotated[
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the exported data will be saved. If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
     server: Annotated[str, typer.Option(help="The server name to export.")] = None,
     model: Annotated[
         str,

@@ -204,7 +212,7 @@ def export(
     if output is None:
         console.print(result, markup=False)
     else:
-        with output.open(
+        with output.open("w") as f:
             f.write(result)
         console.print(f"Written result to {output}")

@@ -213,20 +221,46 @@ class ImportFormat(str, Enum):
     sql = "sql"
     avro = "avro"
     glue = "glue"
+    bigquery = "bigquery"
+    jsonschema = "jsonschema"


 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
-    source: Annotated[
+    source: Annotated[
+        Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")
+    ] = None,
+    bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
+    bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
+    bigquery_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source)
+    result = DataContract().import_from_source(format, source, bigquery_table, bigquery_project, bigquery_dataset)
     console.print(result.to_yaml())


+@app.command(name="publish")
+def publish(
+    location: Annotated[
+        str, typer.Argument(help="The location (url or path) of the data contract yaml.")
+    ] = "datacontract.yaml",
+):
+    """
+    Publish the data contract to the Data Mesh Manager.
+    """
+    publish_to_datamesh_manager(
+        data_contract=DataContract(data_contract_file=location),
+    )
+
+
 @app.command(name="catalog")
 def catalog(
     files: Annotated[
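Both new commands are thin wrappers around functions visible in this diff. A minimal sketch of what they delegate to from Python; the project id, dataset id and table names are placeholders, and authentication to the Data Mesh Manager is configured via environment variables:

    from datacontract.data_contract import DataContract
    from datacontract.publish.publish import publish_to_datamesh_manager

    # Import a contract from the BigQuery API (no source file given); ids are placeholders.
    spec = DataContract().import_from_source(
        "bigquery", None, ["orders"], "my-gcp-project", "my_dataset"
    )
    print(spec.to_yaml())

    # Publish an existing contract to the Data Mesh Manager.
    publish_to_datamesh_manager(data_contract=DataContract(data_contract_file="datacontract.yaml"))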
datacontract/data_contract.py
CHANGED

@@ -6,20 +6,19 @@ import typing
 import yaml
 from pyspark.sql import SparkSession

-from datacontract.breaking.breaking import models_breaking_changes,
-    quality_breaking_changes
+from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
 from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
     check_that_datacontract_contains_valid_server_configuration,
 )
-from datacontract.engines.fastjsonschema.check_jsonschema import
-    check_jsonschema
+from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.export.avro_idl_converter import to_avro_idl
-from datacontract.export.
-
-from datacontract.export.
-
+from datacontract.export.bigquery_converter import to_bigquery_json
+from datacontract.export.dbml_converter import to_dbml_diagram
+from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
+from datacontract.export.go_converter import to_go_types
+from datacontract.export.great_expectations_converter import to_great_expectations
 from datacontract.export.html_export import to_html
 from datacontract.export.jsonschema_converter import to_jsonschema_json
 from datacontract.export.odcs_converter import to_odcs_yaml

@@ -30,26 +29,22 @@ from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
 from datacontract.export.terraform_converter import to_terraform
 from datacontract.imports.avro_importer import import_avro
+from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
 from datacontract.imports.glue_importer import import_glue
+from datacontract.imports.jsonschema_importer import import_jsonschema
 from datacontract.imports.sql_importer import import_sql
-from datacontract.integration.publish_datamesh_manager import
-    publish_datamesh_manager
+from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter
 from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
-from datacontract.lint.linters.field_reference_linter import
-    FieldReferenceLinter
+from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.quality_schema_linter import
-
-from datacontract.
-
-from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
-    Severity
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
+from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
+from datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check

@@ -289,28 +284,8 @@ class DataContract:
             inline_quality=True,
         )
         if export_format == "jsonschema":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_jsonschema_json(model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_jsonschema_json(model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_jsonschema_json(model_name, model_value)
         if export_format == "sodacl":
             return to_sodacl_yaml(data_contract)
         if export_format == "dbt":
@@ -318,28 +293,8 @@ class DataContract:
         if export_format == "dbt-sources":
             return to_dbt_sources_yaml(data_contract, self._server)
         if export_format == "dbt-staging-sql":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_dbt_staging_sql(data_contract, model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_dbt_staging_sql(data_contract, model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_dbt_staging_sql(data_contract, model_name, model_value)
         if export_format == "odcs":
             return to_odcs_yaml(data_contract)
         if export_format == "rdf":
@@ -347,28 +302,8 @@ class DataContract:
         if export_format == "protobuf":
             return to_protobuf(data_contract)
         if export_format == "avro":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_avro_schema_json(model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_avro_schema_json(model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_avro_schema_json(model_name, model_value)
         if export_format == "avro-idl":
             return to_avro_idl(data_contract)
         if export_format == "terraform":
@@ -377,59 +312,33 @@ class DataContract:
             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
             return to_sql_ddl(data_contract, server_type=server_type)
         if export_format == "sql-query":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_sql_query(data_contract, model_name, model_value, server_type)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_sql_query(data_contract, model_name, model_value, server_type)
-
+            return to_sql_query(data_contract, model_name, model_value, server_type)
         if export_format == "great-expectations":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model "
-                        f"$MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_great_expectations(data_contract, model_name)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. " f"Available models: {model_names}"
-                    )
-
-                return to_great_expectations(data_contract, model_name)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_great_expectations(data_contract, model_name)
         if export_format == "pydantic-model":
             return to_pydantic_model_str(data_contract)
         if export_format == "html":
             return to_html(data_contract)
+        if export_format == "go":
+            return to_go_types(data_contract)
+        if export_format == "bigquery":
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            found_server = data_contract.servers.get(self._server)
+            if found_server is None:
+                raise RuntimeError(
+                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
+                )
+            if found_server.type != "bigquery":
+                raise RuntimeError(
+                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
+                )
+            return to_bigquery_json(model_name, model_value, found_server)
+        if export_format == "dbml":
+            found_server = data_contract.servers.get(self._server)
+            return to_dbml_diagram(data_contract, found_server)
         else:
             print(f"Export format {export_format} not supported.")
             return ""
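For the new formats the dispatch is driven from the same export entry point as before. A hedged end-to-end sketch; the DataContract constructor keywords and the export() signature are assumed from how cli.py calls them and are not shown in this hunk:

    from datacontract.data_contract import DataContract

    # Assumed API: data_contract_file/server kwargs and export(export_format=..., model=...).
    dc = DataContract(data_contract_file="datacontract.yaml", server="production")
    print(dc.export(export_format="bigquery", model="orders"))  # BigQuery table JSON
    print(dc.export(export_format="dbml"))                      # DBML diagram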
@@ -484,7 +393,39 @@ class DataContract:
         run.log_info(f"Using {server} for testing the examples")
         return server

-    def
+    def _check_models_for_export(
+        self, data_contract: DataContractSpecification, model: str, export_format: str
+    ) -> typing.Tuple[str, str]:
+        if data_contract.models is None:
+            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
+
+        model_names = list(data_contract.models.keys())
+
+        if model == "all":
+            if len(data_contract.models.items()) != 1:
+                raise RuntimeError(
+                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+                )
+
+            model_name, model_value = next(iter(data_contract.models.items()))
+        else:
+            model_name = model
+            model_value = data_contract.models.get(model_name)
+            if model_value is None:
+                raise RuntimeError(
+                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
+                )
+
+        return model_name, model_value
+
+    def import_from_source(
+        self,
+        format: str,
+        source: typing.Optional[str] = None,
+        bigquery_tables: typing.Optional[typing.List[str]] = None,
+        bigquery_project: typing.Optional[str] = None,
+        bigquery_dataset: typing.Optional[str] = None,
+    ) -> DataContractSpecification:
         data_contract_specification = DataContract.init()

         if format == "sql":
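The selection rule that _check_models_for_export centralises is easy to state: with --model all (the default) a model-specific export only succeeds when the contract defines exactly one model; otherwise the named model must exist. A standalone distillation of that rule over a plain dict, for illustration only:

    def pick_model(models: dict, model: str = "all"):
        names = list(models.keys())
        if model == "all":
            if len(models) != 1:
                raise RuntimeError(f"Model-specific export. Specify --model. Available models: {names}")
            return next(iter(models.items()))
        if model not in models:
            raise RuntimeError(f"Model {model} not found. Available models: {names}")
        return model, models[model]

    print(pick_model({"orders": "..."}))                   # ('orders', '...')
    print(pick_model({"orders": 1, "lines": 2}, "lines"))  # ('lines', 2)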
@@ -493,6 +434,15 @@ class DataContract:
             data_contract_specification = import_avro(data_contract_specification, source)
         elif format == "glue":
             data_contract_specification = import_glue(data_contract_specification, source)
+        elif format == "jsonschema":
+            data_contract_specification = import_jsonschema(data_contract_specification, source)
+        elif format == "bigquery":
+            if source is not None:
+                data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
+            else:
+                data_contract_specification = import_bigquery_from_api(
+                    data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
+                )
         else:
             print(f"Import format {format} not supported.")

datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -9,6 +9,7 @@ from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
 from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
+from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log

@@ -69,6 +70,10 @@ def check_soda_execute(
         read_kafka_topic(spark, data_contract, server, tmp_dir)
         scan.add_spark_session(spark, data_source_name=server.type)
         scan.set_data_source_name(server.type)
+    elif server.type == "sqlserver":
+        soda_configuration_str = to_sqlserver_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

     else:
         run.checks.append(
datacontract/engines/soda/connections/duckdb.py
CHANGED

@@ -87,8 +87,7 @@ def setup_s3_connection(con, server):
     s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://")
     if server.endpointUrl.startswith("http://"):
         use_ssl = "false"
-        url_style =
-
+        url_style = "path"

     if s3_access_key_id is not None:
         con.sql(f"""
datacontract/engines/soda/connections/sqlserver.py
ADDED

@@ -0,0 +1,43 @@
+import os
+
+import yaml
+
+from datacontract.model.data_contract_specification import Server
+
+
+def to_sqlserver_soda_configuration(server: Server) -> str:
+    """Serialize server config to soda configuration.
+
+
+    ### Example:
+    type: sqlserver
+    host: host
+    port: '1433'
+    username: simple
+    password: simple_pass
+    database: database
+    schema: dbo
+    trusted_connection: false
+    encrypt: false
+    trust_server_certificate: false
+    driver: ODBC Driver 18 for SQL Server
+    """
+    # with service account key, using an external json file
+    soda_configuration = {
+        f"data_source {server.type}": {
+            "type": "sqlserver",
+            "host": server.host,
+            "port": str(server.port),
+            "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ""),
+            "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ""),
+            "database": server.database,
+            "schema": server.schema_,
+            "trusted_connection": os.getenv("DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION", False),
+            "trust_server_certificate": os.getenv("DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE", False),
+            "encrypt": os.getenv("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True),
+            "driver": server.driver,
+        }
+    }
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
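Credentials and TLS-related options come from environment variables, while host, port, database, schema and driver come from the server entry in the contract. A hedged usage sketch; the Server keyword arguments are assumed to mirror the attributes read above:

    import os
    from datacontract.model.data_contract_specification import Server
    from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration

    # Credentials come from the environment, not from the data contract.
    os.environ["DATACONTRACT_SQLSERVER_USERNAME"] = "sa"
    os.environ["DATACONTRACT_SQLSERVER_PASSWORD"] = "change-me"

    server = Server(type="sqlserver", host="localhost", port=1433, database="orders_db",
                    schema_="dbo", driver="ODBC Driver 18 for SQL Server")
    print(to_sqlserver_soda_configuration(server))  # YAML shaped like the docstring example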
datacontract/export/avro_converter.py
CHANGED

@@ -34,6 +34,16 @@ def to_avro_field(field, field_name):
     if field.description is not None:
         avro_field["doc"] = field.description
     avro_field["type"] = to_avro_type(field, field_name)
+    # add logical type definitions for any of the date type fields
+    if field.type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
+        avro_field["logicalType"] = to_avro_logical_type(field.type)
+
+    if field.config:
+        if "avroLogicalType" in field.config:
+            avro_field["logicalType"] = field.config["avroLogicalType"]
+        if "avroDefault" in field.config:
+            avro_field["default"] = field.config["avroDefault"]
+
     return avro_field


@@ -54,9 +64,9 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     elif field.type in ["boolean"]:
         return "boolean"
     elif field.type in ["timestamp", "timestamp_tz"]:
-        return "
+        return "long"
     elif field.type in ["timestamp_ntz"]:
-        return "
+        return "long"
     elif field.type in ["date"]:
         return "int"
     elif field.type in ["time"]:

@@ -72,3 +82,14 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         return "null"
     else:
         return "bytes"
+
+
+def to_avro_logical_type(type: str) -> str:
+    if type in ["timestamp", "timestamp_tz"]:
+        return "timestamp-millis"
+    elif type in ["timestamp_ntz"]:
+        return "local-timestamp-millis"
+    elif type in ["date"]:
+        return "date"
+    else:
+        return ""
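With these changes a date-like field carries an Avro logical type alongside its primitive type, and contract authors can override it (or set a default) via config. A small illustrative call; the exact shape of the surrounding dict, such as the name entry, comes from code outside this hunk:

    from datacontract.export.avro_converter import to_avro_field
    from datacontract.model.data_contract_specification import Field

    field = Field(
        type="timestamp",
        description="Time the order was placed",
        config={"avroLogicalType": "timestamp-micros", "avroDefault": 0},
    )
    print(to_avro_field(field, "ordered_at"))
    # roughly: {'name': 'ordered_at', 'doc': '...', 'type': 'long',
    #           'logicalType': 'timestamp-micros', 'default': 0}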
datacontract/export/bigquery_converter.py
ADDED

@@ -0,0 +1,107 @@
+import json
+import logging
+from typing import Dict, List
+
+from datacontract.model.data_contract_specification import Model, Field, Server
+from datacontract.model.exceptions import DataContractException
+
+
+def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
+    bigquery_table = to_bigquery_schema(model_name, model_value, server)
+    return json.dumps(bigquery_table, indent=2)
+
+
+def to_bigquery_schema(model_name: str, model_value: Model, server: Server) -> dict:
+    return {
+        "kind": "bigquery#table",
+        "tableReference": {"datasetId": server.dataset, "projectId": server.project, "tableId": model_name},
+        "description": model_value.description,
+        "schema": {"fields": to_fields_array(model_value.fields)},
+    }
+
+
+def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:
+    bq_fields = []
+    for field_name, field in fields.items():
+        bq_fields.append(to_field(field_name, field))
+
+    return bq_fields
+
+
+def to_field(field_name: str, field: Field) -> dict:
+    bq_type = map_type_to_bigquery(field.type, field_name)
+    bq_field = {
+        "name": field_name,
+        "type": bq_type,
+        "mode": "REQUIRED" if field.required else "NULLABLE",
+        "description": field.description,
+    }
+
+    # handle arrays
+    if field.type == "array":
+        bq_field["mode"] = "REPEATED"
+        if field.items.type == "object":
+            # in case the array type is a complex object, we want to copy all its fields
+            bq_field["fields"] = to_fields_array(field.items.fields)
+        else:
+            # otherwise we make up a structure that gets us a single field of the specified type
+            bq_field["fields"] = to_fields_array(
+                {f"{field_name}_1": Field(type=field.items.type, required=False, description="")}
+            )
+    # all of these can carry other fields
+    elif bq_type.lower() in ["record", "struct"]:
+        bq_field["fields"] = to_fields_array(field.fields)
+
+    # strings can have a maxlength
+    if bq_type.lower() == "string":
+        bq_field["maxLength"] = field.maxLength
+
+    # number types have precision and scale
+    if bq_type.lower() in ["numeric", "bignumeric"]:
+        bq_field["precision"] = field.precision
+        bq_field["scale"] = field.scale
+
+    return bq_field
+
+
+def map_type_to_bigquery(type_str: str, field_name: str) -> str:
+    logger = logging.getLogger(__name__)
+    if type_str.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    elif type_str == "bytes":
+        return "BYTES"
+    elif type_str.lower() in ["int", "integer"]:
+        return "INTEGER"
+    elif type_str.lower() in ["long", "bigint"]:
+        return "INT64"
+    elif type_str == "float":
+        return "FLOAT"
+    elif type_str == "boolean":
+        return "BOOL"
+    elif type_str.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    elif type_str == "date":
+        return "DATE"
+    elif type_str == "timestamp_ntz":
+        return "TIME"
+    elif type_str.lower() in ["number", "decimal", "numeric"]:
+        return "NUMERIC"
+    elif type_str == "double":
+        return "BIGNUMERIC"
+    elif type_str.lower() in ["object", "record", "array"]:
+        return "RECORD"
+    elif type_str == "struct":
+        return "STRUCT"
+    elif type_str == "null":
+        logger.info(
+            f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING."
+        )
+        return "STRING"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map datacontract type to bigquery data type",
+            reason=f"Unsupported type {type_str} in data contract definition.",
+            engine="datacontract",
+        )