datacontract-cli 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)
  1. datacontract/breaking/breaking.py +12 -0
  2. datacontract/breaking/breaking_rules.py +4 -0
  3. datacontract/catalog/catalog.py +2 -2
  4. datacontract/cli.py +42 -8
  5. datacontract/data_contract.py +84 -134
  6. datacontract/engines/soda/check_soda_execute.py +5 -0
  7. datacontract/engines/soda/connections/duckdb.py +1 -2
  8. datacontract/engines/soda/connections/sqlserver.py +43 -0
  9. datacontract/export/avro_converter.py +23 -2
  10. datacontract/export/bigquery_converter.py +107 -0
  11. datacontract/export/dbml_converter.py +118 -0
  12. datacontract/export/go_converter.py +98 -0
  13. datacontract/export/html_export.py +4 -2
  14. datacontract/export/jsonschema_converter.py +41 -2
  15. datacontract/export/rdf_converter.py +1 -2
  16. datacontract/export/sql_converter.py +1 -0
  17. datacontract/export/sql_type_converter.py +125 -4
  18. datacontract/imports/avro_importer.py +41 -14
  19. datacontract/imports/bigquery_importer.py +178 -0
  20. datacontract/imports/jsonschema_importer.py +148 -0
  21. datacontract/imports/sql_importer.py +2 -2
  22. datacontract/lint/resolve.py +1 -2
  23. datacontract/model/data_contract_specification.py +65 -1
  24. datacontract/publish/publish.py +32 -0
  25. datacontract/py.typed +0 -0
  26. datacontract/templates/datacontract.html +37 -346
  27. datacontract/templates/index.html +70 -5
  28. datacontract/templates/partials/datacontract_information.html +66 -0
  29. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  30. datacontract/templates/partials/datacontract_terms.html +44 -0
  31. datacontract/templates/partials/definition.html +99 -0
  32. datacontract/templates/partials/example.html +27 -0
  33. datacontract/templates/partials/model_field.html +97 -0
  34. datacontract/templates/partials/server.html +144 -0
  35. datacontract/templates/style/output.css +99 -13
  36. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +276 -139
  37. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +41 -26
  38. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
  39. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
  40. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
  41. {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py CHANGED
@@ -257,6 +257,18 @@ def field_breaking_changes(
             )
             continue

+        if field_definition_field == "items" and old_field.type == "array" and new_field.type == "array":
+            results.extend(
+                field_breaking_changes(
+                    old_field=old_value,
+                    new_field=new_value,
+                    composition=composition + ["items"],
+                    new_path=new_path,
+                    include_severities=include_severities,
+                )
+            )
+            continue
+
         rule_name = None
         description = None

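With this branch, breaking-change detection now recurses into the item definition of array fields. A rough sketch of the intended behaviour (the Field values and the include_severities list are illustrative assumptions, not taken from this diff):

from datacontract.breaking.breaking import field_breaking_changes
from datacontract.model.breaking_change import Severity
from datacontract.model.data_contract_specification import Field

# Hypothetical before/after: the array itself is unchanged,
# but the type of its items changes from string to integer.
old = Field(type="array", items=Field(type="string"))
new = Field(type="array", items=Field(type="integer"))

changes = field_breaking_changes(
    old_field=old,
    new_field=new,
    composition=["orders", "tags"],
    new_path="datacontract.yaml",
    include_severities=[Severity.ERROR, Severity.WARNING, Severity.INFO],
)
# With 0.10.5 the items type change should now show up in `changes`.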
datacontract/breaking/breaking_rules.py CHANGED
@@ -90,6 +90,10 @@ class BreakingRules:
     field_tags_removed = Severity.INFO
     field_tags_updated = Severity.INFO

+    field_example_added = Severity.INFO
+    field_example_updated = Severity.INFO
+    field_example_removed = Severity.INFO
+
     # quality Rules
     quality_added = Severity.INFO
     quality_removed = Severity.WARNING
datacontract/catalog/catalog.py CHANGED
@@ -7,8 +7,7 @@ from jinja2 import PackageLoader, Environment, select_autoescape

 from datacontract.data_contract import DataContract
 from datacontract.export.html_export import get_version
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification


 def create_data_contract_html(contracts, file: Path, path: Path):
@@ -71,6 +70,7 @@ def create_index_html(contracts, path):
             datacontract_cli_version=datacontract_cli_version,
             contracts=contracts,
             contracts_size=len(contracts),
+            owners=sorted(set(dc.spec.info.owner for dc in contracts if dc.spec.info.owner)),
         )
         f.write(html_string)
         print(f"Created {index_filepath}")
datacontract/cli.py CHANGED
@@ -2,6 +2,7 @@ from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
+from typing import List

 import typer
 from click import Context
@@ -11,11 +12,10 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

-from datacontract.catalog.catalog import create_index_html, \
-    create_data_contract_html
+from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract
-from datacontract.init.download_datacontract_file import \
-    download_datacontract_file, FileExistsException
+from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
+from datacontract.publish.publish import publish_to_datamesh_manager

 console = Console()

@@ -158,12 +158,20 @@ class ExportFormat(str, Enum):
     sql = "sql"
     sql_query = "sql-query"
     html = "html"
+    go = "go"
+    bigquery = "bigquery"
+    dbml = "dbml"


 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
-    output: Annotated[Path, typer.Option(help="Specify the file path where the exported data will be saved. If no path is provided, the output will be printed to stdout.")] = None,
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the exported data will be saved. If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
     server: Annotated[str, typer.Option(help="The server name to export.")] = None,
     model: Annotated[
         str,
@@ -204,7 +212,7 @@ def export(
     if output is None:
         console.print(result, markup=False)
     else:
-        with output.open('w') as f:
+        with output.open("w") as f:
             f.write(result)
         console.print(f"Written result to {output}")

@@ -213,20 +221,46 @@ class ImportFormat(str, Enum):
     sql = "sql"
     avro = "avro"
     glue = "glue"
+    bigquery = "bigquery"
+    jsonschema = "jsonschema"


 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
-    source: Annotated[str, typer.Option(help="The path to the file or Glue Database that should be imported.")],
+    source: Annotated[
+        Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")
+    ] = None,
+    bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
+    bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
+    bigquery_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source)
+    result = DataContract().import_from_source(format, source, bigquery_table, bigquery_project, bigquery_dataset)
     console.print(result.to_yaml())


+@app.command(name="publish")
+def publish(
+    location: Annotated[
+        str, typer.Argument(help="The location (url or path) of the data contract yaml.")
+    ] = "datacontract.yaml",
+):
+    """
+    Publish the data contract to the Data Mesh Manager.
+    """
+    publish_to_datamesh_manager(
+        data_contract=DataContract(data_contract_file=location),
+    )
+
+
 @app.command(name="catalog")
 def catalog(
     files: Annotated[
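The new publish command is a thin wrapper around the call shown above; the programmatic equivalent is roughly the following (the contract location is illustrative, and any Data Mesh Manager credentials are assumed to be configured separately):

from datacontract.data_contract import DataContract
from datacontract.publish.publish import publish_to_datamesh_manager

# Equivalent of running `datacontract publish datacontract.yaml`
publish_to_datamesh_manager(
    data_contract=DataContract(data_contract_file="datacontract.yaml"),
)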
datacontract/data_contract.py CHANGED
@@ -6,20 +6,19 @@ import typing
 import yaml
 from pyspark.sql import SparkSession

-from datacontract.breaking.breaking import models_breaking_changes, \
-    quality_breaking_changes
+from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
 from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
     check_that_datacontract_contains_valid_server_configuration,
 )
-from datacontract.engines.fastjsonschema.check_jsonschema import \
-    check_jsonschema
+from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.export.avro_idl_converter import to_avro_idl
-from datacontract.export.dbt_converter import to_dbt_models_yaml, \
-    to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.great_expectations_converter import \
-    to_great_expectations
+from datacontract.export.bigquery_converter import to_bigquery_json
+from datacontract.export.dbml_converter import to_dbml_diagram
+from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
+from datacontract.export.go_converter import to_go_types
+from datacontract.export.great_expectations_converter import to_great_expectations
 from datacontract.export.html_export import to_html
 from datacontract.export.jsonschema_converter import to_jsonschema_json
 from datacontract.export.odcs_converter import to_odcs_yaml
@@ -30,26 +29,22 @@ from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
 from datacontract.export.terraform_converter import to_terraform
 from datacontract.imports.avro_importer import import_avro
+from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
 from datacontract.imports.glue_importer import import_glue
+from datacontract.imports.jsonschema_importer import import_jsonschema
 from datacontract.imports.sql_importer import import_sql
-from datacontract.integration.publish_datamesh_manager import \
-    publish_datamesh_manager
+from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter
 from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
-from datacontract.lint.linters.field_reference_linter import \
-    FieldReferenceLinter
+from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.quality_schema_linter import \
-    QualityUsesSchemaLinter
-from datacontract.lint.linters.valid_constraints_linter import \
-    ValidFieldConstraintsLinter
-from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
-    Severity
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
+from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
+from datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check

@@ -289,28 +284,8 @@ class DataContract:
             inline_quality=True,
         )
         if export_format == "jsonschema":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_jsonschema_json(model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_jsonschema_json(model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_jsonschema_json(model_name, model_value)
         if export_format == "sodacl":
             return to_sodacl_yaml(data_contract)
         if export_format == "dbt":
@@ -318,28 +293,8 @@ class DataContract:
         if export_format == "dbt-sources":
             return to_dbt_sources_yaml(data_contract, self._server)
         if export_format == "dbt-staging-sql":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_dbt_staging_sql(data_contract, model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_dbt_staging_sql(data_contract, model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_dbt_staging_sql(data_contract, model_name, model_value)
         if export_format == "odcs":
             return to_odcs_yaml(data_contract)
         if export_format == "rdf":
@@ -347,28 +302,8 @@ class DataContract:
         if export_format == "protobuf":
             return to_protobuf(data_contract)
         if export_format == "avro":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_avro_schema_json(model_name, model_value)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_avro_schema_json(model_name, model_value)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_avro_schema_json(model_name, model_value)
         if export_format == "avro-idl":
             return to_avro_idl(data_contract)
         if export_format == "terraform":
@@ -377,59 +312,33 @@ class DataContract:
             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
             return to_sql_ddl(data_contract, server_type=server_type)
         if export_format == "sql-query":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
             server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_sql_query(data_contract, model_name, model_value, server_type)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                    )
-
-                return to_sql_query(data_contract, model_name, model_value, server_type)
-
+            return to_sql_query(data_contract, model_name, model_value, server_type)
         if export_format == "great-expectations":
-            if data_contract.models is None:
-                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-            model_names = list(data_contract.models.keys())
-
-            if model == "all":
-                if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
-                        f"Export to {export_format} is model specific. Specify the model via --model "
-                        f"$MODEL_NAME. Available models: {model_names}"
-                    )
-
-                model_name, model_value = next(iter(data_contract.models.items()))
-                return to_great_expectations(data_contract, model_name)
-            else:
-                model_name = model
-                model_value = data_contract.models.get(model_name)
-                if model_value is None:
-                    raise RuntimeError(
-                        f"Model {model_name} not found in the data contract. " f"Available models: {model_names}"
-                    )
-
-                return to_great_expectations(data_contract, model_name)
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            return to_great_expectations(data_contract, model_name)
         if export_format == "pydantic-model":
             return to_pydantic_model_str(data_contract)
         if export_format == "html":
             return to_html(data_contract)
+        if export_format == "go":
+            return to_go_types(data_contract)
+        if export_format == "bigquery":
+            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
+            found_server = data_contract.servers.get(self._server)
+            if found_server is None:
+                raise RuntimeError(
+                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
+                )
+            if found_server.type != "bigquery":
+                raise RuntimeError(
+                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
+                )
+            return to_bigquery_json(model_name, model_value, found_server)
+        if export_format == "dbml":
+            found_server = data_contract.servers.get(self._server)
+            return to_dbml_diagram(data_contract, found_server)
         else:
             print(f"Export format {export_format} not supported.")
             return ""
@@ -484,7 +393,39 @@ class DataContract:
484
393
  run.log_info(f"Using {server} for testing the examples")
485
394
  return server
486
395
 
487
- def import_from_source(self, format: str, source: str) -> DataContractSpecification:
396
+ def _check_models_for_export(
397
+ self, data_contract: DataContractSpecification, model: str, export_format: str
398
+ ) -> typing.Tuple[str, str]:
399
+ if data_contract.models is None:
400
+ raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
401
+
402
+ model_names = list(data_contract.models.keys())
403
+
404
+ if model == "all":
405
+ if len(data_contract.models.items()) != 1:
406
+ raise RuntimeError(
407
+ f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
408
+ )
409
+
410
+ model_name, model_value = next(iter(data_contract.models.items()))
411
+ else:
412
+ model_name = model
413
+ model_value = data_contract.models.get(model_name)
414
+ if model_value is None:
415
+ raise RuntimeError(
416
+ f"Model {model_name} not found in the data contract. Available models: {model_names}"
417
+ )
418
+
419
+ return model_name, model_value
420
+
421
+ def import_from_source(
422
+ self,
423
+ format: str,
424
+ source: typing.Optional[str] = None,
425
+ bigquery_tables: typing.Optional[typing.List[str]] = None,
426
+ bigquery_project: typing.Optional[str] = None,
427
+ bigquery_dataset: typing.Optional[str] = None,
428
+ ) -> DataContractSpecification:
488
429
  data_contract_specification = DataContract.init()
489
430
 
490
431
  if format == "sql":
@@ -493,6 +434,15 @@ class DataContract:
493
434
  data_contract_specification = import_avro(data_contract_specification, source)
494
435
  elif format == "glue":
495
436
  data_contract_specification = import_glue(data_contract_specification, source)
437
+ elif format == "jsonschema":
438
+ data_contract_specification = import_jsonschema(data_contract_specification, source)
439
+ elif format == "bigquery":
440
+ if source is not None:
441
+ data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
442
+ else:
443
+ data_contract_specification = import_bigquery_from_api(
444
+ data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
445
+ )
496
446
  else:
497
447
  print(f"Import format {format} not supported.")
498
448
 
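The widened import_from_source signature lets the BigQuery import run either from a previously exported table JSON file or straight against the BigQuery API; a rough sketch of both paths (project, dataset, table and file names are placeholders, and API access assumes Google credentials are already set up):

from datacontract.data_contract import DataContract

# From a table definition exported as JSON
spec = DataContract().import_from_source("bigquery", source="orders_table.json")

# Straight from the BigQuery API; with bigquery_tables=None all tables in the dataset are imported
spec = DataContract().import_from_source(
    "bigquery",
    bigquery_tables=["orders"],
    bigquery_project="my-gcp-project",
    bigquery_dataset="sales",
)
print(spec.to_yaml())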
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -9,6 +9,7 @@ from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
 from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
+from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log
@@ -69,6 +70,10 @@ def check_soda_execute(
         read_kafka_topic(spark, data_contract, server, tmp_dir)
         scan.add_spark_session(spark, data_source_name=server.type)
         scan.set_data_source_name(server.type)
+    elif server.type == "sqlserver":
+        soda_configuration_str = to_sqlserver_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

     else:
         run.checks.append(
datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -87,8 +87,7 @@ def setup_s3_connection(con, server):
     s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://")
     if server.endpointUrl.startswith("http://"):
         use_ssl = "false"
-        url_style = 'path'
-
+        url_style = "path"

     if s3_access_key_id is not None:
         con.sql(f"""
datacontract/engines/soda/connections/sqlserver.py ADDED
@@ -0,0 +1,43 @@
+import os
+
+import yaml
+
+from datacontract.model.data_contract_specification import Server
+
+
+def to_sqlserver_soda_configuration(server: Server) -> str:
+    """Serialize server config to soda configuration.
+
+
+    ### Example:
+    type: sqlserver
+    host: host
+    port: '1433'
+    username: simple
+    password: simple_pass
+    database: database
+    schema: dbo
+    trusted_connection: false
+    encrypt: false
+    trust_server_certificate: false
+    driver: ODBC Driver 18 for SQL Server
+    """
+    # with service account key, using an external json file
+    soda_configuration = {
+        f"data_source {server.type}": {
+            "type": "sqlserver",
+            "host": server.host,
+            "port": str(server.port),
+            "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ""),
+            "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ""),
+            "database": server.database,
+            "schema": server.schema_,
+            "trusted_connection": os.getenv("DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION", False),
+            "trust_server_certificate": os.getenv("DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE", False),
+            "encrypt": os.getenv("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True),
+            "driver": server.driver,
+        }
+    }
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
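Host, port, database, schema and driver come from the server definition, while credentials and the TLS-related flags are read from environment variables; a small sketch of how the helper is driven (the server values are illustrative, and the exact set of Server fields is assumed from the code above):

import os

from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
from datacontract.model.data_contract_specification import Server

# Credentials are picked up from the environment, not from the contract.
os.environ["DATACONTRACT_SQLSERVER_USERNAME"] = "simple"
os.environ["DATACONTRACT_SQLSERVER_PASSWORD"] = "simple_pass"

server = Server(
    type="sqlserver",
    host="localhost",
    port="1433",
    database="mydb",
    driver="ODBC Driver 18 for SQL Server",
)
print(to_sqlserver_soda_configuration(server))  # YAML block for "data_source sqlserver"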
datacontract/export/avro_converter.py CHANGED
@@ -34,6 +34,16 @@ def to_avro_field(field, field_name):
     if field.description is not None:
         avro_field["doc"] = field.description
     avro_field["type"] = to_avro_type(field, field_name)
+    # add logical type definitions for any of the date type fields
+    if field.type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
+        avro_field["logicalType"] = to_avro_logical_type(field.type)
+
+    if field.config:
+        if "avroLogicalType" in field.config:
+            avro_field["logicalType"] = field.config["avroLogicalType"]
+        if "avroDefault" in field.config:
+            avro_field["default"] = field.config["avroDefault"]
+
     return avro_field


@@ -54,9 +64,9 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     elif field.type in ["boolean"]:
         return "boolean"
     elif field.type in ["timestamp", "timestamp_tz"]:
-        return "string"
+        return "long"
     elif field.type in ["timestamp_ntz"]:
-        return "string"
+        return "long"
     elif field.type in ["date"]:
         return "int"
     elif field.type in ["time"]:
@@ -72,3 +82,14 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         return "null"
     else:
         return "bytes"
+
+
+def to_avro_logical_type(type: str) -> str:
+    if type in ["timestamp", "timestamp_tz"]:
+        return "timestamp-millis"
+    elif type in ["timestamp_ntz"]:
+        return "local-timestamp-millis"
+    elif type in ["date"]:
+        return "date"
+    else:
+        return ""
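Timestamp and date fields now map to Avro long/int with an attached logical type, and field.config can override it; a quick sketch of how the converter behaves (the field definitions are made up):

from datacontract.export.avro_converter import to_avro_field
from datacontract.model.data_contract_specification import Field

# A timestamp field is now expected to map to type "long" with
# logicalType "timestamp-millis" instead of the previous plain "string".
field = Field(type="timestamp", description="Time the order was placed")
print(to_avro_field(field, "order_timestamp"))

# An explicit config entry wins over the derived logical type.
overridden = Field(type="timestamp", config={"avroLogicalType": "timestamp-micros"})
print(to_avro_field(overridden, "order_timestamp"))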
datacontract/export/bigquery_converter.py ADDED
@@ -0,0 +1,107 @@
+import json
+import logging
+from typing import Dict, List
+
+from datacontract.model.data_contract_specification import Model, Field, Server
+from datacontract.model.exceptions import DataContractException
+
+
+def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
+    bigquery_table = to_bigquery_schema(model_name, model_value, server)
+    return json.dumps(bigquery_table, indent=2)
+
+
+def to_bigquery_schema(model_name: str, model_value: Model, server: Server) -> dict:
+    return {
+        "kind": "bigquery#table",
+        "tableReference": {"datasetId": server.dataset, "projectId": server.project, "tableId": model_name},
+        "description": model_value.description,
+        "schema": {"fields": to_fields_array(model_value.fields)},
+    }
+
+
+def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:
+    bq_fields = []
+    for field_name, field in fields.items():
+        bq_fields.append(to_field(field_name, field))
+
+    return bq_fields
+
+
+def to_field(field_name: str, field: Field) -> dict:
+    bq_type = map_type_to_bigquery(field.type, field_name)
+    bq_field = {
+        "name": field_name,
+        "type": bq_type,
+        "mode": "REQUIRED" if field.required else "NULLABLE",
+        "description": field.description,
+    }
+
+    # handle arrays
+    if field.type == "array":
+        bq_field["mode"] = "REPEATED"
+        if field.items.type == "object":
+            # in case the array type is a complex object, we want to copy all its fields
+            bq_field["fields"] = to_fields_array(field.items.fields)
+        else:
+            # otherwise we make up a structure that gets us a single field of the specified type
+            bq_field["fields"] = to_fields_array(
+                {f"{field_name}_1": Field(type=field.items.type, required=False, description="")}
+            )
+    # all of these can carry other fields
+    elif bq_type.lower() in ["record", "struct"]:
+        bq_field["fields"] = to_fields_array(field.fields)
+
+    # strings can have a maxlength
+    if bq_type.lower() == "string":
+        bq_field["maxLength"] = field.maxLength
+
+    # number types have precision and scale
+    if bq_type.lower() in ["numeric", "bignumeric"]:
+        bq_field["precision"] = field.precision
+        bq_field["scale"] = field.scale
+
+    return bq_field
+
+
+def map_type_to_bigquery(type_str: str, field_name: str) -> str:
+    logger = logging.getLogger(__name__)
+    if type_str.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    elif type_str == "bytes":
+        return "BYTES"
+    elif type_str.lower() in ["int", "integer"]:
+        return "INTEGER"
+    elif type_str.lower() in ["long", "bigint"]:
+        return "INT64"
+    elif type_str == "float":
+        return "FLOAT"
+    elif type_str == "boolean":
+        return "BOOL"
+    elif type_str.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    elif type_str == "date":
+        return "DATE"
+    elif type_str == "timestamp_ntz":
+        return "TIME"
+    elif type_str.lower() in ["number", "decimal", "numeric"]:
+        return "NUMERIC"
+    elif type_str == "double":
+        return "BIGNUMERIC"
+    elif type_str.lower() in ["object", "record", "array"]:
+        return "RECORD"
+    elif type_str == "struct":
+        return "STRUCT"
+    elif type_str == "null":
+        logger.info(
+            f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING."
+        )
+        return "STRING"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map datacontract type to bigquery data type",
+            reason=f"Unsupported type {type_str} in data contract definition.",
+            engine="datacontract",
+        )
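A short sketch of the new exporter on a hypothetical model (project, dataset and field definitions are made up); the result is a JSON document shaped like a BigQuery table resource:

from datacontract.export.bigquery_converter import to_bigquery_json
from datacontract.model.data_contract_specification import Field, Model, Server

server = Server(type="bigquery", project="my-gcp-project", dataset="sales")
model = Model(
    description="Orders placed in the webshop",
    fields={
        "order_id": Field(type="string", required=True, description="Primary key"),
        "tags": Field(type="array", items=Field(type="string")),
    },
)
print(to_bigquery_json("orders", model, server))  # REQUIRED order_id, REPEATED tags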