datacontract-cli 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datacontract/cli.py CHANGED
@@ -13,7 +13,7 @@ from typer.core import TyperGroup
 from typing_extensions import Annotated
 
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
-from datacontract.data_contract import DataContract
+from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager
 
@@ -141,28 +141,6 @@ def test(
     _handle_result(run)
 
 
-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
@@ -231,6 +209,12 @@ def import_(
     source: Annotated[
         Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")
     ] = None,
+    glue_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the Glue Database (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
     bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
     bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
     bigquery_table: Annotated[
@@ -243,7 +227,7 @@ def import_(
     """
     Create a data contract from the given source location. Prints to stdout.
    """
-    result = DataContract().import_from_source(format, source, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
     console.print(result.to_yaml())
 
 
datacontract/data_contract.py CHANGED
@@ -2,6 +2,7 @@ import json
 import logging
 import tempfile
 import typing
+from enum import Enum
 
 import yaml
 from pyspark.sql import SparkSession
@@ -49,6 +50,28 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
 
 
+class ExportFormat(str, Enum):
+    jsonschema = "jsonschema"
+    pydantic_model = "pydantic-model"
+    sodacl = "sodacl"
+    dbt = "dbt"
+    dbt_sources = "dbt-sources"
+    dbt_staging_sql = "dbt-staging-sql"
+    odcs = "odcs"
+    rdf = "rdf"
+    avro = "avro"
+    protobuf = "protobuf"
+    great_expectations = "great-expectations"
+    terraform = "terraform"
+    avro_idl = "avro-idl"
+    sql = "sql"
+    sql_query = "sql-query"
+    html = "html"
+    go = "go"
+    bigquery = "bigquery"
+    dbml = "dbml"
+
+
 class DataContract:
     def __init__(
         self,
@@ -195,10 +218,13 @@ class DataContract:
             run.outputPortId = server.outputPortId
             run.server = server_name
 
-            # 5. check server is supported type
-            # 6. check server credentials are complete
+            # TODO check server is supported type for nicer error messages
+
+            # TODO check server credentials are complete for nicer error messages
+
             if server.format == "json" and server.type != "kafka":
                 check_jsonschema(run, data_contract, server)
+
             check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
 
         except DataContractException as e:
@@ -275,7 +301,9 @@ class DataContract:
             inline_quality=self._inline_quality,
         )
 
-    def export(self, export_format, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto") -> str:
+    def export(
+        self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
+    ) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
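
With `ExportFormat` now living in `datacontract.data_contract` and `export()` typed accordingly, a programmatic export might look like the following sketch (the file name is a placeholder; the call shape follows the signature in the hunk above):

```python
from datacontract.data_contract import DataContract, ExportFormat

# Load a contract and export it; export() returns the rendered result as a string.
# Format members such as ExportFormat.avro come from the enum shown above.
data_contract = DataContract(data_contract_file="datacontract.yaml")
avro_schema = data_contract.export(export_format=ExportFormat.avro)
print(avro_schema)
```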
@@ -422,6 +450,7 @@ class DataContract:
         self,
         format: str,
         source: typing.Optional[str] = None,
+        glue_tables: typing.Optional[typing.List[str]] = None,
         bigquery_tables: typing.Optional[typing.List[str]] = None,
         bigquery_project: typing.Optional[str] = None,
         bigquery_dataset: typing.Optional[str] = None,
@@ -433,7 +462,7 @@ class DataContract:
         elif format == "avro":
             data_contract_specification = import_avro(data_contract_specification, source)
         elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source)
+            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
         elif format == "jsonschema":
             data_contract_specification = import_jsonschema(data_contract_specification, source)
         elif format == "bigquery":
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -64,6 +64,15 @@ def check_soda_execute(
         soda_configuration_str = to_databricks_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "dataframe":
+        if spark is None:
+            run.log_warn("Server type dataframe only works with the Python library and requires a Spark session, "
+                         "please provide one with the DataContract class")
+            return
+        else:
+            logging.info("Use Spark to connect to data source")
+            scan.add_spark_session(spark, data_source_name="datacontract-cli")
+            scan.set_data_source_name("datacontract-cli")
     elif server.type == "kafka":
         if spark is None:
             spark = create_spark_session(tmp_dir)
datacontract/engines/soda/connections/kafka.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import col, expr, from_json
@@ -44,6 +45,8 @@ def create_spark_session(tmp_dir: str) -> SparkSession:
 
 def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecification, server: Server, tmp_dir):
     """Read and process data from a Kafka topic based on the server configuration."""
+
+    logging.info("Reading data from Kafka server %s topic %s", server.host, server.topic)
     df = (
         spark.read.format("kafka")
         .options(**get_auth_options())
datacontract/export/avro_converter.py CHANGED
@@ -34,13 +34,8 @@ def to_avro_field(field, field_name):
     if field.description is not None:
         avro_field["doc"] = field.description
     avro_field["type"] = to_avro_type(field, field_name)
-    # add logical type definitions for any of the date type fields
-    if field.type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
-        avro_field["logicalType"] = to_avro_logical_type(field.type)
 
     if field.config:
-        if "avroLogicalType" in field.config:
-            avro_field["logicalType"] = field.config["avroLogicalType"]
         if "avroDefault" in field.config:
             avro_field["default"] = field.config["avroDefault"]
 
@@ -48,6 +43,17 @@ def to_avro_field(field, field_name):
 
 
 def to_avro_type(field: Field, field_name: str) -> str | dict:
+    if field.config:
+        if "avroLogicalType" in field.config and "avroType" in field.config:
+            return {"type": field.config["avroType"], "logicalType": field.config["avroLogicalType"]}
+        if "avroLogicalType" in field.config:
+            if field.config["avroLogicalType"] in ["timestamp-millis", "timestamp-micros", "local-timestamp-millis", "local-timestamp-micros", "time-micros"]:
+                return {"type": "long", "logicalType": field.config["avroLogicalType"]}
+            if field.config["avroLogicalType"] in ["time-millis", "date"]:
+                return {"type": "int", "logicalType": field.config["avroLogicalType"]}
+        if "avroType" in field.config:
+            return field.config["avroLogicalType"]
+
     if field.type is None:
         return "null"
     if field.type in ["string", "varchar", "text"]:
@@ -64,11 +70,11 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
     elif field.type in ["boolean"]:
         return "boolean"
     elif field.type in ["timestamp", "timestamp_tz"]:
-        return "long"
+        return {"type": "long", "logicalType": "timestamp-millis"}
     elif field.type in ["timestamp_ntz"]:
-        return "long"
+        return {"type": "long", "logicalType": "local-timestamp-millis"}
     elif field.type in ["date"]:
-        return "int"
+        return {"type": "int", "logicalType": "date"}
     elif field.type in ["time"]:
         return "long"
     elif field.type in ["object", "record", "struct"]:
@@ -82,14 +88,3 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         return "null"
     else:
         return "bytes"
-
-
-def to_avro_logical_type(type: str) -> str:
-    if type in ["timestamp", "timestamp_tz"]:
-        return "timestamp-millis"
-    elif type in ["timestamp_ntz"]:
-        return "local-timestamp-millis"
-    elif type in ["date"]:
-        return "date"
-    else:
-        return ""
datacontract/imports/glue_importer.py CHANGED
@@ -107,7 +107,7 @@ def get_glue_table_schema(database_name: str, table_name: str):
     return table_schema
 
 
-def import_glue(data_contract_specification: DataContractSpecification, source: str):
+def import_glue(data_contract_specification: DataContractSpecification, source: str, table_names: List[str]):
     """Import the schema of a Glue database."""
 
     catalogid, location_uri = get_glue_database(source)
@@ -116,13 +116,14 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
     if catalogid is None:
         return data_contract_specification
 
-    tables = get_glue_tables(source)
+    if table_names is None:
+        table_names = get_glue_tables(source)
 
     data_contract_specification.servers = {
         "production": Server(type="glue", account=catalogid, database=source, location=location_uri),
     }
 
-    for table_name in tables:
+    for table_name in table_names:
         if data_contract_specification.models is None:
             data_contract_specification.models = {}
 
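Putting the Glue changes together, a programmatic import might look like this sketch (database and table names are placeholders; the keyword names follow the `import_from_source()` signature shown earlier in this diff):

```python
from datacontract.data_contract import DataContract

# Import selected tables from a Glue database; pass glue_tables=None (or omit it)
# to import every table, mirroring the CLI's repeatable --glue-table option.
spec = DataContract().import_from_source(
    "glue",
    source="my_glue_database",
    glue_tables=["orders", "customers"],
)
print(spec.to_yaml())
```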
datacontract/web.py CHANGED
@@ -1,8 +1,10 @@
-from typing import Annotated, Union
+from typing import Annotated, Union, Optional
 
+import typer
 from fastapi import FastAPI, File
 
-from datacontract.data_contract import DataContract
+from datacontract.data_contract import DataContract, ExportFormat
+from fastapi.responses import PlainTextResponse
 
 app = FastAPI()
 
@@ -12,3 +14,38 @@ def lint(file: Annotated[bytes, File()], linters: Union[str, set[str]] = "all"):
     data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"))
     lint_result = data_contract.lint(enabled_linters=linters)
     return {"result": lint_result.result, "checks": lint_result.checks}
+
+
+@app.post("/export", response_class=PlainTextResponse)
+def export(
+    file: Annotated[bytes, File()],
+    export_format: Annotated[ExportFormat, typer.Option(help="The export format.")],
+    server: Annotated[str, typer.Option(help="The server name to export.")] = None,
+    model: Annotated[
+        str,
+        typer.Option(
+            help="Use the key of the model in the data contract yaml file "
+            "to refer to a model, e.g., `orders`, or `all` for all "
+            "models (default)."
+        ),
+    ] = "all",
+    rdf_base: Annotated[
+        Optional[str],
+        typer.Option(help="[rdf] The base URI used to generate the RDF graph.", rich_help_panel="RDF Options"),
+    ] = None,
+    sql_server_type: Annotated[
+        Optional[str],
+        typer.Option(
+            help="[sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically detect the sql dialect via the specified servers in the data contract.",
+            rich_help_panel="SQL Options",
+        ),
+    ] = "auto",
+):
+    result = DataContract(data_contract_str=str(file, encoding="utf-8"), server=server).export(
+        export_format=export_format,
+        model=model,
+        rdf_base=rdf_base,
+        sql_server_type=sql_server_type,
+    )
+
+    return result
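
A call against the new `/export` endpoint could look like the sketch below. This is an assumption-heavy illustration: the diff does not spell out the transport details, so the multipart `file` field (mirroring `/lint`), the query parameters, and the local server address are guesses, and the third-party `requests` package is used purely for demonstration.

```python
import requests  # illustrative HTTP client, not a dependency declared in this diff

with open("datacontract.yaml", "rb") as f:
    response = requests.post(
        "http://localhost:8000/export",            # assumed local address of the FastAPI app
        files={"file": f},                          # assumed multipart upload, as in /lint
        params={"export_format": "odcs", "model": "all"},  # assumed query parameters
    )

print(response.text)  # the endpoint is declared with PlainTextResponse
```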
datacontract_cli-0.10.5.dist-info/METADATA → datacontract_cli-0.10.7.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.5
+Version: 0.10.7
 Summary: Test data contracts
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com
@@ -49,10 +49,11 @@ Requires-Dist: pytest ; extra == 'dev'
 Requires-Dist: pytest-xdist ; extra == 'dev'
 Requires-Dist: moto ; extra == 'dev'
 Requires-Dist: pymssql ==2.3.0 ; extra == 'dev'
-Requires-Dist: testcontainers <4.0.0 ; extra == 'dev'
-Requires-Dist: testcontainers-minio ; extra == 'dev'
-Requires-Dist: testcontainers-postgres ; extra == 'dev'
-Requires-Dist: testcontainers-kafka ; extra == 'dev'
+Requires-Dist: kafka-python ; extra == 'dev'
+Requires-Dist: testcontainers ~=4.5.0 ; extra == 'dev'
+Requires-Dist: testcontainers[minio] ; extra == 'dev'
+Requires-Dist: testcontainers[postgres] ; extra == 'dev'
+Requires-Dist: testcontainers[kafka] ; extra == 'dev'
 Requires-Dist: testcontainers[mssql] ; extra == 'dev'
 
 # Data Contract CLI
@@ -333,8 +334,10 @@ Supported server types:
 - [s3](#S3)
 - [bigquery](#bigquery)
 - [azure](#azure)
+- [sqlserver](#sqlserver)
 - [databricks](#databricks)
 - [databricks (programmatic)](#databricks-programmatic)
+- [dataframr (programmatic)](#dataframe-programmatic)
 - [snowflake](#snowflake)
 - [kafka](#kafka)
 - [postgres](#postgres)
@@ -448,6 +451,43 @@ Authentication works with an Azure Service Principal (SPN) aka App Registration
 
 
 
+### Sqlserver
+
+Data Contract CLI can test data in MS SQL Server (including Azure SQL, Synapse Analytics SQL Pool).
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: sqlserver
+    host: localhost
+    port: 5432
+    database: tempdb
+    schema: dbo
+    driver: ODBC Driver 18 for SQL Server
+models:
+  my_table_1: # corresponds to a table
+    type: table
+    fields:
+      my_column_1: # corresponds to a column
+        type: varchar
+```
+
+#### Environment Variables
+
+| Environment Variable | Example | Description |
+|----------------------------------|--------------------|-------------|
+| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
+| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
+| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
+| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
+| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
+
+
+
+
 ### Databricks
 
 Works with Unity Catalog and Hive metastore.
@@ -516,6 +556,41 @@ run = data_contract.test()
 run.result
 ```
 
+### Dataframe (programmatic)
+
+Works with Spark DataFrames.
+DataFrames need to be created as named temporary views.
+Multiple temporary views are suppored if your data contract contains multiple models.
+
+Testing DataFrames is useful to test your datasets in a pipeline before writing them to a data source.
+
+#### Example
+
+datacontract.yaml
+```yaml
+servers:
+  production:
+    type: dataframe
+models:
+  my_table: # corresponds to a temporary view
+    type: table
+    fields: ...
+```
+
+Example code
+```python
+from datacontract.data_contract import DataContract
+
+df.createOrReplaceTempView("my_table")
+
+data_contract = DataContract(
+    data_contract_file="datacontract.yaml",
+    spark=spark,
+)
+run = data_contract.test()
+assert run.result == "passed"
+```
+
 
 ### Snowflake
 
@@ -608,41 +683,6 @@ models:
 
 
 
-### Postgres
-
-Data Contract CLI can test data in Postgres or Postgres-compliant databases (e.g., RisingWave).
-
-#### Example
-
-datacontract.yaml
-```yaml
-servers:
-  postgres:
-    type: sqlserver
-    host: localhost
-    port: 5432
-    database: tempdb
-    schema: dbo
-    driver: ODBC Driver 18 for SQL Server
-models:
-  my_table_1: # corresponds to a table
-    type: table
-    fields:
-      my_column_1: # corresponds to a column
-        type: varchar
-```
-
-#### Environment Variables
-
-| Environment Variable | Example | Description |
-|----------------------------------|--------------------|-------------|
-| `DATACONTRACT_SQLSERVER_USERNAME` | `root` | Username |
-| `DATACONTRACT_SQLSERVER_PASSWORD` | `toor` | Password |
-| `DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION` | `True` | Use windows authentication, instead of login |
-| `DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE` | `True` | Trust self-signed certificate |
-| `DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION` | `True` | Use SSL |
-
-
 
 ### export
 
@@ -802,41 +842,30 @@ models:
 ```
 Usage: datacontract import [OPTIONS]
 
- Create a data contract from the given source location. Prints to stdout.
-
-╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ * --format [sql|avro|glue|bigquery|jsonschema] The format of the source file. [default: None] [required]
-│ --source TEXT The path to the file or Glue Database that should be imported. [default: None]
---bigquery-project TEXT The bigquery project id. [default: None]
-│ --bigquery-dataset TEXT The bigquery dataset id. [default: None]
---bigquery-table TEXT List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all
-tables in the dataset).
-[default: None]
-│ --help Show this message and exit.
-╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ Create a data contract from the given source location. Prints to stdout.
+
+╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ * --format [sql|avro|glue|bigquery|jsonschema] The format of the source file. [default: None] [required]
+│ --source TEXT The path to the file or Glue Database that should be imported.
+[default: None]
+│ --glue-table TEXT List of table ids to import from the Glue Database (repeat for
+multiple table ids, leave empty for all tables in the dataset).
+[default: None]
+--bigquery-project TEXT The bigquery project id. [default: None]
+│ --bigquery-dataset TEXT The bigquery dataset id. [default: None]
+│ --bigquery-table TEXT List of table ids to import from the bigquery API (repeat for │
+│ multiple table ids, leave empty for all tables in the dataset). │
+│ [default: None] │
+│ --help Show this message and exit. │
+╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
-As shown, some options are only relevant in certain conditions: For `format` Bigtable we support to directly read off the Bigtable APIs.
-In this case there's no need to specify `source` but instead `bt-project-id`, `bt-dataset-id` and `table` must be specified.
-
-For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
-
 Example:
 ```bash
 # Example import from SQL DDL
 datacontract import --format sql --source my_ddl.sql
 ```
 
-```bash
-# Example import from Bigquery JSON
-datacontract import --format bigquery --source my_bigquery_table.json
-```
-
-```bash
-# Example import from Bigquery API
-datacontract import --format bigquery --btProjectId <project_id> --btDatasetId <dataset_id> --table <tableid_1> --table <tableid_2> --table <tableid_3>
-```
-
 Available import options:
 
 
@@ -852,6 +881,49 @@ Available import options:
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
+#### BigQuery
+
+Bigquery data can either be imported off of JSON Files generated from the table descriptions or directly from the Bigquery API. In case you want to use JSON Files, specify the `source` parameter with a path to the JSON File.
+
+To import from the Bigquery API, you have to _omit_ `source` and instead need to provide `bigquery-project` and `bigquery-dataset`. Additionally you may specify `bigquery-table` to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the dataset will be imported.
+
+For providing authentication to the Client, please see [the google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to) or the one [about authorizing client libraries](https://cloud.google.com/bigquery/docs/authentication#client-libs).
+
+Examples:
+
+```bash
+# Example import from Bigquery JSON
+datacontract import --format bigquery --source my_bigquery_table.json
+```
+
+```bash
+# Example import from Bigquery API with specifying the tables to import
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id> --bigquery-table <tableid_1> --bigquery-table <tableid_2> --bigquery-table <tableid_3>
+```
+
+```bash
+# Example import from Bigquery API importing all tables in the dataset
+datacontract import --format bigquery --bigquery-project <project_id> --bigquery-dataset <dataset_id>
+```
+
+### Glue
+
+Importing from Glue reads the necessary Data directly off of the AWS API.
+You may give the `glue-table` parameter to enumerate the tables that should be imported. If no tables are given, _all_ available tables of the database will be imported.
+
+Examples:
+
+```bash
+# Example import from AWS Glue with specifying the tables to import
+datacontract import --format glue --source <database_name> --glue-table <table_name_1> --glue-table <table_name_2> --glue-table <table_name_3>
+```
+
+```bash
+# Example import from AWS Glue importing all tables in the database
+datacontract import --format glue --source <database_name>
+```
+
+
 ### breaking
 
 ```
datacontract_cli-0.10.5.dist-info/RECORD → datacontract_cli-0.10.7.dist-info/RECORD RENAMED
@@ -1,8 +1,8 @@
 datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datacontract/cli.py,sha256=6_MklywEJvibmrn6v0XxQ3MvTpHdMwt9C0nFosNVXu8,13001
-datacontract/data_contract.py,sha256=oDrfiozDxKpbwKIrhxufIjv6jgAUcjXYP6_sDudO08I,20752
+datacontract/cli.py,sha256=-PFT-P03aiT5RlodXPGBKVqz2etlnnzMLoNqDrsOXKE,12767
+datacontract/data_contract.py,sha256=bsvf-_nSFKStcPPdKkSprD4KUecON4EBIk2dvwbkxWw,21444
 datacontract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datacontract/web.py,sha256=SWglmbqy3NV5h4VDsG0OpwhASJT9uve7w6FPwkBjIYM,457
+datacontract/web.py,sha256=toFF9L3ueToMIa5v0BjS-PyI-w1htNlIBV8O8QLrigM,1834
 datacontract/breaking/breaking.py,sha256=l0ZwUX8G4QK_0nLRLJHArpwgTWTD7WQYMuAp3l_y1bY,12184
 datacontract/breaking/breaking_rules.py,sha256=Qj7XbsyD-BynNwhrNiETTk7_Hs_tHrtrZmSU634zEJI,3157
 datacontract/catalog/catalog.py,sha256=GMBGZPVo5lGZQDmdWCwWb5-upXlUz6R6eTMg9xI9a3s,2640
@@ -13,16 +13,16 @@ datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py,sha256
 datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=rXGfGDu9-RJomj9WcVe8vEfluR25vn2rOS7BeOVQ0XA,5748
 datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=iupiyqBa1dzgT2BtVGna-BjC5rqe6MTLs2QRp8GTs7M,665
 datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datacontract/engines/soda/check_soda_execute.py,sha256=f0TEo9MVT_l62lrGnQUfi9b-V1GV_1ctfD_RpAU4IAM,6969
+datacontract/engines/soda/check_soda_execute.py,sha256=zGIIkT-m65kP9_eQauHwfri1Poos6DkNx9Lr0_k6pu8,7456
 datacontract/engines/soda/connections/bigquery.py,sha256=Ao0KaJe4R28auU_4umxvVaLB6ZHEbKaNoYZ-RfAUmeo,662
 datacontract/engines/soda/connections/dask.py,sha256=Yy6Et2n_vDVsdjtqyBWDSZt7mnjPzPk_MZ-92VZHfnY,1496
 datacontract/engines/soda/connections/databricks.py,sha256=lpMju-o_TzLZeF0EEVwePPr8JahqvFnj5xRYjF15fc8,561
 datacontract/engines/soda/connections/duckdb.py,sha256=2wzUWnK7CLi7EJTT2Mh8Arv2pg6XGToe_9DdvLu0cNY,5585
-datacontract/engines/soda/connections/kafka.py,sha256=TZGJIDcyqeIXKYZomHRc9gIJMoV-etD0ewVfKYQbNMU,5529
+datacontract/engines/soda/connections/kafka.py,sha256=vh7z-4ZsmpXiYcogf3oTkagrAPcq6HG2SccnxNwFeVQ,5635
 datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
 datacontract/engines/soda/connections/snowflake.py,sha256=y1t2a1DWY4_tr5k-X5_nhLE6v1rfCwTahzhtHR91x9A,719
 datacontract/engines/soda/connections/sqlserver.py,sha256=RzGLbCUdRyfmDcqtM_AB9WZ-Xk-XYX91nkXpVNpYbvc,1440
-datacontract/export/avro_converter.py,sha256=-Uqv-8vNOHJhk_38QSt6OH2hT-XY3GrIqs1X1iqFQlQ,3055
+datacontract/export/avro_converter.py,sha256=KC4TLeacHSXDeEPGEuUO8wvMH5cwoSOLuep5VCBNmww,3320
 datacontract/export/avro_idl_converter.py,sha256=_2acoImuBqNqEt97OpBSewWT_w3aDBOdNosuy0gbkSY,9576
 datacontract/export/bigquery_converter.py,sha256=XSVX7aVqyhBrOI-_BiPz9gtZXoT6wd5XucHaoJfWOCo,3802
 datacontract/export/csv_type_converter.py,sha256=ZZuJwBgQnafZC7PPvAXsBf2IajPJq8TYZ1l8Qq0GYeI,1290
@@ -42,7 +42,7 @@ datacontract/export/sql_type_converter.py,sha256=DSIyBhRxU-Jo8NihwozE9Q_CZauBCoY
 datacontract/export/terraform_converter.py,sha256=-xIIspVrvCyB2AVf1vd7bVGkWI3iiMUHX1btM_o1h-g,1943
 datacontract/imports/avro_importer.py,sha256=3QTnGNps-g1dxnJjLOLr8vk64jRNebHgN1EHrdcMiXc,5559
 datacontract/imports/bigquery_importer.py,sha256=HLotmmwCSe2sGBCI57gPQa3WyeTHA8h1yaDxp25TtLQ,6802
-datacontract/imports/glue_importer.py,sha256=9wvX5jE6hPYNW5eoqcFIjySb41q-vbUW-j9XHYKN18o,5265
+datacontract/imports/glue_importer.py,sha256=HpS5E7774YqjF4hUItgtb5NVwA6OTwwjQbf-IyEejS8,5331
 datacontract/imports/jsonschema_importer.py,sha256=f_x8DbWo423b6xcSIvY7jbk_rs2FM4lMMF75fSRE3sk,6329
 datacontract/imports/sql_importer.py,sha256=tCSayA0YK_zr_R_KElfz0GOQwP0Tzz3TdBHAICnPN84,2419
 datacontract/init/download_datacontract_file.py,sha256=pj_4mhWKlEtfueWohDgkb1nyuG5ERDipUDszxKwpZUs,413
@@ -76,9 +76,9 @@ datacontract/templates/partials/example.html,sha256=F1dWbHDIXQScgfs4OVgqM1lR4uV4
 datacontract/templates/partials/model_field.html,sha256=Xy8fauErx61HuTSPLLQHgAHfX3_ilRbE0c17-nX9jAY,5072
 datacontract/templates/partials/server.html,sha256=CINXVsdVAzDzrABtBOw4lD2qBUWb8kwcOsgZi_y8ZeU,4505
 datacontract/templates/style/output.css,sha256=lfmd1Kmrtq8F5entx2o-yy9XOAZN3WkWMKNvbEtSO1k,24999
-datacontract_cli-0.10.5.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
-datacontract_cli-0.10.5.dist-info/METADATA,sha256=s34oy_BbtKQVz0sdTPf6JSag2H7y5QYqKRyUaRV3fD4,68111
-datacontract_cli-0.10.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-datacontract_cli-0.10.5.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
-datacontract_cli-0.10.5.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
-datacontract_cli-0.10.5.dist-info/RECORD,,
+datacontract_cli-0.10.7.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+datacontract_cli-0.10.7.dist-info/METADATA,sha256=-S9oqCD7uZWLX30vxB6LThbZo8Ocv3hlEWUbTa_CBMI,70579
+datacontract_cli-0.10.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+datacontract_cli-0.10.7.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+datacontract_cli-0.10.7.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+datacontract_cli-0.10.7.dist-info/RECORD,,