datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic.

Files changed (62)
  1. datacontract/breaking/breaking.py +48 -57
  2. datacontract/cli.py +100 -80
  3. datacontract/data_contract.py +178 -128
  4. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  5. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  6. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  8. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  9. datacontract/engines/soda/check_soda_execute.py +58 -36
  10. datacontract/engines/soda/connections/bigquery.py +5 -3
  11. datacontract/engines/soda/connections/dask.py +0 -1
  12. datacontract/engines/soda/connections/databricks.py +2 -2
  13. datacontract/engines/soda/connections/duckdb.py +25 -8
  14. datacontract/engines/soda/connections/kafka.py +36 -17
  15. datacontract/engines/soda/connections/postgres.py +3 -3
  16. datacontract/engines/soda/connections/snowflake.py +4 -4
  17. datacontract/export/avro_converter.py +9 -11
  18. datacontract/export/avro_idl_converter.py +65 -42
  19. datacontract/export/csv_type_converter.py +36 -0
  20. datacontract/export/dbt_converter.py +43 -32
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/html_export.py +46 -0
  23. datacontract/export/jsonschema_converter.py +3 -1
  24. datacontract/export/odcs_converter.py +5 -7
  25. datacontract/export/protobuf_converter.py +12 -10
  26. datacontract/export/pydantic_converter.py +131 -0
  27. datacontract/export/rdf_converter.py +34 -11
  28. datacontract/export/sodacl_converter.py +118 -21
  29. datacontract/export/sql_converter.py +30 -8
  30. datacontract/export/sql_type_converter.py +44 -4
  31. datacontract/export/terraform_converter.py +4 -3
  32. datacontract/imports/avro_importer.py +65 -18
  33. datacontract/imports/sql_importer.py +0 -2
  34. datacontract/init/download_datacontract_file.py +2 -2
  35. datacontract/integration/publish_datamesh_manager.py +6 -12
  36. datacontract/integration/publish_opentelemetry.py +30 -16
  37. datacontract/lint/files.py +2 -2
  38. datacontract/lint/lint.py +26 -31
  39. datacontract/lint/linters/description_linter.py +12 -21
  40. datacontract/lint/linters/example_model_linter.py +28 -29
  41. datacontract/lint/linters/field_pattern_linter.py +8 -8
  42. datacontract/lint/linters/field_reference_linter.py +11 -10
  43. datacontract/lint/linters/notice_period_linter.py +18 -22
  44. datacontract/lint/linters/quality_schema_linter.py +16 -20
  45. datacontract/lint/linters/valid_constraints_linter.py +42 -37
  46. datacontract/lint/resolve.py +50 -14
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +2 -1
  50. datacontract/model/data_contract_specification.py +8 -7
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +3 -2
  53. datacontract/web.py +3 -7
  54. datacontract_cli-0.9.9.dist-info/METADATA +951 -0
  55. datacontract_cli-0.9.9.dist-info/RECORD +64 -0
  56. datacontract/lint/linters/primary_field_linter.py +0 -30
  57. datacontract_cli-0.9.7.dist-info/METADATA +0 -603
  58. datacontract_cli-0.9.7.dist-info/RECORD +0 -61
  59. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
  60. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
  61. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
  62. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
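
The most visible additions in 0.9.9 are new export targets: datacontract/export/pydantic_converter.py, great_expectations_converter.py, html_export.py, and csv_type_converter.py are new files. As a rough orientation only, the sketch below shows how such exporters are typically reached through the package's Python API; the DataContract constructor argument, the export() signature, and the format names are assumptions inferred from the file list above, not taken from this diff.

# Hypothetical usage sketch, not part of this diff: constructor argument,
# export() signature, and format names are assumptions.
from datacontract.data_contract import DataContract

data_contract = DataContract(data_contract_file="datacontract.yaml")

# The new converter modules hint at additional export targets, e.g. Pydantic
# models, Great Expectations suites, and an HTML report.
for export_format in ["html", "pydantic-model", "great-expectations"]:
    print(data_contract.export(export_format=export_format))

The per-file diffs follow.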

datacontract/engines/fastjsonschema/check_jsonschema.py

@@ -25,7 +25,7 @@ def validate_json_stream(model_name, validate, json_stream):
                 model=model_name,
                 reason=e.message,
                 engine="jsonschema",
-                original_exception=e
+                original_exception=e,
             )
 
 
@@ -79,16 +79,16 @@ def process_local_file(run, server, model_name, validate):
         return process_directory(run, path, server, model_name, validate)
     else:
         logging.info(f"Processing file {path}")
-        with open(path, 'r') as file:
+        with open(path, "r") as file:
             process_json_file(run, model_name, validate, file, server.delimiter)
 
 
 def process_directory(run, path, server, model_name, validate):
     success = True
     for filename in os.listdir(path):
-        if filename.endswith('.json'):  # or make this a parameter
+        if filename.endswith(".json"):  # or make this a parameter
             file_path = os.path.join(path, filename)
-            with open(file_path, 'r') as file:
+            with open(file_path, "r") as file:
                 if not process_json_file(run, model_name, validate, file, server.delimiter):
                     success = False
                     break
@@ -127,13 +127,15 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
 
     # Early exit conditions
    if server.format != "json":
-        run.checks.append(Check(
-            type="schema",
-            name="Check that JSON has valid schema",
-            result="warning",
-            reason="Server format is not 'json'. Skip validating jsonschema.",
-            engine="jsonschema",
-        ))
+        run.checks.append(
+            Check(
+                type="schema",
+                name="Check that JSON has valid schema",
+                result="warning",
+                reason="Server format is not 'json'. Skip validating jsonschema.",
+                engine="jsonschema",
+            )
+        )
         run.log_warn("jsonschema: Server format is not 'json'. Skip jsonschema checks.")
         return
 
@@ -155,21 +157,25 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
     elif server.type == "s3":
         process_s3_file(server, model_name, validate)
     else:
-        run.checks.append(Check(
+        run.checks.append(
+            Check(
+                type="schema",
+                name="Check that JSON has valid schema",
+                model=model_name,
+                result="warn",
+                reason=f"Server type {server.type} not supported",
+                engine="jsonschema",
+            )
+        )
+        return
+
+    run.checks.append(
+        Check(
             type="schema",
             name="Check that JSON has valid schema",
            model=model_name,
-            result="warn",
-            reason=f"Server type {server.type} not supported",
+            result="passed",
+            reason="All JSON entries are valid.",
             engine="jsonschema",
-        ))
-        return
-
-    run.checks.append(Check(
-        type="schema",
-        name="Check that JSON has valid schema",
-        model=model_name,
-        result="passed",
-        reason="All JSON entries are valid.",
-        engine="jsonschema",
-    ))
+        )
+    )

datacontract/engines/fastjsonschema/s3/s3_read_files.py

@@ -14,9 +14,11 @@ def yield_s3_files(s3_endpoint_url, s3_location):
 
 
 def s3_fs(s3_endpoint_url):
-    aws_access_key_id = os.getenv('DATACONTRACT_S3_ACCESS_KEY_ID')
-    aws_secret_access_key = os.getenv('DATACONTRACT_S3_SECRET_ACCESS_KEY')
-    return s3fs.S3FileSystem(key=aws_access_key_id,
-                             secret=aws_secret_access_key,
-                             anon=aws_access_key_id is None,
-                             client_kwargs={'endpoint_url': s3_endpoint_url})
+    aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
+    aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    return s3fs.S3FileSystem(
+        key=aws_access_key_id,
+        secret=aws_secret_access_key,
+        anon=aws_access_key_id is None,
+        client_kwargs={"endpoint_url": s3_endpoint_url},
+    )

datacontract/engines/soda/check_soda_execute.py

@@ -1,5 +1,6 @@
 import logging
 
+from pyspark.sql import SparkSession
 from soda.scan import Scan
 
 from datacontract.engines.soda.connections.bigquery import \
@@ -16,11 +17,12 @@ from datacontract.engines.soda.connections.snowflake import \
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import \
     DataContractSpecification, Server
-from datacontract.model.run import \
-    Run, Check, Log
+from datacontract.model.run import Run, Check, Log
 
 
-def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
+def check_soda_execute(
+    run: Run, data_contract: DataContractSpecification, server: Server, spark: SparkSession, tmp_dir
+):
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
         return
@@ -34,13 +36,15 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
             scan.set_data_source_name(server.type)
         else:
-            run.checks.append(Check(
-                type="general",
-                name="Check that format is supported",
-                result="warning",
-                reason=f"Format {server.format} not yet supported by datacontract CLI",
-                engine="datacontract",
-            ))
+            run.checks.append(
+                Check(
+                    type="general",
+                    name="Check that format is supported",
+                    result="warning",
+                    reason=f"Format {server.format} not yet supported by datacontract CLI",
+                    engine="datacontract",
+                )
+            )
             run.log_warn(f"Format {server.format} not yet supported by datacontract CLI")
             return
     elif server.type == "snowflake":
@@ -73,21 +77,25 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         scan.set_data_source_name(server.type)
 
     else:
-        run.checks.append(Check(
-            type="general",
-            name="Check that server type is supported",
-            result="warning",
-            reason=f"Server type {server.type} not yet supported by datacontract CLI",
-            engine="datacontract-cli",
-        ))
+        run.checks.append(
+            Check(
+                type="general",
+                name="Check that server type is supported",
+                result="warning",
+                reason=f"Server type {server.type} not yet supported by datacontract CLI",
+                engine="datacontract-cli",
+            )
+        )
         run.log_warn(f"Server type {server.type} not yet supported by datacontract CLI")
         return
 
     # Don't check types for json format, as they are checked with json schema
     # Don't check types for avro format, as they are checked with avro schema
     # Don't check types for csv format, as they are hard to detect
+    server_type = server.type
     check_types = server.format != "json" and server.format != "csv" and server.format != "avro"
-    sodacl_yaml_str = to_sodacl_yaml(data_contract, check_types)
+
+    sodacl_yaml_str = to_sodacl_yaml(data_contract, server_type, check_types)
     # print("sodacl_yaml_str:\n" + sodacl_yaml_str)
     scan.add_sodacl_yaml_str(sodacl_yaml_str)
 
@@ -102,46 +110,60 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
     for c in scan_results.get("checks"):
         check = Check(
             type="schema",
-            result="passed" if c.get("outcome") == "pass" else "failed" if c.get("outcome") == "fail" else c.get(
-                "outcome"),
-            reason=', '.join(c.get("outcomeReasons")),
+            result=to_result(c),
+            reason=", ".join(c.get("outcomeReasons")),
             name=c.get("name"),
             model=c.get("table"),
             field=c.get("column"),
             engine="soda-core",
+            diagnostics=c.get("diagnostics"),
         )
         update_reason(check, c)
         run.checks.append(check)
 
     for log in scan_results.get("logs"):
-        run.logs.append(Log(
-            timestamp=log.get("timestamp"),
-            level=log.get("level"),
-            message=log.get("message"),
-        ))
+        run.logs.append(
+            Log(
+                timestamp=log.get("timestamp"),
+                level=log.get("level"),
+                message=log.get("message"),
+            )
+        )
 
     if scan.has_error_logs():
         run.log_warn("Engine soda-core has errors. See the logs for details.")
-        run.checks.append(Check(
-            type="general",
-            name="Execute quality checks",
-            result="warning",
-            reason=f"Engine soda-core has errors. See the logs for details.",
-            engine="soda-core",
-        ))
+        run.checks.append(
+            Check(
+                type="general",
+                name="Execute quality checks",
+                result="warning",
+                reason="Engine soda-core has errors. See the logs for details.",
+                engine="soda-core",
+            )
+        )
         return
 
 
+def to_result(c) -> str:
+    soda_outcome = c.get("outcome")
+    if soda_outcome == "pass":
+        return "passed"
+    elif soda_outcome == "fail":
+        return "failed"
+    else:
+        return soda_outcome
+
+
 def update_reason(check, c):
     """Try to find a reason in diagnostics"""
     if check.result == "passed":
         return
     if check.reason is not None and check.reason != "":
         return
-    for block in c['diagnostics']['blocks']:
-        if block['title'] == 'Diagnostics':
+    for block in c["diagnostics"]["blocks"]:
+        if block["title"] == "Diagnostics":
             # Extract and print the 'text' value
-            diagnostics_text = block['text']
+            diagnostics_text = block["text"]
             print(diagnostics_text)
             diagnostics_text_split = diagnostics_text.split(":icon-fail: ")
             if len(diagnostics_text_split) > 1:
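
In the check_soda_execute.py changes above, the inline nested conditional that mapped soda-core outcomes to result strings is replaced by the new to_result() helper, and the raw diagnostics block is now attached to each Check. A minimal illustration of the mapping, not taken from the package's test suite:

# Illustrative check of the new to_result() mapping shown above.
from datacontract.engines.soda.check_soda_execute import to_result

assert to_result({"outcome": "pass"}) == "passed"
assert to_result({"outcome": "fail"}) == "failed"
assert to_result({"outcome": "warn"}) == "warn"  # any other outcome is passed through unchanged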

datacontract/engines/soda/connections/bigquery.py

@@ -1,18 +1,20 @@
 import os
+
 import yaml
 
+
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path": os.getenv('DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH'),
+            "account_info_json_path": os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"),
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
-            "dataset": server.dataset
+            "dataset": server.dataset,
         }
     }
 
     soda_configuration_str = yaml.dump(soda_configuration)
-    return soda_configuration_str
+    return soda_configuration_str

datacontract/engines/soda/connections/dask.py

@@ -1,4 +1,3 @@
-
 # def add_s3_connection_dask_json(data_contract, scan, server):
 #     s3_access_key_id = os.getenv('DATACONTRACT_S3_ACCESS_KEY_ID')
 #     s3_secret_access_key = os.getenv('DATACONTRACT_S3_SECRET_ACCESS_KEY')

datacontract/engines/soda/connections/databricks.py

@@ -11,8 +11,8 @@ def to_databricks_soda_configuration(server):
             "host": server.host,
             "catalog": server.catalog,
             "schema": server.schema_,
-            "http_path": os.getenv('DATACONTRACT_DATABRICKS_HTTP_PATH'),
-            "token": os.getenv('DATACONTRACT_DATABRICKS_TOKEN'),
+            "http_path": os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH"),
+            "token": os.getenv("DATACONTRACT_DATABRICKS_TOKEN"),
         }
     }
 

datacontract/engines/soda/connections/duckdb.py

@@ -2,6 +2,7 @@ import logging
 import os
 
 import duckdb
+from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
 
 
 def get_duckdb_connection(data_contract, server):
@@ -12,10 +13,10 @@ def get_duckdb_connection(data_contract, server):
     if server.type == "s3":
         path = server.location
         setup_s3_connection(con, server)
-    for model_name in data_contract.models:
+    for model_name, model in data_contract.models.items():
         model_path = path
         if "{model}" in model_path:
-            model_path = model_path.format(model = model_name)
+            model_path = model_path.format(model=model_name)
         logging.info(f"Creating table {model_name} for {model_path}")
 
         if server.format == "json":
@@ -32,16 +33,32 @@ def get_duckdb_connection(data_contract, server):
                 CREATE VIEW "{model_name}" AS SELECT * FROM read_parquet('{model_path}', hive_partitioning=1);
            """)
         elif server.format == "csv":
-            con.sql(f"""
-                CREATE VIEW "{model_name}" AS SELECT * FROM read_csv_auto('{model_path}', hive_partitioning=1);
-            """)
+            columns = to_csv_types(model)
+            if columns is None:
+                con.sql(
+                    f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""
+                )
+            else:
+                con.sql(
+                    f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
+                )
     return con
 
 
+def to_csv_types(model) -> dict:
+    if model is None:
+        return None
+    columns = {}
+    # ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+    for field_name, field in model.fields.items():
+        columns[field_name] = convert_to_duckdb_csv_type(field)
+    return columns
+
+
 def setup_s3_connection(con, server):
-    s3_region = os.getenv('DATACONTRACT_S3_REGION')
-    s3_access_key_id = os.getenv('DATACONTRACT_S3_ACCESS_KEY_ID')
-    s3_secret_access_key = os.getenv('DATACONTRACT_S3_SECRET_ACCESS_KEY')
+    s3_region = os.getenv("DATACONTRACT_S3_REGION")
+    s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
+    s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     # con.install_extension("httpfs")
     # con.load_extension("httpfs")
     if server.endpointUrl is not None:
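
The CSV branch above now builds an explicit column-type map through the new datacontract/export/csv_type_converter.py (+36 lines, not shown in this diff). Below is a hypothetical sketch of what such a mapping could look like, based only on the DuckDB CSV reader types listed in the comment inside to_csv_types; the real convert_to_duckdb_csv_type may differ.

# Hypothetical sketch, not the actual implementation in csv_type_converter.py.
def convert_to_duckdb_csv_type_sketch(field) -> str:
    # DuckDB's CSV reader accepts these column types (see the comment in to_csv_types):
    # SQLNULL, BOOLEAN, BIGINT, DOUBLE, TIME, DATE, TIMESTAMP, VARCHAR
    mapping = {
        "string": "VARCHAR",
        "text": "VARCHAR",
        "varchar": "VARCHAR",
        "boolean": "BOOLEAN",
        "int": "BIGINT",
        "integer": "BIGINT",
        "long": "BIGINT",
        "bigint": "BIGINT",
        "float": "DOUBLE",
        "double": "DOUBLE",
        "date": "DATE",
        "time": "TIME",
        "timestamp": "TIMESTAMP",
        "null": "SQLNULL",
    }
    if field.type is None:
        return "VARCHAR"  # fall back to plain text when no type is declared
    return mapping.get(field.type.lower(), "VARCHAR")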

datacontract/engines/soda/connections/kafka.py

@@ -4,7 +4,23 @@ import pyspark.sql.functions as fn
 from pyspark.sql import SparkSession
 from pyspark.sql.avro.functions import from_avro
 from pyspark.sql.functions import from_json, col
-from pyspark.sql.types import *
+from pyspark.sql.types import (
+    StructType,
+    DataType,
+    NullType,
+    ArrayType,
+    BinaryType,
+    DateType,
+    TimestampNTZType,
+    TimestampType,
+    BooleanType,
+    LongType,
+    IntegerType,
+    DoubleType,
+    DecimalType,
+    StringType,
+    StructField,
+)
 
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.model.data_contract_specification import \
@@ -15,14 +31,18 @@ from datacontract.model.exceptions import DataContractException
 def create_spark_session(tmp_dir) -> SparkSession:
     # TODO: Update dependency versions when updating pyspark
     # TODO: add protobuf library
-    spark = SparkSession.builder.appName("datacontract") \
-        .config("spark.sql.warehouse.dir", tmp_dir + "/spark-warehouse") \
-        .config("spark.streaming.stopGracefullyOnShutdown", True) \
-        .config('spark.jars.packages',
-                'org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,org.apache.spark:spark-avro_2.12:3.5.0') \
+    spark = (
+        SparkSession.builder.appName("datacontract")
+        .config("spark.sql.warehouse.dir", tmp_dir + "/spark-warehouse")
+        .config("spark.streaming.stopGracefullyOnShutdown", True)
+        .config(
+            "spark.jars.packages",
+            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,org.apache.spark:spark-avro_2.12:3.5.0",
+        )
         .getOrCreate()
+    )
     spark.sparkContext.setLogLevel("WARN")
-    print(f'Using PySpark version {spark.version}')
+    print(f"Using PySpark version {spark.version}")
     return spark
 
 
@@ -32,14 +52,14 @@ def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecificati
     auth_options = get_auth_options()
 
     # read full kafka topic
-    df = spark \
-        .read \
-        .format("kafka") \
-        .options(**auth_options) \
-        .option("kafka.bootstrap.servers", host) \
-        .option("subscribe", topic) \
-        .option("startingOffsets", "earliest") \
+    df = (
+        spark.read.format("kafka")
+        .options(**auth_options)
+        .option("kafka.bootstrap.servers", host)
+        .option("subscribe", topic)
+        .option("startingOffsets", "earliest")
         .load()
+    )
     # TODO a warning if none or multiple models
     model_name, model = next(iter(data_contract.models.items()))
     if server.format == "avro":
@@ -73,8 +93,8 @@
 
 
 def get_auth_options():
-    kafka_sasl_username = os.getenv('DATACONTRACT_KAFKA_SASL_USERNAME')
-    kafka_sasl_password = os.getenv('DATACONTRACT_KAFKA_SASL_PASSWORD')
+    kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
+    kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
     if kafka_sasl_username is None:
         auth_options = {}
     else:
@@ -130,4 +150,3 @@ def to_struct_field(field_name: str, field: Field) -> StructField:
         data_type = DataType()
 
     return StructField(field_name, data_type, nullable=not field.required)
-

datacontract/engines/soda/connections/postgres.py

@@ -10,12 +10,12 @@ def to_postgres_soda_configuration(server):
             "type": "postgres",
             "host": server.host,
             "port": str(server.port),
-            "username": os.getenv('DATACONTRACT_POSTGRES_USERNAME'),
-            "password": os.getenv('DATACONTRACT_POSTGRES_PASSWORD'),
+            "username": os.getenv("DATACONTRACT_POSTGRES_USERNAME"),
+            "password": os.getenv("DATACONTRACT_POSTGRES_PASSWORD"),
             "database": server.database,
             "schema": server.schema_,
         }
     }
 
     soda_configuration_str = yaml.dump(soda_configuration)
-    return soda_configuration_str
+    return soda_configuration_str

datacontract/engines/soda/connections/snowflake.py

@@ -7,13 +7,13 @@ def to_snowflake_soda_configuration(server):
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "snowflake",
-            "username": os.getenv('DATACONTRACT_SNOWFLAKE_USERNAME'),
-            "password": os.getenv('DATACONTRACT_SNOWFLAKE_PASSWORD'),
-            "role": os.getenv('DATACONTRACT_SNOWFLAKE_ROLE'),
+            "username": os.getenv("DATACONTRACT_SNOWFLAKE_USERNAME"),
+            "password": os.getenv("DATACONTRACT_SNOWFLAKE_PASSWORD"),
+            "role": os.getenv("DATACONTRACT_SNOWFLAKE_ROLE"),
             "account": server.account,
             "database": server.database,
             "schema": server.schema_,
-            "warehouse": os.getenv('DATACONTRACT_SNOWFLAKE_WAREHOUSE'),
+            "warehouse": os.getenv("DATACONTRACT_SNOWFLAKE_WAREHOUSE"),
             "connection_timeout": 5,  # minutes
         }
     }

datacontract/export/avro_converter.py

@@ -4,20 +4,20 @@ from datacontract.model.data_contract_specification import Field
 
 
 def to_avro_schema(model_name, model) -> dict:
-    return to_avro_record(model_name, model.fields, model.description)
+    return to_avro_record(model_name, model.fields, model.description, model.namespace)
+
 
 def to_avro_schema_json(model_name, model) -> str:
     schema = to_avro_schema(model_name, model)
     return json.dumps(schema, indent=2, sort_keys=False)
 
 
-def to_avro_record(name, fields, description) -> dict:
-    schema = {
-        "type": "record",
-        "name": name
-    }
+def to_avro_record(name, fields, description, namespace) -> dict:
+    schema = {"type": "record", "name": name}
     if description is not None:
         schema["doc"] = description
+    if namespace is not None:
+        schema["namespace"] = namespace
     schema["fields"] = to_avro_fields(fields)
     return schema
 
@@ -30,16 +30,14 @@ def to_avro_fields(fields):
 
 
 def to_avro_field(field, field_name):
-    avro_field = {
-        "name": field_name
-    }
+    avro_field = {"name": field_name}
     if field.description is not None:
         avro_field["doc"] = field.description
     avro_field["type"] = to_avro_type(field, field_name)
     return avro_field
 
 
-def to_avro_type(field: Field, field_name: str):
+def to_avro_type(field: Field, field_name: str) -> str | dict:
     if field.type is None:
         return "null"
     if field.type in ["string", "varchar", "text"]:
@@ -64,7 +62,7 @@ def to_avro_type(field: Field, field_name: str):
     elif field.type in ["time"]:
         return "long"
     elif field.type in ["object", "record", "struct"]:
-        return to_avro_record(field_name, field.fields, field.description)
+        return to_avro_record(field_name, field.fields, field.description, None)
     elif field.type in ["binary"]:
         return "bytes"
     elif field.type in ["array"]:
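
With the extra namespace parameter, to_avro_record() now emits a top-level namespace attribute when model.namespace is set, while nested records still receive None and stay namespace-free. A sketch of the resulting schema shape, using made-up model values:

# Shape of the Avro schema emitted by to_avro_record() when a namespace is set on the model.
# The model name, namespace, doc strings, and fields below are made-up illustration values.
example_schema = {
    "type": "record",
    "name": "orders",
    "doc": "All webshop orders",
    "namespace": "com.example.checkout",  # new in 0.9.9: taken from model.namespace
    "fields": [
        {"name": "order_id", "doc": "Primary key", "type": "string"},
    ],
}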