datacontract-cli 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This version of datacontract-cli has been flagged as potentially problematic.
Files changed (59)
  1. datacontract/breaking/breaking.py +48 -57
  2. datacontract/cli.py +98 -80
  3. datacontract/data_contract.py +156 -106
  4. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  5. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  6. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  8. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  9. datacontract/engines/soda/check_soda_execute.py +46 -35
  10. datacontract/engines/soda/connections/bigquery.py +5 -3
  11. datacontract/engines/soda/connections/dask.py +0 -1
  12. datacontract/engines/soda/connections/databricks.py +2 -2
  13. datacontract/engines/soda/connections/duckdb.py +4 -4
  14. datacontract/engines/soda/connections/kafka.py +36 -17
  15. datacontract/engines/soda/connections/postgres.py +3 -3
  16. datacontract/engines/soda/connections/snowflake.py +4 -4
  17. datacontract/export/avro_converter.py +3 -7
  18. datacontract/export/avro_idl_converter.py +65 -42
  19. datacontract/export/dbt_converter.py +43 -32
  20. datacontract/export/great_expectations_converter.py +141 -0
  21. datacontract/export/jsonschema_converter.py +3 -1
  22. datacontract/export/odcs_converter.py +5 -7
  23. datacontract/export/protobuf_converter.py +12 -10
  24. datacontract/export/pydantic_converter.py +140 -0
  25. datacontract/export/rdf_converter.py +34 -11
  26. datacontract/export/sodacl_converter.py +24 -24
  27. datacontract/export/sql_converter.py +20 -9
  28. datacontract/export/sql_type_converter.py +44 -4
  29. datacontract/export/terraform_converter.py +4 -3
  30. datacontract/imports/avro_importer.py +32 -10
  31. datacontract/imports/sql_importer.py +0 -2
  32. datacontract/init/download_datacontract_file.py +2 -2
  33. datacontract/integration/publish_datamesh_manager.py +4 -9
  34. datacontract/integration/publish_opentelemetry.py +30 -16
  35. datacontract/lint/files.py +2 -2
  36. datacontract/lint/lint.py +26 -31
  37. datacontract/lint/linters/description_linter.py +12 -21
  38. datacontract/lint/linters/example_model_linter.py +28 -29
  39. datacontract/lint/linters/field_pattern_linter.py +8 -8
  40. datacontract/lint/linters/field_reference_linter.py +11 -10
  41. datacontract/lint/linters/notice_period_linter.py +18 -22
  42. datacontract/lint/linters/primary_field_linter.py +10 -12
  43. datacontract/lint/linters/quality_schema_linter.py +16 -20
  44. datacontract/lint/linters/valid_constraints_linter.py +42 -37
  45. datacontract/lint/resolve.py +7 -10
  46. datacontract/lint/schema.py +2 -3
  47. datacontract/lint/urls.py +4 -5
  48. datacontract/model/breaking_change.py +2 -1
  49. datacontract/model/data_contract_specification.py +8 -7
  50. datacontract/model/exceptions.py +13 -2
  51. datacontract/model/run.py +1 -1
  52. datacontract/web.py +3 -7
  53. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +176 -37
  54. datacontract_cli-0.9.8.dist-info/RECORD +63 -0
  55. datacontract_cli-0.9.7.dist-info/RECORD +0 -61
  56. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
  57. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +0 -0
  58. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
  59. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0

datacontract/engines/soda/check_soda_execute.py
@@ -16,8 +16,7 @@ from datacontract.engines.soda.connections.snowflake import \
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import \
     DataContractSpecification, Server
-from datacontract.model.run import \
-    Run, Check, Log
+from datacontract.model.run import Run, Check, Log


 def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
@@ -34,13 +33,15 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
             scan.set_data_source_name(server.type)
         else:
-            run.checks.append(Check(
-                type="general",
-                name="Check that format is supported",
-                result="warning",
-                reason=f"Format {server.format} not yet supported by datacontract CLI",
-                engine="datacontract",
-            ))
+            run.checks.append(
+                Check(
+                    type="general",
+                    name="Check that format is supported",
+                    result="warning",
+                    reason=f"Format {server.format} not yet supported by datacontract CLI",
+                    engine="datacontract",
+                )
+            )
             run.log_warn(f"Format {server.format} not yet supported by datacontract CLI")
             return
     elif server.type == "snowflake":
@@ -73,21 +74,24 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         scan.set_data_source_name(server.type)

     else:
-        run.checks.append(Check(
-            type="general",
-            name="Check that server type is supported",
-            result="warning",
-            reason=f"Server type {server.type} not yet supported by datacontract CLI",
-            engine="datacontract-cli",
-        ))
+        run.checks.append(
+            Check(
+                type="general",
+                name="Check that server type is supported",
+                result="warning",
+                reason=f"Server type {server.type} not yet supported by datacontract CLI",
+                engine="datacontract-cli",
+            )
+        )
         run.log_warn(f"Server type {server.type} not yet supported by datacontract CLI")
         return

     # Don't check types for json format, as they are checked with json schema
     # Don't check types for avro format, as they are checked with avro schema
     # Don't check types for csv format, as they are hard to detect
+    server_type = server.type
     check_types = server.format != "json" and server.format != "csv" and server.format != "avro"
-    sodacl_yaml_str = to_sodacl_yaml(data_contract, check_types)
+    sodacl_yaml_str = to_sodacl_yaml(data_contract, server_type, check_types)
     # print("sodacl_yaml_str:\n" + sodacl_yaml_str)
     scan.add_sodacl_yaml_str(sodacl_yaml_str)

@@ -102,9 +106,12 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
     for c in scan_results.get("checks"):
         check = Check(
             type="schema",
-            result="passed" if c.get("outcome") == "pass" else "failed" if c.get("outcome") == "fail" else c.get(
-                "outcome"),
-            reason=', '.join(c.get("outcomeReasons")),
+            result="passed"
+            if c.get("outcome") == "pass"
+            else "failed"
+            if c.get("outcome") == "fail"
+            else c.get("outcome"),
+            reason=", ".join(c.get("outcomeReasons")),
             name=c.get("name"),
             model=c.get("table"),
             field=c.get("column"),
@@ -114,21 +121,25 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         run.checks.append(check)

     for log in scan_results.get("logs"):
-        run.logs.append(Log(
-            timestamp=log.get("timestamp"),
-            level=log.get("level"),
-            message=log.get("message"),
-        ))
+        run.logs.append(
+            Log(
+                timestamp=log.get("timestamp"),
+                level=log.get("level"),
+                message=log.get("message"),
+            )
+        )

     if scan.has_error_logs():
         run.log_warn("Engine soda-core has errors. See the logs for details.")
-        run.checks.append(Check(
-            type="general",
-            name="Execute quality checks",
-            result="warning",
-            reason=f"Engine soda-core has errors. See the logs for details.",
-            engine="soda-core",
-        ))
+        run.checks.append(
+            Check(
+                type="general",
+                name="Execute quality checks",
+                result="warning",
+                reason="Engine soda-core has errors. See the logs for details.",
+                engine="soda-core",
+            )
+        )
         return


@@ -138,10 +149,10 @@ def update_reason(check, c):
         return
     if check.reason is not None and check.reason != "":
         return
-    for block in c['diagnostics']['blocks']:
-        if block['title'] == 'Diagnostics':
+    for block in c["diagnostics"]["blocks"]:
+        if block["title"] == "Diagnostics":
             # Extract and print the 'text' value
-            diagnostics_text = block['text']
+            diagnostics_text = block["text"]
             print(diagnostics_text)
             diagnostics_text_split = diagnostics_text.split(":icon-fail: ")
             if len(diagnostics_text_split) > 1:

datacontract/engines/soda/connections/bigquery.py
@@ -1,18 +1,20 @@
 import os
+
 import yaml

+
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path": os.getenv('DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH'),
+            "account_info_json_path": os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"),
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
-            "dataset": server.dataset
+            "dataset": server.dataset,
         }
     }

     soda_configuration_str = yaml.dump(soda_configuration)
-    return soda_configuration_str
+    return soda_configuration_str

datacontract/engines/soda/connections/dask.py
@@ -1,4 +1,3 @@
-
 # def add_s3_connection_dask_json(data_contract, scan, server):
 #     s3_access_key_id = os.getenv('DATACONTRACT_S3_ACCESS_KEY_ID')
 #     s3_secret_access_key = os.getenv('DATACONTRACT_S3_SECRET_ACCESS_KEY')

datacontract/engines/soda/connections/databricks.py
@@ -11,8 +11,8 @@ def to_databricks_soda_configuration(server):
             "host": server.host,
             "catalog": server.catalog,
             "schema": server.schema_,
-            "http_path": os.getenv('DATACONTRACT_DATABRICKS_HTTP_PATH'),
-            "token": os.getenv('DATACONTRACT_DATABRICKS_TOKEN'),
+            "http_path": os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH"),
+            "token": os.getenv("DATACONTRACT_DATABRICKS_TOKEN"),
         }
     }


datacontract/engines/soda/connections/duckdb.py
@@ -15,7 +15,7 @@ def get_duckdb_connection(data_contract, server):
     for model_name in data_contract.models:
         model_path = path
         if "{model}" in model_path:
-            model_path = model_path.format(model = model_name)
+            model_path = model_path.format(model=model_name)
         logging.info(f"Creating table {model_name} for {model_path}")

         if server.format == "json":
@@ -39,9 +39,9 @@ def get_duckdb_connection(data_contract, server):


 def setup_s3_connection(con, server):
-    s3_region = os.getenv('DATACONTRACT_S3_REGION')
-    s3_access_key_id = os.getenv('DATACONTRACT_S3_ACCESS_KEY_ID')
-    s3_secret_access_key = os.getenv('DATACONTRACT_S3_SECRET_ACCESS_KEY')
+    s3_region = os.getenv("DATACONTRACT_S3_REGION")
+    s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
+    s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     # con.install_extension("httpfs")
     # con.load_extension("httpfs")
     if server.endpointUrl is not None:

datacontract/engines/soda/connections/kafka.py
@@ -4,7 +4,23 @@ import pyspark.sql.functions as fn
 from pyspark.sql import SparkSession
 from pyspark.sql.avro.functions import from_avro
 from pyspark.sql.functions import from_json, col
-from pyspark.sql.types import *
+from pyspark.sql.types import (
+    StructType,
+    DataType,
+    NullType,
+    ArrayType,
+    BinaryType,
+    DateType,
+    TimestampNTZType,
+    TimestampType,
+    BooleanType,
+    LongType,
+    IntegerType,
+    DoubleType,
+    DecimalType,
+    StringType,
+    StructField,
+)

 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.model.data_contract_specification import \
@@ -15,14 +31,18 @@ from datacontract.model.exceptions import DataContractException
 def create_spark_session(tmp_dir) -> SparkSession:
     # TODO: Update dependency versions when updating pyspark
     # TODO: add protobuf library
-    spark = SparkSession.builder.appName("datacontract") \
-        .config("spark.sql.warehouse.dir", tmp_dir + "/spark-warehouse") \
-        .config("spark.streaming.stopGracefullyOnShutdown", True) \
-        .config('spark.jars.packages',
-                'org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,org.apache.spark:spark-avro_2.12:3.5.0') \
+    spark = (
+        SparkSession.builder.appName("datacontract")
+        .config("spark.sql.warehouse.dir", tmp_dir + "/spark-warehouse")
+        .config("spark.streaming.stopGracefullyOnShutdown", True)
+        .config(
+            "spark.jars.packages",
+            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,org.apache.spark:spark-avro_2.12:3.5.0",
+        )
         .getOrCreate()
+    )
     spark.sparkContext.setLogLevel("WARN")
-    print(f'Using PySpark version {spark.version}')
+    print(f"Using PySpark version {spark.version}")
     return spark


@@ -32,14 +52,14 @@ def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecificati
     auth_options = get_auth_options()

     # read full kafka topic
-    df = spark \
-        .read \
-        .format("kafka") \
-        .options(**auth_options) \
-        .option("kafka.bootstrap.servers", host) \
-        .option("subscribe", topic) \
-        .option("startingOffsets", "earliest") \
+    df = (
+        spark.read.format("kafka")
+        .options(**auth_options)
+        .option("kafka.bootstrap.servers", host)
+        .option("subscribe", topic)
+        .option("startingOffsets", "earliest")
         .load()
+    )
     # TODO a warning if none or multiple models
     model_name, model = next(iter(data_contract.models.items()))
     if server.format == "avro":
@@ -73,8 +93,8 @@ def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecificati


 def get_auth_options():
-    kafka_sasl_username = os.getenv('DATACONTRACT_KAFKA_SASL_USERNAME')
-    kafka_sasl_password = os.getenv('DATACONTRACT_KAFKA_SASL_PASSWORD')
+    kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
+    kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
     if kafka_sasl_username is None:
         auth_options = {}
     else:
@@ -130,4 +150,3 @@ def to_struct_field(field_name: str, field: Field) -> StructField:
         data_type = DataType()

     return StructField(field_name, data_type, nullable=not field.required)
-

datacontract/engines/soda/connections/postgres.py
@@ -10,12 +10,12 @@ def to_postgres_soda_configuration(server):
             "type": "postgres",
             "host": server.host,
             "port": str(server.port),
-            "username": os.getenv('DATACONTRACT_POSTGRES_USERNAME'),
-            "password": os.getenv('DATACONTRACT_POSTGRES_PASSWORD'),
+            "username": os.getenv("DATACONTRACT_POSTGRES_USERNAME"),
+            "password": os.getenv("DATACONTRACT_POSTGRES_PASSWORD"),
             "database": server.database,
             "schema": server.schema_,
         }
     }

     soda_configuration_str = yaml.dump(soda_configuration)
-    return soda_configuration_str
+    return soda_configuration_str

datacontract/engines/soda/connections/snowflake.py
@@ -7,13 +7,13 @@ def to_snowflake_soda_configuration(server):
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "snowflake",
-            "username": os.getenv('DATACONTRACT_SNOWFLAKE_USERNAME'),
-            "password": os.getenv('DATACONTRACT_SNOWFLAKE_PASSWORD'),
-            "role": os.getenv('DATACONTRACT_SNOWFLAKE_ROLE'),
+            "username": os.getenv("DATACONTRACT_SNOWFLAKE_USERNAME"),
+            "password": os.getenv("DATACONTRACT_SNOWFLAKE_PASSWORD"),
+            "role": os.getenv("DATACONTRACT_SNOWFLAKE_ROLE"),
             "account": server.account,
             "database": server.database,
             "schema": server.schema_,
-            "warehouse": os.getenv('DATACONTRACT_SNOWFLAKE_WAREHOUSE'),
+            "warehouse": os.getenv("DATACONTRACT_SNOWFLAKE_WAREHOUSE"),
             "connection_timeout": 5, # minutes
         }
     }
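
The connection hunks above consistently read credentials through os.getenv at configuration time. A minimal sketch of supplying them before a run, assuming the variable names shown in the Snowflake hunk; the values are placeholders and not part of this diff:

    import os

    # Placeholder values; the variable names come from the os.getenv() calls above.
    os.environ["DATACONTRACT_SNOWFLAKE_USERNAME"] = "my_user"
    os.environ["DATACONTRACT_SNOWFLAKE_PASSWORD"] = "my_password"
    os.environ["DATACONTRACT_SNOWFLAKE_ROLE"] = "ANALYST"
    os.environ["DATACONTRACT_SNOWFLAKE_WAREHOUSE"] = "COMPUTE_WH"

    # to_snowflake_soda_configuration(server) reads these via os.getenv() when it
    # builds the Soda data source configuration shown in the hunk above.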

datacontract/export/avro_converter.py
@@ -6,16 +6,14 @@ from datacontract.model.data_contract_specification import Field
 def to_avro_schema(model_name, model) -> dict:
     return to_avro_record(model_name, model.fields, model.description)

+
 def to_avro_schema_json(model_name, model) -> str:
     schema = to_avro_schema(model_name, model)
     return json.dumps(schema, indent=2, sort_keys=False)


 def to_avro_record(name, fields, description) -> dict:
-    schema = {
-        "type": "record",
-        "name": name
-    }
+    schema = {"type": "record", "name": name}
     if description is not None:
         schema["doc"] = description
     schema["fields"] = to_avro_fields(fields)
@@ -30,9 +28,7 @@ def to_avro_fields(fields):


 def to_avro_field(field, field_name):
-    avro_field = {
-        "name": field_name
-    }
+    avro_field = {"name": field_name}
     if field.description is not None:
         avro_field["doc"] = field.description
     avro_field["type"] = to_avro_type(field, field_name)
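
For orientation, to_avro_record above now builds its skeleton from a single dict literal; the resulting schema has roughly this shape (illustrative values only, the model name and descriptions are placeholders):

    # Illustrative only: shape of the dict returned by to_avro_record.
    {
        "type": "record",
        "name": "orders",                # placeholder model name
        "doc": "One record per order",   # present only when a description is set
        "fields": [
            # one entry per field, built by to_avro_fields()/to_avro_field(),
            # each at least {"name": ..., "type": ...} plus "doc" when described
        ],
    }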

datacontract/export/avro_idl_converter.py
@@ -1,12 +1,14 @@
-from datacontract.model.data_contract_specification import DataContractSpecification, Field
-from datacontract.lint.resolve import inline_definitions_into_data_contract
+import typing
 from dataclasses import dataclass
 from enum import Enum
-import typing
 from io import StringIO

+from datacontract.lint.resolve import inline_definitions_into_data_contract
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException

+
 def to_avro_idl(contract: DataContractSpecification) -> str:
     """Serialize the provided data contract specification into an Avro IDL string.

@@ -18,6 +20,7 @@ def to_avro_idl(contract: DataContractSpecification) -> str:
     to_avro_idl_stream(contract, stream)
     return stream.getvalue()

+
 def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
     """Serialize the provided data contract specification into Avro IDL."""
     ir = _contract_to_avro_idl_ir(contract)
@@ -28,6 +31,7 @@ def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextI
         _write_model_type(model_type, stream)
     stream.write("}\n")

+
 class AvroPrimitiveType(Enum):
     int = "int"
     long = "long"
@@ -38,49 +42,71 @@ class AvroPrimitiveType(Enum):
     null = "null"
     bytes = "bytes"

+
 class AvroLogicalType(Enum):
     decimal = "decimal"
     date = "date"
     time_ms = "time_ms"
     timestamp_ms = "timestamp_ms"

+
 @dataclass
 class AvroField:
     name: str
     required: bool
     description: typing.Optional[str]

+
 @dataclass
 class AvroPrimitiveField(AvroField):
     type: typing.Union[AvroPrimitiveType, AvroLogicalType]

+
 @dataclass
 class AvroComplexField(AvroField):
     subfields: list[AvroField]

+
 @dataclass
 class AvroArrayField(AvroField):
     type: AvroField

+
 @dataclass
 class AvroModelType:
     name: str
     description: typing.Optional[str]
     fields: list[AvroField]

+
 @dataclass
 class AvroIDLProtocol:
     name: typing.Optional[str]
     description: typing.Optional[str]
     model_types: list[AvroModelType]

-avro_primitive_types = set(["string", "text", "varchar",
-                            "float", "double", "int",
-                            "integer", "long", "bigint",
-                            "boolean", "timestamp_ntz",
-                            "timestamp", "timestamp_tz",
-                            "date", "bytes",
-                            "null"])
+
+avro_primitive_types = set(
+    [
+        "string",
+        "text",
+        "varchar",
+        "float",
+        "double",
+        "int",
+        "integer",
+        "long",
+        "bigint",
+        "boolean",
+        "timestamp_ntz",
+        "timestamp",
+        "timestamp_tz",
+        "date",
+        "bytes",
+        "null",
+    ]
+)
+

 def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
     result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
@@ -114,10 +140,11 @@ def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimit
             model=field,
             reason="Unknown field type {field.type}",
             result="failed",
-            message="Avro IDL type conversion failed."
+            message="Avro IDL type conversion failed.",
         )
     return result

+
 def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
     if field.type in avro_primitive_types:
         return _to_avro_primitive_logical_type(field_name, field)
@@ -125,17 +152,14 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
     match field.type:
         case "array":
             return AvroArrayField(
-                field_name,
-                field.required,
-                field.description,
-                _to_avro_idl_type(field_name, field.items)
+                field_name, field.required, field.description, _to_avro_idl_type(field_name, field.items)
             )
         case "object" | "record" | "struct":
             return AvroComplexField(
                 field_name,
                 field.required,
                 field.description,
-                [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()]
+                [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()],
             )
         case _:
             raise DataContractException(
@@ -144,56 +168,55 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
                 model=type,
                 reason="Unknown Data Contract field type",
                 result="failed",
-                message="Avro IDL type conversion failed."
+                message="Avro IDL type conversion failed.",
             )


 def _generate_field_types(contract: DataContractSpecification) -> list[AvroField]:
     result = []
-    for (_, model) in contract.models.items():
-        for (field_name, field) in model.fields.items():
+    for _, model in contract.models.items():
+        for field_name, field in model.fields.items():
             result.append(_to_avro_idl_type(field_name, field))
     return result

+
 def generate_model_types(contract: DataContractSpecification) -> list[AvroModelType]:
     result = []
-    for (model_name, model) in contract.models.items():
-        result.append(AvroModelType(
-            name=model_name,
-            description=model.description,
-            fields=_generate_field_types(contract)
-        ))
+    for model_name, model in contract.models.items():
+        result.append(
+            AvroModelType(name=model_name, description=model.description, fields=_generate_field_types(contract))
+        )
     return result

+
 def _model_name_to_identifier(model_name: str):
-    return "".join([word.title() for word in model_name.split()])
+    return "".join([word.title() for word in model_name.split()])

-def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:

+def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:
     """Convert models into an intermediate representation for later serialization into Avro IDL.

-    Each model is converted to a record containing a field for each model field.
-    """
+    Each model is converted to a record containing a field for each model field.
+    """
     inlined_contract = contract.model_copy()
     inline_definitions_into_data_contract(inlined_contract)
-    protocol_name = (_model_name_to_identifier(contract.info.title)
-                     if contract.info and contract.info.title
-                     else None)
-    description = (contract.info.description if
-                   contract.info and contract.info.description
-                   else None)
-    return AvroIDLProtocol(name=protocol_name,
-                           description=description,
-                           model_types=generate_model_types(inlined_contract))
+    protocol_name = _model_name_to_identifier(contract.info.title) if contract.info and contract.info.title else None
+    description = contract.info.description if contract.info and contract.info.description else None
+    return AvroIDLProtocol(
+        name=protocol_name, description=description, model_types=generate_model_types(inlined_contract)
+    )
+

 def _write_indent(indent: int, stream: typing.TextIO):
     stream.write(" " * indent)

+
 def _write_field_description(field: AvroField, indent: int, stream: typing.TextIO):
     if field.description:
         _write_indent(indent, stream)
         stream.write(f"/** {field.description} */\n")

+
 def _write_field_type_definition(field: AvroField, indent: int, stream: typing.TextIO) -> str:
     # Write any extra information (such as record type definition) and return
     # the name of the generated type. Writes descriptions only for record
@@ -215,7 +238,7 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.T
             for subfield in subfields:
                 subfield_types.append(_write_field_type_definition(subfield, indent + 1, stream))
             # Reference all defined record types.
-            for (field, subfield_type) in zip(field.subfields, subfield_types):
+            for field, subfield_type in zip(field.subfields, subfield_types):
                 _write_field_description(field, indent + 1, stream)
                 _write_indent(indent + 1, stream)
                 stream.write(f"{subfield_type} {field.name};\n")
@@ -234,15 +257,15 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.T
         case _:
             raise RuntimeError("Unknown Avro field type {field}")

-def _write_field(field: AvroField,
-                 indent,
-                 stream: typing.TextIO):
+
+def _write_field(field: AvroField, indent, stream: typing.TextIO):
     # Start of recursion.
     typename = _write_field_type_definition(field, indent, stream)
     _write_field_description(field, indent, stream)
     _write_indent(indent, stream)
     stream.write(f"{typename} {field.name};\n")

+
 def _write_model_type(model: AvroModelType, stream: typing.TextIO):
     # Called once for each model
     if model.description:
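
Taken together, the reformatted module keeps its public entry point, to_avro_idl(contract), which inlines definitions and renders one Avro IDL record per model. A minimal usage sketch under stated assumptions: the Model class name and the constructor defaults of DataContractSpecification are assumed here; only the attribute names (models, fields, type, required, description) are taken from the hunks above.

    from datacontract.export.avro_idl_converter import to_avro_idl
    from datacontract.model.data_contract_specification import (  # Model is an assumed class name
        DataContractSpecification,
        Field,
        Model,
    )

    # Hypothetical minimal contract: one model with two primitive fields.
    spec = DataContractSpecification(
        models={
            "orders": Model(
                description="One record per order",
                fields={
                    "order_id": Field(type="string", required=True, description="Primary key"),
                    "amount": Field(type="double", required=False),
                },
            )
        }
    )

    print(to_avro_idl(spec))  # prints an Avro IDL protocol containing an `orders` record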