datacontract-cli 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (35)
  1. datacontract/breaking/breaking_rules.py +4 -0
  2. datacontract/catalog/catalog.py +74 -0
  3. datacontract/cli.py +39 -3
  4. datacontract/data_contract.py +12 -1
  5. datacontract/engines/fastjsonschema/check_jsonschema.py +1 -2
  6. datacontract/engines/soda/check_soda_execute.py +9 -15
  7. datacontract/engines/soda/connections/duckdb.py +83 -14
  8. datacontract/engines/soda/connections/kafka.py +108 -105
  9. datacontract/export/avro_idl_converter.py +1 -2
  10. datacontract/export/dbt_converter.py +1 -2
  11. datacontract/export/great_expectations_converter.py +1 -2
  12. datacontract/export/html_export.py +3 -4
  13. datacontract/export/jsonschema_converter.py +1 -2
  14. datacontract/export/odcs_converter.py +1 -2
  15. datacontract/export/rdf_converter.py +1 -1
  16. datacontract/export/sodacl_converter.py +1 -2
  17. datacontract/export/terraform_converter.py +1 -2
  18. datacontract/imports/avro_importer.py +1 -2
  19. datacontract/imports/glue_importer.py +183 -0
  20. datacontract/imports/sql_importer.py +20 -9
  21. datacontract/integration/publish_opentelemetry.py +3 -6
  22. datacontract/lint/linters/example_model_linter.py +1 -2
  23. datacontract/lint/linters/field_pattern_linter.py +1 -2
  24. datacontract/lint/linters/notice_period_linter.py +1 -2
  25. datacontract/lint/linters/quality_schema_linter.py +1 -2
  26. datacontract/lint/resolve.py +9 -6
  27. datacontract/model/data_contract_specification.py +2 -0
  28. datacontract/templates/datacontract.html +76 -21
  29. datacontract/templates/style/output.css +113 -4
  30. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/METADATA +180 -102
  31. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/RECORD +35 -33
  32. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/LICENSE +0 -0
  33. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/WHEEL +0 -0
  34. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/entry_points.txt +0 -0
  35. {datacontract_cli-0.10.1.dist-info → datacontract_cli-0.10.2.dist-info}/top_level.txt +0 -0

datacontract/engines/soda/connections/kafka.py
@@ -1,40 +1,36 @@
 import os
-
-import pyspark.sql.functions as fn
 from pyspark.sql import SparkSession
+from pyspark.sql.functions import col, expr, from_json
 from pyspark.sql.avro.functions import from_avro
-from pyspark.sql.functions import from_json, col
 from pyspark.sql.types import (
     StructType,
-    DataType,
-    NullType,
-    ArrayType,
-    BinaryType,
-    DateType,
-    TimestampNTZType,
-    TimestampType,
-    BooleanType,
-    LongType,
-    IntegerType,
-    DoubleType,
-    DecimalType,
-    StringType,
     StructField,
+    StringType,
+    DecimalType,
+    DoubleType,
+    IntegerType,
+    LongType,
+    BooleanType,
+    TimestampType,
+    TimestampNTZType,
+    DateType,
+    BinaryType,
+    ArrayType,
+    NullType,
+    DataType,
 )

 from datacontract.export.avro_converter import to_avro_schema_json
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Server, Field
 from datacontract.model.exceptions import DataContractException


-def create_spark_session(tmp_dir) -> SparkSession:
-    # TODO: Update dependency versions when updating pyspark
-    # TODO: add protobuf library
+def create_spark_session(tmp_dir: str) -> SparkSession:
+    """Create and configure a Spark session."""
     spark = (
         SparkSession.builder.appName("datacontract")
-        .config("spark.sql.warehouse.dir", tmp_dir + "/spark-warehouse")
-        .config("spark.streaming.stopGracefullyOnShutdown", True)
+        .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
+        .config("spark.streaming.stopGracefullyOnShutdown", "true")
         .config(
             "spark.jars.packages",
             "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,org.apache.spark:spark-avro_2.12:3.5.0",
@@ -47,106 +43,113 @@ def create_spark_session(tmp_dir) -> SparkSession:


 def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecification, server: Server, tmp_dir):
-    host = server.host
-    topic = server.topic
-    auth_options = get_auth_options()
-
-    # read full kafka topic
+    """Read and process data from a Kafka topic based on the server configuration."""
     df = (
         spark.read.format("kafka")
-        .options(**auth_options)
-        .option("kafka.bootstrap.servers", host)
-        .option("subscribe", topic)
+        .options(**get_auth_options())
+        .option("kafka.bootstrap.servers", server.host)
+        .option("subscribe", server.topic)
        .option("startingOffsets", "earliest")
        .load()
    )
-    # TODO a warning if none or multiple models
+
     model_name, model = next(iter(data_contract.models.items()))
-    if server.format == "avro":
-        avro_schema = to_avro_schema_json(model_name, model)
-
-        # Parse out the extra bytes from the Avro data
-        # A Kafka message contains a key and a value. Data going through a Kafka topic in Confluent Cloud has five bytes added to the beginning of every Avro value. If you are using Avro format keys, then five bytes will be added to the beginning of those as well. For this example, we’re assuming string keys. These bytes consist of one magic byte and four bytes representing the schema ID of the schema in the registry that is needed to decode that data. The bytes need to be removed so that the schema ID can be determined and the Avro data can be parsed. To manipulate the data, we need a couple of imports:
-        df2 = df.withColumn("fixedValue", fn.expr("substring(value, 6, length(value)-5)"))
-
-        options = {"mode": "PERMISSIVE"}
-        df3 = df2.select(from_avro(col("fixedValue"), avro_schema, options).alias("avro")).select(col("avro.*"))
-    elif server.format == "json":
-        # TODO A good warning when the conversion to json fails
-        struct_type = to_struct_type(model.fields)
-        df2 = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
-
-        options = {"mode": "PERMISSIVE"}
-        df3 = df2.select(from_json(df2.value, struct_type, options).alias("json")).select(col("json.*"))
-    else:
-        raise DataContractException(
-            type="test",
-            name="Configuring Kafka checks",
-            result="warning",
-            reason=f"Kafka format '{server.format}' is not supported. Skip executing tests.",
-            engine="datacontract",
-        )

-    # df3.writeStream.toTable(model_name, checkpointLocation=tmp_dir + "/checkpoint")
-    df3.createOrReplaceTempView(model_name)
-    # print(spark.sql(f"select * from {model_name}").show())
+    match server.format:
+        case "avro":
+            process_avro_format(df, model_name, model)
+        case "json":
+            process_json_format(df, model_name, model)
+        case _:
+            raise DataContractException(
+                type="test",
+                name="Configuring Kafka checks",
+                result="warning",
+                reason=f"Kafka format '{server.format}' is not supported. " f"Skip executing tests.",
+                engine="datacontract",
+            )
+
+
+def process_avro_format(df, model_name, model):
+    avro_schema = to_avro_schema_json(model_name, model)
+    df2 = df.withColumn("fixedValue", expr("substring(value, 6, length(value)-5)"))
+    options = {"mode": "PERMISSIVE"}
+    df2.select(from_avro(col("fixedValue"), avro_schema, options).alias("avro")).select(
+        col("avro.*")
+    ).createOrReplaceTempView(model_name)
+
+
+def process_json_format(df, model_name, model):
+    struct_type = to_struct_type(model.fields)
+    df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").select(
+        from_json(col("value"), struct_type, {"mode": "PERMISSIVE"}).alias("json")
+    ).select(col("json.*")).createOrReplaceTempView(model_name)


 def get_auth_options():
+    """Retrieve Kafka authentication options from environment variables."""
     kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
     kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
+
     if kafka_sasl_username is None:
-        auth_options = {}
-    else:
-        kafka_sasl_jaas_config = f'org.apache.kafka.common.security.plain.PlainLoginModule required username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
-        auth_options = {
-            "kafka.sasl.mechanism": "PLAIN",
-            "kafka.security.protocol": "SASL_SSL",
-            "kafka.sasl.jaas.config": kafka_sasl_jaas_config,
-        }
-    return auth_options
+        return {}
+
+    return {
+        "kafka.sasl.mechanism": "PLAIN",
+        "kafka.security.protocol": "SASL_SSL",
+        "kafka.sasl.jaas.config": (
+            f"org.apache.kafka.common.security.plain.PlainLoginModule required "
+            f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
+        ),
+    }


 def to_struct_type(fields):
-    struct_fields = []
-    for field_name, field in fields.items():
-        struct_fields.append(to_struct_field(field_name, field))
-    return StructType(struct_fields)
+    """Convert field definitions to Spark StructType."""
+    return StructType([to_struct_field(field_name, field) for field_name, field in fields.items()])


 def to_struct_field(field_name: str, field: Field) -> StructField:
-    if field.type is None:
-        data_type = DataType()
-    if field.type in ["string", "varchar", "text"]:
-        data_type = StringType()
-    elif field.type in ["number", "decimal", "numeric"]:
-        data_type = DecimalType()
-    elif field.type in ["float", "double"]:
-        data_type = DoubleType()
-    elif field.type in ["integer", "int"]:
-        data_type = IntegerType()
-    elif field.type in ["long", "bigint"]:
-        data_type = LongType()
-    elif field.type in ["boolean"]:
-        data_type = BooleanType()
-    elif field.type in ["timestamp", "timestamp_tz"]:
-        data_type = TimestampType()
-    elif field.type in ["timestamp_ntz"]:
-        data_type = TimestampNTZType()
-    elif field.type in ["date"]:
-        data_type = DateType()
-    elif field.type in ["time"]:
-        data_type = DataType()
-    elif field.type in ["object", "record", "struct"]:
-        data_type = to_struct_type(field.fields)
-    elif field.type in ["binary"]:
-        data_type = BinaryType()
-    elif field.type in ["array"]:
-        # TODO support array structs
-        data_type = ArrayType()
-    elif field.type in ["null"]:
-        data_type = NullType()
-    else:
-        data_type = DataType()
+    """Map field definitions to Spark StructField using match-case."""
+    match field.type:
+        case "string" | "varchar" | "text":
+            data_type = StringType()
+        case "number" | "decimal" | "numeric":
+            data_type = DecimalType()
+        case "float" | "double":
+            data_type = DoubleType()
+        case "integer" | "int":
+            data_type = IntegerType()
+        case "long" | "bigint":
+            data_type = LongType()
+        case "boolean":
+            data_type = BooleanType()
+        case "timestamp" | "timestamp_tz":
+            data_type = TimestampType()
+        case "timestamp_ntz":
+            data_type = TimestampNTZType()
+        case "date":
+            data_type = DateType()
+        case "time":
+            data_type = DataType()  # Specific handling for time type
+        case "object" | "record" | "struct":
+            data_type = StructType(
+                [to_struct_field(sub_field_name, sub_field) for sub_field_name, sub_field in field.fields.items()]
+            )
+        case "binary":
+            data_type = BinaryType()
+        case "array":
+            element_type = (
+                StructType(
+                    [to_struct_field(sub_field_name, sub_field) for sub_field_name, sub_field in field.fields.items()]
+                )
+                if field.fields
+                else DataType()
+            )
+            data_type = ArrayType(element_type)
+        case "null":
+            data_type = NullType()
+        case _:
+            data_type = DataType()  # Fallback generic DataType

     return StructField(field_name, data_type, nullable=not field.required)
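
The Kafka check now registers one temp view per model and dispatches on `server.format` with `match`. A minimal usage sketch under stated assumptions: the broker address, topic, and model below are hypothetical, a reachable Kafka broker is required, and the spec objects are built by hand rather than loaded by the CLI.

# Sketch only: builds a one-model contract and a Kafka server entry by hand and
# reuses the helpers from this file. Host, topic, and model name are made up.
import tempfile

from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Model,
    Server,
)

data_contract = DataContractSpecification(
    models={"orders": Model(type="table", fields={"order_id": Field(type="string", required=True)})}
)
server = Server(type="kafka", host="localhost:9092", topic="orders", format="json")

with tempfile.TemporaryDirectory() as tmp_dir:
    spark = create_spark_session(tmp_dir)                     # pulls the Kafka/Avro Spark packages
    read_kafka_topic(spark, data_contract, server, tmp_dir)   # registers the "orders" temp view
    spark.sql("SELECT * FROM orders LIMIT 5").show()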

datacontract/export/avro_idl_converter.py
@@ -4,8 +4,7 @@ from enum import Enum
 from io import StringIO

 from datacontract.lint.resolve import inline_definitions_into_data_contract
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException



datacontract/export/dbt_converter.py
@@ -3,8 +3,7 @@ from typing import Dict
 import yaml

 from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field


 def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):

datacontract/export/great_expectations_converter.py
@@ -3,8 +3,7 @@ from typing import Dict, List, Any

 import yaml

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Field, Quality
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality


 def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:

datacontract/export/html_export.py
@@ -6,8 +6,7 @@ import pytz
 import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification


 def to_html(data_contract_spec: DataContractSpecification) -> str:
@@ -40,9 +39,9 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:

     datacontract_yaml = data_contract_spec.to_yaml()

-    tz = pytz.timezone('UTC')
+    tz = pytz.timezone("UTC")
     now = datetime.datetime.now(tz)
-    formatted_date = now.strftime('%d %b %Y %H:%M:%S UTC')
+    formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
     datacontract_cli_version = get_version()

     # Render the template with necessary data

datacontract/export/jsonschema_converter.py
@@ -1,8 +1,7 @@
 import json
 from typing import Dict

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field


 def to_jsonschemas(data_contract_spec: DataContractSpecification):

datacontract/export/odcs_converter.py
@@ -2,8 +2,7 @@ from typing import Dict

 import yaml

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field


 def to_odcs_yaml(data_contract_spec: DataContractSpecification):

datacontract/export/rdf_converter.py
@@ -141,7 +141,7 @@ def add_info(contract, info, graph, dc, dcx):
     graph.add((bnode_info, dc.version, Literal(info.version)))

     # add owner
-    owner = URIRef(info.owner)
+    owner = Literal(info.owner)
     graph.add((bnode_info, dc.owner, owner))

     # add contact
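
The owner in a data contract is typically a plain name such as a team identifier rather than an IRI, so emitting it with `URIRef` produced a questionable (relative) IRI node; `Literal` stores it as a string value. A small rdflib illustration of the difference; the namespace IRI and owner value are invented for this example, not taken from the converter.

# Illustration of the URIRef -> Literal change, not the converter itself.
from rdflib import BNode, Graph, Literal, Namespace

dc = Namespace("https://example.com/datacontract#")   # hypothetical namespace
graph = Graph()
bnode_info = BNode()

owner = "checkout-team"                                # a team name, not an IRI
graph.add((bnode_info, dc.owner, Literal(owner)))      # 0.10.2: plain string literal
# graph.add((bnode_info, dc.owner, URIRef(owner)))     # 0.10.1: treated the name as a (relative) IRI
print(graph.serialize(format="turtle"))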

datacontract/export/sodacl_converter.py
@@ -1,8 +1,7 @@
 import yaml

 from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification


 def to_sodacl_yaml(

datacontract/export/terraform_converter.py
@@ -1,7 +1,6 @@
 import re

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.model.data_contract_specification import DataContractSpecification, Server


 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:

datacontract/imports/avro_importer.py
@@ -1,7 +1,6 @@
 import avro.schema

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException



datacontract/imports/glue_importer.py
@@ -0,0 +1,183 @@
+import boto3
+from typing import List
+
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Model,
+    Field,
+    Server,
+)
+
+
+def get_glue_database(datebase_name: str):
+    """Get the details Glue database.
+
+    Args:
+        database_name (str): glue database to request.
+
+    Returns:
+        set: catalogid and locationUri
+    """
+
+    glue = boto3.client("glue")
+    try:
+        response = glue.get_database(Name=datebase_name)
+    except glue.exceptions.EntityNotFoundException:
+        print(f"Database not found {datebase_name}.")
+        return (None, None)
+    except Exception as e:
+        # todo catch all
+        print(f"Error: {e}")
+        return (None, None)
+
+    return (response["Database"]["CatalogId"], response["Database"].get("LocationUri", "None"))
+
+
+def get_glue_tables(database_name: str) -> List[str]:
+    """Get the list of tables in a Glue database.
+
+    Args:
+        database_name (str): glue database to request.
+
+    Returns:
+        List[string]: List of table names
+    """
+
+    glue = boto3.client("glue")
+
+    # Set the paginator
+    paginator = glue.get_paginator("get_tables")
+
+    # Initialize an empty list to store the table names
+    table_names = []
+    try:
+        # Paginate through the tables
+        for page in paginator.paginate(DatabaseName=database_name, PaginationConfig={"PageSize": 100}):
+            # Add the tables from the current page to the list
+            table_names.extend([table["Name"] for table in page["TableList"] if "Name" in table])
+    except glue.exceptions.EntityNotFoundException:
+        print(f"Database {database_name} not found.")
+        return []
+    except Exception as e:
+        # todo catch all
+        print(f"Error: {e}")
+        return []
+
+    return table_names
+
+
+def get_glue_table_schema(database_name: str, table_name: str):
+    """Get the schema of a Glue table.
+
+    Args:
+        database_name (str): Glue database name.
+        table_name (str): Glue table name.
+
+    Returns:
+        dict: Table schema
+    """
+
+    glue = boto3.client("glue")
+
+    # Get the table schema
+    try:
+        response = glue.get_table(DatabaseName=database_name, Name=table_name)
+    except glue.exceptions.EntityNotFoundException:
+        print(f"Table {table_name} not found in database {database_name}.")
+        return {}
+    except Exception as e:
+        # todo catch all
+        print(f"Error: {e}")
+        return {}
+
+    table_schema = response["Table"]["StorageDescriptor"]["Columns"]
+
+    # when using hive partition keys, the schema is stored in the PartitionKeys field
+    if response["Table"].get("PartitionKeys") is not None:
+        for pk in response["Table"]["PartitionKeys"]:
+            table_schema.append(
+                {
+                    "Name": pk["Name"],
+                    "Type": pk["Type"],
+                    "Hive": True,
+                    "Comment": "Partition Key",
+                }
+            )
+
+    return table_schema
+
+
+def import_glue(data_contract_specification: DataContractSpecification, source: str):
+    """Import the schema of a Glue database."""
+
+    catalogid, location_uri = get_glue_database(source)
+
+    # something went wrong
+    if catalogid is None:
+        return data_contract_specification
+
+    tables = get_glue_tables(source)
+
+    data_contract_specification.servers = {
+        "production": Server(type="glue", account=catalogid, database=source, location=location_uri),
+    }
+
+    for table_name in tables:
+        if data_contract_specification.models is None:
+            data_contract_specification.models = {}
+
+        table_schema = get_glue_table_schema(source, table_name)
+
+        fields = {}
+        for column in table_schema:
+            field = Field()
+            field.type = map_type_from_sql(column["Type"])
+
+            # hive partitons are required, but are not primary keys
+            if column.get("Hive"):
+                field.required = True
+
+            field.description = column.get("Comment")
+
+            fields[column["Name"]] = field
+
+        data_contract_specification.models[table_name] = Model(
+            type="table",
+            fields=fields,
+        )
+
+    return data_contract_specification
+
+
+def map_type_from_sql(sql_type: str):
+    if sql_type is None:
+        return None
+
+    if sql_type.lower().startswith("varchar"):
+        return "varchar"
+    if sql_type.lower().startswith("string"):
+        return "string"
+    if sql_type.lower().startswith("text"):
+        return "text"
+    elif sql_type.lower().startswith("byte"):
+        return "byte"
+    elif sql_type.lower().startswith("short"):
+        return "short"
+    elif sql_type.lower().startswith("integer"):
+        return "integer"
+    elif sql_type.lower().startswith("long"):
+        return "long"
+    elif sql_type.lower().startswith("bigint"):
+        return "long"
+    elif sql_type.lower().startswith("float"):
+        return "float"
+    elif sql_type.lower().startswith("double"):
+        return "double"
+    elif sql_type.lower().startswith("boolean"):
+        return "boolean"
+    elif sql_type.lower().startswith("timestamp"):
+        return "timestamp"
+    elif sql_type.lower().startswith("date"):
+        return "date"
+    else:
+        return "variant"

datacontract/imports/sql_importer.py
@@ -1,7 +1,6 @@
 from simple_ddl_parser import parse_from_file

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field


 def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
@@ -45,19 +44,31 @@ def map_type_from_sql(sql_type: str):
     if sql_type is None:
         return None

-    if sql_type.lower().startswith("varchar"):
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed.startswith("varchar"):
         return "varchar"
-    if sql_type.lower().startswith("string"):
+    elif sql_type_normed.startswith("string"):
         return "string"
-    if sql_type.lower().startswith("text"):
+    elif sql_type_normed.startswith("text"):
         return "text"
-    elif sql_type.lower().startswith("int"):
+    elif sql_type_normed.startswith("int"):
         return "integer"
-    elif sql_type.lower().startswith("float"):
+    elif sql_type_normed.startswith("float"):
         return "float"
-    elif sql_type.lower().startswith("bool"):
+    elif sql_type_normed.startswith("bool"):
         return "boolean"
-    elif sql_type.lower().startswith("timestamp"):
+    elif sql_type_normed.startswith("timestamp"):
         return "timestamp"
+    elif sql_type_normed == "date":
+        return "date"
+    elif sql_type_normed == "smalldatetime":
+        return "timestamp_ntz"
+    elif sql_type_normed == "datetime":
+        return "timestamp_ntz"
+    elif sql_type_normed == "datetime2":
+        return "timestamp_ntz"
+    elif sql_type_normed == "datetimeoffset":
+        return "timestamp_tz"
     else:
         return "variant"

datacontract/integration/publish_opentelemetry.py
@@ -4,14 +4,11 @@ import os
 from importlib import metadata

 from opentelemetry import metrics
-from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import \
-    OTLPMetricExporter as OTLPgRPCMetricExporter
-from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
-    OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPgRPCMetricExporter
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.metrics import Observation
 from opentelemetry.sdk.metrics import MeterProvider
-from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, \
-    PeriodicExportingMetricReader
+from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader

 from datacontract.model.run import Run


datacontract/lint/linters/example_model_linter.py
@@ -4,8 +4,7 @@ import json

 import yaml

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Example
+from datacontract.model.data_contract_specification import DataContractSpecification, Example
 from ..lint import Linter, LinterResult



datacontract/lint/linters/field_pattern_linter.py
@@ -1,7 +1,6 @@
 import re

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification
 from ..lint import Linter, LinterResult



datacontract/lint/linters/notice_period_linter.py
@@ -1,7 +1,6 @@
 import re

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification
 from ..lint import Linter, LinterResult



datacontract/lint/linters/quality_schema_linter.py
@@ -1,7 +1,6 @@
 import yaml

-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model
+from datacontract.model.data_contract_specification import DataContractSpecification, Model
 from ..lint import Linter, LinterResult

