datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of datacontract-cli might be problematic.

Files changed (60)
  1. datacontract/breaking/breaking.py +139 -63
  2. datacontract/breaking/breaking_rules.py +71 -54
  3. datacontract/cli.py +138 -45
  4. datacontract/data_contract.py +316 -78
  5. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  7. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  10. datacontract/engines/soda/check_soda_execute.py +46 -35
  11. datacontract/engines/soda/connections/bigquery.py +5 -3
  12. datacontract/engines/soda/connections/dask.py +0 -1
  13. datacontract/engines/soda/connections/databricks.py +2 -2
  14. datacontract/engines/soda/connections/duckdb.py +4 -4
  15. datacontract/engines/soda/connections/kafka.py +36 -17
  16. datacontract/engines/soda/connections/postgres.py +3 -3
  17. datacontract/engines/soda/connections/snowflake.py +4 -4
  18. datacontract/export/avro_converter.py +3 -7
  19. datacontract/export/avro_idl_converter.py +280 -0
  20. datacontract/export/dbt_converter.py +55 -80
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/jsonschema_converter.py +3 -1
  23. datacontract/export/odcs_converter.py +10 -12
  24. datacontract/export/protobuf_converter.py +99 -0
  25. datacontract/export/pydantic_converter.py +140 -0
  26. datacontract/export/rdf_converter.py +35 -12
  27. datacontract/export/sodacl_converter.py +24 -24
  28. datacontract/export/sql_converter.py +93 -0
  29. datacontract/export/sql_type_converter.py +131 -0
  30. datacontract/export/terraform_converter.py +71 -0
  31. datacontract/imports/avro_importer.py +106 -0
  32. datacontract/imports/sql_importer.py +0 -2
  33. datacontract/init/download_datacontract_file.py +2 -2
  34. datacontract/integration/publish_datamesh_manager.py +4 -9
  35. datacontract/integration/publish_opentelemetry.py +107 -0
  36. datacontract/lint/files.py +2 -2
  37. datacontract/lint/lint.py +46 -31
  38. datacontract/lint/linters/description_linter.py +34 -0
  39. datacontract/lint/linters/example_model_linter.py +67 -43
  40. datacontract/lint/linters/field_pattern_linter.py +34 -0
  41. datacontract/lint/linters/field_reference_linter.py +38 -0
  42. datacontract/lint/linters/notice_period_linter.py +55 -0
  43. datacontract/lint/linters/primary_field_linter.py +28 -0
  44. datacontract/lint/linters/quality_schema_linter.py +52 -0
  45. datacontract/lint/linters/valid_constraints_linter.py +99 -0
  46. datacontract/lint/resolve.py +53 -8
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +27 -5
  50. datacontract/model/data_contract_specification.py +45 -25
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +1 -1
  53. datacontract/web.py +5 -8
  54. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
  55. datacontract_cli-0.9.8.dist-info/RECORD +63 -0
  56. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
  57. datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
  58. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
  59. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
  60. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
datacontract/export/sql_type_converter.py ADDED
@@ -0,0 +1,131 @@
+ from datacontract.model.data_contract_specification import Field
+
+
+ def convert_to_sql_type(field: Field, server_type: str) -> str:
+     if server_type == "snowflake":
+         return convert_to_snowflake(field)
+     if server_type == "postgres":
+         return convert_type_to_postgres(field)
+     if server_type == "databricks":
+         return convert_to_databricks(field)
+     return field.type
+
+
+ # snowflake data types:
+ # https://docs.snowflake.com/en/sql-reference/data-types.html
+ def convert_to_snowflake(field) -> None | str:
+     type = field.type
+     # currently optimized for snowflake
+     # LEARNING: data contract has no direct support for CHAR, CHARACTER
+     # LEARNING: data contract has no support for "date-time", "datetime", "time"
+     # LEARNING: No precision and scale support in data contract
+     # LEARNING: no support for any
+     # GEOGRAPHY and GEOMETRY are not supported by the mapping
+     if type is None:
+         return None
+     if type.lower() in ["string", "varchar", "text"]:
+         return type.upper()  # STRING, TEXT, VARCHAR are all the same in snowflake
+     if type.lower() in ["timestamp", "timestamp_tz"]:
+         return "TIMESTAMP_TZ"
+     if type.lower() in ["timestamp_ntz"]:
+         return "TIMESTAMP_NTZ"
+     if type.lower() in ["date"]:
+         return "DATE"
+     if type.lower() in ["time"]:
+         return "TIME"
+     if type.lower() in ["number", "decimal", "numeric"]:
+         # precision and scale not supported by data contract
+         return "NUMBER"
+     if type.lower() in ["float", "double"]:
+         return "FLOAT"
+     if type.lower() in ["integer", "int", "long", "bigint"]:
+         return "NUMBER"  # always NUMBER(38,0)
+     if type.lower() in ["boolean"]:
+         return "BOOLEAN"
+     if type.lower() in ["object", "record", "struct"]:
+         return "OBJECT"
+     if type.lower() in ["bytes"]:
+         return "BINARY"
+     if type.lower() in ["array"]:
+         return "ARRAY"
+     return None
+
+
+ # https://www.postgresql.org/docs/current/datatype.html
+ # Using the name whenever possible
+ def convert_type_to_postgres(field: Field) -> None | str:
+     type = field.type
+     if type is None:
+         return None
+     if type.lower() in ["string", "varchar", "text"]:
+         if field.format == "uuid":
+             return "uuid"
+         return "text"  # STRING does not exist; TEXT and VARCHAR are all the same in postgres
+     if type.lower() in ["timestamp", "timestamp_tz"]:
+         return "timestamptz"
+     if type.lower() in ["timestamp_ntz"]:
+         return "timestamp"
+     if type.lower() in ["date"]:
+         return "date"
+     if type.lower() in ["time"]:
+         return "time"
+     if type.lower() in ["number", "decimal", "numeric"]:
+         # precision and scale not supported by data contract
+         if type.lower() == "number":
+             return "numeric"
+         return type.lower()
+     if type.lower() in ["float"]:
+         return "real"
+     if type.lower() in ["double"]:
+         return "double precision"
+     if type.lower() in ["integer", "int", "bigint"]:
+         return type.lower()
+     if type.lower() in ["long"]:
+         return "bigint"
+     if type.lower() in ["boolean"]:
+         return "boolean"
+     if type.lower() in ["object", "record", "struct"]:
+         return "jsonb"
+     if type.lower() in ["bytes"]:
+         return "bytea"
+     if type.lower() in ["array"]:
+         return convert_to_sql_type(field.items, "postgres") + "[]"
+     return None
+
+
+ # databricks data types:
+ # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
+ def convert_to_databricks(field) -> None | str:
+     type = field.type
+     if type is None:
+         return None
+     if type.lower() in ["string", "varchar", "text"]:
+         return "STRING"
+     if type.lower() in ["timestamp", "timestamp_tz"]:
+         return "TIMESTAMP"
+     if type.lower() in ["timestamp_ntz"]:
+         return "TIMESTAMP_NTZ"
+     if type.lower() in ["date"]:
+         return "DATE"
+     if type.lower() in ["time"]:
+         return "STRING"
+     if type.lower() in ["number", "decimal", "numeric"]:
+         # precision and scale not supported by data contract
+         return "DECIMAL"
+     if type.lower() in ["float"]:
+         return "FLOAT"
+     if type.lower() in ["double"]:
+         return "DOUBLE"
+     if type.lower() in ["integer", "int"]:
+         return "INT"
+     if type.lower() in ["long", "bigint"]:
+         return "BIGINT"
+     if type.lower() in ["boolean"]:
+         return "BOOLEAN"
+     if type.lower() in ["object", "record", "struct"]:
+         return "STRUCT"
+     if type.lower() in ["bytes"]:
+         return "BINARY"
+     if type.lower() in ["array"]:
+         return "ARRAY"
+     return None
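
For orientation, a minimal usage sketch of the new converter. How a Field is constructed (keyword arguments, defaults) is an assumption based on the model class imported above, not verified against the released wheel:

# Hypothetical usage sketch; Field(type=...) is an assumed constructor call
# on the pydantic model imported by sql_type_converter.py.
from datacontract.model.data_contract_specification import Field
from datacontract.export.sql_type_converter import convert_to_sql_type

field = Field(type="timestamp_tz")
print(convert_to_sql_type(field, "snowflake"))   # TIMESTAMP_TZ
print(convert_to_sql_type(field, "postgres"))    # timestamptz
print(convert_to_sql_type(field, "databricks"))  # TIMESTAMP
print(convert_to_sql_type(field, "mysql"))       # unmapped server type: falls back to field.type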
datacontract/export/terraform_converter.py ADDED
@@ -0,0 +1,71 @@
+ import re
+
+ from datacontract.model.data_contract_specification import \
+     DataContractSpecification, Server
+
+
+ def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
+     if data_contract_spec is None:
+         return ""
+     if data_contract_spec.servers is None or len(data_contract_spec.servers) == 0:
+         return ""
+
+     result = ""
+     for server_name, server in iter(data_contract_spec.servers.items()):
+         if server_id is not None and server_name != server_id:
+             continue
+         result = server_to_terraform_resource(data_contract_spec, result, server, server_name)
+
+     return result.strip()
+
+
+ def server_to_terraform_resource(data_contract_spec, result, server: Server, server_name):
+     tag_data_contract = data_contract_spec.id
+     tag_name = data_contract_spec.info.title
+     tag_server = server_name
+     bucket_name = extract_bucket_name(server)
+     resource_id = f"{data_contract_spec.id}_{server_name}"
+     data_product_id = server.dataProductId
+
+     if data_product_id is not None:
+         result += f"""
+ resource "aws_s3_bucket" "{resource_id}" {{
+   bucket = "{bucket_name}"
+
+   tags = {{
+     Name = "{tag_name}"
+     DataContract = "{tag_data_contract}"
+     Server = "{tag_server}"
+     DataProduct = "{data_product_id}"
+   }}
+ }}
+
+ """
+     else:
+         result += f"""
+ resource "aws_s3_bucket" "{resource_id}" {{
+   bucket = "{bucket_name}"
+
+   tags = {{
+     Name = "{tag_name}"
+     DataContract = "{tag_data_contract}"
+     Server = "{tag_server}"
+   }}
+ }}
+
+ """
+     return result
+
+
+ def extract_bucket_name(server) -> str | None:
+     if server.type == "s3":
+         s3_url = server.location
+         # Regular expression to match the S3 bucket name
+         match = re.search(r"s3://([^/]+)/", s3_url)
+         if match:
+             # Return the first group (bucket name)
+             return match.group(1)
+         else:
+             return ""
+
+     return ""
datacontract/imports/avro_importer.py ADDED
@@ -0,0 +1,106 @@
+ import avro.schema
+
+ from datacontract.model.data_contract_specification import \
+     DataContractSpecification, Model, Field
+ from datacontract.model.exceptions import DataContractException
+
+
+ def import_avro(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+     if data_contract_specification.models is None:
+         data_contract_specification.models = {}
+
+     try:
+         with open(source, "r") as file:
+             avro_schema = avro.schema.parse(file.read())
+     except Exception as e:
+         raise DataContractException(
+             type="schema",
+             name="Parse avro schema",
+             reason=f"Failed to parse avro schema from {source}",
+             engine="datacontract",
+             original_exception=e,
+         )
+
+     # type record is being used for both the table and the object types in data contract
+     # -> CONSTRAINT: one table per .avsc input, all nested records are interpreted as objects
+     fields = import_record_fields(avro_schema.fields)
+
+     data_contract_specification.models[avro_schema.name] = Model(
+         type="table",
+         fields=fields,
+     )
+
+     if avro_schema.get_prop("doc") is not None:
+         data_contract_specification.models[avro_schema.name].description = avro_schema.get_prop("doc")
+
+     if avro_schema.get_prop("namespace") is not None:
+         data_contract_specification.models[avro_schema.name].namespace = avro_schema.get_prop("namespace")
+
+     return data_contract_specification
+
+
+ def import_record_fields(record_fields):
+     imported_fields = {}
+     for field in record_fields:
+         imported_fields[field.name] = Field()
+         imported_fields[field.name].required = True
+         imported_fields[field.name].description = field.doc
+
+         if field.type.type == "record":
+             imported_fields[field.name].type = "object"
+             imported_fields[field.name].description = field.type.doc
+             imported_fields[field.name].fields = import_record_fields(field.type.fields)
+         elif field.type.type == "union":
+             imported_fields[field.name].required = False
+             imported_fields[field.name].type = import_type_of_optional_field(field)
+         else:  # primitive type
+             imported_fields[field.name].type = map_type_from_avro(field.type.type)
+     return imported_fields
+
+
+ def import_type_of_optional_field(field):
+     for field_type in field.type.schemas:
+         if field_type.type != "null":
+             return map_type_from_avro(field_type.type)
+     raise DataContractException(
+         type="schema",
+         result="failed",
+         name="Map avro type to data contract type",
+         reason="Could not import optional field: union type does not contain a non-null type",
+         engine="datacontract",
+     )
+
+
+ def map_type_from_avro(avro_type_str: str):
+     # TODO: ambiguous mapping in the export
+     if avro_type_str == "null":
+         return "null"
+     elif avro_type_str == "string":
+         return "string"
+     elif avro_type_str == "bytes":
+         return "binary"
+     elif avro_type_str == "double":
+         return "double"
+     elif avro_type_str == "int":
+         return "int"
+     elif avro_type_str == "long":
+         return "long"
+     elif avro_type_str == "boolean":
+         return "boolean"
+     elif avro_type_str == "array":
+         raise DataContractException(
+             type="schema",
+             result="failed",
+             name="Map avro type to data contract type",
+             reason="Array type not supported",
+             engine="datacontract",
+         )
+     else:
+         raise DataContractException(
+             type="schema",
+             result="failed",
+             name="Map avro type to data contract type",
+             reason=f"Unsupported type {avro_type_str} in avro schema.",
+             engine="datacontract",
+         )
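
A short sketch of invoking the new importer against an .avsc file on disk. That an empty DataContractSpecification() is a valid starting point is an assumption:

# Hypothetical usage sketch; DataContractSpecification() with defaults is assumed valid.
from datacontract.model.data_contract_specification import DataContractSpecification
from datacontract.imports.avro_importer import import_avro

spec = import_avro(DataContractSpecification(), "orders.avsc")
for model_name, model in spec.models.items():
    print(model_name, model.type)  # one "table" model per .avsc input
    for field_name, field in model.fields.items():
        print(" ", field_name, field.type, "required" if field.required else "optional")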
datacontract/imports/sql_importer.py CHANGED
@@ -5,12 +5,10 @@ from datacontract.model.data_contract_specification import \
  
  
  def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
-
      ddl = parse_from_file(source, group_by_type=True)
      tables = ddl["tables"]
  
      for table in tables:
-
          if data_contract_specification.models is None:
              data_contract_specification.models = {}
  
datacontract/init/download_datacontract_file.py CHANGED
@@ -9,9 +9,9 @@ def download_datacontract_file(file_path: str, from_url: str, overwrite_file: bo
  
      with requests.get(from_url) as response:
          response.raise_for_status()
-         with open(file_path, 'w') as f:
+         with open(file_path, "w") as f:
              f.write(response.text)
  
  
  class FileExistsException(Exception):
-     pass
+     pass
datacontract/integration/publish_datamesh_manager.py CHANGED
@@ -3,17 +3,16 @@ import os
  
  import requests
  
- from datacontract.model.run import \
-     Run
+ from datacontract.model.run import Run
  
  
  def publish_datamesh_manager(run: Run, publish_url: str):
      try:
          if publish_url is None:
-             url = f"https://api.datamesh-manager.com/api/runs"
+             url = "https://api.datamesh-manager.com/api/runs"
          else:
              url = publish_url
-         datamesh_manager_api_key = os.getenv('DATAMESH_MANAGER_API_KEY')
+         datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
  
          if run.dataContractId is None:
              raise Exception("Cannot publish run results, as data contract ID is unknown")
@@ -21,10 +20,7 @@ def publish_datamesh_manager(run: Run, publish_url: str):
          if datamesh_manager_api_key is None:
              raise Exception("Cannot publish run results, as DATAMESH_MANAGER_API_KEY is not set")
  
-         headers = {
-             'Content-Type': 'application/json',
-             'x-api-key': datamesh_manager_api_key
-         }
+         headers = {"Content-Type": "application/json", "x-api-key": datamesh_manager_api_key}
          request_body = run.model_dump_json()
          # print("Request Body:", request_body)
          response = requests.post(url, data=request_body, headers=headers)
@@ -36,4 +32,3 @@ def publish_datamesh_manager(run: Run, publish_url: str):
          logging.info("Published test results to %s", url)
      except Exception as e:
          logging.error(f"Failed publishing test results. Error: {str(e)}")
-
datacontract/integration/publish_opentelemetry.py ADDED
@@ -0,0 +1,107 @@
+ import logging
+ import math
+ import os
+ from importlib import metadata
+
+ from opentelemetry import metrics
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import \
+     OTLPMetricExporter as OTLPgRPCMetricExporter
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
+     OTLPMetricExporter
+ from opentelemetry.metrics import Observation
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, \
+     PeriodicExportingMetricReader
+
+ from datacontract.model.run import Run
+
+
+ # Publishes metrics of a test run.
+ # Metric contains the values:
+ # 0 == test run passed,
+ # 1 == test run has warnings
+ # 2 == test run failed
+ # 3 == test run not possible due to an error
+ # 4 == test status unknown
+ #
+ # Tested with these environment variables:
+ #
+ # OTEL_SERVICE_NAME=datacontract-cli
+ # OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
+ # OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret (Optional, when using SaaS Products)
+ # OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf and OTEL_EXPORTER_OTLP_PROTOCOL=grpc
+ #
+ # Current limitations:
+ # - no gRPC support
+ # - currently, only ConsoleExporter and OTLP Exporter
+ # - Metrics only, no logs yet (but loosely planned)
+
+
+ def publish_opentelemetry(run: Run):
+     try:
+         if run.dataContractId is None:
+             raise Exception("Cannot publish run results, as data contract ID is unknown")
+
+         endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+         logging.info(f"Publishing test results to opentelemetry at {endpoint}")
+
+         telemetry = Telemetry()
+         provider = metrics.get_meter_provider()
+         meter = provider.get_meter("com.datacontract.cli", metadata.version("datacontract-cli"))
+         meter.create_observable_gauge(
+             name="datacontract.cli.test",
+             callbacks=[lambda x: _to_observation_callback(run)],
+             unit="result",
+             description="The overall result of the data contract test run",
+         )
+
+         telemetry.publish()
+     except Exception as e:
+         logging.error(f"Failed publishing test results. Error: {str(e)}")
+
+
+ def _to_observation_callback(run):
+     yield _to_observation(run)
+
+
+ def _to_observation(run):
+     attributes = {
+         "datacontract.id": run.dataContractId,
+         "datacontract.version": run.dataContractVersion,
+     }
+
+     if run.result == "passed":
+         result_value = 0  # think of exit codes
+     elif run.result == "warning":
+         result_value = 1
+     elif run.result == "failed":
+         result_value = 2
+     elif run.result == "error":
+         result_value = 3
+     else:
+         result_value = 4
+     return Observation(value=result_value, attributes=attributes)
+
+
+ class Telemetry:
+     def __init__(self):
+         protocol = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
+
+         # lower to allow grpc, GRPC and alike values.
+         if protocol and protocol.lower() == "grpc":
+             self.remote_exporter = OTLPgRPCMetricExporter()
+         else:
+             # Fallback to default OTEL http/protobuf which is used when the variable is not set.
+             # This Exporter also works for http/json.
+             self.remote_exporter = OTLPMetricExporter()
+
+         self.console_exporter = ConsoleMetricExporter()
+         # using math.inf so it does not collect periodically. we do this in collect ourselves, one-time.
+         self.reader = PeriodicExportingMetricReader(self.console_exporter, export_interval_millis=math.inf)
+         self.remote_reader = PeriodicExportingMetricReader(self.remote_exporter, export_interval_millis=math.inf)
+         provider = MeterProvider(metric_readers=[self.reader, self.remote_reader])
+         metrics.set_meter_provider(provider)
+
+     def publish(self):
+         self.reader.collect()
+         self.remote_reader.collect()
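
The module is configured entirely through the standard OTEL environment variables listed in the header comment. A sketch of a one-off publish follows; the Run constructor arguments are assumptions based on the attributes read by _to_observation:

# Hypothetical usage sketch; Run(...) keyword arguments are assumptions.
import os
from datacontract.model.run import Run
from datacontract.integration.publish_opentelemetry import publish_opentelemetry

os.environ["OTEL_SERVICE_NAME"] = "datacontract-cli"
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://collector.example.com:4318"
os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf"

run = Run(dataContractId="orders-latest", dataContractVersion="1.0.0", result="passed")
publish_opentelemetry(run)  # gauge datacontract.cli.test reports 0 (passed)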
datacontract/lint/files.py CHANGED
@@ -10,8 +10,8 @@ def read_file(path):
              name=f"Reading data contract from {path}",
              reason=f"The file '{path}' does not exist.",
              engine="datacontract",
-             result="error"
+             result="error",
          )
-     with open(path, 'r') as file:
+     with open(path, "r") as file:
          file_content = file.read()
      return file_content
datacontract/lint/lint.py CHANGED
@@ -1,10 +1,10 @@
- from enum import Enum
- from dataclasses import dataclass, field
- from typing import Sequence, Any
  import abc
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from typing import Sequence, Any, cast
  
- from ..model.data_contract_specification import DataContractSpecification
  from datacontract.model.run import Check
+ from ..model.data_contract_specification import DataContractSpecification
  
  """This module contains linter definitions for linting a data contract.
  
@@ -17,10 +17,11 @@ contract."""
  
  class LintSeverity(Enum):
      """The severity of a lint message. Generally, lint messages should be
-     emitted with a severity of ERROR. WARNING should be used when the linter
-     cannot determine a lint result, for example, when an unsupported model
-     type is used.
+     emitted with a severity of ERROR. WARNING should be used when the linter
+     cannot determine a lint result, for example, when an unsupported model
+     type is used.
      """
+
      ERROR = 2
      WARNING = 1
  
@@ -31,11 +32,12 @@ class LinterMessage:
      caused the message.
  
      Attributes:
-         outcome: The outcome of the linting, either ERROR or WARNING.
+         outcome: The outcome of the linting, either ERROR or WARNING. Linting outcomes with level WARNING are discarded for now.
          message: A message describing the error or warning in more detail.
          model: The model that caused the lint to fail. Is optional.
  
      """
+
      outcome: LintSeverity
      message: str
      model: Any = None
@@ -60,40 +62,55 @@ class LinterResult:
      results can be present in the list. An empty list means that
      the linter ran without producing warnings or errors.
      """
+
      results: Sequence[LinterMessage] = field(default_factory=list)
  
+     @classmethod
+     def erroneous(cls, message, model=None):
+         return cls([LinterMessage.error(message, model)])
+
+     @classmethod
+     def cautious(cls, message, model=None):
+         return cls([LinterMessage.warning(message, model)])
+
      def with_warning(self, message, model=None):
          result = LinterMessage.warning(message, model)
-         return LinterResult(self.results + [result])
+         return LinterResult(cast(list[LinterMessage], self.results) + [result])
  
      def with_error(self, message, model=None):
          result = LinterMessage.error(message, model)
-         return LinterResult(self.results + [result])
+         return LinterResult(cast(list[LinterMessage], self.results) + [result])
  
      def has_errors(self) -> bool:
-         return any(map(lambda result: result.outcome == LintSeverity.ERROR,
-                        self.results))
+         return any(map(lambda result: result.outcome == LintSeverity.ERROR, self.results))
  
      def has_warnings(self) -> bool:
-         return any(map(lambda result: result.outcome == LintSeverity.WARNING,
-                        self.results))
+         return any(map(lambda result: result.outcome == LintSeverity.WARNING, self.results))
  
      def error_results(self) -> Sequence[LinterMessage]:
-         return [result for result in self.results
-                 if result.outcome == LintSeverity.ERROR]
+         return [result for result in self.results if result.outcome == LintSeverity.ERROR]
  
      def warning_results(self) -> Sequence[LinterMessage]:
-         return [result for result in self.results
-                 if result.outcome == LintSeverity.WARNING]
+         return [result for result in self.results if result.outcome == LintSeverity.WARNING]
  
      def no_errors_or_warnings(self) -> bool:
          return len(self.results) == 0
  
+     def combine(self, other: "LinterResult") -> "LinterResult":
+         return LinterResult(cast(list[Any], self.results) + cast(list[Any], other.results))
+
  
  class Linter(abc.ABC):
      @property
      @abc.abstractmethod
      def name(self) -> str:
+         """Human-readable name of the linter."""
+         pass
+
+     @property
+     @abc.abstractmethod
+     def id(self) -> str:
+         """A linter ID for configuration (i.e. enabling and disabling)."""
          pass
  
      @abc.abstractmethod
@@ -101,26 +118,24 @@ class Linter(abc.ABC):
          pass
  
      def lint(self, contract: DataContractSpecification) -> list[Check]:
+         """Call with a data contract to get a list of check results from the linter."""
          result = self.lint_implementation(contract)
          checks = []
          if not result.error_results():
-             checks.append(Check(
-                 type="lint",
-                 name=f"Linter '{self.name()}'",
-                 result="passed",
-                 engine="datacontract"
-             ))
+             checks.append(Check(type="lint", name=f"Linter '{self.name}'", result="passed", engine="datacontract"))
          else:
              # All linter messages are treated as warnings. Severity is
              # currently ignored, but could be used in filtering in the future
              # Linter messages with level WARNING are currently ignored, but might
              # be logged or printed in the future.
              for lint_error in result.error_results():
-                 checks.append(Check(
-                     type="lint",
-                     name=f"Linter '{self.name()}'",
-                     result="warning",
-                     engine="datacontract",
-                     reason=lint_error.message
-                 ))
+                 checks.append(
+                     Check(
+                         type="lint",
+                         name=f"Linter '{self.name}'",
+                         result="warning",
+                         engine="datacontract",
+                         reason=lint_error.message,
+                     )
+                 )
          return checks
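
To illustrate the revised base class (name and id are now abstract properties, and LinterResult gained the erroneous/cautious factories), a hypothetical third-party linter could look like this; the info.owner attribute is an assumption about the specification model:

# Hypothetical custom linter against the revised Linter contract.
from datacontract.lint.lint import Linter, LinterResult
from datacontract.model.data_contract_specification import DataContractSpecification


class OwnerLinter(Linter):
    @property
    def name(self) -> str:
        return "Contract has an owner"

    @property
    def id(self) -> str:
        return "owner"

    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
        # info.owner is an assumed attribute of the Info model.
        if not contract.info or not contract.info.owner:
            return LinterResult.erroneous("Contract has no owner.")
        return LinterResult()


# Given a loaded DataContractSpecification `contract`:
# checks = OwnerLinter().lint(contract)
# each Check is named "Linter 'Contract has an owner'" via the name property.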
datacontract/lint/linters/description_linter.py ADDED
@@ -0,0 +1,34 @@
+ from datacontract.model.data_contract_specification import DataContractSpecification
+ from ..lint import Linter, LinterResult
+
+
+ class DescriptionLinter(Linter):
+     """Check for a description on contracts, models, model fields, definitions and examples."""
+
+     @property
+     def name(self) -> str:
+         return "Objects have descriptions"
+
+     @property
+     def id(self) -> str:
+         return "description"
+
+     def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+         result = LinterResult()
+         if not contract.info or not contract.info.description:
+             result = result.with_error("Contract has empty description.")
+         for model_name, model in contract.models.items():
+             if not model.description:
+                 result = result.with_error(f"Model '{model_name}' has empty description.")
+             for field_name, field in model.fields.items():
+                 if not field.description:
+                     result = result.with_error(
+                         f"Field '{field_name}' in model '{model_name}' has empty description."
+                     )
+         for definition_name, definition in contract.definitions.items():
+             if not definition.description:
+                 result = result.with_error(f"Definition '{definition_name}' has empty description.")
+         for index, example in enumerate(contract.examples):
+             if not example.description:
+                 result = result.with_error(f"Example {index + 1} has empty description.")
+         return result
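
A sketch of running the new linter directly. Loading the contract through the resolve module is an assumption about the internal API; the supported path is the CLI's lint command:

# Hypothetical usage sketch; resolve.resolve_data_contract is an assumed entry point.
from datacontract.lint import resolve
from datacontract.lint.linters.description_linter import DescriptionLinter

contract = resolve.resolve_data_contract(data_contract_location="datacontract.yaml")
for check in DescriptionLinter().lint(contract):
    print(check.result, check.reason or "")  # "passed", or one "warning" per missing description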