datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/model/run.py CHANGED
@@ -1,21 +1,38 @@
1
1
  import logging
2
2
  from datetime import datetime, timezone
3
- from typing import List, Optional
3
+ from enum import Enum
4
+ from typing import List
4
5
  from uuid import UUID, uuid4
5
6
 
6
7
  from pydantic import BaseModel
7
8
 
8
9
 
10
class ResultEnum(str, Enum):
    # Outcome labels used as the type of Check.result and Run.result.
    # The str mix-in lets a member compare equal to its plain string value,
    # e.g. ResultEnum.passed == "passed".
    # Run.calculate_result picks the run-level value with the precedence
    # error > failed > warning > passed and falls back to unknown.
    passed = "passed"
    warning = "warning"
    failed = "failed"
    error = "error"
    info = "info"
    unknown = "unknown"
17
+
18
+
9
19
  class Check(BaseModel):
20
+ id: str | None = None
21
+ key: str | None = None
22
+ category: str | None = None
10
23
  type: str
11
- name: Optional[str]
12
- result: str # passed, failed, warning, unknown
13
- engine: str
14
- reason: Optional[str] = None
15
- model: Optional[str] = None
16
- field: Optional[str] = None
17
- details: Optional[str] = None
18
- diagnostics: Optional[dict] = None
24
+ name: str | None = None
25
+ model: str | None = None
26
+ field: str | None = None
27
+
28
+ engine: str | None = None
29
+ language: str | None = None
30
+ implementation: str | None = None
31
+
32
+ result: ResultEnum | None = None
33
+ reason: str | None = None
34
+ details: str | None = None
35
+ diagnostics: dict | None = None
19
36
 
20
37
 
21
38
  class Log(BaseModel):
@@ -26,36 +43,36 @@ class Log(BaseModel):
26
43
 
27
44
  class Run(BaseModel):
28
45
  runId: UUID
29
- dataContractId: Optional[str] = None
30
- dataContractVersion: Optional[str] = None
31
- dataProductId: Optional[str] = None
32
- outputPortId: Optional[str] = None
33
- server: Optional[str] = None
34
- timestampStart: datetime
35
- timestampEnd: datetime
36
- result: str = "unknown" # passed, warning, failed, error, unknown
37
- checks: List[Check]
38
- logs: List[Log]
46
+ dataContractId: str | None = None
47
+ dataContractVersion: str | None = None
48
+ dataProductId: str | None = None
49
+ outputPortId: str | None = None
50
+ server: str | None = None
51
+ timestampStart: datetime | None
52
+ timestampEnd: datetime | None
53
+ result: ResultEnum = ResultEnum.unknown
54
+ checks: List[Check] | None
55
+ logs: List[Log] | None
39
56
 
40
57
  def has_passed(self):
41
58
  self.calculate_result()
42
- return self.result == "passed"
59
+ return self.result == ResultEnum.passed
43
60
 
44
61
  def finish(self):
45
62
  self.timestampEnd = datetime.now(timezone.utc)
46
63
  self.calculate_result()
47
64
 
48
65
  def calculate_result(self):
49
- if any(check.result == "error" for check in self.checks):
50
- self.result = "error"
51
- elif any(check.result == "failed" for check in self.checks):
52
- self.result = "failed"
53
- elif any(check.result == "warning" for check in self.checks):
54
- self.result = "warning"
55
- elif any(check.result == "passed" for check in self.checks):
56
- self.result = "passed"
66
+ if any(check.result == ResultEnum.error for check in self.checks):
67
+ self.result = ResultEnum.error
68
+ elif any(check.result == ResultEnum.failed for check in self.checks):
69
+ self.result = ResultEnum.failed
70
+ elif any(check.result == ResultEnum.warning for check in self.checks):
71
+ self.result = ResultEnum.warning
72
+ elif any(check.result == ResultEnum.passed for check in self.checks):
73
+ self.result = ResultEnum.passed
57
74
  else:
58
- self.result = "unknown"
75
+ self.result = ResultEnum.unknown
59
76
 
60
77
  def log_info(self, message: str):
61
78
  logging.info(message)
@@ -72,6 +89,9 @@ class Run(BaseModel):
72
89
  def pretty(self):
73
90
  return self.model_dump_json(indent=2)
74
91
 
92
+ def pretty_logs(self) -> str:
93
+ return "\n".join(f"[{log.timestamp.isoformat()}] {log.level}: {log.message}" for log in self.logs)
94
+
75
95
  @staticmethod
76
96
  def create_run():
77
97
  """
File without changes
@@ -0,0 +1,135 @@
1
+ import xml.etree.ElementTree as ET
2
+ from pathlib import Path
3
+ from xml.dom import minidom
4
+
5
+ import yaml
6
+
7
+ from datacontract.model.run import ResultEnum, Run
8
+
9
+
10
def write_junit_test_results(run: Run, console, output_path: Path):
    """Write the checks of ``run`` to ``output_path`` as a JUnit XML report.

    The report is a single ``<testsuite>`` with one ``<testcase>`` per check:

    * passed checks get no child element,
    * failed checks get a ``<failure>`` child,
    * checks with result error get an ``<error>`` child,
    * warnings and every other result get a ``<skipped>`` child.

    Failure, error and warning elements carry the text produced by
    ``to_failure_text``. The run's logs, if any, are written to
    ``<system-out>``.

    Args:
        run: The test run to serialize.
        console: Object with a ``print`` method used for status messages.
        output_path: Target file. Parent directories are created as needed.
            When falsy, a message is printed and nothing is written.
    """
    if not output_path:
        console.print("No output path specified for JUnit test results. Skip writing JUnit test results.")
        return

    suite_attributes = {
        "id": str(run.runId),
        "name": run.dataContractId if run.dataContractId else "Data Contract",
        "tests": str(len(run.checks)),
        "errors": str(count_errors(run)),
        "failures": str(count_failed(run)),
        "skipped": str(count_skipped(run)),
    }
    # Run declares both timestamps as optional, so only emit the attributes
    # that can actually be computed instead of failing on None.
    if run.timestampStart is not None:
        suite_attributes["timestamp"] = run.timestampStart.replace(tzinfo=None).isoformat()
        if run.timestampEnd is not None:
            suite_attributes["time"] = str((run.timestampEnd - run.timestampStart).total_seconds())
    testsuite = ET.Element("testsuite", suite_attributes)

    properties = ET.SubElement(testsuite, "properties")
    for property_name in ("dataContractId", "dataContractVersion", "dataProductId", "outputPortId", "server"):
        property_value = getattr(run, property_name)
        if property_value is not None:
            ET.SubElement(properties, "property", name=property_name, value=property_value)

    # Results that get a detailed child element: (element tag, fallback message).
    detailed_outcomes = {
        ResultEnum.failed: ("failure", "Failed"),
        ResultEnum.error: ("error", "Error"),
        ResultEnum.warning: ("skipped", "Warning"),
    }
    for check in run.checks:
        testcase = ET.SubElement(testsuite, "testcase", classname=to_class_name(check), name=to_testcase_name(check))
        if check.result == ResultEnum.passed:
            continue
        # Anything that is not passed/failed/error/warning is reported as a
        # plain skipped test case without a text body.
        tag, fallback_message = detailed_outcomes.get(check.result, ("skipped", "Skipped"))
        outcome = ET.SubElement(
            testcase,
            tag,
            message=check.reason if check.reason else fallback_message,
            type=check.category if check.category else "General",
        )
        if check.result in detailed_outcomes:
            outcome.text = to_failure_text(check)

    if run.logs:
        system_out = ET.SubElement(testsuite, "system-out")
        system_out.text = logs_to_system_out(run)

    # ET.tostring returns bytes whenever a real encoding (not "unicode") is requested.
    xml_bytes: bytes = ET.tostring(testsuite, xml_declaration=True, encoding="utf-8")
    xml_str_pretty = minidom.parseString(xml_bytes).toprettyxml(indent=" ")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(xml_str_pretty)
    console.print(f"JUnit test results written to {output_path}")
85
+
86
+
87
def to_testcase_name(check):
    """Return the JUnit test case name for ``check``.

    The check's key is preferred, then its name; ``"unknown"`` is used when
    both are missing or empty.
    """
    return check.key or check.name or "unknown"
94
+
95
+
96
def logs_to_system_out(run):
    """Render the run's log entries as text, one newline-terminated line per entry.

    Each line has the form ``<timestamp> <level>: <message>``.
    """
    return "".join(f"{entry.timestamp} {entry.level}: {entry.message}\n" for entry in run.logs)
101
+
102
+
103
def to_class_name(check):
    """Return the JUnit ``classname`` for ``check``.

    Uses ``<model>.<field>`` when both are set, whichever one is set when
    only one is, and ``"general"`` when neither is.
    """
    model, field = check.model, check.field
    if not model:
        return field if field else "general"
    if not field:
        return model
    return f"{model}.{field}"
112
+
113
+
114
def to_failure_text(check):
    """Build the multi-line text body attached to a non-passing test case.

    Lists the check's name, engine, implementation, result, reason and
    details, followed by its diagnostics dumped as block-style YAML.
    """
    result_text = "" if check.result is None else check.result.value
    diagnostics_yaml = yaml.dump(check.diagnostics, default_flow_style=False)
    sections = [
        f"Name: {check.name}",
        f"Engine: {check.engine}",
        # the trailing newline leaves a blank line after the implementation
        f"Implementation:\n{check.implementation}\n",
        f"Result: {result_text}",
        f"Reason: {check.reason}",
        f"Details: {check.details}",
        f"Diagnostics:\n{diagnostics_yaml}",
    ]
    return "\n".join(sections)
124
+
125
+
126
def count_errors(run):
    """Return the number of checks in ``run`` whose result is ``error``."""
    return len([check for check in run.checks if check.result == ResultEnum.error])
128
+
129
+
130
def count_failed(run):
    """Return the number of checks in ``run`` whose result is ``failed``."""
    return len([check for check in run.checks if check.result == ResultEnum.failed])
132
+
133
+
134
def count_skipped(run):
    """Return the number of checks that the JUnit report renders as skipped.

    ``write_junit_test_results`` emits a ``<skipped>`` element for every
    check that is neither passed, failed nor error (warnings, info, unknown
    and checks without a result). Counting only ``result is None`` made the
    suite's ``skipped`` attribute disagree with the number of ``<skipped>``
    elements in the same document.
    """
    reported_otherwise = (ResultEnum.passed, ResultEnum.failed, ResultEnum.error)
    return sum(1 for check in run.checks if check.result not in reported_otherwise)
@@ -0,0 +1,10 @@
1
+ from enum import Enum
2
+
3
+
4
class OutputFormat(str, Enum):
    """Formats in which test results can be written to a file."""

    # json = "json"  # coming soon
    junit = "junit"

    @classmethod
    def get_supported_formats(cls):
        """Return the string values of all members in declaration order."""
        return [member.value for member in cls]
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+
3
+ import typer
4
+ from rich import box
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ from datacontract.model.run import Run
9
+ from datacontract.output.junit_test_results import write_junit_test_results
10
+ from datacontract.output.output_format import OutputFormat
11
+
12
+
13
def write_test_result(run: Run, console: Console, output_format: OutputFormat, output_path: Path):
    """Report the outcome of a test run on the console and, optionally, as a file.

    When ``output_format`` is JUnit, the JUnit report is written first. The
    result table is always printed, followed by a summary line. For any run
    result other than passed, every check that did not pass is listed with a
    running number.

    Raises:
        typer.Exit: With code 1 when the run result is neither passed nor warning.
    """
    if output_format == OutputFormat.junit:
        write_junit_test_results(run, console, output_path)
    _print_table(run, console)

    if run.result == "passed":
        check_count = len(run.checks)
        elapsed_seconds = (run.timestampEnd - run.timestampStart).total_seconds()
        console.print(f"🟢 data contract is valid. Run {check_count} checks. Took {elapsed_seconds} seconds.")
        return

    only_warnings = run.result == "warning"
    if only_warnings:
        console.print("🟠 data contract has warnings. Found the following warnings:")
    else:
        console.print("🔴 data contract is invalid, found the following errors:")

    # Both branches list every check that did not pass, numbered from 1.
    not_passed = (check for check in run.checks if check.result != "passed")
    for position, check in enumerate(not_passed, start=1):
        location = to_field(run, check)
        prefix = location + " " if location else ""
        console.print(f"{position}) {prefix}{check.name}: {check.reason}")

    if not only_warnings:
        raise typer.Exit(code=1)
47
+
48
+
49
def _print_table(run, console):
    """Print all checks of ``run`` as a table sorted by result, model and field."""

    def sort_key(check):
        # missing values sort as empty strings
        return (check.result or "", check.model or "", check.field or "")

    table = Table(box=box.ROUNDED)
    column_specs = (
        ("Result", {"no_wrap": True}),
        ("Check", {"max_width": 100}),
        ("Field", {"max_width": 32}),
        ("Details", {"max_width": 50}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    ordered_checks = list(run.checks)
    ordered_checks.sort(key=sort_key)
    for check in ordered_checks:
        table.add_row(with_markup(check.result), check.name, to_field(run, check), check.reason)
    console.print(table)
58
+
59
+
60
def to_field(run, check):
    """Return the label shown for ``check`` in the "Field" column.

    When the run's checks refer to more than one distinct model value
    (``None`` counts as a value), the field is qualified as
    ``<model>.<field>``; otherwise only the field is returned.

    A check that has a field but no model is labelled with the field alone.
    Previously this case raised ``TypeError`` from ``None + "."`` as soon as
    model-less checks were mixed with model checks.
    """
    distinct_models = {c.model for c in run.checks}
    if len(distinct_models) <= 1:
        return check.field
    if check.field is None:
        return check.model
    if check.model is None:
        return check.field
    return check.model + "." + check.field
68
+
69
+
70
def with_markup(result):
    """Return ``result`` wrapped in console colour markup.

    passed is green, warning is yellow, failed and error are red. Any other
    value is returned unchanged.
    """
    styled_labels = (
        ("passed", "[green]passed[/green]"),
        ("warning", "[yellow]warning[/yellow]"),
        ("failed", "[red]failed[/red]"),
        ("error", "[red]error[/red]"),
    )
    for label, styled in styled_labels:
        if result == label:
            return styled
    return result
datacontract/py.typed ADDED
File without changes
@@ -0,0 +1,91 @@
1
+ dataContractSpecification: 1.2.1
2
+ id: my-data-contract-id
3
+ info:
4
+ title: My Data Contract
5
+ version: 0.0.1
6
+ # description:
7
+ # owner:
8
+ # contact:
9
+ # name:
10
+ # url:
11
+ # email:
12
+
13
+
14
+ ### servers
15
+
16
+ #servers:
17
+ # production:
18
+ # type: s3
19
+ # location: s3://
20
+ # format: parquet
21
+ # delimiter: new_line
22
+
23
+ ### terms
24
+
25
+ #terms:
26
+ # usage:
27
+ # limitations:
28
+ # billing:
29
+ # noticePeriod:
30
+
31
+
32
+ ### models
33
+
34
+ # models:
35
+ # my_model:
36
+ # description:
37
+ # type:
38
+ # fields:
39
+ # my_field:
40
+ # type:
41
+ # description:
42
+
43
+
44
+ ### definitions
45
+
46
+ # definitions:
47
+ # my_field:
48
+ # domain:
49
+ # name:
50
+ # title:
51
+ # type:
52
+ # description:
53
+ # example:
54
+ # pii:
55
+ # classification:
56
+
57
+
58
+ ### servicelevels
59
+
60
+ #servicelevels:
61
+ # availability:
62
+ # description: The server is available during support hours
63
+ # percentage: 99.9%
64
+ # retention:
65
+ # description: Data is retained for one year
66
+ # period: P1Y
67
+ # unlimited: false
68
+ # latency:
69
+ # description: Data is available within 25 hours after the order was placed
70
+ # threshold: 25h
71
+ # sourceTimestampField: orders.order_timestamp
72
+ # processedTimestampField: orders.processed_timestamp
73
+ # freshness:
74
+ # description: The age of the youngest row in a table.
75
+ # threshold: 25h
76
+ # timestampField: orders.order_timestamp
77
+ # frequency:
78
+ # description: Data is delivered once a day
79
+ # type: batch # or streaming
80
+ # interval: daily # for batch: set either interval or cron
81
+ # cron: 0 0 * * * # for batch: set either cron or interval
82
+ # support:
83
+ # description: The data is available during typical business hours at headquarters
84
+ # time: 9am to 5pm in EST on business days
85
+ # responseTime: 1h
86
+ # backup:
87
+ # description: Data is backed up once a week, every Sunday at 0:00 UTC.
88
+ # interval: weekly
89
+ # cron: 0 0 * * 0
90
+ # recoveryTime: 24 hours
91
+ # recoveryPoint: 1 week