dcs-sdk 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
data_diff/format.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import collections
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
18
|
+
|
|
19
|
+
import attrs
|
|
20
|
+
|
|
21
|
+
from data_diff.abcs.database_types import (
|
|
22
|
+
JSON,
|
|
23
|
+
Array,
|
|
24
|
+
Boolean,
|
|
25
|
+
ColType,
|
|
26
|
+
ColType_Alphanum,
|
|
27
|
+
ColType_UUID,
|
|
28
|
+
Date,
|
|
29
|
+
FractionalType,
|
|
30
|
+
NumericType,
|
|
31
|
+
String_Alphanum,
|
|
32
|
+
Struct,
|
|
33
|
+
TemporalType,
|
|
34
|
+
)
|
|
35
|
+
from data_diff.diff_tables import DiffResultWrapper
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def jsonify_error(table1: List[str], table2: List[str], error: str) -> Dict[str, Any]:
    """
    Build the JSON-serializable payload describing a diff that failed.

    :param table1: Identifier of the first dataset, as a list of strings.
    :param table2: Identifier of the second dataset, as a list of strings.
    :param error: Error message to report.
    :return: Plain ``dict`` obtained by passing a ``FailedDiff`` (with
        ``status="failed"``) through ``attrs.asdict``. The value is a dict,
        not a ``FailedDiff`` instance.
    """
    failure = FailedDiff(
        status="failed",
        dataset1=table1,
        dataset2=table2,
        error=error,
    )
    return attrs.asdict(failure)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Column descriptors as 3-tuples; this module unpacks each one as (name, type, kind).
Columns = List[Tuple[str, str, ColType]]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def jsonify(
    diff: DiffResultWrapper,
    dataset1_columns: Columns,
    dataset2_columns: Columns,
    columns_diff: Dict[str, List[str]],
    with_summary: bool = False,
    stats_only: bool = False,
) -> Dict[str, Any]:
    """
    Converts the diff result into a JSON-serializable format.
    Optionally add stats summary and schema diff.

    :param diff: Diff result; ``diff.info_tree.info`` supplies the two tables,
        the diff schema and the diff rows.
    :param dataset1_columns: Column descriptors of the first dataset.
    :param dataset2_columns: Column descriptors of the second dataset.
    :param columns_diff: Schema differences keyed by ``"added"``,
        ``"removed"`` and ``"changed"``; a missing key is treated as empty.
    :param with_summary: When true, include the statistics summary built from
        ``diff.get_stats_dict()``.
    :param stats_only: When true, the per-row details are omitted
        (``rows`` is ``None`` in the output).
    :return: Plain ``dict`` obtained by passing a ``JsonDiff`` through
        ``attrs.asdict``.
    """
    diff_info = diff.info_tree.info
    table1 = diff_info.tables[0]
    table2 = diff_info.tables[1]
    key_columns = table1.key_columns

    # Only the field names of the diff schema are needed to label row values.
    schema = [field for field, _ in diff_info.diff_schema]
    t1_exclusive_rows, t2_exclusive_rows, diff_rows = _group_rows(diff_info, schema)

    rows = None
    if not stats_only:
        rows = _make_rows_diff(t1_exclusive_rows, t2_exclusive_rows, diff_rows, key_columns)

    summary = None
    if with_summary:
        summary = _jsonify_diff_summary(diff.get_stats_dict())

    columns = _jsonify_columns_diff(dataset1_columns, dataset2_columns, columns_diff, list(key_columns))

    # Read the schema-diff keys with .get(), as _jsonify_columns_diff does, so a
    # partial columns_diff dict does not raise KeyError here.
    schema_changed = any(columns_diff.get(change) for change in ("added", "removed", "changed"))
    is_different = bool(t1_exclusive_rows or t2_exclusive_rows or diff_rows or schema_changed)

    return attrs.asdict(
        JsonDiff(
            status="success",
            result="different" if is_different else "identical",
            dataset1=list(table1.table_path),
            dataset2=list(table2.table_path),
            rows=rows,
            summary=summary,
            columns=columns,
        )
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@attrs.frozen
class JsonExclusiveRowValue:
    """
    One column's cell of a row that exists in only one of the two datasets.
    """

    # True when the column is listed among the key columns.
    isPK: bool
    # The cell value itself.
    value: Any
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@attrs.frozen
class JsonDiffRowValue:
    """
    Both sides of one column for a pair of rows that share the same key.
    """

    # Value read from the first dataset (the "<column>_a" field of the diff row).
    dataset1: Any
    # Value read from the second dataset (the "<column>_b" field of the diff row).
    dataset2: Any
    # Truthiness of the "is_diff_<column>" field of the diff row.
    isDiff: bool
    # True when the column is listed among the key columns.
    isPK: bool
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@attrs.frozen
class Total:
    """
    Row totals per dataset, filled from the "rows_A" / "rows_B" statistics.
    """

    dataset1: int
    dataset2: int
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@attrs.frozen
class ExclusiveRows:
    """
    Exclusive-row counts per dataset, filled from the "exclusive_A" /
    "exclusive_B" statistics.
    """

    dataset1: int
    dataset2: int
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@attrs.frozen
class Rows:
    """
    Row-level counters of the diff summary.
    """

    # Total rows per dataset.
    total: Total
    # Rows found in only one dataset, per dataset.
    exclusive: ExclusiveRows
    # Taken from the "updated" statistic.
    updated: int
    # Taken from the "unchanged" statistic.
    unchanged: int
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@attrs.frozen
class Stats:
    """
    Extra statistics of the diff summary.
    """

    # Filled from the "values" entry of the stats dict.
    # NOTE(review): presumably per-column difference counts -- confirm against get_stats_dict().
    diffCounts: Dict[str, int]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@attrs.frozen
class JsonDiffSummary:
    """
    Summary section of the JSON diff: row counters plus extra statistics.
    """

    rows: Rows
    stats: Stats
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@attrs.frozen
class ExclusiveColumns:
    """
    Column names that appear in only one of the two datasets.
    """

    # Filled from the "removed" entry of the columns diff.
    dataset1: List[str]
    # Filled from the "added" entry of the columns diff.
    dataset2: List[str]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ColumnKind(Enum):
    """
    Coarse column categories; the member value is the string written to the
    ``kind`` field of a column in the JSON output.
    """

    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    DATE = "date"
    # No KIND_MAPPING entry in this file produces TIME.
    TIME = "time"
    DATETIME = "datetime"
    BOOL = "boolean"
    # Also returned by _map_kind when no KIND_MAPPING entry matches.
    UNSUPPORTED = "unsupported"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# Ordered (column type class, JSON kind) pairs. _map_kind returns the kind of
# the FIRST entry whose class matches via isinstance, so entry order matters.
# NOTE(review): presumably more specific classes are listed ahead of their
# bases (e.g. Date before TemporalType, FractionalType before NumericType) --
# confirm against data_diff.abcs.database_types.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    # Broadest type, checked last.
    (ColType, ColumnKind.UNSUPPORTED),
]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@attrs.frozen
class Column:
    """
    Description of a single column in the JSON column summary.
    """

    name: str
    type: str
    # String value of the ColumnKind the column type maps to.
    kind: str
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@attrs.frozen
class JsonColumnsSummary:
    """
    Columns section of the JSON diff: both schemas and how they differ.
    """

    # Columns of the first dataset.
    dataset1: List[Column]
    # Columns of the second dataset.
    dataset2: List[Column]
    # Names of the key columns.
    primaryKey: List[str]
    # Column names present in only one dataset.
    exclusive: ExclusiveColumns
    # Filled from the "changed" entry of the columns diff.
    typeChanged: List[str]
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@attrs.frozen
class ExclusiveDiff:
    """
    Rows found in only one dataset; each row maps column name to its value.
    """

    dataset1: List[Dict[str, JsonExclusiveRowValue]]
    dataset2: List[Dict[str, JsonExclusiveRowValue]]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@attrs.frozen
class RowsDiff:
    """
    Row-level section of the JSON diff.
    """

    # Rows that exist in only one of the datasets.
    exclusive: ExclusiveDiff
    # Rows not flagged as exclusive to either dataset; each maps column name to its value pair.
    diff: List[Dict[str, JsonDiffRowValue]]
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@attrs.frozen
class FailedDiff:
    """
    Top-level JSON document emitted when a diff could not be produced.
    """

    status: str  # Literal ["failed"]
    dataset1: List[str]
    dataset2: List[str]
    # Error message describing the failure.
    error: str

    # Version string of this document format.
    version: str = "1.0.0"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
@attrs.frozen
class JsonDiff:
    """
    Top-level JSON document emitted for a diff that completed.
    """

    status: str  # Literal ["success"]
    result: str  # Literal ["different", "identical"]
    dataset1: List[str]
    dataset2: List[str]
    # None when row details were not requested.
    rows: Optional[RowsDiff]
    # None when no summary was requested.
    summary: Optional[JsonDiffSummary]
    columns: Optional[JsonColumnsSummary]

    # Version string of this document format.
    version: str = "1.1.0"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _group_rows(
    diff_info: DiffResultWrapper, schema: List[str]
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Label every raw diff row with the schema and sort it into one of three buckets.

    :param diff_info: Object whose ``diff`` attribute yields the raw rows.
    :param schema: Field names, positionally matching the values of each row;
        must include ``"is_exclusive_a"`` and ``"is_exclusive_b"``.
    :return: ``(rows exclusive to table 1, rows exclusive to table 2, other rows)``,
        each row being a ``dict`` of field name to value.
    """
    only_in_t1 = []
    only_in_t2 = []
    remaining = []

    for raw_row in diff_info.diff:
        record = dict(zip(schema, raw_row))
        # Read both flags before branching, so a missing flag fails the same way for every row.
        flag_a = record["is_exclusive_a"]
        flag_b = record["is_exclusive_b"]

        # The table-1 flag takes precedence when both are set.
        if flag_a:
            bucket = only_in_t1
        elif flag_b:
            bucket = only_in_t2
        else:
            bucket = remaining
        bucket.append(record)

    return only_in_t1, only_in_t2, remaining
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _make_rows_diff(
    t1_exclusive_rows: List[Dict[str, Any]],
    t2_exclusive_rows: List[Dict[str, Any]],
    diff_rows: List[Dict[str, Any]],
    key_columns: List[str],
) -> RowsDiff:
    """
    Convert the three groups of labelled rows into the row section of the JSON diff.

    :param t1_exclusive_rows: Rows present only in the first dataset.
    :param t2_exclusive_rows: Rows present only in the second dataset.
    :param diff_rows: Rows that are not exclusive to either dataset.
    :param key_columns: Names of the key columns, used to flag ``isPK``.
    :return: ``RowsDiff`` holding the converted rows.
    """
    changed = [_jsonify_diff(entry, key_columns) for entry in diff_rows]
    only_first = [_jsonify_exclusive(entry, key_columns) for entry in t1_exclusive_rows]
    only_second = [_jsonify_exclusive(entry, key_columns) for entry in t2_exclusive_rows]

    exclusive = ExclusiveDiff(dataset1=only_first, dataset2=only_second)
    return RowsDiff(exclusive=exclusive, diff=changed)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _jsonify_diff(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonDiffRowValue]:
    """
    Fold the flat fields of one diff row into a per-column ``JsonDiffRowValue``.

    Field names are interpreted as follows:

    * ``is_exclusive_a`` / ``is_exclusive_b`` are ignored;
    * ``is_diff_<column>`` sets ``isDiff`` (coerced to ``bool``);
    * ``<column>_a`` sets ``dataset1`` and ``<column>_b`` sets ``dataset2``,
      each also setting ``isPK`` from membership in ``key_columns``.

    :param row: Mapping of diff field name to value.
    :param key_columns: Names of the key columns.
    :return: Mapping of column name to its ``JsonDiffRowValue``.
    """
    per_column = {}
    for key, cell in row.items():
        if key == "is_exclusive_a" or key == "is_exclusive_b":
            continue

        if key.startswith("is_diff_"):
            name = key[len("is_diff_") :]
            per_column.setdefault(name, {})["isDiff"] = bool(cell)
            continue

        if key.endswith("_a"):
            side = "dataset1"
        elif key.endswith("_b"):
            side = "dataset2"
        else:
            # Fields with no recognised prefix or suffix contribute nothing.
            continue

        # Both suffixes are two characters long.
        name = key[:-2]
        entry = per_column.setdefault(name, {})
        entry[side] = cell
        entry["isPK"] = name in key_columns

    return {name: JsonDiffRowValue(**entry) for name, entry in per_column.items()}
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _jsonify_exclusive(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonExclusiveRowValue]:
    """
    Extract the values of a row that exists in only one dataset.

    The exclusivity flags and the ``is_diff_*`` fields are skipped. A
    ``<column>_b`` field is kept only when ``row["is_exclusive_b"]`` is truthy,
    and a ``<column>_a`` field only when ``row["is_exclusive_a"]`` is truthy.

    :param row: Mapping of diff field name to value.
    :param key_columns: Names of the key columns, used to flag ``isPK``.
    :return: Mapping of column name to its ``JsonExclusiveRowValue``.
    """
    result = {}
    for key, cell in row.items():
        if key in ("is_exclusive_a", "is_exclusive_b") or key.startswith("is_diff_"):
            continue

        # The flag is looked up only for a field carrying the matching suffix.
        if key.endswith("_b"):
            wanted = row["is_exclusive_b"]
        elif key.endswith("_a"):
            wanted = row["is_exclusive_a"]
        else:
            continue
        if not wanted:
            continue

        # Both suffixes are two characters long.
        name = key[:-2]
        result[name] = JsonExclusiveRowValue(isPK=name in key_columns, value=cell)

    return result
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
    """
    Build the summary section of the JSON diff from a statistics dict.

    :param stats_dict: Must contain the keys ``rows_A``, ``rows_B``,
        ``exclusive_A``, ``exclusive_B``, ``updated``, ``unchanged`` and
        ``values``; a missing key raises ``KeyError``.
    :return: ``JsonDiffSummary`` carrying those values.
    """
    totals = Total(dataset1=stats_dict["rows_A"], dataset2=stats_dict["rows_B"])
    exclusives = ExclusiveRows(
        dataset1=stats_dict["exclusive_A"],
        dataset2=stats_dict["exclusive_B"],
    )
    row_counters = Rows(
        total=totals,
        exclusive=exclusives,
        updated=stats_dict["updated"],
        unchanged=stats_dict["unchanged"],
    )
    extra = Stats(diffCounts=stats_dict["values"])
    return JsonDiffSummary(rows=row_counters, stats=extra)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _jsonify_columns_diff(
    dataset1_columns: Columns, dataset2_columns: Columns, columns_diff: Dict[str, List[str]], key_columns: List[str]
) -> JsonColumnsSummary:
    """
    Build the columns section of the JSON diff.

    :param dataset1_columns: ``(name, type, kind)`` tuples of the first dataset.
    :param dataset2_columns: ``(name, type, kind)`` tuples of the second dataset.
    :param columns_diff: Schema differences; the ``"added"``, ``"removed"`` and
        ``"changed"`` entries are read, each defaulting to an empty list.
    :param key_columns: Names of the key columns.
    :return: ``JsonColumnsSummary`` describing both schemas and their differences.
    """

    def describe(descriptors: Columns) -> List[Column]:
        # Translate each raw descriptor, mapping its column type to a kind string.
        return [Column(name=name, type=type_, kind=_map_kind(kind).value) for (name, type_, kind) in descriptors]

    first = describe(dataset1_columns)
    second = describe(dataset2_columns)

    # "added" columns exist only in dataset 2, "removed" ones only in dataset 1.
    only_second = list(columns_diff.get("added", []))
    only_first = list(columns_diff.get("removed", []))
    retyped = list(columns_diff.get("changed", []))

    return JsonColumnsSummary(
        dataset1=first,
        dataset2=second,
        primaryKey=key_columns,
        exclusive=ExclusiveColumns(dataset2=only_second, dataset1=only_first),
        typeChanged=retyped,
    )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _map_kind(kind: ColType) -> ColumnKind:
    """
    Translate a column type instance into its ``ColumnKind``.

    :param kind: Column type instance to classify.
    :return: Kind of the first ``KIND_MAPPING`` entry whose class ``kind`` is
        an instance of, or ``ColumnKind.UNSUPPORTED`` when none matches.
    """
    candidates = (json_kind for raw_kind, json_kind in KIND_MAPPING if isinstance(kind, raw_kind))
    # The generator is lazy, so scanning stops at the first matching entry.
    return next(candidates, ColumnKind.UNSUPPORTED)
|