recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.4.0.20250514__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +22 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +355 -316
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +44 -49
- recce/cli.py +484 -285
- recce/config.py +42 -33
- recce/core.py +52 -44
- recce/data/404.html +1 -1
- recce/data/_next/static/chunks/{368-7587b306577df275.js → 778-aef312bffb4c0312.js} +15 -15
- recce/data/_next/static/chunks/8d700b6a.ed11a130057c7a47.js +1 -0
- recce/data/_next/static/chunks/app/layout-c713a2829d3279e4.js +1 -0
- recce/data/_next/static/chunks/app/page-7086764277331fcb.js +1 -0
- recce/data/_next/static/chunks/{cd9f8d63-cf0d5a7b0f7a92e8.js → cd9f8d63-e020f408095ed77c.js} +3 -3
- recce/data/_next/static/chunks/webpack-b787cb1a4f2293de.js +1 -0
- recce/data/_next/static/css/88b8abc134cfd59a.css +3 -0
- recce/data/index.html +2 -2
- recce/data/index.txt +2 -2
- recce/diff.py +6 -12
- recce/event/__init__.py +74 -72
- recce/event/collector.py +27 -20
- recce/event/track.py +39 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/models/__init__.py +1 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +27 -27
- recce/pull_request.py +26 -24
- recce/run.py +148 -111
- recce/server.py +105 -88
- recce/state.py +209 -177
- recce/summary.py +168 -143
- recce/tasks/__init__.py +3 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +19 -17
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +33 -30
- recce/tasks/schema.py +14 -14
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/breaking.py +77 -84
- recce/util/cll.py +55 -51
- recce/util/io.py +19 -17
- recce/util/logger.py +1 -1
- recce/util/recce_cloud.py +70 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.4.0.20250514.dist-info}/METADATA +5 -2
- recce_nightly-1.4.0.20250514.dist-info/RECORD +143 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.4.0.20250514.dist-info}/WHEEL +1 -1
- tests/adapter/dbt_adapter/conftest.py +1 -0
- tests/adapter/dbt_adapter/dbt_test_helper.py +28 -18
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +39 -32
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +340 -15
- tests/tasks/test_query.py +40 -40
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +71 -58
- tests/test_config.py +7 -9
- tests/test_core.py +5 -3
- tests/test_dbt.py +7 -7
- tests/test_pull_request.py +1 -1
- tests/test_server.py +19 -13
- tests/test_state.py +40 -27
- tests/test_summary.py +18 -14
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → E_HPXsXdrqHg2YEHmU3mK}/_buildManifest.js +0 -0
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → E_HPXsXdrqHg2YEHmU3mK}/_ssgManifest.js +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.4.0.20250514.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.4.0.20250514.dist-info}/licenses/LICENSE +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.4.0.20250514.dist-info}/top_level.txt +0 -0
recce/summary.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
|
-
from typing import List,
|
|
3
|
+
from typing import Dict, List, Optional, Set, Type, Union
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
from recce.apis.check_func import get_node_name_by_id
|
|
9
9
|
from recce.core import RecceContext
|
|
10
|
-
from recce.models import CheckDAO, RunDAO, RunType
|
|
10
|
+
from recce.models import CheckDAO, Run, RunDAO, RunType
|
|
11
11
|
from recce.tasks.core import TaskResultDiffer
|
|
12
12
|
from recce.tasks.histogram import HistogramDiffTaskResultDiffer
|
|
13
13
|
from recce.tasks.profile import ProfileDiffResultDiffer
|
|
@@ -15,13 +15,16 @@ from recce.tasks.query import QueryDiffResultDiffer
|
|
|
15
15
|
from recce.tasks.rowcount import RowCountDiffResultDiffer
|
|
16
16
|
from recce.tasks.schema import SchemaDiffResultDiffer
|
|
17
17
|
from recce.tasks.top_k import TopKDiffTaskResultDiffer
|
|
18
|
-
from recce.tasks.valuediff import
|
|
18
|
+
from recce.tasks.valuediff import (
|
|
19
|
+
ValueDiffDetailTaskResultDiffer,
|
|
20
|
+
ValueDiffTaskResultDiffer,
|
|
21
|
+
)
|
|
19
22
|
|
|
20
|
-
RECCE_CLOUD_HOST = os.environ.get(
|
|
23
|
+
RECCE_CLOUD_HOST = os.environ.get("RECCE_CLOUD_HOST", "https://cloud.datarecce.io")
|
|
21
24
|
|
|
22
|
-
ADD_COLOR =
|
|
23
|
-
MODIFIED_COLOR =
|
|
24
|
-
REMOVE_COLOR =
|
|
25
|
+
ADD_COLOR = "#1dce00"
|
|
26
|
+
MODIFIED_COLOR = "#ffa502"
|
|
27
|
+
REMOVE_COLOR = "#ff067e"
|
|
25
28
|
|
|
26
29
|
MAX_MERMAID_TEXT_SIZE = 50000 # source: https://mermaid.js.org/config/schema-docs/config.html#maxtextsize
|
|
27
30
|
|
|
@@ -42,44 +45,44 @@ class Node:
|
|
|
42
45
|
base_data: dict
|
|
43
46
|
current_data: dict
|
|
44
47
|
|
|
45
|
-
def __init__(self, node_id: str, node_data: dict, data_from: str =
|
|
48
|
+
def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):
|
|
46
49
|
self.id = node_id
|
|
47
|
-
self.name = node_data[
|
|
50
|
+
self.name = node_data["name"]
|
|
48
51
|
self.data_from = data_from
|
|
49
|
-
self.resource_type = node_data[
|
|
50
|
-
self.package_name = node_data[
|
|
52
|
+
self.resource_type = node_data["resource_type"]
|
|
53
|
+
self.package_name = node_data["package_name"]
|
|
51
54
|
self.children = []
|
|
52
55
|
self.parents = []
|
|
53
56
|
|
|
54
57
|
self.base_data = {}
|
|
55
58
|
self.current_data = {}
|
|
56
59
|
|
|
57
|
-
if data_from ==
|
|
60
|
+
if data_from == "base":
|
|
58
61
|
self.base_data = node_data
|
|
59
|
-
elif data_from ==
|
|
62
|
+
elif data_from == "current":
|
|
60
63
|
self.current_data = node_data
|
|
61
64
|
|
|
62
65
|
@property
|
|
63
66
|
def change_status(self):
|
|
64
|
-
base_checksum = self.base_data.get(
|
|
65
|
-
curr_checksum = self.current_data.get(
|
|
66
|
-
if self.data_from ==
|
|
67
|
-
return
|
|
68
|
-
elif self.data_from ==
|
|
69
|
-
return
|
|
67
|
+
base_checksum = self.base_data.get("checksum", {}).get("checksum")
|
|
68
|
+
curr_checksum = self.current_data.get("checksum", {}).get("checksum")
|
|
69
|
+
if self.data_from == "base":
|
|
70
|
+
return "removed"
|
|
71
|
+
elif self.data_from == "current":
|
|
72
|
+
return "added"
|
|
70
73
|
elif base_checksum and curr_checksum and base_checksum != curr_checksum:
|
|
71
|
-
return
|
|
74
|
+
return "modified"
|
|
72
75
|
return None
|
|
73
76
|
|
|
74
77
|
def update_data(self, node_data: dict, data_from: str):
|
|
75
|
-
if data_from not in [
|
|
76
|
-
raise ValueError(f
|
|
78
|
+
if data_from not in ["base", "current"]:
|
|
79
|
+
raise ValueError(f"Invalid data_from value: {data_from}")
|
|
77
80
|
if self.data_from != data_from:
|
|
78
|
-
self.data_from =
|
|
81
|
+
self.data_from = "both"
|
|
79
82
|
|
|
80
|
-
if data_from ==
|
|
83
|
+
if data_from == "base":
|
|
81
84
|
self.base_data = node_data
|
|
82
|
-
elif data_from ==
|
|
85
|
+
elif data_from == "current":
|
|
83
86
|
self.current_data = node_data
|
|
84
87
|
|
|
85
88
|
def append_parent(self, parent_id: str):
|
|
@@ -93,8 +96,8 @@ class Node:
|
|
|
93
96
|
def _cal_row_count_delta_percentage(self):
|
|
94
97
|
row_count_diff, run_result = _get_node_row_count_diff(self.id, self.name)
|
|
95
98
|
if row_count_diff:
|
|
96
|
-
base = run_result.get(
|
|
97
|
-
current = run_result.get(
|
|
99
|
+
base = run_result.get("base", 0)
|
|
100
|
+
current = run_result.get("curr", 0)
|
|
98
101
|
if int(current) > int(base):
|
|
99
102
|
p = (int(current) - int(base)) / int(current) * 100
|
|
100
103
|
return f'🔼 +{round(p, 2) if p > 0.1 else "<0.1"}%'
|
|
@@ -104,25 +107,25 @@ class Node:
|
|
|
104
107
|
return None
|
|
105
108
|
|
|
106
109
|
def _get_schema_diff(self):
|
|
107
|
-
base_schema = self.base_data.get(
|
|
108
|
-
current_schema = self.current_data.get(
|
|
110
|
+
base_schema = self.base_data.get("columns", {})
|
|
111
|
+
current_schema = self.current_data.get("columns", {})
|
|
109
112
|
schema_diff = TaskResultDiffer.diff(base_schema, current_schema)
|
|
110
113
|
return schema_diff
|
|
111
114
|
|
|
112
115
|
def _what_changed(self, checks=None):
|
|
113
116
|
changes = []
|
|
114
|
-
if self.change_status ==
|
|
115
|
-
return [
|
|
116
|
-
elif self.change_status ==
|
|
117
|
-
return [
|
|
118
|
-
elif self.change_status ==
|
|
119
|
-
changes.append(
|
|
117
|
+
if self.change_status == "added":
|
|
118
|
+
return ["Added Node"]
|
|
119
|
+
elif self.change_status == "removed":
|
|
120
|
+
return ["Removed Node"]
|
|
121
|
+
elif self.change_status == "modified":
|
|
122
|
+
changes.append("Code")
|
|
120
123
|
row_count_delta_percentage = self._cal_row_count_delta_percentage()
|
|
121
124
|
if row_count_delta_percentage:
|
|
122
|
-
changes.append(f
|
|
125
|
+
changes.append(f"Row Count {row_count_delta_percentage}")
|
|
123
126
|
schema_diff = self._get_schema_diff()
|
|
124
127
|
if schema_diff:
|
|
125
|
-
changes.append(
|
|
128
|
+
changes.append("Schema")
|
|
126
129
|
|
|
127
130
|
if checks:
|
|
128
131
|
for check in checks:
|
|
@@ -131,7 +134,7 @@ class Node:
|
|
|
131
134
|
# Skip the row count and schema diff check, since we already have it.
|
|
132
135
|
continue
|
|
133
136
|
if check.node_ids and self.id in check.node_ids:
|
|
134
|
-
changes.append(str(check.type).replace(
|
|
137
|
+
changes.append(str(check.type).replace("_", " ").title())
|
|
135
138
|
return changes
|
|
136
139
|
|
|
137
140
|
def get_node_str(self, checks=None):
|
|
@@ -140,12 +143,12 @@ class Node:
|
|
|
140
143
|
|
|
141
144
|
if self.change_status is not None:
|
|
142
145
|
is_changed = True
|
|
143
|
-
if self.change_status ==
|
|
144
|
-
style = f
|
|
145
|
-
elif self.change_status ==
|
|
146
|
-
style = f
|
|
147
|
-
elif self.change_status ==
|
|
148
|
-
style = f
|
|
146
|
+
if self.change_status == "added":
|
|
147
|
+
style = f"style {self.id} stroke:{ADD_COLOR}"
|
|
148
|
+
elif self.change_status == "modified":
|
|
149
|
+
style = f"style {self.id} stroke:{MODIFIED_COLOR}"
|
|
150
|
+
elif self.change_status == "removed":
|
|
151
|
+
style = f"style {self.id} stroke:{REMOVE_COLOR}"
|
|
149
152
|
|
|
150
153
|
if checks:
|
|
151
154
|
for check in checks:
|
|
@@ -154,13 +157,13 @@ class Node:
|
|
|
154
157
|
|
|
155
158
|
content_output = f'{self.id}["{self.name}'
|
|
156
159
|
if is_changed:
|
|
157
|
-
content_output +=
|
|
160
|
+
content_output += "\n\n[What's Changed]\n"
|
|
158
161
|
changes = self._what_changed(checks)
|
|
159
|
-
content_output +=
|
|
162
|
+
content_output += ", ".join(changes)
|
|
160
163
|
|
|
161
164
|
content_output += '"]\n'
|
|
162
165
|
if style:
|
|
163
|
-
content_output += f
|
|
166
|
+
content_output += f"{style}\n"
|
|
164
167
|
return content_output
|
|
165
168
|
|
|
166
169
|
|
|
@@ -171,7 +174,7 @@ class Edge:
|
|
|
171
174
|
parent_id: str
|
|
172
175
|
change_status: Union[str, None]
|
|
173
176
|
|
|
174
|
-
def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str =
|
|
177
|
+
def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = "base"):
|
|
175
178
|
self.id = edge_id
|
|
176
179
|
self.edge_from = edge_from
|
|
177
180
|
self.child_id = child_id
|
|
@@ -179,7 +182,7 @@ class Edge:
|
|
|
179
182
|
|
|
180
183
|
def update_edge_from(self, edge_from: str):
|
|
181
184
|
if self.edge_from != edge_from:
|
|
182
|
-
self.edge_from =
|
|
185
|
+
self.edge_from = "both"
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
class CheckSummary(BaseModel):
|
|
@@ -225,21 +228,21 @@ class LineageGraph:
|
|
|
225
228
|
edges: Dict[str, Edge] = {}
|
|
226
229
|
checks: List[CheckSummary] = None
|
|
227
230
|
|
|
228
|
-
def create_node(self, node_id: str, node_data: dict, data_from: str =
|
|
231
|
+
def create_node(self, node_id: str, node_data: dict, data_from: str = "base"):
|
|
229
232
|
if node_id not in self.nodes:
|
|
230
233
|
self.nodes[node_id] = Node(node_id, node_data, data_from)
|
|
231
234
|
else:
|
|
232
235
|
self.nodes[node_id].update_data(node_data, data_from)
|
|
233
236
|
|
|
234
|
-
def create_edge(self, parent_id: str, child_id: str, edge_from: str =
|
|
237
|
+
def create_edge(self, parent_id: str, child_id: str, edge_from: str = "base"):
|
|
235
238
|
if parent_id not in self.nodes:
|
|
236
|
-
_warn(f
|
|
239
|
+
_warn(f"Parent node {parent_id} not found in graph")
|
|
237
240
|
return
|
|
238
241
|
if child_id not in self.nodes:
|
|
239
|
-
_warn(f
|
|
242
|
+
_warn(f"Child node {child_id} not found in graph")
|
|
240
243
|
return
|
|
241
244
|
|
|
242
|
-
edge_id = f
|
|
245
|
+
edge_id = f"{parent_id}-->{child_id}"
|
|
243
246
|
if edge_id in self.edges:
|
|
244
247
|
self.edges[edge_id].update_edge_from(edge_from)
|
|
245
248
|
else:
|
|
@@ -250,67 +253,68 @@ class LineageGraph:
|
|
|
250
253
|
@property
|
|
251
254
|
def modified_set(self) -> Set[str]:
|
|
252
255
|
return set(
|
|
253
|
-
[node_id for node_id, node in self.nodes.items() if node.change_status in [
|
|
256
|
+
[node_id for node_id, node in self.nodes.items() if node.change_status in ["added", "removed", "modified"]]
|
|
257
|
+
)
|
|
254
258
|
|
|
255
259
|
def get_edge_str(self, edge_id):
|
|
256
260
|
edge = self.edges[edge_id]
|
|
257
261
|
child = self.nodes[edge.child_id]
|
|
258
262
|
|
|
259
|
-
if child.change_status ==
|
|
260
|
-
return f
|
|
261
|
-
if child.change_status is None or child.change_status ==
|
|
262
|
-
return f
|
|
263
|
-
if child.change_status ==
|
|
264
|
-
return f
|
|
263
|
+
if child.change_status == "removed":
|
|
264
|
+
return f"{edge.parent_id}-.->{edge.child_id}\n"
|
|
265
|
+
if child.change_status is None or child.change_status == "modified":
|
|
266
|
+
return f"{edge.parent_id}---->{edge.child_id}\n"
|
|
267
|
+
if child.change_status == "added":
|
|
268
|
+
return f"{edge.parent_id}-...->{edge.child_id}\n"
|
|
265
269
|
|
|
266
270
|
|
|
267
271
|
def _build_lineage_graph(base, current) -> LineageGraph:
|
|
268
272
|
graph = LineageGraph()
|
|
269
273
|
|
|
270
274
|
# Init Graph nodes with base & current nodes
|
|
271
|
-
for node_id, node_data in base.get(
|
|
272
|
-
graph.create_node(node_id, node_data,
|
|
275
|
+
for node_id, node_data in base.get("nodes", {}).items():
|
|
276
|
+
graph.create_node(node_id, node_data, "base")
|
|
273
277
|
|
|
274
|
-
for node_id, node_data in current.get(
|
|
278
|
+
for node_id, node_data in current.get("nodes", {}).items():
|
|
275
279
|
if node_id not in graph.nodes:
|
|
276
|
-
node = Node(node_id, node_data,
|
|
280
|
+
node = Node(node_id, node_data, "current")
|
|
277
281
|
graph.nodes[node_id] = node
|
|
278
282
|
else:
|
|
279
283
|
node = graph.nodes[node_id]
|
|
280
|
-
node.update_data(node_data,
|
|
284
|
+
node.update_data(node_data, "current")
|
|
281
285
|
|
|
282
286
|
# Build edges
|
|
283
|
-
for child_id, parents in base.get(
|
|
287
|
+
for child_id, parents in base.get("parent_map", {}).items():
|
|
284
288
|
for parent_id in parents:
|
|
285
|
-
graph.create_edge(parent_id, child_id,
|
|
286
|
-
for child_id, parents in current.get(
|
|
289
|
+
graph.create_edge(parent_id, child_id, "base")
|
|
290
|
+
for child_id, parents in current.get("parent_map", {}).items():
|
|
287
291
|
for parent_id in parents:
|
|
288
|
-
graph.create_edge(parent_id, child_id,
|
|
292
|
+
graph.create_edge(parent_id, child_id, "current")
|
|
289
293
|
|
|
290
294
|
return graph
|
|
291
295
|
|
|
292
296
|
|
|
293
297
|
def _build_node_schema(lineage, node_id):
|
|
294
|
-
return lineage.get(
|
|
298
|
+
return lineage.get("nodes", {}).get(node_id, {}).get("columns", {})
|
|
295
299
|
|
|
296
300
|
|
|
297
301
|
def _get_node_row_count_diff(node_id, node_name):
|
|
298
302
|
row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
|
|
299
303
|
for run in row_count_runs:
|
|
300
|
-
if node_id in run.params.get(
|
|
304
|
+
if node_id in run.params.get("node_ids", []):
|
|
301
305
|
result = run.result.get(node_name, {})
|
|
302
|
-
diff = TaskResultDiffer.diff(result.get(
|
|
306
|
+
diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
|
|
303
307
|
return diff, result
|
|
304
|
-
elif run.params.get(
|
|
308
|
+
elif run.params.get("node_id") == node_id:
|
|
305
309
|
result = run.result.get(node_name, {})
|
|
306
|
-
diff = TaskResultDiffer.diff(result.get(
|
|
310
|
+
diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
|
|
307
311
|
return diff, result
|
|
308
312
|
return None, None
|
|
309
313
|
|
|
310
314
|
|
|
311
315
|
def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> str:
|
|
312
316
|
if not check.related_nodes:
|
|
313
|
-
return
|
|
317
|
+
return "N/A"
|
|
314
318
|
|
|
315
319
|
nodes = check.related_nodes
|
|
316
320
|
if check.changed_nodes:
|
|
@@ -319,39 +323,43 @@ def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> s
|
|
|
319
323
|
nodes = check.changed_nodes
|
|
320
324
|
|
|
321
325
|
if len(nodes) <= limit:
|
|
322
|
-
return
|
|
326
|
+
return ", ".join(nodes)
|
|
323
327
|
|
|
324
|
-
display_nodes = nodes[:limit - 1]
|
|
325
|
-
return
|
|
328
|
+
display_nodes = nodes[: limit - 1]
|
|
329
|
+
return ", ".join(display_nodes) + f", and {len(nodes) - len(display_nodes)} more nodes"
|
|
326
330
|
|
|
327
331
|
|
|
328
332
|
def generate_summary_metadata(base_lineage, curr_lineage):
|
|
329
333
|
from py_markdown_table.markdown_table import markdown_table
|
|
330
334
|
|
|
331
|
-
base_manifest = base_lineage.get(
|
|
332
|
-
base_catalog = base_lineage.get(
|
|
333
|
-
curr_manifest = curr_lineage.get(
|
|
334
|
-
curr_catalog = curr_lineage.get(
|
|
335
|
+
base_manifest = base_lineage.get("manifest_metadata")
|
|
336
|
+
base_catalog = base_lineage.get("catalog_metadata")
|
|
337
|
+
curr_manifest = curr_lineage.get("manifest_metadata")
|
|
338
|
+
curr_catalog = curr_lineage.get("catalog_metadata")
|
|
335
339
|
|
|
336
340
|
metadata = [
|
|
337
341
|
{
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
342
|
+
"": "Base",
|
|
343
|
+
"Manifest": base_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
|
|
344
|
+
"Catalog": base_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if base_catalog else "N/A",
|
|
341
345
|
},
|
|
342
346
|
{
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
}
|
|
347
|
+
"": "Current",
|
|
348
|
+
"Manifest": curr_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
|
|
349
|
+
"Catalog": curr_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if curr_catalog else "N/A",
|
|
350
|
+
},
|
|
347
351
|
]
|
|
348
352
|
|
|
349
|
-
return
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
353
|
+
return (
|
|
354
|
+
markdown_table(metadata)
|
|
355
|
+
.set_params(
|
|
356
|
+
quote=False,
|
|
357
|
+
row_sep="markdown",
|
|
358
|
+
padding_width=1,
|
|
359
|
+
padding_weight="right", # Aligns the cell's contents to the beginning of the cell
|
|
360
|
+
)
|
|
361
|
+
.get_markdown()
|
|
362
|
+
)
|
|
355
363
|
|
|
356
364
|
|
|
357
365
|
def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], Dict[str, int]):
|
|
@@ -376,9 +384,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
|
|
|
376
384
|
continue
|
|
377
385
|
elif check.type == RunType.SCHEMA_DIFF:
|
|
378
386
|
differ = SchemaDiffResultDiffer(check, base_lineage, curr_lineage)
|
|
379
|
-
elif (
|
|
380
|
-
|
|
381
|
-
|
|
387
|
+
elif (
|
|
388
|
+
check.type
|
|
389
|
+
in [
|
|
390
|
+
RunType.ROW_COUNT_DIFF,
|
|
391
|
+
RunType.QUERY_DIFF,
|
|
392
|
+
RunType.VALUE_DIFF,
|
|
393
|
+
RunType.VALUE_DIFF_DETAIL,
|
|
394
|
+
RunType.PROFILE_DIFF,
|
|
395
|
+
RunType.TOP_K_DIFF,
|
|
396
|
+
RunType.HISTOGRAM_DIFF,
|
|
397
|
+
]
|
|
398
|
+
and run is not None
|
|
399
|
+
):
|
|
382
400
|
# Check the result is changed or not
|
|
383
401
|
differ = differ_factory(run)
|
|
384
402
|
|
|
@@ -391,19 +409,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
|
|
|
391
409
|
description=check.description,
|
|
392
410
|
changes=differ.changes,
|
|
393
411
|
node_ids=differ.related_node_ids,
|
|
394
|
-
changed_nodes=differ.changed_nodes
|
|
412
|
+
changed_nodes=differ.changed_nodes,
|
|
395
413
|
)
|
|
396
414
|
)
|
|
397
415
|
|
|
398
416
|
return checks_summary, {
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
417
|
+
"total": len(checks),
|
|
418
|
+
"mismatch": len(checks_summary),
|
|
419
|
+
"failed": failed_checks_count,
|
|
402
420
|
}
|
|
403
421
|
|
|
404
422
|
|
|
405
423
|
def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
406
|
-
content = up_to_level_content =
|
|
424
|
+
content = up_to_level_content = "graph LR\n"
|
|
407
425
|
is_not_modified = False
|
|
408
426
|
# Only show the modified nodes and there children
|
|
409
427
|
queue = list(graph.modified_set)
|
|
@@ -427,7 +445,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
|
427
445
|
content += node.get_node_str(graph.checks)
|
|
428
446
|
for child_id in node.children:
|
|
429
447
|
queue.append(child_id)
|
|
430
|
-
edge_id = f
|
|
448
|
+
edge_id = f"{node_id}-->{child_id}"
|
|
431
449
|
if edge_id not in display_edge:
|
|
432
450
|
display_edge.add(edge_id)
|
|
433
451
|
content += graph.get_edge_str(edge_id)
|
|
@@ -440,7 +458,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
|
440
458
|
return up_to_level_content, is_not_modified, len(content) > MAX_MERMAID_TEXT_SIZE
|
|
441
459
|
|
|
442
460
|
|
|
443
|
-
def generate_markdown_summary(ctx: RecceContext, summary_format: str =
|
|
461
|
+
def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown"):
|
|
444
462
|
lineage_diff = ctx.get_lineage_diff()
|
|
445
463
|
summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
|
|
446
464
|
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
|
|
@@ -448,81 +466,88 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown
|
|
|
448
466
|
mermaid_content, is_empty_graph, is_partial_graph = generate_mermaid_lineage_graph(graph)
|
|
449
467
|
check_content = generate_check_content(graph, check_statistics)
|
|
450
468
|
|
|
451
|
-
if summary_format ==
|
|
469
|
+
if summary_format == "mermaid":
|
|
452
470
|
return mermaid_content
|
|
453
|
-
elif summary_format ==
|
|
471
|
+
elif summary_format == "check":
|
|
454
472
|
return check_content
|
|
455
|
-
elif summary_format ==
|
|
473
|
+
elif summary_format == "markdown":
|
|
456
474
|
|
|
457
|
-
content =
|
|
458
|
-
content += f
|
|
475
|
+
content = "# Recce Summary\n"
|
|
476
|
+
content += f"## Manifest Information\n{summary_metadata}\n"
|
|
459
477
|
|
|
460
478
|
if is_empty_graph is False:
|
|
461
|
-
content += f
|
|
479
|
+
content += f"""
|
|
462
480
|
## Lineage Graph
|
|
463
481
|
{"_Too many nodes to generate! Please see the full lineage graph on Recce instance._" if is_partial_graph else ''}
|
|
464
482
|
```mermaid
|
|
465
483
|
{mermaid_content}
|
|
466
484
|
```
|
|
467
|
-
|
|
485
|
+
"""
|
|
468
486
|
else:
|
|
469
|
-
content +=
|
|
487
|
+
content += """
|
|
470
488
|
## Lineage Graph
|
|
471
489
|
No changed module was detected.
|
|
472
|
-
|
|
490
|
+
"""
|
|
473
491
|
if check_content:
|
|
474
492
|
content += check_content
|
|
475
493
|
|
|
476
494
|
if ctx.state_loader.cloud_mode:
|
|
477
495
|
pr_info = ctx.state_loader.pr_info
|
|
478
|
-
content += f
|
|
496
|
+
content += f"\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n"
|
|
479
497
|
|
|
480
498
|
return content
|
|
481
499
|
|
|
482
500
|
|
|
483
501
|
def generate_check_content(graph, check_statistics):
|
|
484
502
|
from py_markdown_table.markdown_table import markdown_table
|
|
485
|
-
|
|
503
|
+
|
|
504
|
+
content = ""
|
|
486
505
|
check_content = None
|
|
487
506
|
# Generate the check summary if we found any changes
|
|
488
507
|
if len(graph.checks) > 0:
|
|
489
508
|
data = []
|
|
490
509
|
for check in graph.checks:
|
|
491
|
-
data.append(
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
510
|
+
data.append(
|
|
511
|
+
{
|
|
512
|
+
"Name": check.name,
|
|
513
|
+
"Type": str(check.type).replace("_", " ").title(),
|
|
514
|
+
"Mismatched Nodes": _generate_mismatched_nodes_summary(check),
|
|
515
|
+
# Temporarily remove the type of changes, until we implement a better way to display it.
|
|
516
|
+
# 'Type of Changes': _formate_changes(check.changes)
|
|
517
|
+
}
|
|
518
|
+
)
|
|
519
|
+
check_content = (
|
|
520
|
+
markdown_table(data)
|
|
521
|
+
.set_params(
|
|
522
|
+
quote=False,
|
|
523
|
+
row_sep="markdown",
|
|
524
|
+
padding_width=1,
|
|
525
|
+
padding_weight="right", # Aligns the cell's contents to the beginning of the cell
|
|
526
|
+
)
|
|
527
|
+
.get_markdown()
|
|
528
|
+
)
|
|
504
529
|
|
|
505
|
-
if check_statistics.get(
|
|
506
|
-
warning_message =
|
|
530
|
+
if check_statistics.get("total", 0) > 0:
|
|
531
|
+
warning_message = ""
|
|
507
532
|
statistics = {
|
|
508
|
-
|
|
509
|
-
|
|
533
|
+
"Checks Run": check_statistics.get("total", 0),
|
|
534
|
+
"Data Mismatch Detected": check_statistics.get("mismatch", 0),
|
|
510
535
|
}
|
|
511
|
-
if check_statistics.get(
|
|
512
|
-
statistics[
|
|
513
|
-
warning_message =
|
|
536
|
+
if check_statistics.get("failed", 0) > 0:
|
|
537
|
+
statistics["Incomplete Checks"] = check_statistics.get("failed", 0)
|
|
538
|
+
warning_message = """
|
|
514
539
|
:warning: **Incomplete Checks** refers to checks that did not successfully run due to configuration or SQL errors.
|
|
515
540
|
Please check the output of `recce run` for more information
|
|
516
|
-
|
|
517
|
-
check_summary = markdown_table([statistics]).set_params(quote=False, row_sep=
|
|
518
|
-
content += f
|
|
541
|
+
"""
|
|
542
|
+
check_summary = markdown_table([statistics]).set_params(quote=False, row_sep="markdown").get_markdown()
|
|
543
|
+
content += f"""
|
|
519
544
|
## Checks Summary
|
|
520
545
|
{check_summary}
|
|
521
546
|
{warning_message}
|
|
522
|
-
|
|
547
|
+
"""
|
|
523
548
|
if check_content:
|
|
524
|
-
content += f
|
|
549
|
+
content += f"""
|
|
525
550
|
### Checks of Data Mismatch Detected
|
|
526
551
|
{check_content}
|
|
527
|
-
|
|
552
|
+
"""
|
|
528
553
|
return content
|
recce/tasks/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from .core import Task
|
|
2
2
|
from .histogram import HistogramDiffTask
|
|
3
3
|
from .profile import ProfileDiffTask, ProfileTask
|
|
4
|
-
from .query import
|
|
5
|
-
from .rowcount import
|
|
4
|
+
from .query import QueryBaseTask, QueryDiffTask, QueryTask
|
|
5
|
+
from .rowcount import RowCountDiffTask, RowCountTask
|
|
6
6
|
from .top_k import TopKDiffTask
|
|
7
|
-
from .valuediff import
|
|
7
|
+
from .valuediff import ValueDiffDetailTask, ValueDiffTask
|
recce/tasks/core.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import List,
|
|
2
|
+
from typing import List, Literal, Optional, Union
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
from recce.core import default_context
|
|
7
7
|
from recce.exceptions import RecceCancelException
|
|
8
|
-
from recce.models import
|
|
8
|
+
from recce.models import Check, Run
|
|
9
9
|
from recce.util.pydantic_model import pydantic_model_dump
|
|
10
10
|
|
|
11
11
|
|
|
@@ -63,6 +63,7 @@ class TaskResultDiffer(ABC):
|
|
|
63
63
|
@staticmethod
|
|
64
64
|
def diff(base, current):
|
|
65
65
|
from deepdiff import DeepDiff
|
|
66
|
+
|
|
66
67
|
diff = DeepDiff(base, current, ignore_order=True)
|
|
67
68
|
return diff if diff else None
|
|
68
69
|
|
|
@@ -76,15 +77,12 @@ class TaskResultDiffer(ABC):
|
|
|
76
77
|
select: Optional[str] = None,
|
|
77
78
|
exclude: Optional[str] = None,
|
|
78
79
|
packages: Optional[list[str]] = None,
|
|
79
|
-
view_mode: Optional[Literal[
|
|
80
|
+
view_mode: Optional[Literal["all", "changed_models"]] = None,
|
|
80
81
|
) -> List[str]:
|
|
81
82
|
nodes = default_context().adapter.select_nodes(
|
|
82
|
-
select=select,
|
|
83
|
-
exclude=exclude,
|
|
84
|
-
packages=packages,
|
|
85
|
-
view_mode=view_mode
|
|
83
|
+
select=select, exclude=exclude, packages=packages, view_mode=view_mode
|
|
86
84
|
)
|
|
87
|
-
return [node for node in nodes if not node.startswith(
|
|
85
|
+
return [node for node in nodes if not node.startswith("test.")]
|
|
88
86
|
|
|
89
87
|
@abstractmethod
|
|
90
88
|
def _check_result_changed_fn(self, result):
|
|
@@ -100,10 +98,10 @@ class TaskResultDiffer(ABC):
|
|
|
100
98
|
Should be implemented by subclass.
|
|
101
99
|
"""
|
|
102
100
|
params = self.run.params
|
|
103
|
-
if params.get(
|
|
104
|
-
return [TaskResultDiffer.get_node_id_by_name(params.get(
|
|
105
|
-
elif params.get(
|
|
106
|
-
names = params.get(
|
|
101
|
+
if params.get("model"):
|
|
102
|
+
return [TaskResultDiffer.get_node_id_by_name(params.get("model"))]
|
|
103
|
+
elif params.get("node_names"):
|
|
104
|
+
names = params.get("node_names", [])
|
|
107
105
|
return [TaskResultDiffer.get_node_id_by_name(name) for name in names]
|
|
108
106
|
else:
|
|
109
107
|
# No related node ids in the params
|
|
@@ -125,7 +123,7 @@ class CheckValidator:
|
|
|
125
123
|
try:
|
|
126
124
|
check = Check(**check)
|
|
127
125
|
except Exception as e:
|
|
128
|
-
raise ValueError(f
|
|
126
|
+
raise ValueError(f"Invalid check format. {str(e)}")
|
|
129
127
|
|
|
130
128
|
self.validate_check(check)
|
|
131
129
|
|