recce-nightly 1.3.0.20250507__py3-none-any.whl → 1.4.0.20250515__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (93)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +22 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +355 -316
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +44 -49
  12. recce/cli.py +484 -285
  13. recce/config.py +42 -33
  14. recce/core.py +52 -44
  15. recce/data/404.html +1 -1
  16. recce/data/_next/static/chunks/{368-7587b306577df275.js → 778-aef312bffb4c0312.js} +15 -15
  17. recce/data/_next/static/chunks/8d700b6a.ed11a130057c7a47.js +1 -0
  18. recce/data/_next/static/chunks/app/layout-c713a2829d3279e4.js +1 -0
  19. recce/data/_next/static/chunks/app/page-7086764277331fcb.js +1 -0
  20. recce/data/_next/static/chunks/{cd9f8d63-cf0d5a7b0f7a92e8.js → cd9f8d63-e020f408095ed77c.js} +3 -3
  21. recce/data/_next/static/chunks/webpack-b787cb1a4f2293de.js +1 -0
  22. recce/data/_next/static/css/88b8abc134cfd59a.css +3 -0
  23. recce/data/index.html +2 -2
  24. recce/data/index.txt +2 -2
  25. recce/diff.py +6 -12
  26. recce/event/__init__.py +74 -72
  27. recce/event/collector.py +27 -20
  28. recce/event/track.py +39 -27
  29. recce/exceptions.py +1 -1
  30. recce/git.py +7 -7
  31. recce/github.py +57 -53
  32. recce/models/__init__.py +1 -1
  33. recce/models/check.py +6 -7
  34. recce/models/run.py +1 -0
  35. recce/models/types.py +27 -27
  36. recce/pull_request.py +26 -24
  37. recce/run.py +148 -111
  38. recce/server.py +103 -89
  39. recce/state.py +209 -177
  40. recce/summary.py +168 -143
  41. recce/tasks/__init__.py +3 -3
  42. recce/tasks/core.py +11 -13
  43. recce/tasks/dataframe.py +19 -17
  44. recce/tasks/histogram.py +69 -34
  45. recce/tasks/lineage.py +2 -2
  46. recce/tasks/profile.py +147 -86
  47. recce/tasks/query.py +139 -87
  48. recce/tasks/rowcount.py +33 -30
  49. recce/tasks/schema.py +14 -14
  50. recce/tasks/top_k.py +35 -35
  51. recce/tasks/valuediff.py +216 -152
  52. recce/util/breaking.py +77 -84
  53. recce/util/cll.py +55 -51
  54. recce/util/io.py +19 -17
  55. recce/util/logger.py +1 -1
  56. recce/util/recce_cloud.py +70 -72
  57. recce/util/singleton.py +4 -4
  58. recce/yaml/__init__.py +7 -10
  59. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/METADATA +5 -2
  60. recce_nightly-1.4.0.20250515.dist-info/RECORD +143 -0
  61. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/WHEEL +1 -1
  62. tests/adapter/dbt_adapter/conftest.py +1 -0
  63. tests/adapter/dbt_adapter/dbt_test_helper.py +28 -18
  64. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  65. tests/adapter/dbt_adapter/test_dbt_cll.py +39 -32
  66. tests/adapter/dbt_adapter/test_selector.py +22 -21
  67. tests/tasks/test_histogram.py +58 -66
  68. tests/tasks/test_lineage.py +36 -23
  69. tests/tasks/test_preset_checks.py +45 -31
  70. tests/tasks/test_profile.py +340 -15
  71. tests/tasks/test_query.py +40 -40
  72. tests/tasks/test_row_count.py +65 -46
  73. tests/tasks/test_schema.py +65 -42
  74. tests/tasks/test_top_k.py +22 -18
  75. tests/tasks/test_valuediff.py +43 -32
  76. tests/test_cli.py +71 -58
  77. tests/test_config.py +7 -9
  78. tests/test_core.py +5 -3
  79. tests/test_dbt.py +7 -7
  80. tests/test_pull_request.py +1 -1
  81. tests/test_server.py +19 -13
  82. tests/test_state.py +40 -27
  83. tests/test_summary.py +18 -14
  84. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  85. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  86. recce/data/_next/static/chunks/app/page-92f13c8fad9fae3d.js +0 -1
  87. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  88. recce_nightly-1.3.0.20250507.dist-info/RECORD +0 -142
  89. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → q0Xsc9Sd6PDuo1lshYpLu}/_buildManifest.js +0 -0
  90. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → q0Xsc9Sd6PDuo1lshYpLu}/_ssgManifest.js +0 -0
  91. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/entry_points.txt +0 -0
  92. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/licenses/LICENSE +0 -0
  93. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/top_level.txt +0 -0
recce/summary.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import os
2
2
  import sys
3
- from typing import List, Dict, Set, Union, Type, Optional
3
+ from typing import Dict, List, Optional, Set, Type, Union
4
4
  from uuid import UUID
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from recce.apis.check_func import get_node_name_by_id
9
9
  from recce.core import RecceContext
10
- from recce.models import CheckDAO, RunDAO, RunType, Run
10
+ from recce.models import CheckDAO, Run, RunDAO, RunType
11
11
  from recce.tasks.core import TaskResultDiffer
12
12
  from recce.tasks.histogram import HistogramDiffTaskResultDiffer
13
13
  from recce.tasks.profile import ProfileDiffResultDiffer
@@ -15,13 +15,16 @@ from recce.tasks.query import QueryDiffResultDiffer
15
15
  from recce.tasks.rowcount import RowCountDiffResultDiffer
16
16
  from recce.tasks.schema import SchemaDiffResultDiffer
17
17
  from recce.tasks.top_k import TopKDiffTaskResultDiffer
18
- from recce.tasks.valuediff import ValueDiffTaskResultDiffer, ValueDiffDetailTaskResultDiffer
18
+ from recce.tasks.valuediff import (
19
+ ValueDiffDetailTaskResultDiffer,
20
+ ValueDiffTaskResultDiffer,
21
+ )
19
22
 
20
- RECCE_CLOUD_HOST = os.environ.get('RECCE_CLOUD_HOST', 'https://cloud.datarecce.io')
23
+ RECCE_CLOUD_HOST = os.environ.get("RECCE_CLOUD_HOST", "https://cloud.datarecce.io")
21
24
 
22
- ADD_COLOR = '#1dce00'
23
- MODIFIED_COLOR = '#ffa502'
24
- REMOVE_COLOR = '#ff067e'
25
+ ADD_COLOR = "#1dce00"
26
+ MODIFIED_COLOR = "#ffa502"
27
+ REMOVE_COLOR = "#ff067e"
25
28
 
26
29
  MAX_MERMAID_TEXT_SIZE = 50000 # source: https://mermaid.js.org/config/schema-docs/config.html#maxtextsize
27
30
 
@@ -42,44 +45,44 @@ class Node:
42
45
  base_data: dict
43
46
  current_data: dict
44
47
 
45
- def __init__(self, node_id: str, node_data: dict, data_from: str = 'base'):
48
+ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):
46
49
  self.id = node_id
47
- self.name = node_data['name']
50
+ self.name = node_data["name"]
48
51
  self.data_from = data_from
49
- self.resource_type = node_data['resource_type']
50
- self.package_name = node_data['package_name']
52
+ self.resource_type = node_data["resource_type"]
53
+ self.package_name = node_data["package_name"]
51
54
  self.children = []
52
55
  self.parents = []
53
56
 
54
57
  self.base_data = {}
55
58
  self.current_data = {}
56
59
 
57
- if data_from == 'base':
60
+ if data_from == "base":
58
61
  self.base_data = node_data
59
- elif data_from == 'current':
62
+ elif data_from == "current":
60
63
  self.current_data = node_data
61
64
 
62
65
  @property
63
66
  def change_status(self):
64
- base_checksum = self.base_data.get('checksum', {}).get('checksum')
65
- curr_checksum = self.current_data.get('checksum', {}).get('checksum')
66
- if self.data_from == 'base':
67
- return 'removed'
68
- elif self.data_from == 'current':
69
- return 'added'
67
+ base_checksum = self.base_data.get("checksum", {}).get("checksum")
68
+ curr_checksum = self.current_data.get("checksum", {}).get("checksum")
69
+ if self.data_from == "base":
70
+ return "removed"
71
+ elif self.data_from == "current":
72
+ return "added"
70
73
  elif base_checksum and curr_checksum and base_checksum != curr_checksum:
71
- return 'modified'
74
+ return "modified"
72
75
  return None
73
76
 
74
77
  def update_data(self, node_data: dict, data_from: str):
75
- if data_from not in ['base', 'current']:
76
- raise ValueError(f'Invalid data_from value: {data_from}')
78
+ if data_from not in ["base", "current"]:
79
+ raise ValueError(f"Invalid data_from value: {data_from}")
77
80
  if self.data_from != data_from:
78
- self.data_from = 'both'
81
+ self.data_from = "both"
79
82
 
80
- if data_from == 'base':
83
+ if data_from == "base":
81
84
  self.base_data = node_data
82
- elif data_from == 'current':
85
+ elif data_from == "current":
83
86
  self.current_data = node_data
84
87
 
85
88
  def append_parent(self, parent_id: str):
@@ -93,8 +96,8 @@ class Node:
93
96
  def _cal_row_count_delta_percentage(self):
94
97
  row_count_diff, run_result = _get_node_row_count_diff(self.id, self.name)
95
98
  if row_count_diff:
96
- base = run_result.get('base', 0)
97
- current = run_result.get('curr', 0)
99
+ base = run_result.get("base", 0)
100
+ current = run_result.get("curr", 0)
98
101
  if int(current) > int(base):
99
102
  p = (int(current) - int(base)) / int(current) * 100
100
103
  return f'🔼 +{round(p, 2) if p > 0.1 else "<0.1"}%'
@@ -104,25 +107,25 @@ class Node:
104
107
  return None
105
108
 
106
109
  def _get_schema_diff(self):
107
- base_schema = self.base_data.get('columns', {})
108
- current_schema = self.current_data.get('columns', {})
110
+ base_schema = self.base_data.get("columns", {})
111
+ current_schema = self.current_data.get("columns", {})
109
112
  schema_diff = TaskResultDiffer.diff(base_schema, current_schema)
110
113
  return schema_diff
111
114
 
112
115
  def _what_changed(self, checks=None):
113
116
  changes = []
114
- if self.change_status == 'added':
115
- return ['Added Node']
116
- elif self.change_status == 'removed':
117
- return ['Removed Node']
118
- elif self.change_status == 'modified':
119
- changes.append('Code')
117
+ if self.change_status == "added":
118
+ return ["Added Node"]
119
+ elif self.change_status == "removed":
120
+ return ["Removed Node"]
121
+ elif self.change_status == "modified":
122
+ changes.append("Code")
120
123
  row_count_delta_percentage = self._cal_row_count_delta_percentage()
121
124
  if row_count_delta_percentage:
122
- changes.append(f'Row Count {row_count_delta_percentage}')
125
+ changes.append(f"Row Count {row_count_delta_percentage}")
123
126
  schema_diff = self._get_schema_diff()
124
127
  if schema_diff:
125
- changes.append('Schema')
128
+ changes.append("Schema")
126
129
 
127
130
  if checks:
128
131
  for check in checks:
@@ -131,7 +134,7 @@ class Node:
131
134
  # Skip the row count and schema diff check, since we already have it.
132
135
  continue
133
136
  if check.node_ids and self.id in check.node_ids:
134
- changes.append(str(check.type).replace('_', ' ').title())
137
+ changes.append(str(check.type).replace("_", " ").title())
135
138
  return changes
136
139
 
137
140
  def get_node_str(self, checks=None):
@@ -140,12 +143,12 @@ class Node:
140
143
 
141
144
  if self.change_status is not None:
142
145
  is_changed = True
143
- if self.change_status == 'added':
144
- style = f'style {self.id} stroke:{ADD_COLOR}'
145
- elif self.change_status == 'modified':
146
- style = f'style {self.id} stroke:{MODIFIED_COLOR}'
147
- elif self.change_status == 'removed':
148
- style = f'style {self.id} stroke:{REMOVE_COLOR}'
146
+ if self.change_status == "added":
147
+ style = f"style {self.id} stroke:{ADD_COLOR}"
148
+ elif self.change_status == "modified":
149
+ style = f"style {self.id} stroke:{MODIFIED_COLOR}"
150
+ elif self.change_status == "removed":
151
+ style = f"style {self.id} stroke:{REMOVE_COLOR}"
149
152
 
150
153
  if checks:
151
154
  for check in checks:
@@ -154,13 +157,13 @@ class Node:
154
157
 
155
158
  content_output = f'{self.id}["{self.name}'
156
159
  if is_changed:
157
- content_output += '\n\n[What\'s Changed]\n'
160
+ content_output += "\n\n[What's Changed]\n"
158
161
  changes = self._what_changed(checks)
159
- content_output += ', '.join(changes)
162
+ content_output += ", ".join(changes)
160
163
 
161
164
  content_output += '"]\n'
162
165
  if style:
163
- content_output += f'{style}\n'
166
+ content_output += f"{style}\n"
164
167
  return content_output
165
168
 
166
169
 
@@ -171,7 +174,7 @@ class Edge:
171
174
  parent_id: str
172
175
  change_status: Union[str, None]
173
176
 
174
- def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = 'base'):
177
+ def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = "base"):
175
178
  self.id = edge_id
176
179
  self.edge_from = edge_from
177
180
  self.child_id = child_id
@@ -179,7 +182,7 @@ class Edge:
179
182
 
180
183
  def update_edge_from(self, edge_from: str):
181
184
  if self.edge_from != edge_from:
182
- self.edge_from = 'both'
185
+ self.edge_from = "both"
183
186
 
184
187
 
185
188
  class CheckSummary(BaseModel):
@@ -225,21 +228,21 @@ class LineageGraph:
225
228
  edges: Dict[str, Edge] = {}
226
229
  checks: List[CheckSummary] = None
227
230
 
228
- def create_node(self, node_id: str, node_data: dict, data_from: str = 'base'):
231
+ def create_node(self, node_id: str, node_data: dict, data_from: str = "base"):
229
232
  if node_id not in self.nodes:
230
233
  self.nodes[node_id] = Node(node_id, node_data, data_from)
231
234
  else:
232
235
  self.nodes[node_id].update_data(node_data, data_from)
233
236
 
234
- def create_edge(self, parent_id: str, child_id: str, edge_from: str = 'base'):
237
+ def create_edge(self, parent_id: str, child_id: str, edge_from: str = "base"):
235
238
  if parent_id not in self.nodes:
236
- _warn(f'Parent node {parent_id} not found in graph')
239
+ _warn(f"Parent node {parent_id} not found in graph")
237
240
  return
238
241
  if child_id not in self.nodes:
239
- _warn(f'Child node {child_id} not found in graph')
242
+ _warn(f"Child node {child_id} not found in graph")
240
243
  return
241
244
 
242
- edge_id = f'{parent_id}-->{child_id}'
245
+ edge_id = f"{parent_id}-->{child_id}"
243
246
  if edge_id in self.edges:
244
247
  self.edges[edge_id].update_edge_from(edge_from)
245
248
  else:
@@ -250,67 +253,68 @@ class LineageGraph:
250
253
  @property
251
254
  def modified_set(self) -> Set[str]:
252
255
  return set(
253
- [node_id for node_id, node in self.nodes.items() if node.change_status in ['added', 'removed', 'modified']])
256
+ [node_id for node_id, node in self.nodes.items() if node.change_status in ["added", "removed", "modified"]]
257
+ )
254
258
 
255
259
  def get_edge_str(self, edge_id):
256
260
  edge = self.edges[edge_id]
257
261
  child = self.nodes[edge.child_id]
258
262
 
259
- if child.change_status == 'removed':
260
- return f'{edge.parent_id}-.->{edge.child_id}\n'
261
- if child.change_status is None or child.change_status == 'modified':
262
- return f'{edge.parent_id}---->{edge.child_id}\n'
263
- if child.change_status == 'added':
264
- return f'{edge.parent_id}-...->{edge.child_id}\n'
263
+ if child.change_status == "removed":
264
+ return f"{edge.parent_id}-.->{edge.child_id}\n"
265
+ if child.change_status is None or child.change_status == "modified":
266
+ return f"{edge.parent_id}---->{edge.child_id}\n"
267
+ if child.change_status == "added":
268
+ return f"{edge.parent_id}-...->{edge.child_id}\n"
265
269
 
266
270
 
267
271
  def _build_lineage_graph(base, current) -> LineageGraph:
268
272
  graph = LineageGraph()
269
273
 
270
274
  # Init Graph nodes with base & current nodes
271
- for node_id, node_data in base.get('nodes', {}).items():
272
- graph.create_node(node_id, node_data, 'base')
275
+ for node_id, node_data in base.get("nodes", {}).items():
276
+ graph.create_node(node_id, node_data, "base")
273
277
 
274
- for node_id, node_data in current.get('nodes', {}).items():
278
+ for node_id, node_data in current.get("nodes", {}).items():
275
279
  if node_id not in graph.nodes:
276
- node = Node(node_id, node_data, 'current')
280
+ node = Node(node_id, node_data, "current")
277
281
  graph.nodes[node_id] = node
278
282
  else:
279
283
  node = graph.nodes[node_id]
280
- node.update_data(node_data, 'current')
284
+ node.update_data(node_data, "current")
281
285
 
282
286
  # Build edges
283
- for child_id, parents in base.get('parent_map', {}).items():
287
+ for child_id, parents in base.get("parent_map", {}).items():
284
288
  for parent_id in parents:
285
- graph.create_edge(parent_id, child_id, 'base')
286
- for child_id, parents in current.get('parent_map', {}).items():
289
+ graph.create_edge(parent_id, child_id, "base")
290
+ for child_id, parents in current.get("parent_map", {}).items():
287
291
  for parent_id in parents:
288
- graph.create_edge(parent_id, child_id, 'current')
292
+ graph.create_edge(parent_id, child_id, "current")
289
293
 
290
294
  return graph
291
295
 
292
296
 
293
297
  def _build_node_schema(lineage, node_id):
294
- return lineage.get('nodes', {}).get(node_id, {}).get('columns', {})
298
+ return lineage.get("nodes", {}).get(node_id, {}).get("columns", {})
295
299
 
296
300
 
297
301
  def _get_node_row_count_diff(node_id, node_name):
298
302
  row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
299
303
  for run in row_count_runs:
300
- if node_id in run.params.get('node_ids', []):
304
+ if node_id in run.params.get("node_ids", []):
301
305
  result = run.result.get(node_name, {})
302
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
306
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
303
307
  return diff, result
304
- elif run.params.get('node_id') == node_id:
308
+ elif run.params.get("node_id") == node_id:
305
309
  result = run.result.get(node_name, {})
306
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
310
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
307
311
  return diff, result
308
312
  return None, None
309
313
 
310
314
 
311
315
  def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> str:
312
316
  if not check.related_nodes:
313
- return 'N/A'
317
+ return "N/A"
314
318
 
315
319
  nodes = check.related_nodes
316
320
  if check.changed_nodes:
@@ -319,39 +323,43 @@ def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> s
319
323
  nodes = check.changed_nodes
320
324
 
321
325
  if len(nodes) <= limit:
322
- return ', '.join(nodes)
326
+ return ", ".join(nodes)
323
327
 
324
- display_nodes = nodes[:limit - 1]
325
- return ', '.join(display_nodes) + f', and {len(nodes) - len(display_nodes)} more nodes'
328
+ display_nodes = nodes[: limit - 1]
329
+ return ", ".join(display_nodes) + f", and {len(nodes) - len(display_nodes)} more nodes"
326
330
 
327
331
 
328
332
  def generate_summary_metadata(base_lineage, curr_lineage):
329
333
  from py_markdown_table.markdown_table import markdown_table
330
334
 
331
- base_manifest = base_lineage.get('manifest_metadata')
332
- base_catalog = base_lineage.get('catalog_metadata')
333
- curr_manifest = curr_lineage.get('manifest_metadata')
334
- curr_catalog = curr_lineage.get('catalog_metadata')
335
+ base_manifest = base_lineage.get("manifest_metadata")
336
+ base_catalog = base_lineage.get("catalog_metadata")
337
+ curr_manifest = curr_lineage.get("manifest_metadata")
338
+ curr_catalog = curr_lineage.get("catalog_metadata")
335
339
 
336
340
  metadata = [
337
341
  {
338
- '': 'Base',
339
- 'Manifest': base_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
340
- 'Catalog': base_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if base_catalog else 'N/A'
342
+ "": "Base",
343
+ "Manifest": base_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
344
+ "Catalog": base_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if base_catalog else "N/A",
341
345
  },
342
346
  {
343
- '': 'Current',
344
- 'Manifest': curr_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
345
- 'Catalog': curr_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if curr_catalog else 'N/A'
346
- }
347
+ "": "Current",
348
+ "Manifest": curr_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
349
+ "Catalog": curr_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if curr_catalog else "N/A",
350
+ },
347
351
  ]
348
352
 
349
- return markdown_table(metadata).set_params(
350
- quote=False,
351
- row_sep='markdown',
352
- padding_width=1,
353
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
354
- ).get_markdown()
353
+ return (
354
+ markdown_table(metadata)
355
+ .set_params(
356
+ quote=False,
357
+ row_sep="markdown",
358
+ padding_width=1,
359
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
360
+ )
361
+ .get_markdown()
362
+ )
355
363
 
356
364
 
357
365
  def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], Dict[str, int]):
@@ -376,9 +384,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
376
384
  continue
377
385
  elif check.type == RunType.SCHEMA_DIFF:
378
386
  differ = SchemaDiffResultDiffer(check, base_lineage, curr_lineage)
379
- elif (check.type in [RunType.ROW_COUNT_DIFF, RunType.QUERY_DIFF,
380
- RunType.VALUE_DIFF, RunType.VALUE_DIFF_DETAIL, RunType.PROFILE_DIFF,
381
- RunType.TOP_K_DIFF, RunType.HISTOGRAM_DIFF] and run is not None):
387
+ elif (
388
+ check.type
389
+ in [
390
+ RunType.ROW_COUNT_DIFF,
391
+ RunType.QUERY_DIFF,
392
+ RunType.VALUE_DIFF,
393
+ RunType.VALUE_DIFF_DETAIL,
394
+ RunType.PROFILE_DIFF,
395
+ RunType.TOP_K_DIFF,
396
+ RunType.HISTOGRAM_DIFF,
397
+ ]
398
+ and run is not None
399
+ ):
382
400
  # Check the result is changed or not
383
401
  differ = differ_factory(run)
384
402
 
@@ -391,19 +409,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
391
409
  description=check.description,
392
410
  changes=differ.changes,
393
411
  node_ids=differ.related_node_ids,
394
- changed_nodes=differ.changed_nodes
412
+ changed_nodes=differ.changed_nodes,
395
413
  )
396
414
  )
397
415
 
398
416
  return checks_summary, {
399
- 'total': len(checks),
400
- 'mismatch': len(checks_summary),
401
- 'failed': failed_checks_count,
417
+ "total": len(checks),
418
+ "mismatch": len(checks_summary),
419
+ "failed": failed_checks_count,
402
420
  }
403
421
 
404
422
 
405
423
  def generate_mermaid_lineage_graph(graph: LineageGraph):
406
- content = up_to_level_content = 'graph LR\n'
424
+ content = up_to_level_content = "graph LR\n"
407
425
  is_not_modified = False
408
426
  # Only show the modified nodes and there children
409
427
  queue = list(graph.modified_set)
@@ -427,7 +445,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
427
445
  content += node.get_node_str(graph.checks)
428
446
  for child_id in node.children:
429
447
  queue.append(child_id)
430
- edge_id = f'{node_id}-->{child_id}'
448
+ edge_id = f"{node_id}-->{child_id}"
431
449
  if edge_id not in display_edge:
432
450
  display_edge.add(edge_id)
433
451
  content += graph.get_edge_str(edge_id)
@@ -440,7 +458,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
440
458
  return up_to_level_content, is_not_modified, len(content) > MAX_MERMAID_TEXT_SIZE
441
459
 
442
460
 
443
- def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown'):
461
+ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown"):
444
462
  lineage_diff = ctx.get_lineage_diff()
445
463
  summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
446
464
  graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
@@ -448,81 +466,88 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown
448
466
  mermaid_content, is_empty_graph, is_partial_graph = generate_mermaid_lineage_graph(graph)
449
467
  check_content = generate_check_content(graph, check_statistics)
450
468
 
451
- if summary_format == 'mermaid':
469
+ if summary_format == "mermaid":
452
470
  return mermaid_content
453
- elif summary_format == 'check':
471
+ elif summary_format == "check":
454
472
  return check_content
455
- elif summary_format == 'markdown':
473
+ elif summary_format == "markdown":
456
474
 
457
- content = '# Recce Summary\n'
458
- content += f'## Manifest Information\n{summary_metadata}\n'
475
+ content = "# Recce Summary\n"
476
+ content += f"## Manifest Information\n{summary_metadata}\n"
459
477
 
460
478
  if is_empty_graph is False:
461
- content += f'''
479
+ content += f"""
462
480
  ## Lineage Graph
463
481
  {"_Too many nodes to generate! Please see the full lineage graph on Recce instance._" if is_partial_graph else ''}
464
482
  ```mermaid
465
483
  {mermaid_content}
466
484
  ```
467
- '''
485
+ """
468
486
  else:
469
- content += '''
487
+ content += """
470
488
  ## Lineage Graph
471
489
  No changed module was detected.
472
- '''
490
+ """
473
491
  if check_content:
474
492
  content += check_content
475
493
 
476
494
  if ctx.state_loader.cloud_mode:
477
495
  pr_info = ctx.state_loader.pr_info
478
- content += f'\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n'
496
+ content += f"\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n"
479
497
 
480
498
  return content
481
499
 
482
500
 
483
501
  def generate_check_content(graph, check_statistics):
484
502
  from py_markdown_table.markdown_table import markdown_table
485
- content = ''
503
+
504
+ content = ""
486
505
  check_content = None
487
506
  # Generate the check summary if we found any changes
488
507
  if len(graph.checks) > 0:
489
508
  data = []
490
509
  for check in graph.checks:
491
- data.append({
492
- 'Name': check.name,
493
- 'Type': str(check.type).replace('_', ' ').title(),
494
- 'Mismatched Nodes': _generate_mismatched_nodes_summary(check),
495
- # Temporarily remove the type of changes, until we implement a better way to display it.
496
- # 'Type of Changes': _formate_changes(check.changes)
497
- })
498
- check_content = markdown_table(data).set_params(
499
- quote=False,
500
- row_sep='markdown',
501
- padding_width=1,
502
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
503
- ).get_markdown()
510
+ data.append(
511
+ {
512
+ "Name": check.name,
513
+ "Type": str(check.type).replace("_", " ").title(),
514
+ "Mismatched Nodes": _generate_mismatched_nodes_summary(check),
515
+ # Temporarily remove the type of changes, until we implement a better way to display it.
516
+ # 'Type of Changes': _formate_changes(check.changes)
517
+ }
518
+ )
519
+ check_content = (
520
+ markdown_table(data)
521
+ .set_params(
522
+ quote=False,
523
+ row_sep="markdown",
524
+ padding_width=1,
525
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
526
+ )
527
+ .get_markdown()
528
+ )
504
529
 
505
- if check_statistics.get('total', 0) > 0:
506
- warning_message = ''
530
+ if check_statistics.get("total", 0) > 0:
531
+ warning_message = ""
507
532
  statistics = {
508
- 'Checks Run': check_statistics.get('total', 0),
509
- 'Data Mismatch Detected': check_statistics.get('mismatch', 0),
533
+ "Checks Run": check_statistics.get("total", 0),
534
+ "Data Mismatch Detected": check_statistics.get("mismatch", 0),
510
535
  }
511
- if check_statistics.get('failed', 0) > 0:
512
- statistics['Incomplete Checks'] = check_statistics.get('failed', 0)
513
- warning_message = '''
536
+ if check_statistics.get("failed", 0) > 0:
537
+ statistics["Incomplete Checks"] = check_statistics.get("failed", 0)
538
+ warning_message = """
514
539
  :warning: **Incomplete Checks** refers to checks that did not successfully run due to configuration or SQL errors.
515
540
  Please check the output of `recce run` for more information
516
- '''
517
- check_summary = markdown_table([statistics]).set_params(quote=False, row_sep='markdown').get_markdown()
518
- content += f'''
541
+ """
542
+ check_summary = markdown_table([statistics]).set_params(quote=False, row_sep="markdown").get_markdown()
543
+ content += f"""
519
544
  ## Checks Summary
520
545
  {check_summary}
521
546
  {warning_message}
522
- '''
547
+ """
523
548
  if check_content:
524
- content += f'''
549
+ content += f"""
525
550
  ### Checks of Data Mismatch Detected
526
551
  {check_content}
527
- '''
552
+ """
528
553
  return content
recce/tasks/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from .core import Task
2
2
  from .histogram import HistogramDiffTask
3
3
  from .profile import ProfileDiffTask, ProfileTask
4
- from .query import QueryTask, QueryDiffTask, QueryBaseTask
5
- from .rowcount import RowCountTask, RowCountDiffTask
4
+ from .query import QueryBaseTask, QueryDiffTask, QueryTask
5
+ from .rowcount import RowCountDiffTask, RowCountTask
6
6
  from .top_k import TopKDiffTask
7
- from .valuediff import ValueDiffTask, ValueDiffDetailTask
7
+ from .valuediff import ValueDiffDetailTask, ValueDiffTask
recce/tasks/core.py CHANGED
@@ -1,11 +1,11 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import List, Union, Optional, Literal
2
+ from typing import List, Literal, Optional, Union
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
6
  from recce.core import default_context
7
7
  from recce.exceptions import RecceCancelException
8
- from recce.models import Run, Check
8
+ from recce.models import Check, Run
9
9
  from recce.util.pydantic_model import pydantic_model_dump
10
10
 
11
11
 
@@ -63,6 +63,7 @@ class TaskResultDiffer(ABC):
63
63
  @staticmethod
64
64
  def diff(base, current):
65
65
  from deepdiff import DeepDiff
66
+
66
67
  diff = DeepDiff(base, current, ignore_order=True)
67
68
  return diff if diff else None
68
69
 
@@ -76,15 +77,12 @@ class TaskResultDiffer(ABC):
76
77
  select: Optional[str] = None,
77
78
  exclude: Optional[str] = None,
78
79
  packages: Optional[list[str]] = None,
79
- view_mode: Optional[Literal['all', 'changed_models']] = None,
80
+ view_mode: Optional[Literal["all", "changed_models"]] = None,
80
81
  ) -> List[str]:
81
82
  nodes = default_context().adapter.select_nodes(
82
- select=select,
83
- exclude=exclude,
84
- packages=packages,
85
- view_mode=view_mode
83
+ select=select, exclude=exclude, packages=packages, view_mode=view_mode
86
84
  )
87
- return [node for node in nodes if not node.startswith('test.')]
85
+ return [node for node in nodes if not node.startswith("test.")]
88
86
 
89
87
  @abstractmethod
90
88
  def _check_result_changed_fn(self, result):
@@ -100,10 +98,10 @@ class TaskResultDiffer(ABC):
100
98
  Should be implemented by subclass.
101
99
  """
102
100
  params = self.run.params
103
- if params.get('model'):
104
- return [TaskResultDiffer.get_node_id_by_name(params.get('model'))]
105
- elif params.get('node_names'):
106
- names = params.get('node_names', [])
101
+ if params.get("model"):
102
+ return [TaskResultDiffer.get_node_id_by_name(params.get("model"))]
103
+ elif params.get("node_names"):
104
+ names = params.get("node_names", [])
107
105
  return [TaskResultDiffer.get_node_id_by_name(name) for name in names]
108
106
  else:
109
107
  # No related node ids in the params
@@ -125,7 +123,7 @@ class CheckValidator:
125
123
  try:
126
124
  check = Check(**check)
127
125
  except Exception as e:
128
- raise ValueError(f'Invalid check format. {str(e)}')
126
+ raise ValueError(f"Invalid check format. {str(e)}")
129
127
 
130
128
  self.validate_check(check)
131
129