recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (213)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +810 -480
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +119 -51
  12. recce/cli.py +1299 -323
  13. recce/config.py +42 -33
  14. recce/connect_to_cloud.py +138 -0
  15. recce/core.py +55 -47
  16. recce/data/404.html +1 -1
  17. recce/data/__next.__PAGE__.txt +10 -0
  18. recce/data/__next._full.txt +23 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +8 -0
  21. recce/data/__next._tree.txt +5 -0
  22. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
  23. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
  24. recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
  25. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  26. recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
  27. recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
  28. recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
  29. recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
  30. recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
  31. recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
  32. recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
  33. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  34. recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
  35. recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
  36. recce/data/_next/static/chunks/99d638224186c118.js +1 -0
  37. recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
  38. recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
  39. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  40. recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
  41. recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
  42. recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
  43. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  44. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  45. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  46. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  47. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  48. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  49. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  50. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  51. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  52. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  53. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  54. recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
  55. recce/data/_not-found/__next._full.txt +17 -0
  56. recce/data/_not-found/__next._head.txt +8 -0
  57. recce/data/_not-found/__next._index.txt +8 -0
  58. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  59. recce/data/_not-found/__next._not-found.txt +4 -0
  60. recce/data/_not-found/__next._tree.txt +3 -0
  61. recce/data/_not-found.html +1 -0
  62. recce/data/_not-found.txt +17 -0
  63. recce/data/auth_callback.html +68 -0
  64. recce/data/imgs/reload-image.svg +4 -0
  65. recce/data/index.html +1 -27
  66. recce/data/index.txt +23 -7
  67. recce/diff.py +6 -12
  68. recce/event/__init__.py +86 -74
  69. recce/event/collector.py +33 -22
  70. recce/event/track.py +49 -27
  71. recce/exceptions.py +1 -1
  72. recce/git.py +7 -7
  73. recce/github.py +57 -53
  74. recce/mcp_server.py +716 -0
  75. recce/models/__init__.py +4 -1
  76. recce/models/check.py +6 -7
  77. recce/models/run.py +1 -0
  78. recce/models/types.py +131 -28
  79. recce/pull_request.py +27 -25
  80. recce/run.py +165 -121
  81. recce/server.py +303 -111
  82. recce/state/__init__.py +31 -0
  83. recce/state/cloud.py +632 -0
  84. recce/state/const.py +26 -0
  85. recce/state/local.py +56 -0
  86. recce/state/state.py +119 -0
  87. recce/state/state_loader.py +174 -0
  88. recce/summary.py +188 -143
  89. recce/tasks/__init__.py +19 -3
  90. recce/tasks/core.py +11 -13
  91. recce/tasks/dataframe.py +82 -18
  92. recce/tasks/histogram.py +69 -34
  93. recce/tasks/lineage.py +2 -2
  94. recce/tasks/profile.py +152 -86
  95. recce/tasks/query.py +139 -87
  96. recce/tasks/rowcount.py +37 -31
  97. recce/tasks/schema.py +18 -15
  98. recce/tasks/top_k.py +35 -35
  99. recce/tasks/valuediff.py +216 -152
  100. recce/util/__init__.py +3 -0
  101. recce/util/api_token.py +80 -0
  102. recce/util/breaking.py +87 -85
  103. recce/util/cll.py +274 -219
  104. recce/util/io.py +22 -17
  105. recce/util/lineage.py +65 -16
  106. recce/util/logger.py +1 -1
  107. recce/util/onboarding_state.py +45 -0
  108. recce/util/perf_tracking.py +85 -0
  109. recce/util/recce_cloud.py +322 -72
  110. recce/util/singleton.py +4 -4
  111. recce/yaml/__init__.py +7 -10
  112. recce_cloud/__init__.py +24 -0
  113. recce_cloud/api/__init__.py +17 -0
  114. recce_cloud/api/base.py +111 -0
  115. recce_cloud/api/client.py +150 -0
  116. recce_cloud/api/exceptions.py +26 -0
  117. recce_cloud/api/factory.py +63 -0
  118. recce_cloud/api/github.py +76 -0
  119. recce_cloud/api/gitlab.py +82 -0
  120. recce_cloud/artifact.py +57 -0
  121. recce_cloud/ci_providers/__init__.py +9 -0
  122. recce_cloud/ci_providers/base.py +82 -0
  123. recce_cloud/ci_providers/detector.py +147 -0
  124. recce_cloud/ci_providers/github_actions.py +136 -0
  125. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  126. recce_cloud/cli.py +245 -0
  127. recce_cloud/upload.py +214 -0
  128. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
  129. recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
  130. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
  131. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
  132. tests/adapter/dbt_adapter/conftest.py +9 -5
  133. tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
  134. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  135. tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
  136. tests/adapter/dbt_adapter/test_selector.py +22 -21
  137. tests/recce_cloud/__init__.py +0 -0
  138. tests/recce_cloud/test_ci_providers.py +351 -0
  139. tests/recce_cloud/test_cli.py +372 -0
  140. tests/recce_cloud/test_client.py +273 -0
  141. tests/recce_cloud/test_platform_clients.py +333 -0
  142. tests/tasks/conftest.py +1 -1
  143. tests/tasks/test_histogram.py +58 -66
  144. tests/tasks/test_lineage.py +36 -23
  145. tests/tasks/test_preset_checks.py +45 -31
  146. tests/tasks/test_profile.py +339 -15
  147. tests/tasks/test_query.py +46 -46
  148. tests/tasks/test_row_count.py +65 -46
  149. tests/tasks/test_schema.py +65 -42
  150. tests/tasks/test_top_k.py +22 -18
  151. tests/tasks/test_valuediff.py +43 -32
  152. tests/test_cli.py +174 -60
  153. tests/test_cli_mcp_optional.py +45 -0
  154. tests/test_cloud_listing_cli.py +324 -0
  155. tests/test_config.py +7 -9
  156. tests/test_connect_to_cloud.py +82 -0
  157. tests/test_core.py +151 -4
  158. tests/test_dbt.py +7 -7
  159. tests/test_mcp_server.py +332 -0
  160. tests/test_pull_request.py +1 -1
  161. tests/test_server.py +25 -19
  162. tests/test_summary.py +29 -17
  163. recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
  164. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  165. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  166. recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
  167. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  168. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  169. recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
  170. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  171. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  172. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  173. recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
  174. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  175. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  176. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  177. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  178. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  179. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  180. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  181. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  182. recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
  183. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  184. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  185. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  186. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  187. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  188. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  189. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  190. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  191. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  192. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  193. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  194. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  195. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  196. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  197. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  198. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  199. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  200. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  202. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  203. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  205. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  206. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  207. recce/state.py +0 -753
  208. recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
  209. tests/test_state.py +0 -123
  210. /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
  211. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  212. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
  213. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/summary.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import os
2
2
  import sys
3
- from typing import List, Dict, Set, Union, Type, Optional
3
+ from typing import Dict, List, Optional, Set, Type, Union
4
4
  from uuid import UUID
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from recce.apis.check_func import get_node_name_by_id
9
9
  from recce.core import RecceContext
10
- from recce.models import CheckDAO, RunDAO, RunType, Run
10
+ from recce.models import CheckDAO, Run, RunDAO, RunType
11
11
  from recce.tasks.core import TaskResultDiffer
12
12
  from recce.tasks.histogram import HistogramDiffTaskResultDiffer
13
13
  from recce.tasks.profile import ProfileDiffResultDiffer
@@ -15,13 +15,16 @@ from recce.tasks.query import QueryDiffResultDiffer
15
15
  from recce.tasks.rowcount import RowCountDiffResultDiffer
16
16
  from recce.tasks.schema import SchemaDiffResultDiffer
17
17
  from recce.tasks.top_k import TopKDiffTaskResultDiffer
18
- from recce.tasks.valuediff import ValueDiffTaskResultDiffer, ValueDiffDetailTaskResultDiffer
18
+ from recce.tasks.valuediff import (
19
+ ValueDiffDetailTaskResultDiffer,
20
+ ValueDiffTaskResultDiffer,
21
+ )
19
22
 
20
- RECCE_CLOUD_HOST = os.environ.get('RECCE_CLOUD_HOST', 'https://cloud.datarecce.io')
23
+ RECCE_CLOUD_HOST = os.environ.get("RECCE_CLOUD_HOST", "https://cloud.datarecce.io")
21
24
 
22
- ADD_COLOR = '#1dce00'
23
- MODIFIED_COLOR = '#ffa502'
24
- REMOVE_COLOR = '#ff067e'
25
+ ADD_COLOR = "#1dce00"
26
+ MODIFIED_COLOR = "#ffa502"
27
+ REMOVE_COLOR = "#ff067e"
25
28
 
26
29
  MAX_MERMAID_TEXT_SIZE = 50000 # source: https://mermaid.js.org/config/schema-docs/config.html#maxtextsize
27
30
 
@@ -42,44 +45,44 @@ class Node:
42
45
  base_data: dict
43
46
  current_data: dict
44
47
 
45
- def __init__(self, node_id: str, node_data: dict, data_from: str = 'base'):
48
+ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):
46
49
  self.id = node_id
47
- self.name = node_data['name']
50
+ self.name = node_data["name"]
48
51
  self.data_from = data_from
49
- self.resource_type = node_data['resource_type']
50
- self.package_name = node_data['package_name']
52
+ self.resource_type = node_data["resource_type"]
53
+ self.package_name = node_data["package_name"]
51
54
  self.children = []
52
55
  self.parents = []
53
56
 
54
57
  self.base_data = {}
55
58
  self.current_data = {}
56
59
 
57
- if data_from == 'base':
60
+ if data_from == "base":
58
61
  self.base_data = node_data
59
- elif data_from == 'current':
62
+ elif data_from == "current":
60
63
  self.current_data = node_data
61
64
 
62
65
  @property
63
66
  def change_status(self):
64
- base_checksum = self.base_data.get('checksum', {}).get('checksum')
65
- curr_checksum = self.current_data.get('checksum', {}).get('checksum')
66
- if self.data_from == 'base':
67
- return 'removed'
68
- elif self.data_from == 'current':
69
- return 'added'
67
+ base_checksum = self.base_data.get("checksum", {}).get("checksum")
68
+ curr_checksum = self.current_data.get("checksum", {}).get("checksum")
69
+ if self.data_from == "base":
70
+ return "removed"
71
+ elif self.data_from == "current":
72
+ return "added"
70
73
  elif base_checksum and curr_checksum and base_checksum != curr_checksum:
71
- return 'modified'
74
+ return "modified"
72
75
  return None
73
76
 
74
77
  def update_data(self, node_data: dict, data_from: str):
75
- if data_from not in ['base', 'current']:
76
- raise ValueError(f'Invalid data_from value: {data_from}')
78
+ if data_from not in ["base", "current"]:
79
+ raise ValueError(f"Invalid data_from value: {data_from}")
77
80
  if self.data_from != data_from:
78
- self.data_from = 'both'
81
+ self.data_from = "both"
79
82
 
80
- if data_from == 'base':
83
+ if data_from == "base":
81
84
  self.base_data = node_data
82
- elif data_from == 'current':
85
+ elif data_from == "current":
83
86
  self.current_data = node_data
84
87
 
85
88
  def append_parent(self, parent_id: str):
@@ -93,8 +96,8 @@ class Node:
93
96
  def _cal_row_count_delta_percentage(self):
94
97
  row_count_diff, run_result = _get_node_row_count_diff(self.id, self.name)
95
98
  if row_count_diff:
96
- base = run_result.get('base', 0)
97
- current = run_result.get('curr', 0)
99
+ base = run_result.get("base", 0)
100
+ current = run_result.get("curr", 0)
98
101
  if int(current) > int(base):
99
102
  p = (int(current) - int(base)) / int(current) * 100
100
103
  return f'🔼 +{round(p, 2) if p > 0.1 else "<0.1"}%'
@@ -104,25 +107,25 @@ class Node:
104
107
  return None
105
108
 
106
109
  def _get_schema_diff(self):
107
- base_schema = self.base_data.get('columns', {})
108
- current_schema = self.current_data.get('columns', {})
110
+ base_schema = self.base_data.get("columns", {})
111
+ current_schema = self.current_data.get("columns", {})
109
112
  schema_diff = TaskResultDiffer.diff(base_schema, current_schema)
110
113
  return schema_diff
111
114
 
112
115
  def _what_changed(self, checks=None):
113
116
  changes = []
114
- if self.change_status == 'added':
115
- return ['Added Node']
116
- elif self.change_status == 'removed':
117
- return ['Removed Node']
118
- elif self.change_status == 'modified':
119
- changes.append('Code')
117
+ if self.change_status == "added":
118
+ return ["Added Node"]
119
+ elif self.change_status == "removed":
120
+ return ["Removed Node"]
121
+ elif self.change_status == "modified":
122
+ changes.append("Code")
120
123
  row_count_delta_percentage = self._cal_row_count_delta_percentage()
121
124
  if row_count_delta_percentage:
122
- changes.append(f'Row Count {row_count_delta_percentage}')
125
+ changes.append(f"Row Count {row_count_delta_percentage}")
123
126
  schema_diff = self._get_schema_diff()
124
127
  if schema_diff:
125
- changes.append('Schema')
128
+ changes.append("Schema")
126
129
 
127
130
  if checks:
128
131
  for check in checks:
@@ -131,7 +134,7 @@ class Node:
131
134
  # Skip the row count and schema diff check, since we already have it.
132
135
  continue
133
136
  if check.node_ids and self.id in check.node_ids:
134
- changes.append(str(check.type).replace('_', ' ').title())
137
+ changes.append(str(check.type).replace("_", " ").title())
135
138
  return changes
136
139
 
137
140
  def get_node_str(self, checks=None):
@@ -140,12 +143,12 @@ class Node:
140
143
 
141
144
  if self.change_status is not None:
142
145
  is_changed = True
143
- if self.change_status == 'added':
144
- style = f'style {self.id} stroke:{ADD_COLOR}'
145
- elif self.change_status == 'modified':
146
- style = f'style {self.id} stroke:{MODIFIED_COLOR}'
147
- elif self.change_status == 'removed':
148
- style = f'style {self.id} stroke:{REMOVE_COLOR}'
146
+ if self.change_status == "added":
147
+ style = f"style {self.id} stroke:{ADD_COLOR}"
148
+ elif self.change_status == "modified":
149
+ style = f"style {self.id} stroke:{MODIFIED_COLOR}"
150
+ elif self.change_status == "removed":
151
+ style = f"style {self.id} stroke:{REMOVE_COLOR}"
149
152
 
150
153
  if checks:
151
154
  for check in checks:
@@ -154,13 +157,13 @@ class Node:
154
157
 
155
158
  content_output = f'{self.id}["{self.name}'
156
159
  if is_changed:
157
- content_output += '\n\n[What\'s Changed]\n'
160
+ content_output += "\n\n[What's Changed]\n"
158
161
  changes = self._what_changed(checks)
159
- content_output += ', '.join(changes)
162
+ content_output += ", ".join(changes)
160
163
 
161
164
  content_output += '"]\n'
162
165
  if style:
163
- content_output += f'{style}\n'
166
+ content_output += f"{style}\n"
164
167
  return content_output
165
168
 
166
169
 
@@ -171,7 +174,7 @@ class Edge:
171
174
  parent_id: str
172
175
  change_status: Union[str, None]
173
176
 
174
- def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = 'base'):
177
+ def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = "base"):
175
178
  self.id = edge_id
176
179
  self.edge_from = edge_from
177
180
  self.child_id = child_id
@@ -179,7 +182,7 @@ class Edge:
179
182
 
180
183
  def update_edge_from(self, edge_from: str):
181
184
  if self.edge_from != edge_from:
182
- self.edge_from = 'both'
185
+ self.edge_from = "both"
183
186
 
184
187
 
185
188
  class CheckSummary(BaseModel):
@@ -225,21 +228,21 @@ class LineageGraph:
225
228
  edges: Dict[str, Edge] = {}
226
229
  checks: List[CheckSummary] = None
227
230
 
228
- def create_node(self, node_id: str, node_data: dict, data_from: str = 'base'):
231
+ def create_node(self, node_id: str, node_data: dict, data_from: str = "base"):
229
232
  if node_id not in self.nodes:
230
233
  self.nodes[node_id] = Node(node_id, node_data, data_from)
231
234
  else:
232
235
  self.nodes[node_id].update_data(node_data, data_from)
233
236
 
234
- def create_edge(self, parent_id: str, child_id: str, edge_from: str = 'base'):
237
+ def create_edge(self, parent_id: str, child_id: str, edge_from: str = "base"):
235
238
  if parent_id not in self.nodes:
236
- _warn(f'Parent node {parent_id} not found in graph')
239
+ _warn(f"Parent node {parent_id} not found in graph")
237
240
  return
238
241
  if child_id not in self.nodes:
239
- _warn(f'Child node {child_id} not found in graph')
242
+ _warn(f"Child node {child_id} not found in graph")
240
243
  return
241
244
 
242
- edge_id = f'{parent_id}-->{child_id}'
245
+ edge_id = f"{parent_id}-->{child_id}"
243
246
  if edge_id in self.edges:
244
247
  self.edges[edge_id].update_edge_from(edge_from)
245
248
  else:
@@ -250,67 +253,87 @@ class LineageGraph:
250
253
  @property
251
254
  def modified_set(self) -> Set[str]:
252
255
  return set(
253
- [node_id for node_id, node in self.nodes.items() if node.change_status in ['added', 'removed', 'modified']])
256
+ [node_id for node_id, node in self.nodes.items() if node.change_status in ["added", "removed", "modified"]]
257
+ )
254
258
 
255
259
  def get_edge_str(self, edge_id):
256
260
  edge = self.edges[edge_id]
257
261
  child = self.nodes[edge.child_id]
258
262
 
259
- if child.change_status == 'removed':
260
- return f'{edge.parent_id}-.->{edge.child_id}\n'
261
- if child.change_status is None or child.change_status == 'modified':
262
- return f'{edge.parent_id}---->{edge.child_id}\n'
263
- if child.change_status == 'added':
264
- return f'{edge.parent_id}-...->{edge.child_id}\n'
263
+ if child.change_status == "removed":
264
+ return f"{edge.parent_id}-.->{edge.child_id}\n"
265
+ if child.change_status is None or child.change_status == "modified":
266
+ return f"{edge.parent_id}---->{edge.child_id}\n"
267
+ if child.change_status == "added":
268
+ return f"{edge.parent_id}-...->{edge.child_id}\n"
265
269
 
266
270
 
267
271
  def _build_lineage_graph(base, current) -> LineageGraph:
268
272
  graph = LineageGraph()
269
273
 
274
+ # Get the current package name to filter nodes (from the current manifest metadata)
275
+ package_name = None
276
+ manifest_metadata = current.get("manifest_metadata")
277
+ if manifest_metadata and hasattr(manifest_metadata, "project_name"):
278
+ # The default package name is the project name
279
+ package_name = manifest_metadata.project_name
280
+
270
281
  # Init Graph nodes with base & current nodes
271
- for node_id, node_data in base.get('nodes', {}).items():
272
- graph.create_node(node_id, node_data, 'base')
282
+ for node_id, node_data in base.get("nodes", {}).items():
283
+ # Skip nodes that are not from the current package
284
+ if package_name and node_data.get("package_name") != package_name:
285
+ continue
286
+ graph.create_node(node_id, node_data, "base")
273
287
 
274
- for node_id, node_data in current.get('nodes', {}).items():
288
+ for node_id, node_data in current.get("nodes", {}).items():
289
+ # Skip nodes that are not from the current package
290
+ if package_name and node_data.get("package_name") != package_name:
291
+ continue
275
292
  if node_id not in graph.nodes:
276
- node = Node(node_id, node_data, 'current')
293
+ node = Node(node_id, node_data, "current")
277
294
  graph.nodes[node_id] = node
278
295
  else:
279
296
  node = graph.nodes[node_id]
280
- node.update_data(node_data, 'current')
297
+ node.update_data(node_data, "current")
281
298
 
282
299
  # Build edges
283
- for child_id, parents in base.get('parent_map', {}).items():
300
+ for child_id, parents in base.get("parent_map", {}).items():
284
301
  for parent_id in parents:
285
- graph.create_edge(parent_id, child_id, 'base')
286
- for child_id, parents in current.get('parent_map', {}).items():
302
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
303
+ continue
304
+
305
+ graph.create_edge(parent_id, child_id, "base")
306
+ for child_id, parents in current.get("parent_map", {}).items():
287
307
  for parent_id in parents:
288
- graph.create_edge(parent_id, child_id, 'current')
308
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
309
+ continue
310
+
311
+ graph.create_edge(parent_id, child_id, "current")
289
312
 
290
313
  return graph
291
314
 
292
315
 
293
316
  def _build_node_schema(lineage, node_id):
294
- return lineage.get('nodes', {}).get(node_id, {}).get('columns', {})
317
+ return lineage.get("nodes", {}).get(node_id, {}).get("columns", {})
295
318
 
296
319
 
297
320
  def _get_node_row_count_diff(node_id, node_name):
298
321
  row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
299
322
  for run in row_count_runs:
300
- if node_id in run.params.get('node_ids', []):
323
+ if node_id in run.params.get("node_ids", []):
301
324
  result = run.result.get(node_name, {})
302
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
325
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
303
326
  return diff, result
304
- elif run.params.get('node_id') == node_id:
327
+ elif run.params.get("node_id") == node_id:
305
328
  result = run.result.get(node_name, {})
306
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
329
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
307
330
  return diff, result
308
331
  return None, None
309
332
 
310
333
 
311
334
  def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> str:
312
335
  if not check.related_nodes:
313
- return 'N/A'
336
+ return "N/A"
314
337
 
315
338
  nodes = check.related_nodes
316
339
  if check.changed_nodes:
@@ -319,39 +342,43 @@ def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> s
319
342
  nodes = check.changed_nodes
320
343
 
321
344
  if len(nodes) <= limit:
322
- return ', '.join(nodes)
345
+ return ", ".join(nodes)
323
346
 
324
- display_nodes = nodes[:limit - 1]
325
- return ', '.join(display_nodes) + f', and {len(nodes) - len(display_nodes)} more nodes'
347
+ display_nodes = nodes[: limit - 1]
348
+ return ", ".join(display_nodes) + f", and {len(nodes) - len(display_nodes)} more nodes"
326
349
 
327
350
 
328
351
  def generate_summary_metadata(base_lineage, curr_lineage):
329
352
  from py_markdown_table.markdown_table import markdown_table
330
353
 
331
- base_manifest = base_lineage.get('manifest_metadata')
332
- base_catalog = base_lineage.get('catalog_metadata')
333
- curr_manifest = curr_lineage.get('manifest_metadata')
334
- curr_catalog = curr_lineage.get('catalog_metadata')
354
+ base_manifest = base_lineage.get("manifest_metadata")
355
+ base_catalog = base_lineage.get("catalog_metadata")
356
+ curr_manifest = curr_lineage.get("manifest_metadata")
357
+ curr_catalog = curr_lineage.get("catalog_metadata")
335
358
 
336
359
  metadata = [
337
360
  {
338
- '': 'Base',
339
- 'Manifest': base_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
340
- 'Catalog': base_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if base_catalog else 'N/A'
361
+ "": "Base",
362
+ "Manifest": base_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
363
+ "Catalog": base_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if base_catalog else "N/A",
341
364
  },
342
365
  {
343
- '': 'Current',
344
- 'Manifest': curr_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
345
- 'Catalog': curr_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if curr_catalog else 'N/A'
346
- }
366
+ "": "Current",
367
+ "Manifest": curr_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
368
+ "Catalog": curr_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if curr_catalog else "N/A",
369
+ },
347
370
  ]
348
371
 
349
- return markdown_table(metadata).set_params(
350
- quote=False,
351
- row_sep='markdown',
352
- padding_width=1,
353
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
354
- ).get_markdown()
372
+ return (
373
+ markdown_table(metadata)
374
+ .set_params(
375
+ quote=False,
376
+ row_sep="markdown",
377
+ padding_width=1,
378
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
379
+ )
380
+ .get_markdown()
381
+ )
355
382
 
356
383
 
357
384
  def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], Dict[str, int]):
@@ -376,9 +403,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
376
403
  continue
377
404
  elif check.type == RunType.SCHEMA_DIFF:
378
405
  differ = SchemaDiffResultDiffer(check, base_lineage, curr_lineage)
379
- elif (check.type in [RunType.ROW_COUNT_DIFF, RunType.QUERY_DIFF,
380
- RunType.VALUE_DIFF, RunType.VALUE_DIFF_DETAIL, RunType.PROFILE_DIFF,
381
- RunType.TOP_K_DIFF, RunType.HISTOGRAM_DIFF] and run is not None):
406
+ elif (
407
+ check.type
408
+ in [
409
+ RunType.ROW_COUNT_DIFF,
410
+ RunType.QUERY_DIFF,
411
+ RunType.VALUE_DIFF,
412
+ RunType.VALUE_DIFF_DETAIL,
413
+ RunType.PROFILE_DIFF,
414
+ RunType.TOP_K_DIFF,
415
+ RunType.HISTOGRAM_DIFF,
416
+ ]
417
+ and run is not None
418
+ ):
382
419
  # Check the result is changed or not
383
420
  differ = differ_factory(run)
384
421
 
@@ -391,19 +428,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
391
428
  description=check.description,
392
429
  changes=differ.changes,
393
430
  node_ids=differ.related_node_ids,
394
- changed_nodes=differ.changed_nodes
431
+ changed_nodes=differ.changed_nodes,
395
432
  )
396
433
  )
397
434
 
398
435
  return checks_summary, {
399
- 'total': len(checks),
400
- 'mismatch': len(checks_summary),
401
- 'failed': failed_checks_count,
436
+ "total": len(checks),
437
+ "mismatch": len(checks_summary),
438
+ "failed": failed_checks_count,
402
439
  }
403
440
 
404
441
 
405
442
  def generate_mermaid_lineage_graph(graph: LineageGraph):
406
- content = up_to_level_content = 'graph LR\n'
443
+ content = up_to_level_content = "graph LR\n"
407
444
  is_not_modified = False
408
445
  # Only show the modified nodes and their children
409
446
  queue = list(graph.modified_set)
@@ -427,7 +464,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
427
464
  content += node.get_node_str(graph.checks)
428
465
  for child_id in node.children:
429
466
  queue.append(child_id)
430
- edge_id = f'{node_id}-->{child_id}'
467
+ edge_id = f"{node_id}-->{child_id}"
431
468
  if edge_id not in display_edge:
432
469
  display_edge.add(edge_id)
433
470
  content += graph.get_edge_str(edge_id)
@@ -440,7 +477,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
440
477
  return up_to_level_content, is_not_modified, len(content) > MAX_MERMAID_TEXT_SIZE
441
478
 
442
479
 
443
- def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown'):
480
+ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown"):
444
481
  lineage_diff = ctx.get_lineage_diff()
445
482
  summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
446
483
  graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
@@ -448,81 +485,89 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown
448
485
  mermaid_content, is_empty_graph, is_partial_graph = generate_mermaid_lineage_graph(graph)
449
486
  check_content = generate_check_content(graph, check_statistics)
450
487
 
451
- if summary_format == 'mermaid':
488
+ if summary_format == "mermaid":
452
489
  return mermaid_content
453
- elif summary_format == 'check':
490
+ elif summary_format == "check":
454
491
  return check_content
455
- elif summary_format == 'markdown':
492
+ elif summary_format == "markdown":
456
493
 
457
- content = '# Recce Summary\n'
458
- content += f'## Manifest Information\n{summary_metadata}\n'
494
+ content = "# Recce Summary\n"
495
+ content += f"## Manifest Information\n{summary_metadata}\n"
459
496
 
460
497
  if is_empty_graph is False:
461
- content += f'''
498
+ content += f"""
462
499
  ## Lineage Graph
463
500
  {"_Too many nodes to generate! Please see the full lineage graph on Recce instance._" if is_partial_graph else ''}
464
501
  ```mermaid
465
502
  {mermaid_content}
466
503
  ```
467
- '''
504
+ """
468
505
  else:
469
- content += '''
506
+ content += """
470
507
  ## Lineage Graph
471
508
  No changed module was detected.
472
- '''
509
+ """
473
510
  if check_content:
474
511
  content += check_content
475
512
 
476
513
  if ctx.state_loader.cloud_mode:
477
514
  pr_info = ctx.state_loader.pr_info
478
- content += f'\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n'
515
+ # the classic route will be deprecated soon
516
+ content += f"\nSee PR page: {RECCE_CLOUD_HOST}/classic/{pr_info.repository}/pulls/{pr_info.id}\n"
479
517
 
480
518
  return content
481
519
 
482
520
 
483
521
  def generate_check_content(graph, check_statistics):
484
522
  from py_markdown_table.markdown_table import markdown_table
485
- content = ''
523
+
524
+ content = ""
486
525
  check_content = None
487
526
  # Generate the check summary if we found any changes
488
527
  if len(graph.checks) > 0:
489
528
  data = []
490
529
  for check in graph.checks:
491
- data.append({
492
- 'Name': check.name,
493
- 'Type': str(check.type).replace('_', ' ').title(),
494
- 'Mismatched Nodes': _generate_mismatched_nodes_summary(check),
495
- # Temporarily remove the type of changes, until we implement a better way to display it.
496
- # 'Type of Changes': _formate_changes(check.changes)
497
- })
498
- check_content = markdown_table(data).set_params(
499
- quote=False,
500
- row_sep='markdown',
501
- padding_width=1,
502
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
503
- ).get_markdown()
530
+ data.append(
531
+ {
532
+ "Name": check.name,
533
+ "Type": str(check.type).replace("_", " ").title(),
534
+ "Mismatched Nodes": _generate_mismatched_nodes_summary(check),
535
+ # Temporarily remove the type of changes, until we implement a better way to display it.
536
+ # 'Type of Changes': _formate_changes(check.changes)
537
+ }
538
+ )
539
+ check_content = (
540
+ markdown_table(data)
541
+ .set_params(
542
+ quote=False,
543
+ row_sep="markdown",
544
+ padding_width=1,
545
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
546
+ )
547
+ .get_markdown()
548
+ )
504
549
 
505
- if check_statistics.get('total', 0) > 0:
506
- warning_message = ''
550
+ if check_statistics.get("total", 0) > 0:
551
+ warning_message = ""
507
552
  statistics = {
508
- 'Checks Run': check_statistics.get('total', 0),
509
- 'Data Mismatch Detected': check_statistics.get('mismatch', 0),
553
+ "Checks Run": check_statistics.get("total", 0),
554
+ "Data Mismatch Detected": check_statistics.get("mismatch", 0),
510
555
  }
511
- if check_statistics.get('failed', 0) > 0:
512
- statistics['Incomplete Checks'] = check_statistics.get('failed', 0)
513
- warning_message = '''
556
+ if check_statistics.get("failed", 0) > 0:
557
+ statistics["Incomplete Checks"] = check_statistics.get("failed", 0)
558
+ warning_message = """
514
559
  :warning: **Incomplete Checks** refers to checks that did not successfully run due to configuration or SQL errors.
515
560
  Please check the output of `recce run` for more information
516
- '''
517
- check_summary = markdown_table([statistics]).set_params(quote=False, row_sep='markdown').get_markdown()
518
- content += f'''
561
+ """
562
+ check_summary = markdown_table([statistics]).set_params(quote=False, row_sep="markdown").get_markdown()
563
+ content += f"""
519
564
  ## Checks Summary
520
565
  {check_summary}
521
566
  {warning_message}
522
- '''
567
+ """
523
568
  if check_content:
524
- content += f'''
569
+ content += f"""
525
570
  ### Checks of Data Mismatch Detected
526
571
  {check_content}
527
- '''
572
+ """
528
573
  return content
recce/tasks/__init__.py CHANGED
@@ -1,7 +1,23 @@
1
1
  from .core import Task
2
2
  from .histogram import HistogramDiffTask
3
3
  from .profile import ProfileDiffTask, ProfileTask
4
- from .query import QueryTask, QueryDiffTask, QueryBaseTask
5
- from .rowcount import RowCountTask, RowCountDiffTask
4
+ from .query import QueryBaseTask, QueryDiffTask, QueryTask
5
+ from .rowcount import RowCountDiffTask, RowCountTask
6
6
  from .top_k import TopKDiffTask
7
- from .valuediff import ValueDiffTask, ValueDiffDetailTask
7
+ from .valuediff import ValueDiffDetailTask, ValueDiffTask
8
+
9
+ # Explicitly declare exports
10
+ __all__ = [
11
+ "Task",
12
+ "HistogramDiffTask",
13
+ "ProfileDiffTask",
14
+ "ProfileTask",
15
+ "QueryBaseTask",
16
+ "QueryDiffTask",
17
+ "QueryTask",
18
+ "RowCountDiffTask",
19
+ "RowCountTask",
20
+ "TopKDiffTask",
21
+ "ValueDiffDetailTask",
22
+ "ValueDiffTask",
23
+ ]