recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/summary.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import os
2
2
  import sys
3
- from typing import List, Dict, Set, Union, Type, Optional
3
+ from typing import Dict, List, Optional, Set, Type, Union
4
4
  from uuid import UUID
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from recce.apis.check_func import get_node_name_by_id
9
9
  from recce.core import RecceContext
10
- from recce.models import CheckDAO, RunDAO, RunType, Run
10
+ from recce.models import CheckDAO, Run, RunDAO, RunType
11
11
  from recce.tasks.core import TaskResultDiffer
12
12
  from recce.tasks.histogram import HistogramDiffTaskResultDiffer
13
13
  from recce.tasks.profile import ProfileDiffResultDiffer
@@ -15,13 +15,16 @@ from recce.tasks.query import QueryDiffResultDiffer
15
15
  from recce.tasks.rowcount import RowCountDiffResultDiffer
16
16
  from recce.tasks.schema import SchemaDiffResultDiffer
17
17
  from recce.tasks.top_k import TopKDiffTaskResultDiffer
18
- from recce.tasks.valuediff import ValueDiffTaskResultDiffer, ValueDiffDetailTaskResultDiffer
18
+ from recce.tasks.valuediff import (
19
+ ValueDiffDetailTaskResultDiffer,
20
+ ValueDiffTaskResultDiffer,
21
+ )
19
22
 
20
- RECCE_CLOUD_HOST = os.environ.get('RECCE_CLOUD_HOST', 'https://cloud.datarecce.io')
23
+ RECCE_CLOUD_HOST = os.environ.get("RECCE_CLOUD_HOST", "https://cloud.datarecce.io")
21
24
 
22
- ADD_COLOR = '#1dce00'
23
- MODIFIED_COLOR = '#ffa502'
24
- REMOVE_COLOR = '#ff067e'
25
+ ADD_COLOR = "#1dce00"
26
+ MODIFIED_COLOR = "#ffa502"
27
+ REMOVE_COLOR = "#ff067e"
25
28
 
26
29
  MAX_MERMAID_TEXT_SIZE = 50000 # source: https://mermaid.js.org/config/schema-docs/config.html#maxtextsize
27
30
 
@@ -42,44 +45,44 @@ class Node:
42
45
  base_data: dict
43
46
  current_data: dict
44
47
 
45
- def __init__(self, node_id: str, node_data: dict, data_from: str = 'base'):
48
+ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):
46
49
  self.id = node_id
47
- self.name = node_data['name']
50
+ self.name = node_data["name"]
48
51
  self.data_from = data_from
49
- self.resource_type = node_data['resource_type']
50
- self.package_name = node_data['package_name']
52
+ self.resource_type = node_data["resource_type"]
53
+ self.package_name = node_data["package_name"]
51
54
  self.children = []
52
55
  self.parents = []
53
56
 
54
57
  self.base_data = {}
55
58
  self.current_data = {}
56
59
 
57
- if data_from == 'base':
60
+ if data_from == "base":
58
61
  self.base_data = node_data
59
- elif data_from == 'current':
62
+ elif data_from == "current":
60
63
  self.current_data = node_data
61
64
 
62
65
  @property
63
66
  def change_status(self):
64
- base_checksum = self.base_data.get('checksum', {}).get('checksum')
65
- curr_checksum = self.current_data.get('checksum', {}).get('checksum')
66
- if self.data_from == 'base':
67
- return 'removed'
68
- elif self.data_from == 'current':
69
- return 'added'
67
+ base_checksum = self.base_data.get("checksum", {}).get("checksum")
68
+ curr_checksum = self.current_data.get("checksum", {}).get("checksum")
69
+ if self.data_from == "base":
70
+ return "removed"
71
+ elif self.data_from == "current":
72
+ return "added"
70
73
  elif base_checksum and curr_checksum and base_checksum != curr_checksum:
71
- return 'modified'
74
+ return "modified"
72
75
  return None
73
76
 
74
77
  def update_data(self, node_data: dict, data_from: str):
75
- if data_from not in ['base', 'current']:
76
- raise ValueError(f'Invalid data_from value: {data_from}')
78
+ if data_from not in ["base", "current"]:
79
+ raise ValueError(f"Invalid data_from value: {data_from}")
77
80
  if self.data_from != data_from:
78
- self.data_from = 'both'
81
+ self.data_from = "both"
79
82
 
80
- if data_from == 'base':
83
+ if data_from == "base":
81
84
  self.base_data = node_data
82
- elif data_from == 'current':
85
+ elif data_from == "current":
83
86
  self.current_data = node_data
84
87
 
85
88
  def append_parent(self, parent_id: str):
@@ -93,8 +96,8 @@ class Node:
93
96
  def _cal_row_count_delta_percentage(self):
94
97
  row_count_diff, run_result = _get_node_row_count_diff(self.id, self.name)
95
98
  if row_count_diff:
96
- base = run_result.get('base', 0)
97
- current = run_result.get('curr', 0)
99
+ base = run_result.get("base", 0)
100
+ current = run_result.get("curr", 0)
98
101
  if int(current) > int(base):
99
102
  p = (int(current) - int(base)) / int(current) * 100
100
103
  return f'🔼 +{round(p, 2) if p > 0.1 else "<0.1"}%'
@@ -104,25 +107,25 @@ class Node:
104
107
  return None
105
108
 
106
109
  def _get_schema_diff(self):
107
- base_schema = self.base_data.get('columns', {})
108
- current_schema = self.current_data.get('columns', {})
110
+ base_schema = self.base_data.get("columns", {})
111
+ current_schema = self.current_data.get("columns", {})
109
112
  schema_diff = TaskResultDiffer.diff(base_schema, current_schema)
110
113
  return schema_diff
111
114
 
112
115
  def _what_changed(self, checks=None):
113
116
  changes = []
114
- if self.change_status == 'added':
115
- return ['Added Node']
116
- elif self.change_status == 'removed':
117
- return ['Removed Node']
118
- elif self.change_status == 'modified':
119
- changes.append('Code')
117
+ if self.change_status == "added":
118
+ return ["Added Node"]
119
+ elif self.change_status == "removed":
120
+ return ["Removed Node"]
121
+ elif self.change_status == "modified":
122
+ changes.append("Code")
120
123
  row_count_delta_percentage = self._cal_row_count_delta_percentage()
121
124
  if row_count_delta_percentage:
122
- changes.append(f'Row Count {row_count_delta_percentage}')
125
+ changes.append(f"Row Count {row_count_delta_percentage}")
123
126
  schema_diff = self._get_schema_diff()
124
127
  if schema_diff:
125
- changes.append('Schema')
128
+ changes.append("Schema")
126
129
 
127
130
  if checks:
128
131
  for check in checks:
@@ -131,7 +134,7 @@ class Node:
131
134
  # Skip the row count and schema diff check, since we already have it.
132
135
  continue
133
136
  if check.node_ids and self.id in check.node_ids:
134
- changes.append(str(check.type).replace('_', ' ').title())
137
+ changes.append(str(check.type).replace("_", " ").title())
135
138
  return changes
136
139
 
137
140
  def get_node_str(self, checks=None):
@@ -140,12 +143,12 @@ class Node:
140
143
 
141
144
  if self.change_status is not None:
142
145
  is_changed = True
143
- if self.change_status == 'added':
144
- style = f'style {self.id} stroke:{ADD_COLOR}'
145
- elif self.change_status == 'modified':
146
- style = f'style {self.id} stroke:{MODIFIED_COLOR}'
147
- elif self.change_status == 'removed':
148
- style = f'style {self.id} stroke:{REMOVE_COLOR}'
146
+ if self.change_status == "added":
147
+ style = f"style {self.id} stroke:{ADD_COLOR}"
148
+ elif self.change_status == "modified":
149
+ style = f"style {self.id} stroke:{MODIFIED_COLOR}"
150
+ elif self.change_status == "removed":
151
+ style = f"style {self.id} stroke:{REMOVE_COLOR}"
149
152
 
150
153
  if checks:
151
154
  for check in checks:
@@ -154,13 +157,13 @@ class Node:
154
157
 
155
158
  content_output = f'{self.id}["{self.name}'
156
159
  if is_changed:
157
- content_output += '\n\n[What\'s Changed]\n'
160
+ content_output += "\n\n[What's Changed]\n"
158
161
  changes = self._what_changed(checks)
159
- content_output += ', '.join(changes)
162
+ content_output += ", ".join(changes)
160
163
 
161
164
  content_output += '"]\n'
162
165
  if style:
163
- content_output += f'{style}\n'
166
+ content_output += f"{style}\n"
164
167
  return content_output
165
168
 
166
169
 
@@ -171,7 +174,7 @@ class Edge:
171
174
  parent_id: str
172
175
  change_status: Union[str, None]
173
176
 
174
- def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = 'base'):
177
+ def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = "base"):
175
178
  self.id = edge_id
176
179
  self.edge_from = edge_from
177
180
  self.child_id = child_id
@@ -179,7 +182,7 @@ class Edge:
179
182
 
180
183
  def update_edge_from(self, edge_from: str):
181
184
  if self.edge_from != edge_from:
182
- self.edge_from = 'both'
185
+ self.edge_from = "both"
183
186
 
184
187
 
185
188
  class CheckSummary(BaseModel):
@@ -225,21 +228,21 @@ class LineageGraph:
225
228
  edges: Dict[str, Edge] = {}
226
229
  checks: List[CheckSummary] = None
227
230
 
228
- def create_node(self, node_id: str, node_data: dict, data_from: str = 'base'):
231
+ def create_node(self, node_id: str, node_data: dict, data_from: str = "base"):
229
232
  if node_id not in self.nodes:
230
233
  self.nodes[node_id] = Node(node_id, node_data, data_from)
231
234
  else:
232
235
  self.nodes[node_id].update_data(node_data, data_from)
233
236
 
234
- def create_edge(self, parent_id: str, child_id: str, edge_from: str = 'base'):
237
+ def create_edge(self, parent_id: str, child_id: str, edge_from: str = "base"):
235
238
  if parent_id not in self.nodes:
236
- _warn(f'Parent node {parent_id} not found in graph')
239
+ _warn(f"Parent node {parent_id} not found in graph")
237
240
  return
238
241
  if child_id not in self.nodes:
239
- _warn(f'Child node {child_id} not found in graph')
242
+ _warn(f"Child node {child_id} not found in graph")
240
243
  return
241
244
 
242
- edge_id = f'{parent_id}-->{child_id}'
245
+ edge_id = f"{parent_id}-->{child_id}"
243
246
  if edge_id in self.edges:
244
247
  self.edges[edge_id].update_edge_from(edge_from)
245
248
  else:
@@ -249,67 +252,89 @@ class LineageGraph:
249
252
 
250
253
  @property
251
254
  def modified_set(self) -> Set[str]:
252
- return set([node_id for node_id, node in self.nodes.items() if node.change_status == 'modified'])
255
+ return set(
256
+ [node_id for node_id, node in self.nodes.items() if node.change_status in ["added", "removed", "modified"]]
257
+ )
253
258
 
254
259
  def get_edge_str(self, edge_id):
255
260
  edge = self.edges[edge_id]
256
261
  child = self.nodes[edge.child_id]
257
262
 
258
- if child.change_status == 'removed':
259
- return f'{edge.parent_id}-.->{edge.child_id}\n'
260
- if child.change_status is None or child.change_status == 'modified':
261
- return f'{edge.parent_id}---->{edge.child_id}\n'
262
- if child.change_status == 'added':
263
- return f'{edge.parent_id}-...->{edge.child_id}\n'
263
+ if child.change_status == "removed":
264
+ return f"{edge.parent_id}-.->{edge.child_id}\n"
265
+ if child.change_status is None or child.change_status == "modified":
266
+ return f"{edge.parent_id}---->{edge.child_id}\n"
267
+ if child.change_status == "added":
268
+ return f"{edge.parent_id}-...->{edge.child_id}\n"
264
269
 
265
270
 
266
271
  def _build_lineage_graph(base, current) -> LineageGraph:
267
272
  graph = LineageGraph()
268
273
 
274
+ # Get the current package name to filter nodes (from the current manifest metadata)
275
+ package_name = None
276
+ manifest_metadata = current.get("manifest_metadata")
277
+ if manifest_metadata and hasattr(manifest_metadata, "project_name"):
278
+ # The default package name is the project name
279
+ package_name = manifest_metadata.project_name
280
+
269
281
  # Init Graph nodes with base & current nodes
270
- for node_id, node_data in base.get('nodes', {}).items():
271
- graph.create_node(node_id, node_data, 'base')
282
+ for node_id, node_data in base.get("nodes", {}).items():
283
+ # Skip nodes that are not from the current package
284
+ if package_name and node_data.get("package_name") != package_name:
285
+ continue
286
+ graph.create_node(node_id, node_data, "base")
272
287
 
273
- for node_id, node_data in current.get('nodes', {}).items():
288
+ for node_id, node_data in current.get("nodes", {}).items():
289
+ # Skip nodes that are not from the current package
290
+ if package_name and node_data.get("package_name") != package_name:
291
+ continue
274
292
  if node_id not in graph.nodes:
275
- node = Node(node_id, node_data, 'current')
293
+ node = Node(node_id, node_data, "current")
276
294
  graph.nodes[node_id] = node
277
295
  else:
278
296
  node = graph.nodes[node_id]
279
- node.update_data(node_data, 'current')
297
+ node.update_data(node_data, "current")
280
298
 
281
299
  # Build edges
282
- for child_id, parents in base.get('parent_map', {}).items():
300
+ for child_id, parents in base.get("parent_map", {}).items():
283
301
  for parent_id in parents:
284
- graph.create_edge(parent_id, child_id, 'base')
285
- for child_id, parents in current.get('parent_map', {}).items():
302
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
303
+ continue
304
+
305
+ graph.create_edge(parent_id, child_id, "base")
306
+ for child_id, parents in current.get("parent_map", {}).items():
286
307
  for parent_id in parents:
287
- graph.create_edge(parent_id, child_id, 'current')
308
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
309
+ continue
310
+
311
+ graph.create_edge(parent_id, child_id, "current")
288
312
 
289
313
  return graph
290
314
 
291
315
 
292
316
  def _build_node_schema(lineage, node_id):
293
- return lineage.get('nodes', {}).get(node_id, {}).get('columns', {})
317
+ return lineage.get("nodes", {}).get(node_id, {}).get("columns", {})
294
318
 
295
319
 
296
320
  def _get_node_row_count_diff(node_id, node_name):
297
321
  row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
298
322
  for run in row_count_runs:
299
- if node_id in run.params.get('node_ids', []):
323
+ node_ids = (run.params or {}).get("node_ids") or []
324
+ if node_id in node_ids:
300
325
  result = run.result.get(node_name, {})
301
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
326
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
302
327
  return diff, result
303
- elif run.params.get('node_id') == node_id:
328
+ elif run.params.get("node_id") == node_id:
304
329
  result = run.result.get(node_name, {})
305
- diff = TaskResultDiffer.diff(result.get('base'), result.get('curr'))
330
+ diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
306
331
  return diff, result
307
332
  return None, None
308
333
 
309
334
 
310
335
  def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> str:
311
336
  if not check.related_nodes:
312
- return 'N/A'
337
+ return "N/A"
313
338
 
314
339
  nodes = check.related_nodes
315
340
  if check.changed_nodes:
@@ -318,39 +343,43 @@ def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> s
318
343
  nodes = check.changed_nodes
319
344
 
320
345
  if len(nodes) <= limit:
321
- return ', '.join(nodes)
346
+ return ", ".join(nodes)
322
347
 
323
- display_nodes = nodes[:limit - 1]
324
- return ', '.join(display_nodes) + f', and {len(nodes) - len(display_nodes)} more nodes'
348
+ display_nodes = nodes[: limit - 1]
349
+ return ", ".join(display_nodes) + f", and {len(nodes) - len(display_nodes)} more nodes"
325
350
 
326
351
 
327
352
  def generate_summary_metadata(base_lineage, curr_lineage):
328
353
  from py_markdown_table.markdown_table import markdown_table
329
354
 
330
- base_manifest = base_lineage.get('manifest_metadata')
331
- base_catalog = base_lineage.get('catalog_metadata')
332
- curr_manifest = curr_lineage.get('manifest_metadata')
333
- curr_catalog = curr_lineage.get('catalog_metadata')
355
+ base_manifest = base_lineage.get("manifest_metadata")
356
+ base_catalog = base_lineage.get("catalog_metadata")
357
+ curr_manifest = curr_lineage.get("manifest_metadata")
358
+ curr_catalog = curr_lineage.get("catalog_metadata")
334
359
 
335
360
  metadata = [
336
361
  {
337
- '': 'Base',
338
- 'Manifest': base_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
339
- 'Catalog': base_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if base_catalog else 'N/A'
362
+ "": "Base",
363
+ "Manifest": base_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
364
+ "Catalog": base_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if base_catalog else "N/A",
340
365
  },
341
366
  {
342
- '': 'Current',
343
- 'Manifest': curr_manifest.generated_at.strftime('%Y-%m-%d %H:%M:%S'),
344
- 'Catalog': curr_catalog.generated_at.strftime('%Y-%m-%d %H:%M:%S') if curr_catalog else 'N/A'
345
- }
367
+ "": "Current",
368
+ "Manifest": curr_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
369
+ "Catalog": curr_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if curr_catalog else "N/A",
370
+ },
346
371
  ]
347
372
 
348
- return markdown_table(metadata).set_params(
349
- quote=False,
350
- row_sep='markdown',
351
- padding_width=1,
352
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
353
- ).get_markdown()
373
+ return (
374
+ markdown_table(metadata)
375
+ .set_params(
376
+ quote=False,
377
+ row_sep="markdown",
378
+ padding_width=1,
379
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
380
+ )
381
+ .get_markdown()
382
+ )
354
383
 
355
384
 
356
385
  def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], Dict[str, int]):
@@ -375,9 +404,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
375
404
  continue
376
405
  elif check.type == RunType.SCHEMA_DIFF:
377
406
  differ = SchemaDiffResultDiffer(check, base_lineage, curr_lineage)
378
- elif (check.type in [RunType.ROW_COUNT_DIFF, RunType.QUERY_DIFF,
379
- RunType.VALUE_DIFF, RunType.VALUE_DIFF_DETAIL, RunType.PROFILE_DIFF,
380
- RunType.TOP_K_DIFF, RunType.HISTOGRAM_DIFF] and run is not None):
407
+ elif (
408
+ check.type
409
+ in [
410
+ RunType.ROW_COUNT_DIFF,
411
+ RunType.QUERY_DIFF,
412
+ RunType.VALUE_DIFF,
413
+ RunType.VALUE_DIFF_DETAIL,
414
+ RunType.PROFILE_DIFF,
415
+ RunType.TOP_K_DIFF,
416
+ RunType.HISTOGRAM_DIFF,
417
+ ]
418
+ and run is not None
419
+ ):
381
420
  # Check the result is changed or not
382
421
  differ = differ_factory(run)
383
422
 
@@ -390,19 +429,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
390
429
  description=check.description,
391
430
  changes=differ.changes,
392
431
  node_ids=differ.related_node_ids,
393
- changed_nodes=differ.changed_nodes
432
+ changed_nodes=differ.changed_nodes,
394
433
  )
395
434
  )
396
435
 
397
436
  return checks_summary, {
398
- 'total': len(checks),
399
- 'mismatch': len(checks_summary),
400
- 'failed': failed_checks_count,
437
+ "total": len(checks),
438
+ "mismatch": len(checks_summary),
439
+ "failed": failed_checks_count,
401
440
  }
402
441
 
403
442
 
404
443
  def generate_mermaid_lineage_graph(graph: LineageGraph):
405
- content = up_to_level_content = 'graph LR\n'
444
+ content = up_to_level_content = "graph LR\n"
406
445
  is_not_modified = False
407
446
  # Only show the modified nodes and there children
408
447
  queue = list(graph.modified_set)
@@ -426,7 +465,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
426
465
  content += node.get_node_str(graph.checks)
427
466
  for child_id in node.children:
428
467
  queue.append(child_id)
429
- edge_id = f'{node_id}-->{child_id}'
468
+ edge_id = f"{node_id}-->{child_id}"
430
469
  if edge_id not in display_edge:
431
470
  display_edge.add(edge_id)
432
471
  content += graph.get_edge_str(edge_id)
@@ -439,90 +478,98 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
439
478
  return up_to_level_content, is_not_modified, len(content) > MAX_MERMAID_TEXT_SIZE
440
479
 
441
480
 
442
- def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown'):
443
- curr_lineage = ctx.get_lineage(base=False)
444
- base_lineage = ctx.get_lineage(base=True)
445
- summary_metadata = generate_summary_metadata(base_lineage, curr_lineage)
446
- graph = _build_lineage_graph(base_lineage, curr_lineage)
447
- graph.checks, check_statistics = generate_check_summary(base_lineage, curr_lineage)
481
+ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown"):
482
+ lineage_diff = ctx.get_lineage_diff()
483
+ summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
484
+ graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
485
+ graph.checks, check_statistics = generate_check_summary(lineage_diff.base, lineage_diff.current)
448
486
  mermaid_content, is_empty_graph, is_partial_graph = generate_mermaid_lineage_graph(graph)
449
487
  check_content = generate_check_content(graph, check_statistics)
450
488
 
451
- if summary_format == 'mermaid':
489
+ if summary_format == "mermaid":
452
490
  return mermaid_content
453
- elif summary_format == 'check':
491
+ elif summary_format == "check":
454
492
  return check_content
455
- elif summary_format == 'markdown':
493
+ elif summary_format == "markdown":
456
494
 
457
- content = '# Recce Summary\n'
458
- content += f'## Manifest Information\n{summary_metadata}\n'
495
+ content = "# Recce Summary\n"
496
+ content += f"## Manifest Information\n{summary_metadata}\n"
459
497
 
460
498
  if is_empty_graph is False:
461
- content += f'''
499
+ content += f"""
462
500
  ## Lineage Graph
463
501
  {"_Too many nodes to generate! Please see the full lineage graph on Recce instance._" if is_partial_graph else ''}
464
502
  ```mermaid
465
503
  {mermaid_content}
466
504
  ```
467
- '''
505
+ """
468
506
  else:
469
- content += '''
507
+ content += """
470
508
  ## Lineage Graph
471
509
  No changed module was detected.
472
- '''
510
+ """
473
511
  if check_content:
474
512
  content += check_content
475
513
 
476
- if ctx.state_loader.cloud_mode:
514
+ if ctx.state_loader.cloud_mode and ctx.state_loader.pr_info is not None:
477
515
  pr_info = ctx.state_loader.pr_info
478
- content += f'\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n'
516
+ if pr_info.repository is not None and pr_info.id is not None:
517
+ # the classic route will be deprecated soon
518
+ content += f"\nSee PR page: {RECCE_CLOUD_HOST}/classic/{pr_info.repository}/pulls/{pr_info.id}\n"
479
519
 
480
520
  return content
481
521
 
482
522
 
483
523
  def generate_check_content(graph, check_statistics):
484
524
  from py_markdown_table.markdown_table import markdown_table
485
- content = ''
525
+
526
+ content = ""
486
527
  check_content = None
487
528
  # Generate the check summary if we found any changes
488
529
  if len(graph.checks) > 0:
489
530
  data = []
490
531
  for check in graph.checks:
491
- data.append({
492
- 'Name': check.name,
493
- 'Type': str(check.type).replace('_', ' ').title(),
494
- 'Mismatched Nodes': _generate_mismatched_nodes_summary(check),
495
- # Temporarily remove the type of changes, until we implement a better way to display it.
496
- # 'Type of Changes': _formate_changes(check.changes)
497
- })
498
- check_content = markdown_table(data).set_params(
499
- quote=False,
500
- row_sep='markdown',
501
- padding_width=1,
502
- padding_weight='right' # Aligns the cell's contents to the beginning of the cell
503
- ).get_markdown()
532
+ data.append(
533
+ {
534
+ "Name": check.name,
535
+ "Type": str(check.type).replace("_", " ").title(),
536
+ "Mismatched Nodes": _generate_mismatched_nodes_summary(check),
537
+ # Temporarily remove the type of changes, until we implement a better way to display it.
538
+ # 'Type of Changes': _formate_changes(check.changes)
539
+ }
540
+ )
541
+ check_content = (
542
+ markdown_table(data)
543
+ .set_params(
544
+ quote=False,
545
+ row_sep="markdown",
546
+ padding_width=1,
547
+ padding_weight="right", # Aligns the cell's contents to the beginning of the cell
548
+ )
549
+ .get_markdown()
550
+ )
504
551
 
505
- if check_statistics.get('total', 0) > 0:
506
- warning_message = ''
552
+ if check_statistics.get("total", 0) > 0:
553
+ warning_message = ""
507
554
  statistics = {
508
- 'Checks Run': check_statistics.get('total', 0),
509
- 'Data Mismatch Detected': check_statistics.get('mismatch', 0),
555
+ "Checks Run": check_statistics.get("total", 0),
556
+ "Data Mismatch Detected": check_statistics.get("mismatch", 0),
510
557
  }
511
- if check_statistics.get('failed', 0) > 0:
512
- statistics['Incomplete Checks'] = check_statistics.get('failed', 0)
513
- warning_message = '''
558
+ if check_statistics.get("failed", 0) > 0:
559
+ statistics["Incomplete Checks"] = check_statistics.get("failed", 0)
560
+ warning_message = """
514
561
  :warning: **Incomplete Checks** refers to checks that did not successfully run due to configuration or SQL errors.
515
562
  Please check the output of `recce run` for more information
516
- '''
517
- check_summary = markdown_table([statistics]).set_params(quote=False, row_sep='markdown').get_markdown()
518
- content += f'''
563
+ """
564
+ check_summary = markdown_table([statistics]).set_params(quote=False, row_sep="markdown").get_markdown()
565
+ content += f"""
519
566
  ## Checks Summary
520
567
  {check_summary}
521
568
  {warning_message}
522
- '''
569
+ """
523
570
  if check_content:
524
- content += f'''
571
+ content += f"""
525
572
  ### Checks of Data Mismatch Detected
526
573
  {check_content}
527
- '''
574
+ """
528
575
  return content
recce/tasks/__init__.py CHANGED
@@ -1,7 +1,23 @@
1
1
  from .core import Task
2
2
  from .histogram import HistogramDiffTask
3
3
  from .profile import ProfileDiffTask, ProfileTask
4
- from .query import QueryTask, QueryDiffTask, QueryBaseTask
5
- from .rowcount import RowCountTask, RowCountDiffTask
4
+ from .query import QueryBaseTask, QueryDiffTask, QueryTask
5
+ from .rowcount import RowCountDiffTask, RowCountTask
6
6
  from .top_k import TopKDiffTask
7
- from .valuediff import ValueDiffTask, ValueDiffDetailTask
7
+ from .valuediff import ValueDiffDetailTask, ValueDiffTask
8
+
9
+ # Explicitly declare exports
10
+ __all__ = [
11
+ "Task",
12
+ "HistogramDiffTask",
13
+ "ProfileDiffTask",
14
+ "ProfileTask",
15
+ "QueryBaseTask",
16
+ "QueryDiffTask",
17
+ "QueryTask",
18
+ "RowCountDiffTask",
19
+ "RowCountTask",
20
+ "TopKDiffTask",
21
+ "ValueDiffDetailTask",
22
+ "ValueDiffTask",
23
+ ]