recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/tasks/query.py CHANGED
@@ -1,14 +1,15 @@
1
1
  import typing
2
- from typing import Optional, Tuple, List
2
+ from typing import List, Optional, Tuple
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
- from .core import Task, TaskResultDiffer, CheckValidator
7
- from .dataframe import DataFrame
8
- from .valuediff import ValueDiffMixin
9
6
  from ..core import default_context
10
7
  from ..exceptions import RecceException
11
8
  from ..models import Check
9
+ from .core import CheckValidator, Task, TaskResultDiffer
10
+ from .dataframe import DataFrame
11
+ from .utils import normalize_boolean_flag_columns, normalize_keys_to_columns
12
+ from .valuediff import ValueDiffMixin
12
13
 
13
14
  QUERY_LIMIT = 2000
14
15
 
@@ -19,11 +20,8 @@ if typing.TYPE_CHECKING:
19
20
  class QueryMixin:
20
21
  @classmethod
21
22
  def execute_sql_with_limit(
22
- cls,
23
- sql_template,
24
- base: bool = False,
25
- limit: Optional[int] = None
26
- ) -> Tuple['agate.Table', bool]:
23
+ cls, sql_template, base: bool = False, limit: Optional[int] = None
24
+ ) -> Tuple["agate.Table", bool]:
27
25
  """
28
26
  Execute a SQL template and return the result as an agate table.
29
27
  :param sql_template: SQL template to execute
@@ -32,7 +30,10 @@ class QueryMixin:
32
30
  :return: Tuple of agate table and whether there are more rows to fetch
33
31
  """
34
32
  from jinja2.exceptions import TemplateSyntaxError
33
+
35
34
  dbt_adapter = default_context().adapter
35
+ from dbt.exceptions import TargetNotFoundError
36
+
36
37
  try:
37
38
  sql = dbt_adapter.generate_sql(sql_template, base)
38
39
 
@@ -44,12 +45,13 @@ class QueryMixin:
44
45
  if len(result.rows) > limit:
45
46
  return result.limit(limit), True
46
47
  return result, False
47
-
48
+ except TargetNotFoundError as e:
49
+ raise RecceException(str(e), is_raise=False)
48
50
  except TemplateSyntaxError as e:
49
51
  raise RecceException(f"Jinja template error: line {e.lineno}: {str(e)}")
50
52
 
51
53
  @classmethod
52
- def execute_sql(cls, sql_template, base: bool = False) -> 'agate.Table':
54
+ def execute_sql(cls, sql_template, base: bool = False) -> "agate.Table":
53
55
  result, _ = cls.execute_sql_with_limit(sql_template, base)
54
56
  return result
55
57
 
@@ -85,6 +87,7 @@ class QueryTask(Task, QueryMixin):
85
87
 
86
88
  def execute_dbt(self):
87
89
  from recce.adapter.dbt_adapter import DbtAdapter
90
+
88
91
  dbt_adapter: DbtAdapter = default_context().adapter
89
92
 
90
93
  limit = QUERY_LIMIT
@@ -99,9 +102,10 @@ class QueryTask(Task, QueryMixin):
99
102
 
100
103
  def execute_sqlmesh(self):
101
104
  from ..adapter.sqlmesh_adapter import SqlmeshAdapter
105
+
102
106
  sqlmesh_adapter: SqlmeshAdapter = default_context().adapter
103
107
 
104
- sql = self.params.get('sql_template')
108
+ sql = self.params.get("sql_template")
105
109
  limit = QUERY_LIMIT
106
110
  df, more = sqlmesh_adapter.fetchdf_with_limit(sql, base=self.is_base, limit=limit)
107
111
  return DataFrame.from_pandas(df, limit=limit, more=more)
@@ -109,7 +113,7 @@ class QueryTask(Task, QueryMixin):
109
113
  def execute(self):
110
114
  context = default_context()
111
115
 
112
- if context.adapter_type == 'sqlmesh':
116
+ if context.adapter_type == "sqlmesh":
113
117
  return self.execute_sqlmesh()
114
118
  else:
115
119
  return self.execute_dbt()
@@ -137,8 +141,17 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
137
141
  self.connection = None
138
142
  self.legacy_surrogate_key = True
139
143
 
140
- def _query_diff(self, dbt_adapter, sql_template: str, base_sql_template: Optional[str] = None,
141
- preview_change: bool = False):
144
+ def _query_diff(
145
+ self,
146
+ dbt_adapter,
147
+ sql_template: str,
148
+ base_sql_template: Optional[str] = None,
149
+ preview_change: bool = False,
150
+ ):
151
+ """
152
+ Execute diff queries on base and current environments without join.
153
+ Note: Mutates self.params.primary_keys to normalize values with actual column keys.
154
+ """
142
155
  limit = QUERY_LIMIT
143
156
 
144
157
  self.connection = dbt_adapter.get_thread_connection()
@@ -151,42 +164,102 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
151
164
  current, current_more = self.execute_sql_with_limit(sql_template, base=False, limit=limit)
152
165
  self.check_cancel()
153
166
 
167
+ base_df = DataFrame.from_agate(base, limit=limit, more=base_more)
168
+ current_df = DataFrame.from_agate(current, limit=limit, more=current_more)
169
+
170
+ # Normalize primary_keys if present (for non-join diff, use current columns as reference)
171
+ if self.params.primary_keys:
172
+ column_keys = [col.key for col in current_df.columns]
173
+ self.params.primary_keys = normalize_keys_to_columns(self.params.primary_keys, column_keys)
174
+
154
175
  return QueryDiffResult(
155
- base=DataFrame.from_agate(base, limit=limit, more=base_more),
156
- current=DataFrame.from_agate(current, limit=limit, more=current_more)
176
+ base=base_df,
177
+ current=current_df,
157
178
  )
158
179
 
159
- def _query_diff_join(self, dbt_adapter, sql_template: str, primary_keys: List[str],
160
- base_sql_template: Optional[str] = None, preview_change: bool = False):
180
+ def _query_diff_join(
181
+ self,
182
+ dbt_adapter,
183
+ sql_template: str,
184
+ primary_keys: List[str],
185
+ base_sql_template: Optional[str] = None,
186
+ preview_change: bool = False,
187
+ ):
188
+ """
189
+ Execute diff queries on base and current environments using SQL join operations.
190
+ This method performs a set-based diff using INTERSECT and EXCEPT operations
191
+ to identify rows that differ between base and current query results.
192
+
193
+ Note: Mutates self.params.primary_keys to normalize values with actual column keys.
194
+
195
+ :param dbt_adapter: The dbt adapter instance for executing SQL
196
+ :param sql_template: SQL template to execute on the current environment
197
+ :param primary_keys: List of column names to use as primary keys for ordering
198
+ :param base_sql_template: Optional SQL template for the base environment.
199
+ If None, sql_template is used for both environments.
200
+ :param preview_change: If True, run base_sql_template against current environment
201
+ instead of base environment
202
+ :return: QueryDiffResult containing the diff DataFrame with in_a/in_b flags
203
+ """
161
204
 
162
205
  query_template = r"""
163
- {% set a_query %}
164
- {{ base_query }}
165
- {% endset %}
166
-
167
- {% set b_query %}
168
- {{ current_query }}
169
- {% endset %}
170
-
171
- {{ audit_helper.compare_queries(
172
- a_query=a_query,
173
- b_query=b_query,
174
- primary_key=__PRIMARY_KEY__,
175
- summarize=False,
176
- ) }} limit {{ limit }}
177
- """
178
-
179
- if len(primary_keys) > 1:
180
- self._verify_dbt_packages_deps(dbt_adapter)
181
- self.check_cancel()
206
+ with a_query as (
207
+ {{ base_query }}
208
+ ),
209
+
210
+ b_query as (
211
+ {{ current_query }}
212
+ ),
213
+
214
+ a_intersect_b as (
215
+ select * from a_query
216
+ {{ dbt.intersect() }}
217
+ select * from b_query
218
+ ),
219
+
220
+ a_except_b as (
221
+ select * from a_query
222
+ {{ dbt.except() }}
223
+ select * from b_query
224
+ ),
225
+
226
+ b_except_a as (
227
+ select * from b_query
228
+ {{ dbt.except() }}
229
+ select * from a_query
230
+ ),
231
+
232
+ all_records as (
233
+ select
234
+ *,
235
+ true as in_a,
236
+ true as in_b
237
+ from a_intersect_b
238
+
239
+ union all
240
+
241
+ select
242
+ *,
243
+ true as in_a,
244
+ false as in_b
245
+ from a_except_b
246
+
247
+ union all
248
+
249
+ select
250
+ *,
251
+ false as in_a,
252
+ true as in_b
253
+ from b_except_a
254
+ )
182
255
 
183
- if self.legacy_surrogate_key:
184
- new_primary_key = 'dbt_utils.surrogate_key(primary_key)'
185
- else:
186
- new_primary_key = 'dbt_utils.generate_surrogate_key(primary_key)'
187
- else:
188
- new_primary_key = 'primary_key'
189
- query_template = query_template.replace('__PRIMARY_KEY__', new_primary_key)
256
+ select * from all_records
257
+ where not (in_a and in_b)
258
+ order by {{ primary_keys | join(',\n') }}, in_a desc, in_b desc
259
+ limit {{ limit }}
260
+ """
261
+
262
+ self.check_cancel()
190
263
 
191
264
  if preview_change:
192
265
  base_query = dbt_adapter.generate_sql(base_sql_template, base=False)
@@ -194,19 +267,28 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
194
267
  base_query = dbt_adapter.generate_sql(base_sql_template or sql_template, base=True)
195
268
  current_query = dbt_adapter.generate_sql(sql_template, base=False)
196
269
 
197
- sql = dbt_adapter.generate_sql(query_template, context=dict(
198
- base_query=base_query,
199
- current_query=current_query,
200
- primary_key=primary_keys if len(primary_keys) != 1 else primary_keys[0],
201
- limit=QUERY_LIMIT,
202
- ))
270
+ sql = dbt_adapter.generate_sql(
271
+ query_template,
272
+ context=dict(
273
+ base_query=base_query,
274
+ current_query=current_query,
275
+ primary_keys=primary_keys,
276
+ limit=QUERY_LIMIT,
277
+ ),
278
+ )
203
279
 
204
280
  _, table = dbt_adapter.execute(sql, fetch=True)
205
281
  self.check_cancel()
206
282
 
207
- return QueryDiffResult(
208
- diff=DataFrame.from_agate(table)
209
- )
283
+ diff_df = DataFrame.from_agate(table)
284
+ # Normalize in_a/in_b columns to lowercase for cross-warehouse consistency
285
+ diff_df = normalize_boolean_flag_columns(diff_df)
286
+
287
+ # Normalize primary_keys to match actual column keys from warehouse
288
+ column_keys = [col.key for col in diff_df.columns]
289
+ self.params.primary_keys = normalize_keys_to_columns(primary_keys, column_keys)
290
+
291
+ return QueryDiffResult(diff=diff_df)
210
292
 
211
293
  @staticmethod
212
294
  def _select_single_model(model_name):
@@ -214,6 +296,7 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
214
296
 
215
297
  def execute_dbt(self):
216
298
  from recce.adapter.dbt_adapter import DbtAdapter
299
+
217
300
  dbt_adapter: DbtAdapter = default_context().adapter
218
301
 
219
302
  with dbt_adapter.connection_named("query"):
@@ -226,11 +309,20 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
226
309
  preview_change = True
227
310
 
228
311
  if primary_keys:
229
- return self._query_diff_join(dbt_adapter, sql_template, primary_keys,
230
- base_sql_template=base_sql_template, preview_change=preview_change)
231
-
232
- return self._query_diff(dbt_adapter, sql_template, base_sql_template=base_sql_template,
233
- preview_change=preview_change)
312
+ return self._query_diff_join(
313
+ dbt_adapter,
314
+ sql_template,
315
+ primary_keys,
316
+ base_sql_template=base_sql_template,
317
+ preview_change=preview_change,
318
+ )
319
+
320
+ return self._query_diff(
321
+ dbt_adapter,
322
+ sql_template,
323
+ base_sql_template=base_sql_template,
324
+ preview_change=preview_change,
325
+ )
234
326
 
235
327
  def _sqlmesh_query_diff(self, sql, base_sql=None):
236
328
  from ..adapter.sqlmesh_adapter import SqlmeshAdapter
@@ -242,7 +334,7 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
242
334
  curr, curr_more = sqlmesh_adapter.fetchdf_with_limit(sql, base=False, limit=limit)
243
335
  return QueryDiffResult(
244
336
  base=DataFrame.from_pandas(base, limit=limit, more=base_more),
245
- current=DataFrame.from_pandas(curr, limit=limit, more=curr_more)
337
+ current=DataFrame.from_pandas(curr, limit=limit, more=curr_more),
246
338
  )
247
339
 
248
340
  def _sqlmesh_query_diff_join(self, sql, primary_keys, base_sql=None):
@@ -255,21 +347,18 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
255
347
  expr_curr = sqlmesh_adapter.replace_virtual_tables(sql, base=False)
256
348
  import sqlglot as g
257
349
 
258
- expr = g.select(
259
- '*',
260
- ).with_(
261
- 'a', as_=expr_base
262
- ).with_(
263
- 'b', as_=expr_curr
264
- ).with_(
265
- 'a_interset_b', as_='select * from a intersect select * from b'
266
- ).with_(
267
- 'a_except_b', as_='select * from a except select * from b'
268
- ).with_(
269
- 'b_except_a', as_='select * from b except select * from a'
270
- ).with_(
271
- 'all_records',
272
- as_='''
350
+ expr = (
351
+ g.select(
352
+ "*",
353
+ )
354
+ .with_("a", as_=expr_base)
355
+ .with_("b", as_=expr_curr)
356
+ .with_("a_interset_b", as_="select * from a intersect select * from b")
357
+ .with_("a_except_b", as_="select * from a except select * from b")
358
+ .with_("b_except_a", as_="select * from b except select * from a")
359
+ .with_(
360
+ "all_records",
361
+ as_="""
273
362
  SELECT
274
363
  *,
275
364
  TRUE AS in_a,
@@ -287,19 +376,21 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
287
376
  FALSE AS in_a,
288
377
  TRUE AS in_b
289
378
  FROM b_except_a
290
- '''
291
- ).with_(
292
- 'final',
293
- as_=f'''
379
+ """,
380
+ )
381
+ .with_(
382
+ "final",
383
+ as_=f"""
294
384
  select * from all_records
295
385
  where not (in_a and in_b)
296
386
  order by {", ".join(primary_keys)}, in_a desc, in_b desc
297
- '''
298
- ).from_('final').limit(1000)
299
- diff, diff_more = sqlmesh_adapter.fetchdf_with_limit(expr, limit=limit)
300
- return QueryDiffResult(
301
- diff=DataFrame.from_pandas(diff, limit=limit, more=diff_more)
387
+ """,
388
+ )
389
+ .from_("final")
390
+ .limit(1000)
302
391
  )
392
+ diff, diff_more = sqlmesh_adapter.fetchdf_with_limit(expr, limit=limit)
393
+ return QueryDiffResult(diff=DataFrame.from_pandas(diff, limit=limit, more=diff_more))
303
394
 
304
395
  def execute_sqlmesh(self):
305
396
  sql = self.params.sql_template
@@ -314,7 +405,7 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
314
405
  def execute(self):
315
406
  context = default_context()
316
407
 
317
- if context.adapter_type == 'sqlmesh':
408
+ if context.adapter_type == "sqlmesh":
318
409
  return self.execute_sqlmesh()
319
410
  else:
320
411
  return self.execute_dbt()
@@ -327,14 +418,14 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
327
418
 
328
419
  class QueryDiffResultDiffer(TaskResultDiffer):
329
420
  def _check_result_changed_fn(self, result):
330
- base = result.get('base')
331
- current = result.get('current')
332
- diff = result.get('diff')
421
+ base = result.get("base")
422
+ current = result.get("current")
423
+ diff = result.get("diff")
333
424
 
334
425
  if diff is None:
335
426
  return TaskResultDiffer.diff(base, current)
336
427
  else:
337
- diff_data = diff.get('data')
428
+ diff_data = diff.get("data")
338
429
  if diff_data is None or len(diff_data) == 0:
339
430
  return None
340
431
 
recce/tasks/rowcount.py CHANGED
@@ -1,11 +1,11 @@
1
- from typing import Optional, Union, List, Literal
1
+ from typing import List, Literal, Optional, Union
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
5
  from recce.core import default_context
6
6
  from recce.models import Check
7
7
  from recce.tasks import Task
8
- from recce.tasks.core import TaskResultDiffer, CheckValidator
8
+ from recce.tasks.core import CheckValidator, TaskResultDiffer
9
9
  from recce.tasks.query import QueryMixin
10
10
 
11
11
 
@@ -25,10 +25,10 @@ class RowCountTask(Task, QueryMixin):
25
25
  if node is None:
26
26
  return None
27
27
 
28
- if node.resource_type != 'model' and node.resource_type != 'snapshot':
28
+ if node.resource_type != "model" and node.resource_type != "snapshot":
29
29
  return None
30
30
 
31
- if node.config and node.config.materialized not in ['table', 'view', 'incremental', 'snapshot']:
31
+ if node.config and node.config.materialized not in ["table", "view", "incremental", "snapshot"]:
32
32
  return None
33
33
 
34
34
  relation = dbt_adapter.create_relation(model_name, base=base)
@@ -54,8 +54,9 @@ class RowCountTask(Task, QueryMixin):
54
54
  for node in self.params.node_names or []:
55
55
  query_candidates.append(node)
56
56
  else:
57
+
57
58
  def countable(unique_id):
58
- return unique_id.startswith('model') or unique_id.startswith('snapshot') or unique_id.startswith('seed')
59
+ return unique_id.startswith("model") or unique_id.startswith("snapshot") or unique_id.startswith("seed")
59
60
 
60
61
  node_ids = dbt_adapter.select_nodes(
61
62
  select=self.params.select,
@@ -80,7 +81,7 @@ class RowCountTask(Task, QueryMixin):
80
81
  row_count = self._query_row_count(dbt_adapter, node, base=False)
81
82
  self.check_cancel()
82
83
  result[node] = {
83
- 'curr': row_count,
84
+ "curr": row_count,
84
85
  }
85
86
  completed += 1
86
87
 
@@ -98,7 +99,7 @@ class RowCountDiffParams(BaseModel):
98
99
  select: Optional[str] = None
99
100
  exclude: Optional[str] = None
100
101
  packages: Optional[list[str]] = None
101
- view_mode: Optional[Literal['all', 'changed_models']] = None
102
+ view_mode: Optional[Literal["all", "changed_models"]] = None
102
103
 
103
104
 
104
105
  class RowCountDiffTask(Task, QueryMixin):
@@ -112,10 +113,10 @@ class RowCountDiffTask(Task, QueryMixin):
112
113
  if node is None:
113
114
  return None
114
115
 
115
- if node.resource_type != 'model' and node.resource_type != 'snapshot':
116
+ if node.resource_type != "model" and node.resource_type != "snapshot":
116
117
  return None
117
118
 
118
- if node.config and node.config.materialized not in ['table', 'view', 'incremental', 'snapshot']:
119
+ if node.config and node.config.materialized not in ["table", "view", "incremental", "snapshot"]:
119
120
  return None
120
121
 
121
122
  relation = dbt_adapter.create_relation(model_name, base=base)
@@ -141,8 +142,9 @@ class RowCountDiffTask(Task, QueryMixin):
141
142
  for node in self.params.node_names or []:
142
143
  query_candidates.append(node)
143
144
  else:
145
+
144
146
  def countable(unique_id):
145
- return unique_id.startswith('model') or unique_id.startswith('snapshot') or unique_id.startswith('seed')
147
+ return unique_id.startswith("model") or unique_id.startswith("snapshot") or unique_id.startswith("seed")
146
148
 
147
149
  node_ids = dbt_adapter.select_nodes(
148
150
  select=self.params.select,
@@ -169,8 +171,8 @@ class RowCountDiffTask(Task, QueryMixin):
169
171
  curr_row_count = self._query_row_count(dbt_adapter, node, base=False)
170
172
  self.check_cancel()
171
173
  result[node] = {
172
- 'base': base_row_count,
173
- 'curr': curr_row_count,
174
+ "base": base_row_count,
175
+ "curr": curr_row_count,
174
176
  }
175
177
  completed += 1
176
178
 
@@ -187,6 +189,7 @@ class RowCountDiffTask(Task, QueryMixin):
187
189
  query_candidates.append(node_name)
188
190
 
189
191
  from recce.adapter.sqlmesh_adapter import SqlmeshAdapter
192
+
190
193
  sqlmesh_adapter: SqlmeshAdapter = default_context().adapter
191
194
 
192
195
  for name in query_candidates:
@@ -194,28 +197,28 @@ class RowCountDiffTask(Task, QueryMixin):
194
197
  curr_row_count = None
195
198
 
196
199
  try:
197
- df, _ = sqlmesh_adapter.fetchdf_with_limit(f'select count(*) from {name}', base=True)
200
+ df, _ = sqlmesh_adapter.fetchdf_with_limit(f"select count(*) from {name}", base=True)
198
201
  base_row_count = int(df.iloc[0, 0])
199
202
  except Exception:
200
203
  pass
201
204
  self.check_cancel()
202
205
 
203
206
  try:
204
- df, _ = sqlmesh_adapter.fetchdf_with_limit(f'select count(*) from {name}', base=False)
207
+ df, _ = sqlmesh_adapter.fetchdf_with_limit(f"select count(*) from {name}", base=False)
205
208
  curr_row_count = int(df.iloc[0, 0])
206
209
  except Exception:
207
210
  pass
208
211
  self.check_cancel()
209
212
  result[name] = {
210
- 'base': base_row_count,
211
- 'curr': curr_row_count,
213
+ "base": base_row_count,
214
+ "curr": curr_row_count,
212
215
  }
213
216
 
214
217
  return result
215
218
 
216
219
  def execute(self):
217
220
  context = default_context()
218
- if context.adapter_type == 'dbt':
221
+ if context.adapter_type == "dbt":
219
222
  return self.execute_dbt()
220
223
  else:
221
224
  return self.execute_sqlmesh()
@@ -232,8 +235,8 @@ class RowCountDiffResultDiffer(TaskResultDiffer):
232
235
  current = {}
233
236
 
234
237
  for node, row_counts in result.items():
235
- base[node] = row_counts['base']
236
- current[node] = row_counts['curr']
238
+ base[node] = row_counts["base"]
239
+ current[node] = row_counts["curr"]
237
240
 
238
241
  return TaskResultDiffer.diff(base, current)
239
242
 
@@ -243,24 +246,27 @@ class RowCountDiffResultDiffer(TaskResultDiffer):
243
246
  Should be implemented by subclass.
244
247
  """
245
248
  params = self.run.params
246
- if params.get('model'):
247
- return [TaskResultDiffer.get_node_id_by_name(params.get('model'))]
248
- elif params.get('node_names'):
249
- names = params.get('node_names', [])
249
+ if params.get("model"):
250
+ return [TaskResultDiffer.get_node_id_by_name(params.get("model"))]
251
+ elif params.get("node_names"):
252
+ names = params.get("node_names", [])
250
253
  return [TaskResultDiffer.get_node_id_by_name(name) for name in names]
251
- elif params.get('node_ids'):
252
- return params.get('node_ids', [])
254
+ elif params.get("node_ids"):
255
+ return params.get("node_ids", [])
253
256
  else:
254
257
  return TaskResultDiffer.get_node_ids_by_selector(
255
- select=params.get('select'),
256
- exclude=params.get('exclude'),
257
- packages=params.get('packages'),
258
- view_mode=params.get('view_mode'),
258
+ select=params.get("select"),
259
+ exclude=params.get("exclude"),
260
+ packages=params.get("packages"),
261
+ view_mode=params.get("view_mode"),
259
262
  )
260
263
 
261
264
  def _get_changed_nodes(self) -> Union[List[str], None]:
262
265
  if self.changes:
263
- return self.changes.affected_root_keys.items
266
+ # Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
267
+ # Convert to list directly
268
+ return list(self.changes.affected_root_keys)
269
+ return None
264
270
 
265
271
 
266
272
  class RowCountDiffCheckValidator(CheckValidator):
@@ -268,4 +274,4 @@ class RowCountDiffCheckValidator(CheckValidator):
268
274
  try:
269
275
  RowCountDiffParams(**check.params)
270
276
  except Exception as e:
271
- raise ValueError(f'Invalid params: str{e}')
277
+ raise ValueError(f"Invalid params: str{e}")
recce/tasks/schema.py CHANGED
@@ -1,9 +1,9 @@
1
- from typing import Union, List, Optional, Literal
1
+ from typing import List, Literal, Optional, Union
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
5
  from recce.models import Check
6
- from recce.tasks.core import TaskResultDiffer, CheckValidator
6
+ from recce.tasks.core import CheckValidator, TaskResultDiffer
7
7
 
8
8
 
9
9
  class SchemaDiffResultDiffer:
@@ -17,35 +17,38 @@ class SchemaDiffResultDiffer:
17
17
 
18
18
  def _get_related_node_ids(self) -> Union[List[str], None]:
19
19
  params = self.check.params
20
- if params.get('node_id'):
21
- return params.get('node_id') if isinstance(params.get('node_id'), list) else [params.get('node_id')]
20
+ if params.get("node_id"):
21
+ return params.get("node_id") if isinstance(params.get("node_id"), list) else [params.get("node_id")]
22
22
  else:
23
23
  return TaskResultDiffer.get_node_ids_by_selector(
24
- select=params.get('select'),
25
- exclude=params.get('exclude'),
26
- packages=params.get('packages'),
27
- view_mode=params.get('view_mode'),
24
+ select=params.get("select"),
25
+ exclude=params.get("exclude"),
26
+ packages=params.get("packages"),
27
+ view_mode=params.get("view_mode"),
28
28
  )
29
29
 
30
30
  def _check_result_changed_fn(self, base_lineage, curr_lineage):
31
31
  base = {}
32
32
  current = {}
33
- base_nodes = base_lineage.get('nodes', {})
34
- curr_nodes = curr_lineage.get('nodes', {})
33
+ base_nodes = base_lineage.get("nodes", {})
34
+ curr_nodes = curr_lineage.get("nodes", {})
35
35
  for node_id in self.related_node_ids:
36
36
  node = curr_nodes.get(node_id) or base_nodes.get(node_id)
37
37
  if not node:
38
38
  continue
39
39
 
40
- node_name = node.get('name')
41
- base[node_name] = base_nodes.get(node_id, {}).get('columns', {})
42
- current[node_name] = curr_nodes.get(node_id, {}).get('columns', {})
40
+ node_name = node.get("name")
41
+ base[node_name] = base_nodes.get(node_id, {}).get("columns", {})
42
+ current[node_name] = curr_nodes.get(node_id, {}).get("columns", {})
43
43
 
44
44
  return TaskResultDiffer.diff(base, current)
45
45
 
46
46
  def _get_changed_nodes(self) -> Union[List[str], None]:
47
47
  if self.changes:
48
- return self.changes.affected_root_keys.items
48
+ # Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
49
+ # Convert to list directly
50
+ return list(self.changes.affected_root_keys)
51
+ return None
49
52
 
50
53
 
51
54
  class SchemaDiffParams(BaseModel):
@@ -53,7 +56,7 @@ class SchemaDiffParams(BaseModel):
53
56
  select: Optional[str] = None
54
57
  exclude: Optional[str] = None
55
58
  packages: Optional[list[str]] = None
56
- view_mode: Optional[Literal['all', 'changed_models']] = None
59
+ view_mode: Optional[Literal["all", "changed_models"]] = None
57
60
 
58
61
 
59
62
  class SchemaDiffCheckValidator(CheckValidator):