recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/tasks/valuediff.py CHANGED
@@ -1,12 +1,13 @@
1
- from typing import TypedDict, Optional, List, Union
1
+ from typing import List, Optional, TypedDict, Union
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
- from .core import Task, TaskResultDiffer, CheckValidator
6
- from .dataframe import DataFrame
7
5
  from ..core import default_context
8
6
  from ..exceptions import RecceException
9
7
  from ..models import Check
8
+ from .core import CheckValidator, Task, TaskResultDiffer
9
+ from .dataframe import DataFrame
10
+ from .utils import normalize_boolean_flag_columns, normalize_keys_to_columns
10
11
 
11
12
 
12
13
  class ValueDiffParams(BaseModel):
@@ -26,19 +27,6 @@ class ValueDiffResult(BaseModel):
26
27
 
27
28
 
28
29
  class ValueDiffMixin:
29
- def _verify_dbt_packages_deps(self, dbt_adapter):
30
- for macro_name, macro in dbt_adapter.manifest.macros.items():
31
- if macro.package_name == 'audit_helper':
32
- break
33
- else:
34
- raise RecceException(
35
- r"Package 'audit_helper' not found. Please refer to the link to install: https://hub.getdbt.com/dbt-labs/audit_helper/")
36
-
37
- for macro_name, macro in dbt_adapter.manifest.macros.items():
38
- if macro.package_name == 'dbt_utils' and macro.name == 'generate_surrogate_key':
39
- self.legacy_surrogate_key = False
40
- break
41
-
42
30
  def _verify_primary_key(self, dbt_adapter, primary_key: Union[str, List[str]], model: str):
43
31
  self.update_progress(message=f"Verify primary key: {primary_key}")
44
32
  composite = True if isinstance(primary_key, List) else False
@@ -46,7 +34,21 @@ class ValueDiffMixin:
46
34
  if composite:
47
35
  if len(primary_key) == 0:
48
36
  raise RecceException("Primary key cannot be empty")
49
- sql_template = r"""{{ adapter.dispatch('test_unique_combination_of_columns', 'dbt_utils')(relation, primary_key) }}"""
37
+ sql_template = r"""
38
+ {%- set column_list = primary_key %}
39
+ {%- set columns_csv = column_list | join(', ') %}
40
+
41
+ with validation_errors as (
42
+ select
43
+ {{ columns_csv }}
44
+ from {{ relation }}
45
+ group by {{ columns_csv }}
46
+ having count(*) > 1
47
+ )
48
+
49
+ select *
50
+ from validation_errors
51
+ """
50
52
  else:
51
53
  if primary_key is None or len(primary_key) == 0:
52
54
  raise RecceException("Primary key cannot be empty")
@@ -54,7 +56,6 @@ class ValueDiffMixin:
54
56
 
55
57
  # check primary keys
56
58
  for base in [True, False]:
57
-
58
59
  relation = dbt_adapter.create_relation(model, base)
59
60
  context = dict(
60
61
  relation=relation,
@@ -69,31 +70,47 @@ class ValueDiffMixin:
69
70
  invalids = row[0]
70
71
  if invalids > 0:
71
72
  raise RecceException(
72
- f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'")
73
+ f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'"
74
+ )
73
75
  break
74
76
  else:
75
77
  # it will never happen unless we use a wrong check sql
76
- raise RecceException('Cannot verify primary key')
78
+ raise RecceException("Cannot verify primary key")
77
79
 
78
80
 
79
81
  class ValueDiffTask(Task, ValueDiffMixin):
80
-
81
82
  def __init__(self, params):
82
83
  super().__init__()
83
84
  self.params = ValueDiffParams(**params)
84
85
  self.connection = None
85
86
  self.legacy_surrogate_key = True
86
87
 
87
- def _query_value_diff(self, dbt_adpter, primary_key: Union[str, List[str]], model: str,
88
- columns: List[str] = None):
88
+ def _query_value_diff(
89
+ self,
90
+ dbt_adapter,
91
+ primary_key: Union[str, List[str]],
92
+ model: str,
93
+ columns: List[str] = None,
94
+ ):
95
+ """
96
+ Query value diff between base and current relations.
97
+ Compares column values between base and current relations using the primary key.
98
+ Mutates `self.params.primary_key` to normalize primary key names to match actual column names.
99
+
100
+ :param dbt_adapter: The dbt adapter instance.
101
+ :param primary_key: Single column name or list of column names for composite key.
102
+ :param model: The model name to compare.
103
+ :param columns: Optional list of columns to compare. If None, uses common columns.
104
+ :return: ValueDiffResult with summary and per-column match data, or None if invalid.
105
+ """
89
106
  import agate
90
107
 
91
108
  column_groups = {}
92
109
  composite = True if isinstance(primary_key, List) else False
93
110
 
94
111
  if columns is None or len(columns) == 0:
95
- base_columns = [column.column for column in dbt_adpter.get_columns(model, base=True)]
96
- curr_columns = [column.column for column in dbt_adpter.get_columns(model, base=False)]
112
+ base_columns = [column.column for column in dbt_adapter.get_columns(model, base=True)]
113
+ curr_columns = [column.column for column in dbt_adapter.get_columns(model, base=False)]
97
114
  columns = [column for column in base_columns if column in curr_columns]
98
115
  completed = 0
99
116
 
@@ -106,81 +123,117 @@ class ValueDiffTask(Task, ValueDiffMixin):
106
123
  columns.insert(0, primary_key)
107
124
 
108
125
  sql_template = r"""
109
- {% set a_query %}
110
- select {{ __PRIMARY_KEY__ }} as _pk, * from {{ base_relation }}
111
- {% endset %}
112
-
113
- {% set b_query %}
114
- select {{ __PRIMARY_KEY__ }} as _pk, * from {{ curr_relation }}
115
- {% endset %}
116
-
117
- {{ audit_helper.compare_column_values(
118
- a_query=a_query,
119
- b_query=b_query,
120
- primary_key="_pk",
121
- column_to_compare=column_to_compare
122
- ) }}
123
- """
126
+ {%- set default_null_value = "_recce_surrogate_key_null_" -%}
127
+ {%- set fields = [] -%}
128
+
129
+ {%- for field in primary_keys -%}
130
+ {%- do fields.append(
131
+ "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')"
132
+ ) -%}
133
+
134
+ {%- if not loop.last %}
135
+ {%- do fields.append("'-'") -%}
136
+ {%- endif -%}
137
+ {%- endfor -%}
138
+
139
+ {%- set _pk = dbt.hash(dbt.concat(fields)) -%}
140
+
141
+ with a_query as (
142
+ select {{ _pk }} as _pk, * from {{ base_relation }}
143
+ ),
144
+
145
+ b_query as (
146
+ select {{ _pk }} as _pk, * from {{ curr_relation }}
147
+ ),
148
+
149
+ joined as (
150
+ select
151
+ coalesce(a_query._pk, b_query._pk) as _pk,
152
+ a_query.{{ column_to_compare }} as a_query_value,
153
+ b_query.{{ column_to_compare }} as b_query_value,
154
+ case
155
+ when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 'perfect match'
156
+ when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then 'both are null'
157
+ when a_query._pk is null then 'missing from {{ a_relation_name }}'
158
+ when b_query._pk is null then 'missing from {{ b_relation_name }}'
159
+ when a_query.{{ column_to_compare }} is null then 'value is null in {{ a_relation_name }} only'
160
+ when b_query.{{ column_to_compare }} is null then 'value is null in {{ b_relation_name }} only'
161
+ when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 'values do not match'
162
+ else 'unknown' -- this should never happen
163
+ end as match_status
164
+ from a_query
165
+ full outer join b_query on a_query._pk = b_query._pk
166
+ ),
167
+
168
+ aggregated as (
169
+ select
170
+ '{{ column_to_compare }}' as column_name,
171
+ match_status,
172
+ count(*) as count_records
173
+ from joined
174
+ group by 1, 2
175
+ )
124
176
 
125
- if composite:
126
- if self.legacy_surrogate_key:
127
- new_primary_key = 'dbt_utils.surrogate_key(primary_key)'
128
- else:
129
- new_primary_key = 'dbt_utils.generate_surrogate_key(primary_key)'
130
- else:
131
- new_primary_key = 'primary_key'
132
- sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
177
+ select
178
+ column_name,
179
+ match_status,
180
+ count_records,
181
+ round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total
182
+ from aggregated
183
+ """
133
184
 
134
185
  for column in columns:
135
186
  self.update_progress(message=f"Diff column: {column}", percentage=completed / len(columns))
136
187
 
137
- sql = dbt_adpter.generate_sql(sql_template, context=dict(
138
- base_relation=dbt_adpter.create_relation(model, base=True),
139
- curr_relation=dbt_adpter.create_relation(model, base=False),
140
- primary_key=primary_key,
141
- column_to_compare=column,
142
- ))
188
+ sql = dbt_adapter.generate_sql(
189
+ sql_template,
190
+ context=dict(
191
+ base_relation=dbt_adapter.create_relation(model, base=True),
192
+ curr_relation=dbt_adapter.create_relation(model, base=False),
193
+ primary_keys=primary_key if composite else [primary_key],
194
+ column_to_compare=column,
195
+ ),
196
+ )
143
197
 
144
- _, table = dbt_adpter.execute(sql, fetch=True)
198
+ _, table = dbt_adapter.execute(sql, fetch=True)
199
+ if column not in column_groups:
200
+ column_groups[column] = dict(added=0, removed=0, mismatched=0, matched=0)
145
201
  for row in table.rows:
146
202
  # data example:
147
203
  # ('COLUMN_NAME', 'MATCH_STATUS', 'COUNT_RECORDS', 'PERCENT_OF_TOTAL')
148
- # ('EVENT_ID', '✅: perfect match', 158601510, Decimal('100.00'))
204
+ # ('EVENT_ID', 'perfect match', 158601510, Decimal('100.00'))
149
205
  column_name, column_state, row_count, total_rate = row
150
- if 'column_name' == row[0].lower():
206
+ if "column_name" == row[0].lower():
151
207
  # skip column names
152
208
  return
153
209
 
154
- #
155
210
  # sample data like this:
156
211
  # https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql
157
212
  #
158
- # '✅: perfect match' -> matched
159
- # '✅: both are null' -> matched
160
- # '🤷: missing from a' -> row added
161
- # '🤷: missing from b' -> row removed
162
- # '🤷: value is null in a only' -> mismatched
163
- # '🤷: value is null in b only' -> mismatched
164
- # '🙅: values do not match' -> mismatched
165
- # 'unknown' -> this should never happen
213
+ # 'perfect match' -> matched
214
+ # 'both are null' -> matched
215
+ # 'missing from a' -> row added
216
+ # 'missing from b' -> row removed
217
+ # 'value is null in a only' -> mismatched
218
+ # 'value is null in b only' -> mismatched
219
+ # 'values do not match' -> mismatched
220
+ # 'unknown' -> this should never happen
166
221
  # end as match_status,
167
222
 
168
- if column_name not in column_groups:
169
- column_groups[column_name] = dict(added=0, removed=0, mismatched=0, matched=0)
170
- if 'perfect match' in column_state:
171
- column_groups[column_name]['matched'] += row_count
172
- if 'both are null' in column_state:
173
- column_groups[column_name]['matched'] += row_count
174
- if 'missing from a' in column_state:
175
- column_groups[column_name]['added'] += row_count
176
- if 'missing from b' in column_state:
177
- column_groups[column_name]['removed'] += row_count
178
- if 'value is null in a only' in column_state:
179
- column_groups[column_name]['mismatched'] += row_count
180
- if 'value is null in b only' in column_state:
181
- column_groups[column_name]['mismatched'] += row_count
182
- if 'values do not match' in column_state:
183
- column_groups[column_name]['mismatched'] += row_count
223
+ state_mappings = {
224
+ "perfect match": "matched",
225
+ "both are null": "matched",
226
+ "missing from a": "added",
227
+ "missing from b": "removed",
228
+ "value is null in a only": "mismatched",
229
+ "value is null in b only": "mismatched",
230
+ "values do not match": "mismatched",
231
+ }
232
+
233
+ # Use the mapping to update counts
234
+ for state, action in state_mappings.items():
235
+ if state in column_state:
236
+ column_groups[column_name][action] += row_count
184
237
 
185
238
  # Cancel as early as possible
186
239
  self.check_cancel()
@@ -188,9 +241,9 @@ class ValueDiffTask(Task, ValueDiffMixin):
188
241
  completed = completed + 1
189
242
 
190
243
  first = list(column_groups.values())[0]
191
- added = first['added']
192
- removed = first['removed']
193
- common = first['matched'] + first['mismatched']
244
+ added = first["added"]
245
+ removed = first["removed"]
246
+ common = first["matched"] + first["mismatched"]
194
247
  total = common + added + removed
195
248
 
196
249
  row = []
@@ -200,15 +253,25 @@ class ValueDiffTask(Task, ValueDiffMixin):
200
253
  # This is incorrect when there are one side null
201
254
  # https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql#L20-L23
202
255
  # matched = v['matched']
203
- matched = common - v['mismatched']
256
+ matched = common - v["mismatched"]
204
257
  rate = None if common == 0 else matched / common
205
258
  record = [k, matched, rate]
206
259
  row.append(record)
207
260
 
208
- column_names = ['column', 'matched', 'matched_p']
261
+ column_names = ["column", "matched", "matched_p"]
209
262
  column_types = [agate.Text(), agate.Number(), agate.Number()]
210
263
  table = agate.Table(row, column_names=column_names, column_types=column_types)
211
264
 
265
+ # Normalize primary_key to match actual column keys
266
+ # For ValueDiff, 'columns' refers to the model's column list (from metadata), not a DataFrame result.
267
+ composite = isinstance(primary_key, list)
268
+ if composite:
269
+ self.params.primary_key = normalize_keys_to_columns(primary_key, columns) # columns list from the model
270
+ else:
271
+ normalized = normalize_keys_to_columns([primary_key], columns)
272
+ if normalized:
273
+ self.params.primary_key = normalized[0]
274
+
212
275
  return ValueDiffResult(
213
276
  summary=ValueDiffResult.Summary(total=total, added=added, removed=removed),
214
277
  data=DataFrame.from_agate(table),
@@ -224,9 +287,6 @@ class ValueDiffTask(Task, ValueDiffMixin):
224
287
  model: str = self.params.model
225
288
  columns: List[str] = self.params.columns
226
289
 
227
- self._verify_dbt_packages_deps(dbt_adapter)
228
- self.check_cancel()
229
-
230
290
  self._verify_primary_key(dbt_adapter, primary_key, model)
231
291
  self.check_cancel()
232
292
 
@@ -243,35 +303,34 @@ class ValueDiffTask(Task, ValueDiffMixin):
243
303
 
244
304
 
245
305
  class ValueDiffTaskResultDiffer(TaskResultDiffer):
246
-
247
306
  def _check_result_changed_fn(self, result):
248
307
  is_changed = False
249
- summary = result.get('summary', {})
250
- added = summary.get('added', 0)
251
- removed = summary.get('removed', 0)
252
- changes = {
253
- 'column_changed': []
254
- }
308
+ summary = result.get("summary", {})
309
+ added = summary.get("added", 0)
310
+ removed = summary.get("removed", 0)
311
+ changes = {"column_changed": []}
255
312
 
256
313
  if added > 0:
257
314
  is_changed = True
258
- changes['row_added'] = added
315
+ changes["row_added"] = added
259
316
 
260
317
  if removed > 0:
261
318
  is_changed = True
262
- changes['row_removed'] = removed
319
+ changes["row_removed"] = removed
263
320
 
264
- row_data = result.get('data', {}).get('data', [])
321
+ row_data = result.get("data", {}).get("data", [])
265
322
  for row in row_data:
266
323
  column, matched, matched_p = row
267
324
  if float(matched_p) < 1.0:
268
325
  # if there is any mismatched, we consider it as changed
269
326
  is_changed = True
270
- changes['column_changed'].append({
271
- 'column': column,
272
- 'matched': matched,
273
- 'matched_p': matched_p,
274
- })
327
+ changes["column_changed"].append(
328
+ {
329
+ "column": column,
330
+ "matched": matched,
331
+ "matched_p": matched_p,
332
+ }
333
+ )
275
334
 
276
335
  return changes if is_changed else None
277
336
 
@@ -287,15 +346,19 @@ class ValueDiffDetailResult(DataFrame):
287
346
 
288
347
 
289
348
  class ValueDiffDetailTask(Task, ValueDiffMixin):
290
-
291
349
  def __init__(self, params):
292
350
  super().__init__()
293
351
  self.params = ValueDiffParams(**params)
294
352
  self.connection = None
295
353
  self.legacy_surrogate_key = True
296
354
 
297
- def _query_value_diff(self, dbt_adapter, primary_key: Union[str, List[str]], model: str, columns: List[str] = None):
298
-
355
+ def _query_value_diff(
356
+ self,
357
+ dbt_adapter,
358
+ primary_key: Union[str, List[str]],
359
+ model: str,
360
+ columns: List[str] = None,
361
+ ):
299
362
  composite = True if isinstance(primary_key, List) else False
300
363
 
301
364
  if columns is None or len(columns) == 0:
@@ -312,54 +375,87 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
312
375
  columns.insert(0, primary_key)
313
376
 
314
377
  sql_template = r"""
315
- {% set col_list %}
316
- {%- for col in columns %}
317
- {{ col|trim }}
318
- {%- if not loop.last %},{{ '\n ' }}{%- endif -%}
319
- {%- endfor -%}
320
- {% endset %}
321
-
322
- {% set a_query %}
323
- select {{col_list}} from {{ base_relation }}
324
- {% endset %}
325
-
326
- {% set b_query %}
327
- select {{col_list}} from {{ curr_relation }}
328
- {% endset %}
329
-
330
- {{ audit_helper.compare_queries(
331
- a_query=a_query,
332
- b_query=b_query,
333
- primary_key=__PRIMARY_KEY__,
334
- summarize=False,
335
- ) }} limit {{ limit }}
336
- """
337
-
338
- if composite:
339
- if self.legacy_surrogate_key:
340
- new_primary_key = 'dbt_utils.surrogate_key(primary_key)'
341
- else:
342
- new_primary_key = 'dbt_utils.generate_surrogate_key(primary_key)'
343
- else:
344
- new_primary_key = 'primary_key'
345
- sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
346
-
347
- sql = dbt_adapter.generate_sql(sql_template, context=dict(
348
- base_relation=dbt_adapter.create_relation(model, base=True),
349
- curr_relation=dbt_adapter.create_relation(model, base=False),
350
- primary_key=primary_key,
351
- columns=columns,
352
- limit=1000,
353
- ))
378
+ with a_query as (select {{ columns | join (',\n') }}
379
+ from {{ base_relation }}
380
+ ), b_query as (
381
+ select {{ columns | join (',\n') }}
382
+ from {{ curr_relation }}
383
+ ), a_intersect_b as (
384
+ select *
385
+ from a_query
386
+ {{ dbt.intersect() }}
387
+ select *
388
+ from b_query
389
+ ), a_except_b as (
390
+ select *
391
+ from a_query
392
+ {{ dbt.except() }}
393
+ select *
394
+ from b_query
395
+ ), b_except_a as (
396
+ select *
397
+ from b_query
398
+ {{ dbt.except() }}
399
+ select *
400
+ from a_query
401
+ ), all_records as (
402
+ select
403
+ *, true as in_a, true as in_b
404
+ from a_intersect_b
405
+
406
+ union all
407
+
408
+ select
409
+ *, true as in_a, false as in_b
410
+ from a_except_b
411
+
412
+ union all
413
+
414
+ select
415
+ *, false as in_a, true as in_b
416
+ from b_except_a
417
+ )
418
+
419
+ select *
420
+ from all_records
421
+ where not (in_a and in_b)
422
+ order by {{ primary_keys | join (',\n') }}, in_a desc, in_b desc
423
+ limit {{ limit }}
424
+ """
425
+
426
+ sql = dbt_adapter.generate_sql(
427
+ sql_template,
428
+ context=dict(
429
+ base_relation=dbt_adapter.create_relation(model, base=True),
430
+ curr_relation=dbt_adapter.create_relation(model, base=False),
431
+ primary_keys=primary_key if composite else [primary_key],
432
+ columns=columns,
433
+ limit=1000,
434
+ ),
435
+ )
354
436
 
355
437
  _, table = dbt_adapter.execute(sql, fetch=True)
356
438
  self.check_cancel()
357
439
 
358
- return DataFrame.from_agate(table)
440
+ result_df = DataFrame.from_agate(table)
441
+ # Normalize in_a/in_b columns to lowercase for cross-warehouse consistency
442
+ result_df = normalize_boolean_flag_columns(result_df)
359
443
 
360
- def execute(self):
444
+ # Normalize primary_key to match actual column keys from result
445
+ column_keys = [col.key for col in result_df.columns]
446
+ composite = isinstance(primary_key, list)
447
+ if composite:
448
+ self.params.primary_key = normalize_keys_to_columns(primary_key, column_keys)
449
+ else:
450
+ normalized = normalize_keys_to_columns([primary_key], column_keys)
451
+ if normalized:
452
+ self.params.primary_key = normalized[0]
453
+
454
+ return result_df
361
455
 
456
+ def execute(self):
362
457
  from recce.adapter.dbt_adapter import DbtAdapter
458
+
363
459
  dbt_adapter: DbtAdapter = default_context().adapter
364
460
 
365
461
  with dbt_adapter.connection_named("value diff"):
@@ -369,9 +465,6 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
369
465
  model: str = self.params.model
370
466
  columns: List[str] = self.params.columns
371
467
 
372
- self._verify_dbt_packages_deps(dbt_adapter)
373
- self.check_cancel()
374
-
375
468
  self._verify_primary_key(dbt_adapter, primary_key, model)
376
469
  self.check_cancel()
377
470
 
@@ -379,6 +472,7 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
379
472
 
380
473
  def cancel(self):
381
474
  from recce.adapter.dbt_adapter import DbtAdapter
475
+
382
476
  if self.connection:
383
477
  adapter: DbtAdapter = default_context().adapter
384
478
  with adapter.connection_named("cancel"):
@@ -386,9 +480,8 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
386
480
 
387
481
 
388
482
  class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
389
-
390
483
  def _check_result_changed_fn(self, result):
391
- diff_data = result.get('data')
484
+ diff_data = result.get("data")
392
485
  if diff_data is None or len(diff_data) == 0:
393
486
  return None
394
487
 
@@ -397,7 +490,6 @@ class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
397
490
 
398
491
 
399
492
  class ValueDiffCheckValidator(CheckValidator):
400
-
401
493
  def validate_check(self, check: Check):
402
494
  try:
403
495
  ValueDiffParams(**check.params)
recce/util/__init__.py CHANGED
@@ -1 +1,4 @@
1
1
  from .singleton import SingletonMeta
2
+
3
+ # Explicitly declare exports
4
+ __all__ = ["SingletonMeta"]
@@ -0,0 +1,80 @@
1
+ import click
2
+ from rich.console import Console
3
+
4
+ from recce import event
5
+ from recce.event import get_recce_api_token, update_recce_api_token
6
+ from recce.exceptions import RecceConfigException
7
+ from recce.util.recce_cloud import (
8
+ RECCE_CLOUD_BASE_URL,
9
+ RecceCloud,
10
+ )
11
+
12
+ console = Console()
13
+
14
+
15
def show_invalid_api_token_message():
    """
    Print the guidance shown to the user when the Recce Cloud API token is invalid.

    Three lines are written to the module-level console: the error itself,
    the command that re-associates the account, and a documentation link.
    """
    messages = (
        "[[red]Error[/red]] Invalid Recce Cloud API token.",
        "Please associate with your Recce Cloud account by the following command 'recce connect-to-cloud'.",
        "For more information, please visit: https://docs.reccehq.com/recce-cloud/share-recce-session-securely/#configure-recce-cloud-association-manually",
    )
    for message in messages:
        console.print(message)
24
+
25
+
26
def prepare_api_token(
    interaction=False,
    **kwargs,
):
    """
    Resolve the Recce Cloud API token to use for the request.

    The token stored in the user profile is the starting point; an
    ``api_token`` entry in ``kwargs`` (the ``--api-token`` option) is
    considered first when present.

    Args:
        interaction: When true and no token is available from either source,
            prompt the user for one on the terminal.
        **kwargs: Command options; only ``api_token`` is read.

    Returns:
        The token to use. When the stored token fails verification the result
        is ``None``; when no token exists and ``interaction`` is false the
        (empty) stored value is returned unchanged.

    Raises:
        RecceConfigException: If a token supplied through ``kwargs`` or typed
            at the prompt fails verification.
    """
    profile_updated_notice = (
        "[[green]Success[/green]] User profile has been updated to include the Recce Cloud API Token. "
        "You no longer need to append --api-token to the recce command"
    )

    stored_token = get_recce_api_token()
    given_token = kwargs.get("api_token")

    # Task token ("rct-" prefix): verified and used, but not saved to the profile.
    if given_token is not None and given_token.startswith("rct-"):
        if not RecceCloud(given_token).verify_token():
            raise RecceConfigException("Invalid Recce Cloud Task token")
        return given_token

    # Token passed by option `--api-token` that differs from the stored one:
    # verify it, then persist it to the user profile.
    if given_token is not None and stored_token != given_token:
        if not RecceCloud(given_token).verify_token():
            raise RecceConfigException("Invalid Recce Cloud API token")
        event.log_connected_to_cloud()
        update_recce_api_token(given_token)
        console.print(profile_updated_notice)
        return given_token

    # Token from the user profile: an invalid one only produces a warning.
    if stored_token:
        if RecceCloud(stored_token).verify_token():
            event.log_connected_to_cloud()
            return stored_token
        console.print("[[yellow]Warning[/yellow]] Invalid Recce Cloud API token. Skipping the share link.")
        return None

    # No token from either source; without interaction there is nothing more to do.
    if not interaction:
        return stored_token

    console.print(
        "An API token is required for this feature. This can be obtained in your user account settings.\n"
        f"{RECCE_CLOUD_BASE_URL}/settings#tokens\n"
        "Your API token can be added to '~/.recce/profile.yml' for more convenient sharing."
    )
    prompted_token = click.prompt("Your Recce API token", type=str, hide_input=True, show_default=False)
    if not RecceCloud(prompted_token).verify_token():
        raise RecceConfigException("Invalid Recce Cloud API token")
    update_recce_api_token(prompted_token)
    console.print(profile_updated_notice)
    return prompted_token