recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (213)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +810 -480
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +119 -51
  12. recce/cli.py +1299 -323
  13. recce/config.py +42 -33
  14. recce/connect_to_cloud.py +138 -0
  15. recce/core.py +55 -47
  16. recce/data/404.html +1 -1
  17. recce/data/__next.__PAGE__.txt +10 -0
  18. recce/data/__next._full.txt +23 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +8 -0
  21. recce/data/__next._tree.txt +5 -0
  22. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
  23. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
  24. recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
  25. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  26. recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
  27. recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
  28. recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
  29. recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
  30. recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
  31. recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
  32. recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
  33. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  34. recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
  35. recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
  36. recce/data/_next/static/chunks/99d638224186c118.js +1 -0
  37. recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
  38. recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
  39. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  40. recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
  41. recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
  42. recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
  43. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  44. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  45. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  46. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  47. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  48. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  49. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  50. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  51. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  52. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  53. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  54. recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
  55. recce/data/_not-found/__next._full.txt +17 -0
  56. recce/data/_not-found/__next._head.txt +8 -0
  57. recce/data/_not-found/__next._index.txt +8 -0
  58. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  59. recce/data/_not-found/__next._not-found.txt +4 -0
  60. recce/data/_not-found/__next._tree.txt +3 -0
  61. recce/data/_not-found.html +1 -0
  62. recce/data/_not-found.txt +17 -0
  63. recce/data/auth_callback.html +68 -0
  64. recce/data/imgs/reload-image.svg +4 -0
  65. recce/data/index.html +1 -27
  66. recce/data/index.txt +23 -7
  67. recce/diff.py +6 -12
  68. recce/event/__init__.py +86 -74
  69. recce/event/collector.py +33 -22
  70. recce/event/track.py +49 -27
  71. recce/exceptions.py +1 -1
  72. recce/git.py +7 -7
  73. recce/github.py +57 -53
  74. recce/mcp_server.py +716 -0
  75. recce/models/__init__.py +4 -1
  76. recce/models/check.py +6 -7
  77. recce/models/run.py +1 -0
  78. recce/models/types.py +131 -28
  79. recce/pull_request.py +27 -25
  80. recce/run.py +165 -121
  81. recce/server.py +303 -111
  82. recce/state/__init__.py +31 -0
  83. recce/state/cloud.py +632 -0
  84. recce/state/const.py +26 -0
  85. recce/state/local.py +56 -0
  86. recce/state/state.py +119 -0
  87. recce/state/state_loader.py +174 -0
  88. recce/summary.py +188 -143
  89. recce/tasks/__init__.py +19 -3
  90. recce/tasks/core.py +11 -13
  91. recce/tasks/dataframe.py +82 -18
  92. recce/tasks/histogram.py +69 -34
  93. recce/tasks/lineage.py +2 -2
  94. recce/tasks/profile.py +152 -86
  95. recce/tasks/query.py +139 -87
  96. recce/tasks/rowcount.py +37 -31
  97. recce/tasks/schema.py +18 -15
  98. recce/tasks/top_k.py +35 -35
  99. recce/tasks/valuediff.py +216 -152
  100. recce/util/__init__.py +3 -0
  101. recce/util/api_token.py +80 -0
  102. recce/util/breaking.py +87 -85
  103. recce/util/cll.py +274 -219
  104. recce/util/io.py +22 -17
  105. recce/util/lineage.py +65 -16
  106. recce/util/logger.py +1 -1
  107. recce/util/onboarding_state.py +45 -0
  108. recce/util/perf_tracking.py +85 -0
  109. recce/util/recce_cloud.py +322 -72
  110. recce/util/singleton.py +4 -4
  111. recce/yaml/__init__.py +7 -10
  112. recce_cloud/__init__.py +24 -0
  113. recce_cloud/api/__init__.py +17 -0
  114. recce_cloud/api/base.py +111 -0
  115. recce_cloud/api/client.py +150 -0
  116. recce_cloud/api/exceptions.py +26 -0
  117. recce_cloud/api/factory.py +63 -0
  118. recce_cloud/api/github.py +76 -0
  119. recce_cloud/api/gitlab.py +82 -0
  120. recce_cloud/artifact.py +57 -0
  121. recce_cloud/ci_providers/__init__.py +9 -0
  122. recce_cloud/ci_providers/base.py +82 -0
  123. recce_cloud/ci_providers/detector.py +147 -0
  124. recce_cloud/ci_providers/github_actions.py +136 -0
  125. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  126. recce_cloud/cli.py +245 -0
  127. recce_cloud/upload.py +214 -0
  128. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
  129. recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
  130. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
  131. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
  132. tests/adapter/dbt_adapter/conftest.py +9 -5
  133. tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
  134. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  135. tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
  136. tests/adapter/dbt_adapter/test_selector.py +22 -21
  137. tests/recce_cloud/__init__.py +0 -0
  138. tests/recce_cloud/test_ci_providers.py +351 -0
  139. tests/recce_cloud/test_cli.py +372 -0
  140. tests/recce_cloud/test_client.py +273 -0
  141. tests/recce_cloud/test_platform_clients.py +333 -0
  142. tests/tasks/conftest.py +1 -1
  143. tests/tasks/test_histogram.py +58 -66
  144. tests/tasks/test_lineage.py +36 -23
  145. tests/tasks/test_preset_checks.py +45 -31
  146. tests/tasks/test_profile.py +339 -15
  147. tests/tasks/test_query.py +46 -46
  148. tests/tasks/test_row_count.py +65 -46
  149. tests/tasks/test_schema.py +65 -42
  150. tests/tasks/test_top_k.py +22 -18
  151. tests/tasks/test_valuediff.py +43 -32
  152. tests/test_cli.py +174 -60
  153. tests/test_cli_mcp_optional.py +45 -0
  154. tests/test_cloud_listing_cli.py +324 -0
  155. tests/test_config.py +7 -9
  156. tests/test_connect_to_cloud.py +82 -0
  157. tests/test_core.py +151 -4
  158. tests/test_dbt.py +7 -7
  159. tests/test_mcp_server.py +332 -0
  160. tests/test_pull_request.py +1 -1
  161. tests/test_server.py +25 -19
  162. tests/test_summary.py +29 -17
  163. recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
  164. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  165. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  166. recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
  167. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  168. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  169. recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
  170. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  171. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  172. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  173. recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
  174. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  175. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  176. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  177. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  178. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  179. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  180. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  181. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  182. recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
  183. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  184. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  185. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  186. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  187. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  188. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  189. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  190. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  191. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  192. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  193. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  194. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  195. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  196. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  197. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  198. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  199. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  200. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  202. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  203. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  205. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  206. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  207. recce/state.py +0 -753
  208. recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
  209. tests/test_state.py +0 -123
  210. /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
  211. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  212. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
  213. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/tasks/valuediff.py CHANGED
@@ -1,12 +1,12 @@
1
- from typing import TypedDict, Optional, List, Union
1
+ from typing import List, Optional, TypedDict, Union
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
- from .core import Task, TaskResultDiffer, CheckValidator
6
- from .dataframe import DataFrame
7
5
  from ..core import default_context
8
6
  from ..exceptions import RecceException
9
7
  from ..models import Check
8
+ from .core import CheckValidator, Task, TaskResultDiffer
9
+ from .dataframe import DataFrame
10
10
 
11
11
 
12
12
  class ValueDiffParams(BaseModel):
@@ -26,19 +26,6 @@ class ValueDiffResult(BaseModel):
26
26
 
27
27
 
28
28
  class ValueDiffMixin:
29
- def _verify_dbt_packages_deps(self, dbt_adapter):
30
- for macro_name, macro in dbt_adapter.manifest.macros.items():
31
- if macro.package_name == 'audit_helper':
32
- break
33
- else:
34
- raise RecceException(
35
- r"Package 'audit_helper' not found. Please refer to the link to install: https://hub.getdbt.com/dbt-labs/audit_helper/")
36
-
37
- for macro_name, macro in dbt_adapter.manifest.macros.items():
38
- if macro.package_name == 'dbt_utils' and macro.name == 'generate_surrogate_key':
39
- self.legacy_surrogate_key = False
40
- break
41
-
42
29
  def _verify_primary_key(self, dbt_adapter, primary_key: Union[str, List[str]], model: str):
43
30
  self.update_progress(message=f"Verify primary key: {primary_key}")
44
31
  composite = True if isinstance(primary_key, List) else False
@@ -46,7 +33,21 @@ class ValueDiffMixin:
46
33
  if composite:
47
34
  if len(primary_key) == 0:
48
35
  raise RecceException("Primary key cannot be empty")
49
- sql_template = r"""{{ adapter.dispatch('test_unique_combination_of_columns', 'dbt_utils')(relation, primary_key) }}"""
36
+ sql_template = r"""
37
+ {%- set column_list = primary_key %}
38
+ {%- set columns_csv = column_list | join(', ') %}
39
+
40
+ with validation_errors as (
41
+ select
42
+ {{ columns_csv }}
43
+ from {{ relation }}
44
+ group by {{ columns_csv }}
45
+ having count(*) > 1
46
+ )
47
+
48
+ select *
49
+ from validation_errors
50
+ """
50
51
  else:
51
52
  if primary_key is None or len(primary_key) == 0:
52
53
  raise RecceException("Primary key cannot be empty")
@@ -54,7 +55,6 @@ class ValueDiffMixin:
54
55
 
55
56
  # check primary keys
56
57
  for base in [True, False]:
57
-
58
58
  relation = dbt_adapter.create_relation(model, base)
59
59
  context = dict(
60
60
  relation=relation,
@@ -69,31 +69,36 @@ class ValueDiffMixin:
69
69
  invalids = row[0]
70
70
  if invalids > 0:
71
71
  raise RecceException(
72
- f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'")
72
+ f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'"
73
+ )
73
74
  break
74
75
  else:
75
76
  # it will never happen unless we use a wrong check sql
76
- raise RecceException('Cannot verify primary key')
77
+ raise RecceException("Cannot verify primary key")
77
78
 
78
79
 
79
80
  class ValueDiffTask(Task, ValueDiffMixin):
80
-
81
81
  def __init__(self, params):
82
82
  super().__init__()
83
83
  self.params = ValueDiffParams(**params)
84
84
  self.connection = None
85
85
  self.legacy_surrogate_key = True
86
86
 
87
- def _query_value_diff(self, dbt_adpter, primary_key: Union[str, List[str]], model: str,
88
- columns: List[str] = None):
87
+ def _query_value_diff(
88
+ self,
89
+ dbt_adapter,
90
+ primary_key: Union[str, List[str]],
91
+ model: str,
92
+ columns: List[str] = None,
93
+ ):
89
94
  import agate
90
95
 
91
96
  column_groups = {}
92
97
  composite = True if isinstance(primary_key, List) else False
93
98
 
94
99
  if columns is None or len(columns) == 0:
95
- base_columns = [column.column for column in dbt_adpter.get_columns(model, base=True)]
96
- curr_columns = [column.column for column in dbt_adpter.get_columns(model, base=False)]
100
+ base_columns = [column.column for column in dbt_adapter.get_columns(model, base=True)]
101
+ curr_columns = [column.column for column in dbt_adapter.get_columns(model, base=False)]
97
102
  columns = [column for column in base_columns if column in curr_columns]
98
103
  completed = 0
99
104
 
@@ -106,81 +111,117 @@ class ValueDiffTask(Task, ValueDiffMixin):
106
111
  columns.insert(0, primary_key)
107
112
 
108
113
  sql_template = r"""
109
- {% set a_query %}
110
- select {{ __PRIMARY_KEY__ }} as _pk, * from {{ base_relation }}
111
- {% endset %}
112
-
113
- {% set b_query %}
114
- select {{ __PRIMARY_KEY__ }} as _pk, * from {{ curr_relation }}
115
- {% endset %}
116
-
117
- {{ audit_helper.compare_column_values(
118
- a_query=a_query,
119
- b_query=b_query,
120
- primary_key="_pk",
121
- column_to_compare=column_to_compare
122
- ) }}
123
- """
114
+ {%- set default_null_value = "_recce_surrogate_key_null_" -%}
115
+ {%- set fields = [] -%}
116
+
117
+ {%- for field in primary_keys -%}
118
+ {%- do fields.append(
119
+ "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')"
120
+ ) -%}
121
+
122
+ {%- if not loop.last %}
123
+ {%- do fields.append("'-'") -%}
124
+ {%- endif -%}
125
+ {%- endfor -%}
126
+
127
+ {%- set _pk = dbt.hash(dbt.concat(fields)) -%}
128
+
129
+ with a_query as (
130
+ select {{ _pk }} as _pk, * from {{ base_relation }}
131
+ ),
132
+
133
+ b_query as (
134
+ select {{ _pk }} as _pk, * from {{ curr_relation }}
135
+ ),
136
+
137
+ joined as (
138
+ select
139
+ coalesce(a_query._pk, b_query._pk) as _pk,
140
+ a_query.{{ column_to_compare }} as a_query_value,
141
+ b_query.{{ column_to_compare }} as b_query_value,
142
+ case
143
+ when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 'perfect match'
144
+ when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then 'both are null'
145
+ when a_query._pk is null then 'missing from {{ a_relation_name }}'
146
+ when b_query._pk is null then 'missing from {{ b_relation_name }}'
147
+ when a_query.{{ column_to_compare }} is null then 'value is null in {{ a_relation_name }} only'
148
+ when b_query.{{ column_to_compare }} is null then 'value is null in {{ b_relation_name }} only'
149
+ when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 'values do not match'
150
+ else 'unknown' -- this should never happen
151
+ end as match_status
152
+ from a_query
153
+ full outer join b_query on a_query._pk = b_query._pk
154
+ ),
155
+
156
+ aggregated as (
157
+ select
158
+ '{{ column_to_compare }}' as column_name,
159
+ match_status,
160
+ count(*) as count_records
161
+ from joined
162
+ group by 1, 2
163
+ )
124
164
 
125
- if composite:
126
- if self.legacy_surrogate_key:
127
- new_primary_key = 'dbt_utils.surrogate_key(primary_key)'
128
- else:
129
- new_primary_key = 'dbt_utils.generate_surrogate_key(primary_key)'
130
- else:
131
- new_primary_key = 'primary_key'
132
- sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
165
+ select
166
+ column_name,
167
+ match_status,
168
+ count_records,
169
+ round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total
170
+ from aggregated
171
+ """
133
172
 
134
173
  for column in columns:
135
174
  self.update_progress(message=f"Diff column: {column}", percentage=completed / len(columns))
136
175
 
137
- sql = dbt_adpter.generate_sql(sql_template, context=dict(
138
- base_relation=dbt_adpter.create_relation(model, base=True),
139
- curr_relation=dbt_adpter.create_relation(model, base=False),
140
- primary_key=primary_key,
141
- column_to_compare=column,
142
- ))
176
+ sql = dbt_adapter.generate_sql(
177
+ sql_template,
178
+ context=dict(
179
+ base_relation=dbt_adapter.create_relation(model, base=True),
180
+ curr_relation=dbt_adapter.create_relation(model, base=False),
181
+ primary_keys=primary_key if composite else [primary_key],
182
+ column_to_compare=column,
183
+ ),
184
+ )
143
185
 
144
- _, table = dbt_adpter.execute(sql, fetch=True)
186
+ _, table = dbt_adapter.execute(sql, fetch=True)
187
+ if column not in column_groups:
188
+ column_groups[column] = dict(added=0, removed=0, mismatched=0, matched=0)
145
189
  for row in table.rows:
146
190
  # data example:
147
191
  # ('COLUMN_NAME', 'MATCH_STATUS', 'COUNT_RECORDS', 'PERCENT_OF_TOTAL')
148
- # ('EVENT_ID', '✅: perfect match', 158601510, Decimal('100.00'))
192
+ # ('EVENT_ID', 'perfect match', 158601510, Decimal('100.00'))
149
193
  column_name, column_state, row_count, total_rate = row
150
- if 'column_name' == row[0].lower():
194
+ if "column_name" == row[0].lower():
151
195
  # skip column names
152
196
  return
153
197
 
154
- #
155
198
  # sample data like this:
156
199
  # https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql
157
200
  #
158
- # '✅: perfect match' -> matched
159
- # '✅: both are null' -> matched
160
- # '🤷: missing from a' -> row added
161
- # '🤷: missing from b' -> row removed
162
- # '🤷: value is null in a only' -> mismatched
163
- # '🤷: value is null in b only' -> mismatched
164
- # '🙅: values do not match' -> mismatched
165
- # 'unknown' -> this should never happen
201
+ # 'perfect match' -> matched
202
+ # 'both are null' -> matched
203
+ # 'missing from a' -> row added
204
+ # 'missing from b' -> row removed
205
+ # 'value is null in a only' -> mismatched
206
+ # 'value is null in b only' -> mismatched
207
+ # 'values do not match' -> mismatched
208
+ # 'unknown' -> this should never happen
166
209
  # end as match_status,
167
210
 
168
- if column_name not in column_groups:
169
- column_groups[column_name] = dict(added=0, removed=0, mismatched=0, matched=0)
170
- if 'perfect match' in column_state:
171
- column_groups[column_name]['matched'] += row_count
172
- if 'both are null' in column_state:
173
- column_groups[column_name]['matched'] += row_count
174
- if 'missing from a' in column_state:
175
- column_groups[column_name]['added'] += row_count
176
- if 'missing from b' in column_state:
177
- column_groups[column_name]['removed'] += row_count
178
- if 'value is null in a only' in column_state:
179
- column_groups[column_name]['mismatched'] += row_count
180
- if 'value is null in b only' in column_state:
181
- column_groups[column_name]['mismatched'] += row_count
182
- if 'values do not match' in column_state:
183
- column_groups[column_name]['mismatched'] += row_count
211
+ state_mappings = {
212
+ "perfect match": "matched",
213
+ "both are null": "matched",
214
+ "missing from a": "added",
215
+ "missing from b": "removed",
216
+ "value is null in a only": "mismatched",
217
+ "value is null in b only": "mismatched",
218
+ "values do not match": "mismatched",
219
+ }
220
+
221
+ # Use the mapping to update counts
222
+ for state, action in state_mappings.items():
223
+ if state in column_state:
224
+ column_groups[column_name][action] += row_count
184
225
 
185
226
  # Cancel as early as possible
186
227
  self.check_cancel()
@@ -188,9 +229,9 @@ class ValueDiffTask(Task, ValueDiffMixin):
188
229
  completed = completed + 1
189
230
 
190
231
  first = list(column_groups.values())[0]
191
- added = first['added']
192
- removed = first['removed']
193
- common = first['matched'] + first['mismatched']
232
+ added = first["added"]
233
+ removed = first["removed"]
234
+ common = first["matched"] + first["mismatched"]
194
235
  total = common + added + removed
195
236
 
196
237
  row = []
@@ -200,12 +241,12 @@ class ValueDiffTask(Task, ValueDiffMixin):
200
241
  # This is incorrect when there are one side null
201
242
  # https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql#L20-L23
202
243
  # matched = v['matched']
203
- matched = common - v['mismatched']
244
+ matched = common - v["mismatched"]
204
245
  rate = None if common == 0 else matched / common
205
246
  record = [k, matched, rate]
206
247
  row.append(record)
207
248
 
208
- column_names = ['column', 'matched', 'matched_p']
249
+ column_names = ["column", "matched", "matched_p"]
209
250
  column_types = [agate.Text(), agate.Number(), agate.Number()]
210
251
  table = agate.Table(row, column_names=column_names, column_types=column_types)
211
252
 
@@ -224,9 +265,6 @@ class ValueDiffTask(Task, ValueDiffMixin):
224
265
  model: str = self.params.model
225
266
  columns: List[str] = self.params.columns
226
267
 
227
- self._verify_dbt_packages_deps(dbt_adapter)
228
- self.check_cancel()
229
-
230
268
  self._verify_primary_key(dbt_adapter, primary_key, model)
231
269
  self.check_cancel()
232
270
 
@@ -243,35 +281,34 @@ class ValueDiffTask(Task, ValueDiffMixin):
243
281
 
244
282
 
245
283
  class ValueDiffTaskResultDiffer(TaskResultDiffer):
246
-
247
284
  def _check_result_changed_fn(self, result):
248
285
  is_changed = False
249
- summary = result.get('summary', {})
250
- added = summary.get('added', 0)
251
- removed = summary.get('removed', 0)
252
- changes = {
253
- 'column_changed': []
254
- }
286
+ summary = result.get("summary", {})
287
+ added = summary.get("added", 0)
288
+ removed = summary.get("removed", 0)
289
+ changes = {"column_changed": []}
255
290
 
256
291
  if added > 0:
257
292
  is_changed = True
258
- changes['row_added'] = added
293
+ changes["row_added"] = added
259
294
 
260
295
  if removed > 0:
261
296
  is_changed = True
262
- changes['row_removed'] = removed
297
+ changes["row_removed"] = removed
263
298
 
264
- row_data = result.get('data', {}).get('data', [])
299
+ row_data = result.get("data", {}).get("data", [])
265
300
  for row in row_data:
266
301
  column, matched, matched_p = row
267
302
  if float(matched_p) < 1.0:
268
303
  # if there is any mismatched, we consider it as changed
269
304
  is_changed = True
270
- changes['column_changed'].append({
271
- 'column': column,
272
- 'matched': matched,
273
- 'matched_p': matched_p,
274
- })
305
+ changes["column_changed"].append(
306
+ {
307
+ "column": column,
308
+ "matched": matched,
309
+ "matched_p": matched_p,
310
+ }
311
+ )
275
312
 
276
313
  return changes if is_changed else None
277
314
 
@@ -287,15 +324,19 @@ class ValueDiffDetailResult(DataFrame):
287
324
 
288
325
 
289
326
  class ValueDiffDetailTask(Task, ValueDiffMixin):
290
-
291
327
  def __init__(self, params):
292
328
  super().__init__()
293
329
  self.params = ValueDiffParams(**params)
294
330
  self.connection = None
295
331
  self.legacy_surrogate_key = True
296
332
 
297
- def _query_value_diff(self, dbt_adapter, primary_key: Union[str, List[str]], model: str, columns: List[str] = None):
298
-
333
+ def _query_value_diff(
334
+ self,
335
+ dbt_adapter,
336
+ primary_key: Union[str, List[str]],
337
+ model: str,
338
+ columns: List[str] = None,
339
+ ):
299
340
  composite = True if isinstance(primary_key, List) else False
300
341
 
301
342
  if columns is None or len(columns) == 0:
@@ -312,45 +353,72 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
312
353
  columns.insert(0, primary_key)
313
354
 
314
355
  sql_template = r"""
315
- {% set col_list %}
316
- {%- for col in columns %}
317
- {{ col|trim }}
318
- {%- if not loop.last %},{{ '\n ' }}{%- endif -%}
319
- {%- endfor -%}
320
- {% endset %}
321
-
322
- {% set a_query %}
323
- select {{col_list}} from {{ base_relation }}
324
- {% endset %}
325
-
326
- {% set b_query %}
327
- select {{col_list}} from {{ curr_relation }}
328
- {% endset %}
329
-
330
- {{ audit_helper.compare_queries(
331
- a_query=a_query,
332
- b_query=b_query,
333
- primary_key=__PRIMARY_KEY__,
334
- summarize=False,
335
- ) }} limit {{ limit }}
336
- """
356
+ with a_query as (
357
+ select {{ columns | join(',\n') }} from {{ base_relation }}
358
+ ),
359
+
360
+ b_query as (
361
+ select {{ columns | join(',\n') }} from {{ curr_relation }}
362
+ ),
363
+
364
+ a_intersect_b as (
365
+ select * from a_query
366
+ {{ dbt.intersect() }}
367
+ select * from b_query
368
+ ),
369
+
370
+ a_except_b as (
371
+ select * from a_query
372
+ {{ dbt.except() }}
373
+ select * from b_query
374
+ ),
375
+
376
+ b_except_a as (
377
+ select * from b_query
378
+ {{ dbt.except() }}
379
+ select * from a_query
380
+ ),
381
+
382
+ all_records as (
383
+ select
384
+ *,
385
+ true as in_a,
386
+ true as in_b
387
+ from a_intersect_b
388
+
389
+ union all
390
+
391
+ select
392
+ *,
393
+ true as in_a,
394
+ false as in_b
395
+ from a_except_b
396
+
397
+ union all
398
+
399
+ select
400
+ *,
401
+ false as in_a,
402
+ true as in_b
403
+ from b_except_a
404
+ )
337
405
 
338
- if composite:
339
- if self.legacy_surrogate_key:
340
- new_primary_key = 'dbt_utils.surrogate_key(primary_key)'
341
- else:
342
- new_primary_key = 'dbt_utils.generate_surrogate_key(primary_key)'
343
- else:
344
- new_primary_key = 'primary_key'
345
- sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
406
+ select * from all_records
407
+ where not (in_a and in_b)
408
+ order by {{ primary_keys | join(',\n') }}, in_a desc, in_b desc
409
+ limit {{ limit }}
410
+ """
346
411
 
347
- sql = dbt_adapter.generate_sql(sql_template, context=dict(
348
- base_relation=dbt_adapter.create_relation(model, base=True),
349
- curr_relation=dbt_adapter.create_relation(model, base=False),
350
- primary_key=primary_key,
351
- columns=columns,
352
- limit=1000,
353
- ))
412
+ sql = dbt_adapter.generate_sql(
413
+ sql_template,
414
+ context=dict(
415
+ base_relation=dbt_adapter.create_relation(model, base=True),
416
+ curr_relation=dbt_adapter.create_relation(model, base=False),
417
+ primary_keys=primary_key if composite else [primary_key],
418
+ columns=columns,
419
+ limit=1000,
420
+ ),
421
+ )
354
422
 
355
423
  _, table = dbt_adapter.execute(sql, fetch=True)
356
424
  self.check_cancel()
@@ -358,8 +426,8 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
358
426
  return DataFrame.from_agate(table)
359
427
 
360
428
  def execute(self):
361
-
362
429
  from recce.adapter.dbt_adapter import DbtAdapter
430
+
363
431
  dbt_adapter: DbtAdapter = default_context().adapter
364
432
 
365
433
  with dbt_adapter.connection_named("value diff"):
@@ -369,9 +437,6 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
369
437
  model: str = self.params.model
370
438
  columns: List[str] = self.params.columns
371
439
 
372
- self._verify_dbt_packages_deps(dbt_adapter)
373
- self.check_cancel()
374
-
375
440
  self._verify_primary_key(dbt_adapter, primary_key, model)
376
441
  self.check_cancel()
377
442
 
@@ -379,6 +444,7 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
379
444
 
380
445
  def cancel(self):
381
446
  from recce.adapter.dbt_adapter import DbtAdapter
447
+
382
448
  if self.connection:
383
449
  adapter: DbtAdapter = default_context().adapter
384
450
  with adapter.connection_named("cancel"):
@@ -386,9 +452,8 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
386
452
 
387
453
 
388
454
  class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
389
-
390
455
  def _check_result_changed_fn(self, result):
391
- diff_data = result.get('data')
456
+ diff_data = result.get("data")
392
457
  if diff_data is None or len(diff_data) == 0:
393
458
  return None
394
459
 
@@ -397,7 +462,6 @@ class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
397
462
 
398
463
 
399
464
  class ValueDiffCheckValidator(CheckValidator):
400
-
401
465
  def validate_check(self, check: Check):
402
466
  try:
403
467
  ValueDiffParams(**check.params)
recce/util/__init__.py CHANGED
@@ -1 +1,4 @@
1
1
  from .singleton import SingletonMeta
2
+
3
+ # Explicitly declare exports
4
+ __all__ = ["SingletonMeta"]
recce/util/api_token.py ADDED
@@ -0,0 +1,80 @@
1
+ import click
2
+ from rich.console import Console
3
+
4
+ from recce import event
5
+ from recce.event import get_recce_api_token, update_recce_api_token
6
+ from recce.exceptions import RecceConfigException
7
+ from recce.util.recce_cloud import (
8
+ RECCE_CLOUD_BASE_URL,
9
+ RecceCloud,
10
+ )
11
+
12
+ console = Console()
13
+
14
+
15
+ def show_invalid_api_token_message():
16
+ """
17
+ Show the message when the API token is invalid.
18
+ """
19
+ console.print("[[red]Error[/red]] Invalid Recce Cloud API token.")
20
+ console.print("Please associate with your Recce Cloud account by the following command 'recce connect-to-cloud'.")
21
+ console.print(
22
+ "For more information, please visit: https://docs.reccehq.com/recce-cloud/share-recce-session-securely/#configure-recce-cloud-association-manually"
23
+ )
24
+
25
+
26
+ def prepare_api_token(
27
+ interaction=False,
28
+ **kwargs,
29
+ ):
30
+ """
31
+ Prepare the API token for the request.
32
+ """
33
+ # Verify the API token for Recce Cloud Share Link
34
+ api_token = get_recce_api_token()
35
+ new_api_token = kwargs.get("api_token")
36
+ if new_api_token is not None and new_api_token.startswith("rct-"):
37
+ # Task Token
38
+ valid = RecceCloud(new_api_token).verify_token()
39
+ if not valid:
40
+ raise RecceConfigException("Invalid Recce Cloud Task token")
41
+ api_token = new_api_token
42
+ elif api_token != new_api_token and new_api_token is not None:
43
+ # Handle the API token provided by option `--api-token`
44
+ valid = RecceCloud(new_api_token).verify_token()
45
+ if not valid:
46
+ raise RecceConfigException("Invalid Recce Cloud API token")
47
+ event.log_connected_to_cloud()
48
+ api_token = new_api_token
49
+ update_recce_api_token(api_token)
50
+ console.print(
51
+ "[[green]Success[/green]] User profile has been updated to include the Recce Cloud API Token. "
52
+ "You no longer need to append --api-token to the recce command"
53
+ )
54
+ elif api_token:
55
+ # Verify the API token from the user profile
56
+ valid = RecceCloud(api_token).verify_token()
57
+ if not valid:
58
+ console.print("[[yellow]Warning[/yellow]] Invalid Recce Cloud API token. Skipping the share link.")
59
+ api_token = None
60
+ if valid:
61
+ event.log_connected_to_cloud()
62
+ else:
63
+ # No api_token provided
64
+ if interaction:
65
+ console.print(
66
+ "An API token is required for this feature. This can be obtained in your user account settings.\n"
67
+ f"{RECCE_CLOUD_BASE_URL}/settings#tokens\n"
68
+ "Your API token can be added to '~/.recce/profile.yml' for more convenient sharing."
69
+ )
70
+ api_token = click.prompt("Your Recce API token", type=str, hide_input=True, show_default=False)
71
+ valid = RecceCloud(api_token).verify_token()
72
+ if not valid:
73
+ raise RecceConfigException("Invalid Recce Cloud API token")
74
+ update_recce_api_token(api_token)
75
+ console.print(
76
+ "[[green]Success[/green]] User profile has been updated to include the Recce Cloud API Token. "
77
+ "You no longer need to append --api-token to the recce command"
78
+ )
79
+
80
+ return api_token