recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/tasks/profile.py CHANGED
@@ -1,13 +1,141 @@
1
- import textwrap
2
1
  from typing import List
3
2
 
4
3
  from pydantic import BaseModel
5
4
 
6
- from .core import Task, TaskResultDiffer, CheckValidator
7
- from .dataframe import DataFrame
8
5
  from ..core import default_context
9
6
  from ..exceptions import RecceException
10
7
  from ..models import Check
8
+ from .core import CheckValidator, Task, TaskResultDiffer
9
+ from .dataframe import DataFrame
10
+
11
+ PROFILE_COLUMN_JINJA_TEMPLATE = r"""
12
+ {# Conditions -------------------------------------------- #}
13
+ {%- set is_struct = column_type.startswith('struct') -%}
14
+ {%- set is_numeric =
15
+ column_type.startswith('int') or
16
+ column_type.startswith('float') or
17
+ 'numeric' in column_type or
18
+ 'number' in column_type or
19
+ 'double' in column_type or
20
+ 'bigint' in column_type
21
+ -%}
22
+ {%- set is_date_or_time =
23
+ column_type.startswith('date') or
24
+ column_type.startswith('timestamp')
25
+ -%}
26
+ {%- set is_logical = column_type.startswith('bool') -%}
27
+
28
+ {%- if db_type == 'sqlserver' -%}
29
+ {%- set is_numeric = column_type in [
30
+ "bigint", "numeric", "smallint", "decimal", "int",
31
+ "tinyint", "money", "float", "real"
32
+ ]-%}
33
+ {%- elif db_type == 'athena' -%}
34
+ {%- set is_numeric =
35
+ "int" in column_type or
36
+ "float" in column_type or
37
+ "decimal" in column_type or
38
+ "double" in column_type
39
+ -%}
40
+ {%- endif -%}
41
+
42
+ {# General Agg ------------------------------------------- #}
43
+ {%- set agg_row_count = 'cast(count(*) as ' ~ dbt.type_bigint() ~ ')' -%}
44
+ {%- set agg_not_null_proportion =
45
+ 'sum(case when ' ~ adapter.quote(column_name) ~ ' is null '
46
+ ~ 'then 0 '
47
+ ~ 'else 1 end) / '
48
+ ~ 'cast(count(*) as ' ~ dbt.type_numeric() ~ ')'
49
+ -%}
50
+ {%- set agg_distinct_proportion =
51
+ 'count(distinct ' ~ adapter.quote(column_name) ~') / '
52
+ ~ 'cast(count(*) as ' ~ dbt.type_numeric() ~ ')'
53
+ -%}
54
+ {%- set agg_distinct_count = 'count(distinct ' ~ adapter.quote(column_name) ~ ')' -%}
55
+ {%- set agg_is_unique = 'count(distinct ' ~ adapter.quote(column_name) ~ ') = count(*)' -%}
56
+ {%- set agg_min = 'cast(null as ' ~ dbt.type_string() ~ ')' -%}
57
+ {%- set agg_max = 'cast(null as ' ~ dbt.type_string() ~ ')' -%}
58
+ {%- set agg_avg = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
59
+ {%- set agg_median = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
60
+
61
+
62
+ {%- if is_struct -%}
63
+ {%- set agg_distinct_proportion = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
64
+ {%- set agg_distinct_count = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
65
+ {%- set agg_is_unique = 'null' -%}
66
+ {%- endif -%}
67
+
68
+
69
+ {%- if (is_numeric or is_date_or_time) and (not is_struct) -%}
70
+ {%- set agg_min =
71
+ 'cast(min(' ~ adapter.quote(column_name) ~ ') as ' ~ dbt.type_string() ~ ')'
72
+ -%}
73
+ {%- set agg_max =
74
+ 'cast(max(' ~ adapter.quote(column_name) ~ ') as ' ~ dbt.type_string() ~ ')'
75
+ -%}
76
+ {%- endif -%}
77
+
78
+
79
+ {%- if is_numeric and not is_struct -%}
80
+ {%- set agg_avg = 'avg(' ~ adapter.quote(column_name) ~ ')' -%}
81
+
82
+ {%- if db_type == 'bigquery' -%}
83
+ {%- set agg_median = 'approx_quantiles(' ~ adapter.quote(column_name) ~ ', 100)[offset(50)]' -%}
84
+ {%- elif db_type == 'postgres' -%}
85
+ {%- set agg_median = 'percentile_cont(0.5) within group (order by ' ~ adapter.quote(column_name) ~ ')' -%}
86
+ {%- elif db_type == 'redshift' -%}
87
+ {%- set agg_median =
88
+ '(select percentile_cont(0.5) within group (order by '
89
+ ~ adapter.quote(column_name) ~ ') from ' ~ relation ~ ')' -%}
90
+ {%- elif db_type == 'athena' -%}
91
+ {%- set agg_median = 'approx_percentile( ' ~ adapter.quote(column_name) ~ ', 0.5)' -%}
92
+ {%- elif db_type == 'sqlserver' -%}
93
+ {%- set agg_median = 'percentile_cont(' ~ adapter.quote(column_name) ~ ', 0.5) over ()' -%}
94
+ {%- else -%}
95
+ {%- set agg_median = 'median(' ~ adapter.quote(column_name) ~ ')' -%}
96
+ {%- endif -%}
97
+ {%- elif is_logical -%}
98
+ {%- set agg_avg = 'avg(case when ' ~ adapter.quote(column_name) ~ ' then 1 else 0 end)' -%}
99
+ {%- endif -%}
100
+
101
+
102
+ {# Overwrite Agg ----------------------------------------- #}
103
+
104
+ {# DRC-663: Support bigquery array type #}
105
+ {%- set is_array = column_type.startswith('array') -%}
106
+ {%- if db_type == 'bigquery' and is_array -%}
107
+ {%- set agg_distinct_proportion = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
108
+ {%- set agg_distinct_count = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
109
+ {%- set agg_is_unique = 'null' -%}
110
+ {%- set agg_min =
111
+ 'cast(min(array_length(' ~ adapter.quote(column_name) ~ ')) as ' ~ dbt.type_string() ~ ')'
112
+ -%}
113
+ {%- set agg_max =
114
+ 'cast(max(array_length(' ~ adapter.quote(column_name) ~ ')) as ' ~ dbt.type_string() ~ ')'
115
+ -%}
116
+ {%- set agg_avg = 'avg(array_length(' ~ adapter.quote(column_name) ~ '))' -%}
117
+ {%- set agg_median =
118
+ 'approx_quantiles(array_length(' ~ adapter.quote(column_name) ~ '), 100)[offset(50)]'
119
+ -%}
120
+ {%- endif -%}
121
+
122
+
123
+ {# Main Query -------------------------------------------- #}
124
+
125
+ select
126
+ '{{ column_name }}' as column_name,
127
+ nullif('{{ column_type }}', '') as data_type,
128
+ {{ agg_row_count }} as row_count,
129
+ {{ agg_not_null_proportion }} as not_null_proportion,
130
+ {{ agg_distinct_proportion }} as distinct_proportion,
131
+ {{ agg_distinct_count }} as distinct_count,
132
+ {{ agg_is_unique }} as is_unique,
133
+ {{ agg_min }} as min,
134
+ {{ agg_max }} as max,
135
+ {{ agg_avg }} as avg,
136
+ {{ agg_median }} as median
137
+ from {{ relation }}
138
+ """
11
139
 
12
140
 
13
141
  class ProfileParams(BaseModel):
@@ -33,14 +161,14 @@ class ProfileDiffTask(Task):
33
161
 
34
162
  def execute(self):
35
163
  import agate
164
+
36
165
  from recce.adapter.dbt_adapter import DbtAdapter, merge_tables
166
+
37
167
  dbt_adapter: DbtAdapter = default_context().adapter
38
168
 
39
169
  model: str = self.params.model
40
170
  selected_columns: List[str] = self.params.columns
41
171
 
42
- self._verify_dbt_profiler(dbt_adapter)
43
-
44
172
  with dbt_adapter.connection_named("query"):
45
173
  self.connection = dbt_adapter.get_thread_connection()
46
174
 
@@ -58,7 +186,7 @@ class ProfileDiffTask(Task):
58
186
  tables: List[agate.Table] = []
59
187
 
60
188
  for column in base_columns:
61
- self.update_progress(message=f'[Base] Profile column: {column.name}', percentage=completed / total)
189
+ self.update_progress(message=f"[Base] Profile column: {column.name}", percentage=completed / total)
62
190
  relation = dbt_adapter.create_relation(model, base=True)
63
191
  response, table = self._profile_column(dbt_adapter, relation, column)
64
192
  tables.append(table)
@@ -68,7 +196,7 @@ class ProfileDiffTask(Task):
68
196
 
69
197
  tables: List[agate.Table] = []
70
198
  for column in curr_columns:
71
- self.update_progress(message=f'[Current] Profile column: {column.column}', percentage=completed / total)
199
+ self.update_progress(message=f"[Current] Profile column: {column.column}", percentage=completed / total)
72
200
  relation = dbt_adapter.create_relation(model, base=False)
73
201
  response, table = self._profile_column(dbt_adapter, relation, column)
74
202
  tables.append(table)
@@ -76,87 +204,23 @@ class ProfileDiffTask(Task):
76
204
  self.check_cancel()
77
205
  current = DataFrame.from_agate(merge_tables(tables))
78
206
 
79
- return ProfileDiffResult(base=base, current=current)
207
+ if len(base.columns) == 0 and len(current.columns) != 0:
208
+ base.columns = current.columns
209
+ elif len(base.columns) != 0 and len(current.columns) == 0:
210
+ current.columns = base.columns
80
211
 
81
- def _verify_dbt_profiler(self, dbt_adapter):
82
- for macro_name, macro in dbt_adapter.manifest.macros.items():
83
- if macro.package_name == 'dbt_profiler':
84
- break
85
- else:
86
- raise RecceException(
87
- r"Package 'dbt_profiler' not found. Please refer to the link to install: https://hub.getdbt.com/data-mie/dbt_profiler/")
212
+ return ProfileDiffResult(base=base, current=current)
88
213
 
89
214
  def _profile_column(self, dbt_adapter, relation, column):
90
- sql_template = textwrap.dedent(r"""
91
- select
92
- '{{column_name}}' as column_name,
93
- nullif('{{column_type}}', '') as data_type,
94
- {{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
95
- {{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
96
- {{ dbt_profiler.measure_distinct_proportion(column_name, column_type) }} as distinct_proportion,
97
- {{ dbt_profiler.measure_distinct_count(column_name, column_type) }} as distinct_count,
98
- {{ dbt_profiler.measure_is_unique(column_name, column_type) }} as is_unique,
99
- {{ dbt_profiler.measure_min(column_name, column_type) }} as min,
100
- {{ dbt_profiler.measure_max(column_name, column_type) }} as max,
101
- {{ dbt_profiler.measure_avg(column_name, column_type) }} as avg,
102
- {{ dbt_profiler.measure_median(column_name, column_type) }} as median
103
- from
104
- {{ relation }}
105
- """)
106
215
  column_name = column.name
107
216
  column_type = column.data_type.lower()
108
- db_type = dbt_adapter.adapter.type()
109
- if db_type == 'bigquery' and column_type.startswith('array'):
110
- # DRC-663: Support bigquery array type
111
- sql_template = textwrap.dedent(r"""
112
- select
113
- '{{column_name}}' as column_name,
114
- nullif('{{column_type}}', '') as data_type,
115
- {{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
116
- {{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
117
- cast(null as {{ dbt.type_numeric() }}) as distinct_proportion,
118
- cast(null as {{ dbt.type_numeric() }}) as distinct_count,
119
- null as is_unique,
120
- cast(min(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as {{ dbt_profiler.type_string() }}) as min,
121
- cast(max(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as {{ dbt_profiler.type_string() }}) as max,
122
- avg(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as avg,
123
- APPROX_QUANTILES(ARRAY_LENGTH({{ adapter.quote(column_name) }}), 100)[OFFSET(50)] as median,
124
- from
125
- {{ relation }}
126
- """)
127
- elif db_type == 'redshift':
128
- # DRC-1149: Support redshift median calculation
129
- # https://github.com/data-mie/dbt-profiler/pull/89
130
- #
131
- # Since dbt-profiler 0.8.2, there is the third parameter for measure_median
132
- # For sake of compatibility, we use the new way to call the macro only for redshift
133
- sql_template = textwrap.dedent(r"""
134
- with source_data as (
135
- select
136
- *
137
- from {{ relation }}
138
- )
139
- select
140
- '{{column_name}}' as column_name,
141
- nullif('{{column_type}}', '') as data_type,
142
- {{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
143
- {{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
144
- {{ dbt_profiler.measure_distinct_proportion(column_name, column_type) }} as distinct_proportion,
145
- {{ dbt_profiler.measure_distinct_count(column_name, column_type) }} as distinct_count,
146
- {{ dbt_profiler.measure_is_unique(column_name, column_type) }} as is_unique,
147
- {{ dbt_profiler.measure_min(column_name, column_type) }} as min,
148
- {{ dbt_profiler.measure_max(column_name, column_type) }} as max,
149
- {{ dbt_profiler.measure_avg(column_name, column_type) }} as avg,
150
- ({{ dbt_profiler.measure_median(column_name, column_type, 'source_data') }}) as median
151
- from
152
- source_data
153
- """)
217
+ db_type = dbt_adapter.adapter.type().lower()
154
218
 
155
219
  try:
156
220
  sql = dbt_adapter.generate_sql(
157
- sql_template,
221
+ PROFILE_COLUMN_JINJA_TEMPLATE,
158
222
  base=False, # always false because we use the macro in current manifest
159
- context=dict(relation=relation, column_name=column_name, column_type=column_type)
223
+ context=dict(relation=relation, column_name=column_name, column_type=column_type, db_type=db_type),
160
224
  )
161
225
  except Exception as e:
162
226
  raise RecceException(f"Failed to generate SQL for profiling column: {column_name}") from e
@@ -165,14 +229,15 @@ class ProfileDiffTask(Task):
165
229
  return dbt_adapter.execute(sql, fetch=True)
166
230
  except Exception as e:
167
231
  from recce.adapter.dbt_adapter import dbt_version
168
- if dbt_version < 'v1.8':
232
+
233
+ if dbt_version < "v1.8":
169
234
  from dbt.exceptions import DbtDatabaseError
170
235
  else:
171
236
  from dbt_common.exceptions import DbtDatabaseError
172
237
  if isinstance(e, DbtDatabaseError):
173
- if str(e).find('100051') >= 0:
238
+ if str(e).find("100051") >= 0:
174
239
  # Snowflake error '100051 (22012): Division by zero"'
175
- e = RecceException('No profile diff result due to the model is empty.', False)
240
+ e = RecceException("No profile diff result due to the model is empty.", False)
176
241
  raise e
177
242
 
178
243
  def cancel(self):
@@ -180,6 +245,7 @@ class ProfileDiffTask(Task):
180
245
 
181
246
  if self.connection:
182
247
  from recce.adapter.dbt_adapter import DbtAdapter
248
+
183
249
  dbt_adapter: DbtAdapter = default_context().adapter
184
250
  with dbt_adapter.connection_named("cancel"):
185
251
  dbt_adapter.cancel(self.connection)
@@ -187,7 +253,7 @@ class ProfileDiffTask(Task):
187
253
 
188
254
  class ProfileDiffResultDiffer(TaskResultDiffer):
189
255
  def _check_result_changed_fn(self, result):
190
- return self.diff(result['base'], result['current'])
256
+ return self.diff(result["base"], result["current"])
191
257
 
192
258
 
193
259
  class ProfileCheckValidator(CheckValidator):
@@ -202,14 +268,14 @@ class ProfileCheckValidator(CheckValidator):
202
268
  class ProfileTask(ProfileDiffTask):
203
269
  def execute(self):
204
270
  import agate
271
+
205
272
  from recce.adapter.dbt_adapter import DbtAdapter, merge_tables
273
+
206
274
  dbt_adapter: DbtAdapter = default_context().adapter
207
275
 
208
276
  model: str = self.params.model
209
277
  selected_columns: List[str] = self.params.columns
210
278
 
211
- self._verify_dbt_profiler(dbt_adapter)
212
-
213
279
  with dbt_adapter.connection_named("query"):
214
280
  self.connection = dbt_adapter.get_thread_connection()
215
281
  curr_columns = [column for column in dbt_adapter.get_columns(model, base=False)]
@@ -222,7 +288,7 @@ class ProfileTask(ProfileDiffTask):
222
288
 
223
289
  tables: List[agate.Table] = []
224
290
  for column in curr_columns:
225
- self.update_progress(message=f'[Current] Profile column: {column.column}', percentage=completed / total)
291
+ self.update_progress(message=f"[Current] Profile column: {column.column}", percentage=completed / total)
226
292
  relation = dbt_adapter.create_relation(model, base=False)
227
293
  response, table = self._profile_column(dbt_adapter, relation, column)
228
294
  tables.append(table)