recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +845 -461
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +59 -42
- recce/apis/check_events_api.py +353 -0
- recce/apis/check_func.py +41 -35
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +64 -25
- recce/artifact.py +119 -51
- recce/cli.py +1301 -324
- recce/config.py +43 -34
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404/index.html +2 -0
- recce/data/404.html +2 -1
- recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
- recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
- recce/data/__next.__PAGE__.txt +6 -0
- recce/data/__next._full.txt +32 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +14 -0
- recce/data/__next._tree.txt +8 -0
- recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
- recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
- recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
- recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
- recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
- recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
- recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
- recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
- recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
- recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
- recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
- recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
- recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
- recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
- recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
- recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
- recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
- recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
- recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
- recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
- recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
- recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
- recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
- recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
- recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
- recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
- recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
- recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
- recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
- recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
- recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
- recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
- recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
- recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
- recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
- recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
- recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
- recce/data/_not-found/__next._full.txt +24 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +13 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +6 -0
- recce/data/_not-found/index.html +2 -0
- recce/data/_not-found/index.txt +24 -0
- recce/data/auth_callback.html +68 -0
- recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/checks/__next._full.txt +39 -0
- recce/data/checks/__next._head.txt +8 -0
- recce/data/checks/__next._index.txt +14 -0
- recce/data/checks/__next._tree.txt +8 -0
- recce/data/checks/__next.checks.__PAGE__.txt +10 -0
- recce/data/checks/__next.checks.txt +4 -0
- recce/data/checks/index.html +2 -0
- recce/data/checks/index.txt +39 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +2 -27
- recce/data/index.txt +32 -7
- recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/lineage/__next._full.txt +39 -0
- recce/data/lineage/__next._head.txt +8 -0
- recce/data/lineage/__next._index.txt +14 -0
- recce/data/lineage/__next._tree.txt +8 -0
- recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
- recce/data/lineage/__next.lineage.txt +4 -0
- recce/data/lineage/index.html +2 -0
- recce/data/lineage/index.txt +39 -0
- recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/query/__next._full.txt +37 -0
- recce/data/query/__next._head.txt +8 -0
- recce/data/query/__next._index.txt +14 -0
- recce/data/query/__next._tree.txt +8 -0
- recce/data/query/__next.query.__PAGE__.txt +9 -0
- recce/data/query/__next.query.txt +4 -0
- recce/data/query/index.html +2 -0
- recce/data/query/index.txt +37 -0
- recce/diff.py +6 -12
- recce/event/CONFIG.bak +1 -0
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +725 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +438 -21
- recce/models/run.py +1 -0
- recce/models/types.py +134 -28
- recce/pull_request.py +27 -25
- recce/run.py +179 -122
- recce/server.py +394 -104
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +644 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +196 -149
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +180 -89
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/utils.py +147 -0
- recce/tasks/valuediff.py +247 -155
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +105 -100
- recce/util/cll.py +274 -219
- recce/util/cloud/__init__.py +15 -0
- recce/util/cloud/base.py +115 -0
- recce/util/cloud/check_events.py +190 -0
- recce/util/cloud/checks.py +242 -0
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +347 -72
- recce/util/singleton.py +4 -4
- recce/util/startup_perf.py +121 -0
- recce/yaml/__init__.py +7 -10
- recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
- recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
- recce/state.py +0 -753
- recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
- recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
- recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/conftest.py +0 -13
- tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
- tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
- tests/adapter/dbt_adapter/test_selector.py +0 -177
- tests/tasks/__init__.py +0 -0
- tests/tasks/conftest.py +0 -4
- tests/tasks/test_histogram.py +0 -137
- tests/tasks/test_lineage.py +0 -42
- tests/tasks/test_preset_checks.py +0 -50
- tests/tasks/test_profile.py +0 -73
- tests/tasks/test_query.py +0 -151
- tests/tasks/test_row_count.py +0 -116
- tests/tasks/test_schema.py +0 -99
- tests/tasks/test_top_k.py +0 -73
- tests/tasks/test_valuediff.py +0 -74
- tests/test_cli.py +0 -122
- tests/test_config.py +0 -45
- tests/test_core.py +0 -27
- tests/test_dbt.py +0 -36
- tests/test_pull_request.py +0 -130
- tests/test_server.py +0 -98
- tests/test_state.py +0 -123
- tests/test_summary.py +0 -57
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/tasks/profile.py
CHANGED
|
@@ -1,13 +1,141 @@
|
|
|
1
|
-
import textwrap
|
|
2
1
|
from typing import List
|
|
3
2
|
|
|
4
3
|
from pydantic import BaseModel
|
|
5
4
|
|
|
6
|
-
from .core import Task, TaskResultDiffer, CheckValidator
|
|
7
|
-
from .dataframe import DataFrame
|
|
8
5
|
from ..core import default_context
|
|
9
6
|
from ..exceptions import RecceException
|
|
10
7
|
from ..models import Check
|
|
8
|
+
from .core import CheckValidator, Task, TaskResultDiffer
|
|
9
|
+
from .dataframe import DataFrame
|
|
10
|
+
|
|
11
|
+
# Jinja template that renders a single-row SQL query profiling one column.
#
# Context variables read by the template:
#   relation     - relation to select from (rendered verbatim in the FROM clause)
#   column_name  - column to profile; quoted through ``adapter.quote``
#   column_type  - lower-cased data type, used to pick which aggregates apply
#   db_type      - adapter type name, used for dialect-specific SQL
# The template also calls ``dbt.type_bigint()``, ``dbt.type_numeric()`` and
# ``dbt.type_string()``, so ``dbt`` and ``adapter`` must be available in the
# rendering context.
#
# NOTE: every ``{# ... #}`` comment must be closed with ``#}``. A Jinja comment
# runs until the next ``#}``, so an unterminated one silently swallows all the
# statements that follow it (this previously disabled the BigQuery array
# override below).
PROFILE_COLUMN_JINJA_TEMPLATE = r"""
{# Conditions -------------------------------------------- #}
{%- set is_struct = column_type.startswith('struct') -%}
{%- set is_numeric =
    column_type.startswith('int') or
    column_type.startswith('float') or
    'numeric' in column_type or
    'number' in column_type or
    'double' in column_type or
    'bigint' in column_type
-%}
{%- set is_date_or_time =
    column_type.startswith('date') or
    column_type.startswith('timestamp')
-%}
{%- set is_logical = column_type.startswith('bool') -%}

{%- if db_type == 'sqlserver' -%}
    {%- set is_numeric = column_type in [
        "bigint", "numeric", "smallint", "decimal", "int",
        "tinyint", "money", "float", "real"
    ]-%}
{%- elif db_type == 'athena' -%}
    {%- set is_numeric =
        "int" in column_type or
        "float" in column_type or
        "decimal" in column_type or
        "double" in column_type
    -%}
{%- endif -%}

{# General Agg ------------------------------------------- #}
{%- set agg_row_count = 'cast(count(*) as ' ~ dbt.type_bigint() ~ ')' -%}
{%- set agg_not_null_proportion =
    'sum(case when ' ~ adapter.quote(column_name) ~ ' is null '
    ~ 'then 0 '
    ~ 'else 1 end) / '
    ~ 'cast(count(*) as ' ~ dbt.type_numeric() ~ ')'
-%}
{%- set agg_distinct_proportion =
    'count(distinct ' ~ adapter.quote(column_name) ~ ') / '
    ~ 'cast(count(*) as ' ~ dbt.type_numeric() ~ ')'
-%}
{%- set agg_distinct_count = 'count(distinct ' ~ adapter.quote(column_name) ~ ')' -%}
{%- set agg_is_unique = 'count(distinct ' ~ adapter.quote(column_name) ~ ') = count(*)' -%}
{%- set agg_min = 'cast(null as ' ~ dbt.type_string() ~ ')' -%}
{%- set agg_max = 'cast(null as ' ~ dbt.type_string() ~ ')' -%}
{%- set agg_avg = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
{%- set agg_median = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}


{%- if is_struct -%}
    {%- set agg_distinct_proportion = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
    {%- set agg_distinct_count = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
    {%- set agg_is_unique = 'null' -%}
{%- endif -%}


{%- if (is_numeric or is_date_or_time) and (not is_struct) -%}
    {%- set agg_min =
        'cast(min(' ~ adapter.quote(column_name) ~ ') as ' ~ dbt.type_string() ~ ')'
    -%}
    {%- set agg_max =
        'cast(max(' ~ adapter.quote(column_name) ~ ') as ' ~ dbt.type_string() ~ ')'
    -%}
{%- endif -%}


{%- if is_numeric and not is_struct -%}
    {%- set agg_avg = 'avg(' ~ adapter.quote(column_name) ~ ')' -%}

    {%- if db_type == 'bigquery' -%}
        {%- set agg_median = 'approx_quantiles(' ~ adapter.quote(column_name) ~ ', 100)[offset(50)]' -%}
    {%- elif db_type == 'postgres' -%}
        {%- set agg_median = 'percentile_cont(0.5) within group (order by ' ~ adapter.quote(column_name) ~ ')' -%}
    {%- elif db_type == 'redshift' -%}
        {%- set agg_median =
            '(select percentile_cont(0.5) within group (order by '
            ~ adapter.quote(column_name) ~ ') from ' ~ relation ~ ')' -%}
    {%- elif db_type == 'athena' -%}
        {%- set agg_median = 'approx_percentile( ' ~ adapter.quote(column_name) ~ ', 0.5)' -%}
    {%- elif db_type == 'sqlserver' -%}
        {%- set agg_median = 'percentile_cont(' ~ adapter.quote(column_name) ~ ', 0.5) over ()' -%}
    {%- else -%}
        {%- set agg_median = 'median(' ~ adapter.quote(column_name) ~ ')' -%}
    {%- endif -%}
{%- elif is_logical -%}
    {%- set agg_avg = 'avg(case when ' ~ adapter.quote(column_name) ~ ' then 1 else 0 end)' -%}
{%- endif -%}


{# Overwrite Agg ----------------------------------------- #}

{# DRC-663: Support bigquery array type #}
{%- set is_array = column_type.startswith('array') -%}
{%- if db_type == 'bigquery' and is_array -%}
    {%- set agg_distinct_proportion = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
    {%- set agg_distinct_count = 'cast(null as ' ~ dbt.type_numeric() ~ ')' -%}
    {%- set agg_is_unique = 'null' -%}
    {%- set agg_min =
        'cast(min(array_length(' ~ adapter.quote(column_name) ~ ')) as ' ~ dbt.type_string() ~ ')'
    -%}
    {%- set agg_max =
        'cast(max(array_length(' ~ adapter.quote(column_name) ~ ')) as ' ~ dbt.type_string() ~ ')'
    -%}
    {%- set agg_avg = 'avg(array_length(' ~ adapter.quote(column_name) ~ '))' -%}
    {%- set agg_median =
        'approx_quantiles(array_length(' ~ adapter.quote(column_name) ~ '), 100)[offset(50)]'
    -%}
{%- endif -%}


{# Main Query -------------------------------------------- #}

select
    '{{ column_name }}' as column_name,
    nullif('{{ column_type }}', '') as data_type,
    {{ agg_row_count }} as row_count,
    {{ agg_not_null_proportion }} as not_null_proportion,
    {{ agg_distinct_proportion }} as distinct_proportion,
    {{ agg_distinct_count }} as distinct_count,
    {{ agg_is_unique }} as is_unique,
    {{ agg_min }} as min,
    {{ agg_max }} as max,
    {{ agg_avg }} as avg,
    {{ agg_median }} as median
from {{ relation }}
"""
|
|
11
139
|
|
|
12
140
|
|
|
13
141
|
class ProfileParams(BaseModel):
|
|
@@ -33,14 +161,14 @@ class ProfileDiffTask(Task):
|
|
|
33
161
|
|
|
34
162
|
def execute(self):
|
|
35
163
|
import agate
|
|
164
|
+
|
|
36
165
|
from recce.adapter.dbt_adapter import DbtAdapter, merge_tables
|
|
166
|
+
|
|
37
167
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
38
168
|
|
|
39
169
|
model: str = self.params.model
|
|
40
170
|
selected_columns: List[str] = self.params.columns
|
|
41
171
|
|
|
42
|
-
self._verify_dbt_profiler(dbt_adapter)
|
|
43
|
-
|
|
44
172
|
with dbt_adapter.connection_named("query"):
|
|
45
173
|
self.connection = dbt_adapter.get_thread_connection()
|
|
46
174
|
|
|
@@ -58,7 +186,7 @@ class ProfileDiffTask(Task):
|
|
|
58
186
|
tables: List[agate.Table] = []
|
|
59
187
|
|
|
60
188
|
for column in base_columns:
|
|
61
|
-
self.update_progress(message=f
|
|
189
|
+
self.update_progress(message=f"[Base] Profile column: {column.name}", percentage=completed / total)
|
|
62
190
|
relation = dbt_adapter.create_relation(model, base=True)
|
|
63
191
|
response, table = self._profile_column(dbt_adapter, relation, column)
|
|
64
192
|
tables.append(table)
|
|
@@ -68,7 +196,7 @@ class ProfileDiffTask(Task):
|
|
|
68
196
|
|
|
69
197
|
tables: List[agate.Table] = []
|
|
70
198
|
for column in curr_columns:
|
|
71
|
-
self.update_progress(message=f
|
|
199
|
+
self.update_progress(message=f"[Current] Profile column: {column.column}", percentage=completed / total)
|
|
72
200
|
relation = dbt_adapter.create_relation(model, base=False)
|
|
73
201
|
response, table = self._profile_column(dbt_adapter, relation, column)
|
|
74
202
|
tables.append(table)
|
|
@@ -76,87 +204,23 @@ class ProfileDiffTask(Task):
|
|
|
76
204
|
self.check_cancel()
|
|
77
205
|
current = DataFrame.from_agate(merge_tables(tables))
|
|
78
206
|
|
|
79
|
-
|
|
207
|
+
if len(base.columns) == 0 and len(current.columns) != 0:
|
|
208
|
+
base.columns = current.columns
|
|
209
|
+
elif len(base.columns) != 0 and len(current.columns) == 0:
|
|
210
|
+
current.columns = base.columns
|
|
80
211
|
|
|
81
|
-
|
|
82
|
-
for macro_name, macro in dbt_adapter.manifest.macros.items():
|
|
83
|
-
if macro.package_name == 'dbt_profiler':
|
|
84
|
-
break
|
|
85
|
-
else:
|
|
86
|
-
raise RecceException(
|
|
87
|
-
r"Package 'dbt_profiler' not found. Please refer to the link to install: https://hub.getdbt.com/data-mie/dbt_profiler/")
|
|
212
|
+
return ProfileDiffResult(base=base, current=current)
|
|
88
213
|
|
|
89
214
|
def _profile_column(self, dbt_adapter, relation, column):
|
|
90
|
-
sql_template = textwrap.dedent(r"""
|
|
91
|
-
select
|
|
92
|
-
'{{column_name}}' as column_name,
|
|
93
|
-
nullif('{{column_type}}', '') as data_type,
|
|
94
|
-
{{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
|
|
95
|
-
{{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
|
|
96
|
-
{{ dbt_profiler.measure_distinct_proportion(column_name, column_type) }} as distinct_proportion,
|
|
97
|
-
{{ dbt_profiler.measure_distinct_count(column_name, column_type) }} as distinct_count,
|
|
98
|
-
{{ dbt_profiler.measure_is_unique(column_name, column_type) }} as is_unique,
|
|
99
|
-
{{ dbt_profiler.measure_min(column_name, column_type) }} as min,
|
|
100
|
-
{{ dbt_profiler.measure_max(column_name, column_type) }} as max,
|
|
101
|
-
{{ dbt_profiler.measure_avg(column_name, column_type) }} as avg,
|
|
102
|
-
{{ dbt_profiler.measure_median(column_name, column_type) }} as median
|
|
103
|
-
from
|
|
104
|
-
{{ relation }}
|
|
105
|
-
""")
|
|
106
215
|
column_name = column.name
|
|
107
216
|
column_type = column.data_type.lower()
|
|
108
|
-
db_type = dbt_adapter.adapter.type()
|
|
109
|
-
if db_type == 'bigquery' and column_type.startswith('array'):
|
|
110
|
-
# DRC-663: Support bigquery array type
|
|
111
|
-
sql_template = textwrap.dedent(r"""
|
|
112
|
-
select
|
|
113
|
-
'{{column_name}}' as column_name,
|
|
114
|
-
nullif('{{column_type}}', '') as data_type,
|
|
115
|
-
{{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
|
|
116
|
-
{{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
|
|
117
|
-
cast(null as {{ dbt.type_numeric() }}) as distinct_proportion,
|
|
118
|
-
cast(null as {{ dbt.type_numeric() }}) as distinct_count,
|
|
119
|
-
null as is_unique,
|
|
120
|
-
cast(min(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as {{ dbt_profiler.type_string() }}) as min,
|
|
121
|
-
cast(max(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as {{ dbt_profiler.type_string() }}) as max,
|
|
122
|
-
avg(ARRAY_LENGTH({{ adapter.quote(column_name) }})) as avg,
|
|
123
|
-
APPROX_QUANTILES(ARRAY_LENGTH({{ adapter.quote(column_name) }}), 100)[OFFSET(50)] as median,
|
|
124
|
-
from
|
|
125
|
-
{{ relation }}
|
|
126
|
-
""")
|
|
127
|
-
elif db_type == 'redshift':
|
|
128
|
-
# DRC-1149: Support redshift median calculation
|
|
129
|
-
# https://github.com/data-mie/dbt-profiler/pull/89
|
|
130
|
-
#
|
|
131
|
-
# Since dbt-profiler 0.8.2, there is the third parameter for measure_median
|
|
132
|
-
# For sake of compatibility, we use the new way to call the macro only for redshift
|
|
133
|
-
sql_template = textwrap.dedent(r"""
|
|
134
|
-
with source_data as (
|
|
135
|
-
select
|
|
136
|
-
*
|
|
137
|
-
from {{ relation }}
|
|
138
|
-
)
|
|
139
|
-
select
|
|
140
|
-
'{{column_name}}' as column_name,
|
|
141
|
-
nullif('{{column_type}}', '') as data_type,
|
|
142
|
-
{{ dbt_profiler.measure_row_count(column_name, column_type) }} as row_count,
|
|
143
|
-
{{ dbt_profiler.measure_not_null_proportion(column_name, column_type) }} as not_null_proportion,
|
|
144
|
-
{{ dbt_profiler.measure_distinct_proportion(column_name, column_type) }} as distinct_proportion,
|
|
145
|
-
{{ dbt_profiler.measure_distinct_count(column_name, column_type) }} as distinct_count,
|
|
146
|
-
{{ dbt_profiler.measure_is_unique(column_name, column_type) }} as is_unique,
|
|
147
|
-
{{ dbt_profiler.measure_min(column_name, column_type) }} as min,
|
|
148
|
-
{{ dbt_profiler.measure_max(column_name, column_type) }} as max,
|
|
149
|
-
{{ dbt_profiler.measure_avg(column_name, column_type) }} as avg,
|
|
150
|
-
({{ dbt_profiler.measure_median(column_name, column_type, 'source_data') }}) as median
|
|
151
|
-
from
|
|
152
|
-
source_data
|
|
153
|
-
""")
|
|
217
|
+
db_type = dbt_adapter.adapter.type().lower()
|
|
154
218
|
|
|
155
219
|
try:
|
|
156
220
|
sql = dbt_adapter.generate_sql(
|
|
157
|
-
|
|
221
|
+
PROFILE_COLUMN_JINJA_TEMPLATE,
|
|
158
222
|
base=False, # always false because we use the macro in current manifest
|
|
159
|
-
context=dict(relation=relation, column_name=column_name, column_type=column_type)
|
|
223
|
+
context=dict(relation=relation, column_name=column_name, column_type=column_type, db_type=db_type),
|
|
160
224
|
)
|
|
161
225
|
except Exception as e:
|
|
162
226
|
raise RecceException(f"Failed to generate SQL for profiling column: {column_name}") from e
|
|
@@ -165,14 +229,15 @@ class ProfileDiffTask(Task):
|
|
|
165
229
|
return dbt_adapter.execute(sql, fetch=True)
|
|
166
230
|
except Exception as e:
|
|
167
231
|
from recce.adapter.dbt_adapter import dbt_version
|
|
168
|
-
|
|
232
|
+
|
|
233
|
+
if dbt_version < "v1.8":
|
|
169
234
|
from dbt.exceptions import DbtDatabaseError
|
|
170
235
|
else:
|
|
171
236
|
from dbt_common.exceptions import DbtDatabaseError
|
|
172
237
|
if isinstance(e, DbtDatabaseError):
|
|
173
|
-
if str(e).find(
|
|
238
|
+
if str(e).find("100051") >= 0:
|
|
174
239
|
# Snowflake error '100051 (22012): Division by zero"'
|
|
175
|
-
e = RecceException(
|
|
240
|
+
e = RecceException("No profile diff result due to the model is empty.", False)
|
|
176
241
|
raise e
|
|
177
242
|
|
|
178
243
|
def cancel(self):
|
|
@@ -180,6 +245,7 @@ class ProfileDiffTask(Task):
|
|
|
180
245
|
|
|
181
246
|
if self.connection:
|
|
182
247
|
from recce.adapter.dbt_adapter import DbtAdapter
|
|
248
|
+
|
|
183
249
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
184
250
|
with dbt_adapter.connection_named("cancel"):
|
|
185
251
|
dbt_adapter.cancel(self.connection)
|
|
@@ -187,7 +253,7 @@ class ProfileDiffTask(Task):
|
|
|
187
253
|
|
|
188
254
|
class ProfileDiffResultDiffer(TaskResultDiffer):
|
|
189
255
|
def _check_result_changed_fn(self, result):
|
|
190
|
-
return self.diff(result[
|
|
256
|
+
return self.diff(result["base"], result["current"])
|
|
191
257
|
|
|
192
258
|
|
|
193
259
|
class ProfileCheckValidator(CheckValidator):
|
|
@@ -202,14 +268,14 @@ class ProfileCheckValidator(CheckValidator):
|
|
|
202
268
|
class ProfileTask(ProfileDiffTask):
|
|
203
269
|
def execute(self):
|
|
204
270
|
import agate
|
|
271
|
+
|
|
205
272
|
from recce.adapter.dbt_adapter import DbtAdapter, merge_tables
|
|
273
|
+
|
|
206
274
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
207
275
|
|
|
208
276
|
model: str = self.params.model
|
|
209
277
|
selected_columns: List[str] = self.params.columns
|
|
210
278
|
|
|
211
|
-
self._verify_dbt_profiler(dbt_adapter)
|
|
212
|
-
|
|
213
279
|
with dbt_adapter.connection_named("query"):
|
|
214
280
|
self.connection = dbt_adapter.get_thread_connection()
|
|
215
281
|
curr_columns = [column for column in dbt_adapter.get_columns(model, base=False)]
|
|
@@ -222,7 +288,7 @@ class ProfileTask(ProfileDiffTask):
|
|
|
222
288
|
|
|
223
289
|
tables: List[agate.Table] = []
|
|
224
290
|
for column in curr_columns:
|
|
225
|
-
self.update_progress(message=f
|
|
291
|
+
self.update_progress(message=f"[Current] Profile column: {column.column}", percentage=completed / total)
|
|
226
292
|
relation = dbt_adapter.create_relation(model, base=False)
|
|
227
293
|
response, table = self._profile_column(dbt_adapter, relation, column)
|
|
228
294
|
tables.append(table)
|