recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +810 -480
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +119 -51
- recce/cli.py +1299 -323
- recce/config.py +42 -33
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404.html +1 -1
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +5 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
- recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
- recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
- recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
- recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
- recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
- recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
- recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
- recce/data/_next/static/chunks/99d638224186c118.js +1 -0
- recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
- recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
- recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
- recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +3 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -7
- recce/diff.py +6 -12
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +716 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +131 -28
- recce/pull_request.py +27 -25
- recce/run.py +165 -121
- recce/server.py +303 -111
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +188 -143
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +87 -85
- recce/util/cll.py +274 -219
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +322 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- recce_cloud/__init__.py +24 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +111 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +76 -0
- recce_cloud/api/gitlab.py +82 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +245 -0
- recce_cloud/upload.py +214 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
- recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/conftest.py +9 -5
- tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +333 -0
- tests/tasks/conftest.py +1 -1
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +339 -15
- tests/tasks/test_query.py +46 -46
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +174 -60
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_config.py +7 -9
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +151 -4
- tests/test_dbt.py +7 -7
- tests/test_mcp_server.py +332 -0
- tests/test_pull_request.py +1 -1
- tests/test_server.py +25 -19
- tests/test_summary.py +29 -17
- recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -753
- recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
- tests/test_state.py +0 -123
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/tasks/valuediff.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import List, Optional, TypedDict, Union
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
5
|
-
from .core import Task, TaskResultDiffer, CheckValidator
|
|
6
|
-
from .dataframe import DataFrame
|
|
7
5
|
from ..core import default_context
|
|
8
6
|
from ..exceptions import RecceException
|
|
9
7
|
from ..models import Check
|
|
8
|
+
from .core import CheckValidator, Task, TaskResultDiffer
|
|
9
|
+
from .dataframe import DataFrame
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class ValueDiffParams(BaseModel):
|
|
@@ -26,19 +26,6 @@ class ValueDiffResult(BaseModel):
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class ValueDiffMixin:
|
|
29
|
-
def _verify_dbt_packages_deps(self, dbt_adapter):
|
|
30
|
-
for macro_name, macro in dbt_adapter.manifest.macros.items():
|
|
31
|
-
if macro.package_name == 'audit_helper':
|
|
32
|
-
break
|
|
33
|
-
else:
|
|
34
|
-
raise RecceException(
|
|
35
|
-
r"Package 'audit_helper' not found. Please refer to the link to install: https://hub.getdbt.com/dbt-labs/audit_helper/")
|
|
36
|
-
|
|
37
|
-
for macro_name, macro in dbt_adapter.manifest.macros.items():
|
|
38
|
-
if macro.package_name == 'dbt_utils' and macro.name == 'generate_surrogate_key':
|
|
39
|
-
self.legacy_surrogate_key = False
|
|
40
|
-
break
|
|
41
|
-
|
|
42
29
|
def _verify_primary_key(self, dbt_adapter, primary_key: Union[str, List[str]], model: str):
|
|
43
30
|
self.update_progress(message=f"Verify primary key: {primary_key}")
|
|
44
31
|
composite = True if isinstance(primary_key, List) else False
|
|
@@ -46,7 +33,21 @@ class ValueDiffMixin:
|
|
|
46
33
|
if composite:
|
|
47
34
|
if len(primary_key) == 0:
|
|
48
35
|
raise RecceException("Primary key cannot be empty")
|
|
49
|
-
sql_template = r"""
|
|
36
|
+
sql_template = r"""
|
|
37
|
+
{%- set column_list = primary_key %}
|
|
38
|
+
{%- set columns_csv = column_list | join(', ') %}
|
|
39
|
+
|
|
40
|
+
with validation_errors as (
|
|
41
|
+
select
|
|
42
|
+
{{ columns_csv }}
|
|
43
|
+
from {{ relation }}
|
|
44
|
+
group by {{ columns_csv }}
|
|
45
|
+
having count(*) > 1
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
select *
|
|
49
|
+
from validation_errors
|
|
50
|
+
"""
|
|
50
51
|
else:
|
|
51
52
|
if primary_key is None or len(primary_key) == 0:
|
|
52
53
|
raise RecceException("Primary key cannot be empty")
|
|
@@ -54,7 +55,6 @@ class ValueDiffMixin:
|
|
|
54
55
|
|
|
55
56
|
# check primary keys
|
|
56
57
|
for base in [True, False]:
|
|
57
|
-
|
|
58
58
|
relation = dbt_adapter.create_relation(model, base)
|
|
59
59
|
context = dict(
|
|
60
60
|
relation=relation,
|
|
@@ -69,31 +69,36 @@ class ValueDiffMixin:
|
|
|
69
69
|
invalids = row[0]
|
|
70
70
|
if invalids > 0:
|
|
71
71
|
raise RecceException(
|
|
72
|
-
f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'"
|
|
72
|
+
f"Invalid primary key: \"{primary_key}\". The column should be unique. Please check by this sql: '{sql}'"
|
|
73
|
+
)
|
|
73
74
|
break
|
|
74
75
|
else:
|
|
75
76
|
# it will never happen unless we use a wrong check sql
|
|
76
|
-
raise RecceException(
|
|
77
|
+
raise RecceException("Cannot verify primary key")
|
|
77
78
|
|
|
78
79
|
|
|
79
80
|
class ValueDiffTask(Task, ValueDiffMixin):
|
|
80
|
-
|
|
81
81
|
def __init__(self, params):
|
|
82
82
|
super().__init__()
|
|
83
83
|
self.params = ValueDiffParams(**params)
|
|
84
84
|
self.connection = None
|
|
85
85
|
self.legacy_surrogate_key = True
|
|
86
86
|
|
|
87
|
-
def _query_value_diff(
|
|
88
|
-
|
|
87
|
+
def _query_value_diff(
|
|
88
|
+
self,
|
|
89
|
+
dbt_adapter,
|
|
90
|
+
primary_key: Union[str, List[str]],
|
|
91
|
+
model: str,
|
|
92
|
+
columns: List[str] = None,
|
|
93
|
+
):
|
|
89
94
|
import agate
|
|
90
95
|
|
|
91
96
|
column_groups = {}
|
|
92
97
|
composite = True if isinstance(primary_key, List) else False
|
|
93
98
|
|
|
94
99
|
if columns is None or len(columns) == 0:
|
|
95
|
-
base_columns = [column.column for column in
|
|
96
|
-
curr_columns = [column.column for column in
|
|
100
|
+
base_columns = [column.column for column in dbt_adapter.get_columns(model, base=True)]
|
|
101
|
+
curr_columns = [column.column for column in dbt_adapter.get_columns(model, base=False)]
|
|
97
102
|
columns = [column for column in base_columns if column in curr_columns]
|
|
98
103
|
completed = 0
|
|
99
104
|
|
|
@@ -106,81 +111,117 @@ class ValueDiffTask(Task, ValueDiffMixin):
|
|
|
106
111
|
columns.insert(0, primary_key)
|
|
107
112
|
|
|
108
113
|
sql_template = r"""
|
|
109
|
-
{
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
) }
|
|
123
|
-
|
|
114
|
+
{%- set default_null_value = "_recce_surrogate_key_null_" -%}
|
|
115
|
+
{%- set fields = [] -%}
|
|
116
|
+
|
|
117
|
+
{%- for field in primary_keys -%}
|
|
118
|
+
{%- do fields.append(
|
|
119
|
+
"coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')"
|
|
120
|
+
) -%}
|
|
121
|
+
|
|
122
|
+
{%- if not loop.last %}
|
|
123
|
+
{%- do fields.append("'-'") -%}
|
|
124
|
+
{%- endif -%}
|
|
125
|
+
{%- endfor -%}
|
|
126
|
+
|
|
127
|
+
{%- set _pk = dbt.hash(dbt.concat(fields)) -%}
|
|
128
|
+
|
|
129
|
+
with a_query as (
|
|
130
|
+
select {{ _pk }} as _pk, * from {{ base_relation }}
|
|
131
|
+
),
|
|
132
|
+
|
|
133
|
+
b_query as (
|
|
134
|
+
select {{ _pk }} as _pk, * from {{ curr_relation }}
|
|
135
|
+
),
|
|
136
|
+
|
|
137
|
+
joined as (
|
|
138
|
+
select
|
|
139
|
+
coalesce(a_query._pk, b_query._pk) as _pk,
|
|
140
|
+
a_query.{{ column_to_compare }} as a_query_value,
|
|
141
|
+
b_query.{{ column_to_compare }} as b_query_value,
|
|
142
|
+
case
|
|
143
|
+
when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 'perfect match'
|
|
144
|
+
when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then 'both are null'
|
|
145
|
+
when a_query._pk is null then 'missing from {{ a_relation_name }}'
|
|
146
|
+
when b_query._pk is null then 'missing from {{ b_relation_name }}'
|
|
147
|
+
when a_query.{{ column_to_compare }} is null then 'value is null in {{ a_relation_name }} only'
|
|
148
|
+
when b_query.{{ column_to_compare }} is null then 'value is null in {{ b_relation_name }} only'
|
|
149
|
+
when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 'values do not match'
|
|
150
|
+
else 'unknown' -- this should never happen
|
|
151
|
+
end as match_status
|
|
152
|
+
from a_query
|
|
153
|
+
full outer join b_query on a_query._pk = b_query._pk
|
|
154
|
+
),
|
|
155
|
+
|
|
156
|
+
aggregated as (
|
|
157
|
+
select
|
|
158
|
+
'{{ column_to_compare }}' as column_name,
|
|
159
|
+
match_status,
|
|
160
|
+
count(*) as count_records
|
|
161
|
+
from joined
|
|
162
|
+
group by 1, 2
|
|
163
|
+
)
|
|
124
164
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
|
|
165
|
+
select
|
|
166
|
+
column_name,
|
|
167
|
+
match_status,
|
|
168
|
+
count_records,
|
|
169
|
+
round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total
|
|
170
|
+
from aggregated
|
|
171
|
+
"""
|
|
133
172
|
|
|
134
173
|
for column in columns:
|
|
135
174
|
self.update_progress(message=f"Diff column: {column}", percentage=completed / len(columns))
|
|
136
175
|
|
|
137
|
-
sql =
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
176
|
+
sql = dbt_adapter.generate_sql(
|
|
177
|
+
sql_template,
|
|
178
|
+
context=dict(
|
|
179
|
+
base_relation=dbt_adapter.create_relation(model, base=True),
|
|
180
|
+
curr_relation=dbt_adapter.create_relation(model, base=False),
|
|
181
|
+
primary_keys=primary_key if composite else [primary_key],
|
|
182
|
+
column_to_compare=column,
|
|
183
|
+
),
|
|
184
|
+
)
|
|
143
185
|
|
|
144
|
-
_, table =
|
|
186
|
+
_, table = dbt_adapter.execute(sql, fetch=True)
|
|
187
|
+
if column not in column_groups:
|
|
188
|
+
column_groups[column] = dict(added=0, removed=0, mismatched=0, matched=0)
|
|
145
189
|
for row in table.rows:
|
|
146
190
|
# data example:
|
|
147
191
|
# ('COLUMN_NAME', 'MATCH_STATUS', 'COUNT_RECORDS', 'PERCENT_OF_TOTAL')
|
|
148
|
-
# ('EVENT_ID', '
|
|
192
|
+
# ('EVENT_ID', 'perfect match', 158601510, Decimal('100.00'))
|
|
149
193
|
column_name, column_state, row_count, total_rate = row
|
|
150
|
-
if
|
|
194
|
+
if "column_name" == row[0].lower():
|
|
151
195
|
# skip column names
|
|
152
196
|
return
|
|
153
197
|
|
|
154
|
-
#
|
|
155
198
|
# sample data like this:
|
|
156
199
|
# https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql
|
|
157
200
|
#
|
|
158
|
-
# '
|
|
159
|
-
# '
|
|
160
|
-
# '
|
|
161
|
-
# '
|
|
162
|
-
# '
|
|
163
|
-
# '
|
|
164
|
-
# '
|
|
165
|
-
# 'unknown'
|
|
201
|
+
# 'perfect match' -> matched
|
|
202
|
+
# 'both are null' -> matched
|
|
203
|
+
# 'missing from a' -> row added
|
|
204
|
+
# 'missing from b' -> row removed
|
|
205
|
+
# 'value is null in a only' -> mismatched
|
|
206
|
+
# 'value is null in b only' -> mismatched
|
|
207
|
+
# 'values do not match' -> mismatched
|
|
208
|
+
# 'unknown' -> this should never happen
|
|
166
209
|
# end as match_status,
|
|
167
210
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
if 'values do not match' in column_state:
|
|
183
|
-
column_groups[column_name]['mismatched'] += row_count
|
|
211
|
+
state_mappings = {
|
|
212
|
+
"perfect match": "matched",
|
|
213
|
+
"both are null": "matched",
|
|
214
|
+
"missing from a": "added",
|
|
215
|
+
"missing from b": "removed",
|
|
216
|
+
"value is null in a only": "mismatched",
|
|
217
|
+
"value is null in b only": "mismatched",
|
|
218
|
+
"values do not match": "mismatched",
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
# Use the mapping to update counts
|
|
222
|
+
for state, action in state_mappings.items():
|
|
223
|
+
if state in column_state:
|
|
224
|
+
column_groups[column_name][action] += row_count
|
|
184
225
|
|
|
185
226
|
# Cancel as early as possible
|
|
186
227
|
self.check_cancel()
|
|
@@ -188,9 +229,9 @@ class ValueDiffTask(Task, ValueDiffMixin):
|
|
|
188
229
|
completed = completed + 1
|
|
189
230
|
|
|
190
231
|
first = list(column_groups.values())[0]
|
|
191
|
-
added = first[
|
|
192
|
-
removed = first[
|
|
193
|
-
common = first[
|
|
232
|
+
added = first["added"]
|
|
233
|
+
removed = first["removed"]
|
|
234
|
+
common = first["matched"] + first["mismatched"]
|
|
194
235
|
total = common + added + removed
|
|
195
236
|
|
|
196
237
|
row = []
|
|
@@ -200,12 +241,12 @@ class ValueDiffTask(Task, ValueDiffMixin):
|
|
|
200
241
|
# This is incorrect when there are one side null
|
|
201
242
|
# https://github.com/dbt-labs/dbt-audit-helper/blob/main/macros/compare_column_values.sql#L20-L23
|
|
202
243
|
# matched = v['matched']
|
|
203
|
-
matched = common - v[
|
|
244
|
+
matched = common - v["mismatched"]
|
|
204
245
|
rate = None if common == 0 else matched / common
|
|
205
246
|
record = [k, matched, rate]
|
|
206
247
|
row.append(record)
|
|
207
248
|
|
|
208
|
-
column_names = [
|
|
249
|
+
column_names = ["column", "matched", "matched_p"]
|
|
209
250
|
column_types = [agate.Text(), agate.Number(), agate.Number()]
|
|
210
251
|
table = agate.Table(row, column_names=column_names, column_types=column_types)
|
|
211
252
|
|
|
@@ -224,9 +265,6 @@ class ValueDiffTask(Task, ValueDiffMixin):
|
|
|
224
265
|
model: str = self.params.model
|
|
225
266
|
columns: List[str] = self.params.columns
|
|
226
267
|
|
|
227
|
-
self._verify_dbt_packages_deps(dbt_adapter)
|
|
228
|
-
self.check_cancel()
|
|
229
|
-
|
|
230
268
|
self._verify_primary_key(dbt_adapter, primary_key, model)
|
|
231
269
|
self.check_cancel()
|
|
232
270
|
|
|
@@ -243,35 +281,34 @@ class ValueDiffTask(Task, ValueDiffMixin):
|
|
|
243
281
|
|
|
244
282
|
|
|
245
283
|
class ValueDiffTaskResultDiffer(TaskResultDiffer):
|
|
246
|
-
|
|
247
284
|
def _check_result_changed_fn(self, result):
|
|
248
285
|
is_changed = False
|
|
249
|
-
summary = result.get(
|
|
250
|
-
added = summary.get(
|
|
251
|
-
removed = summary.get(
|
|
252
|
-
changes = {
|
|
253
|
-
'column_changed': []
|
|
254
|
-
}
|
|
286
|
+
summary = result.get("summary", {})
|
|
287
|
+
added = summary.get("added", 0)
|
|
288
|
+
removed = summary.get("removed", 0)
|
|
289
|
+
changes = {"column_changed": []}
|
|
255
290
|
|
|
256
291
|
if added > 0:
|
|
257
292
|
is_changed = True
|
|
258
|
-
changes[
|
|
293
|
+
changes["row_added"] = added
|
|
259
294
|
|
|
260
295
|
if removed > 0:
|
|
261
296
|
is_changed = True
|
|
262
|
-
changes[
|
|
297
|
+
changes["row_removed"] = removed
|
|
263
298
|
|
|
264
|
-
row_data = result.get(
|
|
299
|
+
row_data = result.get("data", {}).get("data", [])
|
|
265
300
|
for row in row_data:
|
|
266
301
|
column, matched, matched_p = row
|
|
267
302
|
if float(matched_p) < 1.0:
|
|
268
303
|
# if there is any mismatched, we consider it as changed
|
|
269
304
|
is_changed = True
|
|
270
|
-
changes[
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
305
|
+
changes["column_changed"].append(
|
|
306
|
+
{
|
|
307
|
+
"column": column,
|
|
308
|
+
"matched": matched,
|
|
309
|
+
"matched_p": matched_p,
|
|
310
|
+
}
|
|
311
|
+
)
|
|
275
312
|
|
|
276
313
|
return changes if is_changed else None
|
|
277
314
|
|
|
@@ -287,15 +324,19 @@ class ValueDiffDetailResult(DataFrame):
|
|
|
287
324
|
|
|
288
325
|
|
|
289
326
|
class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
290
|
-
|
|
291
327
|
def __init__(self, params):
|
|
292
328
|
super().__init__()
|
|
293
329
|
self.params = ValueDiffParams(**params)
|
|
294
330
|
self.connection = None
|
|
295
331
|
self.legacy_surrogate_key = True
|
|
296
332
|
|
|
297
|
-
def _query_value_diff(
|
|
298
|
-
|
|
333
|
+
def _query_value_diff(
|
|
334
|
+
self,
|
|
335
|
+
dbt_adapter,
|
|
336
|
+
primary_key: Union[str, List[str]],
|
|
337
|
+
model: str,
|
|
338
|
+
columns: List[str] = None,
|
|
339
|
+
):
|
|
299
340
|
composite = True if isinstance(primary_key, List) else False
|
|
300
341
|
|
|
301
342
|
if columns is None or len(columns) == 0:
|
|
@@ -312,45 +353,72 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
|
312
353
|
columns.insert(0, primary_key)
|
|
313
354
|
|
|
314
355
|
sql_template = r"""
|
|
315
|
-
|
|
316
|
-
{
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
b_query
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
356
|
+
with a_query as (
|
|
357
|
+
select {{ columns | join(',\n') }} from {{ base_relation }}
|
|
358
|
+
),
|
|
359
|
+
|
|
360
|
+
b_query as (
|
|
361
|
+
select {{ columns | join(',\n') }} from {{ curr_relation }}
|
|
362
|
+
),
|
|
363
|
+
|
|
364
|
+
a_intersect_b as (
|
|
365
|
+
select * from a_query
|
|
366
|
+
{{ dbt.intersect() }}
|
|
367
|
+
select * from b_query
|
|
368
|
+
),
|
|
369
|
+
|
|
370
|
+
a_except_b as (
|
|
371
|
+
select * from a_query
|
|
372
|
+
{{ dbt.except() }}
|
|
373
|
+
select * from b_query
|
|
374
|
+
),
|
|
375
|
+
|
|
376
|
+
b_except_a as (
|
|
377
|
+
select * from b_query
|
|
378
|
+
{{ dbt.except() }}
|
|
379
|
+
select * from a_query
|
|
380
|
+
),
|
|
381
|
+
|
|
382
|
+
all_records as (
|
|
383
|
+
select
|
|
384
|
+
*,
|
|
385
|
+
true as in_a,
|
|
386
|
+
true as in_b
|
|
387
|
+
from a_intersect_b
|
|
388
|
+
|
|
389
|
+
union all
|
|
390
|
+
|
|
391
|
+
select
|
|
392
|
+
*,
|
|
393
|
+
true as in_a,
|
|
394
|
+
false as in_b
|
|
395
|
+
from a_except_b
|
|
396
|
+
|
|
397
|
+
union all
|
|
398
|
+
|
|
399
|
+
select
|
|
400
|
+
*,
|
|
401
|
+
false as in_a,
|
|
402
|
+
true as in_b
|
|
403
|
+
from b_except_a
|
|
404
|
+
)
|
|
337
405
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
else:
|
|
344
|
-
new_primary_key = 'primary_key'
|
|
345
|
-
sql_template = sql_template.replace('__PRIMARY_KEY__', new_primary_key)
|
|
406
|
+
select * from all_records
|
|
407
|
+
where not (in_a and in_b)
|
|
408
|
+
order by {{ primary_keys | join(',\n') }}, in_a desc, in_b desc
|
|
409
|
+
limit {{ limit }}
|
|
410
|
+
"""
|
|
346
411
|
|
|
347
|
-
sql = dbt_adapter.generate_sql(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
412
|
+
sql = dbt_adapter.generate_sql(
|
|
413
|
+
sql_template,
|
|
414
|
+
context=dict(
|
|
415
|
+
base_relation=dbt_adapter.create_relation(model, base=True),
|
|
416
|
+
curr_relation=dbt_adapter.create_relation(model, base=False),
|
|
417
|
+
primary_keys=primary_key if composite else [primary_key],
|
|
418
|
+
columns=columns,
|
|
419
|
+
limit=1000,
|
|
420
|
+
),
|
|
421
|
+
)
|
|
354
422
|
|
|
355
423
|
_, table = dbt_adapter.execute(sql, fetch=True)
|
|
356
424
|
self.check_cancel()
|
|
@@ -358,8 +426,8 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
|
358
426
|
return DataFrame.from_agate(table)
|
|
359
427
|
|
|
360
428
|
def execute(self):
|
|
361
|
-
|
|
362
429
|
from recce.adapter.dbt_adapter import DbtAdapter
|
|
430
|
+
|
|
363
431
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
364
432
|
|
|
365
433
|
with dbt_adapter.connection_named("value diff"):
|
|
@@ -369,9 +437,6 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
|
369
437
|
model: str = self.params.model
|
|
370
438
|
columns: List[str] = self.params.columns
|
|
371
439
|
|
|
372
|
-
self._verify_dbt_packages_deps(dbt_adapter)
|
|
373
|
-
self.check_cancel()
|
|
374
|
-
|
|
375
440
|
self._verify_primary_key(dbt_adapter, primary_key, model)
|
|
376
441
|
self.check_cancel()
|
|
377
442
|
|
|
@@ -379,6 +444,7 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
|
379
444
|
|
|
380
445
|
def cancel(self):
|
|
381
446
|
from recce.adapter.dbt_adapter import DbtAdapter
|
|
447
|
+
|
|
382
448
|
if self.connection:
|
|
383
449
|
adapter: DbtAdapter = default_context().adapter
|
|
384
450
|
with adapter.connection_named("cancel"):
|
|
@@ -386,9 +452,8 @@ class ValueDiffDetailTask(Task, ValueDiffMixin):
|
|
|
386
452
|
|
|
387
453
|
|
|
388
454
|
class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
|
|
389
|
-
|
|
390
455
|
def _check_result_changed_fn(self, result):
|
|
391
|
-
diff_data = result.get(
|
|
456
|
+
diff_data = result.get("data")
|
|
392
457
|
if diff_data is None or len(diff_data) == 0:
|
|
393
458
|
return None
|
|
394
459
|
|
|
@@ -397,7 +462,6 @@ class ValueDiffDetailTaskResultDiffer(TaskResultDiffer):
|
|
|
397
462
|
|
|
398
463
|
|
|
399
464
|
class ValueDiffCheckValidator(CheckValidator):
|
|
400
|
-
|
|
401
465
|
def validate_check(self, check: Check):
|
|
402
466
|
try:
|
|
403
467
|
ValueDiffParams(**check.params)
|
recce/util/__init__.py
CHANGED
recce/util/api_token.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from rich.console import Console
|
|
3
|
+
|
|
4
|
+
from recce import event
|
|
5
|
+
from recce.event import get_recce_api_token, update_recce_api_token
|
|
6
|
+
from recce.exceptions import RecceConfigException
|
|
7
|
+
from recce.util.recce_cloud import (
|
|
8
|
+
RECCE_CLOUD_BASE_URL,
|
|
9
|
+
RecceCloud,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def show_invalid_api_token_message():
|
|
16
|
+
"""
|
|
17
|
+
Show the message when the API token is invalid.
|
|
18
|
+
"""
|
|
19
|
+
console.print("[[red]Error[/red]] Invalid Recce Cloud API token.")
|
|
20
|
+
console.print("Please associate with your Recce Cloud account by the following command 'recce connect-to-cloud'.")
|
|
21
|
+
console.print(
|
|
22
|
+
"For more information, please visit: https://docs.reccehq.com/recce-cloud/share-recce-session-securely/#configure-recce-cloud-association-manually"
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def prepare_api_token(
|
|
27
|
+
interaction=False,
|
|
28
|
+
**kwargs,
|
|
29
|
+
):
|
|
30
|
+
"""
|
|
31
|
+
Prepare the API token for the request.
|
|
32
|
+
"""
|
|
33
|
+
# Verify the API token for Recce Cloud Share Link
|
|
34
|
+
api_token = get_recce_api_token()
|
|
35
|
+
new_api_token = kwargs.get("api_token")
|
|
36
|
+
if new_api_token is not None and new_api_token.startswith("rct-"):
|
|
37
|
+
# Task Token
|
|
38
|
+
valid = RecceCloud(new_api_token).verify_token()
|
|
39
|
+
if not valid:
|
|
40
|
+
raise RecceConfigException("Invalid Recce Cloud Task token")
|
|
41
|
+
api_token = new_api_token
|
|
42
|
+
elif api_token != new_api_token and new_api_token is not None:
|
|
43
|
+
# Handle the API token provided by option `--api-token`
|
|
44
|
+
valid = RecceCloud(new_api_token).verify_token()
|
|
45
|
+
if not valid:
|
|
46
|
+
raise RecceConfigException("Invalid Recce Cloud API token")
|
|
47
|
+
event.log_connected_to_cloud()
|
|
48
|
+
api_token = new_api_token
|
|
49
|
+
update_recce_api_token(api_token)
|
|
50
|
+
console.print(
|
|
51
|
+
"[[green]Success[/green]] User profile has been updated to include the Recce Cloud API Token. "
|
|
52
|
+
"You no longer need to append --api-token to the recce command"
|
|
53
|
+
)
|
|
54
|
+
elif api_token:
|
|
55
|
+
# Verify the API token from the user profile
|
|
56
|
+
valid = RecceCloud(api_token).verify_token()
|
|
57
|
+
if not valid:
|
|
58
|
+
console.print("[[yellow]Warning[/yellow]] Invalid Recce Cloud API token. Skipping the share link.")
|
|
59
|
+
api_token = None
|
|
60
|
+
if valid:
|
|
61
|
+
event.log_connected_to_cloud()
|
|
62
|
+
else:
|
|
63
|
+
# No api_token provided
|
|
64
|
+
if interaction:
|
|
65
|
+
console.print(
|
|
66
|
+
"An API token is required for this feature. This can be obtained in your user account settings.\n"
|
|
67
|
+
f"{RECCE_CLOUD_BASE_URL}/settings#tokens\n"
|
|
68
|
+
"Your API token can be added to '~/.recce/profile.yml' for more convenient sharing."
|
|
69
|
+
)
|
|
70
|
+
api_token = click.prompt("Your Recce API token", type=str, hide_input=True, show_default=False)
|
|
71
|
+
valid = RecceCloud(api_token).verify_token()
|
|
72
|
+
if not valid:
|
|
73
|
+
raise RecceConfigException("Invalid Recce Cloud API token")
|
|
74
|
+
update_recce_api_token(api_token)
|
|
75
|
+
console.print(
|
|
76
|
+
"[[green]Success[/green]] User profile has been updated to include the Recce Cloud API Token. "
|
|
77
|
+
"You no longer need to append --api-token to the recce command"
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
return api_token
|