recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +810 -480
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +119 -51
- recce/cli.py +1299 -323
- recce/config.py +42 -33
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404.html +1 -1
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +5 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
- recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
- recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
- recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
- recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
- recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
- recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
- recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
- recce/data/_next/static/chunks/99d638224186c118.js +1 -0
- recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
- recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
- recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
- recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +3 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -7
- recce/diff.py +6 -12
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +716 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +131 -28
- recce/pull_request.py +27 -25
- recce/run.py +165 -121
- recce/server.py +303 -111
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +188 -143
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +87 -85
- recce/util/cll.py +274 -219
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +322 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- recce_cloud/__init__.py +24 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +111 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +76 -0
- recce_cloud/api/gitlab.py +82 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +245 -0
- recce_cloud/upload.py +214 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
- recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/conftest.py +9 -5
- tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +333 -0
- tests/tasks/conftest.py +1 -1
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +339 -15
- tests/tasks/test_query.py +46 -46
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +174 -60
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_config.py +7 -9
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +151 -4
- tests/test_dbt.py +7 -7
- tests/test_mcp_server.py +332 -0
- tests/test_pull_request.py +1 -1
- tests/test_server.py +25 -19
- tests/test_summary.py +29 -17
- recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -753
- recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
- tests/test_state.py +0 -123
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/summary.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
|
-
from typing import List,
|
|
3
|
+
from typing import Dict, List, Optional, Set, Type, Union
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
from recce.apis.check_func import get_node_name_by_id
|
|
9
9
|
from recce.core import RecceContext
|
|
10
|
-
from recce.models import CheckDAO, RunDAO, RunType
|
|
10
|
+
from recce.models import CheckDAO, Run, RunDAO, RunType
|
|
11
11
|
from recce.tasks.core import TaskResultDiffer
|
|
12
12
|
from recce.tasks.histogram import HistogramDiffTaskResultDiffer
|
|
13
13
|
from recce.tasks.profile import ProfileDiffResultDiffer
|
|
@@ -15,13 +15,16 @@ from recce.tasks.query import QueryDiffResultDiffer
|
|
|
15
15
|
from recce.tasks.rowcount import RowCountDiffResultDiffer
|
|
16
16
|
from recce.tasks.schema import SchemaDiffResultDiffer
|
|
17
17
|
from recce.tasks.top_k import TopKDiffTaskResultDiffer
|
|
18
|
-
from recce.tasks.valuediff import
|
|
18
|
+
from recce.tasks.valuediff import (
|
|
19
|
+
ValueDiffDetailTaskResultDiffer,
|
|
20
|
+
ValueDiffTaskResultDiffer,
|
|
21
|
+
)
|
|
19
22
|
|
|
20
|
-
RECCE_CLOUD_HOST = os.environ.get(
|
|
23
|
+
RECCE_CLOUD_HOST = os.environ.get("RECCE_CLOUD_HOST", "https://cloud.datarecce.io")
|
|
21
24
|
|
|
22
|
-
ADD_COLOR =
|
|
23
|
-
MODIFIED_COLOR =
|
|
24
|
-
REMOVE_COLOR =
|
|
25
|
+
ADD_COLOR = "#1dce00"
|
|
26
|
+
MODIFIED_COLOR = "#ffa502"
|
|
27
|
+
REMOVE_COLOR = "#ff067e"
|
|
25
28
|
|
|
26
29
|
MAX_MERMAID_TEXT_SIZE = 50000 # source: https://mermaid.js.org/config/schema-docs/config.html#maxtextsize
|
|
27
30
|
|
|
@@ -42,44 +45,44 @@ class Node:
|
|
|
42
45
|
base_data: dict
|
|
43
46
|
current_data: dict
|
|
44
47
|
|
|
45
|
-
def __init__(self, node_id: str, node_data: dict, data_from: str =
|
|
48
|
+
def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):
|
|
46
49
|
self.id = node_id
|
|
47
|
-
self.name = node_data[
|
|
50
|
+
self.name = node_data["name"]
|
|
48
51
|
self.data_from = data_from
|
|
49
|
-
self.resource_type = node_data[
|
|
50
|
-
self.package_name = node_data[
|
|
52
|
+
self.resource_type = node_data["resource_type"]
|
|
53
|
+
self.package_name = node_data["package_name"]
|
|
51
54
|
self.children = []
|
|
52
55
|
self.parents = []
|
|
53
56
|
|
|
54
57
|
self.base_data = {}
|
|
55
58
|
self.current_data = {}
|
|
56
59
|
|
|
57
|
-
if data_from ==
|
|
60
|
+
if data_from == "base":
|
|
58
61
|
self.base_data = node_data
|
|
59
|
-
elif data_from ==
|
|
62
|
+
elif data_from == "current":
|
|
60
63
|
self.current_data = node_data
|
|
61
64
|
|
|
62
65
|
@property
|
|
63
66
|
def change_status(self):
|
|
64
|
-
base_checksum = self.base_data.get(
|
|
65
|
-
curr_checksum = self.current_data.get(
|
|
66
|
-
if self.data_from ==
|
|
67
|
-
return
|
|
68
|
-
elif self.data_from ==
|
|
69
|
-
return
|
|
67
|
+
base_checksum = self.base_data.get("checksum", {}).get("checksum")
|
|
68
|
+
curr_checksum = self.current_data.get("checksum", {}).get("checksum")
|
|
69
|
+
if self.data_from == "base":
|
|
70
|
+
return "removed"
|
|
71
|
+
elif self.data_from == "current":
|
|
72
|
+
return "added"
|
|
70
73
|
elif base_checksum and curr_checksum and base_checksum != curr_checksum:
|
|
71
|
-
return
|
|
74
|
+
return "modified"
|
|
72
75
|
return None
|
|
73
76
|
|
|
74
77
|
def update_data(self, node_data: dict, data_from: str):
|
|
75
|
-
if data_from not in [
|
|
76
|
-
raise ValueError(f
|
|
78
|
+
if data_from not in ["base", "current"]:
|
|
79
|
+
raise ValueError(f"Invalid data_from value: {data_from}")
|
|
77
80
|
if self.data_from != data_from:
|
|
78
|
-
self.data_from =
|
|
81
|
+
self.data_from = "both"
|
|
79
82
|
|
|
80
|
-
if data_from ==
|
|
83
|
+
if data_from == "base":
|
|
81
84
|
self.base_data = node_data
|
|
82
|
-
elif data_from ==
|
|
85
|
+
elif data_from == "current":
|
|
83
86
|
self.current_data = node_data
|
|
84
87
|
|
|
85
88
|
def append_parent(self, parent_id: str):
|
|
@@ -93,8 +96,8 @@ class Node:
|
|
|
93
96
|
def _cal_row_count_delta_percentage(self):
|
|
94
97
|
row_count_diff, run_result = _get_node_row_count_diff(self.id, self.name)
|
|
95
98
|
if row_count_diff:
|
|
96
|
-
base = run_result.get(
|
|
97
|
-
current = run_result.get(
|
|
99
|
+
base = run_result.get("base", 0)
|
|
100
|
+
current = run_result.get("curr", 0)
|
|
98
101
|
if int(current) > int(base):
|
|
99
102
|
p = (int(current) - int(base)) / int(current) * 100
|
|
100
103
|
return f'🔼 +{round(p, 2) if p > 0.1 else "<0.1"}%'
|
|
@@ -104,25 +107,25 @@ class Node:
|
|
|
104
107
|
return None
|
|
105
108
|
|
|
106
109
|
def _get_schema_diff(self):
|
|
107
|
-
base_schema = self.base_data.get(
|
|
108
|
-
current_schema = self.current_data.get(
|
|
110
|
+
base_schema = self.base_data.get("columns", {})
|
|
111
|
+
current_schema = self.current_data.get("columns", {})
|
|
109
112
|
schema_diff = TaskResultDiffer.diff(base_schema, current_schema)
|
|
110
113
|
return schema_diff
|
|
111
114
|
|
|
112
115
|
def _what_changed(self, checks=None):
|
|
113
116
|
changes = []
|
|
114
|
-
if self.change_status ==
|
|
115
|
-
return [
|
|
116
|
-
elif self.change_status ==
|
|
117
|
-
return [
|
|
118
|
-
elif self.change_status ==
|
|
119
|
-
changes.append(
|
|
117
|
+
if self.change_status == "added":
|
|
118
|
+
return ["Added Node"]
|
|
119
|
+
elif self.change_status == "removed":
|
|
120
|
+
return ["Removed Node"]
|
|
121
|
+
elif self.change_status == "modified":
|
|
122
|
+
changes.append("Code")
|
|
120
123
|
row_count_delta_percentage = self._cal_row_count_delta_percentage()
|
|
121
124
|
if row_count_delta_percentage:
|
|
122
|
-
changes.append(f
|
|
125
|
+
changes.append(f"Row Count {row_count_delta_percentage}")
|
|
123
126
|
schema_diff = self._get_schema_diff()
|
|
124
127
|
if schema_diff:
|
|
125
|
-
changes.append(
|
|
128
|
+
changes.append("Schema")
|
|
126
129
|
|
|
127
130
|
if checks:
|
|
128
131
|
for check in checks:
|
|
@@ -131,7 +134,7 @@ class Node:
|
|
|
131
134
|
# Skip the row count and schema diff check, since we already have it.
|
|
132
135
|
continue
|
|
133
136
|
if check.node_ids and self.id in check.node_ids:
|
|
134
|
-
changes.append(str(check.type).replace(
|
|
137
|
+
changes.append(str(check.type).replace("_", " ").title())
|
|
135
138
|
return changes
|
|
136
139
|
|
|
137
140
|
def get_node_str(self, checks=None):
|
|
@@ -140,12 +143,12 @@ class Node:
|
|
|
140
143
|
|
|
141
144
|
if self.change_status is not None:
|
|
142
145
|
is_changed = True
|
|
143
|
-
if self.change_status ==
|
|
144
|
-
style = f
|
|
145
|
-
elif self.change_status ==
|
|
146
|
-
style = f
|
|
147
|
-
elif self.change_status ==
|
|
148
|
-
style = f
|
|
146
|
+
if self.change_status == "added":
|
|
147
|
+
style = f"style {self.id} stroke:{ADD_COLOR}"
|
|
148
|
+
elif self.change_status == "modified":
|
|
149
|
+
style = f"style {self.id} stroke:{MODIFIED_COLOR}"
|
|
150
|
+
elif self.change_status == "removed":
|
|
151
|
+
style = f"style {self.id} stroke:{REMOVE_COLOR}"
|
|
149
152
|
|
|
150
153
|
if checks:
|
|
151
154
|
for check in checks:
|
|
@@ -154,13 +157,13 @@ class Node:
|
|
|
154
157
|
|
|
155
158
|
content_output = f'{self.id}["{self.name}'
|
|
156
159
|
if is_changed:
|
|
157
|
-
content_output +=
|
|
160
|
+
content_output += "\n\n[What's Changed]\n"
|
|
158
161
|
changes = self._what_changed(checks)
|
|
159
|
-
content_output +=
|
|
162
|
+
content_output += ", ".join(changes)
|
|
160
163
|
|
|
161
164
|
content_output += '"]\n'
|
|
162
165
|
if style:
|
|
163
|
-
content_output += f
|
|
166
|
+
content_output += f"{style}\n"
|
|
164
167
|
return content_output
|
|
165
168
|
|
|
166
169
|
|
|
@@ -171,7 +174,7 @@ class Edge:
|
|
|
171
174
|
parent_id: str
|
|
172
175
|
change_status: Union[str, None]
|
|
173
176
|
|
|
174
|
-
def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str =
|
|
177
|
+
def __init__(self, edge_id: str, parent_id: str, child_id: str, edge_from: str = "base"):
|
|
175
178
|
self.id = edge_id
|
|
176
179
|
self.edge_from = edge_from
|
|
177
180
|
self.child_id = child_id
|
|
@@ -179,7 +182,7 @@ class Edge:
|
|
|
179
182
|
|
|
180
183
|
def update_edge_from(self, edge_from: str):
|
|
181
184
|
if self.edge_from != edge_from:
|
|
182
|
-
self.edge_from =
|
|
185
|
+
self.edge_from = "both"
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
class CheckSummary(BaseModel):
|
|
@@ -225,21 +228,21 @@ class LineageGraph:
|
|
|
225
228
|
edges: Dict[str, Edge] = {}
|
|
226
229
|
checks: List[CheckSummary] = None
|
|
227
230
|
|
|
228
|
-
def create_node(self, node_id: str, node_data: dict, data_from: str =
|
|
231
|
+
def create_node(self, node_id: str, node_data: dict, data_from: str = "base"):
|
|
229
232
|
if node_id not in self.nodes:
|
|
230
233
|
self.nodes[node_id] = Node(node_id, node_data, data_from)
|
|
231
234
|
else:
|
|
232
235
|
self.nodes[node_id].update_data(node_data, data_from)
|
|
233
236
|
|
|
234
|
-
def create_edge(self, parent_id: str, child_id: str, edge_from: str =
|
|
237
|
+
def create_edge(self, parent_id: str, child_id: str, edge_from: str = "base"):
|
|
235
238
|
if parent_id not in self.nodes:
|
|
236
|
-
_warn(f
|
|
239
|
+
_warn(f"Parent node {parent_id} not found in graph")
|
|
237
240
|
return
|
|
238
241
|
if child_id not in self.nodes:
|
|
239
|
-
_warn(f
|
|
242
|
+
_warn(f"Child node {child_id} not found in graph")
|
|
240
243
|
return
|
|
241
244
|
|
|
242
|
-
edge_id = f
|
|
245
|
+
edge_id = f"{parent_id}-->{child_id}"
|
|
243
246
|
if edge_id in self.edges:
|
|
244
247
|
self.edges[edge_id].update_edge_from(edge_from)
|
|
245
248
|
else:
|
|
@@ -250,67 +253,87 @@ class LineageGraph:
|
|
|
250
253
|
@property
|
|
251
254
|
def modified_set(self) -> Set[str]:
|
|
252
255
|
return set(
|
|
253
|
-
[node_id for node_id, node in self.nodes.items() if node.change_status in [
|
|
256
|
+
[node_id for node_id, node in self.nodes.items() if node.change_status in ["added", "removed", "modified"]]
|
|
257
|
+
)
|
|
254
258
|
|
|
255
259
|
def get_edge_str(self, edge_id):
|
|
256
260
|
edge = self.edges[edge_id]
|
|
257
261
|
child = self.nodes[edge.child_id]
|
|
258
262
|
|
|
259
|
-
if child.change_status ==
|
|
260
|
-
return f
|
|
261
|
-
if child.change_status is None or child.change_status ==
|
|
262
|
-
return f
|
|
263
|
-
if child.change_status ==
|
|
264
|
-
return f
|
|
263
|
+
if child.change_status == "removed":
|
|
264
|
+
return f"{edge.parent_id}-.->{edge.child_id}\n"
|
|
265
|
+
if child.change_status is None or child.change_status == "modified":
|
|
266
|
+
return f"{edge.parent_id}---->{edge.child_id}\n"
|
|
267
|
+
if child.change_status == "added":
|
|
268
|
+
return f"{edge.parent_id}-...->{edge.child_id}\n"
|
|
265
269
|
|
|
266
270
|
|
|
267
271
|
def _build_lineage_graph(base, current) -> LineageGraph:
|
|
268
272
|
graph = LineageGraph()
|
|
269
273
|
|
|
274
|
+
# Get the current package name to filter nodes (from the current manifest metadata)
|
|
275
|
+
package_name = None
|
|
276
|
+
manifest_metadata = current.get("manifest_metadata")
|
|
277
|
+
if manifest_metadata and hasattr(manifest_metadata, "project_name"):
|
|
278
|
+
# The default package name is the project name
|
|
279
|
+
package_name = manifest_metadata.project_name
|
|
280
|
+
|
|
270
281
|
# Init Graph nodes with base & current nodes
|
|
271
|
-
for node_id, node_data in base.get(
|
|
272
|
-
|
|
282
|
+
for node_id, node_data in base.get("nodes", {}).items():
|
|
283
|
+
# Skip nodes that are not from the current package
|
|
284
|
+
if package_name and node_data.get("package_name") != package_name:
|
|
285
|
+
continue
|
|
286
|
+
graph.create_node(node_id, node_data, "base")
|
|
273
287
|
|
|
274
|
-
for node_id, node_data in current.get(
|
|
288
|
+
for node_id, node_data in current.get("nodes", {}).items():
|
|
289
|
+
# Skip nodes that are not from the current package
|
|
290
|
+
if package_name and node_data.get("package_name") != package_name:
|
|
291
|
+
continue
|
|
275
292
|
if node_id not in graph.nodes:
|
|
276
|
-
node = Node(node_id, node_data,
|
|
293
|
+
node = Node(node_id, node_data, "current")
|
|
277
294
|
graph.nodes[node_id] = node
|
|
278
295
|
else:
|
|
279
296
|
node = graph.nodes[node_id]
|
|
280
|
-
node.update_data(node_data,
|
|
297
|
+
node.update_data(node_data, "current")
|
|
281
298
|
|
|
282
299
|
# Build edges
|
|
283
|
-
for child_id, parents in base.get(
|
|
300
|
+
for child_id, parents in base.get("parent_map", {}).items():
|
|
284
301
|
for parent_id in parents:
|
|
285
|
-
graph.
|
|
286
|
-
|
|
302
|
+
if child_id not in graph.nodes or parent_id not in graph.nodes:
|
|
303
|
+
continue
|
|
304
|
+
|
|
305
|
+
graph.create_edge(parent_id, child_id, "base")
|
|
306
|
+
for child_id, parents in current.get("parent_map", {}).items():
|
|
287
307
|
for parent_id in parents:
|
|
288
|
-
graph.
|
|
308
|
+
if child_id not in graph.nodes or parent_id not in graph.nodes:
|
|
309
|
+
continue
|
|
310
|
+
|
|
311
|
+
graph.create_edge(parent_id, child_id, "current")
|
|
289
312
|
|
|
290
313
|
return graph
|
|
291
314
|
|
|
292
315
|
|
|
293
316
|
def _build_node_schema(lineage, node_id):
|
|
294
|
-
return lineage.get(
|
|
317
|
+
return lineage.get("nodes", {}).get(node_id, {}).get("columns", {})
|
|
295
318
|
|
|
296
319
|
|
|
297
320
|
def _get_node_row_count_diff(node_id, node_name):
|
|
298
321
|
row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
|
|
299
322
|
for run in row_count_runs:
|
|
300
|
-
if node_id in run.params.get(
|
|
323
|
+
if node_id in run.params.get("node_ids", []):
|
|
301
324
|
result = run.result.get(node_name, {})
|
|
302
|
-
diff = TaskResultDiffer.diff(result.get(
|
|
325
|
+
diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
|
|
303
326
|
return diff, result
|
|
304
|
-
elif run.params.get(
|
|
327
|
+
elif run.params.get("node_id") == node_id:
|
|
305
328
|
result = run.result.get(node_name, {})
|
|
306
|
-
diff = TaskResultDiffer.diff(result.get(
|
|
329
|
+
diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
|
|
307
330
|
return diff, result
|
|
308
331
|
return None, None
|
|
309
332
|
|
|
310
333
|
|
|
311
334
|
def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> str:
|
|
312
335
|
if not check.related_nodes:
|
|
313
|
-
return
|
|
336
|
+
return "N/A"
|
|
314
337
|
|
|
315
338
|
nodes = check.related_nodes
|
|
316
339
|
if check.changed_nodes:
|
|
@@ -319,39 +342,43 @@ def _generate_mismatched_nodes_summary(check: CheckSummary, limit: int = 3) -> s
|
|
|
319
342
|
nodes = check.changed_nodes
|
|
320
343
|
|
|
321
344
|
if len(nodes) <= limit:
|
|
322
|
-
return
|
|
345
|
+
return ", ".join(nodes)
|
|
323
346
|
|
|
324
|
-
display_nodes = nodes[:limit - 1]
|
|
325
|
-
return
|
|
347
|
+
display_nodes = nodes[: limit - 1]
|
|
348
|
+
return ", ".join(display_nodes) + f", and {len(nodes) - len(display_nodes)} more nodes"
|
|
326
349
|
|
|
327
350
|
|
|
328
351
|
def generate_summary_metadata(base_lineage, curr_lineage):
|
|
329
352
|
from py_markdown_table.markdown_table import markdown_table
|
|
330
353
|
|
|
331
|
-
base_manifest = base_lineage.get(
|
|
332
|
-
base_catalog = base_lineage.get(
|
|
333
|
-
curr_manifest = curr_lineage.get(
|
|
334
|
-
curr_catalog = curr_lineage.get(
|
|
354
|
+
base_manifest = base_lineage.get("manifest_metadata")
|
|
355
|
+
base_catalog = base_lineage.get("catalog_metadata")
|
|
356
|
+
curr_manifest = curr_lineage.get("manifest_metadata")
|
|
357
|
+
curr_catalog = curr_lineage.get("catalog_metadata")
|
|
335
358
|
|
|
336
359
|
metadata = [
|
|
337
360
|
{
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
361
|
+
"": "Base",
|
|
362
|
+
"Manifest": base_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
|
|
363
|
+
"Catalog": base_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if base_catalog else "N/A",
|
|
341
364
|
},
|
|
342
365
|
{
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
}
|
|
366
|
+
"": "Current",
|
|
367
|
+
"Manifest": curr_manifest.generated_at.strftime("%Y-%m-%d %H:%M:%S"),
|
|
368
|
+
"Catalog": curr_catalog.generated_at.strftime("%Y-%m-%d %H:%M:%S") if curr_catalog else "N/A",
|
|
369
|
+
},
|
|
347
370
|
]
|
|
348
371
|
|
|
349
|
-
return
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
372
|
+
return (
|
|
373
|
+
markdown_table(metadata)
|
|
374
|
+
.set_params(
|
|
375
|
+
quote=False,
|
|
376
|
+
row_sep="markdown",
|
|
377
|
+
padding_width=1,
|
|
378
|
+
padding_weight="right", # Aligns the cell's contents to the beginning of the cell
|
|
379
|
+
)
|
|
380
|
+
.get_markdown()
|
|
381
|
+
)
|
|
355
382
|
|
|
356
383
|
|
|
357
384
|
def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], Dict[str, int]):
|
|
@@ -376,9 +403,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
|
|
|
376
403
|
continue
|
|
377
404
|
elif check.type == RunType.SCHEMA_DIFF:
|
|
378
405
|
differ = SchemaDiffResultDiffer(check, base_lineage, curr_lineage)
|
|
379
|
-
elif (
|
|
380
|
-
|
|
381
|
-
|
|
406
|
+
elif (
|
|
407
|
+
check.type
|
|
408
|
+
in [
|
|
409
|
+
RunType.ROW_COUNT_DIFF,
|
|
410
|
+
RunType.QUERY_DIFF,
|
|
411
|
+
RunType.VALUE_DIFF,
|
|
412
|
+
RunType.VALUE_DIFF_DETAIL,
|
|
413
|
+
RunType.PROFILE_DIFF,
|
|
414
|
+
RunType.TOP_K_DIFF,
|
|
415
|
+
RunType.HISTOGRAM_DIFF,
|
|
416
|
+
]
|
|
417
|
+
and run is not None
|
|
418
|
+
):
|
|
382
419
|
# Check the result is changed or not
|
|
383
420
|
differ = differ_factory(run)
|
|
384
421
|
|
|
@@ -391,19 +428,19 @@ def generate_check_summary(base_lineage, curr_lineage) -> (List[CheckSummary], D
|
|
|
391
428
|
description=check.description,
|
|
392
429
|
changes=differ.changes,
|
|
393
430
|
node_ids=differ.related_node_ids,
|
|
394
|
-
changed_nodes=differ.changed_nodes
|
|
431
|
+
changed_nodes=differ.changed_nodes,
|
|
395
432
|
)
|
|
396
433
|
)
|
|
397
434
|
|
|
398
435
|
return checks_summary, {
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
436
|
+
"total": len(checks),
|
|
437
|
+
"mismatch": len(checks_summary),
|
|
438
|
+
"failed": failed_checks_count,
|
|
402
439
|
}
|
|
403
440
|
|
|
404
441
|
|
|
405
442
|
def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
406
|
-
content = up_to_level_content =
|
|
443
|
+
content = up_to_level_content = "graph LR\n"
|
|
407
444
|
is_not_modified = False
|
|
408
445
|
# Only show the modified nodes and there children
|
|
409
446
|
queue = list(graph.modified_set)
|
|
@@ -427,7 +464,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
|
427
464
|
content += node.get_node_str(graph.checks)
|
|
428
465
|
for child_id in node.children:
|
|
429
466
|
queue.append(child_id)
|
|
430
|
-
edge_id = f
|
|
467
|
+
edge_id = f"{node_id}-->{child_id}"
|
|
431
468
|
if edge_id not in display_edge:
|
|
432
469
|
display_edge.add(edge_id)
|
|
433
470
|
content += graph.get_edge_str(edge_id)
|
|
@@ -440,7 +477,7 @@ def generate_mermaid_lineage_graph(graph: LineageGraph):
|
|
|
440
477
|
return up_to_level_content, is_not_modified, len(content) > MAX_MERMAID_TEXT_SIZE
|
|
441
478
|
|
|
442
479
|
|
|
443
|
-
def generate_markdown_summary(ctx: RecceContext, summary_format: str =
|
|
480
|
+
def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown"):
|
|
444
481
|
lineage_diff = ctx.get_lineage_diff()
|
|
445
482
|
summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
|
|
446
483
|
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
|
|
@@ -448,81 +485,89 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = 'markdown
|
|
|
448
485
|
mermaid_content, is_empty_graph, is_partial_graph = generate_mermaid_lineage_graph(graph)
|
|
449
486
|
check_content = generate_check_content(graph, check_statistics)
|
|
450
487
|
|
|
451
|
-
if summary_format ==
|
|
488
|
+
if summary_format == "mermaid":
|
|
452
489
|
return mermaid_content
|
|
453
|
-
elif summary_format ==
|
|
490
|
+
elif summary_format == "check":
|
|
454
491
|
return check_content
|
|
455
|
-
elif summary_format ==
|
|
492
|
+
elif summary_format == "markdown":
|
|
456
493
|
|
|
457
|
-
content =
|
|
458
|
-
content += f
|
|
494
|
+
content = "# Recce Summary\n"
|
|
495
|
+
content += f"## Manifest Information\n{summary_metadata}\n"
|
|
459
496
|
|
|
460
497
|
if is_empty_graph is False:
|
|
461
|
-
content += f
|
|
498
|
+
content += f"""
|
|
462
499
|
## Lineage Graph
|
|
463
500
|
{"_Too many nodes to generate! Please see the full lineage graph on Recce instance._" if is_partial_graph else ''}
|
|
464
501
|
```mermaid
|
|
465
502
|
{mermaid_content}
|
|
466
503
|
```
|
|
467
|
-
|
|
504
|
+
"""
|
|
468
505
|
else:
|
|
469
|
-
content +=
|
|
506
|
+
content += """
|
|
470
507
|
## Lineage Graph
|
|
471
508
|
No changed module was detected.
|
|
472
|
-
|
|
509
|
+
"""
|
|
473
510
|
if check_content:
|
|
474
511
|
content += check_content
|
|
475
512
|
|
|
476
513
|
if ctx.state_loader.cloud_mode:
|
|
477
514
|
pr_info = ctx.state_loader.pr_info
|
|
478
|
-
|
|
515
|
+
# the classic route will be deprecated soon
|
|
516
|
+
content += f"\nSee PR page: {RECCE_CLOUD_HOST}/classic/{pr_info.repository}/pulls/{pr_info.id}\n"
|
|
479
517
|
|
|
480
518
|
return content
|
|
481
519
|
|
|
482
520
|
|
|
483
521
|
def generate_check_content(graph, check_statistics):
|
|
484
522
|
from py_markdown_table.markdown_table import markdown_table
|
|
485
|
-
|
|
523
|
+
|
|
524
|
+
content = ""
|
|
486
525
|
check_content = None
|
|
487
526
|
# Generate the check summary if we found any changes
|
|
488
527
|
if len(graph.checks) > 0:
|
|
489
528
|
data = []
|
|
490
529
|
for check in graph.checks:
|
|
491
|
-
data.append(
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
530
|
+
data.append(
|
|
531
|
+
{
|
|
532
|
+
"Name": check.name,
|
|
533
|
+
"Type": str(check.type).replace("_", " ").title(),
|
|
534
|
+
"Mismatched Nodes": _generate_mismatched_nodes_summary(check),
|
|
535
|
+
# Temporarily remove the type of changes, until we implement a better way to display it.
|
|
536
|
+
# 'Type of Changes': _formate_changes(check.changes)
|
|
537
|
+
}
|
|
538
|
+
)
|
|
539
|
+
check_content = (
|
|
540
|
+
markdown_table(data)
|
|
541
|
+
.set_params(
|
|
542
|
+
quote=False,
|
|
543
|
+
row_sep="markdown",
|
|
544
|
+
padding_width=1,
|
|
545
|
+
padding_weight="right", # Aligns the cell's contents to the beginning of the cell
|
|
546
|
+
)
|
|
547
|
+
.get_markdown()
|
|
548
|
+
)
|
|
504
549
|
|
|
505
|
-
if check_statistics.get(
|
|
506
|
-
warning_message =
|
|
550
|
+
if check_statistics.get("total", 0) > 0:
|
|
551
|
+
warning_message = ""
|
|
507
552
|
statistics = {
|
|
508
|
-
|
|
509
|
-
|
|
553
|
+
"Checks Run": check_statistics.get("total", 0),
|
|
554
|
+
"Data Mismatch Detected": check_statistics.get("mismatch", 0),
|
|
510
555
|
}
|
|
511
|
-
if check_statistics.get(
|
|
512
|
-
statistics[
|
|
513
|
-
warning_message =
|
|
556
|
+
if check_statistics.get("failed", 0) > 0:
|
|
557
|
+
statistics["Incomplete Checks"] = check_statistics.get("failed", 0)
|
|
558
|
+
warning_message = """
|
|
514
559
|
:warning: **Incomplete Checks** refers to checks that did not successfully run due to configuration or SQL errors.
|
|
515
560
|
Please check the output of `recce run` for more information
|
|
516
|
-
|
|
517
|
-
check_summary = markdown_table([statistics]).set_params(quote=False, row_sep=
|
|
518
|
-
content += f
|
|
561
|
+
"""
|
|
562
|
+
check_summary = markdown_table([statistics]).set_params(quote=False, row_sep="markdown").get_markdown()
|
|
563
|
+
content += f"""
|
|
519
564
|
## Checks Summary
|
|
520
565
|
{check_summary}
|
|
521
566
|
{warning_message}
|
|
522
|
-
|
|
567
|
+
"""
|
|
523
568
|
if check_content:
|
|
524
|
-
content += f
|
|
569
|
+
content += f"""
|
|
525
570
|
### Checks of Data Mismatch Detected
|
|
526
571
|
{check_content}
|
|
527
|
-
|
|
572
|
+
"""
|
|
528
573
|
return content
|
recce/tasks/__init__.py
CHANGED
|
@@ -1,7 +1,23 @@
|
|
|
1
1
|
from .core import Task
|
|
2
2
|
from .histogram import HistogramDiffTask
|
|
3
3
|
from .profile import ProfileDiffTask, ProfileTask
|
|
4
|
-
from .query import
|
|
5
|
-
from .rowcount import
|
|
4
|
+
from .query import QueryBaseTask, QueryDiffTask, QueryTask
|
|
5
|
+
from .rowcount import RowCountDiffTask, RowCountTask
|
|
6
6
|
from .top_k import TopKDiffTask
|
|
7
|
-
from .valuediff import
|
|
7
|
+
from .valuediff import ValueDiffDetailTask, ValueDiffTask
|
|
8
|
+
|
|
9
|
+
# Explicitly declare exports
|
|
10
|
+
__all__ = [
|
|
11
|
+
"Task",
|
|
12
|
+
"HistogramDiffTask",
|
|
13
|
+
"ProfileDiffTask",
|
|
14
|
+
"ProfileTask",
|
|
15
|
+
"QueryBaseTask",
|
|
16
|
+
"QueryDiffTask",
|
|
17
|
+
"QueryTask",
|
|
18
|
+
"RowCountDiffTask",
|
|
19
|
+
"RowCountTask",
|
|
20
|
+
"TopKDiffTask",
|
|
21
|
+
"ValueDiffDetailTask",
|
|
22
|
+
"ValueDiffTask",
|
|
23
|
+
]
|