recce-nightly 1.10.0.20250625__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +5 -0
- recce/adapter/dbt_adapter/__init__.py +343 -245
- recce/apis/check_api.py +20 -14
- recce/apis/check_events_api.py +353 -0
- recce/apis/check_func.py +5 -5
- recce/apis/run_func.py +32 -3
- recce/artifact.py +76 -3
- recce/cli.py +705 -82
- recce/config.py +2 -2
- recce/connect_to_cloud.py +1 -1
- recce/core.py +3 -3
- recce/data/404/index.html +2 -0
- recce/data/404.html +2 -22
- recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
- recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
- recce/data/__next.__PAGE__.txt +6 -0
- recce/data/__next._full.txt +32 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +14 -0
- recce/data/__next._tree.txt +8 -0
- recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
- recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
- recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
- recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
- recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
- recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
- recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
- recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
- recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
- recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
- recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
- recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
- recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
- recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
- recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
- recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
- recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
- recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
- recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
- recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
- recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
- recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
- recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
- recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
- recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
- recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
- recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
- recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
- recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
- recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
- recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
- recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
- recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
- recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
- recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
- recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
- recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
- recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
- recce/data/_not-found/__next._full.txt +24 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +13 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +6 -0
- recce/data/_not-found/index.html +2 -0
- recce/data/_not-found/index.txt +24 -0
- recce/data/auth_callback.html +1 -1
- recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/checks/__next._full.txt +39 -0
- recce/data/checks/__next._head.txt +8 -0
- recce/data/checks/__next._index.txt +14 -0
- recce/data/checks/__next._tree.txt +8 -0
- recce/data/checks/__next.checks.__PAGE__.txt +10 -0
- recce/data/checks/__next.checks.txt +4 -0
- recce/data/checks/index.html +2 -0
- recce/data/checks/index.txt +39 -0
- recce/data/index.html +2 -27
- recce/data/index.txt +32 -8
- recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/lineage/__next._full.txt +39 -0
- recce/data/lineage/__next._head.txt +8 -0
- recce/data/lineage/__next._index.txt +14 -0
- recce/data/lineage/__next._tree.txt +8 -0
- recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
- recce/data/lineage/__next.lineage.txt +4 -0
- recce/data/lineage/index.html +2 -0
- recce/data/lineage/index.txt +39 -0
- recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/query/__next._full.txt +37 -0
- recce/data/query/__next._head.txt +8 -0
- recce/data/query/__next._index.txt +14 -0
- recce/data/query/__next._tree.txt +8 -0
- recce/data/query/__next.query.__PAGE__.txt +9 -0
- recce/data/query/__next.query.txt +4 -0
- recce/data/query/index.html +2 -0
- recce/data/query/index.txt +37 -0
- recce/event/CONFIG.bak +1 -0
- recce/event/__init__.py +9 -8
- recce/event/collector.py +6 -2
- recce/event/track.py +10 -0
- recce/github.py +1 -1
- recce/mcp_server.py +725 -0
- recce/models/check.py +433 -15
- recce/models/types.py +61 -2
- recce/pull_request.py +1 -1
- recce/run.py +37 -17
- recce/server.py +216 -21
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +644 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +25 -3
- recce/tasks/dataframe.py +63 -1
- recce/tasks/query.py +40 -3
- recce/tasks/rowcount.py +4 -1
- recce/tasks/schema.py +4 -1
- recce/tasks/utils.py +147 -0
- recce/tasks/valuediff.py +85 -57
- recce/util/api_token.py +11 -2
- recce/util/breaking.py +10 -1
- recce/util/cll.py +1 -2
- recce/util/cloud/__init__.py +15 -0
- recce/util/cloud/base.py +115 -0
- recce/util/cloud/check_events.py +190 -0
- recce/util/cloud/checks.py +242 -0
- recce/util/io.py +2 -2
- recce/util/lineage.py +19 -18
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +254 -5
- recce/util/startup_perf.py +121 -0
- recce/yaml/__init__.py +2 -2
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/METADATA +91 -71
- recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
- recce/data/_next/static/abCX3x3UoIdRLEDWxx4xd/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
- recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
- recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
- recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
- recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
- recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
- recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
- recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
- recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
- recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
- recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
- recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
- recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
- recce/data/_next/static/chunks/92-607cd1af83c41f43.js +0 -1
- recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
- recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
- recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
- recce/data/_next/static/chunks/app/page-da6e046a8235dbfc.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
- recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
- recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
- recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
- recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
- recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
- recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
- recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
- recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
- recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
- recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
- recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
- recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/data/_next/static/media/reload-image.79aabb7d.svg +0 -4
- recce/state.py +0 -786
- recce_nightly-1.10.0.20250625.dist-info/RECORD +0 -154
- recce_nightly-1.10.0.20250625.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/conftest.py +0 -17
- tests/adapter/dbt_adapter/dbt_test_helper.py +0 -298
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -25
- tests/adapter/dbt_adapter/test_dbt_cll.py +0 -384
- tests/adapter/dbt_adapter/test_selector.py +0 -202
- tests/tasks/__init__.py +0 -0
- tests/tasks/conftest.py +0 -4
- tests/tasks/test_histogram.py +0 -129
- tests/tasks/test_lineage.py +0 -55
- tests/tasks/test_preset_checks.py +0 -64
- tests/tasks/test_profile.py +0 -397
- tests/tasks/test_query.py +0 -151
- tests/tasks/test_row_count.py +0 -135
- tests/tasks/test_schema.py +0 -122
- tests/tasks/test_top_k.py +0 -77
- tests/tasks/test_valuediff.py +0 -85
- tests/test_cli.py +0 -133
- tests/test_config.py +0 -43
- tests/test_connect_to_cloud.py +0 -82
- tests/test_core.py +0 -29
- tests/test_dbt.py +0 -36
- tests/test_pull_request.py +0 -130
- tests/test_server.py +0 -104
- tests/test_state.py +0 -134
- tests/test_summary.py +0 -65
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
- /recce/data/_next/static/{abCX3x3UoIdRLEDWxx4xd → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/state/local.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
from .state import RecceState
|
|
6
|
+
from .state_loader import RecceStateLoader
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger("uvicorn")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FileStateLoader(RecceStateLoader):
    """State loader that reads the Recce state from, and writes it to, a local file."""

    def __init__(
        self,
        review_mode: bool = False,
        state_file: Optional[str] = None,
        initial_state: Optional[RecceState] = None,
    ):
        """Forward the local-file options to the base loader."""
        super().__init__(review_mode=review_mode, state_file=state_file, initial_state=initial_state)

    def verify(self) -> bool:
        """Check the configuration; review mode requires a state file.

        Returns:
            bool: False (with ``error_message`` and ``hint_message`` set) when
            review mode is on and no state file was given, True otherwise.
        """
        state_file_missing = self.review_mode is True and self.state_file is None
        if not state_file_missing:
            return True

        self.error_message = "Recce can not launch without a state file."
        self.hint_message = "Please provide a state file in the command argument."
        return False

    def _load_state(self) -> Tuple[RecceState, str]:
        """Load the state from ``state_file``.

        Returns:
            A ``(state, etag)`` pair. The etag is always None for local files,
            and the state is None when no state file is configured.
        """
        if not self.state_file:
            return None, None
        return RecceState.from_file(self.state_file), None

    def _export_state(self, state: RecceState = None) -> Tuple[Union[str, None], str]:
        """
        Store the state to a file. Store happens when terminating the server or run instance.

        Returns:
            A ``(message, etag)`` pair; the etag is always None for local files.
        """
        target = self.state_file
        if target is None:
            return "No state file is provided. Skip storing the state.", None

        logger.info(f"Store recce state to '{self.state_file}'")
        return self._export_state_to_file(target), None

    def purge(self) -> bool:
        """Delete the state file from disk.

        Returns:
            bool: True when the file was removed; False (with ``error_message``
            set) when no state file is configured or the removal failed.
        """
        if self.state_file is None:
            self.error_message = "No state file is provided. Skip removing the state file."
            return False

        try:
            os.remove(self.state_file)
        except Exception as e:
            self.error_message = f"Failed to remove the state file: {e}"
            return False
        return True
|
recce/state/state.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Define the type to serialize/de-serialize the state of the recce instance."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
from recce import get_version
|
|
11
|
+
from recce.exceptions import RecceException
|
|
12
|
+
from recce.git import current_branch
|
|
13
|
+
from recce.models.types import Check, Run
|
|
14
|
+
from recce.pull_request import PullRequestInfo
|
|
15
|
+
from recce.util.io import SupportedFileTypes, file_io_factory
|
|
16
|
+
from recce.util.pydantic_model import pydantic_model_dump, pydantic_model_json_dump
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("uvicorn")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Git information recorded in the state; currently only the branch name.
class GitRepoInfo(BaseModel):
    branch: Optional[str] = None

    @staticmethod
    def from_current_repository() -> Optional["GitRepoInfo"]:
        """Build the info from ``current_branch()``.

        Returns:
            A ``GitRepoInfo`` carrying the branch, or None when
            ``current_branch()`` returns None.
        """
        branch_name = current_branch()
        return None if branch_name is None else GitRepoInfo(branch=branch_name)

    def to_dict(self):
        """Return the result of ``pydantic_model_dump`` for this model."""
        dumped = pydantic_model_dump(self)
        return dumped
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _utc_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # ``datetime.utcnow()`` is deprecated since Python 3.12 and yields a naive
    # datetime; an aware UTC "now" formats to exactly the same string.
    # Imported locally so this helper does not depend on the module's import block.
    from datetime import datetime, timezone

    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


# Metadata stored alongside the state: the schema version, the Recce version
# that produced the state, and the UTC generation timestamp.
class RecceStateMetadata(BaseModel):
    schema_version: str = "v0"
    # Resolved lazily (per instance) via the lambda rather than at import time.
    recce_version: str = Field(default_factory=lambda: get_version())
    generated_at: str = Field(default_factory=_utc_timestamp)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ArtifactsRoot(BaseModel):
    """
    Root of the artifacts.

    base: artifacts of the base env. Key is the file name, value is a dict (or None).
    current: artifacts of the current env. Key is the file name, value is a dict (or None).
    """

    # Both mappings default to empty; a value may be None per the Optional[dict] annotation.
    base: Dict[str, Optional[dict]] = {}
    current: Dict[str, Optional[dict]] = {}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Serializable snapshot of a Recce instance: metadata, runs, checks, artifacts,
# plus optional git and pull-request information.
class RecceState(BaseModel):
    metadata: Optional[RecceStateMetadata] = None
    runs: Optional[List[Run]] = Field(default_factory=list)
    checks: Optional[List[Check]] = Field(default_factory=list)
    artifacts: ArtifactsRoot = ArtifactsRoot(base={}, current={})
    git: Optional[GitRepoInfo] = None
    pull_request: Optional[PullRequestInfo] = None

    @staticmethod
    def from_json(json_content: str) -> "RecceState":
        """Parse a JSON document into a ``RecceState``.

        Args:
            json_content: JSON text of a serialized state.

        Returns:
            The parsed state.

        Raises:
            RecceException: If the metadata declares a schema version other
                than ``"v0"``. A missing metadata block or a missing schema
                version is accepted.
        """
        dict_data = json.loads(json_content)
        state = RecceState(**dict_data)
        metadata = state.metadata

        # Only "v0" (or no declared version) is supported. The original used two
        # independent ``if`` statements, so a None version fell into the
        # "unsupported" branch instead of being accepted.
        if metadata and metadata.schema_version not in (None, "v0"):
            raise RecceException(f"Unsupported state file version: {metadata.schema_version}")
        return state

    @staticmethod
    def from_file(
        file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE
    ) -> Optional["RecceState"]:
        """
        Load the state from a recce state file.

        Args:
            file_path: Path of the state file.
            file_type: Selects the I/O implementation via ``file_io_factory``.

        Returns:
            The loaded state, or None when ``file_path`` is not an existing file.
        """
        from pathlib import Path

        logger.debug(f"Load state file from: '{file_path}'")
        if not Path(file_path).is_file():
            return None

        io = file_io_factory(file_type)
        json_content = io.read(file_path)
        return RecceState.from_json(json_content)

    def to_json(self):
        """Serialize this state with ``pydantic_model_json_dump``."""
        return pydantic_model_json_dump(self)

    def to_file(self, file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE):
        """Write the JSON form of this state to ``file_path``.

        Args:
            file_path: Destination path.
            file_type: Selects the I/O implementation via ``file_io_factory``.

        Returns:
            str: A message naming where the state file was stored.
        """
        json_data = self.to_json()
        io = file_io_factory(file_type)

        io.write(file_path, json_data)
        return f"The state file is stored at '{file_path}'"

    def _merge_run(self, run: Run):
        """Append ``run`` unless a run with the same ``run_id`` is already present."""
        # ``runs`` is declared Optional, so a state loaded with ``"runs": null``
        # would otherwise fail when iterated.
        if self.runs is None:
            self.runs = []
        if any(existing.run_id == run.run_id for existing in self.runs):
            return
        self.runs.append(run)

    def _merge_check(self, check: Check):
        """Merge ``check`` into the check with the same ``check_id``, or append it."""
        # ``checks`` is declared Optional; normalize None to an empty list first.
        if self.checks is None:
            self.checks = []
        for existing in self.checks:
            if existing.check_id == check.check_id:
                existing.merge(check)
                return
        self.checks.append(check)

    def _merge_artifacts(self, artifacts: ArtifactsRoot):
        """Delegate merging of ``artifacts`` to ``self.artifacts.merge``."""
        # NOTE(review): ArtifactsRoot as declared in this module defines no
        # ``merge`` method, so this call looks like it would raise
        # AttributeError -- confirm whether this method is still used.
        self.artifacts.merge(artifacts)
|
recce/state/state_loader.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Dict, Literal, Optional, Tuple, Union, final
|
|
6
|
+
|
|
7
|
+
from recce.exceptions import RecceException
|
|
8
|
+
from recce.pull_request import fetch_pr_metadata
|
|
9
|
+
|
|
10
|
+
from ..util.io import SupportedFileTypes, file_io_factory
|
|
11
|
+
from .const import RECCE_API_TOKEN_MISSING
|
|
12
|
+
from .state import RecceState
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger("uvicorn")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RecceStateLoader(ABC):
    """Abstract base for loading, exporting and purging the Recce state.

    Subclasses implement ``verify``, ``_load_state``, ``_export_state`` and
    ``purge``. ``load`` and ``export`` are final and wrap the subclass hooks
    with ``state_lock``.
    """

    def __init__(
        self,
        review_mode: bool = False,
        cloud_mode: bool = False,
        state_file: Optional[str] = None,
        cloud_options: Optional[Dict[str, str]] = None,
        initial_state: Optional[RecceState] = None,
    ):
        """Store the options and, in cloud mode, resolve the catalog.

        Args:
            review_mode: Whether the loader runs in review mode.
            cloud_mode: Whether the state lives in Recce Cloud.
            state_file: Path of the local state file, if any.
            cloud_options: Cloud settings; the keys read here are
                ``github_token``, ``api_token``, ``session_id`` and ``share_id``.
            initial_state: State to start with instead of loading one.

        Raises:
            RecceException: In cloud mode, when the pull request id cannot be
                obtained with the GitHub token, or when neither a GitHub token
                nor an API token is provided.
        """
        self.review_mode = review_mode
        self.cloud_mode = cloud_mode
        self.state_file = state_file
        self.cloud_options = cloud_options or {}
        self.error_message = None
        self.hint_message = None
        self.state: Optional[RecceState] = initial_state
        self.state_lock = threading.Lock()
        self.state_etag = None
        self.pr_info = None
        self.catalog: Literal["github", "preview", "session"] = "github"
        self.share_id = None
        self.session_id = None

        if self.cloud_mode:
            if self.cloud_options.get("github_token"):
                # A GitHub token selects the pull-request based catalog.
                self.catalog = "github"
                self.pr_info = fetch_pr_metadata(
                    cloud=self.cloud_mode, github_token=self.cloud_options.get("github_token")
                )
                if self.pr_info.id is None:
                    raise RecceException("Cannot get the pull request information from GitHub.")
            elif self.cloud_options.get("api_token"):
                # With an API token, a session id takes precedence over a share id.
                if self.cloud_options.get("session_id"):
                    self.catalog = "session"
                    self.session_id = self.cloud_options.get("session_id")
                else:
                    self.catalog = "preview"
                    self.share_id = self.cloud_options.get("share_id")
            else:
                raise RecceException(RECCE_API_TOKEN_MISSING.error_message)

    @property
    def token(self):
        """The GitHub token if set, otherwise the API token (may be None)."""
        return self.cloud_options.get("github_token") or self.cloud_options.get("api_token")

    @abstractmethod
    def verify(self) -> bool:
        """
        Verify the state loader configuration.
        Returns:
            bool: True if the configuration is valid, False otherwise.
        """
        raise NotImplementedError("Subclasses must implement this method.")

    @property
    def error_and_hint(self) -> Tuple[Optional[str], Optional[str]]:
        """The current ``(error_message, hint_message)`` pair."""
        return self.error_message, self.hint_message

    def update(self, state: RecceState):
        """Replace the in-memory state with ``state``."""
        self.state = state

    @final
    def load(self, refresh=False) -> RecceState:
        """Return the state, loading it through ``_load_state`` when needed.

        Args:
            refresh: When True, reload even if a state is already held.

        Returns:
            The held or freshly loaded state.
        """
        if self.state is not None and refresh is False:
            return self.state
        # The context manager releases the lock even if _load_state raises.
        with self.state_lock:
            self.state, self.state_etag = self._load_state()
        return self.state

    @abstractmethod
    def _load_state(self) -> Tuple[Optional[RecceState], Optional[str]]:
        """
        Load the state from the specified source (file or cloud).
        Returns:
            RecceState: The loaded state object.
            str: The etag of the state file (if applicable).
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def save_as(self, state_file: str, state: RecceState = None):
        """Point the loader at ``state_file`` and export the state there.

        Raises:
            Exception: When the loader is in cloud mode.
        """
        if self.cloud_mode:
            raise Exception("Cannot save the state to Recce Cloud.")

        self.state_file = state_file
        self.export(state)

    @final
    def export(self, state: RecceState = None) -> Union[str, None]:
        """Export the state through ``_export_state`` and record the new etag.

        Args:
            state: When given, replaces the held state before exporting.

        Returns:
            The message produced by ``_export_state``.
        """
        if state is not None:
            self.update(state)

        # perf_counter is meant for measuring durations; unlike the wall clock
        # it is unaffected by system clock adjustments.
        start_time = time.perf_counter()
        with self.state_lock:
            message, state_etag = self._export_state()
            self.state_etag = state_etag
            elapsed_time = time.perf_counter() - start_time
        logger.info(f"Store state completed in {elapsed_time:.2f} seconds")
        return message

    @abstractmethod
    def _export_state(self) -> Tuple[Optional[str], Optional[str]]:
        """
        Export the current Recce state to a file or cloud storage.
        Returns:
            str: A message indicating the result of the export operation.
            str: The etag of the exported state file (if applicable).
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def _export_state_to_file(self, file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE) -> str:
        """
        Store the state to a file. Store happens when terminating the server or run instance.

        Returns:
            str: A message naming where the state file was stored.
        """
        json_data = self.state.to_json()
        io = file_io_factory(file_type)

        io.write(file_path, json_data)
        return f"The state file is stored at '{file_path}'"

    def refresh(self):
        """Force a reload of the state and return it."""
        new_state = self.load(refresh=True)
        return new_state

    def check_conflict(self) -> bool:
        """Report whether the state conflicts; this base implementation always returns False."""
        return False

    def info(self) -> Optional[dict]:
        """Describe where the state comes from.

        Returns:
            A dict with ``mode`` and ``source`` (plus ``pull_request`` in cloud
            mode), or None (with ``error_message`` set) when no state is loaded.
        """
        if self.state is None:
            self.error_message = "No state is loaded."
            return None

        state_info = {
            "mode": "cloud" if self.cloud_mode else "local",
            "source": None,
        }
        if self.cloud_mode:
            state_info["source"] = "Recce Cloud"
            state_info["pull_request"] = self.pr_info
        else:
            state_info["source"] = self.state_file
        return state_info

    @abstractmethod
    def purge(self) -> bool:
        """
        Purge the state file or cloud storage.
        Returns:
            bool: True if the purge was successful, False otherwise.
        """
        raise NotImplementedError("Subclasses must implement this method.")
|
recce/summary.py
CHANGED
|
@@ -271,11 +271,24 @@ class LineageGraph:
|
|
|
271
271
|
def _build_lineage_graph(base, current) -> LineageGraph:
|
|
272
272
|
graph = LineageGraph()
|
|
273
273
|
|
|
274
|
+
# Get the current package name to filter nodes (from the current manifest metadata)
|
|
275
|
+
package_name = None
|
|
276
|
+
manifest_metadata = current.get("manifest_metadata")
|
|
277
|
+
if manifest_metadata and hasattr(manifest_metadata, "project_name"):
|
|
278
|
+
# The default package name is the project name
|
|
279
|
+
package_name = manifest_metadata.project_name
|
|
280
|
+
|
|
274
281
|
# Init Graph nodes with base & current nodes
|
|
275
282
|
for node_id, node_data in base.get("nodes", {}).items():
|
|
283
|
+
# Skip nodes that are not from the current package
|
|
284
|
+
if package_name and node_data.get("package_name") != package_name:
|
|
285
|
+
continue
|
|
276
286
|
graph.create_node(node_id, node_data, "base")
|
|
277
287
|
|
|
278
288
|
for node_id, node_data in current.get("nodes", {}).items():
|
|
289
|
+
# Skip nodes that are not from the current package
|
|
290
|
+
if package_name and node_data.get("package_name") != package_name:
|
|
291
|
+
continue
|
|
279
292
|
if node_id not in graph.nodes:
|
|
280
293
|
node = Node(node_id, node_data, "current")
|
|
281
294
|
graph.nodes[node_id] = node
|
|
@@ -286,9 +299,15 @@ def _build_lineage_graph(base, current) -> LineageGraph:
|
|
|
286
299
|
# Build edges
|
|
287
300
|
for child_id, parents in base.get("parent_map", {}).items():
|
|
288
301
|
for parent_id in parents:
|
|
302
|
+
if child_id not in graph.nodes or parent_id not in graph.nodes:
|
|
303
|
+
continue
|
|
304
|
+
|
|
289
305
|
graph.create_edge(parent_id, child_id, "base")
|
|
290
306
|
for child_id, parents in current.get("parent_map", {}).items():
|
|
291
307
|
for parent_id in parents:
|
|
308
|
+
if child_id not in graph.nodes or parent_id not in graph.nodes:
|
|
309
|
+
continue
|
|
310
|
+
|
|
292
311
|
graph.create_edge(parent_id, child_id, "current")
|
|
293
312
|
|
|
294
313
|
return graph
|
|
@@ -301,7 +320,8 @@ def _build_node_schema(lineage, node_id):
|
|
|
301
320
|
def _get_node_row_count_diff(node_id, node_name):
|
|
302
321
|
row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
|
|
303
322
|
for run in row_count_runs:
|
|
304
|
-
|
|
323
|
+
node_ids = (run.params or {}).get("node_ids") or []
|
|
324
|
+
if node_id in node_ids:
|
|
305
325
|
result = run.result.get(node_name, {})
|
|
306
326
|
diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
|
|
307
327
|
return diff, result
|
|
@@ -491,9 +511,11 @@ No changed module was detected.
|
|
|
491
511
|
if check_content:
|
|
492
512
|
content += check_content
|
|
493
513
|
|
|
494
|
-
if ctx.state_loader.cloud_mode:
|
|
514
|
+
if ctx.state_loader.cloud_mode and ctx.state_loader.pr_info is not None:
|
|
495
515
|
pr_info = ctx.state_loader.pr_info
|
|
496
|
-
|
|
516
|
+
if pr_info.repository is not None and pr_info.id is not None:
|
|
517
|
+
# the classic route will be deprecated soon
|
|
518
|
+
content += f"\nSee PR page: {RECCE_CLOUD_HOST}/classic/{pr_info.repository}/pulls/{pr_info.id}\n"
|
|
497
519
|
|
|
498
520
|
return content
|
|
499
521
|
|
recce/tasks/dataframe.py
CHANGED
|
@@ -19,11 +19,34 @@ class DataFrameColumnType(Enum):
|
|
|
19
19
|
TIMEDELTA = "timedelta"
|
|
20
20
|
UNKNOWN = "unknown"
|
|
21
21
|
|
|
22
|
+
@classmethod
def from_string(cls, type_str: str) -> "DataFrameColumnType":
    """Look up the enum member whose value matches ``type_str``.

    The input is lower-cased and stripped of surrounding whitespace before the
    lookup.

    Args:
        type_str: String representation of the type (e.g., "integer", "text")

    Returns:
        The matching member, or ``UNKNOWN`` when no member has that value.
    """
    normalized = type_str.lower().strip()
    try:
        member = cls(normalized)
    except ValueError:
        # No member carries this value.
        member = cls.UNKNOWN
    return member
|
|
37
|
+
|
|
22
38
|
|
|
23
39
|
# One column of a DataFrame: a key, a name and a column type.
class DataFrameColumn(BaseModel):
    key: t.Optional[str] = None
    name: str
    type: DataFrameColumnType

    def __init__(self, **data):
        """Create the column, falling back to ``name`` when ``key`` is absent or None."""
        if data.get("key") is None:
            data = {**data, "key": data.get("name")}
        super().__init__(**data)
|
|
49
|
+
|
|
27
50
|
|
|
28
51
|
class DataFrame(BaseModel):
|
|
29
52
|
columns: t.List[DataFrameColumn]
|
|
@@ -64,7 +87,7 @@ class DataFrame(BaseModel):
|
|
|
64
87
|
col_type = DataFrameColumnType.INTEGER
|
|
65
88
|
else:
|
|
66
89
|
col_type = DataFrameColumnType.UNKNOWN
|
|
67
|
-
columns.append(DataFrameColumn(name=col_name, type=col_type))
|
|
90
|
+
columns.append(DataFrameColumn(key=col_name, name=col_name, type=col_type))
|
|
68
91
|
|
|
69
92
|
def _row_values(row):
|
|
70
93
|
# If the value is Decimal, check if it's finite. If not, convert it to float(xxx) (GitHub issue #476)
|
|
@@ -106,3 +129,42 @@ class DataFrame(BaseModel):
|
|
|
106
129
|
more=more,
|
|
107
130
|
)
|
|
108
131
|
return df
|
|
132
|
+
|
|
133
|
+
@staticmethod
def from_data(
    columns: t.Dict[str, str],
    data: t.List[tuple],
    limit: t.Optional[int] = None,
    more: t.Optional[bool] = None,
):
    """Build a DataFrame directly from a column schema and row data.

    Args:
        columns: Mapping of column name to type string. Each type string is
            resolved with ``DataFrameColumnType.from_string``
            (e.g. "number", "integer", "text", "boolean", "date", "datetime", "timedelta").
        data: List of rows (each row is a list/tuple/sequence of values)
        limit: Optional limit on the number of rows returned
        more: Optional flag indicating whether there are more rows to fetch

    Returns:
        DataFrame instance

    Examples:
        columns = {"idx": "integer", "name": "text", "impacted": "boolean"}
        data = [[0, "model_a", True], [1, "model_b", False]]
        df = DataFrame.from_data(columns, data)
    """
    # Each schema entry becomes a column whose key and name are both the dict key.
    processed_columns = [
        DataFrameColumn(
            key=column_key,
            name=column_key,
            type=DataFrameColumnType.from_string(type_name),
        )
        for column_key, type_name in columns.items()
    ]
    return DataFrame(columns=processed_columns, data=data, limit=limit, more=more)
|
recce/tasks/query.py
CHANGED
|
@@ -8,6 +8,7 @@ from ..exceptions import RecceException
|
|
|
8
8
|
from ..models import Check
|
|
9
9
|
from .core import CheckValidator, Task, TaskResultDiffer
|
|
10
10
|
from .dataframe import DataFrame
|
|
11
|
+
from .utils import normalize_boolean_flag_columns, normalize_keys_to_columns
|
|
11
12
|
from .valuediff import ValueDiffMixin
|
|
12
13
|
|
|
13
14
|
QUERY_LIMIT = 2000
|
|
@@ -147,6 +148,10 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
|
|
|
147
148
|
base_sql_template: Optional[str] = None,
|
|
148
149
|
preview_change: bool = False,
|
|
149
150
|
):
|
|
151
|
+
"""
|
|
152
|
+
Execute diff queries on base and current environments without join.
|
|
153
|
+
Note: Mutates self.params.primary_keys to normalize values with actual column keys.
|
|
154
|
+
"""
|
|
150
155
|
limit = QUERY_LIMIT
|
|
151
156
|
|
|
152
157
|
self.connection = dbt_adapter.get_thread_connection()
|
|
@@ -159,9 +164,17 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
|
|
|
159
164
|
current, current_more = self.execute_sql_with_limit(sql_template, base=False, limit=limit)
|
|
160
165
|
self.check_cancel()
|
|
161
166
|
|
|
167
|
+
base_df = DataFrame.from_agate(base, limit=limit, more=base_more)
|
|
168
|
+
current_df = DataFrame.from_agate(current, limit=limit, more=current_more)
|
|
169
|
+
|
|
170
|
+
# Normalize primary_keys if present (for non-join diff, use current columns as reference)
|
|
171
|
+
if self.params.primary_keys:
|
|
172
|
+
column_keys = [col.key for col in current_df.columns]
|
|
173
|
+
self.params.primary_keys = normalize_keys_to_columns(self.params.primary_keys, column_keys)
|
|
174
|
+
|
|
162
175
|
return QueryDiffResult(
|
|
163
|
-
base=
|
|
164
|
-
current=
|
|
176
|
+
base=base_df,
|
|
177
|
+
current=current_df,
|
|
165
178
|
)
|
|
166
179
|
|
|
167
180
|
def _query_diff_join(
|
|
@@ -172,6 +185,22 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
|
|
|
172
185
|
base_sql_template: Optional[str] = None,
|
|
173
186
|
preview_change: bool = False,
|
|
174
187
|
):
|
|
188
|
+
"""
|
|
189
|
+
Execute diff queries on base and current environments using SQL join operations.
|
|
190
|
+
This method performs a set-based diff using INTERSECT and EXCEPT operations
|
|
191
|
+
to identify rows that differ between base and current query results.
|
|
192
|
+
|
|
193
|
+
Note: Mutates self.params.primary_keys to normalize values with actual column keys.
|
|
194
|
+
|
|
195
|
+
:param dbt_adapter: The dbt adapter instance for executing SQL
|
|
196
|
+
:param sql_template: SQL template to execute on the current environment
|
|
197
|
+
:param primary_keys: List of column names to use as primary keys for ordering
|
|
198
|
+
:param base_sql_template: Optional SQL template for the base environment.
|
|
199
|
+
If None, sql_template is used for both environments.
|
|
200
|
+
:param preview_change: If True, run base_sql_template against current environment
|
|
201
|
+
instead of base environment
|
|
202
|
+
:return: QueryDiffResult containing the diff DataFrame with in_a/in_b flags
|
|
203
|
+
"""
|
|
175
204
|
|
|
176
205
|
query_template = r"""
|
|
177
206
|
with a_query as (
|
|
@@ -251,7 +280,15 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
|
|
|
251
280
|
_, table = dbt_adapter.execute(sql, fetch=True)
|
|
252
281
|
self.check_cancel()
|
|
253
282
|
|
|
254
|
-
|
|
283
|
+
diff_df = DataFrame.from_agate(table)
|
|
284
|
+
# Normalize in_a/in_b columns to lowercase for cross-warehouse consistency
|
|
285
|
+
diff_df = normalize_boolean_flag_columns(diff_df)
|
|
286
|
+
|
|
287
|
+
# Normalize primary_keys to match actual column keys from warehouse
|
|
288
|
+
column_keys = [col.key for col in diff_df.columns]
|
|
289
|
+
self.params.primary_keys = normalize_keys_to_columns(primary_keys, column_keys)
|
|
290
|
+
|
|
291
|
+
return QueryDiffResult(diff=diff_df)
|
|
255
292
|
|
|
256
293
|
@staticmethod
|
|
257
294
|
def _select_single_model(model_name):
|
recce/tasks/rowcount.py
CHANGED
|
@@ -263,7 +263,10 @@ class RowCountDiffResultDiffer(TaskResultDiffer):
|
|
|
263
263
|
|
|
264
264
|
def _get_changed_nodes(self) -> Union[List[str], None]:
|
|
265
265
|
if self.changes:
|
|
266
|
-
|
|
266
|
+
# Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
|
|
267
|
+
# Convert to list directly
|
|
268
|
+
return list(self.changes.affected_root_keys)
|
|
269
|
+
return None
|
|
267
270
|
|
|
268
271
|
|
|
269
272
|
class RowCountDiffCheckValidator(CheckValidator):
|
recce/tasks/schema.py
CHANGED
|
@@ -45,7 +45,10 @@ class SchemaDiffResultDiffer:
|
|
|
45
45
|
|
|
46
46
|
def _get_changed_nodes(self) -> Union[List[str], None]:
|
|
47
47
|
if self.changes:
|
|
48
|
-
|
|
48
|
+
# Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
|
|
49
|
+
# Convert to list directly
|
|
50
|
+
return list(self.changes.affected_root_keys)
|
|
51
|
+
return None
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
class SchemaDiffParams(BaseModel):
|