recce-nightly 1.10.0.20250625__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (229) hide show
  1. recce/VERSION +1 -1
  2. recce/__init__.py +5 -0
  3. recce/adapter/dbt_adapter/__init__.py +343 -245
  4. recce/apis/check_api.py +20 -14
  5. recce/apis/check_events_api.py +353 -0
  6. recce/apis/check_func.py +5 -5
  7. recce/apis/run_func.py +32 -3
  8. recce/artifact.py +76 -3
  9. recce/cli.py +705 -82
  10. recce/config.py +2 -2
  11. recce/connect_to_cloud.py +1 -1
  12. recce/core.py +3 -3
  13. recce/data/404/index.html +2 -0
  14. recce/data/404.html +2 -22
  15. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  16. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  17. recce/data/__next.__PAGE__.txt +6 -0
  18. recce/data/__next._full.txt +32 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +14 -0
  21. recce/data/__next._tree.txt +8 -0
  22. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  23. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  24. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  25. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  26. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  27. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  28. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  29. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  30. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  31. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  32. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  33. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  34. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  35. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  36. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  37. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  38. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  39. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  40. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  41. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  42. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  43. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  44. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  45. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  46. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  47. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  48. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  49. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  50. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  51. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  52. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  53. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  54. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  55. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  56. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  57. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  58. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  59. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  60. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  61. recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
  62. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  63. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  64. recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
  65. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  66. recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
  67. recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
  68. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  69. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  70. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  71. recce/data/_not-found/__next._full.txt +24 -0
  72. recce/data/_not-found/__next._head.txt +8 -0
  73. recce/data/_not-found/__next._index.txt +13 -0
  74. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  75. recce/data/_not-found/__next._not-found.txt +4 -0
  76. recce/data/_not-found/__next._tree.txt +6 -0
  77. recce/data/_not-found/index.html +2 -0
  78. recce/data/_not-found/index.txt +24 -0
  79. recce/data/auth_callback.html +1 -1
  80. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  81. recce/data/checks/__next._full.txt +39 -0
  82. recce/data/checks/__next._head.txt +8 -0
  83. recce/data/checks/__next._index.txt +14 -0
  84. recce/data/checks/__next._tree.txt +8 -0
  85. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  86. recce/data/checks/__next.checks.txt +4 -0
  87. recce/data/checks/index.html +2 -0
  88. recce/data/checks/index.txt +39 -0
  89. recce/data/index.html +2 -27
  90. recce/data/index.txt +32 -8
  91. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  92. recce/data/lineage/__next._full.txt +39 -0
  93. recce/data/lineage/__next._head.txt +8 -0
  94. recce/data/lineage/__next._index.txt +14 -0
  95. recce/data/lineage/__next._tree.txt +8 -0
  96. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  97. recce/data/lineage/__next.lineage.txt +4 -0
  98. recce/data/lineage/index.html +2 -0
  99. recce/data/lineage/index.txt +39 -0
  100. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  101. recce/data/query/__next._full.txt +37 -0
  102. recce/data/query/__next._head.txt +8 -0
  103. recce/data/query/__next._index.txt +14 -0
  104. recce/data/query/__next._tree.txt +8 -0
  105. recce/data/query/__next.query.__PAGE__.txt +9 -0
  106. recce/data/query/__next.query.txt +4 -0
  107. recce/data/query/index.html +2 -0
  108. recce/data/query/index.txt +37 -0
  109. recce/event/CONFIG.bak +1 -0
  110. recce/event/__init__.py +9 -8
  111. recce/event/collector.py +6 -2
  112. recce/event/track.py +10 -0
  113. recce/github.py +1 -1
  114. recce/mcp_server.py +725 -0
  115. recce/models/check.py +433 -15
  116. recce/models/types.py +61 -2
  117. recce/pull_request.py +1 -1
  118. recce/run.py +37 -17
  119. recce/server.py +216 -21
  120. recce/state/__init__.py +31 -0
  121. recce/state/cloud.py +644 -0
  122. recce/state/const.py +26 -0
  123. recce/state/local.py +56 -0
  124. recce/state/state.py +119 -0
  125. recce/state/state_loader.py +174 -0
  126. recce/summary.py +25 -3
  127. recce/tasks/dataframe.py +63 -1
  128. recce/tasks/query.py +40 -3
  129. recce/tasks/rowcount.py +4 -1
  130. recce/tasks/schema.py +4 -1
  131. recce/tasks/utils.py +147 -0
  132. recce/tasks/valuediff.py +85 -57
  133. recce/util/api_token.py +11 -2
  134. recce/util/breaking.py +10 -1
  135. recce/util/cll.py +1 -2
  136. recce/util/cloud/__init__.py +15 -0
  137. recce/util/cloud/base.py +115 -0
  138. recce/util/cloud/check_events.py +190 -0
  139. recce/util/cloud/checks.py +242 -0
  140. recce/util/io.py +2 -2
  141. recce/util/lineage.py +19 -18
  142. recce/util/perf_tracking.py +85 -0
  143. recce/util/recce_cloud.py +254 -5
  144. recce/util/startup_perf.py +121 -0
  145. recce/yaml/__init__.py +2 -2
  146. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/METADATA +91 -71
  147. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  148. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  149. recce/data/_next/static/abCX3x3UoIdRLEDWxx4xd/_buildManifest.js +0 -1
  150. recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
  151. recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
  152. recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
  153. recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
  154. recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
  155. recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
  156. recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
  157. recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
  158. recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
  159. recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
  160. recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
  161. recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
  162. recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
  163. recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
  164. recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
  165. recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
  166. recce/data/_next/static/chunks/92-607cd1af83c41f43.js +0 -1
  167. recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
  168. recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
  169. recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
  170. recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
  171. recce/data/_next/static/chunks/app/page-da6e046a8235dbfc.js +0 -1
  172. recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
  173. recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
  174. recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
  175. recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
  176. recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
  177. recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
  178. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  179. recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
  180. recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
  181. recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
  182. recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
  183. recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
  184. recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
  185. recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
  186. recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
  187. recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
  188. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  189. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  190. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  191. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  192. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  193. recce/data/_next/static/media/reload-image.79aabb7d.svg +0 -4
  194. recce/state.py +0 -786
  195. recce_nightly-1.10.0.20250625.dist-info/RECORD +0 -154
  196. recce_nightly-1.10.0.20250625.dist-info/top_level.txt +0 -2
  197. tests/__init__.py +0 -0
  198. tests/adapter/__init__.py +0 -0
  199. tests/adapter/dbt_adapter/__init__.py +0 -0
  200. tests/adapter/dbt_adapter/conftest.py +0 -17
  201. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -298
  202. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -25
  203. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -384
  204. tests/adapter/dbt_adapter/test_selector.py +0 -202
  205. tests/tasks/__init__.py +0 -0
  206. tests/tasks/conftest.py +0 -4
  207. tests/tasks/test_histogram.py +0 -129
  208. tests/tasks/test_lineage.py +0 -55
  209. tests/tasks/test_preset_checks.py +0 -64
  210. tests/tasks/test_profile.py +0 -397
  211. tests/tasks/test_query.py +0 -151
  212. tests/tasks/test_row_count.py +0 -135
  213. tests/tasks/test_schema.py +0 -122
  214. tests/tasks/test_top_k.py +0 -77
  215. tests/tasks/test_valuediff.py +0 -85
  216. tests/test_cli.py +0 -133
  217. tests/test_config.py +0 -43
  218. tests/test_connect_to_cloud.py +0 -82
  219. tests/test_core.py +0 -29
  220. tests/test_dbt.py +0 -36
  221. tests/test_pull_request.py +0 -130
  222. tests/test_server.py +0 -104
  223. tests/test_state.py +0 -134
  224. tests/test_summary.py +0 -65
  225. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  226. /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
  227. /recce/data/_next/static/{abCX3x3UoIdRLEDWxx4xd → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  228. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  229. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/state/local.py ADDED
@@ -0,0 +1,56 @@
1
+ import logging
2
+ import os
3
+ from typing import Optional, Tuple, Union
4
+
5
+ from .state import RecceState
6
+ from .state_loader import RecceStateLoader
7
+
8
+ logger = logging.getLogger("uvicorn")
9
+
10
+
11
class FileStateLoader(RecceStateLoader):
    """State loader that keeps the Recce state in a local file (``state_file``)."""

    def __init__(
        self,
        review_mode: bool = False,
        state_file: Optional[str] = None,
        initial_state: Optional[RecceState] = None,
    ):
        # Only the file-related options are forwarded; the cloud options keep
        # the defaults of the base class.
        super().__init__(
            review_mode=review_mode,
            state_file=state_file,
            initial_state=initial_state,
        )

    def verify(self) -> bool:
        """
        Check the loader configuration.

        Returns:
            bool: False (with ``error_message`` / ``hint_message`` set) when review
            mode is requested without a state file, True otherwise.
        """
        needs_state_file = self.review_mode is True and self.state_file is None
        if not needs_state_file:
            return True

        self.error_message = "Recce can not launch without a state file."
        self.hint_message = "Please provide a state file in the command argument."
        return False

    def _load_state(self) -> Tuple[RecceState, str]:
        """
        Read the state from ``state_file`` when one is configured.

        Returns:
            The loaded state (None when no state file is configured) and a tag,
            which is always None for local files.
        """
        loaded = None
        if self.state_file:
            loaded = RecceState.from_file(self.state_file)
        return loaded, None

    def _export_state(self, state: RecceState = None) -> Tuple[Union[str, None], str]:
        """
        Store the state to a file. Store happens when terminating the server or run instance.

        Returns:
            A result message and a tag, which is always None for local files.
        """
        if self.state_file is None:
            return "No state file is provided. Skip storing the state.", None

        logger.info(f"Store recce state to '{self.state_file}'")
        result_message = self._export_state_to_file(self.state_file)
        return result_message, None

    def purge(self) -> bool:
        """
        Remove the state file from disk.

        Returns:
            bool: True when the file was removed; False (with ``error_message`` set)
            when no state file is configured or the removal failed.
        """
        if self.state_file is None:
            self.error_message = "No state file is provided. Skip removing the state file."
            return False

        try:
            os.remove(self.state_file)
        except Exception as e:
            self.error_message = f"Failed to remove the state file: {e}"
            return False
        return True
recce/state/state.py ADDED
@@ -0,0 +1,119 @@
1
+ """Define the type to serialize/de-serialize the state of the recce instance."""
2
+
3
+ import json
4
+ import logging
5
+ from datetime import datetime
6
+ from typing import Dict, List, Optional
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ from recce import get_version
11
+ from recce.exceptions import RecceException
12
+ from recce.git import current_branch
13
+ from recce.models.types import Check, Run
14
+ from recce.pull_request import PullRequestInfo
15
+ from recce.util.io import SupportedFileTypes, file_io_factory
16
+ from recce.util.pydantic_model import pydantic_model_dump, pydantic_model_json_dump
17
+
18
+ logger = logging.getLogger("uvicorn")
19
+
20
+
21
class GitRepoInfo(BaseModel):
    # Git metadata stored alongside a Recce state; only the branch name is tracked.
    branch: Optional[str] = None

    @staticmethod
    def from_current_repository() -> Optional["GitRepoInfo"]:
        """Build a GitRepoInfo from ``current_branch()``; None when no branch is reported."""
        detected_branch = current_branch()
        return None if detected_branch is None else GitRepoInfo(branch=detected_branch)

    def to_dict(self):
        """Return the model as dumped by ``pydantic_model_dump``."""
        return pydantic_model_dump(self)
34
+
35
+
36
def _utc_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # yields the same wall-clock fields and therefore the same formatted string.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


class RecceStateMetadata(BaseModel):
    # Version of the state file layout; "v0" is the only value written here.
    schema_version: str = "v0"
    # Version of recce that produced the state, resolved when the model is created.
    recce_version: str = Field(default_factory=lambda: get_version())
    # UTC creation timestamp, resolved when the model is created.
    generated_at: str = Field(default_factory=_utc_timestamp)
40
+
41
+
42
class ArtifactsRoot(BaseModel):
    """
    Root of the artifacts.

    base: artifacts of the base env. key is file name, value is dict
    current: artifacts of the current env. key is file name, value is dict
    """

    # Artifacts of the base environment, keyed by file name (a value may be None).
    base: Dict[str, Optional[dict]] = {}

    # Artifacts of the current environment, keyed by file name (a value may be None).
    current: Dict[str, Optional[dict]] = {}
52
+
53
+
54
class RecceState(BaseModel):
    # Serializable snapshot of a Recce instance: metadata, runs, checks,
    # artifacts and the git / pull-request context.
    metadata: Optional[RecceStateMetadata] = None
    runs: Optional[List[Run]] = Field(default_factory=list)
    checks: Optional[List[Check]] = Field(default_factory=list)
    artifacts: ArtifactsRoot = ArtifactsRoot(base={}, current={})
    git: Optional[GitRepoInfo] = None
    pull_request: Optional[PullRequestInfo] = None

    @staticmethod
    def from_json(json_content: str):
        """
        Parse a JSON document into a RecceState.

        Args:
            json_content: JSON text of a serialized state.

        Returns:
            RecceState: the parsed state.

        Raises:
            RecceException: when the metadata declares a schema version other than "v0".
        """
        state = RecceState(**json.loads(json_content))
        metadata = state.metadata

        # A missing schema version is accepted like "v0". The original chain of
        # two independent `if` statements let the None case fall through into
        # the "unsupported version" branch.
        if metadata and metadata.schema_version not in (None, "v0"):
            raise RecceException(f"Unsupported state file version: {metadata.schema_version}")
        return state

    @staticmethod
    def from_file(file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE):
        """
        Load the state from a recce state file.

        Args:
            file_path: path of the state file.
            file_type: selects the reader obtained from ``file_io_factory``.

        Returns:
            The parsed RecceState, or None when ``file_path`` is not an existing file.
        """
        from pathlib import Path

        logger.debug(f"Load state file from: '{file_path}'")
        if not Path(file_path).is_file():
            return None

        reader = file_io_factory(file_type)
        return RecceState.from_json(reader.read(file_path))

    def to_json(self):
        """Return the state serialized by ``pydantic_model_json_dump``."""
        return pydantic_model_json_dump(self)

    def to_file(self, file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE):
        """
        Serialize the state and write it to ``file_path``.

        Args:
            file_path: destination path.
            file_type: selects the writer obtained from ``file_io_factory``.

        Returns:
            str: a message naming the written file.
        """
        writer = file_io_factory(file_type)
        writer.write(file_path, self.to_json())
        return f"The state file is stored at '{file_path}'"

    def _merge_run(self, run: Run):
        """Append ``run`` unless a run with the same ``run_id`` is already present."""
        # `runs` is declared Optional, so it can be None; start from an empty
        # list in that case instead of failing on iteration.
        if self.runs is None:
            self.runs = []

        if all(existing.run_id != run.run_id for existing in self.runs):
            self.runs.append(run)

    def _merge_check(self, check: Check):
        """Merge ``check`` into the check with the same ``check_id``, or append it."""
        # Same Optional handling as for runs.
        if self.checks is None:
            self.checks = []

        for existing in self.checks:
            if existing.check_id == check.check_id:
                existing.merge(check)
                return
        self.checks.append(check)

    def _merge_artifacts(self, artifacts: ArtifactsRoot):
        """Delegate to ``self.artifacts.merge``."""
        # NOTE(review): the ArtifactsRoot class visible in this module declares
        # no `merge` method, so this call looks like it would raise
        # AttributeError - confirm where `merge` is expected to come from.
        self.artifacts.merge(artifacts)
recce/state/state_loader.py ADDED
@@ -0,0 +1,174 @@
1
+ import logging
2
+ import threading
3
+ import time
4
+ from abc import ABC, abstractmethod
5
+ from typing import Dict, Literal, Optional, Tuple, Union, final
6
+
7
+ from recce.exceptions import RecceException
8
+ from recce.pull_request import fetch_pr_metadata
9
+
10
+ from ..util.io import SupportedFileTypes, file_io_factory
11
+ from .const import RECCE_API_TOKEN_MISSING
12
+ from .state import RecceState
13
+
14
+ logger = logging.getLogger("uvicorn")
15
+
16
+
17
class RecceStateLoader(ABC):
    """
    Abstract base for objects that load, cache and export a RecceState.

    Subclasses implement ``verify``, ``_load_state``, ``_export_state`` and
    ``purge``. The final ``load`` and ``export`` methods wrap those hooks and
    hold ``state_lock`` while they run.
    """

    def __init__(
        self,
        review_mode: bool = False,
        cloud_mode: bool = False,
        state_file: Optional[str] = None,
        cloud_options: Optional[Dict[str, str]] = None,
        initial_state: Optional[RecceState] = None,
    ):
        """
        Record the loader options and, in cloud mode, resolve the catalog.

        Args:
            review_mode: stored on the loader for subclasses to consult.
            cloud_mode: when True, ``cloud_options`` must carry a token.
            state_file: path of the state file, if any.
            cloud_options: option mapping; the keys read here are
                ``github_token``, ``api_token``, ``session_id`` and ``share_id``.
            initial_state: state to serve from ``load`` until a refresh.

        Raises:
            RecceException: in cloud mode, when the pull request id cannot be
                obtained with the GitHub token, or when neither ``github_token``
                nor ``api_token`` is provided.
        """
        self.review_mode = review_mode
        self.cloud_mode = cloud_mode
        self.state_file = state_file
        self.cloud_options = cloud_options or {}
        self.error_message = None
        self.hint_message = None
        self.state: Optional[RecceState] = initial_state
        self.state_lock = threading.Lock()
        self.state_etag = None
        self.pr_info = None
        self.catalog: Literal["github", "preview", "session"] = "github"
        self.share_id = None
        self.session_id = None

        if not self.cloud_mode:
            return

        github_token = self.cloud_options.get("github_token")
        if github_token:
            # A GitHub token takes precedence over an API token.
            self.catalog = "github"
            self.pr_info = fetch_pr_metadata(cloud=self.cloud_mode, github_token=github_token)
            if self.pr_info.id is None:
                raise RecceException("Cannot get the pull request information from GitHub.")
        elif self.cloud_options.get("api_token"):
            session_id = self.cloud_options.get("session_id")
            if session_id:
                self.catalog = "session"
                self.session_id = session_id
            else:
                self.catalog = "preview"
                self.share_id = self.cloud_options.get("share_id")
        else:
            raise RecceException(RECCE_API_TOKEN_MISSING.error_message)

    @property
    def token(self):
        """The GitHub token when present, otherwise the API token (may be None)."""
        return self.cloud_options.get("github_token") or self.cloud_options.get("api_token")

    @abstractmethod
    def verify(self) -> bool:
        """
        Verify the state loader configuration.
        Returns:
            bool: True if the configuration is valid, False otherwise.
        """
        raise NotImplementedError("Subclasses must implement this method.")

    @property
    def error_and_hint(self) -> Tuple[Optional[str], Optional[str]]:
        """The current ``(error_message, hint_message)`` pair."""
        return self.error_message, self.hint_message

    def update(self, state: RecceState):
        """Replace the cached state with ``state``."""
        self.state = state

    @final
    def load(self, refresh=False) -> RecceState:
        """
        Return the cached state, loading it through ``_load_state`` when needed.

        Args:
            refresh: when not False, reload even if a state is already cached.

        Returns:
            RecceState: the cached or freshly loaded state.
        """
        if self.state is not None and refresh is False:
            return self.state

        # The lock is released even when _load_state raises.
        with self.state_lock:
            self.state, self.state_etag = self._load_state()
        return self.state

    @abstractmethod
    def _load_state(self) -> Tuple[RecceState, str]:
        """
        Load the state from the specified source (file or cloud).
        Returns:
            RecceState: The loaded state object.
            str: The etag of the state file (if applicable).
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def save_as(self, state_file: str, state: RecceState = None):
        """
        Point the loader at ``state_file`` and export the state there.

        Raises:
            Exception: when the loader is in cloud mode.
        """
        if self.cloud_mode:
            raise Exception("Cannot save the state to Recce Cloud.")

        self.state_file = state_file
        self.export(state)

    @final
    def export(self, state: RecceState = None) -> Union[str, None]:
        """
        Export the state through ``_export_state`` and log the elapsed time.

        Args:
            state: when given, replaces the cached state before exporting.

        Returns:
            The message produced by ``_export_state``.
        """
        if state is not None:
            self.update(state)

        # The timer starts before the lock is taken, so the logged duration
        # includes any time spent waiting for it.
        start_time = time.time()
        with self.state_lock:
            message, state_etag = self._export_state()
            self.state_etag = state_etag
            elapsed_time = time.time() - start_time
        logger.info(f"Store state completed in {elapsed_time:.2f} seconds")
        return message

    @abstractmethod
    def _export_state(self) -> Tuple[Union[str, None], str]:
        """
        Export the current Recce state to a file or cloud storage.
        Returns:
            str: A message indicating the result of the export operation.
            str: The etag of the exported state file (if applicable).
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def _export_state_to_file(self, file_path: str, file_type: SupportedFileTypes = SupportedFileTypes.FILE) -> str:
        """
        Store the state to a file. Store happens when terminating the server or run instance.

        Returns:
            str: a message naming the written file.
        """
        writer = file_io_factory(file_type)
        writer.write(file_path, self.state.to_json())
        return f"The state file is stored at '{file_path}'"

    def refresh(self):
        """Reload the state from its source and return it."""
        return self.load(refresh=True)

    def check_conflict(self) -> bool:
        """Report whether the stored state conflicts; this base version always returns False."""
        return False

    def info(self) -> Optional[dict]:
        """
        Describe where the state comes from.

        Returns:
            A dict with ``mode`` and ``source`` (plus ``pull_request`` in cloud
            mode), or None (with ``error_message`` set) when no state is loaded.
        """
        if self.state is None:
            self.error_message = "No state is loaded."
            return None

        if self.cloud_mode:
            return {
                "mode": "cloud",
                "source": "Recce Cloud",
                "pull_request": self.pr_info,
            }
        return {
            "mode": "local",
            "source": self.state_file,
        }

    @abstractmethod
    def purge(self) -> bool:
        """
        Purge the state file or cloud storage.
        Returns:
            bool: True if the purge was successful, False otherwise.
        """
        raise NotImplementedError("Subclasses must implement this method.")
recce/summary.py CHANGED
@@ -271,11 +271,24 @@ class LineageGraph:
271
271
  def _build_lineage_graph(base, current) -> LineageGraph:
272
272
  graph = LineageGraph()
273
273
 
274
+ # Get the current package name to filter nodes (from the current manifest metadata)
275
+ package_name = None
276
+ manifest_metadata = current.get("manifest_metadata")
277
+ if manifest_metadata and hasattr(manifest_metadata, "project_name"):
278
+ # The default package name is the project name
279
+ package_name = manifest_metadata.project_name
280
+
274
281
  # Init Graph nodes with base & current nodes
275
282
  for node_id, node_data in base.get("nodes", {}).items():
283
+ # Skip nodes that are not from the current package
284
+ if package_name and node_data.get("package_name") != package_name:
285
+ continue
276
286
  graph.create_node(node_id, node_data, "base")
277
287
 
278
288
  for node_id, node_data in current.get("nodes", {}).items():
289
+ # Skip nodes that are not from the current package
290
+ if package_name and node_data.get("package_name") != package_name:
291
+ continue
279
292
  if node_id not in graph.nodes:
280
293
  node = Node(node_id, node_data, "current")
281
294
  graph.nodes[node_id] = node
@@ -286,9 +299,15 @@ def _build_lineage_graph(base, current) -> LineageGraph:
286
299
  # Build edges
287
300
  for child_id, parents in base.get("parent_map", {}).items():
288
301
  for parent_id in parents:
302
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
303
+ continue
304
+
289
305
  graph.create_edge(parent_id, child_id, "base")
290
306
  for child_id, parents in current.get("parent_map", {}).items():
291
307
  for parent_id in parents:
308
+ if child_id not in graph.nodes or parent_id not in graph.nodes:
309
+ continue
310
+
292
311
  graph.create_edge(parent_id, child_id, "current")
293
312
 
294
313
  return graph
@@ -301,7 +320,8 @@ def _build_node_schema(lineage, node_id):
301
320
  def _get_node_row_count_diff(node_id, node_name):
302
321
  row_count_runs = RunDAO().list(type_filter=RunType.ROW_COUNT_DIFF)
303
322
  for run in row_count_runs:
304
- if node_id in run.params.get("node_ids", []):
323
+ node_ids = (run.params or {}).get("node_ids") or []
324
+ if node_id in node_ids:
305
325
  result = run.result.get(node_name, {})
306
326
  diff = TaskResultDiffer.diff(result.get("base"), result.get("curr"))
307
327
  return diff, result
@@ -491,9 +511,11 @@ No changed module was detected.
491
511
  if check_content:
492
512
  content += check_content
493
513
 
494
- if ctx.state_loader.cloud_mode:
514
+ if ctx.state_loader.cloud_mode and ctx.state_loader.pr_info is not None:
495
515
  pr_info = ctx.state_loader.pr_info
496
- content += f"\nSee PR page: {RECCE_CLOUD_HOST}/{pr_info.repository}/pulls/{pr_info.id}\n"
516
+ if pr_info.repository is not None and pr_info.id is not None:
517
+ # the classic route will be deprecated soon
518
+ content += f"\nSee PR page: {RECCE_CLOUD_HOST}/classic/{pr_info.repository}/pulls/{pr_info.id}\n"
497
519
 
498
520
  return content
499
521
 
recce/tasks/dataframe.py CHANGED
@@ -19,11 +19,34 @@ class DataFrameColumnType(Enum):
19
19
  TIMEDELTA = "timedelta"
20
20
  UNKNOWN = "unknown"
21
21
 
22
@classmethod
def from_string(cls, type_str: str) -> "DataFrameColumnType":
    """Convert string to DataFrameColumnType enum.

    The lookup ignores case and surrounding whitespace.

    Args:
        type_str: String representation of the type (e.g., "integer", "text")

    Returns:
        DataFrameColumnType enum value; ``UNKNOWN`` when the value is not a
        string or does not name a member.
    """
    # A non-string (e.g. None) is treated like any other unrecognised type
    # instead of failing on `.lower()`.
    if not isinstance(type_str, str):
        return cls.UNKNOWN

    normalized = type_str.lower().strip()
    try:
        return cls(normalized)
    except ValueError:
        return cls.UNKNOWN
37
+
22
38
 
23
39
  class DataFrameColumn(BaseModel):
40
+ key: t.Optional[str] = None
24
41
  name: str
25
42
  type: DataFrameColumnType
26
43
 
44
+ def __init__(self, **data):
45
+ """Initialize DataFrameColumn, auto-setting key=name if key is missing."""
46
+ if "key" not in data or data["key"] is None:
47
+ data["key"] = data.get("name")
48
+ super().__init__(**data)
49
+
27
50
 
28
51
  class DataFrame(BaseModel):
29
52
  columns: t.List[DataFrameColumn]
@@ -64,7 +87,7 @@ class DataFrame(BaseModel):
64
87
  col_type = DataFrameColumnType.INTEGER
65
88
  else:
66
89
  col_type = DataFrameColumnType.UNKNOWN
67
- columns.append(DataFrameColumn(name=col_name, type=col_type))
90
+ columns.append(DataFrameColumn(key=col_name, name=col_name, type=col_type))
68
91
 
69
92
  def _row_values(row):
70
93
  # If the value is Decimal, check if it's finite. If not, convert it to float(xxx) (GitHub issue #476)
@@ -106,3 +129,42 @@ class DataFrame(BaseModel):
106
129
  more=more,
107
130
  )
108
131
  return df
132
+
133
@staticmethod
def from_data(
    columns: t.Dict[str, str],
    data: t.List[tuple],
    limit: t.Optional[int] = None,
    more: t.Optional[bool] = None,
):
    """Create a DataFrame from columns and data directly.

    Args:
        columns: Dict defining the schema where keys are column names and values are type strings.
            Type strings can be: "number", "integer", "text", "boolean", "date", "datetime", "timedelta"
        data: List of rows (each row is a list/tuple/sequence of values)
        limit: Optional limit on the number of rows returned
        more: Optional flag indicating whether there are more rows to fetch

    Returns:
        DataFrame instance

    Examples:
        # Using simple dict format
        columns = {"idx": "integer", "name": "text", "impacted": "boolean"}
        data = [[0, "model_a", True], [1, "model_b", False]]
        df = DataFrame.from_data(columns, data)
    """
    # Each schema entry becomes a column whose key and name are both the dict key.
    schema = [
        DataFrameColumn(
            key=column_name,
            name=column_name,
            type=DataFrameColumnType.from_string(raw_type),
        )
        for column_name, raw_type in columns.items()
    ]
    return DataFrame(columns=schema, data=data, limit=limit, more=more)
recce/tasks/query.py CHANGED
@@ -8,6 +8,7 @@ from ..exceptions import RecceException
8
8
  from ..models import Check
9
9
  from .core import CheckValidator, Task, TaskResultDiffer
10
10
  from .dataframe import DataFrame
11
+ from .utils import normalize_boolean_flag_columns, normalize_keys_to_columns
11
12
  from .valuediff import ValueDiffMixin
12
13
 
13
14
  QUERY_LIMIT = 2000
@@ -147,6 +148,10 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
147
148
  base_sql_template: Optional[str] = None,
148
149
  preview_change: bool = False,
149
150
  ):
151
+ """
152
+ Execute diff queries on base and current environments without join.
153
+ Note: Mutates self.params.primary_keys to normalize values with actual column keys.
154
+ """
150
155
  limit = QUERY_LIMIT
151
156
 
152
157
  self.connection = dbt_adapter.get_thread_connection()
@@ -159,9 +164,17 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
159
164
  current, current_more = self.execute_sql_with_limit(sql_template, base=False, limit=limit)
160
165
  self.check_cancel()
161
166
 
167
+ base_df = DataFrame.from_agate(base, limit=limit, more=base_more)
168
+ current_df = DataFrame.from_agate(current, limit=limit, more=current_more)
169
+
170
+ # Normalize primary_keys if present (for non-join diff, use current columns as reference)
171
+ if self.params.primary_keys:
172
+ column_keys = [col.key for col in current_df.columns]
173
+ self.params.primary_keys = normalize_keys_to_columns(self.params.primary_keys, column_keys)
174
+
162
175
  return QueryDiffResult(
163
- base=DataFrame.from_agate(base, limit=limit, more=base_more),
164
- current=DataFrame.from_agate(current, limit=limit, more=current_more),
176
+ base=base_df,
177
+ current=current_df,
165
178
  )
166
179
 
167
180
  def _query_diff_join(
@@ -172,6 +185,22 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
172
185
  base_sql_template: Optional[str] = None,
173
186
  preview_change: bool = False,
174
187
  ):
188
+ """
189
+ Execute diff queries on base and current environments using SQL join operations.
190
+ This method performs a set-based diff using INTERSECT and EXCEPT operations
191
+ to identify rows that differ between base and current query results.
192
+
193
+ Note: Mutates self.params.primary_keys to normalize values with actual column keys.
194
+
195
+ :param dbt_adapter: The dbt adapter instance for executing SQL
196
+ :param sql_template: SQL template to execute on the current environment
197
+ :param primary_keys: List of column names to use as primary keys for ordering
198
+ :param base_sql_template: Optional SQL template for the base environment.
199
+ If None, sql_template is used for both environments.
200
+ :param preview_change: If True, run base_sql_template against current environment
201
+ instead of base environment
202
+ :return: QueryDiffResult containing the diff DataFrame with in_a/in_b flags
203
+ """
175
204
 
176
205
  query_template = r"""
177
206
  with a_query as (
@@ -251,7 +280,15 @@ class QueryDiffTask(Task, QueryMixin, ValueDiffMixin):
251
280
  _, table = dbt_adapter.execute(sql, fetch=True)
252
281
  self.check_cancel()
253
282
 
254
- return QueryDiffResult(diff=DataFrame.from_agate(table))
283
+ diff_df = DataFrame.from_agate(table)
284
+ # Normalize in_a/in_b columns to lowercase for cross-warehouse consistency
285
+ diff_df = normalize_boolean_flag_columns(diff_df)
286
+
287
+ # Normalize primary_keys to match actual column keys from warehouse
288
+ column_keys = [col.key for col in diff_df.columns]
289
+ self.params.primary_keys = normalize_keys_to_columns(primary_keys, column_keys)
290
+
291
+ return QueryDiffResult(diff=diff_df)
255
292
 
256
293
  @staticmethod
257
294
  def _select_single_model(model_name):
recce/tasks/rowcount.py CHANGED
@@ -263,7 +263,10 @@ class RowCountDiffResultDiffer(TaskResultDiffer):
263
263
 
264
264
  def _get_changed_nodes(self) -> Union[List[str], None]:
265
265
  if self.changes:
266
- return self.changes.affected_root_keys.items
266
+ # Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
267
+ # Convert to list directly
268
+ return list(self.changes.affected_root_keys)
269
+ return None
267
270
 
268
271
 
269
272
  class RowCountDiffCheckValidator(CheckValidator):
recce/tasks/schema.py CHANGED
@@ -45,7 +45,10 @@ class SchemaDiffResultDiffer:
45
45
 
46
46
  def _get_changed_nodes(self) -> Union[List[str], None]:
47
47
  if self.changes:
48
- return self.changes.affected_root_keys.items
48
+ # Both affected_root_keys of deepdiff v7 (OrderedSet) and v8 (SetOrdered) are iterable
49
+ # Convert to list directly
50
+ return list(self.changes.affected_root_keys)
51
+ return None
49
52
 
50
53
 
51
54
  class SchemaDiffParams(BaseModel):