recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +810 -480
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +119 -51
- recce/cli.py +1299 -323
- recce/config.py +42 -33
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404.html +1 -1
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +5 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
- recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
- recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
- recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
- recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
- recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
- recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
- recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
- recce/data/_next/static/chunks/99d638224186c118.js +1 -0
- recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
- recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
- recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
- recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +3 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -7
- recce/diff.py +6 -12
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +716 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +131 -28
- recce/pull_request.py +27 -25
- recce/run.py +165 -121
- recce/server.py +303 -111
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +188 -143
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +87 -85
- recce/util/cll.py +274 -219
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +322 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- recce_cloud/__init__.py +24 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +111 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +76 -0
- recce_cloud/api/gitlab.py +82 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +245 -0
- recce_cloud/upload.py +214 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
- recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/conftest.py +9 -5
- tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +333 -0
- tests/tasks/conftest.py +1 -1
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +339 -15
- tests/tasks/test_query.py +46 -46
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +174 -60
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_config.py +7 -9
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +151 -4
- tests/test_dbt.py +7 -7
- tests/test_mcp_server.py +332 -0
- tests/test_pull_request.py +1 -1
- tests/test_server.py +25 -19
- tests/test_summary.py +29 -17
- recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -753
- recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
- tests/test_state.py +0 -123
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/tasks/core.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import List,
|
|
2
|
+
from typing import List, Literal, Optional, Union
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
from recce.core import default_context
|
|
7
7
|
from recce.exceptions import RecceCancelException
|
|
8
|
-
from recce.models import
|
|
8
|
+
from recce.models import Check, Run
|
|
9
9
|
from recce.util.pydantic_model import pydantic_model_dump
|
|
10
10
|
|
|
11
11
|
|
|
@@ -63,6 +63,7 @@ class TaskResultDiffer(ABC):
|
|
|
63
63
|
@staticmethod
|
|
64
64
|
def diff(base, current):
|
|
65
65
|
from deepdiff import DeepDiff
|
|
66
|
+
|
|
66
67
|
diff = DeepDiff(base, current, ignore_order=True)
|
|
67
68
|
return diff if diff else None
|
|
68
69
|
|
|
@@ -76,15 +77,12 @@ class TaskResultDiffer(ABC):
|
|
|
76
77
|
select: Optional[str] = None,
|
|
77
78
|
exclude: Optional[str] = None,
|
|
78
79
|
packages: Optional[list[str]] = None,
|
|
79
|
-
view_mode: Optional[Literal[
|
|
80
|
+
view_mode: Optional[Literal["all", "changed_models"]] = None,
|
|
80
81
|
) -> List[str]:
|
|
81
82
|
nodes = default_context().adapter.select_nodes(
|
|
82
|
-
select=select,
|
|
83
|
-
exclude=exclude,
|
|
84
|
-
packages=packages,
|
|
85
|
-
view_mode=view_mode
|
|
83
|
+
select=select, exclude=exclude, packages=packages, view_mode=view_mode
|
|
86
84
|
)
|
|
87
|
-
return [node for node in nodes if not node.startswith(
|
|
85
|
+
return [node for node in nodes if not node.startswith("test.")]
|
|
88
86
|
|
|
89
87
|
@abstractmethod
|
|
90
88
|
def _check_result_changed_fn(self, result):
|
|
@@ -100,10 +98,10 @@ class TaskResultDiffer(ABC):
|
|
|
100
98
|
Should be implemented by subclass.
|
|
101
99
|
"""
|
|
102
100
|
params = self.run.params
|
|
103
|
-
if params.get(
|
|
104
|
-
return [TaskResultDiffer.get_node_id_by_name(params.get(
|
|
105
|
-
elif params.get(
|
|
106
|
-
names = params.get(
|
|
101
|
+
if params.get("model"):
|
|
102
|
+
return [TaskResultDiffer.get_node_id_by_name(params.get("model"))]
|
|
103
|
+
elif params.get("node_names"):
|
|
104
|
+
names = params.get("node_names", [])
|
|
107
105
|
return [TaskResultDiffer.get_node_id_by_name(name) for name in names]
|
|
108
106
|
else:
|
|
109
107
|
# No related node ids in the params
|
|
@@ -125,7 +123,7 @@ class CheckValidator:
|
|
|
125
123
|
try:
|
|
126
124
|
check = Check(**check)
|
|
127
125
|
except Exception as e:
|
|
128
|
-
raise ValueError(f
|
|
126
|
+
raise ValueError(f"Invalid check format. {str(e)}")
|
|
129
127
|
|
|
130
128
|
self.validate_check(check)
|
|
131
129
|
|
recce/tasks/dataframe.py
CHANGED
|
@@ -10,20 +10,43 @@ from pydantic import BaseModel, Field
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class DataFrameColumnType(Enum):
|
|
13
|
-
NUMBER =
|
|
14
|
-
INTEGER =
|
|
15
|
-
TEXT =
|
|
16
|
-
BOOLEAN =
|
|
17
|
-
DATE =
|
|
18
|
-
DATETIME =
|
|
19
|
-
TIMEDELTA =
|
|
20
|
-
UNKNOWN =
|
|
13
|
+
NUMBER = "number"
|
|
14
|
+
INTEGER = "integer"
|
|
15
|
+
TEXT = "text"
|
|
16
|
+
BOOLEAN = "boolean"
|
|
17
|
+
DATE = "date"
|
|
18
|
+
DATETIME = "datetime"
|
|
19
|
+
TIMEDELTA = "timedelta"
|
|
20
|
+
UNKNOWN = "unknown"
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def from_string(cls, type_str: str) -> "DataFrameColumnType":
|
|
24
|
+
"""Convert string to DataFrameColumnType enum.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
type_str: String representation of the type (e.g., "integer", "text")
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
DataFrameColumnType enum value
|
|
31
|
+
"""
|
|
32
|
+
type_str = type_str.lower().strip()
|
|
33
|
+
try:
|
|
34
|
+
return cls(type_str)
|
|
35
|
+
except ValueError:
|
|
36
|
+
return cls.UNKNOWN
|
|
21
37
|
|
|
22
38
|
|
|
23
39
|
class DataFrameColumn(BaseModel):
|
|
40
|
+
key: t.Optional[str] = None
|
|
24
41
|
name: str
|
|
25
42
|
type: DataFrameColumnType
|
|
26
43
|
|
|
44
|
+
def __init__(self, **data):
|
|
45
|
+
"""Initialize DataFrameColumn, auto-setting key=name if key is missing."""
|
|
46
|
+
if "key" not in data or data["key"] is None:
|
|
47
|
+
data["key"] = data.get("name")
|
|
48
|
+
super().__init__(**data)
|
|
49
|
+
|
|
27
50
|
|
|
28
51
|
class DataFrame(BaseModel):
|
|
29
52
|
columns: t.List[DataFrameColumn]
|
|
@@ -32,19 +55,21 @@ class DataFrame(BaseModel):
|
|
|
32
55
|
more: t.Optional[bool] = Field(None, description="Whether there are more rows to fetch")
|
|
33
56
|
|
|
34
57
|
@staticmethod
|
|
35
|
-
def from_agate(table:
|
|
58
|
+
def from_agate(table: "agate.Table", limit: t.Optional[int] = None, more: t.Optional[bool] = None):
|
|
36
59
|
from recce.adapter.dbt_adapter import dbt_version
|
|
37
|
-
|
|
60
|
+
|
|
61
|
+
if dbt_version < "v1.8":
|
|
38
62
|
import dbt.clients.agate_helper as agate_helper
|
|
39
63
|
else:
|
|
40
64
|
import dbt_common.clients.agate_helper as agate_helper
|
|
41
65
|
|
|
42
66
|
import agate
|
|
67
|
+
|
|
43
68
|
columns = []
|
|
44
69
|
|
|
45
70
|
for col_name, col_type in zip(table.column_names, table.column_types):
|
|
46
71
|
|
|
47
|
-
has_integer = hasattr(agate_helper,
|
|
72
|
+
has_integer = hasattr(agate_helper, "Integer")
|
|
48
73
|
|
|
49
74
|
if isinstance(col_type, agate.Number):
|
|
50
75
|
col_type = DataFrameColumnType.NUMBER
|
|
@@ -62,7 +87,7 @@ class DataFrame(BaseModel):
|
|
|
62
87
|
col_type = DataFrameColumnType.INTEGER
|
|
63
88
|
else:
|
|
64
89
|
col_type = DataFrameColumnType.UNKNOWN
|
|
65
|
-
columns.append(DataFrameColumn(name=col_name, type=col_type))
|
|
90
|
+
columns.append(DataFrameColumn(key=col_name, name=col_name, type=col_type))
|
|
66
91
|
|
|
67
92
|
def _row_values(row):
|
|
68
93
|
# If the value is Decimal, check if it's finite. If not, convert it to float(xxx) (GitHub issue #476)
|
|
@@ -78,23 +103,23 @@ class DataFrame(BaseModel):
|
|
|
78
103
|
return df
|
|
79
104
|
|
|
80
105
|
@staticmethod
|
|
81
|
-
def from_pandas(pandas_df:
|
|
106
|
+
def from_pandas(pandas_df: "pandas.DataFrame", limit: t.Optional[int] = None, more: t.Optional[bool] = None):
|
|
82
107
|
columns = []
|
|
83
108
|
for column in pandas_df.columns:
|
|
84
109
|
dtype = pandas_df[column].dtype
|
|
85
|
-
if dtype ==
|
|
110
|
+
if dtype == "int64":
|
|
86
111
|
col_type = DataFrameColumnType.INTEGER
|
|
87
|
-
elif dtype ==
|
|
112
|
+
elif dtype == "float64":
|
|
88
113
|
col_type = DataFrameColumnType.NUMBER
|
|
89
|
-
elif dtype ==
|
|
114
|
+
elif dtype == "object":
|
|
90
115
|
col_type = DataFrameColumnType.TEXT
|
|
91
|
-
elif dtype ==
|
|
116
|
+
elif dtype == "bool":
|
|
92
117
|
col_type = DataFrameColumnType.BOOLEAN
|
|
93
118
|
else:
|
|
94
119
|
col_type = DataFrameColumnType.UNKNOWN
|
|
95
120
|
columns.append(DataFrameColumn(name=column, type=col_type))
|
|
96
121
|
|
|
97
|
-
s = pandas_df.to_json(orient=
|
|
122
|
+
s = pandas_df.to_json(orient="values")
|
|
98
123
|
data = json.loads(s)
|
|
99
124
|
|
|
100
125
|
df = DataFrame(
|
|
@@ -104,3 +129,42 @@ class DataFrame(BaseModel):
|
|
|
104
129
|
more=more,
|
|
105
130
|
)
|
|
106
131
|
return df
|
|
132
|
+
|
|
133
|
+
@staticmethod
|
|
134
|
+
def from_data(
|
|
135
|
+
columns: t.Dict[str, str],
|
|
136
|
+
data: t.List[tuple],
|
|
137
|
+
limit: t.Optional[int] = None,
|
|
138
|
+
more: t.Optional[bool] = None,
|
|
139
|
+
):
|
|
140
|
+
"""Create a DataFrame from columns and data directly.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
columns: Dict defining the schema where keys are column names and values are type strings.
|
|
144
|
+
Type strings can be: "number", "integer", "text", "boolean", "date", "datetime", "timedelta"
|
|
145
|
+
data: List of rows (each row is a list/tuple/sequence of values)
|
|
146
|
+
limit: Optional limit on the number of rows returned
|
|
147
|
+
more: Optional flag indicating whether there are more rows to fetch
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
DataFrame instance
|
|
151
|
+
|
|
152
|
+
Examples:
|
|
153
|
+
# Using simple dict format
|
|
154
|
+
columns = {"idx": "integer", "name": "text", "impacted": "boolean"}
|
|
155
|
+
data = [[0, "model_a", True], [1, "model_b", False]]
|
|
156
|
+
df = DataFrame.from_data(columns, data)
|
|
157
|
+
"""
|
|
158
|
+
# Convert dict columns to DataFrameColumn objects
|
|
159
|
+
processed_columns = []
|
|
160
|
+
for key, type_str in columns.items():
|
|
161
|
+
col_type = DataFrameColumnType.from_string(type_str)
|
|
162
|
+
processed_columns.append(DataFrameColumn(key=key, name=key, type=col_type))
|
|
163
|
+
|
|
164
|
+
df = DataFrame(
|
|
165
|
+
columns=processed_columns,
|
|
166
|
+
data=data,
|
|
167
|
+
limit=limit,
|
|
168
|
+
more=more,
|
|
169
|
+
)
|
|
170
|
+
return df
|
recce/tasks/histogram.py
CHANGED
|
@@ -9,34 +9,66 @@ from pydantic import BaseModel
|
|
|
9
9
|
from recce.core import default_context
|
|
10
10
|
from recce.models import Check
|
|
11
11
|
from recce.tasks import Task
|
|
12
|
-
from recce.tasks.core import
|
|
12
|
+
from recce.tasks.core import CheckValidator, TaskResultDiffer
|
|
13
13
|
from recce.tasks.query import QueryMixin
|
|
14
14
|
|
|
15
15
|
sql_datetime_types = [
|
|
16
|
-
"DATE",
|
|
16
|
+
"DATE",
|
|
17
|
+
"DATETIME",
|
|
18
|
+
"TIMESTAMP",
|
|
19
|
+
"TIME",
|
|
17
20
|
"YEAR", # Specific to MySQL/MariaDB
|
|
18
|
-
"DATETIME2",
|
|
21
|
+
"DATETIME2",
|
|
22
|
+
"SMALLDATETIME",
|
|
23
|
+
"DATETIMEOFFSET", # Specific to SQL Server
|
|
19
24
|
"INTERVAL", # Common in PostgreSQL and Oracle
|
|
20
|
-
"TIMESTAMPTZ",
|
|
21
|
-
"
|
|
22
|
-
"
|
|
25
|
+
"TIMESTAMPTZ",
|
|
26
|
+
"TIMETZ", # Specific to PostgreSQL
|
|
27
|
+
"TIMESTAMP WITH TIME ZONE",
|
|
28
|
+
"TIMESTAMP WITH LOCAL TIME ZONE", # Oracle
|
|
29
|
+
"TIMESTAMP_LTZ",
|
|
30
|
+
"TIMESTAMP_NTZ",
|
|
31
|
+
"TIMESTAMP_TZ", # Specific to Snowflake
|
|
23
32
|
]
|
|
24
33
|
|
|
25
34
|
sql_integer_types = [
|
|
26
|
-
"TINYINT",
|
|
27
|
-
"
|
|
35
|
+
"TINYINT",
|
|
36
|
+
"SMALLINT",
|
|
37
|
+
"MEDIUMINT",
|
|
38
|
+
"INT",
|
|
39
|
+
"INTEGER",
|
|
40
|
+
"BIGINT", # Common across most databases
|
|
41
|
+
"INT2",
|
|
42
|
+
"INT4",
|
|
43
|
+
"INT8", # PostgreSQL specific aliases
|
|
28
44
|
"UNSIGNED BIG INT", # SQLite specific
|
|
29
45
|
"NUMBER", # Oracle, can be used as an integer with precision and scale
|
|
30
46
|
"NUMERIC", # Generally available in many SQL databases, used with precision and scale
|
|
31
|
-
"SMALLSERIAL",
|
|
32
|
-
"
|
|
47
|
+
"SMALLSERIAL",
|
|
48
|
+
"SERIAL",
|
|
49
|
+
"BIGSERIAL", # PostgreSQL auto-increment types
|
|
50
|
+
"IDENTITY",
|
|
51
|
+
"SMALLIDENTITY",
|
|
52
|
+
"BIGIDENTITY", # SQL Server specific auto-increment types
|
|
33
53
|
"BYTEINT", # Specific to Snowflake, for storing very small integers
|
|
34
54
|
]
|
|
35
55
|
|
|
36
56
|
sql_not_supported_types = [
|
|
37
|
-
"CHAR",
|
|
38
|
-
"
|
|
39
|
-
"
|
|
57
|
+
"CHAR",
|
|
58
|
+
"VARCHAR",
|
|
59
|
+
"TINYTEXT",
|
|
60
|
+
"TEXT",
|
|
61
|
+
"MEDIUMTEXT",
|
|
62
|
+
"LONGTEXT",
|
|
63
|
+
"NCHAR",
|
|
64
|
+
"NVARCHAR",
|
|
65
|
+
"VARCHAR2",
|
|
66
|
+
"NVARCHAR2",
|
|
67
|
+
"CLOB",
|
|
68
|
+
"NCLOB",
|
|
69
|
+
"VARCHAR(MAX)",
|
|
70
|
+
"XML",
|
|
71
|
+
"JSON",
|
|
40
72
|
"BOOLEAN", # PostgreSQL, SQLite, and others with native boolean support
|
|
41
73
|
"TINYINT(1)", # MySQL/MariaDB uses TINYINT(1) to represent boolean values
|
|
42
74
|
"BIT", # SQL Server and others use BIT to represent boolean values, where 1 is true and 0 is false
|
|
@@ -185,7 +217,7 @@ def query_numeric_histogram(task, node, column, column_type, min_value, max_valu
|
|
|
185
217
|
else:
|
|
186
218
|
counts[num_bins - 1] += count
|
|
187
219
|
base_result = {
|
|
188
|
-
|
|
220
|
+
"counts": counts,
|
|
189
221
|
}
|
|
190
222
|
if curr is not None:
|
|
191
223
|
counts = [0] * num_bins
|
|
@@ -199,7 +231,7 @@ def query_numeric_histogram(task, node, column, column_type, min_value, max_valu
|
|
|
199
231
|
else:
|
|
200
232
|
counts[num_bins - 1] += count
|
|
201
233
|
curr_result = {
|
|
202
|
-
|
|
234
|
+
"counts": counts,
|
|
203
235
|
}
|
|
204
236
|
return base_result, curr_result, bin_edges, labels
|
|
205
237
|
|
|
@@ -209,7 +241,7 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
|
|
|
209
241
|
print(max_value, min_value, days_delta)
|
|
210
242
|
# _type = None
|
|
211
243
|
if days_delta > 365 * 4:
|
|
212
|
-
_type =
|
|
244
|
+
_type = "yearly"
|
|
213
245
|
dmin = date(min_value.year, 1, 1)
|
|
214
246
|
if max_value.year < 3000:
|
|
215
247
|
dmax = date(max_value.year, 1, 1) + relativedelta(years=+1)
|
|
@@ -237,7 +269,7 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
|
|
|
237
269
|
else:
|
|
238
270
|
dmax = date(3000, 1, 1)
|
|
239
271
|
period = relativedelta(dmax, dmin)
|
|
240
|
-
num_buckets =
|
|
272
|
+
num_buckets = period.years * 12 + period.months
|
|
241
273
|
bin_edges = [dmin + relativedelta(months=i) for i in range(num_buckets + 1)]
|
|
242
274
|
sql = f"""
|
|
243
275
|
SELECT
|
|
@@ -285,18 +317,18 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
|
|
|
285
317
|
|
|
286
318
|
base_counts = [0] * num_buckets
|
|
287
319
|
print(_type)
|
|
288
|
-
for
|
|
320
|
+
for d, v in base.rows:
|
|
289
321
|
i = bin_edges.index(d.date()) if isinstance(d, datetime) else bin_edges.index(d)
|
|
290
322
|
base_counts[i] = v
|
|
291
323
|
curr_counts = [0] * num_buckets
|
|
292
|
-
for
|
|
324
|
+
for d, v in curr.rows:
|
|
293
325
|
i = bin_edges.index(d.date()) if isinstance(d, datetime) else bin_edges.index(d)
|
|
294
326
|
curr_counts[i] = v
|
|
295
327
|
base_result = {
|
|
296
|
-
|
|
328
|
+
"counts": base_counts,
|
|
297
329
|
}
|
|
298
330
|
curr_result = {
|
|
299
|
-
|
|
331
|
+
"counts": curr_counts,
|
|
300
332
|
}
|
|
301
333
|
|
|
302
334
|
return base_result, curr_result, bin_edges
|
|
@@ -310,6 +342,7 @@ class HistogramDiffTask(Task, QueryMixin):
|
|
|
310
342
|
|
|
311
343
|
def execute(self):
|
|
312
344
|
from recce.adapter.dbt_adapter import DbtAdapter
|
|
345
|
+
|
|
313
346
|
result = {}
|
|
314
347
|
|
|
315
348
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
@@ -353,29 +386,31 @@ class HistogramDiffTask(Task, QueryMixin):
|
|
|
353
386
|
labels = None
|
|
354
387
|
if min_value is None or max_value is None:
|
|
355
388
|
base_result = {
|
|
356
|
-
|
|
389
|
+
"counts": [],
|
|
357
390
|
}
|
|
358
391
|
current_result = {
|
|
359
|
-
|
|
392
|
+
"counts": [],
|
|
360
393
|
}
|
|
361
394
|
bin_edges = []
|
|
362
395
|
labels = []
|
|
363
396
|
elif column_type.upper() in sql_datetime_types:
|
|
364
397
|
base_result, current_result, bin_edges = query_datetime_histogram(
|
|
365
|
-
self, node, column, min_value, max_value
|
|
398
|
+
self, node, column, min_value, max_value
|
|
399
|
+
)
|
|
366
400
|
else:
|
|
367
401
|
base_result, current_result, bin_edges, labels = query_numeric_histogram(
|
|
368
|
-
self, node, column, column_type, min_value, max_value, num_bins
|
|
402
|
+
self, node, column, column_type, min_value, max_value, num_bins
|
|
403
|
+
)
|
|
369
404
|
if base_result:
|
|
370
|
-
base_result[
|
|
405
|
+
base_result["total"] = base_total
|
|
371
406
|
if current_result:
|
|
372
|
-
current_result[
|
|
373
|
-
result[
|
|
374
|
-
result[
|
|
375
|
-
result[
|
|
376
|
-
result[
|
|
377
|
-
result[
|
|
378
|
-
result[
|
|
407
|
+
current_result["total"] = curr_total
|
|
408
|
+
result["base"] = base_result
|
|
409
|
+
result["current"] = current_result
|
|
410
|
+
result["min"] = min_value
|
|
411
|
+
result["max"] = max_value
|
|
412
|
+
result["bin_edges"] = bin_edges
|
|
413
|
+
result["labels"] = labels
|
|
379
414
|
return result
|
|
380
415
|
|
|
381
416
|
def cancel(self):
|
|
@@ -386,7 +421,7 @@ class HistogramDiffTask(Task, QueryMixin):
|
|
|
386
421
|
|
|
387
422
|
class HistogramDiffTaskResultDiffer(TaskResultDiffer):
|
|
388
423
|
def _check_result_changed_fn(self, result):
|
|
389
|
-
return TaskResultDiffer.diff(result[
|
|
424
|
+
return TaskResultDiffer.diff(result["base"], result["current"])
|
|
390
425
|
|
|
391
426
|
|
|
392
427
|
class HistogramDiffCheckValidator(CheckValidator):
|
recce/tasks/lineage.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Literal, Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
@@ -10,7 +10,7 @@ class LineageDiffParams(BaseModel):
|
|
|
10
10
|
select: Optional[str] = None
|
|
11
11
|
exclude: Optional[str] = None
|
|
12
12
|
packages: Optional[list[str]] = None
|
|
13
|
-
view_mode: Optional[Literal[
|
|
13
|
+
view_mode: Optional[Literal["all", "changed_models"]] = None
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class LineageDiffCheckValidator(CheckValidator):
|