recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +845 -461
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +59 -42
- recce/apis/check_events_api.py +353 -0
- recce/apis/check_func.py +41 -35
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +64 -25
- recce/artifact.py +119 -51
- recce/cli.py +1301 -324
- recce/config.py +43 -34
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404/index.html +2 -0
- recce/data/404.html +2 -1
- recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
- recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
- recce/data/__next.__PAGE__.txt +6 -0
- recce/data/__next._full.txt +32 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +14 -0
- recce/data/__next._tree.txt +8 -0
- recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
- recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
- recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
- recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
- recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
- recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
- recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
- recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
- recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
- recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
- recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
- recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
- recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
- recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
- recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
- recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
- recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
- recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
- recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
- recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
- recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
- recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
- recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
- recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
- recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
- recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
- recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
- recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
- recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
- recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
- recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
- recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
- recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
- recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
- recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
- recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
- recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
- recce/data/_not-found/__next._full.txt +24 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +13 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +6 -0
- recce/data/_not-found/index.html +2 -0
- recce/data/_not-found/index.txt +24 -0
- recce/data/auth_callback.html +68 -0
- recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/checks/__next._full.txt +39 -0
- recce/data/checks/__next._head.txt +8 -0
- recce/data/checks/__next._index.txt +14 -0
- recce/data/checks/__next._tree.txt +8 -0
- recce/data/checks/__next.checks.__PAGE__.txt +10 -0
- recce/data/checks/__next.checks.txt +4 -0
- recce/data/checks/index.html +2 -0
- recce/data/checks/index.txt +39 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +2 -27
- recce/data/index.txt +32 -7
- recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/lineage/__next._full.txt +39 -0
- recce/data/lineage/__next._head.txt +8 -0
- recce/data/lineage/__next._index.txt +14 -0
- recce/data/lineage/__next._tree.txt +8 -0
- recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
- recce/data/lineage/__next.lineage.txt +4 -0
- recce/data/lineage/index.html +2 -0
- recce/data/lineage/index.txt +39 -0
- recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/query/__next._full.txt +37 -0
- recce/data/query/__next._head.txt +8 -0
- recce/data/query/__next._index.txt +14 -0
- recce/data/query/__next._tree.txt +8 -0
- recce/data/query/__next.query.__PAGE__.txt +9 -0
- recce/data/query/__next.query.txt +4 -0
- recce/data/query/index.html +2 -0
- recce/data/query/index.txt +37 -0
- recce/diff.py +6 -12
- recce/event/CONFIG.bak +1 -0
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +725 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +438 -21
- recce/models/run.py +1 -0
- recce/models/types.py +134 -28
- recce/pull_request.py +27 -25
- recce/run.py +179 -122
- recce/server.py +394 -104
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +644 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +196 -149
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +180 -89
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/utils.py +147 -0
- recce/tasks/valuediff.py +247 -155
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +105 -100
- recce/util/cll.py +274 -219
- recce/util/cloud/__init__.py +15 -0
- recce/util/cloud/base.py +115 -0
- recce/util/cloud/check_events.py +190 -0
- recce/util/cloud/checks.py +242 -0
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +347 -72
- recce/util/singleton.py +4 -4
- recce/util/startup_perf.py +121 -0
- recce/yaml/__init__.py +7 -10
- recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
- recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
- recce/state.py +0 -753
- recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
- recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
- recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/conftest.py +0 -13
- tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
- tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
- tests/adapter/dbt_adapter/test_selector.py +0 -177
- tests/tasks/__init__.py +0 -0
- tests/tasks/conftest.py +0 -4
- tests/tasks/test_histogram.py +0 -137
- tests/tasks/test_lineage.py +0 -42
- tests/tasks/test_preset_checks.py +0 -50
- tests/tasks/test_profile.py +0 -73
- tests/tasks/test_query.py +0 -151
- tests/tasks/test_row_count.py +0 -116
- tests/tasks/test_schema.py +0 -99
- tests/tasks/test_top_k.py +0 -73
- tests/tasks/test_valuediff.py +0 -74
- tests/test_cli.py +0 -122
- tests/test_config.py +0 -45
- tests/test_core.py +0 -27
- tests/test_dbt.py +0 -36
- tests/test_pull_request.py +0 -130
- tests/test_server.py +0 -98
- tests/test_state.py +0 -123
- tests/test_summary.py +0 -57
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/tasks/top_k.py
CHANGED
|
@@ -5,7 +5,7 @@ from pydantic import BaseModel
|
|
|
5
5
|
from recce.core import default_context
|
|
6
6
|
from recce.models import Check
|
|
7
7
|
from recce.tasks import Task
|
|
8
|
-
from recce.tasks.core import
|
|
8
|
+
from recce.tasks.core import CheckValidator, TaskResultDiffer
|
|
9
9
|
from recce.tasks.query import QueryMixin
|
|
10
10
|
|
|
11
11
|
|
|
@@ -33,11 +33,14 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
33
33
|
UNION ALL
|
|
34
34
|
select count(*), count({{column}}) from {{ curr_relation }}
|
|
35
35
|
"""
|
|
36
|
-
sql = dbt_adapter.generate_sql(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
sql = dbt_adapter.generate_sql(
|
|
37
|
+
sql_template,
|
|
38
|
+
context=dict(
|
|
39
|
+
base_relation=base_relation,
|
|
40
|
+
curr_relation=curr_relation,
|
|
41
|
+
column=column,
|
|
42
|
+
),
|
|
43
|
+
)
|
|
41
44
|
_, table = dbt_adapter.execute(sql, fetch=True)
|
|
42
45
|
|
|
43
46
|
result = (table[0][0], table[0][1], table[1][0], table[1][1])
|
|
@@ -77,13 +80,16 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
77
80
|
order by curr_count desc, base_count desc
|
|
78
81
|
limit {{k}}
|
|
79
82
|
"""
|
|
80
|
-
sql = dbt_adapter.generate_sql(
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
83
|
+
sql = dbt_adapter.generate_sql(
|
|
84
|
+
sql_template,
|
|
85
|
+
context=dict(
|
|
86
|
+
base_relation=base_relation,
|
|
87
|
+
curr_relation=curr_relation,
|
|
88
|
+
column=column,
|
|
89
|
+
k=k,
|
|
90
|
+
include_null=False,
|
|
91
|
+
),
|
|
92
|
+
)
|
|
87
93
|
_, table = dbt_adapter.execute(sql, fetch=True)
|
|
88
94
|
|
|
89
95
|
categories = []
|
|
@@ -91,7 +97,7 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
91
97
|
curr_counts = []
|
|
92
98
|
|
|
93
99
|
for row in table:
|
|
94
|
-
categories.append(row[0] if row[0] !=
|
|
100
|
+
categories.append(row[0] if row[0] != "__null__" else None)
|
|
95
101
|
base_counts.append(int(row[1] if row[1] else 0))
|
|
96
102
|
curr_counts.append(int(row[2] if row[2] else 0))
|
|
97
103
|
|
|
@@ -100,6 +106,7 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
100
106
|
def execute(self):
|
|
101
107
|
|
|
102
108
|
from recce.adapter.dbt_adapter import DbtAdapter
|
|
109
|
+
|
|
103
110
|
dbt_adapter: DbtAdapter = default_context().adapter
|
|
104
111
|
|
|
105
112
|
with dbt_adapter.connection_named("query"):
|
|
@@ -118,33 +125,26 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
118
125
|
|
|
119
126
|
self.check_cancel()
|
|
120
127
|
categories, base_counts, curr_counts = self._query_top_k(
|
|
121
|
-
dbt_adapter,
|
|
122
|
-
base_relation,
|
|
123
|
-
curr_relation,
|
|
124
|
-
column,
|
|
125
|
-
k
|
|
128
|
+
dbt_adapter, base_relation, curr_relation, column, k
|
|
126
129
|
)
|
|
127
130
|
self.check_cancel()
|
|
128
131
|
|
|
129
132
|
base_total, base_valids, curr_total, curr_valids = self._query_row_count_diff(
|
|
130
|
-
dbt_adapter,
|
|
131
|
-
base_relation,
|
|
132
|
-
curr_relation,
|
|
133
|
-
column
|
|
133
|
+
dbt_adapter, base_relation, curr_relation, column
|
|
134
134
|
)
|
|
135
135
|
|
|
136
136
|
result = {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
137
|
+
"base": {
|
|
138
|
+
"values": categories,
|
|
139
|
+
"counts": base_counts,
|
|
140
|
+
"valids": base_valids,
|
|
141
|
+
"total": base_total,
|
|
142
142
|
},
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
143
|
+
"current": {
|
|
144
|
+
"values": categories,
|
|
145
|
+
"counts": curr_counts,
|
|
146
|
+
"valids": curr_valids,
|
|
147
|
+
"total": curr_total,
|
|
148
148
|
},
|
|
149
149
|
}
|
|
150
150
|
return result
|
|
@@ -157,8 +157,8 @@ class TopKDiffTask(Task, QueryMixin):
|
|
|
157
157
|
|
|
158
158
|
class TopKDiffTaskResultDiffer(TaskResultDiffer):
|
|
159
159
|
def _check_result_changed_fn(self, result):
|
|
160
|
-
base = result.get(
|
|
161
|
-
current = result.get(
|
|
160
|
+
base = result.get("base")
|
|
161
|
+
current = result.get("current")
|
|
162
162
|
|
|
163
163
|
return TaskResultDiffer.diff(base, current)
|
|
164
164
|
|
recce/tasks/utils.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Utility functions for task operations."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from recce.tasks.dataframe import DataFrame
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def strip_identifier_quotes(identifier: str) -> str:
    """
    Strip SQL identifier quotes from a column name.

    Different databases use different quoting styles:
    - Double quotes: "column" (PostgreSQL, Snowflake, etc.)
    - Backticks: `column` (MySQL, BigQuery)
    - Square brackets: [column] (SQL Server)

    Only a matching outer pair is removed. Inside a quoted identifier the
    closing delimiter is escaped by doubling it (``""``, two backticks,
    ``]]``); those doubled delimiters are collapsed back to a single
    character so the result is the real column name.

    Args:
        identifier: Column name that may be quoted

    Returns:
        Column name with the outer quotes stripped and doubled delimiters
        unescaped. Empty, one-character, and unquoted inputs are returned
        unchanged.

    Examples:
        >>> strip_identifier_quotes('"myColumn"')
        'myColumn'
        >>> strip_identifier_quotes('`my_column`')
        'my_column'
        >>> strip_identifier_quotes('[Column Name]')
        'Column Name'
        >>> strip_identifier_quotes('regular_column')
        'regular_column'
        >>> strip_identifier_quotes('"a""b"')
        'a"b'
    """
    # A quoted identifier needs at least an opening and a closing delimiter.
    if not identifier or len(identifier) < 2:
        return identifier

    # (opening delimiter, closing delimiter) for each supported quoting style.
    quote_pairs = (('"', '"'), ("`", "`"), ("[", "]"))

    for opener, closer in quote_pairs:
        if identifier.startswith(opener) and identifier.endswith(closer):
            inner = identifier[1:-1]
            # A doubled closing delimiter is the escape for a literal one.
            return inner.replace(closer * 2, closer)

    return identifier
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def normalize_keys_to_columns(
    keys: Optional[List[str]],
    column_keys: List[str],
) -> Optional[List[str]]:
    """
    Align user-supplied keys with the column keys actually returned by a query.

    Warehouses differ in how they report column names:
    - Snowflake: UPPERCASE (unless quoted)
    - PostgreSQL/Redshift: lowercase (unless quoted)
    - BigQuery: preserves original case

    Each key first has its SQL identifier quotes removed. It is then looked
    up among ``column_keys`` by exact match, and failing that by a
    case-insensitive match.

    Args:
        keys: User-provided keys (e.g., primary_keys from params)
        column_keys: Actual column keys from the query result

    Returns:
        None when ``keys`` is None. Otherwise a list, in the same order as
        ``keys``, holding for each key the matching entry of ``column_keys``
        or, when nothing matches, the unquoted key itself. When
        ``column_keys`` is empty the unquoted keys are returned as-is.

    Examples:
        >>> normalize_keys_to_columns(["payment_id"], ["PAYMENT_ID", "ORDER_ID"])
        ['PAYMENT_ID']
        >>> normalize_keys_to_columns(["ID", "NAME"], ["id", "name", "value"])
        ['id', 'name']
        >>> normalize_keys_to_columns(["preCommitID"], ["preCommitID", "order_id"])
        ['preCommitID']
        >>> normalize_keys_to_columns(['"customerID"'], ["customerID", "amount"])
        ['customerID']
        >>> normalize_keys_to_columns(['`my_column`'], ["MY_COLUMN"])
        ['MY_COLUMN']
    """
    if keys is None:
        return None

    # Quotes only matter for SQL execution; matching works on bare names.
    stripped = [strip_identifier_quotes(raw) for raw in keys]
    if not column_keys:
        return stripped

    exact_names = set(column_keys)
    by_lowercase = {name.lower(): name for name in column_keys}

    def resolve(name):
        # Prefer the exact spelling so case-preserving (quoted) columns win.
        if name in exact_names:
            return name
        match = by_lowercase.get(name.lower())
        return name if match is None else match

    return [resolve(name) for name in stripped]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def normalize_boolean_flag_columns(df: "DataFrame") -> "DataFrame":
    """
    Lowercase the in_a / in_b flag columns of a DataFrame.

    Warehouses report these column names in different cases:
    - Snowflake: IN_A, IN_B (UPPERCASE)
    - PostgreSQL/Redshift: in_a, in_b (lowercase)
    - BigQuery: preserves original case

    Any column whose key equals IN_A or IN_B ignoring case is replaced by a
    new column with lowercased key and name and the same type. All other
    columns are passed through untouched.

    Args:
        df: DataFrame that may contain IN_A/IN_B columns

    Returns:
        A new DataFrame with the normalized column list and the original
        data, limit and more values.
    """
    from .dataframe import DataFrame, DataFrameColumn

    flag_keys = ("IN_A", "IN_B")

    def lowered_if_flag(column):
        # Columns with an empty/missing key can never be a flag column.
        upper_key = column.key.upper() if column.key else ""
        if upper_key not in flag_keys:
            return column
        return DataFrameColumn(key=column.key.lower(), name=column.name.lower(), type=column.type)

    return DataFrame(
        columns=[lowered_if_flag(column) for column in df.columns],
        data=df.data,
        limit=df.limit,
        more=df.more,
    )
|