recce-nightly 1.4.0.20250519__py3-none-any.whl → 1.5.0.20250521__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/adapter/dbt_adapter/__init__.py +2 -1
- recce/cli.py +40 -23
- recce/data/404.html +1 -1
- recce/data/_next/static/chunks/{778-aef312bffb4c0312.js → 758-5bc37e5174e24ea3.js} +2 -2
- recce/data/_next/static/chunks/9746af58-013fa80917592388.js +1 -0
- recce/data/_next/static/chunks/app/page-4b2bbb416449c209.js +1 -0
- recce/data/index.html +2 -2
- recce/data/index.txt +2 -2
- recce/event/__init__.py +4 -0
- recce/models/__init__.py +3 -0
- recce/tasks/__init__.py +16 -0
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +69 -0
- recce/util/breaking.py +2 -2
- recce/util/cll.py +231 -199
- recce/util/io.py +3 -0
- recce/util/recce_cloud.py +18 -0
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/METADATA +1 -1
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/RECORD +34 -33
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/WHEEL +1 -1
- tests/adapter/dbt_adapter/dbt_test_helper.py +2 -2
- tests/tasks/conftest.py +1 -1
- tests/tasks/test_profile.py +0 -1
- tests/tasks/test_query.py +7 -7
- tests/test_core.py +1 -1
- tests/test_dbt.py +1 -1
- tests/test_server.py +1 -1
- tests/test_state.py +0 -2
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/app/page-7086764277331fcb.js +0 -1
- /recce/data/_next/static/{eIqnO-Nu8atPURSm-S7vW → c4FL-WGbZZW6QqfPTZTNc}/_buildManifest.js +0 -0
- /recce/data/_next/static/{eIqnO-Nu8atPURSm-S7vW → c4FL-WGbZZW6QqfPTZTNc}/_ssgManifest.js +0 -0
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/licenses/LICENSE +0 -0
- {recce_nightly-1.4.0.20250519.dist-info → recce_nightly-1.5.0.20250521.dist-info}/top_level.txt +0 -0
recce/util/cll.py
CHANGED
|
@@ -1,21 +1,10 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import Dict, List, Literal
|
|
3
|
+
from typing import Dict, List, Literal, Optional
|
|
4
4
|
|
|
5
|
+
import sqlglot.expressions as exp
|
|
5
6
|
from sqlglot import Dialect, parse_one
|
|
6
7
|
from sqlglot.errors import OptimizeError, SqlglotError
|
|
7
|
-
from sqlglot.expressions import (
|
|
8
|
-
Alias,
|
|
9
|
-
Binary,
|
|
10
|
-
Case,
|
|
11
|
-
Column,
|
|
12
|
-
Expression,
|
|
13
|
-
Func,
|
|
14
|
-
If,
|
|
15
|
-
Intersect,
|
|
16
|
-
Paren,
|
|
17
|
-
Union,
|
|
18
|
-
)
|
|
19
8
|
from sqlglot.optimizer import Scope, traverse_scope
|
|
20
9
|
from sqlglot.optimizer.qualify import qualify
|
|
21
10
|
|
|
@@ -91,94 +80,231 @@ class ColumnLevelDependencyColumn:
|
|
|
91
80
|
depends_on: List[ColumnLevelDependsOn]
|
|
92
81
|
|
|
93
82
|
|
|
94
|
-
|
|
83
|
+
@dataclass()
|
|
84
|
+
class CllResult:
|
|
85
|
+
# Model to column dependencies
|
|
86
|
+
depends_on: List[ColumnLevelDependsOn]
|
|
87
|
+
|
|
88
|
+
# Column to column dependencies
|
|
89
|
+
columns: Dict[str, ColumnLevelDependencyColumn]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _cll_column(proj, table_alias_map) -> ColumnLevelDependencyColumn:
|
|
95
93
|
# given an expression, return the columns depends on
|
|
96
94
|
# [{node: table, column: column}, ...]
|
|
95
|
+
type = "source"
|
|
96
|
+
depends_on: List[ColumnLevelDependsOn] = []
|
|
97
|
+
|
|
98
|
+
# instance of Column
|
|
99
|
+
if isinstance(proj, exp.Alias):
|
|
100
|
+
# 'select a as b'
|
|
101
|
+
# 'select CURRENT_TIMESTAMP() as create_at'
|
|
102
|
+
root = proj.this
|
|
103
|
+
|
|
104
|
+
for expression in root.walk(bfs=False):
|
|
105
|
+
if isinstance(expression, exp.Column):
|
|
106
|
+
column = expression
|
|
107
|
+
alias = column.table
|
|
108
|
+
|
|
109
|
+
if alias is None:
|
|
110
|
+
table = next(iter(table_alias_map.values()))
|
|
111
|
+
else:
|
|
112
|
+
table = table_alias_map.get(alias, alias)
|
|
113
|
+
depends_on.append(ColumnLevelDependsOn(table, column.name))
|
|
114
|
+
if type == "source":
|
|
115
|
+
type = "passthrough"
|
|
116
|
+
elif isinstance(expression, (exp.Paren, exp.Identifier)):
|
|
117
|
+
pass
|
|
118
|
+
else:
|
|
119
|
+
type = "derived"
|
|
120
|
+
|
|
121
|
+
depends_on = _dedeup_depends_on(depends_on)
|
|
122
|
+
|
|
123
|
+
if len(depends_on) == 0:
|
|
124
|
+
type = "source"
|
|
125
|
+
|
|
126
|
+
if isinstance(proj, exp.Alias):
|
|
127
|
+
alias = proj
|
|
128
|
+
if type == "passthrough" and depends_on[0].column != alias.alias_or_name:
|
|
129
|
+
type = "renamed"
|
|
130
|
+
|
|
131
|
+
return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def cll_old(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn]:
|
|
135
|
+
result = cll(sql, schema=schema, dialect=dialect)
|
|
136
|
+
return result.columns
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _dedeup_depends_on(depends_on: List[ColumnLevelDependsOn]) -> List[ColumnLevelDependsOn]:
|
|
140
|
+
# deduplicate the depends_on list
|
|
141
|
+
dedup_set = set()
|
|
142
|
+
dedup_list = []
|
|
143
|
+
for col_dep in depends_on:
|
|
144
|
+
node_col = col_dep.node + "." + col_dep.column
|
|
145
|
+
if node_col not in dedup_set:
|
|
146
|
+
dedup_list.append(col_dep)
|
|
147
|
+
dedup_set.add(node_col)
|
|
148
|
+
return dedup_list
|
|
97
149
|
|
|
98
|
-
if isinstance(expression, Column):
|
|
99
|
-
column = expression
|
|
100
|
-
alias = column.table
|
|
101
150
|
|
|
102
|
-
|
|
103
|
-
|
|
151
|
+
def _dedeup_cll_result(cll_result: CllResult):
|
|
152
|
+
cll_result.depends_on = _dedeup_depends_on(cll_result.depends_on)
|
|
153
|
+
for column in cll_result.columns.values():
|
|
154
|
+
column.depends_on = _dedeup_depends_on(column.depends_on)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _cll_set_scope(scope: Scope, scope_cll_map: dict[Scope, CllResult]) -> CllResult:
|
|
158
|
+
result = CllResult(depends_on=[], columns={})
|
|
159
|
+
scope_lineage = result.columns
|
|
160
|
+
|
|
161
|
+
for union_scope in scope.union_scopes:
|
|
162
|
+
sub_scope_result = scope_cll_map.get(union_scope)
|
|
163
|
+
|
|
164
|
+
for k, v in sub_scope_result.columns.items():
|
|
165
|
+
if k not in result.columns:
|
|
166
|
+
scope_lineage[k] = v
|
|
167
|
+
else:
|
|
168
|
+
scope_lineage[k].depends_on.extend(v.depends_on)
|
|
169
|
+
scope_lineage[k].type = "derived"
|
|
170
|
+
|
|
171
|
+
result.depends_on.extend(sub_scope_result.depends_on)
|
|
172
|
+
return result
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _cll_select_scope(scope: Scope, scope_cll_map: dict[Scope, CllResult]) -> CllResult:
|
|
176
|
+
assert scope.expression.key == "select"
|
|
177
|
+
|
|
178
|
+
column_dep_map = {}
|
|
179
|
+
model_depends_on = []
|
|
180
|
+
table_alias_map = {t.alias_or_name: t.name for t in scope.tables}
|
|
181
|
+
select = scope.expression
|
|
182
|
+
|
|
183
|
+
def source_column_dependency(ref_column: exp.Column) -> Optional[ColumnLevelDependencyColumn]:
|
|
184
|
+
column_name = ref_column.name
|
|
185
|
+
table_name = ref_column.table if ref_column.table != "" else next(iter(table_alias_map.values()))
|
|
186
|
+
source = scope.sources.get(table_name, None) # type: exp.Table | Scope
|
|
187
|
+
if isinstance(source, Scope):
|
|
188
|
+
ref_cll = scope_cll_map.get(source)
|
|
189
|
+
if ref_cll is None:
|
|
190
|
+
return None
|
|
191
|
+
return ref_cll.columns.get(column_name)
|
|
192
|
+
elif isinstance(source, exp.Table):
|
|
193
|
+
return ColumnLevelDependencyColumn(
|
|
194
|
+
type="passthrough", depends_on=[ColumnLevelDependsOn(source.name, column_name)]
|
|
195
|
+
)
|
|
104
196
|
else:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
if
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
#
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
for proj in scope.expression.selects:
|
|
200
|
+
type = "source"
|
|
201
|
+
column_depends_on: List[ColumnLevelDependsOn] = []
|
|
202
|
+
root = proj.this if isinstance(proj, exp.Alias) else proj
|
|
203
|
+
for expression in root.walk(bfs=False):
|
|
204
|
+
if isinstance(expression, exp.Column):
|
|
205
|
+
ref_column_dependency = source_column_dependency(expression)
|
|
206
|
+
if ref_column_dependency is not None:
|
|
207
|
+
column_depends_on.extend(ref_column_dependency.depends_on)
|
|
208
|
+
if ref_column_dependency.type == "derived":
|
|
209
|
+
type = "derived"
|
|
210
|
+
elif ref_column_dependency.type == "renamed":
|
|
211
|
+
if type == "source" or type == "passthrough":
|
|
212
|
+
type = "renamed"
|
|
213
|
+
elif ref_column_dependency.type == "passthrough":
|
|
214
|
+
if type == "source":
|
|
215
|
+
type = "passthrough"
|
|
216
|
+
else:
|
|
217
|
+
column_depends_on.append(ColumnLevelDependsOn(expression.table, expression.name))
|
|
218
|
+
if type == "source":
|
|
219
|
+
type = "passthrough"
|
|
220
|
+
|
|
221
|
+
elif isinstance(expression, (exp.Paren, exp.Identifier)):
|
|
222
|
+
pass
|
|
223
|
+
else:
|
|
224
|
+
type = "derived"
|
|
225
|
+
|
|
226
|
+
column_depends_on = _dedeup_depends_on(column_depends_on)
|
|
227
|
+
|
|
228
|
+
if len(column_depends_on) == 0 and type != "source":
|
|
229
|
+
type = "source"
|
|
230
|
+
|
|
231
|
+
if isinstance(proj, exp.Alias):
|
|
232
|
+
alias = proj
|
|
233
|
+
if type == "passthrough" and column_depends_on[0].column != alias.alias_or_name:
|
|
234
|
+
type = "renamed"
|
|
235
|
+
|
|
236
|
+
column_dep_map[proj.alias_or_name] = ColumnLevelDependencyColumn(type=type, depends_on=column_depends_on)
|
|
237
|
+
|
|
238
|
+
def selected_column_dependency(ref_column: exp.Column) -> Optional[ColumnLevelDependencyColumn]:
|
|
239
|
+
column_name = ref_column.name
|
|
240
|
+
return column_dep_map.get(column_name)
|
|
241
|
+
|
|
242
|
+
# joins clause: Reference the source columns
|
|
243
|
+
if select.args.get("joins"):
|
|
244
|
+
joins = select.args.get("joins")
|
|
245
|
+
for join in joins:
|
|
246
|
+
if isinstance(join, exp.Join):
|
|
247
|
+
for ref_column in join.find_all(exp.Column):
|
|
248
|
+
if source_column_dependency(ref_column) is not None:
|
|
249
|
+
model_depends_on.extend(source_column_dependency(ref_column).depends_on)
|
|
250
|
+
|
|
251
|
+
# where clauses: Reference the source columns
|
|
252
|
+
if select.args.get("where"):
|
|
253
|
+
where = select.args.get("where")
|
|
254
|
+
if isinstance(where, exp.Where):
|
|
255
|
+
for ref_column in where.find_all(exp.Column):
|
|
256
|
+
if source_column_dependency(ref_column) is not None:
|
|
257
|
+
model_depends_on.extend(source_column_dependency(ref_column).depends_on)
|
|
258
|
+
|
|
259
|
+
# group by clause: Reference the source columns, column index
|
|
260
|
+
if select.args.get("group"):
|
|
261
|
+
group = select.args.get("group")
|
|
262
|
+
if isinstance(group, exp.Group):
|
|
263
|
+
for ref_column in group.find_all(exp.Column):
|
|
264
|
+
if source_column_dependency(ref_column) is not None:
|
|
265
|
+
model_depends_on.extend(source_column_dependency(ref_column).depends_on)
|
|
266
|
+
|
|
267
|
+
# having clause: Reference the source columns, selected columns
|
|
268
|
+
if select.args.get("having"):
|
|
269
|
+
having = select.args.get("having")
|
|
270
|
+
if isinstance(having, exp.Having):
|
|
271
|
+
for ref_column in having.find_all(exp.Column):
|
|
272
|
+
if source_column_dependency(ref_column) is not None:
|
|
273
|
+
model_depends_on.extend(source_column_dependency(ref_column).depends_on)
|
|
274
|
+
elif selected_column_dependency(ref_column) is not None:
|
|
275
|
+
model_depends_on.extend(selected_column_dependency(ref_column).depends_on)
|
|
276
|
+
|
|
277
|
+
# order by clause: Reference the source columns, selected columns, column index
|
|
278
|
+
if select.args.get("order"):
|
|
279
|
+
order = select.args.get("order")
|
|
280
|
+
if isinstance(order, exp.Order):
|
|
281
|
+
for ref_column in order.find_all(exp.Column):
|
|
282
|
+
if source_column_dependency(ref_column) is not None:
|
|
283
|
+
model_depends_on.extend(source_column_dependency(ref_column).depends_on)
|
|
284
|
+
elif selected_column_dependency(ref_column) is not None:
|
|
285
|
+
model_depends_on.extend(selected_column_dependency(ref_column).depends_on)
|
|
286
|
+
|
|
287
|
+
for source in scope.sources.values():
|
|
288
|
+
scope_result = scope_cll_map.get(source)
|
|
289
|
+
if scope_result is not None:
|
|
290
|
+
model_depends_on.extend(scope_result.depends_on)
|
|
291
|
+
|
|
292
|
+
model_depends_on = _dedeup_depends_on(model_depends_on)
|
|
293
|
+
|
|
294
|
+
return CllResult(columns=column_dep_map, depends_on=model_depends_on)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def cll(sql, schema=None, dialect=None) -> CllResult:
|
|
298
|
+
# given a sql, return the cll for the sql
|
|
176
299
|
# {
|
|
177
|
-
#
|
|
178
|
-
#
|
|
179
|
-
#
|
|
180
|
-
#
|
|
181
|
-
#
|
|
300
|
+
# 'depends_on': [{'node': 'model_id', 'column': 'column'}],
|
|
301
|
+
# 'columns': {
|
|
302
|
+
# 'column1': {
|
|
303
|
+
# 'type': 'derived',
|
|
304
|
+
# 'depends_on': [{'node': 'model_id', 'column': 'column'}],
|
|
305
|
+
# }
|
|
306
|
+
# }
|
|
307
|
+
# }
|
|
182
308
|
|
|
183
309
|
dialect = Dialect.get(dialect) if dialect is not None else None
|
|
184
310
|
|
|
@@ -194,111 +320,17 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
|
|
|
194
320
|
except SqlglotError as e:
|
|
195
321
|
raise RecceException(f"Failed to qualify SQL: {str(e)}")
|
|
196
322
|
|
|
197
|
-
result = {}
|
|
198
|
-
|
|
323
|
+
result = CllResult(depends_on=[], columns={})
|
|
324
|
+
scope_cll_map = {}
|
|
199
325
|
for scope in traverse_scope(expression):
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
for union_scope in scope.union_scopes:
|
|
206
|
-
for k, v in global_lineage[union_scope].items():
|
|
207
|
-
if k not in scope_lineage:
|
|
208
|
-
scope_lineage[k] = v
|
|
209
|
-
else:
|
|
210
|
-
scope_lineage[k].depends_on.extend(v.depends_on)
|
|
211
|
-
scope_lineage[k].type = "derived"
|
|
326
|
+
scope_type = scope.expression.key
|
|
327
|
+
if scope_type == "union" or scope_type == "intersect" or scope_type == "except":
|
|
328
|
+
result = _cll_set_scope(scope, scope_cll_map)
|
|
329
|
+
elif scope_type == "select":
|
|
330
|
+
result = _cll_select_scope(scope, scope_cll_map)
|
|
212
331
|
else:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
# 'select a'
|
|
217
|
-
column = select
|
|
218
|
-
column_cll = _cll_expression(column, table_alias_map)
|
|
219
|
-
elif isinstance(select, Alias):
|
|
220
|
-
# 'select a as b'
|
|
221
|
-
# 'select CURRENT_TIMESTAMP() as create_at'
|
|
222
|
-
alias = select
|
|
223
|
-
col_expression = alias.this
|
|
224
|
-
column_cll = _cll_expression(col_expression, table_alias_map)
|
|
225
|
-
if (
|
|
226
|
-
column_cll
|
|
227
|
-
and column_cll.type == "passthrough"
|
|
228
|
-
and column_cll.depends_on[0].column != alias.alias_or_name
|
|
229
|
-
):
|
|
230
|
-
column_cll.type = "renamed"
|
|
231
|
-
else:
|
|
232
|
-
# 'select 1'
|
|
233
|
-
column_cll = ColumnLevelDependencyColumn(type="source", depends_on=[])
|
|
234
|
-
|
|
235
|
-
cte_type = None
|
|
236
|
-
flatten_col_depends_on = []
|
|
237
|
-
for col_dep in column_cll.depends_on:
|
|
238
|
-
col_dep_node = col_dep.node
|
|
239
|
-
col_dep_column = col_dep.column
|
|
240
|
-
# cte
|
|
241
|
-
cte_scope = scope.cte_sources.get(col_dep_node)
|
|
242
|
-
# inline derived table
|
|
243
|
-
source_scope = None
|
|
244
|
-
if isinstance(scope.sources.get(col_dep_node), Scope):
|
|
245
|
-
source_scope = scope.sources.get(col_dep_node)
|
|
246
|
-
|
|
247
|
-
if cte_scope is not None:
|
|
248
|
-
cte_cll = global_lineage[cte_scope]
|
|
249
|
-
if cte_cll is None or cte_cll.get(col_dep_column) is None:
|
|
250
|
-
# In dbt-duckdb, the external source is compiled as `read_csv('..') rather than a table.
|
|
251
|
-
continue
|
|
252
|
-
cte_type = cte_cll.get(col_dep_column).type
|
|
253
|
-
flatten_col_depends_on.extend(cte_cll.get(col_dep_column).depends_on)
|
|
254
|
-
elif source_scope is not None:
|
|
255
|
-
source_cll = global_lineage[source_scope]
|
|
256
|
-
if source_cll is None or source_cll.get(col_dep_column) is None:
|
|
257
|
-
continue
|
|
258
|
-
flatten_col_depends_on.extend(source_cll.get(col_dep_column).depends_on)
|
|
259
|
-
else:
|
|
260
|
-
flatten_col_depends_on.append(col_dep)
|
|
261
|
-
|
|
262
|
-
# deduplicate
|
|
263
|
-
dedup_col_depends_on = []
|
|
264
|
-
dedup_set = set()
|
|
265
|
-
for col_dep in flatten_col_depends_on:
|
|
266
|
-
node_col = col_dep.node + "." + col_dep.column
|
|
267
|
-
if node_col not in dedup_set:
|
|
268
|
-
dedup_col_depends_on.append(col_dep)
|
|
269
|
-
dedup_set.add(node_col)
|
|
270
|
-
|
|
271
|
-
# transformation type
|
|
272
|
-
type = column_cll.type
|
|
273
|
-
if type == "derived":
|
|
274
|
-
if len(dedup_col_depends_on) == 0:
|
|
275
|
-
type = "source"
|
|
276
|
-
else:
|
|
277
|
-
# keep current scope type
|
|
278
|
-
pass
|
|
279
|
-
elif cte_type is not None:
|
|
280
|
-
if len(dedup_col_depends_on) > 1:
|
|
281
|
-
type = "derived"
|
|
282
|
-
elif len(dedup_col_depends_on) == 0:
|
|
283
|
-
type = "source"
|
|
284
|
-
else:
|
|
285
|
-
if isinstance(select, Column):
|
|
286
|
-
type = cte_type
|
|
287
|
-
elif isinstance(select, Alias):
|
|
288
|
-
alias = select
|
|
289
|
-
if column_cll.depends_on[0].column == alias.alias_or_name:
|
|
290
|
-
type = cte_type
|
|
291
|
-
else:
|
|
292
|
-
type = "renamed" if cte_type == "passthrough" else cte_type
|
|
293
|
-
else:
|
|
294
|
-
type = "source"
|
|
295
|
-
|
|
296
|
-
scope_lineage[select.alias_or_name] = ColumnLevelDependencyColumn(
|
|
297
|
-
type=type, depends_on=dedup_col_depends_on
|
|
298
|
-
)
|
|
299
|
-
|
|
300
|
-
global_lineage[scope] = scope_lineage
|
|
301
|
-
if not scope.is_cte:
|
|
302
|
-
result = scope_lineage
|
|
332
|
+
continue
|
|
333
|
+
|
|
334
|
+
scope_cll_map[scope] = result
|
|
303
335
|
|
|
304
336
|
return result
|
recce/util/io.py
CHANGED
|
@@ -68,6 +68,9 @@ class ZipFileIO(AbstractFileIO, ABC):
|
|
|
68
68
|
def _is_pyminizip_installed():
|
|
69
69
|
try:
|
|
70
70
|
import pyminizip
|
|
71
|
+
|
|
72
|
+
# Use the module to avoid F401
|
|
73
|
+
return pyminizip is not None
|
|
71
74
|
except ImportError:
|
|
72
75
|
raise ImportError("pyminizip is not installed. Please install it using `pip install pyminizip`")
|
|
73
76
|
|
recce/util/recce_cloud.py
CHANGED
|
@@ -8,6 +8,7 @@ import requests
|
|
|
8
8
|
from recce.pull_request import PullRequestInfo
|
|
9
9
|
|
|
10
10
|
RECCE_CLOUD_API_HOST = os.environ.get("RECCE_CLOUD_API_HOST", "https://cloud.datarecce.io")
|
|
11
|
+
RECCE_CLOUD_BASE_URL = os.environ.get("RECCE_CLOUD_BASE_URL", RECCE_CLOUD_API_HOST)
|
|
11
12
|
|
|
12
13
|
logger = logging.getLogger("uvicorn")
|
|
13
14
|
|
|
@@ -31,13 +32,30 @@ class RecceCloudException(Exception):
|
|
|
31
32
|
|
|
32
33
|
class RecceCloud:
|
|
33
34
|
def __init__(self, token: str):
|
|
35
|
+
if token is None:
|
|
36
|
+
raise ValueError("Token cannot be None.")
|
|
34
37
|
self.token = token
|
|
38
|
+
self.token_type = "github_token" if token.startswith(
|
|
39
|
+
("ghp_", "gho_", "ghu_", "ghs_", "ghr_")) else "api_token"
|
|
35
40
|
self.base_url = f"{RECCE_CLOUD_API_HOST}/api/v1"
|
|
36
41
|
|
|
37
42
|
def _request(self, method, url, **kwargs):
|
|
38
43
|
headers = {"Authorization": f"Bearer {self.token}"}
|
|
39
44
|
return requests.request(method, url, headers=headers, **kwargs)
|
|
40
45
|
|
|
46
|
+
def verify_token(self) -> bool:
|
|
47
|
+
if self.token_type == "github_token":
|
|
48
|
+
return True
|
|
49
|
+
# Verify the Recce Cloud API token
|
|
50
|
+
api_url = f"{self.base_url}/verify-token"
|
|
51
|
+
try:
|
|
52
|
+
response = self._request("GET", api_url)
|
|
53
|
+
if response.status_code == 200:
|
|
54
|
+
return True
|
|
55
|
+
except Exception:
|
|
56
|
+
pass
|
|
57
|
+
return False
|
|
58
|
+
|
|
41
59
|
def get_presigned_url(
|
|
42
60
|
self,
|
|
43
61
|
method: PresignedUrlMethod,
|