recce-nightly 1.3.0.20250507__py3-none-any.whl → 1.4.0.20250514__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic.

Files changed (93)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +22 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +355 -316
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +44 -49
  12. recce/cli.py +484 -285
  13. recce/config.py +42 -33
  14. recce/core.py +52 -44
  15. recce/data/404.html +1 -1
  16. recce/data/_next/static/chunks/{368-7587b306577df275.js → 778-aef312bffb4c0312.js} +15 -15
  17. recce/data/_next/static/chunks/8d700b6a.ed11a130057c7a47.js +1 -0
  18. recce/data/_next/static/chunks/app/layout-c713a2829d3279e4.js +1 -0
  19. recce/data/_next/static/chunks/app/page-7086764277331fcb.js +1 -0
  20. recce/data/_next/static/chunks/{cd9f8d63-cf0d5a7b0f7a92e8.js → cd9f8d63-e020f408095ed77c.js} +3 -3
  21. recce/data/_next/static/chunks/webpack-b787cb1a4f2293de.js +1 -0
  22. recce/data/_next/static/css/88b8abc134cfd59a.css +3 -0
  23. recce/data/index.html +2 -2
  24. recce/data/index.txt +2 -2
  25. recce/diff.py +6 -12
  26. recce/event/__init__.py +74 -72
  27. recce/event/collector.py +27 -20
  28. recce/event/track.py +39 -27
  29. recce/exceptions.py +1 -1
  30. recce/git.py +7 -7
  31. recce/github.py +57 -53
  32. recce/models/__init__.py +1 -1
  33. recce/models/check.py +6 -7
  34. recce/models/run.py +1 -0
  35. recce/models/types.py +27 -27
  36. recce/pull_request.py +26 -24
  37. recce/run.py +148 -111
  38. recce/server.py +103 -89
  39. recce/state.py +209 -177
  40. recce/summary.py +168 -143
  41. recce/tasks/__init__.py +3 -3
  42. recce/tasks/core.py +11 -13
  43. recce/tasks/dataframe.py +19 -17
  44. recce/tasks/histogram.py +69 -34
  45. recce/tasks/lineage.py +2 -2
  46. recce/tasks/profile.py +147 -86
  47. recce/tasks/query.py +139 -87
  48. recce/tasks/rowcount.py +33 -30
  49. recce/tasks/schema.py +14 -14
  50. recce/tasks/top_k.py +35 -35
  51. recce/tasks/valuediff.py +216 -152
  52. recce/util/breaking.py +77 -84
  53. recce/util/cll.py +55 -51
  54. recce/util/io.py +19 -17
  55. recce/util/logger.py +1 -1
  56. recce/util/recce_cloud.py +70 -72
  57. recce/util/singleton.py +4 -4
  58. recce/yaml/__init__.py +7 -10
  59. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/METADATA +5 -2
  60. recce_nightly-1.4.0.20250514.dist-info/RECORD +143 -0
  61. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/WHEEL +1 -1
  62. tests/adapter/dbt_adapter/conftest.py +1 -0
  63. tests/adapter/dbt_adapter/dbt_test_helper.py +28 -18
  64. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  65. tests/adapter/dbt_adapter/test_dbt_cll.py +39 -32
  66. tests/adapter/dbt_adapter/test_selector.py +22 -21
  67. tests/tasks/test_histogram.py +58 -66
  68. tests/tasks/test_lineage.py +36 -23
  69. tests/tasks/test_preset_checks.py +45 -31
  70. tests/tasks/test_profile.py +340 -15
  71. tests/tasks/test_query.py +40 -40
  72. tests/tasks/test_row_count.py +65 -46
  73. tests/tasks/test_schema.py +65 -42
  74. tests/tasks/test_top_k.py +22 -18
  75. tests/tasks/test_valuediff.py +43 -32
  76. tests/test_cli.py +71 -58
  77. tests/test_config.py +7 -9
  78. tests/test_core.py +5 -3
  79. tests/test_dbt.py +7 -7
  80. tests/test_pull_request.py +1 -1
  81. tests/test_server.py +19 -13
  82. tests/test_state.py +40 -27
  83. tests/test_summary.py +18 -14
  84. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  85. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  86. recce/data/_next/static/chunks/app/page-92f13c8fad9fae3d.js +0 -1
  87. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  88. recce_nightly-1.3.0.20250507.dist-info/RECORD +0 -142
  89. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → E_HPXsXdrqHg2YEHmU3mK}/_buildManifest.js +0 -0
  90. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → E_HPXsXdrqHg2YEHmU3mK}/_ssgManifest.js +0 -0
  91. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/entry_points.txt +0 -0
  92. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/licenses/LICENSE +0 -0
  93. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/top_level.txt +0 -0
recce/util/breaking.py CHANGED
@@ -3,15 +3,15 @@ from dataclasses import dataclass
3
3
  from typing import Optional
4
4
 
5
5
  import sqlglot.expressions as exp
6
- from sqlglot import parse_one, Dialect
6
+ from sqlglot import Dialect, parse_one
7
7
  from sqlglot.errors import SqlglotError
8
- from sqlglot.optimizer import traverse_scope, Scope
8
+ from sqlglot.optimizer import Scope, traverse_scope
9
9
  from sqlglot.optimizer.qualify import qualify
10
10
 
11
- from recce.models.types import NodeChange, ChangeStatus
11
+ from recce.models.types import ChangeStatus, NodeChange
12
12
 
13
- CHANGE_CATEGORY_UNKNOWN = NodeChange(category='unknown')
14
- CHANGE_CATEGORY_BREAKING = NodeChange(category='breaking')
13
+ CHANGE_CATEGORY_UNKNOWN = NodeChange(category="unknown")
14
+ CHANGE_CATEGORY_BREAKING = NodeChange(category="breaking")
15
15
 
16
16
 
17
17
  @dataclass
@@ -48,11 +48,11 @@ class BreakingPerformanceTracking:
48
48
 
49
49
  def to_dict(self):
50
50
  return {
51
- 'lineage_diff_elapsed_ms': self.lineage_diff_elapsed,
52
- 'modified_nodes': self.modified_nodes,
53
- 'sqlglot_error_nodes': self.sqlglot_error_nodes,
54
- 'other_error_nodes': self.other_error_nodes,
55
- 'checkpoints': self.checkpoints,
51
+ "lineage_diff_elapsed_ms": self.lineage_diff_elapsed,
52
+ "modified_nodes": self.modified_nodes,
53
+ "sqlglot_error_nodes": self.sqlglot_error_nodes,
54
+ "other_error_nodes": self.other_error_nodes,
55
+ "checkpoints": self.checkpoints,
56
56
  }
57
57
 
58
58
  def reset(self):
@@ -64,33 +64,29 @@ class BreakingPerformanceTracking:
64
64
  self.checkpoints = {}
65
65
 
66
66
 
67
- def _diff_select_scope(
68
- old_scope: Scope,
69
- new_scope: Scope,
70
- scope_changes_map: dict[Scope, NodeChange]
71
- ) -> NodeChange:
72
- assert old_scope.expression.key == 'select'
73
- assert new_scope.expression.key == 'select'
67
+ def _diff_select_scope(old_scope: Scope, new_scope: Scope, scope_changes_map: dict[Scope, NodeChange]) -> NodeChange:
68
+ assert old_scope.expression.key == "select"
69
+ assert new_scope.expression.key == "select"
74
70
 
75
- change_category = 'non_breaking'
71
+ change_category = "non_breaking"
76
72
  changed_columns = {}
77
73
 
78
74
  # check if the upstream scopes is not breaking
79
75
  for source_name, source in new_scope.sources.items():
80
76
  if scope_changes_map.get(source) is not None:
81
77
  chanage = scope_changes_map[source]
82
- if chanage.category == 'breaking':
83
- change_category = 'breaking'
78
+ if chanage.category == "breaking":
79
+ change_category = "breaking"
84
80
 
85
81
  # check if non-select expressions are the same
86
82
  old_select = old_scope.expression # type: exp.Select
87
83
  new_select = new_scope.expression # type: exp.Select
88
84
  for arg_key in old_select.args.keys() | new_select.args.keys():
89
- if arg_key in ['expressions', 'with', 'from']:
85
+ if arg_key in ["expressions", "with", "from"]:
90
86
  continue
91
87
 
92
88
  if old_select.args.get(arg_key) != new_select.args.get(arg_key):
93
- change_category = 'breaking'
89
+ change_category = "breaking"
94
90
 
95
91
  def source_column_change_status(ref_column: exp.Column) -> Optional[ChangeStatus]:
96
92
  table_name = ref_column.table
@@ -108,9 +104,10 @@ def _diff_select_scope(
108
104
  # selects
109
105
  old_column_map = {projection.alias_or_name: projection for projection in old_select.selects}
110
106
  new_column_map = {projection.alias_or_name: projection for projection in new_select.selects}
111
- is_distinct = new_select.args.get('distinct') is not None
107
+ is_distinct = new_select.args.get("distinct") is not None
108
+
109
+ for column_name in old_column_map.keys() | new_column_map.keys():
112
110
 
113
- for column_name in (old_column_map.keys() | new_column_map.keys()):
114
111
  def _has_udtf(expr: exp.Expression) -> bool:
115
112
  return expr.find(exp.UDTF) is not None
116
113
 
@@ -124,116 +121,112 @@ def _diff_select_scope(
124
121
  new_column = new_column_map.get(column_name)
125
122
  if old_column is None:
126
123
  if is_distinct:
127
- change_category = 'breaking'
124
+ change_category = "breaking"
128
125
  elif _has_udtf(new_column):
129
- change_category = 'breaking'
126
+ change_category = "breaking"
130
127
 
131
- changed_columns[column_name] = 'added'
128
+ changed_columns[column_name] = "added"
132
129
  elif new_column is None:
133
130
  if is_distinct:
134
- change_category = 'breaking'
131
+ change_category = "breaking"
135
132
  elif _has_udtf(old_column):
136
- change_category = 'breaking'
133
+ change_category = "breaking"
137
134
 
138
- changed_columns[column_name] = 'removed'
139
- if change_category != 'breaking':
140
- change_category = 'partial_breaking'
135
+ changed_columns[column_name] = "removed"
136
+ if change_category != "breaking":
137
+ change_category = "partial_breaking"
141
138
  elif old_column != new_column:
142
139
  if is_distinct:
143
- change_category = 'breaking'
140
+ change_category = "breaking"
144
141
  elif _has_udtf(old_column) and _has_udtf(new_column):
145
- change_category = 'breaking'
142
+ change_category = "breaking"
146
143
  elif _has_aggregate(old_column) != _has_aggregate(new_column):
147
- change_category = 'breaking'
144
+ change_category = "breaking"
148
145
 
149
- changed_columns[column_name] = 'modified'
150
- if change_category != 'breaking':
151
- change_category = 'partial_breaking'
146
+ changed_columns[column_name] = "modified"
147
+ if change_category != "breaking":
148
+ change_category = "partial_breaking"
152
149
  else:
153
150
  if _has_star(new_column):
154
151
  for source_name, (_, source) in new_scope.selected_sources.items():
155
152
  change = scope_changes_map.get(source)
156
153
  if change is not None:
157
- if change.category == 'breaking':
158
- change_category = 'breaking'
154
+ if change.category == "breaking":
155
+ change_category = "breaking"
159
156
  for sub_column_name in change.columns.keys():
160
157
  column_change_status = change.columns[sub_column_name]
161
158
  changed_columns[sub_column_name] = column_change_status
162
- if change_category != 'breaking' and column_change_status in ['removed', 'modified']:
163
- change_category = 'partial_breaking'
159
+ if change_category != "breaking" and column_change_status in ["removed", "modified"]:
160
+ change_category = "partial_breaking"
164
161
  continue
165
162
 
166
163
  ref_columns = new_column.find_all(exp.Column)
167
164
  for ref_column in ref_columns:
168
165
  if source_column_change_status(ref_column) is not None:
169
166
  if is_distinct:
170
- change_category = 'breaking'
167
+ change_category = "breaking"
171
168
  elif _has_udtf(new_column):
172
- change_category = 'breaking'
169
+ change_category = "breaking"
173
170
 
174
- if change_category != 'breaking':
175
- change_category = 'partial_breaking'
176
- changed_columns[column_name] = 'modified'
171
+ if change_category != "breaking":
172
+ change_category = "partial_breaking"
173
+ changed_columns[column_name] = "modified"
177
174
 
178
175
  def selected_column_change_status(ref_column: exp.Column) -> Optional[ChangeStatus]:
179
176
  column_name = ref_column.name
180
177
  return changed_columns.get(column_name)
181
178
 
182
179
  # joins clause: Reference the source columns
183
- if new_select.args.get('joins'):
184
- joins = new_select.args.get('joins')
180
+ if new_select.args.get("joins"):
181
+ joins = new_select.args.get("joins")
185
182
  for join in joins:
186
183
  if isinstance(join, exp.Join):
187
184
  for ref_column in join.find_all(exp.Column):
188
185
  if source_column_change_status(ref_column) is not None:
189
- change_category = 'breaking'
186
+ change_category = "breaking"
190
187
 
191
188
  # where clauses: Reference the source columns
192
- if new_select.args.get('where'):
193
- where = new_select.args.get('where')
189
+ if new_select.args.get("where"):
190
+ where = new_select.args.get("where")
194
191
  if isinstance(where, exp.Where):
195
192
  for ref_column in where.find_all(exp.Column):
196
193
  if source_column_change_status(ref_column) is not None:
197
- change_category = 'breaking'
194
+ change_category = "breaking"
198
195
 
199
196
  # group by clause: Reference the source columns, column index
200
- if new_select.args.get('group'):
201
- group = new_select.args.get('group')
197
+ if new_select.args.get("group"):
198
+ group = new_select.args.get("group")
202
199
  if isinstance(group, exp.Group):
203
200
  for ref_column in group.find_all(exp.Column):
204
201
  if source_column_change_status(ref_column) is not None:
205
- change_category = 'breaking'
202
+ change_category = "breaking"
206
203
 
207
204
  # having clause: Reference the source columns, selected columns
208
- if new_select.args.get('having'):
209
- having = new_select.args.get('having')
205
+ if new_select.args.get("having"):
206
+ having = new_select.args.get("having")
210
207
  if isinstance(having, exp.Having):
211
208
  for ref_column in having.find_all(exp.Column):
212
209
  if source_column_change_status(ref_column) is not None:
213
- change_category = 'breaking'
210
+ change_category = "breaking"
214
211
  elif selected_column_change_status(ref_column) is not None:
215
- change_category = 'breaking'
212
+ change_category = "breaking"
216
213
 
217
214
  # order by clause: Reference the source columns, selected columns, column index
218
- if new_select.args.get('order'):
219
- order = new_select.args.get('order')
215
+ if new_select.args.get("order"):
216
+ order = new_select.args.get("order")
220
217
  if isinstance(order, exp.Order):
221
218
  for ref_column in order.find_all(exp.Column):
222
219
  if source_column_change_status(ref_column) is not None:
223
- change_category = 'breaking'
220
+ change_category = "breaking"
224
221
  elif selected_column_change_status(ref_column) is not None:
225
- change_category = 'breaking'
222
+ change_category = "breaking"
226
223
 
227
224
  return NodeChange(category=change_category, columns=changed_columns)
228
225
 
229
226
 
230
- def _diff_union_scope(
231
- old_scope: Scope,
232
- new_scope: Scope,
233
- scope_changes_map: dict[Scope, NodeChange]
234
- ) -> NodeChange:
235
- assert old_scope.expression.key == 'union'
236
- assert new_scope.expression.key == 'union'
227
+ def _diff_union_scope(old_scope: Scope, new_scope: Scope, scope_changes_map: dict[Scope, NodeChange]) -> NodeChange:
228
+ assert old_scope.expression.key == "union"
229
+ assert new_scope.expression.key == "union"
237
230
  assert len(old_scope.union_scopes) == len(new_scope.union_scopes)
238
231
  assert new_scope.union_scopes is not None
239
232
  assert len(new_scope.union_scopes) > 0
@@ -244,11 +237,11 @@ def _diff_union_scope(
244
237
 
245
238
  for sub_scope in new_scope.union_scopes[1:]:
246
239
  result_right = scope_changes_map.get(sub_scope)
247
- if change_category == 'partial_breaking':
248
- if result_right.category in ['breaking']:
240
+ if change_category == "partial_breaking":
241
+ if result_right.category in ["breaking"]:
249
242
  change_category = result_right.category
250
- elif change_category == 'non_breaking':
251
- if result_right.category in ['breaking', 'partial_breaking']:
243
+ elif change_category == "non_breaking":
244
+ if result_right.category in ["breaking", "partial_breaking"]:
252
245
  change_category = result_right.category
253
246
  for column_name, column_change_status in result_right.columns.items():
254
247
  changed_columns[column_name] = column_change_status
@@ -265,7 +258,7 @@ def parse_change_category(
265
258
  perf_tracking: BreakingPerformanceTracking = None,
266
259
  ) -> NodeChange:
267
260
  if old_sql == new_sql:
268
- return NodeChange(category='non_breaking')
261
+ return NodeChange(category="non_breaking")
269
262
 
270
263
  try:
271
264
  dialect = Dialect.get(dialect)
@@ -294,31 +287,31 @@ def parse_change_category(
294
287
  old_scopes = traverse_scope(old_exp)
295
288
  new_scopes = traverse_scope(new_exp)
296
289
  if len(old_scopes) != len(new_scopes):
297
- return NodeChange(category='breaking', columns={})
290
+ return NodeChange(category="breaking", columns={})
298
291
 
299
292
  scope_changes_map = {}
300
293
  for old_scope, new_scope in zip(old_scopes, new_scopes):
301
294
  if old_scope.expression.key != new_scope.expression.key:
302
- scope_changes_map[new_scope] = NodeChange(category='breaking')
295
+ scope_changes_map[new_scope] = NodeChange(category="breaking")
303
296
  continue
304
297
  if old_scope == new_scope:
305
- scope_changes_map[new_scope] = NodeChange(category='non_breaking')
298
+ scope_changes_map[new_scope] = NodeChange(category="non_breaking")
306
299
  continue
307
300
 
308
301
  scope_type = old_scope.expression.key
309
- if scope_type == 'select':
302
+ if scope_type == "select":
310
303
  # CTE, Subquery, Root
311
304
  result = _diff_select_scope(old_scope, new_scope, scope_changes_map)
312
- elif scope_type == 'union':
305
+ elif scope_type == "union":
313
306
  # Union
314
307
  result = _diff_union_scope(old_scope, new_scope, scope_changes_map)
315
308
  else:
316
309
  if old_scope.expression != new_scope.expression:
317
- result = NodeChange(category='breaking', columns={})
310
+ result = NodeChange(category="breaking", columns={})
318
311
  else:
319
- result = NodeChange(category='non_breaking', columns={})
312
+ result = NodeChange(category="non_breaking", columns={})
320
313
 
321
- if result.category == 'unknown':
314
+ if result.category == "unknown":
322
315
  return result
323
316
 
324
317
  scope_changes_map[new_scope] = result
recce/util/cll.py CHANGED
@@ -2,10 +2,21 @@ import time
2
2
  from dataclasses import dataclass
3
3
  from typing import Dict, List, Literal
4
4
 
5
- from sqlglot import parse_one, Dialect
6
- from sqlglot.errors import SqlglotError, OptimizeError
7
- from sqlglot.expressions import Column, Alias, Func, Binary, Paren, Case, Expression, If, Union, Intersect
8
- from sqlglot.optimizer import traverse_scope, Scope
5
+ from sqlglot import Dialect, parse_one
6
+ from sqlglot.errors import OptimizeError, SqlglotError
7
+ from sqlglot.expressions import (
8
+ Alias,
9
+ Binary,
10
+ Case,
11
+ Column,
12
+ Expression,
13
+ Func,
14
+ If,
15
+ Intersect,
16
+ Paren,
17
+ Union,
18
+ )
19
+ from sqlglot.optimizer import Scope, traverse_scope
9
20
  from sqlglot.optimizer.qualify import qualify
10
21
 
11
22
  from recce.exceptions import RecceException
@@ -50,11 +61,11 @@ class CLLPerformanceTracking(metaclass=SingletonMeta):
50
61
 
51
62
  def to_dict(self):
52
63
  return {
53
- 'lineage_elapsed_ms': self.lineage_elapsed,
54
- 'column_lineage_elapsed_ms': self.column_lineage_elapsed,
55
- 'total_nodes': self.total_nodes,
56
- 'sqlglot_error_nodes': self.sqlglot_error_nodes,
57
- 'other_error_nodes': self.other_error_nodes
64
+ "lineage_elapsed_ms": self.lineage_elapsed,
65
+ "column_lineage_elapsed_ms": self.column_lineage_elapsed,
66
+ "total_nodes": self.total_nodes,
67
+ "sqlglot_error_nodes": self.sqlglot_error_nodes,
68
+ "other_error_nodes": self.other_error_nodes,
58
69
  }
59
70
 
60
71
  def reset(self):
@@ -76,7 +87,7 @@ class ColumnLevelDependsOn:
76
87
 
77
88
  @dataclass
78
89
  class ColumnLevelDependencyColumn:
79
- type: Literal['source', 'passthrough', 'renamed', 'derived']
90
+ type: Literal["source", "passthrough", "renamed", "derived"]
80
91
  depends_on: List[ColumnLevelDependsOn]
81
92
 
82
93
 
@@ -93,10 +104,7 @@ def _cll_expression(expression, table_alias_map) -> ColumnLevelDependencyColumn:
93
104
  else:
94
105
  table = table_alias_map.get(alias, alias)
95
106
 
96
- return ColumnLevelDependencyColumn(
97
- type='passthrough',
98
- depends_on=[ColumnLevelDependsOn(table, column.name)]
99
- )
107
+ return ColumnLevelDependencyColumn(type="passthrough", depends_on=[ColumnLevelDependsOn(table, column.name)])
100
108
  elif isinstance(expression, Paren):
101
109
  return _cll_expression(expression.this, table_alias_map)
102
110
  elif isinstance(expression, Binary):
@@ -107,31 +115,31 @@ def _cll_expression(expression, table_alias_map) -> ColumnLevelDependencyColumn:
107
115
  if expression.right:
108
116
  depends_on_right = _cll_expression(expression.right, table_alias_map).depends_on
109
117
  depends_on.extend(depends_on_right)
110
- type = 'derived' if depends_on else 'source'
118
+ type = "derived" if depends_on else "source"
111
119
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
112
120
  elif isinstance(expression, Case):
113
- ifs = expression.args['ifs']
114
- default = expression.args['default']
121
+ ifs = expression.args["ifs"]
122
+ default = expression.args["default"]
115
123
  depends_on = []
116
124
  for expr in ifs:
117
125
  depends_on_one = _cll_expression(expr, table_alias_map).depends_on
118
126
  depends_on.extend(depends_on_one)
119
127
  if default is not None:
120
128
  depends_on.extend(_cll_expression(default, table_alias_map).depends_on)
121
- type = 'derived' if depends_on else 'source'
129
+ type = "derived" if depends_on else "source"
122
130
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
123
131
  elif isinstance(expression, If):
124
132
  depends_on = []
125
133
  if expression.this:
126
134
  depends_on_one = _cll_expression(expression.this, table_alias_map).depends_on
127
135
  depends_on.extend(depends_on_one)
128
- if expression.args.get('true'):
129
- depends_on_one = _cll_expression(expression.args.get('true'), table_alias_map).depends_on
136
+ if expression.args.get("true"):
137
+ depends_on_one = _cll_expression(expression.args.get("true"), table_alias_map).depends_on
130
138
  depends_on.extend(depends_on_one)
131
- if expression.args.get('false'):
132
- depends_on_one = _cll_expression(expression.args.get('false'), table_alias_map).depends_on
139
+ if expression.args.get("false"):
140
+ depends_on_one = _cll_expression(expression.args.get("false"), table_alias_map).depends_on
133
141
  depends_on.extend(depends_on_one)
134
- type = 'derived' if depends_on else 'source'
142
+ type = "derived" if depends_on else "source"
135
143
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
136
144
  elif isinstance(expression, Func):
137
145
  if expression.expressions:
@@ -139,28 +147,28 @@ def _cll_expression(expression, table_alias_map) -> ColumnLevelDependencyColumn:
139
147
  for expr in expression.expressions:
140
148
  depends_on_one = _cll_expression(expr, table_alias_map).depends_on
141
149
  depends_on.extend(depends_on_one)
142
- type = 'derived' if depends_on else 'source'
150
+ type = "derived" if depends_on else "source"
143
151
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
144
152
  if expression.this:
145
153
  depends_on = _cll_expression(expression.this, table_alias_map).depends_on
146
- type = 'derived' if depends_on else 'source'
154
+ type = "derived" if depends_on else "source"
147
155
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
148
156
 
149
- return ColumnLevelDependencyColumn(type='source', depends_on=[])
157
+ return ColumnLevelDependencyColumn(type="source", depends_on=[])
150
158
  elif expression.this and isinstance(expression.this, Expression):
151
159
  depends_on = _cll_expression(expression.this, table_alias_map).depends_on
152
- type = 'derived' if depends_on else 'source'
160
+ type = "derived" if depends_on else "source"
153
161
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
154
162
  elif expression.expressions:
155
163
  depends_on = []
156
164
  for expr in expression.expressions:
157
165
  depends_on_one = _cll_expression(expr, table_alias_map).depends_on
158
166
  depends_on.extend(depends_on_one)
159
- type = 'derived' if depends_on else 'source'
167
+ type = "derived" if depends_on else "source"
160
168
  return ColumnLevelDependencyColumn(type=type, depends_on=depends_on)
161
169
  else:
162
170
  depends_on = []
163
- return ColumnLevelDependencyColumn(type='source', depends_on=depends_on)
171
+ return ColumnLevelDependencyColumn(type="source", depends_on=depends_on)
164
172
 
165
173
 
166
174
  def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn]:
@@ -177,24 +185,21 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
177
185
  try:
178
186
  expression = parse_one(sql, dialect=dialect)
179
187
  except SqlglotError as e:
180
- raise RecceException(f'Failed to parse SQL: {str(e)}')
188
+ raise RecceException(f"Failed to parse SQL: {str(e)}")
181
189
 
182
190
  try:
183
191
  expression = qualify(expression, schema=schema, dialect=dialect)
184
192
  except OptimizeError as e:
185
- raise RecceException(f'Failed to optimize SQL: {str(e)}')
193
+ raise RecceException(f"Failed to optimize SQL: {str(e)}")
186
194
  except SqlglotError as e:
187
- raise RecceException(f'Failed to qualify SQL: {str(e)}')
195
+ raise RecceException(f"Failed to qualify SQL: {str(e)}")
188
196
 
189
197
  result = {}
190
198
  global_lineage = {}
191
199
  for scope in traverse_scope(expression):
192
200
  scope_lineage = {}
193
201
 
194
- table_alias_map = {
195
- t.alias_or_name: t.name
196
- for t in scope.tables
197
- }
202
+ table_alias_map = {t.alias_or_name: t.name for t in scope.tables}
198
203
 
199
204
  if isinstance(scope.expression, Union) or isinstance(scope.expression, Intersect):
200
205
  for union_scope in scope.union_scopes:
@@ -203,7 +208,7 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
203
208
  scope_lineage[k] = v
204
209
  else:
205
210
  scope_lineage[k].depends_on.extend(v.depends_on)
206
- scope_lineage[k].type = 'derived'
211
+ scope_lineage[k].type = "derived"
207
212
  else:
208
213
  for select in scope.expression.selects:
209
214
  # instance of Column
@@ -218,14 +223,14 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
218
223
  col_expression = alias.this
219
224
  column_cll = _cll_expression(col_expression, table_alias_map)
220
225
  if (
221
- column_cll and
222
- column_cll.type == 'passthrough' and
223
- column_cll.depends_on[0].column != alias.alias_or_name
226
+ column_cll
227
+ and column_cll.type == "passthrough"
228
+ and column_cll.depends_on[0].column != alias.alias_or_name
224
229
  ):
225
- column_cll.type = 'renamed'
230
+ column_cll.type = "renamed"
226
231
  else:
227
232
  # 'select 1'
228
- column_cll = ColumnLevelDependencyColumn(type='source', depends_on=[])
233
+ column_cll = ColumnLevelDependencyColumn(type="source", depends_on=[])
229
234
 
230
235
  cte_type = None
231
236
  flatten_col_depends_on = []
@@ -258,24 +263,24 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
258
263
  dedup_col_depends_on = []
259
264
  dedup_set = set()
260
265
  for col_dep in flatten_col_depends_on:
261
- node_col = col_dep.node + '.' + col_dep.column
266
+ node_col = col_dep.node + "." + col_dep.column
262
267
  if node_col not in dedup_set:
263
268
  dedup_col_depends_on.append(col_dep)
264
269
  dedup_set.add(node_col)
265
270
 
266
271
  # transformation type
267
272
  type = column_cll.type
268
- if type == 'derived':
273
+ if type == "derived":
269
274
  if len(dedup_col_depends_on) == 0:
270
- type = 'source'
275
+ type = "source"
271
276
  else:
272
277
  # keep current scope type
273
278
  pass
274
279
  elif cte_type is not None:
275
280
  if len(dedup_col_depends_on) > 1:
276
- type = 'derived'
281
+ type = "derived"
277
282
  elif len(dedup_col_depends_on) == 0:
278
- type = 'source'
283
+ type = "source"
279
284
  else:
280
285
  if isinstance(select, Column):
281
286
  type = cte_type
@@ -284,13 +289,12 @@ def cll(sql, schema=None, dialect=None) -> Dict[str, ColumnLevelDependencyColumn
284
289
  if column_cll.depends_on[0].column == alias.alias_or_name:
285
290
  type = cte_type
286
291
  else:
287
- type = 'renamed' if cte_type == 'passthrough' else cte_type
292
+ type = "renamed" if cte_type == "passthrough" else cte_type
288
293
  else:
289
- type = 'source'
294
+ type = "source"
290
295
 
291
296
  scope_lineage[select.alias_or_name] = ColumnLevelDependencyColumn(
292
- type=type,
293
- depends_on=dedup_col_depends_on
297
+ type=type, depends_on=dedup_col_depends_on
294
298
  )
295
299
 
296
300
  global_lineage[scope] = scope_lineage