@altimateai/altimate-code 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/bin/altimate +6 -0
  3. package/bin/altimate-code +6 -0
  4. package/dbt-tools/bin/altimate-dbt +2 -0
  5. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  96. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  98. package/dbt-tools/dist/index.js +23859 -0
  99. package/package.json +13 -13
  100. package/postinstall.mjs +42 -0
  101. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,904 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import logging
5
+ import typing as t
6
+ from collections import defaultdict
7
+ from enum import Enum, auto
8
+
9
+ from sqlglot import exp
10
+ from sqlglot.errors import OptimizeError
11
+ from sqlglot.helper import ensure_collection, find_new_name, seq_get
12
+
13
# Module logger; used below to warn about expressions whose scope cannot be traversed.
logger = logging.getLogger("sqlglot")

# Expression types that `traverse_scope` accepts as a root expression.
# Any other expression type makes `traverse_scope` return an empty list.
TRAVERSABLES = (exp.Query, exp.DDL, exp.DML)
16
+
17
+
18
class ScopeType(Enum):
    """Kind of a `Scope`, relative to its parent scope."""

    # Default type of a `Scope` that is constructed directly (no parent branch).
    ROOT = auto()
    # Scopes branched with this type are flagged as possibly correlated (see `Scope.branch`).
    SUBQUERY = auto()
    # Scope of an aliased / query-wrapping subquery used as a table source.
    DERIVED_TABLE = auto()
    # Scope of a common table expression body (see `_traverse_ctes`).
    CTE = auto()
    # Scope of one operand of a set operation (see `_traverse_union`).
    UNION = auto()
    # Scopes branched with this type are flagged as possibly correlated (see `Scope.branch`).
    UDTF = auto()
25
+
26
+
27
class Scope:
    """
    Selection scope.

    Attributes:
        expression (exp.Select|exp.SetOperation): Root expression of this scope
        sources (dict[str, exp.Table|Scope]): Mapping of source name to either
            a Table expression or another Scope instance. For example:
                SELECT * FROM x                     {"x": Table(this="x")}
                SELECT * FROM x AS y                {"y": Table(this="x")}
                SELECT * FROM (SELECT ...) AS y     {"y": Scope(...)}
        lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals
            For example:
                SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c;
            The LATERAL VIEW EXPLODE gets x as a source.
        cte_sources (dict[str, Scope]): Sources from CTES
        outer_columns (list[str]): If this is a derived table or CTE, and the outer query
            defines a column list for the alias of this scope, this is that list of columns.
            For example:
                SELECT * FROM (SELECT ...) AS y(col1, col2)
            The inner query would have `["col1", "col2"]` for its `outer_columns`
        parent (Scope): Parent scope
        scope_type (ScopeType): Type of this scope, relative to its parent
        subquery_scopes (list[Scope]): List of all child scopes for subqueries
        cte_scopes (list[Scope]): List of all child scopes for CTEs
        derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
        udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
        table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
        union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be
            a list of the left and right child scopes.
        can_be_correlated (bool|None): Whether columns of this scope may refer to an outer
            scope. `branch` sets it to a truthy value when the parent already had it set or
            when the new scope is a SUBQUERY or UDTF.
    """

    def __init__(
        self,
        expression,
        sources=None,
        outer_columns=None,
        parent=None,
        scope_type=ScopeType.ROOT,
        lateral_sources=None,
        cte_sources=None,
        can_be_correlated=None,
    ):
        self.expression = expression
        self.sources = sources or {}
        self.lateral_sources = lateral_sources or {}
        self.cte_sources = cte_sources or {}
        # Lateral and CTE sources are also visible through the general `sources` mapping.
        self.sources.update(self.lateral_sources)
        self.sources.update(self.cte_sources)
        self.outer_columns = outer_columns or []
        self.parent = parent
        self.scope_type = scope_type
        self.subquery_scopes = []
        self.derived_table_scopes = []
        self.table_scopes = []
        self.cte_scopes = []
        self.union_scopes = []
        self.udtf_scopes = []
        self.can_be_correlated = can_be_correlated
        self.clear_cache()

    def clear_cache(self):
        """Reset every lazily computed attribute so it is recomputed on next access."""
        self._collected = False
        self._raw_columns = None
        self._stars = None
        self._derived_tables = None
        self._udtfs = None
        self._tables = None
        self._ctes = None
        self._subqueries = None
        self._selected_sources = None
        self._columns = None
        self._external_columns = None
        self._join_hints = None
        self._pivots = None
        self._references = None
        self._semi_anti_join_tables = None

    def branch(
        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
    ):
        """
        Branch from the current scope to a new, inner scope.

        Args:
            expression (exp.Expression): root of the new scope; `expression.unnest()` is stored
            scope_type (ScopeType): type of the new scope
            sources (dict): sources of the new scope (copied, not shared)
            cte_sources (dict): extra CTE sources, merged over this scope's `cte_sources`
            lateral_sources (dict): lateral sources of the new scope (copied, not shared)
            **kwargs: forwarded to the `Scope` constructor (e.g. `outer_columns`)

        Returns:
            Scope: the new child scope, with `parent` set to this scope
        """
        return Scope(
            expression=expression.unnest(),
            sources=sources.copy() if sources else None,
            parent=self,
            scope_type=scope_type,
            # The child inherits every CTE visible here; its own CTE sources take precedence.
            cte_sources={**self.cte_sources, **(cte_sources or {})},
            lateral_sources=lateral_sources.copy() if lateral_sources else None,
            # Correlation is sticky: once an ancestor can be correlated, descendants can too.
            can_be_correlated=self.can_be_correlated
            or scope_type in (ScopeType.SUBQUERY, ScopeType.UDTF),
            **kwargs,
        )

    def _collect(self):
        """Walk this scope once and bucket its nodes into the per-kind caches."""
        self._tables = []
        self._ctes = []
        self._subqueries = []
        self._derived_tables = []
        self._udtfs = []
        self._raw_columns = []
        self._stars = []
        self._join_hints = []
        self._semi_anti_join_tables = set()

        for node in self.walk(bfs=False):
            # The scope's own root expression is never classified.
            if node is self.expression:
                continue

            if isinstance(node, exp.Dot) and node.is_star:
                self._stars.append(node)
            elif isinstance(node, exp.Column):
                if isinstance(node.this, exp.Star):
                    self._stars.append(node)
                else:
                    self._raw_columns.append(node)
            elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
                parent = node.parent
                if isinstance(parent, exp.Join) and parent.is_semi_or_anti_join:
                    self._semi_anti_join_tables.add(node.alias_or_name)

                self._tables.append(node)
            elif isinstance(node, exp.JoinHint):
                self._join_hints.append(node)
            elif isinstance(node, exp.UDTF):
                self._udtfs.append(node)
            elif isinstance(node, exp.CTE):
                self._ctes.append(node)
            elif _is_derived_table(node) and _is_from_or_join(node):
                self._derived_tables.append(node)
            elif isinstance(node, exp.UNWRAPPED_QUERIES):
                self._subqueries.append(node)

        self._collected = True

    def _ensure_collected(self):
        """Run `_collect` unless it already ran since the last `clear_cache`."""
        if not self._collected:
            self._collect()

    def walk(self, bfs=True, prune=None):
        """
        Walk the nodes of this scope's expression via `walk_in_scope`.

        Args:
            bfs (bool): forwarded to `walk_in_scope`
            prune: forwarded to `walk_in_scope` (presumably a callable that decides
                which nodes to stop at; see `walk_in_scope` for its exact contract)

        Returns:
            Whatever `walk_in_scope` returns for this scope's expression.
        """
        # Forward the caller's `prune`; it used to be dropped in favour of a hard-coded None.
        return walk_in_scope(self.expression, bfs=bfs, prune=prune)

    def find(self, *expression_types, bfs=True):
        """Delegate to `find_in_scope` for this scope's expression and the given types."""
        return find_in_scope(self.expression, expression_types, bfs=bfs)

    def find_all(self, *expression_types, bfs=True):
        """Delegate to `find_all_in_scope` for this scope's expression and the given types."""
        return find_all_in_scope(self.expression, expression_types, bfs=bfs)

    def replace(self, old, new):
        """
        Replace `old` with `new`.

        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.

        Args:
            old (exp.Expression): old node
            new (exp.Expression): new node
        """
        old.replace(new)
        self.clear_cache()

    @property
    def tables(self):
        """
        List of tables in this scope.

        Returns:
            list[exp.Table]: tables
        """
        self._ensure_collected()
        return self._tables

    @property
    def ctes(self):
        """
        List of CTEs in this scope.

        Returns:
            list[exp.CTE]: ctes
        """
        self._ensure_collected()
        return self._ctes

    @property
    def derived_tables(self):
        """
        List of derived tables in this scope.

        For example:
            SELECT * FROM (SELECT ...) <- that's a derived table

        Returns:
            list[exp.Subquery]: derived tables
        """
        self._ensure_collected()
        return self._derived_tables

    @property
    def udtfs(self):
        """
        List of "User Defined Tabular Functions" in this scope.

        Returns:
            list[exp.UDTF]: UDTFs
        """
        self._ensure_collected()
        return self._udtfs

    @property
    def subqueries(self):
        """
        List of subqueries in this scope.

        For example:
            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery

        Returns:
            list[exp.Select | exp.SetOperation]: subqueries
        """
        self._ensure_collected()
        return self._subqueries

    @property
    def stars(self) -> t.List[exp.Column | exp.Dot]:
        """
        List of star expressions (columns or dots) in this scope.
        """
        self._ensure_collected()
        return self._stars

    @property
    def columns(self):
        """
        List of columns in this scope.

        Returns:
            list[exp.Column]: Column instances in this scope, plus any
                Columns that reference this scope from correlated subqueries.
        """
        if self._columns is None:
            self._ensure_collected()
            columns = self._raw_columns

            # Columns that child scopes could not resolve against their own sources.
            external_columns = [
                column
                for scope in itertools.chain(
                    self.subquery_scopes,
                    self.udtf_scopes,
                    (dts for dts in self.derived_table_scopes if dts.can_be_correlated),
                )
                for column in scope.external_columns
            ]

            named_selects = set(self.expression.named_selects)

            self._columns = []
            for column in columns + external_columns:
                ancestor = column.find_ancestor(
                    exp.Select,
                    exp.Qualify,
                    exp.Order,
                    exp.Having,
                    exp.Hint,
                    exp.Table,
                    exp.Star,
                    exp.Distinct,
                )
                # Keep the column unless it is an unqualified name whose nearest listed
                # ancestor is a clause where it refers to a select alias instead
                # (e.g. ORDER BY / DISTINCT on a named select, or a star's EXCEPT list).
                if (
                    not ancestor
                    or column.table
                    or isinstance(ancestor, exp.Select)
                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
                    or (
                        isinstance(ancestor, (exp.Order, exp.Distinct))
                        and (
                            isinstance(ancestor.parent, (exp.Window, exp.WithinGroup))
                            or column.name not in named_selects
                        )
                    )
                    or (isinstance(ancestor, exp.Star) and not column.arg_key == "except")
                ):
                    self._columns.append(column)

        return self._columns

    @property
    def selected_sources(self):
        """
        Mapping of nodes and sources that are actually selected from in this scope.

        That is, all tables in a schema are selectable at any point. But a
        table only becomes a selected source if it's included in a FROM or JOIN clause.

        Returns:
            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes

        Raises:
            OptimizeError: if two references in this scope share the same name
        """
        if self._selected_sources is None:
            result = {}

            # Accessing `references` triggers collection, so `_semi_anti_join_tables` is a set here.
            for name, node in self.references:
                if name in self._semi_anti_join_tables:
                    # The RHS table of SEMI/ANTI joins shouldn't be collected as a
                    # selected source
                    continue

                if name in result:
                    raise OptimizeError(f"Alias already used: {name}")
                if name in self.sources:
                    result[name] = (node, self.sources[name])

            self._selected_sources = result
        return self._selected_sources

    @property
    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
        """
        Pairs of (name, node) for every table, derived table and UDTF in this scope.

        Tables are listed first (keyed by `alias_or_name`), followed by derived tables
        and UDTFs (keyed by `alias`). A derived table / UDTF without pivots is stored
        as `expression.unnest()`.
        """
        if self._references is None:
            self._references = []

            for table in self.tables:
                self._references.append((table.alias_or_name, table))
            for expression in itertools.chain(self.derived_tables, self.udtfs):
                self._references.append(
                    (
                        expression.alias,
                        expression if expression.args.get("pivots") else expression.unnest(),
                    )
                )

        return self._references

    @property
    def external_columns(self):
        """
        Columns that appear to reference sources in outer scopes.

        Returns:
            list[exp.Column]: Column instances that don't reference
                sources in the current scope.
        """
        if self._external_columns is None:
            if isinstance(self.expression, exp.SetOperation):
                # A set operation has no columns of its own; combine both operands.
                left, right = self.union_scopes
                self._external_columns = left.external_columns + right.external_columns
            else:
                self._external_columns = [
                    c
                    for c in self.columns
                    if c.table not in self.selected_sources
                    and c.table not in self.semi_or_anti_join_tables
                ]

        return self._external_columns

    @property
    def unqualified_columns(self):
        """
        Unqualified columns in the current scope.

        Returns:
            list[exp.Column]: Unqualified columns
        """
        return [c for c in self.columns if not c.table]

    @property
    def join_hints(self):
        """
        Hints that exist in the scope that reference tables

        Note that this property does not trigger collection: it is an empty list
        until some other property has collected the scope's nodes.

        Returns:
            list[exp.JoinHint]: Join hints that are referenced within the scope
        """
        if self._join_hints is None:
            return []
        return self._join_hints

    @property
    def pivots(self):
        """
        Pivots attached to the nodes in `references`.

        Returns:
            list: every entry of each referenced node's "pivots" arg, in reference order
        """
        # `is None` (not falsiness) so that an empty result is cached as well,
        # consistent with the other lazily computed properties.
        if self._pivots is None:
            self._pivots = [
                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
            ]

        return self._pivots

    @property
    def semi_or_anti_join_tables(self):
        """
        Names of tables found on the joined side of SEMI/ANTI joins during collection.

        Returns:
            set[str]: table names (`alias_or_name`); an empty set if nothing was collected
        """
        return self._semi_anti_join_tables or set()

    def source_columns(self, source_name):
        """
        Get all columns in the current scope for a particular source.

        Args:
            source_name (str): Name of the source
        Returns:
            list[exp.Column]: Column instances that reference `source_name`
        """
        return [column for column in self.columns if column.table == source_name]

    @property
    def is_subquery(self):
        """Determine if this scope is a subquery"""
        return self.scope_type == ScopeType.SUBQUERY

    @property
    def is_derived_table(self):
        """Determine if this scope is a derived table"""
        return self.scope_type == ScopeType.DERIVED_TABLE

    @property
    def is_union(self):
        """Determine if this scope is a union"""
        return self.scope_type == ScopeType.UNION

    @property
    def is_cte(self):
        """Determine if this scope is a common table expression"""
        return self.scope_type == ScopeType.CTE

    @property
    def is_root(self):
        """Determine if this is the root scope"""
        return self.scope_type == ScopeType.ROOT

    @property
    def is_udtf(self):
        """Determine if this scope is a UDTF (User Defined Table Function)"""
        return self.scope_type == ScopeType.UDTF

    @property
    def is_correlated_subquery(self):
        """Determine if this scope is a correlated subquery"""
        return bool(self.can_be_correlated and self.external_columns)

    def rename_source(self, old_name, new_name):
        """
        Rename a source in this scope.

        If `old_name` is not a known source (or is falsy and "" is not a source),
        `new_name` is mapped to an empty list. Cached properties are not invalidated.
        """
        columns = self.sources.pop(old_name or "", [])
        self.sources[new_name] = columns

    def add_source(self, name, source):
        """Add a source to this scope"""
        self.sources[name] = source
        self.clear_cache()

    def remove_source(self, name):
        """Remove a source from this scope"""
        self.sources.pop(name, None)
        self.clear_cache()

    def __repr__(self):
        return f"Scope<{self.expression.sql()}>"

    def traverse(self):
        """
        Traverse the scope tree from this node.

        Yields:
            Scope: scope instances in depth-first-search post-order
        """
        # Iterative pre-order walk; reversing it at the end yields children before parents.
        stack = [self]
        result = []
        while stack:
            scope = stack.pop()
            result.append(scope)
            stack.extend(
                itertools.chain(
                    scope.cte_scopes,
                    scope.union_scopes,
                    scope.table_scopes,
                    scope.subquery_scopes,
                )
            )

        yield from reversed(result)

    def ref_count(self):
        """
        Count the number of times each scope in this tree is referenced.

        Returns:
            dict[int, int]: Mapping of Scope instance ID to reference count
        """
        scope_ref_count = defaultdict(lambda: 0)

        for scope in self.traverse():
            for _, source in scope.selected_sources.values():
                scope_ref_count[id(source)] += 1

        return scope_ref_count
516
+
517
+
518
def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
    """
    Collect every scope of `expression`, innermost scopes first.

    A "Scope" captures the context of a Select statement (for example, which
    source names are visible inside a subquery), which the expression tree alone
    does not expose. The scopes are materialized into a list on purpose: handing
    out a lazy generator could expose scopes whose properties are not complete yet.

    Examples:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
        >>> scopes = traverse_scope(expression)
        >>> scopes[0].expression.sql(), list(scopes[0].sources)
        ('SELECT a FROM x', ['x'])
        >>> scopes[1].expression.sql(), list(scopes[1].sources)
        ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])

    Args:
        expression: Expression to traverse

    Returns:
        A list of the created scope instances; empty when `expression` is not
        one of the `TRAVERSABLES` types.
    """
    # Guard clause: only traversable roots produce scopes.
    if not isinstance(expression, TRAVERSABLES):
        return []

    root_scope = Scope(expression)
    return [*_traverse_scope(root_scope)]
547
+
548
+
549
def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
    """
    Build the scope tree of `expression` and hand back its root.

    Args:
        expression: Expression to build the scope tree for.

    Returns:
        The root scope, i.e. the last scope produced by `traverse_scope`
        (whatever `seq_get` gives for index -1 when no scope was produced).
    """
    all_scopes = traverse_scope(expression)
    # The root scope is emitted last by the traversal.
    return seq_get(all_scopes, -1)
560
+
561
+
562
def _traverse_scope(scope):
    """
    Yield the child scopes of `scope`, then (for most expression types) `scope` itself.

    The traversal strategy is picked from the type of `scope.expression`. Set
    operations, DDL and DML statements and unsupported expression types return
    early, so `scope` itself is not yielded by this function for them.

    Args:
        scope (Scope): scope to traverse

    Yields:
        Scope: nested scopes first, the given scope last (when it is yielded at all)
    """
    expression = scope.expression

    if isinstance(expression, exp.Select):
        yield from _traverse_select(scope)
    elif isinstance(expression, exp.SetOperation):
        yield from _traverse_ctes(scope)
        # `_traverse_union` yields the set-operation scope itself, hence the early return.
        yield from _traverse_union(scope)
        return
    elif isinstance(expression, exp.Subquery):
        if scope.is_root:
            yield from _traverse_select(scope)
        else:
            yield from _traverse_subqueries(scope)
    elif isinstance(expression, exp.Table):
        yield from _traverse_tables(scope)
    elif isinstance(expression, exp.UDTF):
        yield from _traverse_udtfs(scope)
    elif isinstance(expression, exp.DDL):
        if isinstance(expression.expression, exp.Query):
            yield from _traverse_ctes(scope)
            # The wrapped query gets a fresh root scope that can still see the DDL's CTEs.
            yield from _traverse_scope(Scope(expression.expression, cte_sources=scope.cte_sources))
        return
    elif isinstance(expression, exp.DML):
        yield from _traverse_ctes(scope)
        for query in find_all_in_scope(expression, exp.Query):
            # This check ensures we don't yield the CTE/nested queries twice
            if not isinstance(query.parent, (exp.CTE, exp.Subquery)):
                yield from _traverse_scope(Scope(query, cte_sources=scope.cte_sources))
        return
    else:
        logger.warning("Cannot traverse scope %s with type '%s'", expression, type(expression))
        return

    yield scope
597
+
598
+
599
def _traverse_select(scope):
    """Yield the scopes nested in a SELECT: CTEs first, then tables, then subqueries."""
    # The order matters: later steps read the sources registered by earlier ones.
    for traverse_step in (_traverse_ctes, _traverse_tables, _traverse_subqueries):
        yield from traverse_step(scope)
603
+
604
+
605
def _traverse_union(scope):
    """
    Yield the scopes of a (possibly nested) set operation without recursing on it.

    Operands are visited left before right using explicit stacks. Each set-operation
    scope gets its `union_scopes` set to `[left, right]` and is yielded after both
    of its operands.

    Args:
        scope (Scope): scope whose expression is a set operation

    Yields:
        Scope: operand scopes (and their nested scopes), then the set-operation scopes
    """
    prev_scope = None
    union_scope_stack = [scope]
    # Right is pushed first so that the left operand is popped (visited) first.
    expression_stack = [scope.expression.right, scope.expression.left]

    while expression_stack:
        expression = expression_stack.pop()
        union_scope = union_scope_stack[-1]

        new_scope = union_scope.branch(
            expression,
            outer_columns=union_scope.outer_columns,
            scope_type=ScopeType.UNION,
        )

        if isinstance(expression, exp.SetOperation):
            yield from _traverse_ctes(new_scope)

            # Descend into the nested set operation instead of recursing.
            union_scope_stack.append(new_scope)
            expression_stack.extend([expression.right, expression.left])
            continue

        # NOTE: `scope` is deliberately rebound here; after the loop it holds the
        # last scope yielded for this operand and is used below.
        for scope in _traverse_scope(new_scope):
            yield scope

        if prev_scope:
            # Both operands of the innermost pending set operation are done.
            union_scope_stack.pop()
            union_scope.union_scopes = [prev_scope, scope]
            prev_scope = union_scope

            yield union_scope
        else:
            prev_scope = scope
638
+
639
+
640
def _traverse_ctes(scope):
    """
    Yield the scopes of every CTE in `scope` and register them as sources.

    Each CTE is branched with the CTE sources gathered so far. The last scope
    yielded for a CTE becomes its source and is appended to `scope.cte_scopes`.
    When done, `scope.sources` and `scope.cte_sources` are updated with all of them.

    Args:
        scope (Scope): scope whose CTEs should be traversed

    Yields:
        Scope: the scopes nested in each CTE, followed by the CTE's own scope
    """
    sources = {}

    for cte in scope.ctes:
        cte_name = cte.alias

        # if the scope is a recursive cte, it must be in the form of base_case UNION recursive.
        # thus the recursive scope is the first section of the union.
        with_ = scope.expression.args.get("with")
        if with_ and with_.recursive:
            union = cte.this

            if isinstance(union, exp.SetOperation):
                # Pre-register the CTE under its own name so the branch below can see it.
                sources[cte_name] = scope.branch(union.this, scope_type=ScopeType.CTE)

        child_scope = None

        for child_scope in _traverse_scope(
            scope.branch(
                cte.this,
                cte_sources=sources,
                outer_columns=cte.alias_column_names,
                scope_type=ScopeType.CTE,
            )
        ):
            yield child_scope

        # append the final child_scope yielded
        if child_scope:
            sources[cte_name] = child_scope
            scope.cte_scopes.append(child_scope)

    scope.sources.update(sources)
    scope.cte_sources.update(sources)
674
+
675
+
676
def _is_derived_table(expression: exp.Subquery) -> bool:
    """
    Tell whether `expression` is a Subquery that acts as a derived table.

    (tbl1 JOIN tbl2) is also represented as a Subquery, but it does not introduce
    a new scope and so is not a derived table. The exception is an aliased
    Subquery: the alias shadows every name underneath it.
    """
    if not isinstance(expression, exp.Subquery):
        return False

    # An alias is enough on its own; otherwise the Subquery must wrap an actual query.
    if expression.alias:
        return True

    return isinstance(expression.this, exp.UNWRAPPED_QUERIES)
685
+
686
+
687
def _is_from_or_join(expression: exp.Expression) -> bool:
    """
    Tell whether `expression` sits directly in the FROM or JOIN clause of a SELECT,
    ignoring any number of Subquery wrappers around it.
    """
    ancestor = expression.parent

    # Skip over arbitrarily nested Subquery wrappers.
    while isinstance(ancestor, exp.Subquery):
        ancestor = ancestor.parent

    in_from_or_join = isinstance(ancestor, (exp.From, exp.Join))
    return in_from_or_join
698
+
699
+
700
def _traverse_tables(scope):
    """
    Resolve the sources introduced by the FROM, JOIN and LATERAL items of `scope`.

    Yields every scope produced while traversing derived tables and UDTFs. When done,
    the collected name -> source mapping is merged into `scope.sources`.
    """
    found = {}
    root = scope.expression

    # Work list of FROM, JOIN and LATERAL items, in the order they are defined.
    # It is deliberately extended while being iterated, as nested joins are discovered.
    pending = []
    from_clause = root.args.get("from")
    if from_clause:
        pending.append(from_clause.this)

    pending.extend(join.this for join in root.args.get("joins") or [])

    if isinstance(root, exp.Table):
        pending.append(root)

    pending.extend(root.args.get("laterals") or [])

    for item in pending:
        if isinstance(item, exp.Final):
            item = item.this

        if isinstance(item, exp.Table):
            name = item.name
            alias_or_name = item.alias_or_name

            if name in scope.sources and not item.db:
                # This refers to a parent source (e.g. a CTE) rather than an actual table,
                # unless it is pivoted: pivoting gives back a new table, hence a new source.
                pivots = item.args.get("pivots")
                if pivots:
                    found[pivots[0].alias] = item
                else:
                    found[alias_or_name] = scope.sources[name]
            elif alias_or_name in found:
                found[find_new_name(found, name)] = item
            else:
                found[alias_or_name] = item

            # The root's own joins are already queued above; don't include them twice.
            if item is not root:
                pending.extend(join.this for join in item.args.get("joins") or [])

            continue

        if not isinstance(item, exp.DerivedTable):
            continue

        if isinstance(item, exp.UDTF):
            lateral_sources = found
            scope_type = ScopeType.UDTF
            bucket = scope.udtf_scopes
        elif _is_derived_table(item):
            lateral_sources = None
            scope_type = ScopeType.DERIVED_TABLE
            bucket = scope.derived_table_scopes
            pending.extend(join.this for join in item.args.get("joins") or [])
        else:
            # Not a scope of its own: queue what it wraps so that possible sources in
            # nested table constructs are still checked.
            pending.append(item.this)
            pending.extend(join.this for join in item.args.get("joins") or [])
            continue

        item_branch = scope.branch(
            item,
            lateral_sources=lateral_sources,
            outer_columns=item.alias_column_names,
            scope_type=scope_type,
        )

        last_scope = None
        for last_scope in _traverse_scope(item_branch):
            yield last_scope

            # Tables without aliases are stored under "". This shouldn't be a problem once
            # qualify_columns runs, as it adds aliases on everything. Until then, only a
            # single unaliased derived table is allowed (or rather, the latest one wins).
            found[item.alias] = last_scope

        # Record the final scope yielded for this item.
        if last_scope:
            bucket.append(last_scope)
            scope.table_scopes.append(last_scope)

    scope.sources.update(found)
785
+
786
+
787
def _traverse_subqueries(scope):
    """
    Traverse each subquery of `scope`, yielding every scope produced along the way.

    The last scope yielded for a subquery is appended to `scope.subquery_scopes`.
    """
    for subquery in scope.subqueries:
        top = None
        for top in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)):
            yield top

        # Skip the append when the traversal yielded nothing, so that None never ends up
        # in `subquery_scopes` (the CTE and table traversals guard the same way).
        if top is not None:
            scope.subquery_scopes.append(top)
794
+
795
+
796
def _traverse_udtfs(scope):
    """
    Traverse the derived tables nested directly inside an Unnest or Lateral scope.

    Yields every scope produced along the way. The last scope yielded for a derived
    table is appended to `scope.subquery_scopes` and registered under its alias; the
    collected mapping is merged into `scope.sources` at the end.
    """
    udtf = scope.expression
    if isinstance(udtf, exp.Unnest):
        candidates = udtf.expressions
    elif isinstance(udtf, exp.Lateral):
        candidates = [udtf.this]
    else:
        candidates = []

    sources = {}
    for candidate in candidates:
        if not _is_derived_table(candidate):
            continue

        candidate_branch = scope.branch(
            candidate,
            scope_type=ScopeType.SUBQUERY,
            outer_columns=candidate.alias_column_names,
        )

        top = None
        for top in _traverse_scope(candidate_branch):
            yield top
            # Each yielded scope overwrites the previous one, so the last one wins.
            sources[candidate.alias] = top

        # Skip the append when the traversal yielded nothing, so that None never ends up
        # in `subquery_scopes` (the CTE and table traversals guard the same way).
        if top is not None:
            scope.subquery_scopes.append(top)

    scope.sources.update(sources)
822
+
823
+
824
def walk_in_scope(expression, bfs=True, prune=None):
    """
    Returns a generator object which visits all nodes in the syntax tree, stopping at
    nodes that start child scopes.

    A node that starts a child scope is itself yielded, but its subtree is not visited.
    The "joins", "laterals" and "pivots" args of a Subquery or UDTF boundary node are
    still visited, because they are not part of the inner scope.

    Args:
        expression (exp.Expression): the node to start walking from.
        bfs (bool): if set to True the BFS traversal order will be applied,
            otherwise the DFS traversal will be used instead.
        prune ((node) -> bool): callable that returns True if
            the generator should stop traversing this branch of the tree.

    Yields:
        exp.Expression: each visited node.
    """
    # We'll use this variable to pass state into the walk generator.
    # Whenever we set it to True, we exclude a subtree from traversal. The lambda below
    # reads it lazily, so the value that matters is the one set after the node was yielded.
    # NOTE(review): this relies on `walk` consulting `prune` for a node only after this
    # loop body has run for it; `walk` is defined outside this block, so confirm there.
    crossed_scope_boundary = False

    for node in expression.walk(
        bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n))
    ):
        crossed_scope_boundary = False

        yield node

        # The starting node is never treated as a boundary, whatever its type.
        if node is expression:
            continue

        if (
            isinstance(node, exp.CTE)
            or (
                isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
            )
            or isinstance(node, exp.UNWRAPPED_QUERIES)
        ):
            crossed_scope_boundary = True

            if isinstance(node, (exp.Subquery, exp.UDTF)):
                # The following args are not actually in the inner scope, so we should visit them.
                # The caller's `prune` is forwarded so pruning applies to these walks as well.
                for key in ("joins", "laterals", "pivots"):
                    for arg in node.args.get(key) or []:
                        yield from walk_in_scope(arg, bfs=bfs, prune=prune)
867
+
868
+
869
def find_all_in_scope(expression, expression_types, bfs=True):
    """
    Returns a generator object which visits all nodes in this scope and only yields those that
    match at least one of the specified expression types.

    This does NOT traverse into subscopes.

    Args:
        expression (exp.Expression): the node to start searching from.
        expression_types (tuple[type]|type): the expression type(s) to match.
        bfs (bool): True to use breadth-first search, False to use depth-first.

    Yields:
        exp.Expression: nodes
    """
    # Normalise the requested type(s) once rather than rebuilding the tuple for every node.
    wanted_types = tuple(ensure_collection(expression_types))

    for node in walk_in_scope(expression, bfs=bfs):
        if isinstance(node, wanted_types):
            yield node
887
+
888
+
889
def find_in_scope(expression, expression_types, bfs=True):
    """
    Returns the first node in this scope which matches at least one of the specified types.

    This does NOT traverse into subscopes.

    Args:
        expression (exp.Expression): the node to start searching from.
        expression_types (tuple[type]|type): the expression type(s) to match.
        bfs (bool): True to use breadth-first search, False to use depth-first.

    Returns:
        exp.Expression: the node which matches the criteria or None if no node matching
        the criteria was found.
    """
    # Stop at the first match; the rest of the scope is never visited.
    for match in find_all_in_scope(expression, expression_types, bfs=bfs):
        return match

    return None