altimate-code 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/bin/altimate +6 -0
  3. package/bin/altimate-code +6 -0
  4. package/dbt-tools/bin/altimate-dbt +2 -0
  5. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  96. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  98. package/dbt-tools/dist/index.js +23859 -0
  99. package/package.json +13 -13
  100. package/postinstall.mjs +42 -0
  101. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,302 @@
1
+ from sqlglot import exp
2
+ from sqlglot.helper import name_sequence
3
+ from sqlglot.optimizer.scope import ScopeType, find_in_scope, traverse_scope
4
+
5
+
6
def unnest_subqueries(expression):
    """
    Turn subquery predicates in a sqlglot AST into joins, modifying the tree in place.

    Every scope produced by ``traverse_scope`` whose expression has a parent select is
    considered. Scopes that reference external (outer) columns are handed to
    ``decorrelate``; otherwise scopes of type ``ScopeType.SUBQUERY`` are handed to
    ``unnest``. Scalar subqueries end up as cross joins, while correlated or vectorized
    subqueries are grouped first so the resulting left join is not many to many.

    Example:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one("SELECT * FROM x AS x WHERE (SELECT y.a AS a FROM y AS y WHERE x.a = y.a) = 1 ")
        >>> unnest_subqueries(expression).sql()
        'SELECT * FROM x AS x LEFT JOIN (SELECT y.a AS a FROM y AS y WHERE TRUE GROUP BY y.a) AS _u_0 ON x.a = _u_0.a WHERE _u_0.a = 1'

    Args:
        expression (sqlglot.Expression): expression to unnest
    Returns:
        sqlglot.Expression: the same expression object that was passed in
    """
    # One shared generator so every alias introduced during this pass is distinct.
    fresh_alias = name_sequence("_u_")

    for scope in traverse_scope(expression):
        subquery = scope.expression
        outer_select = subquery.parent_select

        # Only nested selects are candidates; the outermost query has no parent select.
        if outer_select:
            if scope.external_columns:
                decorrelate(subquery, outer_select, scope.external_columns, fresh_alias)
            elif scope.scope_type == ScopeType.SUBQUERY:
                unnest(subquery, outer_select, fresh_alias)

    return expression
37
+
38
+
39
def unnest(select, parent_select, next_alias_name):
    """
    Convert an uncorrelated, single-projection subquery used in a predicate into a join.

    The rewrite mutates ``parent_select`` (and the subquery) in place and returns None.
    Two shapes are handled:

    * the enclosing condition is neither ``IN`` nor ``ANY``: the subquery node is replaced
      by a column reference (optionally wrapped in ``MAX``) and the subquery is attached
      to ``parent_select`` as a CROSS join;
    * the enclosing condition is ``IN`` / ``ANY``: the predicate is replaced by a
      "join key IS NOT NULL" check and the subquery is attached as a LEFT join on
      ``<other operand> = <join key>``.

    The function returns without attaching a join when the subquery has more than one
    projection, when no enclosing ``exp.Condition`` belongs to ``parent_select``, when
    ``parent_select`` has no FROM, or (IN / ANY only) when the subquery contains
    LIMIT / OFFSET.

    Args:
        select: the subquery's select (or set operation) expression.
        parent_select: the select that directly contains the subquery.
        next_alias_name: zero-argument callable returning a fresh alias name.
    """
    # Multi-column subqueries are left untouched.
    if len(select.selects) > 1:
        return

    predicate = select.find_ancestor(exp.Condition)
    if (
        not predicate
        or parent_select is not predicate.parent_select
        or not parent_select.args.get("from")
    ):
        return

    # A set operation is wrapped in an outer SELECT of the same projections so the
    # rest of the function can treat it like a plain select.
    if isinstance(select, exp.SetOperation):
        select = exp.select(*select.selects).from_(select.subquery(next_alias_name()))

    # NOTE: from here on an alias name has been consumed even if we bail out below.
    alias = next_alias_name()
    clause = predicate.find_ancestor(exp.Having, exp.Where, exp.Join)

    # This subquery returns a scalar and can just be converted to a cross join
    if not isinstance(predicate, (exp.In, exp.Any)):
        column = exp.column(select.selects[0].alias_or_name, alias)

        clause_parent_select = clause.parent_select if clause else None

        # Wrap the reference in MAX when the predicate sits in parent_select's HAVING,
        # or when it is not inside a HAVING/WHERE/JOIN owned by parent_select and
        # parent_select is grouped or has an aggregate among its projections.
        # (The generator's `select` shadows the outer name only inside the generator.)
        if (isinstance(clause, exp.Having) and clause_parent_select is parent_select) or (
            (not clause or clause_parent_select is not parent_select)
            and (
                parent_select.args.get("group")
                or any(find_in_scope(select, exp.AggFunc) for select in parent_select.selects)
            )
        ):
            column = exp.Max(this=column)
        elif not isinstance(select.parent, exp.Subquery):
            return

        _replace(select.parent, column)
        parent_select.join(select, join_type="CROSS", join_alias=alias, copy=False)
        return

    # IN / ANY subqueries with LIMIT or OFFSET are not rewritten.
    if select.find(exp.Limit, exp.Offset):
        return

    # For ANY, the node to rewrite is the enclosing equality; other comparison
    # operators around ANY are not handled here.
    if isinstance(predicate, exp.Any):
        predicate = predicate.find_ancestor(exp.EQ)

        if not predicate or parent_select is not predicate.parent_select:
            return

    column = _other_operand(predicate)
    value = select.selects[0]

    # NOTE(review): this uses value.alias whereas the scalar branch above uses
    # alias_or_name — presumably projections are always aliased by this point; confirm.
    join_key = exp.column(value.alias, alias)
    join_key_not_null = join_key.is_(exp.null()).not_()

    if isinstance(clause, exp.Join):
        # Inside a JOIN clause the predicate is neutralised and the NOT NULL
        # check is added to parent_select's WHERE instead.
        _replace(predicate, exp.true())
        parent_select.where(join_key_not_null, copy=False)
    else:
        _replace(predicate, join_key_not_null)

    group = select.args.get("group")

    if group:
        # Existing GROUP BY that is not exactly the projected value: wrap the
        # subquery as "_q" and group the outer select by the projected column.
        if {value.this} != set(group.expressions):
            select = (
                exp.select(exp.alias_(exp.column(value.alias, "_q"), value.alias))
                .from_(select.subquery("_q", copy=False), copy=False)
                .group_by(exp.column(value.alias, "_q"), copy=False)
            )
    elif not find_in_scope(value.this, exp.AggFunc):
        # No GROUP BY and no aggregate in the value: group by the value itself.
        select = select.group_by(value.this, copy=False)

    parent_select.join(
        select,
        on=column.eq(join_key),
        join_type="LEFT",
        join_alias=alias,
        copy=False,
    )
118
+
119
+
120
def decorrelate(select, parent_select, external_columns, next_alias_name):
    """
    Rewrite a correlated subquery as a grouped LEFT join on ``parent_select``, in place.

    Each external column must appear under the subquery's WHERE inside a binary
    predicate, and at least one of those predicates must be an equality. The
    correlation predicates are replaced by TRUE inside the subquery, the subquery is
    grouped by the collected join keys, and the equality predicates become the ON
    condition of the join. The expression that used the subquery (EXISTS, ALL, ANY, IN
    or a plain subquery reference) is rewritten to reference the joined table's columns.

    Returns None. The function returns early when the subquery has no WHERE, when the
    WHERE contains an OR, when the subquery contains LIMIT / OFFSET, when an external
    column is not directly governed by that WHERE through a binary predicate, or when
    no equality predicate is found. All early returns except the first occur after one
    alias name has already been drawn from ``next_alias_name``.

    Args:
        select: the correlated subquery's select expression.
        parent_select: the select that contains the subquery.
        external_columns: columns inside ``select`` that refer to outer sources.
        next_alias_name: zero-argument callable returning a fresh alias name.
    """
    where = select.args.get("where")

    if not where or where.find(exp.Or) or select.find(exp.Limit, exp.Offset):
        return

    table_alias = next_alias_name()
    # Each entry is (subquery-side key expression, external column, enclosing predicate).
    keys = []

    # for all external columns in the where statement, find the relevant predicate
    # keys to convert it into a join
    for column in external_columns:
        if column.find_ancestor(exp.Where) is not where:
            return

        predicate = column.find_ancestor(exp.Predicate)

        if not predicate or predicate.find_ancestor(exp.Where) is not where:
            return

        if isinstance(predicate, exp.Binary):
            # The key is whichever operand does not contain the external column.
            key = (
                predicate.right
                if any(node is column for node in predicate.left.walk())
                else predicate.left
            )
        else:
            return

        keys.append((key, column, predicate))

    # Without at least one equality there is nothing to join on.
    if not any(isinstance(predicate, exp.EQ) for *_, predicate in keys):
        return

    # True when the subquery node itself is one of parent_select's (unaliased) projections.
    is_subquery_projection = any(
        node is select.parent
        for node in map(lambda s: s.unalias(), parent_select.selects)
        if isinstance(node, exp.Subquery)
    )

    value = select.selects[0]
    # Maps each key expression to the alias it is projected under in the subquery.
    key_aliases = {}
    group_by = []

    for key, _, predicate in keys:
        # if we filter on the value of the subquery, it needs to be unique
        if key == value.this:
            key_aliases[key] = value.alias
            group_by.append(key)
        else:
            if key not in key_aliases:
                key_aliases[key] = next_alias_name()
            # all predicates that are equalities must also be in the unique
            # so that we don't do a many to many join
            if isinstance(predicate, exp.EQ) and key not in group_by:
                group_by.append(key)

    parent_predicate = select.find_ancestor(exp.Predicate)

    # if the value of the subquery is not an agg or a key, we need to collect it into an array
    # so that it can be grouped. For subquery projections, we use a MAX aggregation instead.
    agg_func = exp.Max if is_subquery_projection else exp.ArrayAgg
    if not value.find(exp.AggFunc) and value.this not in group_by:
        # append=False replaces the existing projection list with the aggregated value.
        select.select(
            exp.alias_(agg_func(this=value.this), value.alias, quoted=False),
            append=False,
            copy=False,
        )

    # exists queries should not have any selects as it only checks if there are any rows
    # all selects will be added by the optimizer and only used for join keys
    if isinstance(parent_predicate, exp.Exists):
        select.args["expressions"] = []

    for key, alias in key_aliases.items():
        if key in group_by:
            # add all keys to the projections of the subquery
            # so that we can use it as a join key
            if isinstance(parent_predicate, exp.Exists) or key != value.this:
                select.select(f"{key} AS {alias}", copy=False)
        else:
            # Non-grouped keys are aggregated (on a copy) so they survive the GROUP BY.
            select.select(exp.alias_(agg_func(this=key.copy()), alias, quoted=False), copy=False)

    # `alias` is reused from here on as the column reference to the subquery's value
    # on the joined table.
    alias = exp.column(value.alias, table_alias)
    other = _other_operand(parent_predicate)
    # Type of the comparison wrapping the predicate; used to rebuild it for ANY / ALL.
    op_type = type(parent_predicate.parent) if parent_predicate else None

    if isinstance(parent_predicate, exp.Exists):
        # EXISTS becomes a NOT NULL check on the first join key of the joined table.
        alias = exp.column(list(key_aliases.values())[0], table_alias)
        parent_predicate = _replace(parent_predicate, f"NOT {alias} IS NULL")
    elif isinstance(parent_predicate, exp.All):
        # NOTE: assert is skipped under `python -O`.
        assert issubclass(op_type, exp.Binary)
        predicate = op_type(this=other, expression=exp.column("_x"))
        parent_predicate = _replace(
            parent_predicate.parent, f"ARRAY_ALL({alias}, _x -> {predicate})"
        )
    elif isinstance(parent_predicate, exp.Any):
        assert issubclass(op_type, exp.Binary)
        if value.this in group_by:
            predicate = op_type(this=other, expression=alias)
            parent_predicate = _replace(parent_predicate.parent, predicate)
        else:
            predicate = op_type(this=other, expression=exp.column("_x"))
            # NOTE(review): unlike the ALL branch and the grouped ANY branch, this
            # replaces the ANY node itself rather than its parent comparison — confirm
            # this is intended.
            parent_predicate = _replace(parent_predicate, f"ARRAY_ANY({alias}, _x -> {predicate})")
    elif isinstance(parent_predicate, exp.In):
        if value.this in group_by:
            parent_predicate = _replace(parent_predicate, f"{other} = {alias}")
        else:
            parent_predicate = _replace(
                parent_predicate,
                f"ARRAY_ANY({alias}, _x -> _x = {parent_predicate.this})",
            )
    else:
        # Plain subquery reference: keep the projection's alias if it had one.
        if is_subquery_projection and select.parent.alias:
            alias = exp.alias_(alias, select.parent.alias)

        # COUNT always returns 0 on empty datasets, so we need take that into consideration here
        # by transforming all counts into 0 and using that as the coalesced value
        if value.find(exp.Count):

            def remove_aggs(node):
                # COUNT -> 0, any other aggregate -> NULL, everything else unchanged.
                if isinstance(node, exp.Count):
                    return exp.Literal.number(0)
                elif isinstance(node, exp.AggFunc):
                    return exp.null()
                return node

            alias = exp.Coalesce(this=alias, expressions=[value.this.transform(remove_aggs)])

        select.parent.replace(alias)

    for key, column, predicate in keys:
        # Neutralise the correlation predicate inside the subquery; the detached
        # predicate object is reused below (join ON list / outer WHERE).
        predicate.replace(exp.true())
        nested = exp.column(key_aliases[key], table_alias)

        if is_subquery_projection:
            key.replace(nested)
            # Non-equality correlations move to the outer WHERE.
            if not isinstance(predicate, exp.EQ):
                parent_select.where(predicate, copy=False)
            continue

        if key in group_by:
            key.replace(nested)
        elif isinstance(predicate, exp.EQ):
            parent_predicate = _replace(
                parent_predicate,
                f"({parent_predicate} AND ARRAY_CONTAINS({nested}, {column}))",
            )
        else:
            # The key becomes the lambda parameter `_x` before the predicate is
            # rendered into the ARRAY_ANY string below.
            key.replace(exp.to_identifier("_x"))
            parent_predicate = _replace(
                parent_predicate,
                f"({parent_predicate} AND ARRAY_ANY({nested}, _x -> {predicate}))",
            )

    parent_select.join(
        select.group_by(*group_by, copy=False),
        on=[predicate for *_, predicate in keys if isinstance(predicate, exp.EQ)],
        join_type="LEFT",
        join_alias=table_alias,
        copy=False,
    )
282
+
283
+
284
def _replace(expression, condition):
    """
    Substitute ``expression`` in its tree with ``condition`` and return the result of
    ``expression.replace``.

    ``condition`` is first passed through ``exp.condition``; callers in this module
    supply either an expression node or a SQL string.
    """
    replacement = exp.condition(condition)
    return expression.replace(replacement)
286
+
287
+
288
def _other_operand(expression):
    """
    Return the operand that a subquery-style predicate is being compared against.

    * ``IN``: its ``this`` argument.
    * ``ANY`` / ``ALL``: resolved from the parent node instead.
    * any other binary expression: the right operand when the left one is a
      Subquery / Any / Exists / All node, otherwise the left operand.
    * anything else (including ``None``): ``None``.
    """
    subquery_like = (exp.Subquery, exp.Any, exp.Exists, exp.All)

    # ANY / ALL carry no operand of their own; climb to the node that wraps them.
    node = expression
    while not isinstance(node, exp.In) and isinstance(node, (exp.Any, exp.All)):
        node = node.parent

    if isinstance(node, exp.In):
        return node.this

    if not isinstance(node, exp.Binary):
        return None

    if isinstance(node.left, subquery_like):
        return node.right
    return node.left