altimate-code 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +1 -5
  3. package/bin/altimate +6 -0
  4. package/bin/altimate-code +6 -0
  5. package/dbt-tools/bin/altimate-dbt +2 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  96. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  98. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  99. package/dbt-tools/dist/index.js +23859 -0
  100. package/package.json +13 -13
  101. package/postinstall.mjs +42 -0
  102. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,212 @@
1
+ import typing as t
2
+
3
+
4
class DataType:
    """Base class for all DataFrame SQL data types.

    Equality is structural (same class and same instance attributes), and
    hashing is derived from the string rendering so equal instances hash
    equally.
    """

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: t.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other: t.Any) -> bool:
        return not self.__eq__(other)

    def __str__(self) -> str:
        return self.typeName()

    @classmethod
    def typeName(cls) -> str:
        # Drop the trailing "Type" suffix from the class name and lowercase it.
        return cls.__name__[:-4].lower()

    def simpleString(self) -> str:
        return str(self)

    def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
        return str(self)
29
+
30
+
31
class DataTypeWithLength(DataType):
    """Base for types that carry an explicit length, e.g. CHAR(n)/VARCHAR(n)."""

    def __init__(self, length: int):
        self.length = length

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.length})"

    def __str__(self) -> str:
        return f"{self.typeName()}({self.length})"
40
+
41
+
42
class StringType(DataType):
    """String type."""


class CharType(DataTypeWithLength):
    """Fixed-length character type."""


class VarcharType(DataTypeWithLength):
    """Variable-length character type."""


class BinaryType(DataType):
    """Binary (byte array) type."""


class BooleanType(DataType):
    """Boolean type."""


class DateType(DataType):
    """Calendar date type."""


class TimestampType(DataType):
    """Timestamp type."""
68
+
69
+
70
class TimestampNTZType(DataType):
    """Timestamp-without-time-zone type; reports the name "timestamp_ntz"."""

    @classmethod
    def typeName(cls) -> str:
        # The inherited implementation would derive "timestampntz"; the
        # canonical name uses an underscore, so override it explicitly.
        return "timestamp_ntz"
74
+
75
+
76
class DecimalType(DataType):
    """Fixed-precision decimal type, defaulting to decimal(10, 0)."""

    def __init__(self, precision: int = 10, scale: int = 0):
        self.precision = precision
        self.scale = scale

    def __repr__(self) -> str:
        return f"DecimalType({self.precision}, {self.scale})"

    def simpleString(self) -> str:
        return f"decimal({self.precision}, {self.scale})"

    def jsonValue(self) -> str:
        return f"decimal({self.precision}, {self.scale})"
89
+
90
+
91
class DoubleType(DataType):
    """Double-precision floating point type."""


class FloatType(DataType):
    """Single-precision floating point type."""
97
+
98
+
99
class ByteType(DataType):
    """Byte integer type; renders as "tinyint"."""

    def __str__(self) -> str:
        return "tinyint"


class IntegerType(DataType):
    """Integer type; renders as "int"."""

    def __str__(self) -> str:
        return "int"


class LongType(DataType):
    """Long integer type; renders as "bigint"."""

    def __str__(self) -> str:
        return "bigint"


class ShortType(DataType):
    """Short integer type; renders as "smallint"."""

    def __str__(self) -> str:
        return "smallint"
117
+
118
+
119
class ArrayType(DataType):
    """Array type containing elements of ``elementType``.

    Args:
        elementType: The DataType of each array element.
        containsNull: Whether the array may contain null elements.
    """

    def __init__(self, elementType: DataType, containsNull: bool = True):
        self.elementType = elementType
        self.containsNull = containsNull

    def __repr__(self) -> str:
        # BUG FIX: the previous f-string formatted a *tuple*
        # (self.elementType, str(self.containsNull)) and was missing the
        # closing parenthesis, yielding e.g. "ArrayType((int(), 'True')".
        # Format the two fields separately, matching MapType/StructField.
        return f"ArrayType({self.elementType}, {str(self.containsNull)})"

    def simpleString(self) -> str:
        return f"array<{self.elementType.simpleString()}>"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {
            "type": self.typeName(),
            "elementType": self.elementType.jsonValue(),
            "containsNull": self.containsNull,
        }
136
+
137
+
138
class MapType(DataType):
    """Map type with keys of ``keyType`` and values of ``valueType``."""

    def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
        self.keyType = keyType
        self.valueType = valueType
        self.valueContainsNull = valueContainsNull

    def __repr__(self) -> str:
        return f"MapType({self.keyType}, {self.valueType}, {str(self.valueContainsNull)})"

    def simpleString(self) -> str:
        return f"map<{self.keyType.simpleString()}, {self.valueType.simpleString()}>"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        payload = {
            "type": self.typeName(),
            "keyType": self.keyType.jsonValue(),
            "valueType": self.valueType.jsonValue(),
            "valueContainsNull": self.valueContainsNull,
        }
        return payload
157
+
158
+
159
class StructField(DataType):
    """A single named, typed field inside a StructType."""

    def __init__(
        self,
        name: str,
        dataType: DataType,
        nullable: bool = True,
        metadata: t.Optional[t.Dict[str, t.Any]] = None,
    ):
        self.name = name
        self.dataType = dataType
        self.nullable = nullable
        # Never store None: downstream consumers expect a dict.
        self.metadata = metadata or {}

    def __repr__(self) -> str:
        return f"StructField('{self.name}', {self.dataType}, {str(self.nullable)})"

    def simpleString(self) -> str:
        return f"{self.name}:{self.dataType.simpleString()}"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {
            "name": self.name,
            "type": self.dataType.jsonValue(),
            "nullable": self.nullable,
            "metadata": self.metadata,
        }
185
+
186
+
187
class StructType(DataType):
    """An ordered collection of StructFields; iterable and sized."""

    def __init__(self, fields: t.Optional[t.List[StructField]] = None):
        # An absent/empty argument yields an empty struct; `names` mirrors
        # the field order for quick name access.
        self.fields = fields or []
        self.names = [f.name for f in self.fields]

    def __iter__(self) -> t.Iterator[StructField]:
        return iter(self.fields)

    def __len__(self) -> int:
        return len(self.fields)

    def __repr__(self) -> str:
        rendered = ", ".join(str(field) for field in self)
        return f"StructType({rendered})"

    def simpleString(self) -> str:
        return f"struct<{', '.join(x.simpleString() for x in self)}>"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {"type": self.typeName(), "fields": [x.jsonValue() for x in self]}

    def fieldNames(self) -> t.List[str]:
        # Return a copy so callers cannot mutate internal state.
        return list(self.names)
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot import expressions as exp
6
+ from sqlglot.dataframe.sql import types
7
+
8
+ if t.TYPE_CHECKING:
9
+ from sqlglot.dataframe.sql._typing import SchemaInput
10
+
11
+
12
def get_column_mapping_from_schema_input(schema: SchemaInput) -> t.Dict[str, t.Optional[str]]:
    """Normalize the supported schema inputs into a column-name -> type-string map.

    Accepts a dict (returned as-is), a comma-separated "name: type" string,
    a StructType, or a plain iterable of column names (mapped to None types).
    """
    if isinstance(schema, dict):
        return schema
    if isinstance(schema, str):
        # "a: int, b: string" -> {"a": "int", "b": "string"}
        return {
            piece.split(":")[0].strip(): piece.split(":")[1].strip()
            for piece in (part.strip() for part in schema.split(","))
        }
    if isinstance(schema, types.StructType):
        return {field.name: field.dataType.simpleString() for field in schema}
    return {x.strip(): None for x in schema}  # type: ignore
24
+
25
+
26
def get_tables_from_expression_with_join(expression: exp.Select) -> t.List[exp.Table]:
    """Return every table participating in a SELECT's joins, left table first.

    Returns an empty list when the SELECT has no joins at all.
    """
    joins = expression.args.get("joins")
    if not joins:
        return []

    tables = [expression.args["from"].this]
    tables.extend(join.this for join in joins)
    return tables
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import typing as t
5
+
6
+ from sqlglot import expressions as exp
7
+ from sqlglot.dataframe.sql import functions as F
8
+ from sqlglot.helper import flatten
9
+
10
+ if t.TYPE_CHECKING:
11
+ from sqlglot.dataframe.sql._typing import ColumnOrName
12
+
13
+
14
class Window:
    """Static factory for window specifications, mirroring PySpark's Window API."""

    # Java long range; PySpark uses these sentinels to denote unbounded frames.
    _JAVA_MIN_LONG = -(1 << 63)  # -9223372036854775808
    _JAVA_MAX_LONG = (1 << 63) - 1  # 9223372036854775807
    _PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG)
    _FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG)

    unboundedPreceding: int = _JAVA_MIN_LONG
    unboundedFollowing: int = _JAVA_MAX_LONG
    currentRow: int = 0

    @classmethod
    def partitionBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        """Create a WindowSpec partitioned by the given columns."""
        return WindowSpec().partitionBy(*cols)

    @classmethod
    def orderBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        """Create a WindowSpec ordered by the given columns."""
        return WindowSpec().orderBy(*cols)

    @classmethod
    def rowsBetween(cls, start: int, end: int) -> WindowSpec:
        """Create a WindowSpec with a ROWS frame spanning `start` to `end`."""
        return WindowSpec().rowsBetween(start, end)

    @classmethod
    def rangeBetween(cls, start: int, end: int) -> WindowSpec:
        """Create a WindowSpec with a RANGE frame spanning `start` to `end`."""
        return WindowSpec().rangeBetween(start, end)
41
+
42
+
43
class WindowSpec:
    """A builder around a sqlglot ``exp.Window`` expression.

    Each builder method copies the spec before mutating it, so instances
    behave immutably from the caller's point of view (mirrors PySpark).
    """

    def __init__(self, expression: exp.Expression = exp.Window()):
        # NOTE(review): mutable default argument — every bare WindowSpec()
        # shares this single exp.Window instance; copy() is relied upon
        # before any mutation. Verify no call path mutates it in place.
        self.expression = expression

    def copy(self):
        # Deep-copies the underlying sqlglot expression.
        return WindowSpec(self.expression.copy())

    def sql(self, **kwargs) -> str:
        from sqlglot.dataframe.sql.session import SparkSession

        # Render SQL using the dialect of the active SparkSession.
        return self.expression.sql(dialect=SparkSession().dialect, **kwargs)

    def partitionBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        from sqlglot.dataframe.sql.column import Column

        # Accept either varargs or a single list/set/tuple of columns.
        cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
        expressions = [Column.ensure_col(x).expression for x in cols]
        window_spec = self.copy()
        # Append to any partition-by expressions already present on the copy.
        partition_by_expressions = window_spec.expression.args.get("partition_by", [])
        partition_by_expressions.extend(expressions)
        window_spec.expression.set("partition_by", partition_by_expressions)
        return window_spec

    def orderBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        from sqlglot.dataframe.sql.column import Column

        # Accept either varargs or a single list/set/tuple of columns.
        cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
        expressions = [Column.ensure_col(x).expression for x in cols]
        window_spec = self.copy()
        # Lazily create the ORDER clause, then extend its expression list.
        if window_spec.expression.args.get("order") is None:
            window_spec.expression.set("order", exp.Order(expressions=[]))
        order_by = window_spec.expression.args["order"].expressions
        order_by.extend(expressions)
        window_spec.expression.args["order"].set("expressions", order_by)
        return window_spec

    def _calc_start_end(
        self, start: int, end: int
    ) -> t.Dict[str, t.Optional[t.Union[str, exp.Expression]]]:
        # Translate PySpark's numeric frame boundaries into the string /
        # expression arguments expected by exp.WindowSpec.
        kwargs: t.Dict[str, t.Optional[t.Union[str, exp.Expression]]] = {
            "start_side": None,
            "end_side": None,
        }
        if start == Window.currentRow:
            kwargs["start"] = "CURRENT ROW"
        else:
            kwargs = {
                **kwargs,
                **{
                    "start_side": "PRECEDING",
                    # Any value at or below unboundedPreceding means UNBOUNDED.
                    "start": "UNBOUNDED"
                    if start <= Window.unboundedPreceding
                    else F.lit(start).expression,
                },
            }
        if end == Window.currentRow:
            kwargs["end"] = "CURRENT ROW"
        else:
            kwargs = {
                **kwargs,
                **{
                    "end_side": "FOLLOWING",
                    # Any value at or above unboundedFollowing means UNBOUNDED.
                    "end": "UNBOUNDED"
                    if end >= Window.unboundedFollowing
                    else F.lit(end).expression,
                },
            }
        return kwargs

    def rowsBetween(self, start: int, end: int) -> WindowSpec:
        window_spec = self.copy()
        spec = self._calc_start_end(start, end)
        spec["kind"] = "ROWS"
        # Merge with any pre-existing frame spec so earlier settings survive.
        window_spec.expression.set(
            "spec",
            exp.WindowSpec(
                **{**window_spec.expression.args.get("spec", exp.WindowSpec()).args, **spec}
            ),
        )
        return window_spec

    def rangeBetween(self, start: int, end: int) -> WindowSpec:
        window_spec = self.copy()
        spec = self._calc_start_end(start, end)
        spec["kind"] = "RANGE"
        # Merge with any pre-existing frame spec so earlier settings survive.
        window_spec.expression.set(
            "spec",
            exp.WindowSpec(
                **{**window_spec.expression.args.get("spec", exp.WindowSpec()).args, **spec}
            ),
        )
        return window_spec
@@ -0,0 +1,118 @@
1
+ # ruff: noqa: F401
2
+ """
3
+ ## Dialects
4
+
5
+ While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
6
+ to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
7
+ SQL transpilation framework.
8
+
9
+ The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
10
+
11
+ Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
12
+ classes as needed.
13
+
14
+ ### Implementing a custom Dialect
15
+
16
+ Creating a new SQL dialect may seem complicated at first, but it is actually quite simple in SQLGlot:
17
+
18
+ ```python
19
+ from sqlglot import exp
20
+ from sqlglot.dialects.dialect import Dialect
21
+ from sqlglot.generator import Generator
22
+ from sqlglot.tokens import Tokenizer, TokenType
23
+
24
+
25
+ class Custom(Dialect):
26
+ class Tokenizer(Tokenizer):
27
+ QUOTES = ["'", '"'] # Strings can be delimited by either single or double quotes
28
+ IDENTIFIERS = ["`"] # Identifiers can be delimited by backticks
29
+
30
+ # Associates certain meaningful words with tokens that capture their intent
31
+ KEYWORDS = {
32
+ **Tokenizer.KEYWORDS,
33
+ "INT64": TokenType.BIGINT,
34
+ "FLOAT64": TokenType.DOUBLE,
35
+ }
36
+
37
+ class Generator(Generator):
38
+ # Specifies how AST nodes, i.e. subclasses of exp.Expression, should be converted into SQL
39
+ TRANSFORMS = {
40
+ exp.Array: lambda self, e: f"[{self.expressions(e)}]",
41
+ }
42
+
43
+ # Specifies how AST nodes representing data types should be converted into SQL
44
+ TYPE_MAPPING = {
45
+ exp.DataType.Type.TINYINT: "INT64",
46
+ exp.DataType.Type.SMALLINT: "INT64",
47
+ exp.DataType.Type.INT: "INT64",
48
+ exp.DataType.Type.BIGINT: "INT64",
49
+ exp.DataType.Type.DECIMAL: "NUMERIC",
50
+ exp.DataType.Type.FLOAT: "FLOAT64",
51
+ exp.DataType.Type.DOUBLE: "FLOAT64",
52
+ exp.DataType.Type.BOOLEAN: "BOOL",
53
+ exp.DataType.Type.TEXT: "STRING",
54
+ }
55
+ ```
56
+
57
+ The above example demonstrates how certain parts of the base `Dialect` class can be overridden to match a different
58
+ specification. Even though it is a fairly realistic starting point, we strongly encourage the reader to study existing
59
+ dialect implementations in order to understand how their various components can be modified, depending on the use-case.
60
+
61
+ ----
62
+ """
63
+
64
import importlib
import threading

# Names of every supported dialect class; each lives in a module named
# after its lowercased class name (e.g. BigQuery -> bigquery.py).
DIALECTS = [
    "Athena",
    "BigQuery",
    "ClickHouse",
    "Databricks",
    "Doris",
    "Drill",
    "Druid",
    "DuckDB",
    "Dune",
    "Hive",
    "Materialize",
    "MySQL",
    "Oracle",
    "Postgres",
    "Presto",
    "PRQL",
    "Redshift",
    "RisingWave",
    "Snowflake",
    "Spark",
    "Spark2",
    "SQLite",
    "StarRocks",
    "Tableau",
    "Teradata",
    "Trino",
    "TSQL",
]

MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
DIALECT_MODULE_NAMES = MODULE_BY_DIALECT.values()

# Attribute-name -> module-name map backing the lazy loader below; the
# `Dialect`/`Dialects` helpers both live in the `dialect` module.
MODULE_BY_ATTRIBUTE = {
    **MODULE_BY_DIALECT,
    "Dialect": "dialect",
    "Dialects": "dialect",
}

__all__ = list(MODULE_BY_ATTRIBUTE)

_import_lock = threading.Lock()


def __getattr__(name):
    """Lazily import dialect modules on first attribute access (PEP 562)."""
    module_name = MODULE_BY_ATTRIBUTE.get(name)
    if module_name is None:
        raise AttributeError(f"module {__name__} has no attribute {name}")
    # Serialize imports so concurrent first accesses don't race.
    with _import_lock:
        module = importlib.import_module(f"sqlglot.dialects.{module_name}")
    return getattr(module, name)
@@ -0,0 +1,166 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot import exp
6
+ from sqlglot.dialects.trino import Trino
7
+ from sqlglot.dialects.hive import Hive
8
+ from sqlglot.tokens import TokenType
9
+
10
+
11
def _generate_as_hive(expression: exp.Expression) -> bool:
    """Decide whether `expression` should be generated with Hive syntax.

    Athena routes DDL to either its Hive or Trino engine; this mirrors
    that routing so SQL is emitted for the engine that will execute it.
    Returns True for the Hive engine, False for Trino.
    """
    if isinstance(expression, exp.Create):
        if expression.kind == "TABLE":
            properties: t.Optional[exp.Properties] = expression.args.get("properties")
            if properties and properties.find(exp.ExternalProperty):
                return True  # CREATE EXTERNAL TABLE is Hive

            if not isinstance(expression.expression, exp.Query):
                return True  # any CREATE TABLE other than CREATE TABLE AS SELECT is Hive
        else:
            return expression.kind != "VIEW"  # CREATE VIEW is never Hive but CREATE SCHEMA etc is

    # https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe)):
        if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
            # DROP VIEW is Trino (I guess because CREATE VIEW is)
            return False

        # Everything else is Hive
        return True

    # Note: CREATE TABLE AS SELECT falls through to here — it is Trino,
    # as are all DML statements.
    return False
33
+
34
+
35
def _is_iceberg_table(properties: exp.Properties) -> bool:
    """Return True if the property list declares table_type='iceberg'."""
    for prop in properties.expressions:
        if isinstance(prop, exp.Property) and prop.name == "table_type":
            # Only the first table_type property is considered.
            return prop.text("value").lower() == "iceberg"
    return False
45
+
46
+
47
def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
    """Render the storage location as the correct Athena table property.

    If table_type='iceberg', the LocationProperty is called 'location';
    otherwise it's called 'external_location'.
    ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html
    """
    parent = e.parent
    is_iceberg = isinstance(parent, exp.Properties) and _is_iceberg_table(parent)
    prop_name = "location" if is_iceberg else "external_location"
    return f"{prop_name}={self.sql(e, 'this')}"
59
+
60
+
61
def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByProperty) -> str:
    """Render partitioning as the correct Athena CTAS table property.

    If table_type='iceberg' the property is called 'partitioning';
    if table_type='hive' it's called 'partitioned_by'.
    ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
    """
    parent = e.parent
    is_iceberg = isinstance(parent, exp.Properties) and _is_iceberg_table(parent)
    prop_name = "partitioning" if is_iceberg else "partitioned_by"
    return f"{prop_name}={self.sql(e, 'this')}"
72
+
73
+
74
class Athena(Trino):
    """
    Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific
    modifications and then built the Athena service around them.

    Thus, Athena is not simply hosted Trino, it's more like a router that routes SQL queries to an
    execution engine depending on the query type.

    As at 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3",
    the following engines exist:

    Hive:
     - Accepts mostly the same syntax as Hadoop / Hive
     - Uses backticks to quote identifiers
     - Has a distinctive DDL syntax (around things like setting table properties, storage locations
       etc) that is different from Trino
     - Used for *most* DDL, with some exceptions that get routed to the Trino engine instead:
        - CREATE [EXTERNAL] TABLE (without AS SELECT)
        - ALTER
        - DROP

    Trino:
     - Uses double quotes to quote identifiers
     - Used for DDL operations that involve SELECT queries, eg:
        - CREATE VIEW / DROP VIEW
        - CREATE TABLE... AS SELECT
     - Used for DML operations
        - SELECT, INSERT, UPDATE, DELETE, MERGE

    The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then
    uses the parser / generator for that engine rather than trying to create a universal syntax
    that can handle both types.
    """

    class Tokenizer(Trino.Tokenizer):
        """
        The Tokenizer is flexible enough to tokenize queries across both the Hive and Trino engines
        """

        # Accept both Trino-style double quotes and Hive-style backticks.
        IDENTIFIERS = ['"', "`"]
        KEYWORDS = {
            **Hive.Tokenizer.KEYWORDS,
            **Trino.Tokenizer.KEYWORDS,
            "UNLOAD": TokenType.COMMAND,
        }

    class Parser(Trino.Parser):
        """
        Parse queries for the Athena Trino execution engine
        """

        STATEMENT_PARSERS = {
            **Trino.Parser.STATEMENT_PARSERS,
            # Treat a leading USING as an opaque command rather than parsing it.
            TokenType.USING: lambda self: self._parse_as_command(self._prev),
        }

    class _HiveGenerator(Hive.Generator):
        # Internal generator used for statements routed to the Hive engine.
        def alter_sql(self, expression: exp.Alter) -> str:
            # package any ALTER TABLE ADD actions into a Schema object
            # so it gets generated as `ALTER TABLE .. ADD COLUMNS(...)`
            # instead of `ALTER TABLE ... ADD COLUMN` which is invalid syntax on Athena
            if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
                if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
                    new_actions = exp.Schema(expressions=expression.actions)
                    expression.set("actions", [new_actions])

            return super().alter_sql(expression)

    class Generator(Trino.Generator):
        """
        Generate queries for the Athena Trino execution engine
        """

        PROPERTIES_LOCATION = {
            **Trino.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
        }

        TRANSFORMS = {
            **Trino.Generator.TRANSFORMS,
            exp.PartitionedByProperty: _partitioned_by_property_sql,
            exp.LocationProperty: _location_property_sql,
        }

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

            # Keep a sibling Hive generator (forced to the hive dialect) so
            # statements routed to the Hive engine render with Hive syntax.
            hive_kwargs = {**kwargs, "dialect": "hive"}

            self._hive_generator = Athena._HiveGenerator(*args, **hive_kwargs)

        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
            # Route to the Hive generator when the statement would run on
            # Athena's Hive engine; otherwise fall back to Trino generation.
            if _generate_as_hive(expression):
                return self._hive_generator.generate(expression, copy)

            return super().generate(expression, copy)