altimate-code 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/bin/altimate +6 -0
  3. package/bin/altimate-code +6 -0
  4. package/dbt-tools/bin/altimate-dbt +2 -0
  5. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  96. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  98. package/dbt-tools/dist/index.js +23859 -0
  99. package/package.json +13 -13
  100. package/postinstall.mjs +42 -0
  101. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,178 @@
1
+ # ruff: noqa: F401
2
+ """
3
+ .. include:: ../README.md
4
+
5
+ ----
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import typing as t
12
+
13
+ from sqlglot import expressions as exp
14
+ from sqlglot.dialects.dialect import Dialect as Dialect, Dialects as Dialects
15
+ from sqlglot.diff import diff as diff
16
+ from sqlglot.errors import (
17
+ ErrorLevel as ErrorLevel,
18
+ ParseError as ParseError,
19
+ TokenError as TokenError,
20
+ UnsupportedError as UnsupportedError,
21
+ )
22
+ from sqlglot.expressions import (
23
+ Expression as Expression,
24
+ alias_ as alias,
25
+ and_ as and_,
26
+ case as case,
27
+ cast as cast,
28
+ column as column,
29
+ condition as condition,
30
+ delete as delete,
31
+ except_ as except_,
32
+ from_ as from_,
33
+ func as func,
34
+ insert as insert,
35
+ intersect as intersect,
36
+ maybe_parse as maybe_parse,
37
+ merge as merge,
38
+ not_ as not_,
39
+ or_ as or_,
40
+ select as select,
41
+ subquery as subquery,
42
+ table_ as table,
43
+ to_column as to_column,
44
+ to_identifier as to_identifier,
45
+ to_table as to_table,
46
+ union as union,
47
+ )
48
+ from sqlglot.generator import Generator as Generator
49
+ from sqlglot.parser import Parser as Parser
50
+ from sqlglot.schema import MappingSchema as MappingSchema, Schema as Schema
51
+ from sqlglot.tokens import Token as Token, Tokenizer as Tokenizer, TokenType as TokenType
52
+
53
+ if t.TYPE_CHECKING:
54
+ from sqlglot._typing import E
55
+ from sqlglot.dialects.dialect import DialectType as DialectType
56
+
57
# Package-wide logger, registered under the fixed name "sqlglot".
logger = logging.getLogger("sqlglot")


try:
    from sqlglot._version import __version__, __version_tuple__
except ImportError:
    # The import failure is only logged, not re-raised: in that case this block
    # leaves `__version__` and `__version_tuple__` unbound on the package.
    logger.error(
        "Unable to set __version__, run `pip install -e .` or `python setup.py develop` first."
    )


pretty = False
"""Whether to format generated SQL by default."""
70
+
71
+
72
def tokenize(sql: str, read: DialectType = None, dialect: DialectType = None) -> t.List[Token]:
    """
    Break a SQL string into tokens using the requested dialect.

    Args:
        sql: the SQL text to tokenize.
        read: the dialect whose tokenizing rules apply (eg. "spark", "hive", "presto", "mysql").
        dialect: alias for `read`; it is only consulted when `read` is falsy.

    Returns:
        The list of tokens produced for `sql`.
    """
    source_dialect = Dialect.get_or_raise(read or dialect)
    return source_dialect.tokenize(sql)
85
+
86
+
87
def parse(
    sql: str, read: DialectType = None, dialect: DialectType = None, **opts
) -> t.List[t.Optional[Expression]]:
    """
    Parse a SQL string into syntax trees, one entry per parsed SQL statement.

    Args:
        sql: the SQL text to parse.
        read: the dialect to parse with (eg. "spark", "hive", "presto", "mysql").
        dialect: alias for `read`; it is only consulted when `read` is falsy.
        **opts: other `sqlglot.parser.Parser` options, forwarded unchanged.

    Returns:
        The collection of resulting syntax trees.
    """
    source_dialect = Dialect.get_or_raise(read or dialect)
    return source_dialect.parse(sql, **opts)
103
+
104
+
105
@t.overload
def parse_one(sql: str, *, into: t.Type[E], **opts) -> E: ...


@t.overload
def parse_one(sql: str, **opts) -> Expression: ...


def parse_one(
    sql: str,
    read: DialectType = None,
    dialect: DialectType = None,
    into: t.Optional[exp.IntoType] = None,
    **opts,
) -> Expression:
    """
    Parse a SQL string and return the syntax tree of its first statement.

    Args:
        sql: the SQL text to parse.
        read: the dialect to parse with (eg. "spark", "hive", "presto", "mysql").
        dialect: alias for `read`; it is only consulted when `read` is falsy.
        into: the SQLGlot Expression to parse into; when truthy, `parse_into` is used
            instead of `parse`.
        **opts: other `sqlglot.parser.Parser` options, forwarded unchanged.

    Returns:
        The syntax tree for the first parsed statement.

    Raises:
        ParseError: if nothing was parsed, or if the first parsed entry is falsy.
    """
    resolved = Dialect.get_or_raise(read or dialect)

    parsed = resolved.parse_into(into, sql, **opts) if into else resolved.parse(sql, **opts)

    # Only the first entry matters; a missing or falsy first entry is reported the same way.
    first = next(iter(parsed), None)
    if not first:
        raise ParseError(f"No expression was parsed from '{sql}'")
    return first
147
+
148
+
149
def transpile(
    sql: str,
    read: DialectType = None,
    write: DialectType = None,
    identity: bool = True,
    error_level: t.Optional[ErrorLevel] = None,
    **opts,
) -> t.List[str]:
    """
    Parse SQL written in the source dialect and regenerate it for the target dialect.

    Every string in the returned list corresponds to one SQL statement of the input.

    Args:
        sql: the SQL text to transpile.
        read: the source dialect used for parsing (eg. "spark", "hive", "presto", "mysql").
        write: the target dialect used for generation (eg. "spark", "hive", "presto", "mysql").
        identity: when `True` and `write` is `None`, the source dialect doubles as the
            target dialect.
        error_level: the desired error level of the parser.
        **opts: other `sqlglot.generator.Generator` options, forwarded unchanged.

    Returns:
        The list of transpiled SQL statements; a statement that parsed to a falsy
        value is emitted as an empty string.
    """
    if identity and write is None:
        write = read
    target = Dialect.get_or_raise(write)

    statements = parse(sql, read, error_level=error_level)
    return [target.generate(tree, copy=False, **opts) if tree else "" for tree in statements]
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+ import typing as t
6
+
7
+ import sqlglot
8
+
9
# Command-line entry point: transpile, parse, or tokenize SQL given as an argument
# (or read from stdin when the argument is "-") and print one result per line.
parser = argparse.ArgumentParser(description="Transpile SQL")
parser.add_argument(
    "sql",
    metavar="sql",
    type=str,
    help="SQL statement(s) to transpile, or - to parse stdin.",
)
parser.add_argument(
    "--read",
    dest="read",
    type=str,
    default=None,
    help="Dialect to read default is generic",
)
parser.add_argument(
    "--write",
    dest="write",
    type=str,
    default=None,
    help="Dialect to write default is generic",
)
parser.add_argument(
    "--no-identify",
    dest="identify",
    action="store_false",
    help="Don't auto identify fields",
)
parser.add_argument(
    "--no-pretty",
    dest="pretty",
    action="store_false",
    help="Compress sql",
)
parser.add_argument(
    "--parse",
    dest="parse",
    action="store_true",
    help="Parse and return the expression tree",
)
parser.add_argument(
    "--tokenize",
    dest="tokenize",
    action="store_true",
    help="Tokenize and return the tokens list",
)
parser.add_argument(
    "--error-level",
    dest="error_level",
    type=str,
    default="IMMEDIATE",
    help="IGNORE, WARN, RAISE, IMMEDIATE (default)",
)
parser.add_argument(
    "--version",
    action="version",
    # The package only binds `__version__` when `sqlglot._version` can be imported,
    # so fall back to a placeholder instead of failing before any argument is parsed.
    version=getattr(sqlglot, "__version__", "unknown"),
    help="Display the SQLGlot version",
)


args = parser.parse_args()

try:
    error_level = sqlglot.ErrorLevel[args.error_level.upper()]
except KeyError:
    # Report an unknown level as a normal usage error rather than a traceback.
    # `parser.error` prints the message and exits, so `error_level` is always bound below.
    parser.error(
        f"invalid --error-level {args.error_level!r} (choose from IGNORE, WARN, RAISE, IMMEDIATE)"
    )

sql = sys.stdin.read() if args.sql == "-" else args.sql

if args.parse:
    # --parse takes precedence over --tokenize when both flags are given.
    objs: t.Union[t.List[str], t.List[sqlglot.tokens.Token]] = [
        repr(expression)
        for expression in sqlglot.parse(
            sql,
            read=args.read,
            error_level=error_level,
        )
    ]
elif args.tokenize:
    objs = sqlglot.Dialect.get_or_raise(args.read).tokenize(sql)
else:
    objs = sqlglot.transpile(
        sql,
        read=args.read,
        write=args.write,
        identify=args.identify,
        pretty=args.pretty,
        error_level=error_level,
    )

for obj in objs:
    print(obj)
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ import sqlglot
6
+
7
+ if t.TYPE_CHECKING:
8
+ from typing_extensions import Literal as Lit # noqa
9
+
10
# A little hack for backwards compatibility with Python 3.7.
# For example, we might want a TypeVar for objects that support comparison e.g. SupportsRichComparisonT from typeshed.
# But Python 3.7 doesn't support Protocols, so we'd also need typing_extensions, which we don't want as a dependency.
A = t.TypeVar("A", bound=t.Any)
# The bounds below are given as strings, i.e. forward references that are not
# resolved when this module is executed.
B = t.TypeVar("B", bound="sqlglot.exp.Binary")
E = t.TypeVar("E", bound="sqlglot.exp.Expression")
F = t.TypeVar("F", bound="sqlglot.exp.Func")
# Unbounded, general-purpose type variable.
T = t.TypeVar("T")
@@ -0,0 +1,18 @@
1
+ from sqlglot.dataframe.sql.column import Column
2
+ from sqlglot.dataframe.sql.dataframe import DataFrame, DataFrameNaFunctions
3
+ from sqlglot.dataframe.sql.group import GroupedData
4
+ from sqlglot.dataframe.sql.readwriter import DataFrameReader, DataFrameWriter
5
+ from sqlglot.dataframe.sql.session import SparkSession
6
+ from sqlglot.dataframe.sql.window import Window, WindowSpec
7
+
8
# Names exported by `from <this package> import *`; each one is imported above.
__all__ = [
    "SparkSession",
    "DataFrame",
    "GroupedData",
    "Column",
    "DataFrameNaFunctions",
    "Window",
    "WindowSpec",
    "DataFrameReader",
    "DataFrameWriter",
]
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import typing as t
5
+
6
+ from sqlglot import expressions as exp
7
+
8
+ if t.TYPE_CHECKING:
9
+ from sqlglot.dataframe.sql.column import Column
10
+ from sqlglot.dataframe.sql.types import StructType
11
+
12
# Type aliases shared by the dataframe API.
#
# `Column` and `StructType` are imported only under `t.TYPE_CHECKING`, yet these
# assignments are evaluated when the module runs (`from __future__ import annotations`
# defers annotations, not ordinary expressions). They are therefore written as string
# forward references so that importing this module at runtime does not raise NameError.
ColumnLiterals = t.Union[str, float, int, bool, t.List, t.Tuple, datetime.date, datetime.datetime]
ColumnOrName = t.Union["Column", str]
ColumnOrLiteral = t.Union[
    "Column", str, float, int, bool, t.List, t.Tuple, datetime.date, datetime.datetime
]
SchemaInput = t.Union[str, t.List[str], "StructType", t.Dict[str, t.Optional[str]]]
OutputExpressionContainer = t.Union[exp.Select, exp.Create, exp.Insert]
@@ -0,0 +1,332 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ import sqlglot as sqlglot
6
+ from sqlglot import expressions as exp
7
+ from sqlglot.dataframe.sql.types import DataType
8
+ from sqlglot.helper import flatten, is_iterable
9
+
10
+ if t.TYPE_CHECKING:
11
+ from sqlglot.dataframe.sql._typing import ColumnOrLiteral
12
+ from sqlglot.dataframe.sql.window import WindowSpec
13
+
14
+
15
class Column:
    """
    Wrapper around a sqlglot expression that offers a column-style API.

    The wrapped expression is stored in `self.expression`. Operators and helper
    methods never return raw expressions; they build a new expression and wrap it
    in a new `Column`.
    """

    def __init__(self, expression: t.Optional[t.Union[ColumnOrLiteral, exp.Expression]]):
        """
        Build a column from another column, a literal value, a string or an expression.

        Args:
            expression: the value to wrap.
                - A `Column` contributes its underlying expression.
                - `None` and any value that is neither a `str` nor an `exp.Expression`
                  are converted through `_lit`.
                - A `str` or a non-`exp.Column` expression goes through
                  `sqlglot.maybe_parse` with the session dialect and has that dialect's
                  `normalize_identifier` applied via `transform`.
                - An `exp.Column` is stored as is.

        Raises:
            ValueError: if the resulting expression is `None`.
        """
        from sqlglot.dataframe.sql.session import SparkSession

        # Remember the caller's input: `expression` is rebound below, so the error
        # message would otherwise always report `None`.
        original = expression

        if isinstance(expression, Column):
            expression = expression.expression  # type: ignore
        elif expression is None or not isinstance(expression, (str, exp.Expression)):
            expression = self._lit(expression).expression  # type: ignore
        elif not isinstance(expression, exp.Column):
            dialect = SparkSession().dialect
            expression = sqlglot.maybe_parse(expression, dialect=dialect).transform(
                dialect.normalize_identifier, copy=False
            )
        if expression is None:
            raise ValueError(f"Could not parse {original}")

        self.expression: exp.Expression = expression  # type: ignore

    def __repr__(self):
        return repr(self.expression)

    def __hash__(self):
        return hash(self.expression)

    # Comparison, boolean and arithmetic operators build expressions instead of
    # evaluating anything: each one returns a new Column wrapping the operation.

    def __eq__(self, other: ColumnOrLiteral) -> Column:  # type: ignore
        return self.binary_op(exp.EQ, other)

    def __ne__(self, other: ColumnOrLiteral) -> Column:  # type: ignore
        return self.binary_op(exp.NEQ, other)

    def __gt__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.GT, other)

    def __ge__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.GTE, other)

    def __lt__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.LT, other)

    def __le__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.LTE, other)

    def __and__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.And, other)

    def __or__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Or, other)

    def __mod__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Mod, other)

    def __add__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Add, other)

    def __sub__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Sub, other)

    def __mul__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Mul, other)

    def __truediv__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Div, other)

    def __div__(self, other: ColumnOrLiteral) -> Column:
        return self.binary_op(exp.Div, other)

    def __neg__(self) -> Column:
        return self.unary_op(exp.Neg)

    # Reflected operators: `other` becomes the left operand and this column the right.

    def __radd__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Add, other)

    def __rsub__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Sub, other)

    def __rmul__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Mul, other)

    def __rdiv__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Div, other)

    def __rtruediv__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Div, other)

    def __rmod__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Mod, other)

    def __pow__(self, power: ColumnOrLiteral, modulo=None):
        # `modulo` is accepted for signature compatibility but is not used.
        return Column(exp.Pow(this=self.expression, expression=Column(power).expression))

    def __rpow__(self, power: ColumnOrLiteral):
        return Column(exp.Pow(this=Column(power).expression, expression=self.expression))

    def __invert__(self):
        return self.unary_op(exp.Not)

    def __rand__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.And, other)

    def __ror__(self, other: ColumnOrLiteral) -> Column:
        return self.inverse_binary_op(exp.Or, other)

    @classmethod
    def ensure_col(cls, value: t.Optional[t.Union[ColumnOrLiteral, exp.Expression]]) -> Column:
        """Wrap `value` in a column by passing it to the constructor."""
        return cls(value)

    @classmethod
    def ensure_cols(cls, args: t.List[t.Union[ColumnOrLiteral, exp.Expression]]) -> t.List[Column]:
        """Return `args` as columns; items that already are columns are reused unchanged."""
        return [cls.ensure_col(x) if not isinstance(x, Column) else x for x in args]

    @classmethod
    def _lit(cls, value: ColumnOrLiteral) -> Column:
        """
        Convert a Python value into a column.

        A dict becomes an `exp.Struct` whose members are the converted values aliased
        by their keys; every other value is converted with `exp.convert`.
        """
        if isinstance(value, dict):
            columns = [cls._lit(v).alias(k).expression for k, v in value.items()]
            return cls(exp.Struct(expressions=columns))
        return cls(exp.convert(value))

    @classmethod
    def invoke_anonymous_function(
        cls, column: t.Optional[ColumnOrLiteral], func_name: str, *args: t.Optional[ColumnOrLiteral]
    ) -> Column:
        """
        Build a call to a function identified only by name.

        Args:
            column: optional leading argument; omitted from the call when `None`.
            func_name: the function name; it is upper-cased before use.
            *args: remaining arguments, each converted to a column.

        Returns:
            A column wrapping an `exp.Anonymous` call with all arguments in order.
        """
        columns = [] if column is None else [cls.ensure_col(column)]
        column_args = [cls.ensure_col(arg) for arg in args]
        expressions = [x.expression for x in columns + column_args]
        new_expression = exp.Anonymous(this=func_name.upper(), expressions=expressions)
        return Column(new_expression)

    @classmethod
    def invoke_expression_over_column(
        cls, column: t.Optional[ColumnOrLiteral], callable_expression: t.Callable, **kwargs
    ) -> Column:
        """
        Call `callable_expression` with `column` as `this` plus converted keyword arguments.

        Args:
            column: the column passed as `this` (unaliased); when `None`, no `this` is passed.
            callable_expression: the callable (e.g. an expression class) to invoke.
            **kwargs: extra arguments. `None` values are dropped, iterable values are
                converted element by element, and other values are converted individually.

        Returns:
            A column wrapping the value returned by `callable_expression`.
        """
        ensured_column = None if column is None else cls.ensure_col(column)
        ensure_expression_values = {
            k: [Column.ensure_col(x).expression for x in v]
            if is_iterable(v)
            else Column.ensure_col(v).expression
            for k, v in kwargs.items()
            if v is not None
        }
        new_expression = (
            callable_expression(**ensure_expression_values)
            if ensured_column is None
            else callable_expression(
                this=ensured_column.column_expression, **ensure_expression_values
            )
        )
        return Column(new_expression)

    def binary_op(self, klass: t.Callable, other: ColumnOrLiteral, **kwargs) -> Column:
        """Build `klass(this=<self>, expression=<other>)` from the unaliased operands."""
        return Column(
            klass(this=self.column_expression, expression=Column(other).column_expression, **kwargs)
        )

    def inverse_binary_op(self, klass: t.Callable, other: ColumnOrLiteral, **kwargs) -> Column:
        """Build `klass(this=<other>, expression=<self>)` from the unaliased operands."""
        return Column(
            klass(this=Column(other).column_expression, expression=self.column_expression, **kwargs)
        )

    def unary_op(self, klass: t.Callable, **kwargs) -> Column:
        """Build `klass(this=<self>)` from the unaliased expression."""
        return Column(klass(this=self.column_expression, **kwargs))

    @property
    def is_alias(self):
        return isinstance(self.expression, exp.Alias)

    @property
    def is_column(self):
        return isinstance(self.expression, exp.Column)

    @property
    def column_expression(self) -> t.Union[exp.Column, exp.Literal]:
        """The wrapped expression with any alias removed (via `unalias()`)."""
        return self.expression.unalias()

    @property
    def alias_or_name(self) -> str:
        return self.expression.alias_or_name

    @classmethod
    def ensure_literal(cls, value) -> Column:
        """
        Return `value` as a column holding a literal.

        A column argument is unwrapped first; anything that is not already an
        `exp.Literal` is converted with `functions.lit`.
        """
        from sqlglot.dataframe.sql.functions import lit

        if isinstance(value, cls):
            value = value.expression
        if not isinstance(value, exp.Literal):
            return lit(value)
        return Column(value)

    def copy(self) -> Column:
        """Return a new column wrapping a copy of the underlying expression."""
        return Column(self.expression.copy())

    def set_table_name(self, table_name: str, copy=False) -> Column:
        """
        Set the `table` argument of the expression to `table_name`.

        Unless `copy` is true, the expression held by this column is modified in place.
        """
        expression = self.expression.copy() if copy else self.expression
        expression.set("table", exp.to_identifier(table_name))
        return Column(expression)

    def sql(self, **kwargs) -> str:
        """Render the expression as SQL; the session dialect is used unless `dialect` is given."""
        from sqlglot.dataframe.sql.session import SparkSession

        return self.expression.sql(**{"dialect": SparkSession().dialect, **kwargs})

    def alias(self, name: str) -> Column:
        """Return the unaliased expression aliased as `name`."""
        new_expression = exp.alias_(self.column_expression, name)
        return Column(new_expression)

    def asc(self) -> Column:
        """Ascending order with `nulls_first=True`."""
        new_expression = exp.Ordered(this=self.column_expression, desc=False, nulls_first=True)
        return Column(new_expression)

    def desc(self) -> Column:
        """Descending order with `nulls_first=False`."""
        new_expression = exp.Ordered(this=self.column_expression, desc=True, nulls_first=False)
        return Column(new_expression)

    asc_nulls_first = asc

    def asc_nulls_last(self) -> Column:
        """Ascending order with `nulls_first=False`."""
        new_expression = exp.Ordered(this=self.column_expression, desc=False, nulls_first=False)
        return Column(new_expression)

    def desc_nulls_first(self) -> Column:
        """Descending order with `nulls_first=True`."""
        new_expression = exp.Ordered(this=self.column_expression, desc=True, nulls_first=True)
        return Column(new_expression)

    desc_nulls_last = desc

    def when(self, condition: Column, value: t.Any) -> Column:
        """
        Add a `condition -> value` branch.

        If this column does not wrap an `exp.Case`, the column produced by
        `functions.when` is returned directly. Otherwise its `ifs` are appended to
        a copy of this column.
        """
        from sqlglot.dataframe.sql.functions import when

        column_with_if = when(condition, value)
        if not isinstance(self.expression, exp.Case):
            return column_with_if
        new_column = self.copy()
        new_column.expression.args["ifs"].extend(column_with_if.expression.args["ifs"])
        return new_column

    def otherwise(self, value: t.Any) -> Column:
        """Return a copy of this column whose `default` argument is set to `value`."""
        from sqlglot.dataframe.sql.functions import lit

        true_value = value if isinstance(value, Column) else lit(value)
        new_column = self.copy()
        new_column.expression.set("default", true_value.column_expression)
        return new_column

    def isNull(self) -> Column:
        new_expression = exp.Is(this=self.column_expression, expression=exp.Null())
        return Column(new_expression)

    def isNotNull(self) -> Column:
        new_expression = exp.Not(this=exp.Is(this=self.column_expression, expression=exp.Null()))
        return Column(new_expression)

    def cast(self, dataType: t.Union[str, DataType]) -> Column:
        """
        Cast this column to `dataType`.

        Args:
            dataType: either a type string or a `DataType` instance; an instance is
                reduced to its `simpleString()` form before the cast is built.
        """
        from sqlglot.dataframe.sql.session import SparkSession

        if isinstance(dataType, DataType):
            dataType = dataType.simpleString()
        return Column(exp.cast(self.column_expression, dataType, dialect=SparkSession().dialect))

    def startswith(self, value: t.Union[str, Column]) -> Column:
        value = self._lit(value) if not isinstance(value, Column) else value
        return self.invoke_anonymous_function(self, "STARTSWITH", value)

    def endswith(self, value: t.Union[str, Column]) -> Column:
        value = self._lit(value) if not isinstance(value, Column) else value
        return self.invoke_anonymous_function(self, "ENDSWITH", value)

    def rlike(self, regexp: str) -> Column:
        return self.invoke_expression_over_column(
            column=self, callable_expression=exp.RegexpLike, expression=self._lit(regexp).expression
        )

    def like(self, other: str):
        return self.invoke_expression_over_column(
            self, exp.Like, expression=self._lit(other).expression
        )

    def ilike(self, other: str):
        return self.invoke_expression_over_column(
            self, exp.ILike, expression=self._lit(other).expression
        )

    def substr(self, startPos: t.Union[int, Column], length: t.Union[int, Column]) -> Column:
        startPos = self._lit(startPos) if not isinstance(startPos, Column) else startPos
        length = self._lit(length) if not isinstance(length, Column) else length
        return Column.invoke_expression_over_column(
            self, exp.Substring, start=startPos.expression, length=length.expression
        )

    def isin(self, *cols: t.Union[ColumnOrLiteral, t.Iterable[ColumnOrLiteral]]):
        """
        Build an `IN` test of this column against the given values.

        Values may be passed as separate arguments or as list/set/tuple arguments,
        which are flattened. Calling with no arguments yields an `IN` with no values.
        """
        # An empty call has no first element to inspect, so check `cols` before indexing.
        needs_flatten = bool(cols) and isinstance(cols[0], (list, set, tuple))
        columns = flatten(cols) if needs_flatten else cols  # type: ignore
        expressions = [self._lit(x).expression for x in columns]
        return Column.invoke_expression_over_column(self, exp.In, expressions=expressions)  # type: ignore

    def between(
        self,
        lowerBound: ColumnOrLiteral,
        upperBound: ColumnOrLiteral,
    ) -> Column:
        """Build `exp.Between` with this column as `this` and the bounds as `low`/`high`."""
        lower_bound_exp = (
            self._lit(lowerBound) if not isinstance(lowerBound, Column) else lowerBound
        )
        upper_bound_exp = (
            self._lit(upperBound) if not isinstance(upperBound, Column) else upperBound
        )
        return Column(
            exp.Between(
                this=self.column_expression,
                low=lower_bound_exp.expression,
                high=upper_bound_exp.expression,
            )
        )

    def over(self, window: WindowSpec) -> Column:
        """Apply this column over `window`: copy the window expression and set its `this`."""
        window_expression = window.expression.copy()
        window_expression.set("this", self.column_expression)
        return Column(window_expression)