@altimateai/altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py
ADDED
|
@@ -0,0 +1,1267 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp as expression
|
|
6
|
+
from sqlglot.dataframe.sql.column import Column
|
|
7
|
+
from sqlglot.helper import ensure_list, flatten as _flatten
|
|
8
|
+
|
|
9
|
+
if t.TYPE_CHECKING:
|
|
10
|
+
from sqlglot.dataframe.sql._typing import ColumnOrLiteral, ColumnOrName
|
|
11
|
+
from sqlglot.dataframe.sql.dataframe import DataFrame
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
    """Wrap ``column_name`` in a new ``Column``."""
    wrapped = Column(column_name)
    return wrapped
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def lit(value: t.Optional[t.Any] = None) -> Column:
    """Wrap ``value`` in a ``Column``.

    ``str`` values are first converted with ``expression.Literal.string``;
    every other value is handed to ``Column`` unchanged.
    """
    if not isinstance(value, str):
        return Column(value)
    string_literal = expression.Literal.string(str(value))
    return Column(string_literal)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def greatest(*cols: ColumnOrName) -> Column:
    """Apply ``expression.Greatest`` to ``cols``.

    The first column is the main operand; any remaining columns are passed
    as ``expressions``.
    """
    if len(cols) <= 1:
        # Single operand: no ``expressions`` keyword is passed at all.
        return Column.invoke_expression_over_column(cols[0], expression.Greatest)
    remaining = cols[1:]
    return Column.invoke_expression_over_column(
        cols[0], expression.Greatest, expressions=remaining
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def least(*cols: ColumnOrName) -> Column:
    """Apply ``expression.Least`` to ``cols``.

    The first column is the main operand; any remaining columns are passed
    as ``expressions``.
    """
    if len(cols) <= 1:
        # Single operand: no ``expressions`` keyword is passed at all.
        return Column.invoke_expression_over_column(cols[0], expression.Least)
    remaining = cols[1:]
    return Column.invoke_expression_over_column(
        cols[0], expression.Least, expressions=remaining
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def count_distinct(col: ColumnOrName, *cols: ColumnOrName) -> Column:
    """Build a ``Count`` over a ``Distinct`` of ``col`` and any extra ``cols``."""
    ensured = [Column.ensure_col(item) for item in (col, *cols)]
    distinct = expression.Distinct(expressions=[item.expression for item in ensured])
    return Column(expression.Count(this=distinct))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def countDistinct(col: ColumnOrName, *cols: ColumnOrName) -> Column:
    """camelCase alias that forwards to ``count_distinct``."""
    distinct_count = count_distinct(col, *cols)
    return distinct_count
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def when(condition: Column, value: t.Any) -> Column:
    """Build a ``Case`` expression with a single ``If`` branch.

    ``value`` is used as-is when it is already a ``Column``; otherwise it is
    wrapped with ``lit``.
    """
    if isinstance(value, Column):
        result_column = value
    else:
        result_column = lit(value)
    branch = expression.If(
        this=condition.column_expression, true=result_column.column_expression
    )
    return Column(expression.Case(ifs=[branch]))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def asc(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return the result of its ``asc()``."""
    ensured = Column.ensure_col(col)
    return ensured.asc()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def desc(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return the result of its ``desc()``.

    The return annotation matches the sibling sort helpers (``asc``,
    ``desc_nulls_first``, ...), which all declare ``-> Column``.
    """
    return Column.ensure_col(col).desc()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def broadcast(df: DataFrame) -> DataFrame:
    """Return ``df`` with the ``"broadcast"`` hint applied via ``df.hint``."""
    hinted = df.hint("broadcast")
    return hinted
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def sqrt(col: ColumnOrName) -> Column:
    """Apply ``expression.Sqrt`` to ``col``."""
    sqrt_column = Column.invoke_expression_over_column(col, expression.Sqrt)
    return sqrt_column
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def abs(col: ColumnOrName) -> Column:
    """Apply ``expression.Abs`` to ``col``."""
    abs_column = Column.invoke_expression_over_column(col, expression.Abs)
    return abs_column
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def max(col: ColumnOrName) -> Column:
    """Apply ``expression.Max`` to ``col``."""
    max_column = Column.invoke_expression_over_column(col, expression.Max)
    return max_column
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def min(col: ColumnOrName) -> Column:
    """Apply ``expression.Min`` to ``col``."""
    min_column = Column.invoke_expression_over_column(col, expression.Min)
    return min_column
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def max_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
    """Call the anonymous function ``MAX_BY`` with ``col`` and ``ord``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "MAX_BY", ord)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def min_by(col: ColumnOrName, ord: ColumnOrName) -> Column:
    """Call the anonymous function ``MIN_BY`` with ``col`` and ``ord``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "MIN_BY", ord)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def count(col: ColumnOrName) -> Column:
    """Apply ``expression.Count`` to ``col``."""
    count_column = Column.invoke_expression_over_column(col, expression.Count)
    return count_column
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def sum(col: ColumnOrName) -> Column:
    """Apply ``expression.Sum`` to ``col``."""
    sum_column = Column.invoke_expression_over_column(col, expression.Sum)
    return sum_column
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def avg(col: ColumnOrName) -> Column:
    """Apply ``expression.Avg`` to ``col``."""
    avg_column = Column.invoke_expression_over_column(col, expression.Avg)
    return avg_column
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def mean(col: ColumnOrName) -> Column:
    """Call the anonymous function ``MEAN`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "MEAN")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def sumDistinct(col: ColumnOrName) -> Column:
    """camelCase alias that forwards to ``sum_distinct``."""
    forwarded = sum_distinct(col)
    return forwarded
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def sum_distinct(col: ColumnOrName) -> Column:
    """Placeholder that always raises ``NotImplementedError``."""
    message = "Sum distinct is not currently implemented"
    raise NotImplementedError(message)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def product(col: ColumnOrName) -> Column:
    """Placeholder that always raises ``NotImplementedError``."""
    message = "Product is not currently implemented"
    raise NotImplementedError(message)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def acos(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ACOS`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ACOS")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def acosh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ACOSH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ACOSH")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def asin(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ASIN`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ASIN")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def asinh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ASINH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ASINH")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def atan(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ATAN`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ATAN")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def atan2(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """Call the anonymous function ``ATAN2`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "ATAN2", col2)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def atanh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ATANH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ATANH")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def cbrt(col: ColumnOrName) -> Column:
    """Call the anonymous function ``CBRT`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "CBRT")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def ceil(col: ColumnOrName) -> Column:
    """Apply ``expression.Ceil`` to ``col``."""
    ceil_column = Column.invoke_expression_over_column(col, expression.Ceil)
    return ceil_column
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def cos(col: ColumnOrName) -> Column:
    """Call the anonymous function ``COS`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "COS")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def cosh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``COSH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "COSH")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def cot(col: ColumnOrName) -> Column:
    """Call the anonymous function ``COT`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "COT")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def csc(col: ColumnOrName) -> Column:
    """Call the anonymous function ``CSC`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "CSC")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def exp(col: ColumnOrName) -> Column:
    """Apply ``expression.Exp`` to ``col``."""
    exp_column = Column.invoke_expression_over_column(col, expression.Exp)
    return exp_column
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def expm1(col: ColumnOrName) -> Column:
    """Call the anonymous function ``EXPM1`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "EXPM1")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def floor(col: ColumnOrName) -> Column:
    """Apply ``expression.Floor`` to ``col``."""
    floor_column = Column.invoke_expression_over_column(col, expression.Floor)
    return floor_column
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def log10(col: ColumnOrName) -> Column:
    """Apply ``expression.Log10`` to ``col``."""
    log10_column = Column.invoke_expression_over_column(col, expression.Log10)
    return log10_column
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def log1p(col: ColumnOrName) -> Column:
    """Call the anonymous function ``LOG1P`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "LOG1P")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def log2(col: ColumnOrName) -> Column:
    """Apply ``expression.Log2`` to ``col``."""
    log2_column = Column.invoke_expression_over_column(col, expression.Log2)
    return log2_column
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def log(arg1: t.Union[ColumnOrName, float], arg2: t.Optional[ColumnOrName] = None) -> Column:
    """Apply ``expression.Ln`` or ``expression.Log`` depending on ``arg2``.

    With ``arg2`` omitted, ``expression.Ln`` is applied to ``arg1`` alone.
    Otherwise ``expression.Log`` is applied to ``arg1`` with ``arg2`` passed
    as its ``expression`` argument.
    """
    if arg2 is not None:
        return Column.invoke_expression_over_column(arg1, expression.Log, expression=arg2)
    return Column.invoke_expression_over_column(arg1, expression.Ln)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def rint(col: ColumnOrName) -> Column:
    """Call the anonymous function ``RINT`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "RINT")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def sec(col: ColumnOrName) -> Column:
    """Call the anonymous function ``SEC`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SEC")
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def signum(col: ColumnOrName) -> Column:
    """Call the anonymous function ``SIGNUM`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SIGNUM")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def sin(col: ColumnOrName) -> Column:
    """Call the anonymous function ``SIN`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SIN")
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def sinh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``SINH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SINH")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def tan(col: ColumnOrName) -> Column:
    """Call the anonymous function ``TAN`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "TAN")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def tanh(col: ColumnOrName) -> Column:
    """Call the anonymous function ``TANH`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "TANH")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def toDegrees(col: ColumnOrName) -> Column:
    """camelCase alias that forwards to ``degrees``."""
    forwarded = degrees(col)
    return forwarded
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def degrees(col: ColumnOrName) -> Column:
    """Call the anonymous function ``DEGREES`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "DEGREES")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def toRadians(col: ColumnOrName) -> Column:
    """camelCase alias that forwards to ``radians``."""
    forwarded = radians(col)
    return forwarded
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def radians(col: ColumnOrName) -> Column:
    """Call the anonymous function ``RADIANS`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "RADIANS")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def bitwiseNOT(col: ColumnOrName) -> Column:
    """camelCase alias that forwards to ``bitwise_not``."""
    forwarded = bitwise_not(col)
    return forwarded
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def bitwise_not(col: ColumnOrName) -> Column:
    """Apply ``expression.BitwiseNot`` to ``col``."""
    negated = Column.invoke_expression_over_column(col, expression.BitwiseNot)
    return negated
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def asc_nulls_first(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return its ``asc_nulls_first()``."""
    ensured = Column.ensure_col(col)
    return ensured.asc_nulls_first()
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def asc_nulls_last(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return its ``asc_nulls_last()``."""
    ensured = Column.ensure_col(col)
    return ensured.asc_nulls_last()
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def desc_nulls_first(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return its ``desc_nulls_first()``."""
    ensured = Column.ensure_col(col)
    return ensured.desc_nulls_first()
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def desc_nulls_last(col: ColumnOrName) -> Column:
    """Ensure ``col`` is a ``Column`` and return its ``desc_nulls_last()``."""
    ensured = Column.ensure_col(col)
    return ensured.desc_nulls_last()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def stddev(col: ColumnOrName) -> Column:
    """Apply ``expression.Stddev`` to ``col``."""
    stddev_column = Column.invoke_expression_over_column(col, expression.Stddev)
    return stddev_column
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def stddev_samp(col: ColumnOrName) -> Column:
    """Apply ``expression.StddevSamp`` to ``col``."""
    stddev_column = Column.invoke_expression_over_column(col, expression.StddevSamp)
    return stddev_column
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def stddev_pop(col: ColumnOrName) -> Column:
    """Apply ``expression.StddevPop`` to ``col``."""
    stddev_column = Column.invoke_expression_over_column(col, expression.StddevPop)
    return stddev_column
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def variance(col: ColumnOrName) -> Column:
    """Apply ``expression.Variance`` to ``col``."""
    variance_column = Column.invoke_expression_over_column(col, expression.Variance)
    return variance_column
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def var_samp(col: ColumnOrName) -> Column:
    """Apply ``expression.Variance`` to ``col`` (same expression class as ``variance``)."""
    variance_column = Column.invoke_expression_over_column(col, expression.Variance)
    return variance_column
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def var_pop(col: ColumnOrName) -> Column:
    """Apply ``expression.VariancePop`` to ``col``."""
    variance_column = Column.invoke_expression_over_column(col, expression.VariancePop)
    return variance_column
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def skewness(col: ColumnOrName) -> Column:
    """Call the anonymous function ``SKEWNESS`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SKEWNESS")
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def kurtosis(col: ColumnOrName) -> Column:
    """Call the anonymous function ``KURTOSIS`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "KURTOSIS")
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def collect_list(col: ColumnOrName) -> Column:
    """Apply ``expression.ArrayAgg`` to ``col``."""
    aggregated = Column.invoke_expression_over_column(col, expression.ArrayAgg)
    return aggregated
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def collect_set(col: ColumnOrName) -> Column:
    """Apply ``expression.SetAgg`` to ``col``."""
    aggregated = Column.invoke_expression_over_column(col, expression.SetAgg)
    return aggregated
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def hypot(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """Call the anonymous function ``HYPOT`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "HYPOT", col2)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def pow(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
    """Apply ``expression.Pow`` to ``col1`` with ``col2`` as its ``expression`` argument."""
    power_column = Column.invoke_expression_over_column(
        col1, expression.Pow, expression=col2
    )
    return power_column
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def row_number() -> Column:
    """Wrap an argument-less ``Anonymous`` expression named ``ROW_NUMBER`` in a ``Column``."""
    anonymous_call = expression.Anonymous(this="ROW_NUMBER")
    return Column(anonymous_call)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def dense_rank() -> Column:
    """Wrap an argument-less ``Anonymous`` expression named ``DENSE_RANK`` in a ``Column``."""
    anonymous_call = expression.Anonymous(this="DENSE_RANK")
    return Column(anonymous_call)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def rank() -> Column:
    """Wrap an argument-less ``Anonymous`` expression named ``RANK`` in a ``Column``."""
    anonymous_call = expression.Anonymous(this="RANK")
    return Column(anonymous_call)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def cume_dist() -> Column:
    """Wrap an argument-less ``Anonymous`` expression named ``CUME_DIST`` in a ``Column``."""
    anonymous_call = expression.Anonymous(this="CUME_DIST")
    return Column(anonymous_call)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def percent_rank() -> Column:
    """Wrap an argument-less ``Anonymous`` expression named ``PERCENT_RANK`` in a ``Column``."""
    anonymous_call = expression.Anonymous(this="PERCENT_RANK")
    return Column(anonymous_call)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def approxCountDistinct(col: ColumnOrName, rsd: t.Optional[float] = None) -> Column:
    """camelCase alias that forwards to ``approx_count_distinct``."""
    forwarded = approx_count_distinct(col, rsd)
    return forwarded
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def approx_count_distinct(col: ColumnOrName, rsd: t.Optional[float] = None) -> Column:
    """Apply ``expression.ApproxDistinct`` to ``col``.

    ``rsd`` is forwarded as the ``accuracy`` argument only when it is not
    ``None``.
    """
    if rsd is not None:
        return Column.invoke_expression_over_column(
            col, expression.ApproxDistinct, accuracy=rsd
        )
    return Column.invoke_expression_over_column(col, expression.ApproxDistinct)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def coalesce(*cols: ColumnOrName) -> Column:
    """Apply ``expression.Coalesce`` to ``cols``.

    The first column is the main operand; any remaining columns are passed
    as ``expressions``.
    """
    if len(cols) <= 1:
        # Single operand: no ``expressions`` keyword is passed at all.
        return Column.invoke_expression_over_column(cols[0], expression.Coalesce)
    remaining = cols[1:]
    return Column.invoke_expression_over_column(
        cols[0], expression.Coalesce, expressions=remaining
    )
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Call the anonymous function ``CORR`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "CORR", col2)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Call the anonymous function ``COVAR_POP`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "COVAR_POP", col2)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Call the anonymous function ``COVAR_SAMP`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "COVAR_SAMP", col2)
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
    """Apply ``expression.First`` to ``col``, forwarding ``ignorenulls`` as ``ignore_nulls``."""
    first_column = Column.invoke_expression_over_column(
        col, expression.First, ignore_nulls=ignorenulls
    )
    return first_column
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def grouping_id(*cols: ColumnOrName) -> Column:
    """Call the anonymous function ``GROUPING_ID`` with ``cols`` as arguments.

    With no columns, ``None`` is passed as the leading column. Otherwise the
    first column leads and the rest follow as positional arguments.
    """
    leading = cols[0] if cols else None
    # ``cols[1:]`` is empty for zero or one column, so unpacking adds no arguments.
    return Column.invoke_anonymous_function(leading, "GROUPING_ID", *cols[1:])
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def input_file_name() -> Column:
    """Call the anonymous function ``INPUT_FILE_NAME`` with no column."""
    invoke = Column.invoke_anonymous_function
    return invoke(None, "INPUT_FILE_NAME")
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def isnan(col: ColumnOrName) -> Column:
    """Apply ``expression.IsNan`` to ``col``."""
    nan_check = Column.invoke_expression_over_column(col, expression.IsNan)
    return nan_check
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def isnull(col: ColumnOrName) -> Column:
    """Call the anonymous function ``ISNULL`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "ISNULL")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def last(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
    """Apply ``expression.Last`` to ``col``, forwarding ``ignorenulls`` as ``ignore_nulls``."""
    last_column = Column.invoke_expression_over_column(
        col, expression.Last, ignore_nulls=ignorenulls
    )
    return last_column
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def monotonically_increasing_id() -> Column:
    """Call the anonymous function ``MONOTONICALLY_INCREASING_ID`` with no column."""
    invoke = Column.invoke_anonymous_function
    return invoke(None, "MONOTONICALLY_INCREASING_ID")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Call the anonymous function ``NANVL`` with ``col1`` and ``col2``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col1, "NANVL", col2)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def percentile_approx(
    col: ColumnOrName,
    percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]],
    accuracy: t.Optional[t.Union[ColumnOrLiteral, int]] = None,
) -> Column:
    """Apply ``expression.ApproxQuantile`` to ``col``.

    Args:
        col: Column (or column name) the expression is applied to.
        percentage: Wrapped with ``lit`` and passed as ``quantile``.
        accuracy: Forwarded as ``accuracy`` whenever it is not ``None``.

    Returns:
        The ``Column`` produced by ``Column.invoke_expression_over_column``.
    """
    kwargs: t.Dict[str, t.Any] = {"quantile": lit(percentage)}
    # Test against None rather than truthiness so an explicit falsy value
    # (e.g. 0) is forwarded instead of being silently discarded; this matches
    # the ``is not None`` checks used by ``round``, ``bround``, ``lag`` and ``lead``.
    if accuracy is not None:
        kwargs["accuracy"] = accuracy
    return Column.invoke_expression_over_column(col, expression.ApproxQuantile, **kwargs)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def rand(seed: t.Optional[ColumnOrLiteral] = None) -> Column:
    """Call the anonymous function ``RAND``, passing ``seed`` as the leading column."""
    invoke = Column.invoke_anonymous_function
    return invoke(seed, "RAND")
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def randn(seed: t.Optional[ColumnOrLiteral] = None) -> Column:
    """Call the anonymous function ``RANDN``, passing ``seed`` as the leading column."""
    invoke = Column.invoke_anonymous_function
    return invoke(seed, "RANDN")
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
    """Apply ``expression.Round`` to ``col``.

    ``scale`` is forwarded as ``decimals`` only when it is not ``None``.
    """
    if scale is None:
        return Column.invoke_expression_over_column(col, expression.Round)
    return Column.invoke_expression_over_column(col, expression.Round, decimals=scale)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column:
    """Call the anonymous function ``BROUND`` on ``col``.

    ``scale`` is appended as a second argument only when it is not ``None``.
    """
    trailing_args = () if scale is None else (scale,)
    return Column.invoke_anonymous_function(col, "BROUND", *trailing_args)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def shiftleft(col: ColumnOrName, numBits: int) -> Column:
    """Apply ``expression.BitwiseLeftShift`` to ``col`` with ``numBits`` as ``expression``."""
    shifted = Column.invoke_expression_over_column(
        col, expression.BitwiseLeftShift, expression=numBits
    )
    return shifted
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def shiftLeft(col: ColumnOrName, numBits: int) -> Column:
    """camelCase alias that forwards to ``shiftleft``."""
    forwarded = shiftleft(col, numBits)
    return forwarded
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def shiftright(col: ColumnOrName, numBits: int) -> Column:
    """Apply ``expression.BitwiseRightShift`` to ``col`` with ``numBits`` as ``expression``."""
    shifted = Column.invoke_expression_over_column(
        col, expression.BitwiseRightShift, expression=numBits
    )
    return shifted
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def shiftRight(col: ColumnOrName, numBits: int) -> Column:
    """camelCase alias that forwards to ``shiftright``."""
    forwarded = shiftright(col, numBits)
    return forwarded
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def shiftrightunsigned(col: ColumnOrName, numBits: int) -> Column:
    """Call the anonymous function ``SHIFTRIGHTUNSIGNED`` with ``col`` and ``numBits``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "SHIFTRIGHTUNSIGNED", numBits)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def shiftRightUnsigned(col: ColumnOrName, numBits: int) -> Column:
    """camelCase alias that forwards to ``shiftrightunsigned``."""
    forwarded = shiftrightunsigned(col, numBits)
    return forwarded
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def expr(str: str) -> Column:
    """Wrap the given string in a new ``Column``."""
    # The parameter name shadows the builtin ``str``; it is kept because
    # callers may pass it by keyword.
    wrapped = Column(str)
    return wrapped
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column:
    """Build an ``expression.Struct`` from ``col`` and any extra ``cols``.

    ``col`` is normalized with ``ensure_list`` before the remaining columns
    are appended; the combined list is passed as ``expressions``.
    """
    members = [*ensure_list(col), *cols]
    return Column.invoke_expression_over_column(None, expression.Struct, expressions=members)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column:
    """Call the anonymous function ``CONV`` with ``col``, ``fromBase`` and ``toBase``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "CONV", fromBase, toBase)
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def factorial(col: ColumnOrName) -> Column:
    """Call the anonymous function ``FACTORIAL`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "FACTORIAL")
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def lag(
    col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[ColumnOrLiteral] = None
) -> Column:
    """Call the anonymous function ``LAG`` on ``col``.

    Trailing arguments are added only when needed: ``offset`` and ``default``
    when ``default`` is given, ``offset`` alone when it differs from 1, and
    nothing otherwise.
    """
    if default is not None:
        trailing_args: t.Tuple[t.Any, ...] = (offset, default)
    elif offset != 1:
        trailing_args = (offset,)
    else:
        trailing_args = ()
    return Column.invoke_anonymous_function(col, "LAG", *trailing_args)
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def lead(
    col: ColumnOrName, offset: t.Optional[int] = 1, default: t.Optional[t.Any] = None
) -> Column:
    """Call the anonymous function ``LEAD`` on ``col``.

    Trailing arguments are added only when needed: ``offset`` and ``default``
    when ``default`` is given, ``offset`` alone when it differs from 1, and
    nothing otherwise.
    """
    if default is not None:
        trailing_args: t.Tuple[t.Any, ...] = (offset, default)
    elif offset != 1:
        trailing_args = (offset,)
    else:
        trailing_args = ()
    return Column.invoke_anonymous_function(col, "LEAD", *trailing_args)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def nth_value(
    col: ColumnOrName, offset: t.Optional[int] = 1, ignoreNulls: t.Optional[bool] = None
) -> Column:
    """Call the anonymous function ``NTH_VALUE`` on ``col``.

    ``offset`` is appended as an argument only when it differs from 1.

    Raises:
        NotImplementedError: if ``ignoreNulls`` is anything other than ``None``.
    """
    if ignoreNulls is not None:
        raise NotImplementedError("There is currently not support for `ignoreNulls` parameter")
    trailing_args = () if offset == 1 else (offset,)
    return Column.invoke_anonymous_function(col, "NTH_VALUE", *trailing_args)
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def ntile(n: int) -> Column:
    """Call the anonymous function ``NTILE`` with no column and ``n`` as its argument."""
    invoke = Column.invoke_anonymous_function
    return invoke(None, "NTILE", n)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def current_date() -> Column:
    """Build an ``expression.CurrentDate`` with no column operand."""
    today = Column.invoke_expression_over_column(None, expression.CurrentDate)
    return today
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def current_timestamp() -> Column:
    """Build an ``expression.CurrentTimestamp`` with no column operand."""
    now = Column.invoke_expression_over_column(None, expression.CurrentTimestamp)
    return now
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def date_format(col: ColumnOrName, format: str) -> Column:
    """Apply ``expression.TimeToStr`` to ``col`` with ``lit(format)`` as its ``format``."""
    format_literal = lit(format)
    return Column.invoke_expression_over_column(
        col, expression.TimeToStr, format=format_literal
    )
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def year(col: ColumnOrName) -> Column:
    """Apply ``expression.Year`` to ``col``."""
    year_column = Column.invoke_expression_over_column(col, expression.Year)
    return year_column
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def quarter(col: ColumnOrName) -> Column:
    """Call the anonymous function ``QUARTER`` on ``col``."""
    invoke = Column.invoke_anonymous_function
    return invoke(col, "QUARTER")
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def month(col: ColumnOrName) -> Column:
    """Apply ``expression.Month`` to ``col``."""
    month_column = Column.invoke_expression_over_column(col, expression.Month)
    return month_column
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def dayofweek(col: ColumnOrName) -> Column:
    """Build an ``expression.DayOfWeek`` over ``col``."""
    expression_class = expression.DayOfWeek
    return Column.invoke_expression_over_column(col, expression_class)
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def dayofmonth(col: ColumnOrName) -> Column:
    """Build an ``expression.DayOfMonth`` over ``col``."""
    expression_class = expression.DayOfMonth
    return Column.invoke_expression_over_column(col, expression_class)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def dayofyear(col: ColumnOrName) -> Column:
    """Build an ``expression.DayOfYear`` over ``col``."""
    expression_class = expression.DayOfYear
    return Column.invoke_expression_over_column(col, expression_class)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def hour(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``HOUR`` function over ``col``."""
    function_name = "HOUR"
    return Column.invoke_anonymous_function(col, function_name)
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def minute(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``MINUTE`` function over ``col``."""
    function_name = "MINUTE"
    return Column.invoke_anonymous_function(col, function_name)
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def second(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``SECOND`` function over ``col``."""
    function_name = "SECOND"
    return Column.invoke_anonymous_function(col, function_name)
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def weekofyear(col: ColumnOrName) -> Column:
    """Build an ``expression.WeekOfYear`` over ``col``."""
    expression_class = expression.WeekOfYear
    return Column.invoke_expression_over_column(col, expression_class)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column:
    """Invoke the anonymous ``MAKE_DATE`` function over ``year`` with ``month`` and ``day`` as extra arguments."""
    remaining_parts = (month, day)
    return Column.invoke_anonymous_function(year, "MAKE_DATE", *remaining_parts)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def date_add(col: ColumnOrName, days: t.Union[ColumnOrName, int]) -> Column:
    """Build an ``expression.DateAdd`` over ``col`` with ``days`` as the operand and a ``day`` unit."""
    day_unit = expression.Var(this="day")
    return Column.invoke_expression_over_column(
        col, expression.DateAdd, expression=days, unit=day_unit
    )
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def date_sub(col: ColumnOrName, days: t.Union[ColumnOrName, int]) -> Column:
    """Build an ``expression.DateSub`` over ``col`` with ``days`` as the operand and a ``day`` unit."""
    day_unit = expression.Var(this="day")
    return Column.invoke_expression_over_column(
        col, expression.DateSub, expression=days, unit=day_unit
    )
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def date_diff(end: ColumnOrName, start: ColumnOrName) -> Column:
    """Build an ``expression.DateDiff`` with ``end`` as the primary column and ``start`` as ``expression``."""
    expression_class = expression.DateDiff
    return Column.invoke_expression_over_column(end, expression_class, expression=start)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def add_months(start: ColumnOrName, months: t.Union[ColumnOrName, int]) -> Column:
    """Invoke the anonymous ``ADD_MONTHS`` function over ``start`` with ``months`` as its argument."""
    function_name = "ADD_MONTHS"
    return Column.invoke_anonymous_function(start, function_name, months)
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def months_between(
    date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None
) -> Column:
    """Build an ``expression.MonthsBetween`` over ``date1`` with ``date2`` as ``expression``.

    The ``roundoff`` keyword is supplied only when ``roundOff`` is not ``None``.
    """
    keyword_args: t.Dict[str, t.Any] = {"expression": date2}
    if roundOff is not None:
        keyword_args["roundoff"] = roundOff
    return Column.invoke_expression_over_column(date1, expression.MonthsBetween, **keyword_args)
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """Build an ``expression.TsOrDsToDate`` over ``col``.

    When ``format`` is given it is passed along as a literal ``format`` keyword.
    """
    if format is None:
        return Column.invoke_expression_over_column(col, expression.TsOrDsToDate)
    format_literal = lit(format)
    return Column.invoke_expression_over_column(
        col, expression.TsOrDsToDate, format=format_literal
    )
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """Convert ``col`` to a timestamp Column.

    Without ``format`` the column is cast to ``"timestamp"``; with ``format`` an
    ``expression.StrToTime`` is built using the literal format.
    """
    if format is None:
        return Column.ensure_col(col).cast("timestamp")
    format_literal = lit(format)
    return Column.invoke_expression_over_column(col, expression.StrToTime, format=format_literal)
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def trunc(col: ColumnOrName, format: str) -> Column:
    """Build an ``expression.DateTrunc`` over ``col`` with ``format`` passed as the literal ``unit``."""
    unit_literal = lit(format)
    return Column.invoke_expression_over_column(col, expression.DateTrunc, unit=unit_literal)
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def date_trunc(format: str, timestamp: ColumnOrName) -> Column:
    """Build an ``expression.TimestampTrunc`` over ``timestamp`` with ``format`` as the literal ``unit``."""
    unit_literal = lit(format)
    return Column.invoke_expression_over_column(
        timestamp, expression.TimestampTrunc, unit=unit_literal
    )
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
    """Invoke the anonymous ``NEXT_DAY`` function over ``col`` with ``dayOfWeek`` as a literal."""
    day_literal = lit(dayOfWeek)
    return Column.invoke_anonymous_function(col, "NEXT_DAY", day_literal)
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def last_day(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``LAST_DAY`` function over ``col``."""
    function_name = "LAST_DAY"
    return Column.invoke_anonymous_function(col, function_name)
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column:
    """Build an ``expression.UnixToStr`` over ``col``.

    When ``format`` is given it is passed along as a literal ``format`` keyword.
    """
    if format is None:
        return Column.invoke_expression_over_column(col, expression.UnixToStr)
    format_literal = lit(format)
    return Column.invoke_expression_over_column(col, expression.UnixToStr, format=format_literal)
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def unix_timestamp(
    timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None
) -> Column:
    """Build an ``expression.StrToUnix`` over ``timestamp`` (which may be ``None``).

    When ``format`` is given it is passed along as a literal ``format`` keyword.
    """
    if format is None:
        return Column.invoke_expression_over_column(timestamp, expression.StrToUnix)
    format_literal = lit(format)
    return Column.invoke_expression_over_column(
        timestamp, expression.StrToUnix, format=format_literal
    )
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
    """Invoke the anonymous ``FROM_UTC_TIMESTAMP`` function over ``timestamp``.

    ``tz`` is used as-is when it is already a ``Column``; otherwise it is wrapped with ``lit``.
    """
    if isinstance(tz, Column):
        zone = tz
    else:
        zone = lit(tz)
    return Column.invoke_anonymous_function(timestamp, "FROM_UTC_TIMESTAMP", zone)
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column:
    """Invoke the anonymous ``TO_UTC_TIMESTAMP`` function over ``timestamp``.

    ``tz`` is used as-is when it is already a ``Column``; otherwise it is wrapped with ``lit``.
    """
    if isinstance(tz, Column):
        zone = tz
    else:
        zone = lit(tz)
    return Column.invoke_anonymous_function(timestamp, "TO_UTC_TIMESTAMP", zone)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def timestamp_seconds(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``TIMESTAMP_SECONDS`` function over ``col``."""
    function_name = "TIMESTAMP_SECONDS"
    return Column.invoke_anonymous_function(col, function_name)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def window(
    timeColumn: ColumnOrName,
    windowDuration: str,
    slideDuration: t.Optional[str] = None,
    startTime: t.Optional[str] = None,
) -> Column:
    """Invoke the anonymous ``WINDOW`` function over ``timeColumn``.

    The literal arguments are, in order: ``windowDuration``, then the slide
    duration, then ``startTime``. When ``startTime`` is given without
    ``slideDuration``, ``windowDuration`` is repeated in the slide position.
    Arguments that are ``None`` (and not needed as a placeholder) are omitted.
    """
    literal_args = [lit(windowDuration)]
    if slideDuration is not None:
        literal_args.append(lit(slideDuration))
    elif startTime is not None:
        # No explicit slide: reuse the window duration so startTime stays third.
        literal_args.append(lit(windowDuration))
    if startTime is not None:
        literal_args.append(lit(startTime))
    return Column.invoke_anonymous_function(timeColumn, "WINDOW", *literal_args)
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def session_window(timeColumn: ColumnOrName, gapDuration: ColumnOrName) -> Column:
    """Invoke the anonymous ``SESSION_WINDOW`` function over ``timeColumn``.

    ``gapDuration`` is used as-is when it is a ``Column``; otherwise it is wrapped with ``lit``.
    """
    if isinstance(gapDuration, Column):
        gap = gapDuration
    else:
        gap = lit(gapDuration)
    return Column.invoke_anonymous_function(timeColumn, "SESSION_WINDOW", gap)
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def crc32(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``CRC32`` function; a non-``Column`` ``col`` is first wrapped with ``lit``."""
    if isinstance(col, Column):
        target = col
    else:
        target = lit(col)
    return Column.invoke_anonymous_function(target, "CRC32")
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
def md5(col: ColumnOrName) -> Column:
    """Build an ``expression.MD5``; a non-``Column`` ``col`` is first wrapped with ``lit``."""
    if isinstance(col, Column):
        target = col
    else:
        target = lit(col)
    return Column.invoke_expression_over_column(target, expression.MD5)
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def sha1(col: ColumnOrName) -> Column:
    """Build an ``expression.SHA``; a non-``Column`` ``col`` is first wrapped with ``lit``."""
    if isinstance(col, Column):
        target = col
    else:
        target = lit(col)
    return Column.invoke_expression_over_column(target, expression.SHA)
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def sha2(col: ColumnOrName, numBits: int) -> Column:
    """Build an ``expression.SHA2`` with ``numBits`` as the literal ``length``.

    A non-``Column`` ``col`` is first wrapped with ``lit``.
    """
    if isinstance(col, Column):
        target = col
    else:
        target = lit(col)
    length_literal = lit(numBits)
    return Column.invoke_expression_over_column(target, expression.SHA2, length=length_literal)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def hash(*cols: ColumnOrName) -> Column:
    """Invoke the anonymous ``HASH`` function over the first column, passing the rest as arguments.

    Raises:
        IndexError: if no columns are supplied.
    """
    # Slicing past the end yields an empty tuple, so a single column adds no extra arguments.
    first = cols[0]
    remaining = cols[1:]
    return Column.invoke_anonymous_function(first, "HASH", *remaining)
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def xxhash64(*cols: ColumnOrName) -> Column:
    """Invoke the anonymous ``XXHASH64`` function over the first column, passing the rest as arguments.

    Raises:
        IndexError: if no columns are supplied.
    """
    # Slicing past the end yields an empty tuple, so a single column adds no extra arguments.
    first = cols[0]
    remaining = cols[1:]
    return Column.invoke_anonymous_function(first, "XXHASH64", *remaining)
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
def assert_true(col: ColumnOrName, errorMsg: t.Optional[ColumnOrName] = None) -> Column:
    """Invoke the anonymous ``ASSERT_TRUE`` function over ``col``.

    When ``errorMsg`` is given it is appended as an argument, wrapped with
    ``lit`` unless it is already a ``Column``.
    """
    if errorMsg is None:
        return Column.invoke_anonymous_function(col, "ASSERT_TRUE")
    message = errorMsg if isinstance(errorMsg, Column) else lit(errorMsg)
    return Column.invoke_anonymous_function(col, "ASSERT_TRUE", message)
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def raise_error(errorMsg: ColumnOrName) -> Column:
    """Invoke the anonymous ``RAISE_ERROR`` function over ``errorMsg``.

    A non-``Column`` ``errorMsg`` is first wrapped with ``lit``.
    """
    if isinstance(errorMsg, Column):
        message = errorMsg
    else:
        message = lit(errorMsg)
    return Column.invoke_anonymous_function(message, "RAISE_ERROR")
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
def upper(col: ColumnOrName) -> Column:
    """Build an ``expression.Upper`` over ``col``."""
    expression_class = expression.Upper
    return Column.invoke_expression_over_column(col, expression_class)
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def lower(col: ColumnOrName) -> Column:
    """Build an ``expression.Lower`` over ``col``."""
    expression_class = expression.Lower
    return Column.invoke_expression_over_column(col, expression_class)
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def ascii(col: ColumnOrLiteral) -> Column:
    """Invoke the anonymous ``ASCII`` function over ``col``."""
    function_name = "ASCII"
    return Column.invoke_anonymous_function(col, function_name)
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def base64(col: ColumnOrLiteral) -> Column:
    """Build an ``expression.ToBase64`` over ``col``."""
    expression_class = expression.ToBase64
    return Column.invoke_expression_over_column(col, expression_class)
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def unbase64(col: ColumnOrLiteral) -> Column:
    """Build an ``expression.FromBase64`` over ``col``."""
    expression_class = expression.FromBase64
    return Column.invoke_expression_over_column(col, expression_class)
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def ltrim(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``LTRIM`` function over ``col``."""
    function_name = "LTRIM"
    return Column.invoke_anonymous_function(col, function_name)
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def rtrim(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``RTRIM`` function over ``col``."""
    function_name = "RTRIM"
    return Column.invoke_anonymous_function(col, function_name)
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def trim(col: ColumnOrName) -> Column:
    """Build an ``expression.Trim`` over ``col``."""
    expression_class = expression.Trim
    return Column.invoke_expression_over_column(col, expression_class)
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def concat_ws(sep: str, *cols: ColumnOrName) -> Column:
    """Build an ``expression.ConcatWs`` whose expressions are the literal ``sep`` followed by ``cols``."""
    parts = [lit(sep), *cols]
    return Column.invoke_expression_over_column(None, expression.ConcatWs, expressions=parts)
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def decode(col: ColumnOrName, charset: str) -> Column:
    """Build an ``expression.Decode`` over ``col`` with ``charset`` as a string literal."""
    charset_literal = expression.Literal.string(charset)
    return Column.invoke_expression_over_column(col, expression.Decode, charset=charset_literal)
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def encode(col: ColumnOrName, charset: str) -> Column:
    """Build an ``expression.Encode`` over ``col`` with ``charset`` as a string literal."""
    charset_literal = expression.Literal.string(charset)
    return Column.invoke_expression_over_column(col, expression.Encode, charset=charset_literal)
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def format_number(col: ColumnOrName, d: int) -> Column:
    """Invoke the anonymous ``FORMAT_NUMBER`` function over ``col`` with ``d`` as a literal."""
    digits_literal = lit(d)
    return Column.invoke_anonymous_function(col, "FORMAT_NUMBER", digits_literal)
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def format_string(format: str, *cols: ColumnOrName) -> Column:
    """Invoke the anonymous ``FORMAT_STRING`` function over the literal ``format``.

    Each entry of ``cols`` is passed through ``Column.ensure_col`` and appended as an argument.
    """
    template = lit(format)
    ensured = []
    for candidate in cols:
        ensured.append(Column.ensure_col(candidate))
    return Column.invoke_anonymous_function(template, "FORMAT_STRING", *ensured)
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def instr(col: ColumnOrName, substr: str) -> Column:
    """Invoke the anonymous ``INSTR`` function over ``col`` with ``substr`` as a literal."""
    needle = lit(substr)
    return Column.invoke_anonymous_function(col, "INSTR", needle)
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
def overlay(
    src: ColumnOrName,
    replace: ColumnOrName,
    pos: t.Union[ColumnOrName, int],
    len: t.Optional[t.Union[ColumnOrName, int]] = None,
) -> Column:
    """Invoke the anonymous ``OVERLAY`` function over ``src``.

    ``replace`` and ``pos`` are always passed; ``len`` is appended only when it is not ``None``.
    """
    call_args = [replace, pos]
    if len is not None:
        call_args.append(len)
    return Column.invoke_anonymous_function(src, "OVERLAY", *call_args)
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
def sentences(
    string: ColumnOrName,
    language: t.Optional[ColumnOrName] = None,
    country: t.Optional[ColumnOrName] = None,
) -> Column:
    """Invoke the anonymous ``SENTENCES`` function over ``string``.

    ``language`` and ``country`` are appended in that order when provided. If
    only ``country`` is provided, the literal ``"en"`` fills the language slot.
    """
    call_args: t.List[t.Any] = []
    if language is not None:
        call_args.append(language)
    elif country is not None:
        # Keep country in the second position by supplying a placeholder language.
        call_args.append(lit("en"))
    if country is not None:
        call_args.append(country)
    return Column.invoke_anonymous_function(string, "SENTENCES", *call_args)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def substring(str: ColumnOrName, pos: int, len: int) -> Column:
    """Return ``substr(pos, len)`` of ``str`` after passing it through ``Column.ensure_col``."""
    source_column = Column.ensure_col(str)
    return source_column.substr(pos, len)
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
    """Invoke the anonymous ``SUBSTRING_INDEX`` function over ``str`` with literal ``delim`` and ``count``."""
    delimiter_literal = lit(delim)
    count_literal = lit(count)
    return Column.invoke_anonymous_function(
        str, "SUBSTRING_INDEX", delimiter_literal, count_literal
    )
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column:
    """Build an ``expression.Levenshtein`` over ``left`` with ``right`` as ``expression``."""
    expression_class = expression.Levenshtein
    return Column.invoke_expression_over_column(left, expression_class, expression=right)
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def locate(substr: str, str: ColumnOrName, pos: t.Optional[int] = None) -> Column:
    """Build an ``expression.StrPosition`` over ``str`` with ``substr`` as a literal.

    The ``position`` keyword is supplied only when ``pos`` is not ``None``.
    """
    keyword_args: t.Dict[str, t.Any] = {"substr": lit(substr)}
    if pos is not None:
        keyword_args["position"] = pos
    return Column.invoke_expression_over_column(str, expression.StrPosition, **keyword_args)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def lpad(col: ColumnOrName, len: int, pad: str) -> Column:
    """Invoke the anonymous ``LPAD`` function over ``col`` with literal ``len`` and ``pad``."""
    length_literal = lit(len)
    pad_literal = lit(pad)
    return Column.invoke_anonymous_function(col, "LPAD", length_literal, pad_literal)
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def rpad(col: ColumnOrName, len: int, pad: str) -> Column:
    """Invoke the anonymous ``RPAD`` function over ``col`` with literal ``len`` and ``pad``."""
    length_literal = lit(len)
    pad_literal = lit(pad)
    return Column.invoke_anonymous_function(col, "RPAD", length_literal, pad_literal)
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def repeat(col: ColumnOrName, n: int) -> Column:
    """Build an ``expression.Repeat`` over ``col`` with ``n`` as the literal ``times``."""
    times_literal = lit(n)
    return Column.invoke_expression_over_column(col, expression.Repeat, times=times_literal)
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column:
    """Build an ``expression.RegexpSplit`` over ``str`` using the literal ``pattern``.

    Without ``limit`` the literal Column itself is passed as ``expression``;
    with ``limit`` the literal's underlying ``.expression`` is passed together
    with ``limit``.
    """
    if limit is None:
        return Column.invoke_expression_over_column(
            str, expression.RegexpSplit, expression=lit(pattern)
        )
    pattern_expression = lit(pattern).expression
    return Column.invoke_expression_over_column(
        str, expression.RegexpSplit, expression=pattern_expression, limit=limit
    )
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def regexp_extract(str: ColumnOrName, pattern: str, idx: t.Optional[int] = None) -> Column:
    """Build an ``expression.RegexpExtract`` over ``str``.

    ``pattern`` is passed as a literal ``expression`` and ``idx`` is forwarded
    unchanged (including ``None``) as ``group``.
    """
    pattern_literal = lit(pattern)
    return Column.invoke_expression_over_column(
        str, expression.RegexpExtract, expression=pattern_literal, group=idx
    )
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def regexp_replace(
    str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None
) -> Column:
    """Build an ``expression.RegexpReplace`` over ``str``.

    ``pattern`` and ``replacement`` are passed as literals; ``position`` is
    forwarded unchanged (including ``None``).
    """
    pattern_literal = lit(pattern)
    replacement_literal = lit(replacement)
    return Column.invoke_expression_over_column(
        str,
        expression.RegexpReplace,
        expression=pattern_literal,
        replacement=replacement_literal,
        position=position,
    )
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def initcap(col: ColumnOrName) -> Column:
    """Build an ``expression.Initcap`` over ``col``."""
    expression_class = expression.Initcap
    return Column.invoke_expression_over_column(col, expression_class)
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def soundex(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``SOUNDEX`` function over ``col``."""
    function_name = "SOUNDEX"
    return Column.invoke_anonymous_function(col, function_name)
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def bin(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``BIN`` function over ``col``."""
    function_name = "BIN"
    return Column.invoke_anonymous_function(col, function_name)
|
|
902
|
+
|
|
903
|
+
|
|
904
|
+
def hex(col: ColumnOrName) -> Column:
    """Build an ``expression.Hex`` over ``col``."""
    expression_class = expression.Hex
    return Column.invoke_expression_over_column(col, expression_class)
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def unhex(col: ColumnOrName) -> Column:
    """Build an ``expression.Unhex`` over ``col``."""
    expression_class = expression.Unhex
    return Column.invoke_expression_over_column(col, expression_class)
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
def length(col: ColumnOrName) -> Column:
    """Build an ``expression.Length`` over ``col``."""
    expression_class = expression.Length
    return Column.invoke_expression_over_column(col, expression_class)
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
def octet_length(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``OCTET_LENGTH`` function over ``col``."""
    function_name = "OCTET_LENGTH"
    return Column.invoke_anonymous_function(col, function_name)
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
def bit_length(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``BIT_LENGTH`` function over ``col``."""
    function_name = "BIT_LENGTH"
    return Column.invoke_anonymous_function(col, function_name)
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column:
    """Invoke the anonymous ``TRANSLATE`` function over ``srcCol`` with literal ``matching`` and ``replace``."""
    matching_literal = lit(matching)
    replace_literal = lit(replace)
    return Column.invoke_anonymous_function(
        srcCol, "TRANSLATE", matching_literal, replace_literal
    )
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def array(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """Build an ``expression.Array`` from ``cols``.

    If the first argument is neither a ``str`` nor a ``Column``, the arguments
    are run through ``_flatten`` before being used as the array's expressions.
    """
    if isinstance(cols[0], (str, Column)):
        members = cols
    else:
        members = _flatten(cols)
    return Column.invoke_expression_over_column(None, expression.Array, expressions=members)
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def create_map(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """Build an ``expression.VarMap`` from alternating key/value entries in ``cols``.

    If the first argument is neither a ``str`` nor a ``Column``, the arguments
    are flattened first. Even-indexed entries become the keys array and
    odd-indexed entries become the values array.
    """
    if isinstance(cols[0], (str, Column)):
        entries = cols
    else:
        entries = list(_flatten(cols))  # type: ignore
    key_array = array(*entries[::2]).expression
    value_array = array(*entries[1::2]).expression
    return Column.invoke_expression_over_column(
        None, expression.VarMap, keys=key_array, values=value_array
    )
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Build an ``expression.Map`` with ``col1`` as ``keys`` and ``col2`` as ``values``."""
    expression_class = expression.Map
    return Column.invoke_expression_over_column(None, expression_class, keys=col1, values=col2)
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def array_contains(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Build an ``expression.ArrayContains`` over ``col``.

    ``value`` is wrapped with ``lit`` unless it is already a ``Column``; its
    underlying ``.expression`` is what gets passed along.
    """
    if isinstance(value, Column):
        needle = value
    else:
        needle = lit(value)
    return Column.invoke_expression_over_column(
        col, expression.ArrayContains, expression=needle.expression
    )
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAYS_OVERLAP`` function over ``col1`` with ``col2`` ensured as a Column."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAYS_OVERLAP", other)
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
def slice(
    x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int]
) -> Column:
    """Invoke the anonymous ``SLICE`` function over ``x``.

    ``start`` and ``length`` are each wrapped with ``lit`` unless already a ``Column``.
    """
    if isinstance(start, Column):
        begin = start
    else:
        begin = lit(start)
    if isinstance(length, Column):
        span = length
    else:
        span = lit(length)
    return Column.invoke_anonymous_function(x, "SLICE", begin, span)
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
def array_join(
    col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None
) -> Column:
    """Build an ``expression.ArrayJoin`` over ``col`` with a literal ``delimiter``.

    The ``null`` keyword is supplied (as a literal) only when
    ``null_replacement`` is not ``None``.
    """
    keyword_args: t.Dict[str, t.Any] = {"expression": lit(delimiter)}
    if null_replacement is not None:
        keyword_args["null"] = lit(null_replacement)
    return Column.invoke_expression_over_column(col, expression.ArrayJoin, **keyword_args)
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def concat(*cols: ColumnOrName) -> Column:
    """Build an ``expression.Concat`` whose expressions are ``cols``."""
    expression_class = expression.Concat
    return Column.invoke_expression_over_column(None, expression_class, expressions=cols)
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke the anonymous ``ARRAY_POSITION`` function over ``col``.

    ``value`` is wrapped with ``lit`` unless it is already a ``Column``.
    """
    if isinstance(value, Column):
        needle = value
    else:
        needle = lit(value)
    return Column.invoke_anonymous_function(col, "ARRAY_POSITION", needle)
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def element_at(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke the anonymous ``ELEMENT_AT`` function over ``col``.

    ``value`` is wrapped with ``lit`` unless it is already a ``Column``.
    """
    if isinstance(value, Column):
        selector = value
    else:
        selector = lit(value)
    return Column.invoke_anonymous_function(col, "ELEMENT_AT", selector)
|
|
990
|
+
|
|
991
|
+
|
|
992
|
+
def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
    """Invoke the anonymous ``ARRAY_REMOVE`` function over ``col``.

    ``value`` is wrapped with ``lit`` unless it is already a ``Column``.
    """
    if isinstance(value, Column):
        target = value
    else:
        target = lit(value)
    return Column.invoke_anonymous_function(col, "ARRAY_REMOVE", target)
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def array_distinct(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_DISTINCT`` function over ``col``."""
    function_name = "ARRAY_DISTINCT"
    return Column.invoke_anonymous_function(col, function_name)
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_INTERSECT`` function over ``col1`` with ``col2`` ensured as a Column."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_INTERSECT", other)
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_UNION`` function over ``col1`` with ``col2`` ensured as a Column."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_UNION", other)
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_EXCEPT`` function over ``col1`` with ``col2`` ensured as a Column."""
    other = Column.ensure_col(col2)
    return Column.invoke_anonymous_function(col1, "ARRAY_EXCEPT", other)
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def explode(col: ColumnOrName) -> Column:
    """Build an ``expression.Explode`` over ``col``."""
    expression_class = expression.Explode
    return Column.invoke_expression_over_column(col, expression_class)
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
def posexplode(col: ColumnOrName) -> Column:
    """Build an ``expression.Posexplode`` over ``col``."""
    expression_class = expression.Posexplode
    return Column.invoke_expression_over_column(col, expression_class)
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def explode_outer(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``EXPLODE_OUTER`` function over ``col``."""
    function_name = "EXPLODE_OUTER"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
def posexplode_outer(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``POSEXPLODE_OUTER`` function over ``col``."""
    function_name = "POSEXPLODE_OUTER"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def get_json_object(col: ColumnOrName, path: str) -> Column:
    """Build an ``expression.JSONExtract`` over ``col`` with ``path`` as a literal."""
    path_literal = lit(path)
    return Column.invoke_expression_over_column(col, expression.JSONExtract, path=path_literal)
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
def json_tuple(col: ColumnOrName, *fields: str) -> Column:
    """Invoke the anonymous ``JSON_TUPLE`` function over ``col`` with each field name as a literal."""
    field_literals = []
    for field_name in fields:
        field_literals.append(lit(field_name))
    return Column.invoke_anonymous_function(col, "JSON_TUPLE", *field_literals)
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
def from_json(
    col: ColumnOrName,
    schema: t.Union[Column, str],
    options: t.Optional[t.Dict[str, str]] = None,
) -> Column:
    """Invoke the anonymous ``FROM_JSON`` function over ``col``.

    ``schema`` is wrapped with ``lit`` unless it is already a ``Column``. When
    ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and appended as a final argument.
    """
    schema_column = schema if isinstance(schema, Column) else lit(schema)
    if options is None:
        return Column.invoke_anonymous_function(col, "FROM_JSON", schema_column)
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_anonymous_function(col, "FROM_JSON", schema_column, option_map)
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
def to_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Build an ``expression.JSONFormat`` over ``col``.

    When ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and passed as the ``options`` keyword.
    """
    if options is None:
        return Column.invoke_expression_over_column(col, expression.JSONFormat)
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_expression_over_column(col, expression.JSONFormat, options=option_map)
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def schema_of_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke the anonymous ``SCHEMA_OF_JSON`` function over ``col``.

    When ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and appended as an argument.
    """
    if options is None:
        return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON")
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON", option_map)
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def schema_of_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke the anonymous ``SCHEMA_OF_CSV`` function over ``col``.

    When ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and appended as an argument.
    """
    if options is None:
        return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV")
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV", option_map)
|
|
1068
|
+
|
|
1069
|
+
|
|
1070
|
+
def to_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
    """Invoke the anonymous ``TO_CSV`` function over ``col``.

    When ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and appended as an argument.
    """
    if options is None:
        return Column.invoke_anonymous_function(col, "TO_CSV")
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_anonymous_function(col, "TO_CSV", option_map)
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
def size(col: ColumnOrName) -> Column:
    """Build an ``expression.ArraySize`` over ``col``."""
    expression_class = expression.ArraySize
    return Column.invoke_expression_over_column(col, expression_class)
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def array_min(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_MIN`` function over ``col``."""
    function_name = "ARRAY_MIN"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def array_max(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_MAX`` function over ``col``."""
    function_name = "ARRAY_MAX"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column:
    """Build an ``expression.SortArray`` over ``col``.

    The ``asc`` keyword is supplied only when ``asc`` is not ``None``.
    """
    keyword_args: t.Dict[str, t.Any] = {}
    if asc is not None:
        keyword_args["asc"] = asc
    return Column.invoke_expression_over_column(col, expression.SortArray, **keyword_args)
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def array_sort(
    col: ColumnOrName,
    comparator: t.Optional[t.Union[t.Callable[[Column, Column], Column]]] = None,
) -> Column:
    """Build an ``expression.ArraySort`` over ``col``.

    When ``comparator`` is given it is converted with ``_get_lambda_from_func``
    and passed as the ``expression`` keyword.
    """
    keyword_args: t.Dict[str, t.Any] = {}
    if comparator is not None:
        keyword_args["expression"] = _get_lambda_from_func(comparator)
    return Column.invoke_expression_over_column(col, expression.ArraySort, **keyword_args)
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
def shuffle(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``SHUFFLE`` function over ``col``."""
    function_name = "SHUFFLE"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def reverse(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``REVERSE`` function over ``col``."""
    function_name = "REVERSE"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
def flatten(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``FLATTEN`` function over ``col``.

    Note: this returns a ``Column``; it is distinct from the ``_flatten``
    iterable helper used elsewhere in this module.
    """
    function_name = "FLATTEN"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def map_keys(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``MAP_KEYS`` function over ``col``."""
    function_name = "MAP_KEYS"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1121
|
+
|
|
1122
|
+
|
|
1123
|
+
def map_values(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``MAP_VALUES`` function over ``col``."""
    function_name = "MAP_VALUES"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def map_entries(col: ColumnOrName) -> Column:
    """Invoke the anonymous ``MAP_ENTRIES`` function over ``col``."""
    function_name = "MAP_ENTRIES"
    return Column.invoke_anonymous_function(col, function_name)
|
|
1129
|
+
|
|
1130
|
+
|
|
1131
|
+
def map_from_entries(col: ColumnOrName) -> Column:
    """Build an ``expression.MapFromEntries`` over ``col``."""
    expression_class = expression.MapFromEntries
    return Column.invoke_expression_over_column(col, expression_class)
|
|
1133
|
+
|
|
1134
|
+
|
|
1135
|
+
def array_repeat(col: ColumnOrName, count: t.Union[ColumnOrName, int]) -> Column:
    """Invoke the anonymous ``ARRAY_REPEAT`` function over ``col``.

    ``count`` is wrapped with ``lit`` unless it is already a ``Column``.
    """
    if isinstance(count, Column):
        repetitions = count
    else:
        repetitions = lit(count)
    return Column.invoke_anonymous_function(col, "ARRAY_REPEAT", repetitions)
|
|
1138
|
+
|
|
1139
|
+
|
|
1140
|
+
def array_zip(*cols: ColumnOrName) -> Column:
    """Invoke the anonymous ``ARRAY_ZIP`` function over the first column, passing the rest as arguments.

    Raises:
        IndexError: if no columns are supplied.
    """
    # With a single column the tail slice is empty, so no extra arguments are passed.
    leading, trailing = cols[0], cols[1:]
    return Column.invoke_anonymous_function(leading, "ARRAY_ZIP", *trailing)
|
|
1144
|
+
|
|
1145
|
+
|
|
1146
|
+
def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
    """Invoke the anonymous ``MAP_CONCAT`` function over the given map columns.

    Columns may be passed either as separate positional arguments or as a
    single iterable. If the first argument is neither a ``str`` nor a
    ``Column``, the arguments are flattened before use. The first resulting
    column is the function's subject and the rest are passed as arguments.

    Raises:
        IndexError: if no columns are supplied.
    """
    # Use the `_flatten` iterable helper here (as `array` and `create_map` do).
    # The module-level `flatten` builds a FLATTEN Column and cannot unpack a
    # nested iterable of columns.
    columns = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
    if len(columns) == 1:
        return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")
    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
def sequence(
    start: ColumnOrName, stop: ColumnOrName, step: t.Optional[ColumnOrName] = None
) -> Column:
    """Invoke the anonymous ``SEQUENCE`` function over ``start``.

    ``stop`` is always passed; ``step`` is appended only when it is not ``None``.
    """
    call_args = [stop]
    if step is not None:
        call_args.append(step)
    return Column.invoke_anonymous_function(start, "SEQUENCE", *call_args)
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def from_csv(
    col: ColumnOrName,
    schema: t.Union[Column, str],
    options: t.Optional[t.Dict[str, str]] = None,
) -> Column:
    """Invoke the anonymous ``FROM_CSV`` function over ``col``.

    ``schema`` is wrapped with ``lit`` unless it is already a ``Column``. When
    ``options`` is given, its flattened key/value items are turned into
    literals, combined via ``create_map`` and appended as a final argument.
    """
    schema_column = schema if isinstance(schema, Column) else lit(schema)
    if options is None:
        return Column.invoke_anonymous_function(col, "FROM_CSV", schema_column)
    option_map = create_map([lit(item) for item in _flatten(options.items())])
    return Column.invoke_anonymous_function(col, "FROM_CSV", schema_column, option_map)
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
def aggregate(
    col: ColumnOrName,
    initialValue: ColumnOrName,
    merge: t.Callable[[Column, Column], Column],
    finish: t.Optional[t.Callable[[Column], Column]] = None,
) -> Column:
    """Build an ``expression.Reduce`` over ``col``.

    ``initialValue`` is passed as ``initial``. ``merge`` (and ``finish``, when
    provided) are converted with ``_get_lambda_from_func`` and wrapped in a
    ``Column`` before being passed as the ``merge`` / ``finish`` keywords.
    """
    merge_lambda = _get_lambda_from_func(merge)
    finish_lambda = None if finish is None else _get_lambda_from_func(finish)

    reduce_kwargs: t.Dict[str, t.Any] = {
        "initial": initialValue,
        "merge": Column(merge_lambda),
    }
    if finish_lambda is not None:
        reduce_kwargs["finish"] = Column(finish_lambda)
    return Column.invoke_expression_over_column(col, expression.Reduce, **reduce_kwargs)
|
|
1192
|
+
|
|
1193
|
+
|
|
1194
|
+
def transform(
    col: ColumnOrName,
    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
) -> Column:
    """Build an ``expression.Transform`` over ``col``.

    ``f`` is converted with ``_get_lambda_from_func`` and wrapped in a
    ``Column`` before being passed as ``expression``.
    """
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_expression_over_column(
        col, expression.Transform, expression=lambda_column
    )
|
|
1202
|
+
|
|
1203
|
+
|
|
1204
|
+
def exists(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
    """Invoke the anonymous ``EXISTS`` function over ``col`` with ``f`` converted to a lambda Column."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "EXISTS", lambda_column)
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
def forall(col: ColumnOrName, f: t.Callable[[Column], Column]) -> Column:
    """Invoke the anonymous ``FORALL`` function over ``col`` with ``f`` converted to a lambda Column."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "FORALL", lambda_column)
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
def filter(
    col: ColumnOrName,
    f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
) -> Column:
    """Build an ``expression.ArrayFilter`` over ``col``.

    ``f`` is converted with ``_get_lambda_from_func`` and the resulting lambda
    expression is passed directly (not wrapped in a ``Column``) as ``expression``.
    """
    predicate_lambda = _get_lambda_from_func(f)
    return Column.invoke_expression_over_column(
        col, expression.ArrayFilter, expression=predicate_lambda
    )
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
def zip_with(
    left: ColumnOrName, right: ColumnOrName, f: t.Callable[[Column, Column], Column]
) -> Column:
    """Invoke the anonymous ``ZIP_WITH`` function over ``left``.

    ``right`` and ``f`` (converted to a lambda Column) are passed as arguments.
    """
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(left, "ZIP_WITH", right, lambda_column)
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def transform_keys(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke the anonymous ``TRANSFORM_KEYS`` function over ``col`` with ``f`` converted to a lambda Column."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "TRANSFORM_KEYS", lambda_column)
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
def transform_values(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke the anonymous ``TRANSFORM_VALUES`` function over ``col`` with ``f`` converted to a lambda Column."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "TRANSFORM_VALUES", lambda_column)
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
def map_filter(col: ColumnOrName, f: t.Union[t.Callable[[Column, Column], Column]]) -> Column:
    """Invoke the anonymous ``MAP_FILTER`` function over ``col`` with ``f`` converted to a lambda Column."""
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col, "MAP_FILTER", lambda_column)
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def map_zip_with(
    col1: ColumnOrName,
    col2: ColumnOrName,
    f: t.Union[t.Callable[[Column, Column, Column], Column]],
) -> Column:
    """Invoke the anonymous ``MAP_ZIP_WITH`` function over ``col1``.

    ``col2`` and ``f`` (converted to a lambda Column) are passed as arguments.
    """
    lambda_column = Column(_get_lambda_from_func(f))
    return Column.invoke_anonymous_function(col1, "MAP_ZIP_WITH", col2, lambda_column)
|
|
1253
|
+
|
|
1254
|
+
|
|
1255
|
+
def _lambda_quoted(value: str) -> t.Optional[bool]:
|
|
1256
|
+
return False if value == "_" else None
|
|
1257
|
+
|
|
1258
|
+
|
|
1259
|
+
def _get_lambda_from_func(lambda_expression: t.Callable):
    """Convert a Python callable into an ``expression.Lambda``.

    The callable's positional parameter names become identifiers (quoting is
    decided by ``_lambda_quoted``). The callable is then invoked with one
    ``Column`` per identifier, and the ``.expression`` of its result becomes
    the lambda body.

    Args:
        lambda_expression: A Python function or lambda that accepts ``Column``
            arguments and returns a ``Column``.

    Returns:
        An ``expression.Lambda`` whose ``expressions`` are the parameter
        identifiers and whose ``this`` is the body expression.
    """
    code = lambda_expression.__code__
    # co_varnames lists the parameters first and then any other local
    # variables; restrict to the leading co_argcount names so that callables
    # with local variables are not invoked with too many arguments.
    parameter_names = code.co_varnames[: code.co_argcount]
    variables = [
        expression.to_identifier(name, quoted=_lambda_quoted(name)) for name in parameter_names
    ]
    return expression.Lambda(
        this=lambda_expression(*[Column(x) for x in variables]).expression,
        expressions=variables,
    )
|