spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,903 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SparkConnect
4
+ # The standard Spark SQL function library, mirroring PySpark's
5
+ # `pyspark.sql.functions`. Every function returns a {Column}.
6
+ #
7
+ # Available both as `SparkConnect::Functions` and the shorthand
8
+ # `SparkConnect::F`. All methods are module functions.
9
+ #
10
+ # Following PySpark's convention, a {String} argument denotes a **column name**
11
+ # for most functions (e.g. `F.sum("salary")` aggregates the `salary` column),
12
+ # while functions whose parameters are genuinely literal (regex patterns, date
13
+ # formats, JSON paths, ...) treat their {String} arguments as literal values.
14
+ #
15
+ # @example
16
+ # F = SparkConnect::F
17
+ # F.col("a") + F.lit(1)
18
+ # F.when(F.col("x") > 0, "pos").otherwise("non-pos")
19
+ # F.sum("amount").alias("total")
20
+ module Functions
21
+ Proto = SparkConnect::Proto
22
+ extend self
23
+
24
+ # ---- Core constructors -------------------------------------------------
25
+
26
# Reference a column by its name; the name `"*"` selects all columns.
#
# @param name [#to_s] column name (converted with `to_s`)
# @return [Column]
def col(name)
  Column.from_name(name.to_s)
end
alias column col
30
+
31
# Wrap a Ruby value in a literal column. {Column.lit} lists the supported
# Ruby types.
#
# @param value [Object] the value to embed
# @return [Column]
def lit(value)
  Column.lit(value)
end
34
+
35
# Turn a SQL expression string into a {Column}.
#
# @param sql [String] the SQL expression text
# @return [Column]
def expr(sql)
  expression_string = Proto::Expression::ExpressionString.new(expression: sql)
  Column.from_expr(Proto::Expression.new(expression_string: expression_string))
end
40
+
41
# @param col [Column, String] column or column name
# @return [Column] an ascending sort order for the named/given column.
def asc(col)
  _col(col).asc
end
43
# @param col [Column, String] column or column name
# @return [Column] the result of calling `desc` on the resolved column.
def desc(col)
  _col(col).desc
end
44
# @param col [Column, String] column or column name
# @return [Column] the result of calling `asc_nulls_first` on the resolved column.
def asc_nulls_first(col)
  _col(col).asc_nulls_first
end
45
# @param col [Column, String] column or column name
# @return [Column] the result of calling `asc_nulls_last` on the resolved column.
def asc_nulls_last(col)
  _col(col).asc_nulls_last
end
46
# @param col [Column, String] column or column name
# @return [Column] the result of calling `desc_nulls_first` on the resolved column.
def desc_nulls_first(col)
  _col(col).desc_nulls_first
end
47
# @param col [Column, String] column or column name
# @return [Column] the result of calling `desc_nulls_last` on the resolved column.
def desc_nulls_last(col)
  _col(col).desc_nulls_last
end
48
+
49
# Open a CASE WHEN expression. Add further branches with {Column#when} and
# close it with {Column#otherwise}.
#
# @param condition the branch condition, e.g. `F.col("x") > 0`
# @param value the result for that branch, e.g. `"pos"`
# @return [Column]
def when(condition, value)
  branch = [condition, value]
  Column.invoke("when", *branch)
end
54
+
55
+ # ---- Aggregate / counting ---------------------------------------------
56
+
57
# Count rows, or the non-null values of a column. An argument whose `to_s`
# is `"*"` counts all rows by counting the literal `1`.
#
# @param col [Column, String] column, column name, or `"*"`
# @return [Column]
def count(col)
  target = col.to_s == "*" ? lit(1) : _col(col)
  Column.invoke("count", target)
end
62
+
63
# Count the distinct combinations of the given columns (a `count` call
# flagged `is_distinct`). Also available as `countDistinct`.
#
# @param cols [Array<Column, String>] columns or column names
# @return [Column]
def count_distinct(*cols)
  columns = cols.map { |c| _col(c) }
  Column.invoke("count", *columns, is_distinct: true)
end
alias countDistinct count_distinct
68
+
69
# Approximate distinct count, optionally with a relative standard deviation.
#
# @param col [Column, String] column or column name
# @param rsd [Numeric, nil] relative SD, sent as a literal; omitted when nil
# @return [Column]
def approx_count_distinct(col, rsd = nil)
  return Column.invoke("approx_count_distinct", _col(col)) if rsd.nil?

  Column.invoke("approx_count_distinct", _col(col), lit(rsd))
end
73
+
74
# Sum of distinct values (a `sum` call flagged `is_distinct`).
#
# @param col [Column, String] column or column name
# @return [Column]
def sum_distinct(col)
  Column.invoke("sum", _col(col), is_distinct: true)
end
76
+
77
+ # ---- Rounding ----------------------------------------------------------
78
+
79
# HALF_UP rounding to `scale` decimal places.
#
# @param col [Column, String] column or column name
# @param scale [Integer] number of decimal places, sent as a literal
# @return [Column]
def round(col, scale = 0)
  Column.invoke("round", _col(col), lit(scale))
end
81
# HALF_EVEN ("banker's") rounding to `scale` places.
#
# @param col [Column, String] column or column name
# @param scale [Integer] number of decimal places, sent as a literal
# @return [Column]
def bround(col, scale = 0)
  Column.invoke("bround", _col(col), lit(scale))
end
83
+
84
+ # ---- Conditionals / null handling -------------------------------------
85
+
86
# First non-null value among the given columns.
#
# @param cols [Array<Column, String>] columns or column names
# @return [Column]
def coalesce(*cols)
  candidates = cols.map { |c| _col(c) }
  Column.invoke("coalesce", *candidates)
end
88
# `col1` unless it is NaN, in which case `col2`.
#
# @param col1 [Column, String] primary column or column name
# @param col2 [Column, String] fallback column or column name
# @return [Column]
def nanvl(col1, col2)
  Column.invoke("nanvl", _col(col1), _col(col2))
end
90
+
91
+ # ---- Constructors of complex types ------------------------------------
92
+
93
# Build a struct from the given columns.
#
# @param cols [Array<Column, String>] columns or column names
# @return [Column]
def struct(*cols)
  fields = cols.map { |c| _col(c) }
  Column.invoke("struct", *fields)
end
95
# Build an array from the given columns.
#
# @param cols [Array<Column, String>] columns or column names
# @return [Column]
def array(*cols)
  elements = cols.map { |c| _col(c) }
  Column.invoke("array", *elements)
end
97
# Build a map from alternating key/value columns (invokes Spark's `map`).
#
# @param cols [Array<Column, String>] alternating key and value columns
# @return [Column]
def create_map(*cols)
  entries = cols.map { |c| _col(c) }
  Column.invoke("map", *entries)
end
99
# Build a map from two array columns.
#
# @param keys [Column, String] array column holding the keys
# @param values [Column, String] array column holding the values
# @return [Column]
def map_from_arrays(keys, values)
  Column.invoke("map_from_arrays", _col(keys), _col(values))
end
101
# Build a named struct from alternating name/value arguments.
#
# @param cols [Array] alternating name and value arguments, each passed through `_col`
# @return [Column]
def named_struct(*cols)
  parts = cols.map { |c| _col(c) }
  Column.invoke("named_struct", *parts)
end
103
+
104
+ # ---- String functions with literal arguments --------------------------
105
+
106
# Concatenate columns, separated by the literal `sep`.
#
# @param sep [String] separator, sent as a literal
# @param cols [Array<Column, String>] columns or column names
# @return [Column]
def concat_ws(sep, *cols)
  separator = lit(sep)
  Column.invoke("concat_ws", separator, *cols.map { |c| _col(c) })
end
108
# printf-style formatting driven by the literal `fmt`.
#
# @param fmt [String] format string, sent as a literal
# @param cols [Array<Column, String>] columns or column names to format
# @return [Column]
def format_string(fmt, *cols)
  template = lit(fmt)
  Column.invoke("format_string", template, *cols.map { |c| _col(c) })
end
110
# Format a number to `d` decimal places.
#
# @param col [Column, String] column or column name
# @param d [Integer] decimal places, sent as a literal
# @return [Column]
def format_number(col, d)
  Column.invoke("format_number", _col(col), lit(d))
end
112
# Substring of length `len` starting at the 1-based position `pos`.
#
# @param col [Column, String] column or column name
# @param pos [Integer] 1-based start position, sent as a literal
# @param len [Integer] length, sent as a literal
# @return [Column]
def substring(col, pos, len)
  Column.invoke("substring", _col(col), lit(pos), lit(len))
end
114
# Substring before the `count`-th occurrence of `delim`.
#
# @param col [Column, String] column or column name
# @param delim [String] delimiter, sent as a literal
# @param count [Integer] occurrence number, sent as a literal
# @return [Column]
def substring_index(col, delim, count)
  Column.invoke("substring_index", _col(col), lit(delim), lit(count))
end
116
# 1-based position of the literal `substr` within `col` (0 if absent).
#
# @param col [Column, String] column or column name
# @param substr [String] substring to find, sent as a literal
# @return [Column]
def instr(col, substr)
  Column.invoke("instr", _col(col), lit(substr))
end
118
# 1-based position of `substr` in `col`, searching at/after `pos`.
#
# @param substr [String] substring to find, sent as a literal
# @param col [Column, String] column or column name
# @param pos [Integer] start position, sent as a literal
# @return [Column]
def locate(substr, col, pos = 1)
  Column.invoke("locate", lit(substr), _col(col), lit(pos))
end
120
# Left-pad a string.
#
# @param col [Column, String] column or column name
# @param len [Integer] target length, sent as a literal
# @param pad [String] padding text, sent as a literal
# @return [Column]
def lpad(col, len, pad)
  Column.invoke("lpad", _col(col), lit(len), lit(pad))
end
122
# Right-pad a string.
#
# @param col [Column, String] column or column name
# @param len [Integer] target length, sent as a literal
# @param pad [String] padding text, sent as a literal
# @return [Column]
def rpad(col, len, pad)
  Column.invoke("rpad", _col(col), lit(len), lit(pad))
end
124
# Repeat the string `n` times.
#
# @param col [Column, String] column or column name
# @param n [Integer] repetition count, sent as a literal
# @return [Column]
def repeat(col, n)
  Column.invoke("repeat", _col(col), lit(n))
end
126
# Split `col` by the literal regex `pattern`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @param limit [Integer] sent as a literal; defaults to -1
# @return [Column]
def split(col, pattern, limit = -1)
  Column.invoke("split", _col(col), lit(pattern), lit(limit))
end
128
# Replace the characters of `col` that appear in `matching` according to
# `replace`.
#
# @param col [Column, String] column or column name
# @param matching [String] characters to match, sent as a literal
# @param replace [String] replacement characters, sent as a literal
# @return [Column]
def translate(col, matching, replace)
  Column.invoke("translate", _col(col), lit(matching), lit(replace))
end
130
# The `idx`-th group of `pattern` matched in `col`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @param idx [Integer] group index, sent as a literal; defaults to 0
# @return [Column]
def regexp_extract(col, pattern, idx = 0)
  Column.invoke("regexp_extract", _col(col), lit(pattern), lit(idx))
end
132
# All matches of group `idx` of `pattern`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @param idx [Integer] group index, sent as a literal; defaults to 1
# @return [Column]
def regexp_extract_all(col, pattern, idx = 1)
  Column.invoke("regexp_extract_all", _col(col), lit(pattern), lit(idx))
end
134
# `col` with matches of `pattern` replaced by `replacement`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @param replacement [String] replacement text, sent as a literal
# @return [Column]
def regexp_replace(col, pattern, replacement)
  Column.invoke("regexp_replace", _col(col), lit(pattern), lit(replacement))
end
136
# Whether `col` matches `pattern`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @return [Column]
def regexp_like(col, pattern)
  Column.invoke("regexp_like", _col(col), lit(pattern))
end
138
# Invoke Spark's `regexp_count` on `col` with the literal `pattern`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @return [Column]
def regexp_count(col, pattern)
  Column.invoke("regexp_count", _col(col), lit(pattern))
end
139
# Invoke Spark's `regexp_substr` on `col` with the literal `pattern`.
#
# @param col [Column, String] column or column name
# @param pattern [String] regex, sent as a literal
# @return [Column]
def regexp_substr(col, pattern)
  Column.invoke("regexp_substr", _col(col), lit(pattern))
end
140
# Overlay `replace` into `col` at `pos` for `len` characters.
#
# @param col [Column, String] column or column name
# @param replace [Column, String] column or column name (a String is NOT a literal here)
# @param pos [Integer] position, sent as a literal
# @param len [Integer] length, sent as a literal; defaults to -1
# @return [Column]
def overlay(col, replace, pos, len = -1)
  Column.invoke("overlay", _col(col), _col(replace), lit(pos), lit(len))
end
142
# SHA-2 hash with the given bit length (224/256/384/512).
#
# @param col [Column, String] column or column name
# @param num_bits [Integer] bit length, sent as a literal
# @return [Column]
def sha2(col, num_bits)
  Column.invoke("sha2", _col(col), lit(num_bits))
end
144
# Convert a number string from `from_base` to `to_base`.
#
# @param col [Column, String] column or column name
# @param from_base [Integer] source base, sent as a literal
# @param to_base [Integer] target base, sent as a literal
# @return [Column]
def conv(col, from_base, to_base)
  Column.invoke("conv", _col(col), lit(from_base), lit(to_base))
end
146
# Left shift by a literal bit count.
#
# @param col [Column, String] column or column name
# @param num_bits [Integer] bit count, sent as a literal
# @return [Column]
def shiftleft(col, num_bits)
  Column.invoke("shiftleft", _col(col), lit(num_bits))
end
148
# Right shift by a literal bit count (invokes Spark's `shiftright`).
#
# @param col [Column, String] column or column name
# @param num_bits [Integer] bit count, sent as a literal
# @return [Column]
def shiftright(col, num_bits)
  Column.invoke("shiftright", _col(col), lit(num_bits))
end
149
# Invoke Spark's `shiftrightunsigned` with a literal bit count.
#
# @param col [Column, String] column or column name
# @param num_bits [Integer] bit count, sent as a literal
# @return [Column]
def shiftrightunsigned(col, num_bits)
  Column.invoke("shiftrightunsigned", _col(col), lit(num_bits))
end
150
+
151
+ # ---- Date / time functions with literal arguments ---------------------
152
+
153
# Invoke Spark's `date_format` on `col` with the literal format `fmt`.
#
# @param col [Column, String] column or column name
# @param fmt [String] format pattern, sent as a literal
# @return [Column]
def date_format(col, fmt)
  Column.invoke("date_format", _col(col), lit(fmt))
end
154
# Invoke Spark's `to_date`; the format is passed only when `fmt` is truthy.
#
# @param col [Column, String] column or column name
# @param fmt [String, nil] optional format pattern, sent as a literal
# @return [Column]
def to_date(col, fmt = nil)
  args = [_col(col)]
  args << lit(fmt) if fmt
  Column.invoke("to_date", *args)
end
155
# Invoke Spark's `to_timestamp`; the format is passed only when `fmt` is
# truthy.
#
# @param col [Column, String] column or column name
# @param fmt [String, nil] optional format pattern, sent as a literal
# @return [Column]
def to_timestamp(col, fmt = nil)
  args = [_col(col)]
  args << lit(fmt) if fmt
  Column.invoke("to_timestamp", *args)
end
156
# Invoke Spark's `date_add` on `col` with the literal `days`.
#
# @param col [Column, String] column or column name
# @param days [Integer] sent as a literal
# @return [Column]
def date_add(col, days)
  Column.invoke("date_add", _col(col), lit(days))
end
157
# Invoke Spark's `date_sub` on `col` with the literal `days`.
#
# @param col [Column, String] column or column name
# @param days [Integer] sent as a literal
# @return [Column]
def date_sub(col, days)
  Column.invoke("date_sub", _col(col), lit(days))
end
158
# Invoke Spark's `datediff` with the end column first, then the start column.
#
# @param end_col [Column, String] column or column name
# @param start_col [Column, String] column or column name
# @return [Column]
def datediff(end_col, start_col)
  Column.invoke("datediff", _col(end_col), _col(start_col))
end
159
# Invoke Spark's `add_months` on `col` with the literal `months`.
#
# @param col [Column, String] column or column name
# @param months [Integer] sent as a literal
# @return [Column]
def add_months(col, months)
  Column.invoke("add_months", _col(col), lit(months))
end
160
# Invoke Spark's `months_between` on two columns with a literal rounding flag.
#
# @param d1 [Column, String] column or column name
# @param d2 [Column, String] column or column name
# @param round_off [Boolean] sent as a literal; defaults to true
# @return [Column]
def months_between(d1, d2, round_off = true)
  Column.invoke("months_between", _col(d1), _col(d2), lit(round_off))
end
161
# Invoke Spark's `next_day` on `col` with the literal `day_of_week`.
#
# @param col [Column, String] column or column name
# @param day_of_week [String] sent as a literal
# @return [Column]
def next_day(col, day_of_week)
  Column.invoke("next_day", _col(col), lit(day_of_week))
end
162
# Invoke Spark's `trunc` on `col` with the literal `fmt` (column first).
#
# @param col [Column, String] column or column name
# @param fmt [String] sent as a literal
# @return [Column]
def trunc(col, fmt)
  Column.invoke("trunc", _col(col), lit(fmt))
end
163
# Invoke Spark's `date_trunc` with the literal `fmt` first, then the column.
#
# @param fmt [String] sent as a literal
# @param col [Column, String] column or column name
# @return [Column]
def date_trunc(fmt, col)
  Column.invoke("date_trunc", lit(fmt), _col(col))
end
164
# Invoke Spark's `from_unixtime` on `col` with the literal format `fmt`.
#
# @param col [Column, String] column or column name
# @param fmt [String] sent as a literal; defaults to `"yyyy-MM-dd HH:mm:ss"`
# @return [Column]
def from_unixtime(col, fmt = "yyyy-MM-dd HH:mm:ss")
  Column.invoke("from_unixtime", _col(col), lit(fmt))
end
165
+
166
# Invoke Spark's `unix_timestamp`. With no column the function is called
# without arguments (and `fmt` is ignored); otherwise the column and the
# literal format are passed.
#
# @param col [Column, String, nil] column or column name
# @param fmt [String] sent as a literal; defaults to `"yyyy-MM-dd HH:mm:ss"`
# @return [Column]
def unix_timestamp(col = nil, fmt = "yyyy-MM-dd HH:mm:ss")
  return Column.invoke("unix_timestamp") if col.nil?

  Column.invoke("unix_timestamp", _col(col), lit(fmt))
end
169
+
170
# Invoke Spark's `from_utc_timestamp` on `col` with the literal `tz`.
#
# @param col [Column, String] column or column name
# @param tz [String] sent as a literal
# @return [Column]
def from_utc_timestamp(col, tz)
  Column.invoke("from_utc_timestamp", _col(col), lit(tz))
end
171
# Invoke Spark's `to_utc_timestamp` on `col` with the literal `tz`.
#
# @param col [Column, String] column or column name
# @param tz [String] sent as a literal
# @return [Column]
def to_utc_timestamp(col, tz)
  Column.invoke("to_utc_timestamp", _col(col), lit(tz))
end
172
# Invoke Spark's `make_date`. All three arguments are resolved as columns,
# so a String is a column name, not a literal.
#
# @param year [Column, String] column or column name
# @param month [Column, String] column or column name
# @param day [Column, String] column or column name
# @return [Column]
def make_date(year, month, day)
  parts = [year, month, day].map { |part| _col(part) }
  Column.invoke("make_date", *parts)
end
173
+
174
+ # ---- JSON / CSV --------------------------------------------------------
175
+
176
# Invoke Spark's `get_json_object` on `col` with the literal JSON `path`.
#
# @param col [Column, String] column or column name
# @param path [String] sent as a literal
# @return [Column]
def get_json_object(col, path)
  Column.invoke("get_json_object", _col(col), lit(path))
end
177
# Invoke Spark's `json_tuple` on `col`; every field is sent as a literal.
#
# @param col [Column, String] column or column name
# @param fields [Array<String>] field names, each sent as a literal
# @return [Column]
def json_tuple(col, *fields)
  field_literals = fields.map { |f| lit(f) }
  Column.invoke("json_tuple", _col(col), *field_literals)
end
178
+
179
# Invoke Spark's `from_json` on `col` with the given schema.
#
# The schema is sent as a string literal: `schema.json` for a
# {Types::DataType}, otherwise `schema.to_s`.
#
# @param col [Column, String] column or column name
# @param schema [Types::DataType, String]
# @param options [Hash] parser options; keys and values are stringified
# @return [Column]
def from_json(col, schema, options = {})
  schema_col = schema.is_a?(Types::DataType) ? lit(schema.json) : lit(schema.to_s)
  args = [_col(col), schema_col]
  # Spark's `from_json(jsonStr, schema[, options])` takes its options as ONE
  # map-typed argument, so the key/value literals are wrapped in a `map`
  # call instead of being appended as separate positional arguments.
  unless options.empty?
    args << Column.invoke("map", *options.flat_map { |k, v| [lit(k.to_s), lit(v.to_s)] })
  end
  Column.invoke("from_json", *args)
end
185
+
186
# Invoke Spark's `to_json` on `col`.
#
# @param col [Column, String] column or column name
# @param options [Hash] writer options; keys and values are stringified
# @return [Column]
def to_json(col, options = {})
  args = [_col(col)]
  # Spark's `to_json(expr[, options])` takes its options as ONE map-typed
  # argument, so the key/value literals are wrapped in a `map` call instead
  # of being appended as separate positional arguments.
  unless options.empty?
    args << Column.invoke("map", *options.flat_map { |k, v| [lit(k.to_s), lit(v.to_s)] })
  end
  Column.invoke("to_json", *args)
end
190
+
191
# Invoke Spark's `schema_of_json` on a JSON sample.
#
# @param json the JSON sample, converted by `_lit_or_col`
# @param options [Hash] parser options; keys and values are stringified
# @return [Column]
def schema_of_json(json, options = {})
  args = [_lit_or_col(json)]
  # Spark's `schema_of_json(json[, options])` takes its options as ONE
  # map-typed argument, so the key/value literals are wrapped in a `map`
  # call instead of being appended as separate positional arguments.
  unless options.empty?
    args << Column.invoke("map", *options.flat_map { |k, v| [lit(k.to_s), lit(v.to_s)] })
  end
  Column.invoke("schema_of_json", *args)
end
194
+
195
+ # ---- Array / map functions with value arguments -----------------------
196
+
197
# Invoke Spark's `array_contains` on `col` with the literal `value`.
#
# @param col [Column, String] column or column name
# @param value sent as a literal
# @return [Column]
def array_contains(col, value)
  Column.invoke("array_contains", _col(col), lit(value))
end
198
# Invoke Spark's `array_position` on `col` with the literal `value`.
#
# @param col [Column, String] column or column name
# @param value sent as a literal
# @return [Column]
def array_position(col, value)
  Column.invoke("array_position", _col(col), lit(value))
end
199
# Invoke Spark's `array_remove` on `col` with the literal `element`.
#
# @param col [Column, String] column or column name
# @param element sent as a literal
# @return [Column]
def array_remove(col, element)
  Column.invoke("array_remove", _col(col), lit(element))
end
200
# Invoke Spark's `array_repeat` on `col` with the literal `count`.
#
# @param col [Column, String] column or column name
# @param count [Integer] sent as a literal
# @return [Column]
def array_repeat(col, count)
  Column.invoke("array_repeat", _col(col), lit(count))
end
201
# Invoke Spark's `array_append` on `col` with the literal `value`.
#
# @param col [Column, String] column or column name
# @param value sent as a literal
# @return [Column]
def array_append(col, value)
  Column.invoke("array_append", _col(col), lit(value))
end
202
# Invoke Spark's `array_prepend` on `col` with the literal `value`.
#
# @param col [Column, String] column or column name
# @param value sent as a literal
# @return [Column]
def array_prepend(col, value)
  Column.invoke("array_prepend", _col(col), lit(value))
end
203
# Invoke Spark's `array_insert` on `col` with literal `pos` and `value`.
#
# @param col [Column, String] column or column name
# @param pos [Integer] sent as a literal
# @param value sent as a literal
# @return [Column]
def array_insert(col, pos, value)
  Column.invoke("array_insert", _col(col), lit(pos), lit(value))
end
204
+
205
# Invoke Spark's `array_join` on `col` with the literal `delimiter`; the
# null replacement is passed only when it is not nil.
#
# @param col [Column, String] column or column name
# @param delimiter [String] sent as a literal
# @param null_replacement [String, nil] sent as a literal when given
# @return [Column]
def array_join(col, delimiter, null_replacement = nil)
  args = [_col(col), lit(delimiter)]
  args << lit(null_replacement) unless null_replacement.nil?
  Column.invoke("array_join", *args)
end
213
+
214
# Invoke Spark's `element_at` on `col` with the literal `extraction`.
#
# @param col [Column, String] column or column name
# @param extraction sent as a literal
# @return [Column]
def element_at(col, extraction)
  Column.invoke("element_at", _col(col), lit(extraction))
end
215
# Invoke Spark's `slice` on `col`; `start` and `length` are converted by
# `_lit_or_col`.
#
# @param col [Column, String] column or column name
# @param start converted by `_lit_or_col`
# @param length converted by `_lit_or_col`
# @return [Column]
def slice(col, start, length)
  Column.invoke("slice", _col(col), _lit_or_col(start), _lit_or_col(length))
end
216
+
217
# Invoke Spark's `sequence`. Every argument is resolved as a column, and
# `step` is passed only when it is not nil.
#
# @param start [Column, String] column or column name
# @param stop [Column, String] column or column name
# @param step [Column, String, nil] column or column name
# @return [Column]
def sequence(start, stop, step = nil)
  bounds = [_col(start), _col(stop)]
  bounds << _col(step) unless step.nil?
  Column.invoke("sequence", *bounds)
end
220
+
221
# Invoke Spark's `map_contains_key` on `col` with the literal `key`.
#
# @param col [Column, String] column or column name
# @param key sent as a literal
# @return [Column]
def map_contains_key(col, key)
  Column.invoke("map_contains_key", _col(col), lit(key))
end
222
+
223
+ # ---- Window / analytic functions --------------------------------------
224
+
225
# Invoke Spark's `lag` on `col` with literal `offset` and `default`.
#
# @param col [Column, String] column or column name
# @param offset [Integer] sent as a literal; defaults to 1
# @param default sent as a literal (nil included)
# @return [Column]
def lag(col, offset = 1, default = nil)
  Column.invoke("lag", _col(col), lit(offset), lit(default))
end
226
# Invoke Spark's `lead` on `col` with literal `offset` and `default`.
#
# @param col [Column, String] column or column name
# @param offset [Integer] sent as a literal; defaults to 1
# @param default sent as a literal (nil included)
# @return [Column]
def lead(col, offset = 1, default = nil)
  Column.invoke("lead", _col(col), lit(offset), lit(default))
end
227
# Invoke Spark's `ntile` with the literal `n`.
#
# @param n [Integer] sent as a literal
# @return [Column]
def ntile(n)
  Column.invoke("ntile", lit(n))
end
228
# Invoke Spark's `nth_value` on `col` with literal `offset` and
# `ignore_nulls`.
#
# @param col [Column, String] column or column name
# @param offset [Integer] sent as a literal
# @param ignore_nulls [Boolean] sent as a literal; defaults to false
# @return [Column]
def nth_value(col, offset, ignore_nulls = false)
  Column.invoke("nth_value", _col(col), lit(offset), lit(ignore_nulls))
end
229
+
230
+ # ---- Sorting helpers ---------------------------------------------------
231
+
232
# Invoke Spark's `sort_array` on `col` with the literal flag `asc`.
#
# @param col [Column, String] column or column name
# @param asc [Boolean] sent as a literal; defaults to true
# @return [Column]
def sort_array(col, asc = true)
  Column.invoke("sort_array", _col(col), lit(asc))
end
233
+
234
+ # ---- Randomness --------------------------------------------------------
235
+
236
# Invoke Spark's `rand`; the seed is passed as a literal only when given.
#
# @param seed [Integer, nil] optional seed
# @return [Column]
def rand(seed = nil)
  return Column.invoke("rand") if seed.nil?

  Column.invoke("rand", lit(seed))
end
237
# Invoke Spark's `randn`; the seed is passed as a literal only when given.
#
# @param seed [Integer, nil] optional seed
# @return [Column]
def randn(seed = nil)
  return Column.invoke("randn") if seed.nil?

  Column.invoke("randn", lit(seed))
end
238
+
239
+ # ---- Higher-order (lambda) functions -----------------------------------
240
+
241
# Apply a block to every element of an array column. The block is handed a
# {Column} (and optionally the index) and must return a {Column}.
#
# @param col [Column, String] column or column name
# @yieldparam element [Column]
# @return [Column]
def transform(col, &block)
  lambda_column = _lambda(block)
  Column.invoke("transform", _col(col), lambda_column)
end
246
# Invoke Spark's `exists` on `col` with a lambda built from the block.
#
# @param col [Column, String] column or column name
# @return [Column]
def exists(col, &block)
  Column.invoke("exists", _col(col), _lambda(block))
end
247
# Invoke Spark's `forall` on `col` with a lambda built from the block.
#
# @param col [Column, String] column or column name
# @return [Column]
def forall(col, &block)
  Column.invoke("forall", _col(col), _lambda(block))
end
248
# Invoke Spark's `filter` on `col` with a lambda built from the block.
#
# @param col [Column, String] column or column name
# @return [Column]
def filter(col, &block)
  Column.invoke("filter", _col(col), _lambda(block))
end
249
# Invoke Spark's `zip_with` on two columns with a lambda built from the block.
#
# @param left [Column, String] column or column name
# @param right [Column, String] column or column name
# @return [Column]
def zip_with(left, right, &block)
  Column.invoke("zip_with", _col(left), _col(right), _lambda(block))
end
250
# Invoke Spark's `transform_keys` on `col` with a lambda built from the block.
#
# @param col [Column, String] column or column name
# @return [Column]
def transform_keys(col, &block)
  Column.invoke("transform_keys", _col(col), _lambda(block))
end
251
# Invoke Spark's `transform_values` on `col` with a lambda built from the
# block.
#
# @param col [Column, String] column or column name
# @return [Column]
def transform_values(col, &block)
  Column.invoke("transform_values", _col(col), _lambda(block))
end
252
# Invoke Spark's `map_filter` on `col` with a lambda built from the block.
#
# @param col [Column, String] column or column name
# @return [Column]
def map_filter(col, &block)
  Column.invoke("map_filter", _col(col), _lambda(block))
end
253
# Invoke Spark's `map_zip_with` on two columns with a lambda built from the
# block.
#
# @param c1 [Column, String] column or column name
# @param c2 [Column, String] column or column name
# @return [Column]
def map_zip_with(c1, c2, &block)
  Column.invoke("map_zip_with", _col(c1), _col(c2), _lambda(block))
end
254
+
255
# Fold an array column. `merge` combines the accumulator with each element;
# the optional `finish` post-processes the result.
#
# @param col [Column, String] column or column name
# @param initial [Column, String] initial accumulator, resolved as a column
# @param merge callable converted by `_lambda`
# @param finish optional callable converted by `_lambda`; skipped when falsy
# @return [Column]
def aggregate(col, initial, merge, finish = nil)
  base = [_col(col), _col(initial), _lambda(merge)]
  full = finish ? base + [_lambda(finish)] : base
  Column.invoke("aggregate", *full)
end
263
+
264
+ # ---- DataFrame-level helper -------------------------------------------
265
+
266
# Mark a DataFrame for a broadcast (map-side) join by applying the
# `"broadcast"` hint to it.
#
# @param df [DataFrame]
# @return [DataFrame] whatever `df.hint("broadcast")` returns
def broadcast(df)
  df.hint("broadcast")
end
270
+
271
# UDFs need a server-side execution environment (Python/Scala), which the
# pure-Ruby client does not provide, so this always raises.
#
# @raise [NotImplementedError] on every call, whatever the arguments
def udf(*)
  message = "User-defined functions are not supported by the Ruby Spark Connect client"
  raise NotImplementedError, message
end
276
+
277
+ # The following functions are generated programmatically below
278
+ # (`UNIFORM` and `NO_ARG`). The `@!method` directives document them so they
279
+ # appear in the API reference; each returns a {Column}.
280
+ #
281
+ # @!method sum(*cols)
282
+ # The Spark SQL `sum` function. String arguments are treated as column names.
283
+ # @return [Column]
284
+ # @!method avg(*cols)
285
+ # The Spark SQL `avg` function. String arguments are treated as column names.
286
+ # @return [Column]
287
+ # @!method mean(*cols)
288
+ # The Spark SQL `mean` function. String arguments are treated as column names.
289
+ # @return [Column]
290
+ # @!method max(*cols)
291
+ # The Spark SQL `max` function. String arguments are treated as column names.
292
+ # @return [Column]
293
+ # @!method min(*cols)
294
+ # The Spark SQL `min` function. String arguments are treated as column names.
295
+ # @return [Column]
296
+ # @!method first(*cols)
297
+ # The Spark SQL `first` function. String arguments are treated as column names.
298
+ # @return [Column]
299
+ # @!method last(*cols)
300
+ # The Spark SQL `last` function. String arguments are treated as column names.
301
+ # @return [Column]
302
+ # @!method stddev(*cols)
303
+ # The Spark SQL `stddev` function. String arguments are treated as column names.
304
+ # @return [Column]
305
+ # @!method stddev_samp(*cols)
306
+ # The Spark SQL `stddev_samp` function. String arguments are treated as column names.
307
+ # @return [Column]
308
+ # @!method stddev_pop(*cols)
309
+ # The Spark SQL `stddev_pop` function. String arguments are treated as column names.
310
+ # @return [Column]
311
+ # @!method variance(*cols)
312
+ # The Spark SQL `variance` function. String arguments are treated as column names.
313
+ # @return [Column]
314
+ # @!method var_samp(*cols)
315
+ # The Spark SQL `var_samp` function. String arguments are treated as column names.
316
+ # @return [Column]
317
+ # @!method var_pop(*cols)
318
+ # The Spark SQL `var_pop` function. String arguments are treated as column names.
319
+ # @return [Column]
320
+ # @!method skewness(*cols)
321
+ # The Spark SQL `skewness` function. String arguments are treated as column names.
322
+ # @return [Column]
323
+ # @!method kurtosis(*cols)
324
+ # The Spark SQL `kurtosis` function. String arguments are treated as column names.
325
+ # @return [Column]
326
+ # @!method collect_list(*cols)
327
+ # The Spark SQL `collect_list` function. String arguments are treated as column names.
328
+ # @return [Column]
329
+ # @!method collect_set(*cols)
330
+ # The Spark SQL `collect_set` function. String arguments are treated as column names.
331
+ # @return [Column]
332
+ # @!method first_value(*cols)
333
+ # The Spark SQL `first_value` function. String arguments are treated as column names.
334
+ # @return [Column]
335
+ # @!method last_value(*cols)
336
+ # The Spark SQL `last_value` function. String arguments are treated as column names.
337
+ # @return [Column]
338
+ # @!method max_by(*cols)
339
+ # The Spark SQL `max_by` function. String arguments are treated as column names.
340
+ # @return [Column]
341
+ # @!method min_by(*cols)
342
+ # The Spark SQL `min_by` function. String arguments are treated as column names.
343
+ # @return [Column]
344
+ # @!method corr(*cols)
345
+ # The Spark SQL `corr` function. String arguments are treated as column names.
346
+ # @return [Column]
347
+ # @!method covar_pop(*cols)
348
+ # The Spark SQL `covar_pop` function. String arguments are treated as column names.
349
+ # @return [Column]
350
+ # @!method covar_samp(*cols)
351
+ # The Spark SQL `covar_samp` function. String arguments are treated as column names.
352
+ # @return [Column]
353
+ # @!method median(*cols)
354
+ # The Spark SQL `median` function. String arguments are treated as column names.
355
+ # @return [Column]
356
+ # @!method mode(*cols)
357
+ # The Spark SQL `mode` function. String arguments are treated as column names.
358
+ # @return [Column]
359
+ # @!method any_value(*cols)
360
+ # The Spark SQL `any_value` function. String arguments are treated as column names.
361
+ # @return [Column]
362
+ # @!method every(*cols)
363
+ # The Spark SQL `every` function. String arguments are treated as column names.
364
+ # @return [Column]
365
+ # @!method some(*cols)
366
+ # The Spark SQL `some` function. String arguments are treated as column names.
367
+ # @return [Column]
368
+ # @!method bit_and(*cols)
369
+ # The Spark SQL `bit_and` function. String arguments are treated as column names.
370
+ # @return [Column]
371
+ # @!method bit_or(*cols)
372
+ # The Spark SQL `bit_or` function. String arguments are treated as column names.
373
+ # @return [Column]
374
+ # @!method bit_xor(*cols)
375
+ # The Spark SQL `bit_xor` function. String arguments are treated as column names.
376
+ # @return [Column]
377
+ # @!method bool_and(*cols)
378
+ # The Spark SQL `bool_and` function. String arguments are treated as column names.
379
+ # @return [Column]
380
+ # @!method bool_or(*cols)
381
+ # The Spark SQL `bool_or` function. String arguments are treated as column names.
382
+ # @return [Column]
383
+ # @!method product(*cols)
384
+ # The Spark SQL `product` function. String arguments are treated as column names.
385
+ # @return [Column]
386
+ # @!method count_if(*cols)
387
+ # The Spark SQL `count_if` function. String arguments are treated as column names.
388
+ # @return [Column]
389
+ # @!method grouping(*cols)
390
+ # The Spark SQL `grouping` function. String arguments are treated as column names.
391
+ # @return [Column]
392
+ # @!method abs(*cols)
393
+ # The Spark SQL `abs` function. String arguments are treated as column names.
394
+ # @return [Column]
395
+ # @!method acos(*cols)
396
+ # The Spark SQL `acos` function. String arguments are treated as column names.
397
+ # @return [Column]
398
+ # @!method acosh(*cols)
399
+ # The Spark SQL `acosh` function. String arguments are treated as column names.
400
+ # @return [Column]
401
+ # @!method asin(*cols)
402
+ # The Spark SQL `asin` function. String arguments are treated as column names.
403
+ # @return [Column]
404
+ # @!method asinh(*cols)
405
+ # The Spark SQL `asinh` function. String arguments are treated as column names.
406
+ # @return [Column]
407
+ # @!method atan(*cols)
408
+ # The Spark SQL `atan` function. String arguments are treated as column names.
409
+ # @return [Column]
410
+ # @!method atanh(*cols)
411
+ # The Spark SQL `atanh` function. String arguments are treated as column names.
412
+ # @return [Column]
413
+ # @!method atan2(*cols)
414
+ # The Spark SQL `atan2` function. String arguments are treated as column names.
415
+ # @return [Column]
416
+ # @!method bin(*cols)
417
+ # The Spark SQL `bin` function. String arguments are treated as column names.
418
+ # @return [Column]
419
+ # @!method cbrt(*cols)
420
+ # The Spark SQL `cbrt` function. String arguments are treated as column names.
421
+ # @return [Column]
422
+ # @!method ceil(*cols)
423
+ # The Spark SQL `ceil` function. String arguments are treated as column names.
424
+ # @return [Column]
425
+ # @!method ceiling(*cols)
426
+ # The Spark SQL `ceiling` function. String arguments are treated as column names.
427
+ # @return [Column]
428
+ # @!method cos(*cols)
429
+ # The Spark SQL `cos` function. String arguments are treated as column names.
430
+ # @return [Column]
431
+ # @!method cosh(*cols)
432
+ # The Spark SQL `cosh` function. String arguments are treated as column names.
433
+ # @return [Column]
434
+ # @!method cot(*cols)
435
+ # The Spark SQL `cot` function. String arguments are treated as column names.
436
+ # @return [Column]
437
+ # @!method csc(*cols)
438
+ # The Spark SQL `csc` function. String arguments are treated as column names.
439
+ # @return [Column]
440
+ # @!method degrees(*cols)
441
+ # The Spark SQL `degrees` function. String arguments are treated as column names.
442
+ # @return [Column]
443
+ # @!method exp(*cols)
444
+ # The Spark SQL `exp` function. String arguments are treated as column names.
445
+ # @return [Column]
446
+ # @!method expm1(*cols)
447
+ # The Spark SQL `expm1` function. String arguments are treated as column names.
448
+ # @return [Column]
449
+ # @!method factorial(*cols)
450
+ # The Spark SQL `factorial` function. String arguments are treated as column names.
451
+ # @return [Column]
452
+ # @!method floor(*cols)
453
+ # The Spark SQL `floor` function. String arguments are treated as column names.
454
+ # @return [Column]
455
+ # @!method hypot(*cols)
456
+ # The Spark SQL `hypot` function. String arguments are treated as column names.
457
+ # @return [Column]
458
+ # @!method ln(*cols)
459
+ # The Spark SQL `ln` function. String arguments are treated as column names.
460
+ # @return [Column]
461
+ # @!method log(*cols)
462
+ # The Spark SQL `log` function. String arguments are treated as column names.
463
+ # @return [Column]
464
+ # @!method log2(*cols)
465
+ # The Spark SQL `log2` function. String arguments are treated as column names.
466
+ # @return [Column]
467
+ # @!method log10(*cols)
468
+ # The Spark SQL `log10` function. String arguments are treated as column names.
469
+ # @return [Column]
470
+ # @!method log1p(*cols)
471
+ # The Spark SQL `log1p` function. String arguments are treated as column names.
472
+ # @return [Column]
473
+ # @!method negative(*cols)
474
+ # The Spark SQL `negative` function. String arguments are treated as column names.
475
+ # @return [Column]
476
+ # @!method negate(*cols)
477
+ # The Spark SQL `negate` function. String arguments are treated as column names.
478
+ # @return [Column]
479
+ # @!method positive(*cols)
480
+ # The Spark SQL `positive` function. String arguments are treated as column names.
481
+ # @return [Column]
482
+ # @!method pow(*cols)
483
+ # The Spark SQL `pow` function. String arguments are treated as column names.
484
+ # @return [Column]
485
+ # @!method power(*cols)
486
+ # The Spark SQL `power` function. String arguments are treated as column names.
487
+ # @return [Column]
488
+ # @!method radians(*cols)
489
+ # The Spark SQL `radians` function. String arguments are treated as column names.
490
+ # @return [Column]
491
+ # @!method rint(*cols)
492
+ # The Spark SQL `rint` function. String arguments are treated as column names.
493
+ # @return [Column]
494
+ # @!method sec(*cols)
495
+ # The Spark SQL `sec` function. String arguments are treated as column names.
496
+ # @return [Column]
497
+ # @!method signum(*cols)
498
+ # The Spark SQL `signum` function. String arguments are treated as column names.
499
+ # @return [Column]
500
+ # @!method sin(*cols)
501
+ # The Spark SQL `sin` function. String arguments are treated as column names.
502
+ # @return [Column]
503
+ # @!method sinh(*cols)
504
+ # The Spark SQL `sinh` function. String arguments are treated as column names.
505
+ # @return [Column]
506
+ # @!method sqrt(*cols)
507
+ # The Spark SQL `sqrt` function. String arguments are treated as column names.
508
+ # @return [Column]
509
+ # @!method tan(*cols)
510
+ # The Spark SQL `tan` function. String arguments are treated as column names.
511
+ # @return [Column]
512
+ # @!method tanh(*cols)
513
+ # The Spark SQL `tanh` function. String arguments are treated as column names.
514
+ # @return [Column]
515
+ # @!method hex(*cols)
516
+ # The Spark SQL `hex` function. String arguments are treated as column names.
517
+ # @return [Column]
518
+ # @!method unhex(*cols)
519
+ # The Spark SQL `unhex` function. String arguments are treated as column names.
520
+ # @return [Column]
521
+ # @!method pmod(*cols)
522
+ # The Spark SQL `pmod` function. String arguments are treated as column names.
523
+ # @return [Column]
524
+ # @!method isnan(*cols)
525
+ # The Spark SQL `isnan` function. String arguments are treated as column names.
526
+ # @return [Column]
527
+ # @!method isnull(*cols)
528
+ # The Spark SQL `isnull` function. String arguments are treated as column names.
529
+ # @return [Column]
530
+ # @!method upper(*cols)
531
+ # The Spark SQL `upper` function. String arguments are treated as column names.
532
+ # @return [Column]
533
+ # @!method lower(*cols)
534
+ # The Spark SQL `lower` function. String arguments are treated as column names.
535
+ # @return [Column]
536
+ # @!method ltrim(*cols)
537
+ # The Spark SQL `ltrim` function. String arguments are treated as column names.
538
+ # @return [Column]
539
+ # @!method rtrim(*cols)
540
+ # The Spark SQL `rtrim` function. String arguments are treated as column names.
541
+ # @return [Column]
542
+ # @!method trim(*cols)
543
+ # The Spark SQL `trim` function. String arguments are treated as column names.
544
+ # @return [Column]
545
+ # @!method length(*cols)
546
+ # The Spark SQL `length` function. String arguments are treated as column names.
547
+ # @return [Column]
548
+ # @!method char_length(*cols)
549
+ # The Spark SQL `char_length` function. String arguments are treated as column names.
550
+ # @return [Column]
551
+ # @!method character_length(*cols)
552
+ # The Spark SQL `character_length` function. String arguments are treated as column names.
553
+ # @return [Column]
554
+ # @!method octet_length(*cols)
555
+ # The Spark SQL `octet_length` function. String arguments are treated as column names.
556
+ # @return [Column]
557
+ # @!method bit_length(*cols)
558
+ # The Spark SQL `bit_length` function. String arguments are treated as column names.
559
+ # @return [Column]
560
+ # @!method reverse(*cols)
561
+ # The Spark SQL `reverse` function. String arguments are treated as column names.
562
+ # @return [Column]
563
+ # @!method ascii(*cols)
564
+ # The Spark SQL `ascii` function. String arguments are treated as column names.
565
+ # @return [Column]
566
+ # @!method base64(*cols)
567
+ # The Spark SQL `base64` function. String arguments are treated as column names.
568
+ # @return [Column]
569
+ # @!method unbase64(*cols)
570
+ # The Spark SQL `unbase64` function. String arguments are treated as column names.
571
+ # @return [Column]
572
+ # @!method initcap(*cols)
573
+ # The Spark SQL `initcap` function. String arguments are treated as column names.
574
+ # @return [Column]
575
+ # @!method soundex(*cols)
576
+ # The Spark SQL `soundex` function. String arguments are treated as column names.
577
+ # @return [Column]
578
+ # @!method crc32(*cols)
579
+ # The Spark SQL `crc32` function. String arguments are treated as column names.
580
+ # @return [Column]
581
+ # @!method md5(*cols)
582
+ # The Spark SQL `md5` function. String arguments are treated as column names.
583
+ # @return [Column]
584
+ # @!method sha1(*cols)
585
+ # The Spark SQL `sha1` function. String arguments are treated as column names.
586
+ # @return [Column]
587
+ # @!method sha(*cols)
588
+ # The Spark SQL `sha` function. String arguments are treated as column names.
589
+ # @return [Column]
590
+ # @!method ucase(*cols)
591
+ # The Spark SQL `ucase` function. String arguments are treated as column names.
592
+ # @return [Column]
593
+ # @!method lcase(*cols)
594
+ # The Spark SQL `lcase` function. String arguments are treated as column names.
595
+ # @return [Column]
596
+ # @!method size(*cols)
597
+ # The Spark SQL `size` function. String arguments are treated as column names.
598
+ # @return [Column]
599
+ # @!method cardinality(*cols)
600
+ # The Spark SQL `cardinality` function. String arguments are treated as column names.
601
+ # @return [Column]
602
+ # @!method array_distinct(*cols)
603
+ # The Spark SQL `array_distinct` function. String arguments are treated as column names.
604
+ # @return [Column]
605
+ # @!method array_max(*cols)
606
+ # The Spark SQL `array_max` function. String arguments are treated as column names.
607
+ # @return [Column]
608
+ # @!method array_min(*cols)
609
+ # The Spark SQL `array_min` function. String arguments are treated as column names.
610
+ # @return [Column]
611
+ # @!method array_compact(*cols)
612
+ # The Spark SQL `array_compact` function. String arguments are treated as column names.
613
+ # @return [Column]
614
+ # @!method flatten(*cols)
615
+ # The Spark SQL `flatten` function. String arguments are treated as column names.
616
+ # @return [Column]
617
+ # @!method explode(*cols)
618
+ # The Spark SQL `explode` function. String arguments are treated as column names.
619
+ # @return [Column]
620
+ # @!method explode_outer(*cols)
621
+ # The Spark SQL `explode_outer` function. String arguments are treated as column names.
622
+ # @return [Column]
623
+ # @!method posexplode(*cols)
624
+ # The Spark SQL `posexplode` function. String arguments are treated as column names.
625
+ # @return [Column]
626
+ # @!method posexplode_outer(*cols)
627
+ # The Spark SQL `posexplode_outer` function. String arguments are treated as column names.
628
+ # @return [Column]
629
+ # @!method inline(*cols)
630
+ # The Spark SQL `inline` function. String arguments are treated as column names.
631
+ # @return [Column]
632
+ # @!method inline_outer(*cols)
633
+ # The Spark SQL `inline_outer` function. String arguments are treated as column names.
634
+ # @return [Column]
635
+ # @!method map_keys(*cols)
636
+ # The Spark SQL `map_keys` function. String arguments are treated as column names.
637
+ # @return [Column]
638
+ # @!method map_values(*cols)
639
+ # The Spark SQL `map_values` function. String arguments are treated as column names.
640
+ # @return [Column]
641
+ # @!method map_entries(*cols)
642
+ # The Spark SQL `map_entries` function. String arguments are treated as column names.
643
+ # @return [Column]
644
+ # @!method map_from_entries(*cols)
645
+ # The Spark SQL `map_from_entries` function. String arguments are treated as column names.
646
+ # @return [Column]
647
+ # @!method array_sort(*cols)
648
+ # The Spark SQL `array_sort` function. String arguments are treated as column names.
649
+ # @return [Column]
650
+ # @!method shuffle(*cols)
651
+ # The Spark SQL `shuffle` function. String arguments are treated as column names.
652
+ # @return [Column]
653
+ # @!method arrays_zip(*cols)
654
+ # The Spark SQL `arrays_zip` function. String arguments are treated as column names.
655
+ # @return [Column]
656
+ # @!method map_concat(*cols)
657
+ # The Spark SQL `map_concat` function. String arguments are treated as column names.
658
+ # @return [Column]
659
+ # @!method concat(*cols)
660
+ # The Spark SQL `concat` function. String arguments are treated as column names.
661
+ # @return [Column]
662
+ # @!method greatest(*cols)
663
+ # The Spark SQL `greatest` function. String arguments are treated as column names.
664
+ # @return [Column]
665
+ # @!method least(*cols)
666
+ # The Spark SQL `least` function. String arguments are treated as column names.
667
+ # @return [Column]
668
+ # @!method hash(*cols)
669
+ # The Spark SQL `hash` function. String arguments are treated as column names.
670
+ # @return [Column]
671
+ # @!method xxhash64(*cols)
672
+ # The Spark SQL `xxhash64` function. String arguments are treated as column names.
673
+ # @return [Column]
674
+ # @!method array_union(*cols)
675
+ # The Spark SQL `array_union` function. String arguments are treated as column names.
676
+ # @return [Column]
677
+ # @!method array_intersect(*cols)
678
+ # The Spark SQL `array_intersect` function. String arguments are treated as column names.
679
+ # @return [Column]
680
+ # @!method array_except(*cols)
681
+ # The Spark SQL `array_except` function. String arguments are treated as column names.
682
+ # @return [Column]
683
+ # @!method arrays_overlap(*cols)
684
+ # The Spark SQL `arrays_overlap` function. String arguments are treated as column names.
685
+ # @return [Column]
686
+ # @!method year(*cols)
687
+ # The Spark SQL `year` function. String arguments are treated as column names.
688
+ # @return [Column]
689
+ # @!method quarter(*cols)
690
+ # The Spark SQL `quarter` function. String arguments are treated as column names.
691
+ # @return [Column]
692
+ # @!method month(*cols)
693
+ # The Spark SQL `month` function. String arguments are treated as column names.
694
+ # @return [Column]
695
+ # @!method dayofmonth(*cols)
696
+ # The Spark SQL `dayofmonth` function. String arguments are treated as column names.
697
+ # @return [Column]
698
+ # @!method day(*cols)
699
+ # The Spark SQL `day` function. String arguments are treated as column names.
700
+ # @return [Column]
701
+ # @!method dayofweek(*cols)
702
+ # The Spark SQL `dayofweek` function. String arguments are treated as column names.
703
+ # @return [Column]
704
+ # @!method dayofyear(*cols)
705
+ # The Spark SQL `dayofyear` function. String arguments are treated as column names.
706
+ # @return [Column]
707
+ # @!method hour(*cols)
708
+ # The Spark SQL `hour` function. String arguments are treated as column names.
709
+ # @return [Column]
710
+ # @!method minute(*cols)
711
+ # The Spark SQL `minute` function. String arguments are treated as column names.
712
+ # @return [Column]
713
+ # @!method second(*cols)
714
+ # The Spark SQL `second` function. String arguments are treated as column names.
715
+ # @return [Column]
716
+ # @!method weekofyear(*cols)
717
+ # The Spark SQL `weekofyear` function. String arguments are treated as column names.
718
+ # @return [Column]
719
+ # @!method last_day(*cols)
720
+ # The Spark SQL `last_day` function. String arguments are treated as column names.
721
+ # @return [Column]
722
+ # @!method weekday(*cols)
723
+ # The Spark SQL `weekday` function. String arguments are treated as column names.
724
+ # @return [Column]
725
+ # @!method unix_date(*cols)
726
+ # The Spark SQL `unix_date` function. String arguments are treated as column names.
727
+ # @return [Column]
728
+ # @!method unix_micros(*cols)
729
+ # The Spark SQL `unix_micros` function. String arguments are treated as column names.
730
+ # @return [Column]
731
+ # @!method unix_millis(*cols)
732
+ # The Spark SQL `unix_millis` function. String arguments are treated as column names.
733
+ # @return [Column]
734
+ # @!method unix_seconds(*cols)
735
+ # The Spark SQL `unix_seconds` function. String arguments are treated as column names.
736
+ # @return [Column]
737
+ # @!method timestamp_seconds(*cols)
738
+ # The Spark SQL `timestamp_seconds` function. String arguments are treated as column names.
739
+ # @return [Column]
740
+ # @!method timestamp_millis(*cols)
741
+ # The Spark SQL `timestamp_millis` function. String arguments are treated as column names.
742
+ # @return [Column]
743
+ # @!method timestamp_micros(*cols)
744
+ # The Spark SQL `timestamp_micros` function. String arguments are treated as column names.
745
+ # @return [Column]
746
+ # @!method date_from_unix_date(*cols)
747
+ # The Spark SQL `date_from_unix_date` function. String arguments are treated as column names.
748
+ # @return [Column]
749
+ # @!method bitwise_not(*cols)
750
+ # The Spark SQL `bitwise_not` function. String arguments are treated as column names.
751
+ # @return [Column]
752
+ # @!method bit_count(*cols)
753
+ # The Spark SQL `bit_count` function. String arguments are treated as column names.
754
+ # @return [Column]
755
+ # @!method typeof(*cols)
756
+ # The Spark SQL `typeof` function. String arguments are treated as column names.
757
+ # @return [Column]
758
+ # @!method current_date
759
+ # The Spark SQL `current_date` function (takes no arguments).
760
+ # @return [Column]
761
+ # @!method current_timestamp
762
+ # The Spark SQL `current_timestamp` function (takes no arguments).
763
+ # @return [Column]
764
+ # @!method now
765
+ # The Spark SQL `now` function (takes no arguments).
766
+ # @return [Column]
767
+ # @!method current_timezone
768
+ # The Spark SQL `current_timezone` function (takes no arguments).
769
+ # @return [Column]
770
+ # @!method current_user
771
+ # The Spark SQL `current_user` function (takes no arguments).
772
+ # @return [Column]
773
+ # @!method current_catalog
774
+ # The Spark SQL `current_catalog` function (takes no arguments).
775
+ # @return [Column]
776
+ # @!method current_database
777
+ # The Spark SQL `current_database` function (takes no arguments).
778
+ # @return [Column]
779
+ # @!method current_schema
780
+ # The Spark SQL `current_schema` function (takes no arguments).
781
+ # @return [Column]
782
+ # @!method monotonically_increasing_id
783
+ # The Spark SQL `monotonically_increasing_id` function (takes no arguments).
784
+ # @return [Column]
785
+ # @!method spark_partition_id
786
+ # The Spark SQL `spark_partition_id` function (takes no arguments).
787
+ # @return [Column]
788
+ # @!method input_file_name
789
+ # The Spark SQL `input_file_name` function (takes no arguments).
790
+ # @return [Column]
791
+ # @!method input_file_block_start
792
+ # The Spark SQL `input_file_block_start` function (takes no arguments).
793
+ # @return [Column]
794
+ # @!method input_file_block_length
795
+ # The Spark SQL `input_file_block_length` function (takes no arguments).
796
+ # @return [Column]
797
+ # @!method version
798
+ # The Spark SQL `version` function (takes no arguments).
799
+ # @return [Column]
800
+ # @!method uuid
801
+ # The Spark SQL `uuid` function (takes no arguments).
802
+ # @return [Column]
803
+ # @!method row_number
804
+ # The Spark SQL `row_number` function (takes no arguments).
805
+ # @return [Column]
806
+ # @!method rank
807
+ # The Spark SQL `rank` function (takes no arguments).
808
+ # @return [Column]
809
+ # @!method dense_rank
810
+ # The Spark SQL `dense_rank` function (takes no arguments).
811
+ # @return [Column]
812
+ # @!method percent_rank
813
+ # The Spark SQL `percent_rank` function (takes no arguments).
814
+ # @return [Column]
815
+ # @!method cume_dist
816
+ # The Spark SQL `cume_dist` function (takes no arguments).
817
+ # @return [Column]
818
+ # ---- Generated uniform functions --------------------------------------
819
+ # Functions whose arguments are all ColumnOrName (a String denotes a column
820
+ # name). Defined programmatically to keep the surface complete and compact.
821
+
822
# Names of the helpers generated below. Each one accepts any number of
# ColumnOrName arguments, coerces every argument through `_col`, and hands
# the result to `Column.invoke`. Each name is listed once; `.uniq` stays as a
# guard so an accidental repeat cannot slip in unnoticed.
#
# NOTE(review): `hash` is in this list, so the generated method replaces
# Object#hash on whatever ends up carrying these methods. Confirm that no
# receiver of these helpers is ever used as a Hash key or Set member.
UNIFORM = %w[
  sum avg mean max min first last stddev stddev_samp stddev_pop variance var_samp var_pop
  skewness kurtosis collect_list collect_set first_value last_value max_by min_by corr
  covar_pop covar_samp median mode any_value every some bit_and bit_or bit_xor bool_and bool_or
  product count_if grouping
  abs acos acosh asin asinh atan atanh atan2 bin cbrt ceil ceiling cos cosh cot csc degrees
  exp expm1 factorial floor hypot ln log log2 log10 log1p negative negate positive pow power
  radians rint sec signum sin sinh sqrt tan tanh hex unhex pmod isnan isnull
  upper lower ltrim rtrim trim length char_length character_length octet_length bit_length
  reverse ascii base64 unbase64 initcap soundex crc32 md5 sha1 sha ucase lcase
  size cardinality array_distinct array_max array_min array_compact flatten explode explode_outer
  posexplode posexplode_outer inline inline_outer map_keys map_values map_entries map_from_entries
  array_sort shuffle arrays_zip map_concat concat greatest least hash xxhash64
  array_union array_intersect array_except arrays_overlap
  year quarter month dayofmonth day dayofweek dayofyear hour minute second weekofyear last_day
  weekday unix_date unix_micros unix_millis unix_seconds timestamp_seconds timestamp_millis
  timestamp_micros date_from_unix_date
  bitwise_not bit_count typeof
].uniq.freeze

# Helper names that follow the DataFrame API rather than the SQL function
# registry. Spark registers unary minus as `negative` and bitwise NOT as `~`,
# so those are the names passed to `Column.invoke` for these helpers.
UNIFORM_SQL_NAMES = {
  "negate" => "negative",
  "bitwise_not" => "~"
}.freeze

UNIFORM.each do |fn|
  # Resolve the function name once, at definition time, not on every call.
  sql_name = UNIFORM_SQL_NAMES.fetch(fn, fn)
  define_method(fn) { |*cols| Column.invoke(sql_name, *cols.map { |c| _col(c) }) }
end
845
+
846
# Functions that are called without any argument.
NO_ARG = %w[
  current_date current_timestamp now current_timezone
  current_user current_catalog current_database current_schema
  monotonically_increasing_id spark_partition_id
  input_file_name input_file_block_start input_file_block_length
  version uuid
  row_number rank dense_rank percent_rank cume_dist
].freeze

# Every entry becomes a zero-argument method that calls `Column.invoke` with
# nothing but its own name.
NO_ARG.each { |name| define_method(name) { Column.invoke(name) } }
857
+
858
+ # ---- Internal helpers --------------------------------------------------
859
+
860
# Coerce a ColumnOrName argument.
#
# * a {Column} is returned untouched;
# * a String or Symbol is read as a column name and passed to `col`
#   (Symbols are stringified first);
# * any other value is passed to `lit`.
#
# @api private
# @param value [Column, String, Symbol, Object] the argument to coerce
# @return the Column itself, or whatever `col` / `lit` builds for the value
def _col(value)
  return value if value.is_a?(Column)
  return col(value.to_s) if value.is_a?(String) || value.is_a?(Symbol)

  lit(value)
end
870
+
871
# Pass a {Column} through unchanged and hand every other value to `lit`.
# Unlike `_col`, a String or Symbol is not read as a column name here.
#
# @api private
# @param value [Column, Object]
# @return the Column itself, or whatever `lit` builds for the value
def _lit_or_col(value)
  return value if value.is_a?(Column)

  lit(value)
end
875
+
876
class << self
  # Running number that `_lambda` increments to give each lambda's
  # variables a distinct name. Stored on the enclosing object itself.
  # @api private
  attr_accessor :lambda_counter
end

# No lambda has been built yet.
self.lambda_counter = 0
882
+
883
# Build a {Column} wrapping a LambdaFunction from a Ruby block.
#
# The block is called once, immediately, with one placeholder {Column} per
# lambda variable. Whatever it returns becomes the lambda body, so it must
# return a {Column} (anything responding to `to_expr`).
#
# Variables are named `x_<id>_<index>`, where `<id>` is the value of
# `Functions.lambda_counter` after it has been incremented for this lambda.
#
# @api private
# @param block [Proc] the Ruby block describing the lambda body
# @return [Column] a column whose expression is the LambdaFunction
# @raise [TypeError] if the block's result does not respond to `to_expr`
def _lambda(block)
  # A negative arity encodes optional/splat parameters as -(required + 1),
  # so ~arity recovers the number of required parameters. At least one
  # variable is always bound.
  arity = block.arity
  var_count = [arity.negative? ? ~arity : arity, 1].max

  # Read the id once so every variable of this lambda carries the same
  # number, rather than re-reading the shared counter per variable.
  lambda_id = (Functions.lambda_counter += 1)

  vars = Array.new(var_count) do |i|
    Proto::Expression::UnresolvedNamedLambdaVariable.new(name_parts: ["x_#{lambda_id}_#{i}"])
  end
  cols = vars.map { |v| Column.new(Proto::Expression.new(unresolved_named_lambda_variable: v)) }

  body = block.call(*cols)
  unless body.respond_to?(:to_expr)
    raise TypeError, "lambda block must return a Column, got #{body.class}"
  end

  Column.new(Proto::Expression.new(
    lambda_function: Proto::Expression::LambdaFunction.new(function: body.to_expr, arguments: vars)
  ))
end
899
+ end
900
+
901
+ # Short alias for {Functions}: `SparkConnect::F.col("x")`.
902
+ F = Functions
903
+ end