spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bigdecimal"
require "date"
require "json"
|
|
5
|
+
|
|
6
|
+
module SparkConnect
|
|
7
|
+
# A column expression: a lazily-evaluated reference to a column or a
|
|
8
|
+
# computation over columns. Columns are immutable; operators and methods
|
|
9
|
+
# return new {Column}s.
|
|
10
|
+
#
|
|
11
|
+
# A {Column} wraps a protobuf `Expression`. Build them with
|
|
12
|
+
# {SparkConnect::Functions.col}, {SparkConnect::Functions.lit}, by indexing a
|
|
13
|
+
# DataFrame (`df["id"]`), or by combining other columns with operators.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# F = SparkConnect::F
|
|
17
|
+
# (F.col("age") + 1).alias("next_age")
|
|
18
|
+
# F.col("name").like("a%") & (F.col("age") >= 18)
|
|
19
|
+
class Column
  Proto = SparkConnect::Proto

  # @return [Spark::Connect::Expression] the wrapped protobuf expression.
  attr_reader :expr

  # @param expr [Spark::Connect::Expression] the protobuf expression to wrap.
  def initialize(expr)
    @expr = expr
  end

  # @return [Spark::Connect::Expression] the wrapped protobuf expression
  #   (same object as {#expr}).
  def to_expr
    @expr
  end

  class << self
    # Wrap an existing protobuf expression.
    #
    # @param expr [Spark::Connect::Expression]
    # @return [Column]
    def from_expr(expr)
      new(expr)
    end

    # An unresolved attribute reference by (possibly dotted) name. The special
    # name `"*"` expands to all columns.
    #
    # @param name [String, Symbol]
    # @return [Column]
    def from_name(name)
      identifier = name.to_s
      # Compare the stringified name so that `:*` is treated like `"*"`,
      # matching the `to_s` conversion used for ordinary attribute names.
      if identifier == "*"
        new(Proto::Expression.new(unresolved_star: Proto::Expression::UnresolvedStar.new))
      else
        new(Proto::Expression.new(
          unresolved_attribute: Proto::Expression::UnresolvedAttribute.new(unparsed_identifier: identifier)
        ))
      end
    end

    # Build a literal column from a Ruby value. An existing {Column} is
    # returned unchanged.
    #
    # @param value [Object] nil, Boolean, Integer, Float, String, Symbol,
    #   Time, Date, BigDecimal, Array, Hash, or an existing {Column}.
    # @return [Column]
    # @raise [IllegalArgumentError] if the value has no literal encoding.
    def lit(value)
      return value if value.is_a?(Column)

      new(Proto::Expression.new(literal: to_literal(value)))
    end

    # Build an `UnresolvedFunction` call column.
    #
    # @param name [String] the Spark function name.
    # @param args [Array<Column, Object>] arguments (non-columns become literals).
    # @param is_distinct [Boolean]
    # @return [Column]
    def invoke(name, *args, is_distinct: false)
      new(Proto::Expression.new(
        unresolved_function: Proto::Expression::UnresolvedFunction.new(
          function_name: name.to_s,
          arguments: args.map { |a| to_col(a).to_expr },
          is_distinct: is_distinct
        )
      ))
    end

    # Coerce a value into a {Column} (literals are wrapped).
    #
    # @param value [Column, Object]
    # @return [Column]
    def to_col(value)
      value.is_a?(Column) ? value : lit(value)
    end

    # Encode a Ruby value as a protobuf `Expression.Literal`.
    #
    # @param value [Object]
    # @return [Spark::Connect::Expression::Literal]
    # @raise [IllegalArgumentError] if the value's class is not supported.
    def to_literal(value)
      l = Proto::Expression::Literal
      case value
      when nil
        l.new(null: Types.null.to_proto)
      when true, false
        l.new(boolean: value)
      when Integer
        # Values inside the signed 32-bit range use the `integer` field;
        # everything else is sent through the `long` field.
        if value.between?(-2_147_483_648, 2_147_483_647)
          l.new(integer: value)
        else
          l.new(long: value)
        end
      when Float
        l.new(double: value)
      when BigDecimal
        # "F" yields plain (non-scientific) decimal notation.
        l.new(decimal: l::Decimal.new(value: value.to_s("F")))
      when Rational
        l.new(double: value.to_f)
      when String
        # Binary-encoded strings are sent as bytes, all others as text.
        if value.encoding == Encoding::ASCII_8BIT
          l.new(binary: value)
        else
          l.new(string: value)
        end
      when Symbol
        l.new(string: value.to_s)
      when Time
        # Microseconds since the Unix epoch.
        l.new(timestamp: (value.to_r * 1_000_000).to_i)
      when DateTime
        # Must be matched before Date: DateTime is a Date subclass.
        l.new(timestamp: (value.to_time.to_r * 1_000_000).to_i)
      when Date
        # Whole days since 1970-01-01.
        l.new(date: (value - Date.new(1970, 1, 1)).to_i)
      when Array
        l.new(array: l::Array.new(
          element_type: infer_array_element_type(value).to_proto,
          elements: value.map { |v| to_literal(v) }
        ))
      when Hash
        key_type, val_type = infer_map_types(value)
        l.new(map: l::Map.new(
          key_type: key_type.to_proto,
          value_type: val_type.to_proto,
          keys: value.keys.map { |k| to_literal(k) },
          values: value.values.map { |v| to_literal(v) }
        ))
      else
        raise IllegalArgumentError, "Unsupported literal value of type #{value.class}: #{value.inspect}"
      end
    end

    # Infer the Spark {Types::DataType} for a Ruby value (used when building
    # array/map literals). Mirrors PySpark's literal type inference.
    #
    # Container element types are taken from the first non-nil entry; an
    # empty Hash is typed as map<string, string>.
    #
    # @param value [Object]
    # @return [Types::DataType]
    # @raise [IllegalArgumentError] if the value's class is not supported.
    def infer_type(value)
      case value
      when nil then Types.null
      when true, false then Types.boolean
      when Integer
        value.between?(-2_147_483_648, 2_147_483_647) ? Types.integer : Types.long
      when Float, Rational then Types.double
      when BigDecimal then Types.decimal(38, 18)
      when String then value.encoding == Encoding::ASCII_8BIT ? Types.binary : Types.string
      when Symbol then Types.string
      when Time, DateTime then Types.timestamp
      when Date then Types.date
      when Array then Types.array(infer_array_element_type(value))
      when Hash then Types.map(*infer_map_types(value))
      else
        raise IllegalArgumentError, "Cannot infer Spark type for #{value.class}"
      end
    end

    private

    # Element type of an array literal: the type of its first non-nil
    # element, or the null type when the array is empty or all-nil.
    def infer_array_element_type(array)
      sample = array.find { |v| !v.nil? }
      sample.nil? ? Types.null : infer_type(sample)
    end

    # Key and value types of a map literal as a two-element array. An empty
    # hash defaults to string/string; otherwise nil entries are skipped when
    # sampling, so that `{a: nil, b: 1}` is typed from `1` rather than `nil`.
    def infer_map_types(hash)
      return [Types.string, Types.string] if hash.empty?

      [infer_array_element_type(hash.keys), infer_array_element_type(hash.values)]
    end
  end

  # ---- Arithmetic --------------------------------------------------------
  # Each operator builds an unresolved call to the Spark function of the same
  # name; a non-Column operand is wrapped as a literal.
  def +(other) = bin_op("+", other)
  def -(other) = bin_op("-", other)
  def *(other) = bin_op("*", other)
  def /(other) = bin_op("/", other)
  def %(other) = bin_op("%", other)
  def -@ = Column.invoke("negative", self)
  def +@ = self

  # Raise this column to the power of `other`.
  # @return [Column]
  def **(other) = bin_op("power", other)

  # ---- Comparison --------------------------------------------------------
  # NOTE: these return a {Column} expression, not a Ruby Boolean, so the
  # result is always truthy when used in a plain Ruby conditional.
  def ==(other) = bin_op("==", other)
  def !=(other) = bin_op("!=", other)
  def <(other) = bin_op("<", other)
  def <=(other) = bin_op("<=", other)
  def >(other) = bin_op(">", other)
  def >=(other) = bin_op(">=", other)

  # Null-safe equality (`<=>` in Spark SQL): `null <=> null` is true.
  # @return [Column]
  def eq_null_safe(other) = bin_op("<=>", other)

  # ---- Boolean -----------------------------------------------------------
  def &(other) = bin_op("and", other)
  def |(other) = bin_op("or", other)

  # Logical negation of this column.
  # @return [Column]
  def !
    Column.invoke("not", self)
  end
  alias not !

  # ---- Bitwise -----------------------------------------------------------
  def bitwise_and(other) = bin_op("&", other)
  def bitwise_or(other) = bin_op("|", other)
  def bitwise_xor(other) = bin_op("^", other)

  # ---- Null / membership predicates -------------------------------------
  def is_null = Column.invoke("isNull", self)
  def is_not_null = Column.invoke("isNotNull", self)
  def is_nan = Column.invoke("isNaN", self)
  alias isNull is_null
  alias isNotNull is_not_null

  # True if the column's value is in `values`. Accepts either a list of
  # arguments or a single Array.
  #
  # @param values [Array<Column, Object>]
  # @return [Column]
  def isin(*values)
    # A lone Array argument is treated as the candidate list itself.
    values = values.first if values.size == 1 && values.first.is_a?(Array)
    Column.invoke("in", self, *values)
  end
  alias in_list isin

  # True if `lower <= self <= upper`.
  # @return [Column]
  def between(lower, upper)
    (self >= lower) & (self <= upper)
  end

  # ---- String predicates -------------------------------------------------
  def like(pattern) = bin_op("like", pattern)
  def rlike(pattern) = bin_op("rlike", pattern)
  def ilike(pattern) = bin_op("ilike", pattern)
  def contains(other) = bin_op("contains", other)
  def startswith(other) = bin_op("startswith", other)
  def endswith(other) = bin_op("endswith", other)

  # Substring of length `len` starting at 1-based position `start`.
  # @return [Column]
  def substr(start, len)
    Column.invoke("substr", self, start, len)
  end

  # ---- Complex-type access ----------------------------------------------
  # Extract an array element by index, a map value by key, or a struct field.
  # @return [Column]
  def [](key)
    get_item(key)
  end

  # Build an `UnresolvedExtractValue` with this column as the child and `key`
  # (wrapped as a literal unless already a {Column}) as the extraction.
  #
  # @param key [Column, Object]
  # @return [Column]
  def get_item(key)
    Column.new(Proto::Expression.new(
      unresolved_extract_value: Proto::Expression::UnresolvedExtractValue.new(
        child: @expr, extraction: Column.lit(key).to_expr
      )
    ))
  end

  # Extract a struct field by name.
  # @return [Column]
  def get_field(name)
    get_item(name.to_s)
  end

  # ---- Aliasing / naming -------------------------------------------------
  # Assign one or more output names. With multiple names the expression must
  # produce a struct/multiple columns (e.g. `inline`).
  #
  # @param names [Array<String>]
  # @param metadata [Hash, nil] optional JSON metadata for a single alias.
  # @return [Column]
  def alias(*names, metadata: nil)
    a = Proto::Expression::Alias.new(expr: @expr, name: names.map(&:to_s))
    # Metadata is transmitted as a JSON-encoded string.
    a.metadata = JSON.generate(metadata) if metadata
    Column.new(Proto::Expression.new(alias: a))
  end
  alias name alias
  alias as alias

  # ---- Casting -----------------------------------------------------------
  # Cast to another type, given either a {Types::DataType} or a DDL type
  # string (e.g. `"int"`, `"decimal(10,2)"`).
  #
  # @param data_type [Types::DataType, String]
  # @return [Column]
  def cast(data_type)
    c = Proto::Expression::Cast.new(expr: @expr)
    if data_type.is_a?(String)
      c.type_str = data_type
    else
      c.type = data_type.to_proto
    end
    Column.new(Proto::Expression.new(cast: c))
  end
  alias as_type cast
  alias astype cast

  # ---- Sort ordering -----------------------------------------------------
  # `asc` places nulls first and `desc` places nulls last; the explicit
  # `*_nulls_first` / `*_nulls_last` variants override that placement.
  def asc = sort_order(:SORT_DIRECTION_ASCENDING, :SORT_NULLS_FIRST)
  def desc = sort_order(:SORT_DIRECTION_DESCENDING, :SORT_NULLS_LAST)
  def asc_nulls_first = sort_order(:SORT_DIRECTION_ASCENDING, :SORT_NULLS_FIRST)
  def asc_nulls_last = sort_order(:SORT_DIRECTION_ASCENDING, :SORT_NULLS_LAST)
  def desc_nulls_first = sort_order(:SORT_DIRECTION_DESCENDING, :SORT_NULLS_FIRST)
  def desc_nulls_last = sort_order(:SORT_DIRECTION_DESCENDING, :SORT_NULLS_LAST)

  # ---- CASE WHEN ---------------------------------------------------------
  # Add a branch to a CASE expression started by {Functions.when}.
  #
  # @param condition [Column, Object]
  # @param value [Column, Object]
  # @return [Column]
  # @raise [IllegalArgumentError] if this column was not produced by `when`,
  #   or if `otherwise` has already been applied to it.
  def when(condition, value)
    args = open_case_when_arguments("when") +
           [Column.to_col(condition).to_expr, Column.to_col(value).to_expr]
    Column.new(Proto::Expression.new(
      unresolved_function: Proto::Expression::UnresolvedFunction.new(function_name: "when", arguments: args)
    ))
  end

  # Provide the default (ELSE) value for a CASE expression.
  #
  # @param value [Column, Object]
  # @return [Column]
  # @raise [IllegalArgumentError] if this column was not produced by `when`,
  #   or if `otherwise` has already been applied to it.
  def otherwise(value)
    args = open_case_when_arguments("otherwise") + [Column.to_col(value).to_expr]
    Column.new(Proto::Expression.new(
      unresolved_function: Proto::Expression::UnresolvedFunction.new(function_name: "when", arguments: args)
    ))
  end

  # ---- Windowing ---------------------------------------------------------
  # Define a windowed aggregation / analytic computation over this column.
  #
  # @param window [WindowSpec]
  # @return [Column]
  def over(window)
    w = Proto::Expression::Window.new(
      window_function: @expr,
      partition_spec: window.partition_spec,
      order_spec: window.order_spec
    )
    # The frame is only set when the window spec defines one.
    w.frame_spec = window.frame_spec if window.frame_spec
    Column.new(Proto::Expression.new(window: w))
  end

  # @return [String] a short description naming the wrapped expression kind.
  def to_s
    "Column<#{@expr.expr_type}>"
  end
  alias inspect to_s

  private

  # Invoke the binary function `name` with this column and `other`.
  def bin_op(name, other)
    Column.invoke(name, self, Column.to_col(other))
  end

  # Wrap this column in a `SortOrder` with the given direction and null
  # ordering enum symbols.
  def sort_order(direction, null_ordering)
    Column.new(Proto::Expression.new(
      sort_order: Proto::Expression::SortOrder.new(
        child: @expr, direction: direction, null_ordering: null_ordering
      )
    ))
  end

  # Return the existing argument list of a `when` chain that can still be
  # extended, raising if this column is not such a chain.
  #
  # The arguments are a flat list of condition/value pairs, optionally
  # followed by a single ELSE value. An odd count therefore means
  # `otherwise` was already applied; appending anything further would make
  # the ELSE value be read as a condition.
  # NOTE(review): assumes Functions.when starts the chain with exactly one
  # condition/value pair - confirm against functions.rb.
  def open_case_when_arguments(caller_name)
    unless @expr.expr_type == :unresolved_function && @expr.unresolved_function.function_name == "when"
      raise IllegalArgumentError, "#{caller_name}() can only be applied on a Column previously generated by when()"
    end

    existing = @expr.unresolved_function.arguments.to_a
    if existing.size.odd?
      raise IllegalArgumentError, "#{caller_name}() cannot be applied once otherwise() is applied"
    end

    existing
  end
end
|
|
379
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SparkConnect
|
|
4
|
+
# Runtime configuration interface, returned by {SparkSession#conf}. Mirrors
|
|
5
|
+
# PySpark's `spark.conf`.
|
|
6
|
+
#
|
|
7
|
+
# @example
|
|
8
|
+
# spark.conf.set("spark.sql.shuffle.partitions", "8")
|
|
9
|
+
# spark.conf.get("spark.sql.shuffle.partitions") #=> "8"
|
|
10
|
+
class RuntimeConfig
  Proto = SparkConnect::Proto
  Op = Proto::ConfigRequest::Operation
  CR = Proto::ConfigRequest

  # @param client [SparkConnectClient] client used to send config requests.
  def initialize(client)
    @client = client
  end

  # Set a configuration property. Both key and value are sent as strings.
  #
  # @param key [String]
  # @param value [String, Integer, Boolean]
  # @return [void]
  def set(key, value)
    op = Op.new(set: CR::Set.new(pairs: [Proto::KeyValue.new(key: key.to_s, value: value.to_s)]))
    @client.config(op)
    nil
  end

  # Get the value of a configuration property.
  #
  # @param key [String]
  # @param default [String, Integer, Boolean, nil] returned when the key is
  #   unset (when given). Non-nil defaults are converted with `to_s`, as
  #   {#set} does for values; `nil` is passed through unchanged.
  # @return [String, nil] the value of the first pair in the response, or
  #   nil when the response contains no pairs.
  def get(key, default = :__unset__)
    op =
      # Identity comparison: an argument that overrides `==` must not be
      # mistaken for the "no default given" sentinel.
      if default.equal?(:__unset__)
        Op.new(get: CR::Get.new(keys: [key.to_s]))
      else
        Op.new(get_with_default: CR::GetWithDefault.new(
          pairs: [Proto::KeyValue.new(key: key.to_s, value: default&.to_s)]
        ))
      end
    @client.config(op).pairs.first&.value
  end

  # Unset a configuration property.
  #
  # @param key [String]
  # @return [void]
  def unset(key)
    @client.config(Op.new(unset: CR::Unset.new(keys: [key.to_s])))
    nil
  end

  # All configuration properties (optionally filtered by `prefix`).
  #
  # @param prefix [String, nil]
  # @return [Hash{String=>String}]
  def get_all(prefix = nil)
    ga = CR::GetAll.new
    # The prefix is only set on the request when one was given.
    ga.prefix = prefix if prefix
    resp = @client.config(Op.new(get_all: ga))
    resp.pairs.to_h { |pair| [pair.key, pair.value] }
  end

  # Whether a configuration property is modifiable in the current session.
  #
  # @param key [String]
  # @return [Boolean] true only when the first returned pair's value is the
  #   string "true".
  def modifiable?(key)
    resp = @client.config(Op.new(is_modifiable: CR::IsModifiable.new(keys: [key.to_s])))
    resp.pairs.first&.value == "true"
  end
end
|
|
79
|
+
end
|