spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
* contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
* this work for additional information regarding copyright ownership.
|
|
5
|
+
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
* (the "License"); you may not use this file except in compliance with
|
|
7
|
+
* the License. You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
syntax = 'proto3';
|
|
19
|
+
|
|
20
|
+
import "google/protobuf/any.proto";
|
|
21
|
+
import "spark/connect/common.proto";
|
|
22
|
+
import "spark/connect/expressions.proto";
|
|
23
|
+
import "spark/connect/relations.proto";
|
|
24
|
+
import "spark/connect/ml.proto";
|
|
25
|
+
import "spark/connect/pipelines.proto";
|
|
26
|
+
|
|
27
|
+
package spark.connect;
|
|
28
|
+
|
|
29
|
+
option java_multiple_files = true;
|
|
30
|
+
option java_package = "org.apache.spark.connect.proto";
|
|
31
|
+
option go_package = "internal/generated";
|
|
32
|
+
|
|
33
|
+
// Command is the envelope for an operation that the server executes and that neither directly
// consumes nor directly produces a relational result.
//
// The concrete operation is selected through the `command_type` oneof, so at most one of the
// members below is set at a time.
message Command {
  oneof command_type {
    CommonInlineUserDefinedFunction register_function = 1;
    WriteOperation write_operation = 2;
    CreateDataFrameViewCommand create_dataframe_view = 3;
    WriteOperationV2 write_operation_v2 = 4;
    SqlCommand sql_command = 5;
    WriteStreamOperationStart write_stream_operation_start = 6;
    StreamingQueryCommand streaming_query_command = 7;
    GetResourcesCommand get_resources_command = 8;
    StreamingQueryManagerCommand streaming_query_manager_command = 9;
    CommonInlineUserDefinedTableFunction register_table_function = 10;
    StreamingQueryListenerBusCommand streaming_query_listener_bus_command = 11;
    CommonInlineUserDefinedDataSource register_data_source = 12;
    CreateResourceProfileCommand create_resource_profile_command = 13;
    CheckpointCommand checkpoint_command = 14;
    RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command = 15;
    MergeIntoTableCommand merge_into_table_command = 16;
    MlCommand ml_command = 17;
    ExecuteExternalCommand execute_external_command = 18;
    PipelineCommand pipeline_command = 19;

    // Extension point of the protocol. Plugins that generate arbitrary commands can add them
    // here; the correct resolution is done during planning.
    google.protobuf.Any extension = 999;
  }
}
|
|
63
|
+
|
|
64
|
+
// SqlCommand triggers the eager evaluation of SQL commands in Spark.
//
// When the SQL provided as part of the message is a command, it is evaluated immediately and
// the result is collected and returned as part of a LocalRelation. If it is not a command, the
// operation simply returns a SQL Relation. This allows the client to be almost oblivious to the
// server-side behavior.
//
// NOTE(review): fields 1 through 5 are all marked deprecated, which leaves `input` as the only
// non-deprecated field. Presumably `input` is their replacement -- confirm and document it.
message SqlCommand {
  // (Required) SQL Query. Deprecated.
  string sql = 1 [deprecated = true];

  // (Optional) A map of parameter names to literal expressions. Deprecated.
  map<string, Expression.Literal> args = 2 [deprecated = true];

  // (Optional) A sequence of literal expressions for positional parameters in the SQL query
  // text. Deprecated.
  repeated Expression.Literal pos_args = 3 [deprecated = true];

  // (Optional) A map of parameter names to expressions. Deprecated.
  // It cannot coexist with `pos_arguments`.
  map<string, Expression> named_arguments = 4 [deprecated = true];

  // (Optional) A sequence of expressions for positional parameters in the SQL query text.
  // Deprecated.
  // It cannot coexist with `named_arguments`.
  repeated Expression pos_arguments = 5 [deprecated = true];

  // (Optional) The relation that this SQL command will be built on.
  Relation input = 6;
}
|
|
91
|
+
|
|
92
|
+
// CreateDataFrameViewCommand creates a DataFrame view, either a global temp view or a local
// temp view.
message CreateDataFrameViewCommand {
  // (Required) The relation that the view is built on.
  Relation input = 1;

  // (Required) Name of the view.
  string name = 2;

  // (Required) Selects between a global temp view and a local temp view.
  bool is_global = 3;

  // (Required) Behavior when the view already exists: if true the existing view is updated,
  // if false an exception is thrown.
  bool replace = 4;
}
|
|
109
|
+
|
|
110
|
+
// WriteOperation describes a write of a relation. Writes are modeled as commands because they
// are not directly handled during analysis and planning.
message WriteOperation {
  // (Required) The relation whose output is persisted according to the options below.
  Relation input = 1;

  // (Optional) Format value according to the Spark documentation, for example text, parquet
  // or delta.
  optional string source = 2;

  // (Optional) Destination of the write operation, which can be either a path or a table.
  //
  // When the destination is neither a path nor a table (such as jdbc and noop), `save_type`
  // should not be set.
  oneof save_type {
    string path = 3;
    SaveTable table = 4;
  }

  // (Required) The save mode.
  SaveMode mode = 5;

  // (Optional) List of columns to sort the output by.
  repeated string sort_column_names = 6;

  // (Optional) List of columns for partitioning.
  repeated string partitioning_columns = 7;

  // (Optional) Bucketing specification. Bucketing must set both the number of buckets and the
  // columns to bucket by.
  BucketBy bucket_by = 8;

  // (Optional) A list of configuration options.
  map<string, string> options = 9;

  // (Optional) Columns used for clustering the table.
  repeated string clustering_columns = 10;

  // Table variant of the `save_type` oneof.
  message SaveTable {
    // (Required) The table name.
    string table_name = 1;

    // (Required) The method to be called to write to the table.
    TableSaveMethod save_method = 2;

    // Methods available for writing to a table.
    enum TableSaveMethod {
      TABLE_SAVE_METHOD_UNSPECIFIED = 0;
      TABLE_SAVE_METHOD_SAVE_AS_TABLE = 1;
      TABLE_SAVE_METHOD_INSERT_INTO = 2;
    }
  }

  // Bucketing specification referenced by `bucket_by`.
  message BucketBy {
    // Names of the columns to bucket by.
    repeated string bucket_column_names = 1;

    // Number of buckets.
    int32 num_buckets = 2;
  }

  // Save modes accepted by `mode`.
  enum SaveMode {
    SAVE_MODE_UNSPECIFIED = 0;
    SAVE_MODE_APPEND = 1;
    SAVE_MODE_OVERWRITE = 2;
    SAVE_MODE_ERROR_IF_EXISTS = 3;
    SAVE_MODE_IGNORE = 4;
  }
}
|
|
173
|
+
|
|
174
|
+
// WriteOperationV2 describes a write of a relation to a named table. Writes are modeled as
// commands because they are not directly handled during analysis and planning.
message WriteOperationV2 {
  // (Required) The relation whose output is persisted according to the options below.
  Relation input = 1;

  // (Required) Name of the table that is the destination of the write operation.
  string table_name = 2;

  // (Optional) A provider for the underlying output data source. Spark's default catalog
  // supports "parquet", "json", etc.
  optional string provider = 3;

  // (Optional) List of columns for partitioning the output table created by `create`,
  // `createOrReplace`, or `replace`.
  repeated Expression partitioning_columns = 4;

  // (Optional) A list of configuration options.
  map<string, string> options = 5;

  // (Optional) A list of table properties.
  map<string, string> table_properties = 6;

  // (Required) Write mode.
  Mode mode = 7;

  // Write modes accepted by `mode`.
  enum Mode {
    MODE_UNSPECIFIED = 0;
    MODE_CREATE = 1;
    MODE_OVERWRITE = 2;
    MODE_OVERWRITE_PARTITIONS = 3;
    MODE_APPEND = 4;
    MODE_REPLACE = 5;
    MODE_CREATE_OR_REPLACE = 6;
  }

  // (Optional) A condition for the overwrite saving mode.
  Expression overwrite_condition = 8;

  // (Optional) Columns used for clustering the table.
  repeated string clustering_columns = 9;
}
|
|
215
|
+
|
|
216
|
+
// WriteStreamOperationStart starts a write stream operation as a streaming query. The Query ID
// and Run ID of the streaming query are returned.
message WriteStreamOperationStart {
  // (Required) The streaming relation whose output will be written.
  Relation input = 1;

  // The fields below map directly to the API of DataStreamWriter().
  // Consult the API documentation unless a field is explicitly documented here.

  string format = 2;
  map<string, string> options = 3;
  repeated string partitioning_column_names = 4;

  // Trigger selection; the members are mutually exclusive.
  oneof trigger {
    string processing_time_interval = 5;
    bool available_now = 6;
    bool once = 7;
    string continuous_checkpoint_interval = 8;
  }

  string output_mode = 9;
  string query_name = 10;

  // The destination is optional. When set, it can be a path or a table name.
  oneof sink_destination {
    string path = 11;
    string table_name = 12;
  }

  StreamingForeachFunction foreach_writer = 13;
  StreamingForeachFunction foreach_batch = 14;

  // (Optional) Columns used for clustering the table.
  repeated string clustering_column_names = 15;
}
|
|
252
|
+
|
|
253
|
+
// StreamingForeachFunction carries the function used by the `foreach_writer` and
// `foreach_batch` fields of WriteStreamOperationStart, as either a Python or a Scala function.
message StreamingForeachFunction {
  oneof function {
    PythonUDF python_function = 1;
    ScalarScalaUDF scala_function = 2;
  }
}
|
|
259
|
+
|
|
260
|
+
// WriteStreamOperationStartResult identifies a started streaming query.
message WriteStreamOperationStartResult {
  // (Required) Query instance. See `StreamingQueryInstanceId`.
  StreamingQueryInstanceId query_id = 1;

  // An optional query name.
  string name = 2;

  // Optional query started event, present if any listener is registered on the client side.
  optional string query_started_event_json = 3;

  // TODO: How do we indicate errors?
  // TODO: Consider adding status, last progress etc here.
}
|
|
274
|
+
|
|
275
|
+
// StreamingQueryInstanceId is a tuple that uniquely identifies one run of a streaming query.
// It pairs `id`, which persists across the streaming runs, with `run_id`, which changes on
// every run of the streaming query that resumes from the checkpoint.
message StreamingQueryInstanceId {
  // (Required) The unique id of this query, persisting across restarts from checkpoint data.
  // It is generated when the query is started for the first time and stays the same every
  // time the query is restarted from checkpoint data.
  string id = 1;

  // (Required) The unique id of this run of the query. Every start/restart of a query
  // generates a unique run_id, so a query restarted from checkpoint keeps the same `id` but
  // gets a different `run_id` each time.
  string run_id = 2;
}
|
|
290
|
+
|
|
291
|
+
// StreamingQueryCommand carries one command addressed to a streaming query.
message StreamingQueryCommand {
  // (Required) Query instance. See `StreamingQueryInstanceId`.
  StreamingQueryInstanceId query_id = 1;

  // Each member corresponds to an API method of StreamingQuery; see the documentation of
  // that method.
  oneof command {
    // status() API.
    bool status = 2;

    // lastProgress() API.
    bool last_progress = 3;

    // recentProgress() API.
    bool recent_progress = 4;

    // stop() API. Stops the query.
    bool stop = 5;

    // processAllAvailable() API. Waits until all the available data is processed.
    bool process_all_available = 6;

    // explain() API. Returns logical and physical plans.
    ExplainCommand explain = 7;

    // exception() API. Returns the exception in the query, if any.
    bool exception = 8;

    // awaitTermination() API. Waits for the termination of the query.
    AwaitTerminationCommand await_termination = 9;
  }

  // Argument of the `explain` command.
  message ExplainCommand {
    // TODO: Consider reusing Explain from the AnalyzePlanRequest message.
    // This cannot be done right now because base.proto imports this file.
    bool extended = 1;
  }

  // Argument of the `await_termination` command.
  message AwaitTerminationCommand {
    // Timeout in milliseconds.
    // NOTE(review): this field uses number 2 and no field 1 is declared in this message;
    // the number is part of the wire contract and must not be changed.
    optional int64 timeout_ms = 2;
  }
}
|
|
327
|
+
|
|
328
|
+
// StreamingQueryCommandResult is the response for commands on a streaming query.
message StreamingQueryCommandResult {
  // (Required) Query instance id. See `StreamingQueryInstanceId`.
  StreamingQueryInstanceId query_id = 1;

  // The result payload; at most one member is set.
  oneof result_type {
    StatusResult status = 2;
    RecentProgressResult recent_progress = 3;
    ExplainResult explain = 4;
    ExceptionResult exception = 5;
    AwaitTerminationResult await_termination = 6;
  }

  // Result of the status command.
  message StatusResult {
    // See the documentation of the Scala 'StreamingQueryStatus' struct for these fields.
    string status_message = 1;
    bool is_data_available = 2;
    bool is_trigger_active = 3;
    bool is_active = 4;
  }

  // Result carrying progress reports.
  message RecentProgressResult {
    // Progress reports as an array of json strings.
    // NOTE(review): this is the only field of the message and it uses number 5; the number
    // is part of the wire contract and must not be changed.
    repeated string recent_progress_json = 5;
  }

  // Result of the explain command.
  message ExplainResult {
    // Logical and physical plans as a string.
    string result = 1;
  }

  // Result of the exception command.
  message ExceptionResult {
    // (Optional) Exception message as a string; maps to the return value of the original
    // StreamingQueryException's toString method.
    optional string exception_message = 1;

    // (Optional) Exception error class as a string.
    optional string error_class = 2;

    // (Optional) Exception stack trace as a string.
    optional string stack_trace = 3;
  }

  // Result of the await-termination command.
  message AwaitTerminationResult {
    bool terminated = 1;
  }
}
|
|
373
|
+
|
|
374
|
+
// StreamingQueryManagerCommand carries one command addressed to the streaming query manager.
message StreamingQueryManagerCommand {
  // Each member corresponds to an API method of StreamingQueryManager; see the documentation
  // of that method.
  oneof command {
    // active() API. Returns a list of active queries.
    bool active = 1;

    // get() API. Returns the StreamingQuery identified by id.
    string get_query = 2;

    // awaitAnyTermination() API. Waits until any query terminates or the timeout is reached.
    AwaitAnyTerminationCommand await_any_termination = 3;

    // resetTerminated() API.
    bool reset_terminated = 4;

    // addListener API.
    StreamingQueryListenerCommand add_listener = 5;

    // removeListener API.
    StreamingQueryListenerCommand remove_listener = 6;

    // listListeners() API. Returns a list of streaming query listeners.
    bool list_listeners = 7;
  }

  // Argument of the `await_any_termination` command.
  message AwaitAnyTerminationCommand {
    // (Optional) The waiting time in milliseconds to wait for any query to terminate.
    optional int64 timeout_ms = 1;
  }

  // Argument shared by the `add_listener` and `remove_listener` commands.
  message StreamingQueryListenerCommand {
    bytes listener_payload = 1;
    optional PythonUDF python_listener_payload = 2;
    string id = 3;
  }
}
|
|
406
|
+
|
|
407
|
+
// StreamingQueryManagerCommandResult is the response for commands on the streaming query
// manager.
message StreamingQueryManagerCommandResult {
  // The result payload; at most one member is set.
  oneof result_type {
    ActiveResult active = 1;
    StreamingQueryInstance query = 2;
    AwaitAnyTerminationResult await_any_termination = 3;
    bool reset_terminated = 4;
    bool add_listener = 5;
    bool remove_listener = 6;
    ListStreamingQueryListenerResult list_listeners = 7;
  }

  // Result listing the active queries.
  message ActiveResult {
    repeated StreamingQueryInstance active_queries = 1;
  }

  // Description of a single streaming query.
  message StreamingQueryInstance {
    // (Required) The id and runId of this query.
    StreamingQueryInstanceId id = 1;

    // (Optional) The name of this query.
    optional string name = 2;
  }

  // Result of the await-any-termination command.
  message AwaitAnyTerminationResult {
    bool terminated = 1;
  }

  // A listener instance represented by its payload bytes.
  message StreamingQueryListenerInstance {
    bytes listener_payload = 1;
  }

  // Result listing the registered streaming query listeners.
  message ListStreamingQueryListenerResult {
    // (Required) Reference IDs of listener instances.
    repeated string listener_ids = 1;
  }
}
|
|
443
|
+
|
|
444
|
+
// StreamingQueryListenerBusCommand is the protocol for the client-side
// StreamingQueryListener.
//
// The command is only set in two situations: when the first listener is added to the client,
// and when the last listener is removed from the client. `add_listener_bus_listener` is set
// to true only in the first situation, `remove_listener_bus_listener` only in the second.
message StreamingQueryListenerBusCommand {
  oneof command {
    bool add_listener_bus_listener = 1;
    bool remove_listener_bus_listener = 2;
  }
}
|
|
455
|
+
|
|
456
|
+
// StreamingQueryEventType enumerates the event types of the client-side streaming query
// listener.
//
// No QueryStartedEvent is defined here; it is added as a field in
// WriteStreamOperationStartResult.
enum StreamingQueryEventType {
  QUERY_PROGRESS_UNSPECIFIED = 0;
  QUERY_PROGRESS_EVENT = 1;
  QUERY_TERMINATED_EVENT = 2;
  QUERY_IDLE_EVENT = 3;
}
|
|
465
|
+
|
|
466
|
+
// StreamingQueryListenerEvent is the protocol for the events returned in the long-running
// response channel.
message StreamingQueryListenerEvent {
  // (Required) The json-serialized event; all StreamingQueryListener events have a json
  // method.
  string event_json = 1;

  // (Required) Query event type, used by the client to decide how to deserialize
  // `event_json`.
  StreamingQueryEventType event_type = 2;
}
|
|
473
|
+
|
|
474
|
+
// StreamingQueryListenerEventsResult carries a batch of streaming query listener events.
message StreamingQueryListenerEventsResult {
  // The listener events.
  repeated StreamingQueryListenerEvent events = 1;

  // NOTE(review): presumably reports that the listener bus listener was added -- confirm
  // against the server implementation.
  optional bool listener_bus_listener_added = 2;
}
|
|
478
|
+
|
|
479
|
+
// GetResourcesCommand requests the output of 'SparkContext.resources'. It has no fields.
message GetResourcesCommand {}
|
|
481
|
+
|
|
482
|
+
// GetResourcesCommandResult is the response for the 'GetResourcesCommand' command.
message GetResourcesCommandResult {
  // Resource information entries, keyed by string.
  map<string, ResourceInformation> resources = 1;
}
|
|
486
|
+
|
|
487
|
+
// CreateResourceProfileCommand creates a ResourceProfile.
message CreateResourceProfileCommand {
  // (Required) The ResourceProfile to be built on the server-side.
  ResourceProfile profile = 1;
}
|
|
492
|
+
|
|
493
|
+
// CreateResourceProfileCommandResult is the response for the 'CreateResourceProfileCommand'
// command.
message CreateResourceProfileCommandResult {
  // (Required) Resource profile id generated on the server side.
  int32 profile_id = 1;
}
|
|
498
|
+
|
|
499
|
+
// RemoveCachedRemoteRelationCommand removes a `CachedRemoteRelation`.
message RemoveCachedRemoteRelationCommand {
  // (Required) The cached remote relation to be removed.
  CachedRemoteRelation relation = 1;
}
|
|
504
|
+
|
|
505
|
+
// CheckpointCommand checkpoints a logical plan.
message CheckpointCommand {
  // (Required) The logical plan to checkpoint.
  Relation relation = 1;

  // (Required) Whether to checkpoint locally, using a local temporary directory in the Spark
  // Connect server (Spark Driver).
  bool local = 2;

  // (Required) Whether to checkpoint this dataframe immediately.
  bool eager = 3;

  // (Optional) For local checkpoint, the storage level to use.
  optional StorageLevel storage_level = 4;
}
|
|
519
|
+
|
|
520
|
+
// MergeIntoTableCommand merges a source relation into a named target table.
message MergeIntoTableCommand {
  // (Required) The name of the target table.
  string target_table_name = 1;

  // (Required) The relation of the source table.
  Relation source_table_plan = 2;

  // (Required) The condition used to match the source and the target.
  Expression merge_condition = 3;

  // (Optional) The actions to be taken when the condition is matched.
  repeated Expression match_actions = 4;

  // (Optional) The actions to be taken when the condition is not matched.
  repeated Expression not_matched_actions = 5;

  // (Optional) The actions to be taken when the condition is not matched by source.
  repeated Expression not_matched_by_source_actions = 6;

  // (Required) Whether to enable schema evolution.
  bool with_schema_evolution = 7;
}
|
|
542
|
+
|
|
543
|
+
// ExecuteExternalCommand executes an arbitrary string command inside an external execution
// engine.
message ExecuteExternalCommand {
  // (Required) The class name of the runner that implements `ExternalCommandRunner`.
  string runner = 1;

  // (Required) The target command to be executed.
  string command = 2;

  // (Optional) The options for the runner.
  map<string, string> options = 3;
}
|