spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,1275 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
* contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
* this work for additional information regarding copyright ownership.
|
|
5
|
+
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
* (the "License"); you may not use this file except in compliance with
|
|
7
|
+
* the License. You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
syntax = 'proto3';
|
|
19
|
+
|
|
20
|
+
package spark.connect;
|
|
21
|
+
|
|
22
|
+
import "google/protobuf/any.proto";
|
|
23
|
+
import "spark/connect/commands.proto";
|
|
24
|
+
import "spark/connect/common.proto";
|
|
25
|
+
import "spark/connect/expressions.proto";
|
|
26
|
+
import "spark/connect/relations.proto";
|
|
27
|
+
import "spark/connect/types.proto";
|
|
28
|
+
import "spark/connect/ml.proto";
|
|
29
|
+
import "spark/connect/pipelines.proto";
|
|
30
|
+
|
|
31
|
+
option java_multiple_files = true;
|
|
32
|
+
option java_package = "org.apache.spark.connect.proto";
|
|
33
|
+
option go_package = "internal/generated";
|
|
34
|
+
|
|
35
|
+
// A [[Plan]] is the structure that carries the runtime information for the execution from the
|
|
36
|
+
// client to the server. A [[Plan]] can be one of the following:
|
|
37
|
+
// - [[Relation]]: a reference to the underlying logical plan.
|
|
38
|
+
// - [[Command]]: used to execute commands on the server.
|
|
39
|
+
// - [[CompressedOperation]]: a compressed representation of either a Relation or a Command.
|
|
40
|
+
message Plan {
|
|
41
|
+
oneof op_type {
|
|
42
|
+
Relation root = 1;
|
|
43
|
+
Command command = 2;
|
|
44
|
+
CompressedOperation compressed_operation = 3;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
message CompressedOperation {
|
|
48
|
+
bytes data = 1;
|
|
49
|
+
OpType op_type = 2;
|
|
50
|
+
CompressionCodec compression_codec = 3;
|
|
51
|
+
|
|
52
|
+
enum OpType {
|
|
53
|
+
OP_TYPE_UNSPECIFIED = 0;
|
|
54
|
+
OP_TYPE_RELATION = 1;
|
|
55
|
+
OP_TYPE_COMMAND = 2;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Compression codec for plan compression.
|
|
61
|
+
enum CompressionCodec {
|
|
62
|
+
COMPRESSION_CODEC_UNSPECIFIED = 0;
|
|
63
|
+
COMPRESSION_CODEC_ZSTD = 1;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// User Context is used to refer to one particular user session that is executing
|
|
67
|
+
// queries in the backend.
|
|
68
|
+
message UserContext {
|
|
69
|
+
string user_id = 1;
|
|
70
|
+
string user_name = 2;
|
|
71
|
+
|
|
72
|
+
// To extend the existing user context message that is used to identify incoming requests,
|
|
73
|
+
// Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other
|
|
74
|
+
// messages into this message. Extensions are stored as a `repeated` type to be able to
|
|
75
|
+
// handle multiple active extensions.
|
|
76
|
+
repeated google.protobuf.Any extensions = 999;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Request to perform plan analyze, optionally to explain the plan.
|
|
80
|
+
message AnalyzePlanRequest {
|
|
81
|
+
// (Required)
|
|
82
|
+
//
|
|
83
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
84
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
85
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
86
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
87
|
+
string session_id = 1;
|
|
88
|
+
|
|
89
|
+
// (Optional)
|
|
90
|
+
//
|
|
91
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
92
|
+
// can use this to validate that the server side session has not changed.
|
|
93
|
+
optional string client_observed_server_side_session_id = 17;
|
|
94
|
+
|
|
95
|
+
// (Required) User context
|
|
96
|
+
UserContext user_context = 2;
|
|
97
|
+
|
|
98
|
+
// Provides optional information about the client sending the request. This field
|
|
99
|
+
// can be used for language or version specific information and is only intended for
|
|
100
|
+
// logging purposes and will not be interpreted by the server.
|
|
101
|
+
optional string client_type = 3;
|
|
102
|
+
|
|
103
|
+
oneof analyze {
|
|
104
|
+
Schema schema = 4;
|
|
105
|
+
Explain explain = 5;
|
|
106
|
+
TreeString tree_string = 6;
|
|
107
|
+
IsLocal is_local = 7;
|
|
108
|
+
IsStreaming is_streaming = 8;
|
|
109
|
+
InputFiles input_files = 9;
|
|
110
|
+
SparkVersion spark_version = 10;
|
|
111
|
+
DDLParse ddl_parse = 11;
|
|
112
|
+
SameSemantics same_semantics = 12;
|
|
113
|
+
SemanticHash semantic_hash = 13;
|
|
114
|
+
Persist persist = 14;
|
|
115
|
+
Unpersist unpersist = 15;
|
|
116
|
+
GetStorageLevel get_storage_level = 16;
|
|
117
|
+
JsonToDDL json_to_ddl = 18;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
message Schema {
|
|
121
|
+
// (Required) The logical plan to be analyzed.
|
|
122
|
+
Plan plan = 1;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Explains the input plan based on a configurable mode.
|
|
126
|
+
message Explain {
|
|
127
|
+
// (Required) The logical plan to be analyzed.
|
|
128
|
+
Plan plan = 1;
|
|
129
|
+
|
|
130
|
+
// (Required) For analyzePlan rpc calls, configure the mode to explain plan in strings.
|
|
131
|
+
ExplainMode explain_mode = 2;
|
|
132
|
+
|
|
133
|
+
// Plan explanation mode.
|
|
134
|
+
enum ExplainMode {
|
|
135
|
+
EXPLAIN_MODE_UNSPECIFIED = 0;
|
|
136
|
+
|
|
137
|
+
// Generates only physical plan.
|
|
138
|
+
EXPLAIN_MODE_SIMPLE = 1;
|
|
139
|
+
|
|
140
|
+
// Generates parsed logical plan, analyzed logical plan, optimized logical plan and physical plan.
|
|
141
|
+
// The parsed logical plan is an unresolved plan extracted from the query. The analyzed logical plan
|
|
142
|
+
// translates unresolvedAttribute and unresolvedRelation into fully typed objects.
|
|
143
|
+
// The optimized logical plan transforms through a set of optimization rules, resulting in the
|
|
144
|
+
// physical plan.
|
|
145
|
+
EXPLAIN_MODE_EXTENDED = 2;
|
|
146
|
+
|
|
147
|
+
// Generates code for the statement, if any and a physical plan.
|
|
148
|
+
EXPLAIN_MODE_CODEGEN = 3;
|
|
149
|
+
|
|
150
|
+
// If plan node statistics are available, generates a logical plan and also the statistics.
|
|
151
|
+
EXPLAIN_MODE_COST = 4;
|
|
152
|
+
|
|
153
|
+
// Generates a physical plan outline and also node details.
|
|
154
|
+
EXPLAIN_MODE_FORMATTED = 5;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
message TreeString {
|
|
159
|
+
// (Required) The logical plan to be analyzed.
|
|
160
|
+
Plan plan = 1;
|
|
161
|
+
|
|
162
|
+
// (Optional) Max level of the schema.
|
|
163
|
+
optional int32 level = 2;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
message IsLocal {
|
|
167
|
+
// (Required) The logical plan to be analyzed.
|
|
168
|
+
Plan plan = 1;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
message IsStreaming {
|
|
172
|
+
// (Required) The logical plan to be analyzed.
|
|
173
|
+
Plan plan = 1;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
message InputFiles {
|
|
177
|
+
// (Required) The logical plan to be analyzed.
|
|
178
|
+
Plan plan = 1;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
message SparkVersion { }
|
|
182
|
+
|
|
183
|
+
message DDLParse {
|
|
184
|
+
// (Required) The DDL formatted string to be parsed.
|
|
185
|
+
string ddl_string = 1;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
// Returns `true` when the logical query plans are equal and therefore return the same results.
|
|
190
|
+
message SameSemantics {
|
|
191
|
+
// (Required) The plan to be compared.
|
|
192
|
+
Plan target_plan = 1;
|
|
193
|
+
|
|
194
|
+
// (Required) The other plan to be compared.
|
|
195
|
+
Plan other_plan = 2;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
message SemanticHash {
|
|
199
|
+
// (Required) The logical plan to get a hashCode.
|
|
200
|
+
Plan plan = 1;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
message Persist {
|
|
204
|
+
// (Required) The logical plan to persist.
|
|
205
|
+
Relation relation = 1;
|
|
206
|
+
|
|
207
|
+
// (Optional) The storage level.
|
|
208
|
+
optional StorageLevel storage_level = 2;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
message Unpersist {
|
|
212
|
+
// (Required) The logical plan to unpersist.
|
|
213
|
+
Relation relation = 1;
|
|
214
|
+
|
|
215
|
+
// (Optional) Whether to block until all blocks are deleted.
|
|
216
|
+
optional bool blocking = 2;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
message GetStorageLevel {
|
|
220
|
+
// (Required) The logical plan to get the storage level.
|
|
221
|
+
Relation relation = 1;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
message JsonToDDL {
|
|
225
|
+
// (Required) The JSON formatted string to be converted to DDL.
|
|
226
|
+
string json_string = 1;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// Response to performing analysis of the query. Contains relevant metadata to be able to
|
|
231
|
+
// reason about the performance.
|
|
232
|
+
// Next ID: 17
|
|
233
|
+
message AnalyzePlanResponse {
|
|
234
|
+
string session_id = 1;
|
|
235
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
236
|
+
// session has not changed.
|
|
237
|
+
string server_side_session_id = 15;
|
|
238
|
+
|
|
239
|
+
oneof result {
|
|
240
|
+
Schema schema = 2;
|
|
241
|
+
Explain explain = 3;
|
|
242
|
+
TreeString tree_string = 4;
|
|
243
|
+
IsLocal is_local = 5;
|
|
244
|
+
IsStreaming is_streaming = 6;
|
|
245
|
+
InputFiles input_files = 7;
|
|
246
|
+
SparkVersion spark_version = 8;
|
|
247
|
+
DDLParse ddl_parse = 9;
|
|
248
|
+
SameSemantics same_semantics = 10;
|
|
249
|
+
SemanticHash semantic_hash = 11;
|
|
250
|
+
Persist persist = 12;
|
|
251
|
+
Unpersist unpersist = 13;
|
|
252
|
+
GetStorageLevel get_storage_level = 14;
|
|
253
|
+
JsonToDDL json_to_ddl = 16;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
message Schema {
|
|
257
|
+
DataType schema = 1;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
message Explain {
|
|
261
|
+
string explain_string = 1;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
message TreeString {
|
|
265
|
+
string tree_string = 1;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
message IsLocal {
|
|
269
|
+
bool is_local = 1;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
message IsStreaming {
|
|
273
|
+
bool is_streaming = 1;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
message InputFiles {
|
|
277
|
+
// A best-effort snapshot of the files that compose this Dataset
|
|
278
|
+
repeated string files = 1;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
message SparkVersion {
|
|
282
|
+
string version = 1;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
message DDLParse {
|
|
286
|
+
DataType parsed = 1;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
message SameSemantics {
|
|
290
|
+
bool result = 1;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
message SemanticHash {
|
|
294
|
+
int32 result = 1;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
message Persist { }
|
|
298
|
+
|
|
299
|
+
message Unpersist { }
|
|
300
|
+
|
|
301
|
+
message GetStorageLevel {
|
|
302
|
+
// (Required) The StorageLevel as a result of get_storage_level request.
|
|
303
|
+
StorageLevel storage_level = 1;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
message JsonToDDL {
|
|
307
|
+
string ddl_string = 1;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// A request to be executed by the service.
|
|
312
|
+
message ExecutePlanRequest {
|
|
313
|
+
// (Required)
|
|
314
|
+
//
|
|
315
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
316
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
317
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
318
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
319
|
+
string session_id = 1;
|
|
320
|
+
|
|
321
|
+
// (Optional)
|
|
322
|
+
//
|
|
323
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
324
|
+
// can use this to validate that the server side session has not changed.
|
|
325
|
+
optional string client_observed_server_side_session_id = 8;
|
|
326
|
+
|
|
327
|
+
// (Required) User context
|
|
328
|
+
//
|
|
329
|
+
// user_context.user_id and session_id both identify a unique remote spark session on the
|
|
330
|
+
// server side.
|
|
331
|
+
UserContext user_context = 2;
|
|
332
|
+
|
|
333
|
+
// (Optional)
|
|
334
|
+
// Provide an id for this request. If not provided, it will be generated by the server.
|
|
335
|
+
// It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream.
|
|
336
|
+
// The id must be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
337
|
+
optional string operation_id = 6;
|
|
338
|
+
|
|
339
|
+
// (Required) The logical plan to be executed / analyzed.
|
|
340
|
+
Plan plan = 3;
|
|
341
|
+
|
|
342
|
+
// Provides optional information about the client sending the request. This field
|
|
343
|
+
// can be used for language or version specific information and is only intended for
|
|
344
|
+
// logging purposes and will not be interpreted by the server.
|
|
345
|
+
optional string client_type = 4;
|
|
346
|
+
|
|
347
|
+
// Repeated element for options that can be passed to the request. This element is currently
|
|
348
|
+
// unused but allows to pass in an extension value used for arbitrary options.
|
|
349
|
+
repeated RequestOption request_options = 5;
|
|
350
|
+
|
|
351
|
+
message RequestOption {
|
|
352
|
+
oneof request_option {
|
|
353
|
+
ReattachOptions reattach_options = 1;
|
|
354
|
+
ResultChunkingOptions result_chunking_options = 2;
|
|
355
|
+
// Extension type for request options
|
|
356
|
+
google.protobuf.Any extension = 999;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
// Tags to tag the given execution with.
|
|
361
|
+
// Tags cannot contain ',' character and cannot be empty strings.
|
|
362
|
+
// Used by Interrupt with interrupt.tag.
|
|
363
|
+
repeated string tags = 7;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
// The response of a query, can be one or more for each request. Responses belonging to the
|
|
367
|
+
// same input query, carry the same `session_id`.
|
|
368
|
+
// Next ID: 24
|
|
369
|
+
message ExecutePlanResponse {
|
|
370
|
+
string session_id = 1;
|
|
371
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
372
|
+
// session has not changed.
|
|
373
|
+
string server_side_session_id = 15;
|
|
374
|
+
|
|
375
|
+
// Identifies the ExecutePlan execution.
|
|
376
|
+
// If set by the client in ExecutePlanRequest.operation_id, that value is returned.
|
|
377
|
+
// Otherwise generated by the server.
|
|
378
|
+
// It is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
379
|
+
string operation_id = 12;
|
|
380
|
+
|
|
381
|
+
// Identifies the response in the stream.
|
|
382
|
+
// The id is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
383
|
+
string response_id = 13;
|
|
384
|
+
|
|
385
|
+
// Union type for the different response messages.
|
|
386
|
+
oneof response_type {
|
|
387
|
+
ArrowBatch arrow_batch = 2;
|
|
388
|
+
|
|
389
|
+
// Special case for executing SQL commands.
|
|
390
|
+
SqlCommandResult sql_command_result = 5;
|
|
391
|
+
|
|
392
|
+
// Response for a streaming query.
|
|
393
|
+
WriteStreamOperationStartResult write_stream_operation_start_result = 8;
|
|
394
|
+
|
|
395
|
+
// Response for commands on a streaming query.
|
|
396
|
+
StreamingQueryCommandResult streaming_query_command_result = 9;
|
|
397
|
+
|
|
398
|
+
// Response for 'SparkContext.resources'.
|
|
399
|
+
GetResourcesCommandResult get_resources_command_result = 10;
|
|
400
|
+
|
|
401
|
+
// Response for commands on the streaming query manager.
|
|
402
|
+
StreamingQueryManagerCommandResult streaming_query_manager_command_result = 11;
|
|
403
|
+
|
|
404
|
+
// Response for commands on the client side streaming query listener.
|
|
405
|
+
StreamingQueryListenerEventsResult streaming_query_listener_events_result = 16;
|
|
406
|
+
|
|
407
|
+
// Response type informing if the stream is complete in reattachable execution.
|
|
408
|
+
ResultComplete result_complete = 14;
|
|
409
|
+
|
|
410
|
+
// Response for command that creates ResourceProfile.
|
|
411
|
+
CreateResourceProfileCommandResult create_resource_profile_command_result = 17;
|
|
412
|
+
|
|
413
|
+
// (Optional) Intermediate query progress reports.
|
|
414
|
+
ExecutionProgress execution_progress = 18;
|
|
415
|
+
|
|
416
|
+
// Response for command that checkpoints a DataFrame.
|
|
417
|
+
CheckpointCommandResult checkpoint_command_result = 19;
|
|
418
|
+
|
|
419
|
+
// ML command response
|
|
420
|
+
MlCommandResult ml_command_result = 20;
|
|
421
|
+
|
|
422
|
+
// Response containing pipeline event that is streamed back to the client during a pipeline run
|
|
423
|
+
PipelineEventResult pipeline_event_result = 21;
|
|
424
|
+
|
|
425
|
+
// Pipeline command response
|
|
426
|
+
PipelineCommandResult pipeline_command_result = 22;
|
|
427
|
+
|
|
428
|
+
// A signal from the server to the client to execute the query function for a flow, and to
|
|
429
|
+
// register its result with the server.
|
|
430
|
+
PipelineQueryFunctionExecutionSignal pipeline_query_function_execution_signal = 23;
|
|
431
|
+
|
|
432
|
+
// Support arbitrary result objects.
|
|
433
|
+
google.protobuf.Any extension = 999;
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
// Metrics for the query execution. Typically, this field is only present in the last
|
|
437
|
+
// batch of results and then represents the overall state of the query execution.
|
|
438
|
+
Metrics metrics = 4;
|
|
439
|
+
|
|
440
|
+
// The metrics observed during the execution of the query plan.
|
|
441
|
+
repeated ObservedMetrics observed_metrics = 6;
|
|
442
|
+
|
|
443
|
+
// (Optional) The Spark schema. This field is available when `collect` is called.
|
|
444
|
+
DataType schema = 7;
|
|
445
|
+
|
|
446
|
+
// A SQL command returns an opaque Relation that can be directly used as input for the next
|
|
447
|
+
// call.
|
|
448
|
+
message SqlCommandResult {
|
|
449
|
+
Relation relation = 1;
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
// A batch of query results serialized in Arrow format.
|
|
453
|
+
message ArrowBatch {
|
|
454
|
+
// Count rows in `data`. Must match the number of rows inside `data`.
|
|
455
|
+
int64 row_count = 1;
|
|
456
|
+
// Serialized Arrow data.
|
|
457
|
+
bytes data = 2;
|
|
458
|
+
|
|
459
|
+
// If set, row offset of the start of this ArrowBatch in execution results.
|
|
460
|
+
optional int64 start_offset = 3;
|
|
461
|
+
|
|
462
|
+
// Index of this chunk in the batch if chunking is enabled. The index starts from 0.
|
|
463
|
+
optional int64 chunk_index = 4;
|
|
464
|
+
|
|
465
|
+
// Total number of chunks in this batch if chunking is enabled.
|
|
466
|
+
// It is missing when chunking is disabled - the batch is returned whole
|
|
467
|
+
// and client will treat this response as the batch.
|
|
468
|
+
optional int64 num_chunks_in_batch = 5;
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
message Metrics {
|
|
472
|
+
|
|
473
|
+
repeated MetricObject metrics = 1;
|
|
474
|
+
|
|
475
|
+
message MetricObject {
|
|
476
|
+
string name = 1;
|
|
477
|
+
int64 plan_id = 2;
|
|
478
|
+
int64 parent = 3;
|
|
479
|
+
map<string, MetricValue> execution_metrics = 4;
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
message MetricValue {
|
|
483
|
+
string name = 1;
|
|
484
|
+
int64 value = 2;
|
|
485
|
+
string metric_type = 3;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
message ObservedMetrics {
|
|
490
|
+
string name = 1;
|
|
491
|
+
repeated Expression.Literal values = 2;
|
|
492
|
+
repeated string keys = 3;
|
|
493
|
+
int64 plan_id = 4;
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
message ResultComplete {
|
|
497
|
+
// If present, in a reattachable execution this means that after server sends onComplete,
|
|
498
|
+
// the execution is complete. If the server sends onComplete without sending a ResultComplete,
|
|
499
|
+
// it means that there is more, and the client should use ReattachExecute RPC to continue.
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// This message is used to communicate the progress of the query during execution.
|
|
503
|
+
message ExecutionProgress {
|
|
504
|
+
// Captures the progress of each individual stage.
|
|
505
|
+
repeated StageInfo stages = 1;
|
|
506
|
+
|
|
507
|
+
// Captures the currently in progress tasks.
|
|
508
|
+
int64 num_inflight_tasks = 2;
|
|
509
|
+
|
|
510
|
+
message StageInfo {
|
|
511
|
+
int64 stage_id = 1;
|
|
512
|
+
int64 num_tasks = 2;
|
|
513
|
+
int64 num_completed_tasks = 3;
|
|
514
|
+
int64 input_bytes_read = 4;
|
|
515
|
+
bool done = 5;
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
// The key-value pair for the config request and response.
|
|
521
|
+
message KeyValue {
|
|
522
|
+
// (Required) The key.
|
|
523
|
+
string key = 1;
|
|
524
|
+
// (Optional) The value.
|
|
525
|
+
optional string value = 2;
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
// Request to update or fetch the configurations.
|
|
529
|
+
message ConfigRequest {
|
|
530
|
+
// (Required)
|
|
531
|
+
//
|
|
532
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
533
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
534
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
535
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
536
|
+
string session_id = 1;
|
|
537
|
+
|
|
538
|
+
// (Optional)
|
|
539
|
+
//
|
|
540
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
541
|
+
// can use this to validate that the server side session has not changed.
|
|
542
|
+
optional string client_observed_server_side_session_id = 8;
|
|
543
|
+
|
|
544
|
+
// (Required) User context
|
|
545
|
+
UserContext user_context = 2;
|
|
546
|
+
|
|
547
|
+
// (Required) The operation for the config.
|
|
548
|
+
Operation operation = 3;
|
|
549
|
+
|
|
550
|
+
// Provides optional information about the client sending the request. This field
|
|
551
|
+
// can be used for language or version specific information and is only intended for
|
|
552
|
+
// logging purposes and will not be interpreted by the server.
|
|
553
|
+
optional string client_type = 4;
|
|
554
|
+
|
|
555
|
+
message Operation {
|
|
556
|
+
oneof op_type {
|
|
557
|
+
Set set = 1;
|
|
558
|
+
Get get = 2;
|
|
559
|
+
GetWithDefault get_with_default = 3;
|
|
560
|
+
GetOption get_option = 4;
|
|
561
|
+
GetAll get_all = 5;
|
|
562
|
+
Unset unset = 6;
|
|
563
|
+
IsModifiable is_modifiable = 7;
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
message Set {
|
|
568
|
+
// (Required) The config key-value pairs to set.
|
|
569
|
+
repeated KeyValue pairs = 1;
|
|
570
|
+
|
|
571
|
+
// (Optional) Whether to ignore failures.
|
|
572
|
+
optional bool silent = 2;
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
message Get {
|
|
576
|
+
// (Required) The config keys to get.
|
|
577
|
+
repeated string keys = 1;
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
message GetWithDefault {
|
|
581
|
+
// (Required) The config key-value pairs to get. The value will be used as the default value.
|
|
582
|
+
repeated KeyValue pairs = 1;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
message GetOption {
|
|
586
|
+
// (Required) The config keys to get optionally.
|
|
587
|
+
repeated string keys = 1;
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
message GetAll {
|
|
591
|
+
// (Optional) The prefix of the config key to get.
|
|
592
|
+
optional string prefix = 1;
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
message Unset {
|
|
596
|
+
// (Required) The config keys to unset.
|
|
597
|
+
repeated string keys = 1;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
message IsModifiable {
|
|
601
|
+
// (Required) The config keys to check the config is modifiable.
|
|
602
|
+
repeated string keys = 1;
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// Response to the config request.
|
|
607
|
+
// Next ID: 5
|
|
608
|
+
message ConfigResponse {
|
|
609
|
+
string session_id = 1;
|
|
610
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
611
|
+
// session has not changed.
|
|
612
|
+
string server_side_session_id = 4;
|
|
613
|
+
|
|
614
|
+
// (Optional) The result key-value pairs.
|
|
615
|
+
//
|
|
616
|
+
// Available when the operation is 'Get', 'GetWithDefault', 'GetOption', 'GetAll'.
|
|
617
|
+
// Also available for the operation 'IsModifiable' with boolean string "true" and "false".
|
|
618
|
+
repeated KeyValue pairs = 2;
|
|
619
|
+
|
|
620
|
+
// (Optional)
|
|
621
|
+
//
|
|
622
|
+
// Warning messages for deprecated or unsupported configurations.
|
|
623
|
+
repeated string warnings = 3;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
// Request to transfer client-local artifacts.
|
|
627
|
+
message AddArtifactsRequest {
|
|
628
|
+
|
|
629
|
+
// (Required)
|
|
630
|
+
//
|
|
631
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
632
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
633
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
634
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
|
|
635
|
+
string session_id = 1;
|
|
636
|
+
|
|
637
|
+
// User context
|
|
638
|
+
UserContext user_context = 2;
|
|
639
|
+
|
|
640
|
+
// (Optional)
|
|
641
|
+
//
|
|
642
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
643
|
+
// can use this to validate that the server side session has not changed.
|
|
644
|
+
optional string client_observed_server_side_session_id = 7;
|
|
645
|
+
|
|
646
|
+
// Provides optional information about the client sending the request. This field
|
|
647
|
+
// can be used for language or version specific information and is only intended for
|
|
648
|
+
// logging purposes and will not be interpreted by the server.
|
|
649
|
+
optional string client_type = 6;
|
|
650
|
+
|
|
651
|
+
// A chunk of an Artifact.
|
|
652
|
+
message ArtifactChunk {
|
|
653
|
+
// Data chunk.
|
|
654
|
+
bytes data = 1;
|
|
655
|
+
// CRC to allow server to verify integrity of the chunk.
|
|
656
|
+
int64 crc = 2;
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
// An artifact that is contained in a single `ArtifactChunk`.
|
|
660
|
+
// Generally, this message represents tiny artifacts such as REPL-generated class files.
|
|
661
|
+
message SingleChunkArtifact {
|
|
662
|
+
// The name of the artifact is expected in the form of a "Relative Path" that is made up of a
|
|
663
|
+
// sequence of directories and the final file element.
|
|
664
|
+
// Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar".
|
|
665
|
+
// The server is expected to maintain the hierarchy of files as defined by their name. (i.e.
|
|
666
|
+
// The relative path of the file on the server's filesystem will be the same as the name of
|
|
667
|
+
// the provided artifact)
|
|
668
|
+
string name = 1;
|
|
669
|
+
// A single data chunk.
|
|
670
|
+
ArtifactChunk data = 2;
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
// A number of `SingleChunkArtifact` batched into a single RPC.
|
|
674
|
+
message Batch {
|
|
675
|
+
repeated SingleChunkArtifact artifacts = 1;
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
// Signals the beginning/start of a chunked artifact.
|
|
679
|
+
// A large artifact is transferred through a payload of `BeginChunkedArtifact` followed by a
|
|
680
|
+
// sequence of `ArtifactChunk`s.
|
|
681
|
+
message BeginChunkedArtifact {
|
|
682
|
+
// Name of the artifact undergoing chunking. Follows the same conventions as the `name` in
|
|
683
|
+
// the `SingleChunkArtifact` message.
|
|
684
|
+
string name = 1;
|
|
685
|
+
// Total size of the artifact in bytes.
|
|
686
|
+
int64 total_bytes = 2;
|
|
687
|
+
// Number of chunks the artifact is split into.
|
|
688
|
+
// This includes the `initial_chunk`.
|
|
689
|
+
int64 num_chunks = 3;
|
|
690
|
+
// The first/initial chunk.
|
|
691
|
+
ArtifactChunk initial_chunk = 4;
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
// The payload is either a batch of artifacts or a partial chunk of a large artifact.
|
|
695
|
+
oneof payload {
|
|
696
|
+
Batch batch = 3;
|
|
697
|
+
// The metadata and the initial chunk of a large artifact chunked into multiple requests.
|
|
698
|
+
// The server side is notified about the total size of the large artifact as well as the
|
|
699
|
+
// number of chunks to expect.
|
|
700
|
+
BeginChunkedArtifact begin_chunk = 4;
|
|
701
|
+
// A chunk of an artifact excluding metadata. This can be any chunk of a large artifact
|
|
702
|
+
// excluding the first chunk (which is included in `BeginChunkedArtifact`).
|
|
703
|
+
ArtifactChunk chunk = 5;
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
// Response to adding an artifact. Contains relevant metadata to verify successful transfer of
|
|
708
|
+
// artifact(s).
|
|
709
|
+
// Next ID: 4
|
|
710
|
+
message AddArtifactsResponse {
|
|
711
|
+
// Session id in which the AddArtifact was running.
|
|
712
|
+
string session_id = 2;
|
|
713
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
714
|
+
// session has not changed.
|
|
715
|
+
string server_side_session_id = 3;
|
|
716
|
+
|
|
717
|
+
// The list of artifact(s) seen by the server.
|
|
718
|
+
repeated ArtifactSummary artifacts = 1;
|
|
719
|
+
|
|
720
|
+
// Metadata of an artifact.
|
|
721
|
+
message ArtifactSummary {
|
|
722
|
+
string name = 1;
|
|
723
|
+
// Whether the CRC (Cyclic Redundancy Check) is successful on server verification.
|
|
724
|
+
// The server discards any artifact that fails the CRC.
|
|
725
|
+
// If false, the client may choose to resend the artifact specified by `name`.
|
|
726
|
+
bool is_crc_successful = 2;
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
// Request to get current statuses of artifacts at the server side.
|
|
731
|
+
message ArtifactStatusesRequest {
|
|
732
|
+
// (Required)
|
|
733
|
+
//
|
|
734
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
735
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
736
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
737
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
|
|
738
|
+
string session_id = 1;
|
|
739
|
+
|
|
740
|
+
// (Optional)
|
|
741
|
+
//
|
|
742
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
743
|
+
// can use this to validate that the server side session has not changed.
|
|
744
|
+
optional string client_observed_server_side_session_id = 5;
|
|
745
|
+
|
|
746
|
+
// User context
|
|
747
|
+
UserContext user_context = 2;
|
|
748
|
+
|
|
749
|
+
// Provides optional information about the client sending the request. This field
|
|
750
|
+
// can be used for language or version specific information and is only intended for
|
|
751
|
+
// logging purposes and will not be interpreted by the server.
|
|
752
|
+
optional string client_type = 3;
|
|
753
|
+
|
|
754
|
+
// The name of the artifact is expected in the form of a "Relative Path" that is made up of a
|
|
755
|
+
// sequence of directories and the final file element.
|
|
756
|
+
// Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar".
|
|
757
|
+
// The server is expected to maintain the hierarchy of files as defined by their name. (i.e.
|
|
758
|
+
// The relative path of the file on the server's filesystem will be the same as the name of
|
|
759
|
+
// the provided artifact)
|
|
760
|
+
repeated string names = 4;
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// Response to checking artifact statuses.
|
|
764
|
+
// Next ID: 4
|
|
765
|
+
message ArtifactStatusesResponse {
|
|
766
|
+
// Session id in which the ArtifactStatus was running.
|
|
767
|
+
string session_id = 2;
|
|
768
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
769
|
+
// session has not changed.
|
|
770
|
+
string server_side_session_id = 3;
|
|
771
|
+
// A map of artifact names to their statuses.
|
|
772
|
+
map<string, ArtifactStatus> statuses = 1;
|
|
773
|
+
|
|
774
|
+
message ArtifactStatus {
|
|
775
|
+
// Whether the particular artifact exists at the server.
|
|
776
|
+
bool exists = 1;
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
message InterruptRequest {
|
|
781
|
+
// (Required)
|
|
782
|
+
//
|
|
783
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
784
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
785
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
786
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
|
|
787
|
+
string session_id = 1;
|
|
788
|
+
|
|
789
|
+
// (Optional)
|
|
790
|
+
//
|
|
791
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
792
|
+
// can use this to validate that the server side session has not changed.
|
|
793
|
+
optional string client_observed_server_side_session_id = 7;
|
|
794
|
+
|
|
795
|
+
// (Required) User context
|
|
796
|
+
UserContext user_context = 2;
|
|
797
|
+
|
|
798
|
+
// Provides optional information about the client sending the request. This field
|
|
799
|
+
// can be used for language or version specific information and is only intended for
|
|
800
|
+
// logging purposes and will not be interpreted by the server.
|
|
801
|
+
optional string client_type = 3;
|
|
802
|
+
|
|
803
|
+
// (Required) The type of interrupt to execute.
|
|
804
|
+
InterruptType interrupt_type = 4;
|
|
805
|
+
|
|
806
|
+
enum InterruptType {
|
|
807
|
+
INTERRUPT_TYPE_UNSPECIFIED = 0;
|
|
808
|
+
|
|
809
|
+
// Interrupt all running executions within the session with the provided session_id.
|
|
810
|
+
INTERRUPT_TYPE_ALL = 1;
|
|
811
|
+
|
|
812
|
+
// Interrupt all running executions within the session with the provided operation_tag.
|
|
813
|
+
INTERRUPT_TYPE_TAG = 2;
|
|
814
|
+
|
|
815
|
+
// Interrupt the running execution within the session with the provided operation_id.
|
|
816
|
+
INTERRUPT_TYPE_OPERATION_ID = 3;
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
oneof interrupt {
|
|
820
|
+
// If interrupt_type == INTERRUPT_TYPE_TAG, interrupt operations with this tag.
|
|
821
|
+
string operation_tag = 5;
|
|
822
|
+
|
|
823
|
+
// If interrupt_type == INTERRUPT_TYPE_OPERATION_ID, interrupt the operation with this operation_id.
|
|
824
|
+
string operation_id = 6;
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
// Next ID: 4
|
|
829
|
+
message InterruptResponse {
|
|
830
|
+
// Session id in which the interrupt was running.
|
|
831
|
+
string session_id = 1;
|
|
832
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
833
|
+
// session has not changed.
|
|
834
|
+
string server_side_session_id = 3;
|
|
835
|
+
|
|
836
|
+
// Operation ids of the executions which were interrupted.
|
|
837
|
+
repeated string interrupted_ids = 2;
|
|
838
|
+
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
message ReattachOptions {
|
|
842
|
+
// If true, the request can be reattached to using ReattachExecute.
|
|
843
|
+
// ReattachExecute can be used either if the stream broke with a GRPC network error,
|
|
844
|
+
// or if the server closed the stream without sending a response with StreamStatus.complete=true.
|
|
845
|
+
// The server will keep a buffer of responses in case a response is lost, and
|
|
846
|
+
// ReattachExecute needs to back-track.
|
|
847
|
+
//
|
|
848
|
+
// If false, the execution response stream will not be reattachable, and all responses are
|
|
849
|
+
// immediately released by the server after being sent.
|
|
850
|
+
bool reattachable = 1;
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
message ResultChunkingOptions {
|
|
854
|
+
// Although Arrow results are split into batches with a size limit according to estimation, the
|
|
855
|
+
// size of the batches is not guaranteed to be less than the limit, especially when a single row
|
|
856
|
+
// is larger than the limit, in which case the server will fail to split it further into smaller
|
|
857
|
+
// batches. As a result, the client may encounter a gRPC error stating “Received message larger
|
|
858
|
+
// than max” when a batch is too large.
|
|
859
|
+
// If allow_arrow_batch_chunking=true, the server will split large Arrow batches into smaller chunks,
|
|
860
|
+
// and the client is expected to handle the chunked Arrow batches.
|
|
861
|
+
//
|
|
862
|
+
// If false, the server will not chunk large Arrow batches.
|
|
863
|
+
bool allow_arrow_batch_chunking = 1;
|
|
864
|
+
|
|
865
|
+
// Optional preferred Arrow batch size in bytes for the server to use when sending Arrow results.
|
|
866
|
+
// The server will attempt to use this size if it is set and within the valid range
|
|
867
|
+
// ([1KB, max batch size on server]). Otherwise, the server's maximum batch size is used.
|
|
868
|
+
optional int64 preferred_arrow_chunk_size = 2;
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
message ReattachExecuteRequest {
|
|
872
|
+
// (Required)
|
|
873
|
+
//
|
|
874
|
+
// The session_id of the request to reattach to.
|
|
875
|
+
// This must be the id of an existing session.
|
|
876
|
+
string session_id = 1;
|
|
877
|
+
|
|
878
|
+
// (Optional)
|
|
879
|
+
//
|
|
880
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
881
|
+
// can use this to validate that the server side session has not changed.
|
|
882
|
+
optional string client_observed_server_side_session_id = 6;
|
|
883
|
+
|
|
884
|
+
// (Required) User context
|
|
885
|
+
//
|
|
886
|
+
// user_context.user_id and session_id both identify a unique remote spark session on the
|
|
887
|
+
// server side.
|
|
888
|
+
UserContext user_context = 2;
|
|
889
|
+
|
|
890
|
+
// (Required)
|
|
891
|
+
// Provide an id of the request to reattach to.
|
|
892
|
+
// This must be the id of an existing operation.
|
|
893
|
+
string operation_id = 3;
|
|
894
|
+
|
|
895
|
+
// Provides optional information about the client sending the request. This field
|
|
896
|
+
// can be used for language or version specific information and is only intended for
|
|
897
|
+
// logging purposes and will not be interpreted by the server.
|
|
898
|
+
optional string client_type = 4;
|
|
899
|
+
|
|
900
|
+
// (Optional)
|
|
901
|
+
// Last already processed response id from the response stream.
|
|
902
|
+
// After reattach, server will resume the response stream after that response.
|
|
903
|
+
// If not specified, server will restart the stream from the start.
|
|
904
|
+
//
|
|
905
|
+
// Note: server controls the amount of responses that it buffers and it may drop responses,
|
|
906
|
+
// that are far behind the latest returned response, so this can't be used to arbitrarily
|
|
907
|
+
// scroll back the cursor. If the response is no longer available, this will result in an error.
|
|
908
|
+
optional string last_response_id = 5;
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
message ReleaseExecuteRequest {
|
|
912
|
+
// (Required)
|
|
913
|
+
//
|
|
914
|
+
// The session_id of the execution to release.
|
|
915
|
+
// This must be the id of an existing session.
|
|
916
|
+
string session_id = 1;
|
|
917
|
+
|
|
918
|
+
// (Optional)
|
|
919
|
+
//
|
|
920
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
921
|
+
// can use this to validate that the server side session has not changed.
|
|
922
|
+
optional string client_observed_server_side_session_id = 7;
|
|
923
|
+
|
|
924
|
+
// (Required) User context
|
|
925
|
+
//
|
|
926
|
+
// user_context.user_id and session_id both identify a unique remote spark session on the
|
|
927
|
+
// server side.
|
|
928
|
+
UserContext user_context = 2;
|
|
929
|
+
|
|
930
|
+
// (Required)
|
|
931
|
+
// Provide an id of the operation to release.
|
|
932
|
+
// This must be the id of an existing operation.
|
|
933
|
+
string operation_id = 3;
|
|
934
|
+
|
|
935
|
+
// Provides optional information about the client sending the request. This field
|
|
936
|
+
// can be used for language or version specific information and is only intended for
|
|
937
|
+
// logging purposes and will not be interpreted by the server.
|
|
938
|
+
optional string client_type = 4;
|
|
939
|
+
|
|
940
|
+
// Release and close operation completely.
|
|
941
|
+
// This will also interrupt the query if it is still executing, and wait for it to be torn down.
|
|
942
|
+
message ReleaseAll {}
|
|
943
|
+
|
|
944
|
+
// Release all responses from the operation response stream up to and including
|
|
945
|
+
// the response with the given response_id.
|
|
946
|
+
// While server determines by itself how much of a buffer of responses to keep, client providing
|
|
947
|
+
// explicit release calls will help reduce resource consumption.
|
|
948
|
+
// Noop if response_id not found in cached responses.
|
|
949
|
+
message ReleaseUntil {
|
|
950
|
+
string response_id = 1;
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
oneof release {
|
|
954
|
+
ReleaseAll release_all = 5;
|
|
955
|
+
ReleaseUntil release_until = 6;
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
// Next ID: 4
|
|
960
|
+
message ReleaseExecuteResponse {
|
|
961
|
+
// Session id in which the release was running.
|
|
962
|
+
string session_id = 1;
|
|
963
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
964
|
+
// session has not changed.
|
|
965
|
+
string server_side_session_id = 3;
|
|
966
|
+
|
|
967
|
+
// Operation id of the operation on which the release executed.
|
|
968
|
+
// If the operation couldn't be found (because e.g. it was concurrently released), will be unset.
|
|
969
|
+
// Otherwise, it will be equal to the operation_id from request.
|
|
970
|
+
optional string operation_id = 2;
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
message ReleaseSessionRequest {
|
|
974
|
+
// (Required)
|
|
975
|
+
//
|
|
976
|
+
// The session_id of the session to release.
|
|
977
|
+
// This must be the id of an existing session.
|
|
978
|
+
string session_id = 1;
|
|
979
|
+
|
|
980
|
+
// (Required) User context
|
|
981
|
+
//
|
|
982
|
+
// user_context.user_id and session_id both identify a unique remote spark session on the
|
|
983
|
+
// server side.
|
|
984
|
+
UserContext user_context = 2;
|
|
985
|
+
|
|
986
|
+
// Provides optional information about the client sending the request. This field
|
|
987
|
+
// can be used for language or version specific information and is only intended for
|
|
988
|
+
// logging purposes and will not be interpreted by the server.
|
|
989
|
+
optional string client_type = 3;
|
|
990
|
+
|
|
991
|
+
// Signals the server to allow the client to reconnect to the session after it is released.
|
|
992
|
+
//
|
|
993
|
+
// By default, the server tombstones the session upon release, preventing reconnections and
|
|
994
|
+
// fully cleaning the session state.
|
|
995
|
+
//
|
|
996
|
+
// If this flag is set to true, the server may permit the client to reconnect to the session
|
|
997
|
+
// post-release, even if the session state has been cleaned. This can result in missing state,
|
|
998
|
+
// such as Temporary Views, Temporary UDFs, or the Current Catalog, in the reconnected session.
|
|
999
|
+
//
|
|
1000
|
+
// Use this option sparingly and only when the client fully understands the implications of
|
|
1001
|
+
// reconnecting to a released session. The client must ensure that any queries executed do not
|
|
1002
|
+
// rely on the session state prior to its release.
|
|
1003
|
+
bool allow_reconnect = 4;
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
// Next ID: 3
|
|
1007
|
+
message ReleaseSessionResponse {
|
|
1008
|
+
// Session id of the session on which the release executed.
|
|
1009
|
+
string session_id = 1;
|
|
1010
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
1011
|
+
// session has not changed.
|
|
1012
|
+
string server_side_session_id = 2;
|
|
1013
|
+
}
|
|
1014
|
+
|
|
1015
|
+
message FetchErrorDetailsRequest {
|
|
1016
|
+
|
|
1017
|
+
// (Required)
|
|
1018
|
+
// The session_id specifies a Spark session for a user identified by user_context.user_id.
|
|
1019
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
|
|
1020
|
+
string session_id = 1;
|
|
1021
|
+
|
|
1022
|
+
// (Optional)
|
|
1023
|
+
//
|
|
1024
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
1025
|
+
// can use this to validate that the server side session has not changed.
|
|
1026
|
+
optional string client_observed_server_side_session_id = 5;
|
|
1027
|
+
|
|
1028
|
+
// User context
|
|
1029
|
+
UserContext user_context = 2;
|
|
1030
|
+
|
|
1031
|
+
// (Required)
|
|
1032
|
+
// The id of the error.
|
|
1033
|
+
string error_id = 3;
|
|
1034
|
+
|
|
1035
|
+
// Provides optional information about the client sending the request. This field
|
|
1036
|
+
// can be used for language or version specific information and is only intended for
|
|
1037
|
+
// logging purposes and will not be interpreted by the server.
|
|
1038
|
+
optional string client_type = 4;
|
|
1039
|
+
}
|
|
1040
|
+
|
|
1041
|
+
// Next ID: 5
|
|
1042
|
+
message FetchErrorDetailsResponse {
|
|
1043
|
+
|
|
1044
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
1045
|
+
// session has not changed.
|
|
1046
|
+
string server_side_session_id = 3;
|
|
1047
|
+
|
|
1048
|
+
string session_id = 4;
|
|
1049
|
+
|
|
1050
|
+
// The index of the root error in errors. The field will not be set if the error is not found.
|
|
1051
|
+
optional int32 root_error_idx = 1;
|
|
1052
|
+
|
|
1053
|
+
// A list of errors.
|
|
1054
|
+
repeated Error errors = 2;
|
|
1055
|
+
|
|
1056
|
+
message StackTraceElement {
|
|
1057
|
+
// The fully qualified name of the class containing the execution point.
|
|
1058
|
+
string declaring_class = 1;
|
|
1059
|
+
|
|
1060
|
+
// The name of the method containing the execution point.
|
|
1061
|
+
string method_name = 2;
|
|
1062
|
+
|
|
1063
|
+
// The name of the file containing the execution point.
|
|
1064
|
+
optional string file_name = 3;
|
|
1065
|
+
|
|
1066
|
+
// The line number of the source line containing the execution point.
|
|
1067
|
+
int32 line_number = 4;
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
// QueryContext defines the schema for the query context of a SparkThrowable.
|
|
1071
|
+
// It helps users understand where the error occurs while executing queries.
|
|
1072
|
+
message QueryContext {
|
|
1073
|
+
// The type of this query context.
|
|
1074
|
+
enum ContextType {
|
|
1075
|
+
SQL = 0;
|
|
1076
|
+
DATAFRAME = 1;
|
|
1077
|
+
}
|
|
1078
|
+
ContextType context_type = 10;
|
|
1079
|
+
|
|
1080
|
+
// The object type of the query which throws the exception.
|
|
1081
|
+
// If the exception is directly from the main query, it should be an empty string.
|
|
1082
|
+
// Otherwise, it should be the exact object type in upper case. For example, a "VIEW".
|
|
1083
|
+
string object_type = 1;
|
|
1084
|
+
|
|
1085
|
+
// The object name of the query which throws the exception.
|
|
1086
|
+
// If the exception is directly from the main query, it should be an empty string.
|
|
1087
|
+
// Otherwise, it should be the object name. For example, a view name "V1".
|
|
1088
|
+
string object_name = 2;
|
|
1089
|
+
|
|
1090
|
+
// The starting index in the query text which throws the exception. The index starts from 0.
|
|
1091
|
+
int32 start_index = 3;
|
|
1092
|
+
|
|
1093
|
+
// The stopping index in the query which throws the exception. The index starts from 0.
|
|
1094
|
+
int32 stop_index = 4;
|
|
1095
|
+
|
|
1096
|
+
// The corresponding fragment of the query which throws the exception.
|
|
1097
|
+
string fragment = 5;
|
|
1098
|
+
|
|
1099
|
+
// The user code (call site of the API) that caused throwing the exception.
|
|
1100
|
+
string call_site = 6;
|
|
1101
|
+
|
|
1102
|
+
// Summary of the exception cause.
|
|
1103
|
+
string summary = 7;
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
// SparkThrowable defines the schema for SparkThrowable exceptions.
|
|
1107
|
+
message SparkThrowable {
|
|
1108
|
+
// Succinct, human-readable, unique, and consistent representation of the error category.
|
|
1109
|
+
optional string error_class = 1;
|
|
1110
|
+
|
|
1111
|
+
// The message parameters for the error framework.
|
|
1112
|
+
map<string, string> message_parameters = 2;
|
|
1113
|
+
|
|
1114
|
+
// The query context of a SparkThrowable.
|
|
1115
|
+
repeated QueryContext query_contexts = 3;
|
|
1116
|
+
|
|
1117
|
+
// Portable error identifier across SQL engines
|
|
1118
|
+
// If null, error class or SQLSTATE is not set.
|
|
1119
|
+
optional string sql_state = 4;
|
|
1120
|
+
|
|
1121
|
+
// Additional information if the error was caused by a breaking change.
|
|
1122
|
+
optional BreakingChangeInfo breaking_change_info = 5;
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
// BreakingChangeInfo defines the schema for breaking change information.
|
|
1126
|
+
message BreakingChangeInfo {
|
|
1127
|
+
// A message explaining how the user can migrate their job to work
|
|
1128
|
+
// with the breaking change.
|
|
1129
|
+
repeated string migration_message = 1;
|
|
1130
|
+
|
|
1131
|
+
// A spark config flag that can be used to mitigate the breaking change.
|
|
1132
|
+
optional MitigationConfig mitigation_config = 2;
|
|
1133
|
+
|
|
1134
|
+
// If true, the breaking change should be inspected manually.
|
|
1135
|
+
// If false, the spark job should be retried by setting the mitigation_config.
|
|
1136
|
+
optional bool needs_audit = 3;
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
// MitigationConfig defines a spark config flag that can be used to mitigate a breaking change.
|
|
1140
|
+
message MitigationConfig {
|
|
1141
|
+
// The spark config key.
|
|
1142
|
+
string key = 1;
|
|
1143
|
+
|
|
1144
|
+
// The spark config value that mitigates the breaking change.
|
|
1145
|
+
string value = 2;
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
// Error defines the schema for representing an exception.
|
|
1149
|
+
message Error {
|
|
1150
|
+
// The fully qualified names of the exception class and its parent classes.
|
|
1151
|
+
repeated string error_type_hierarchy = 1;
|
|
1152
|
+
|
|
1153
|
+
// The detailed message of the exception.
|
|
1154
|
+
string message = 2;
|
|
1155
|
+
|
|
1156
|
+
// The stackTrace of the exception. It will be set
|
|
1157
|
+
// if the SQLConf spark.sql.connect.serverStacktrace.enabled is true.
|
|
1158
|
+
repeated StackTraceElement stack_trace = 3;
|
|
1159
|
+
|
|
1160
|
+
// The index of the cause error in errors.
|
|
1161
|
+
optional int32 cause_idx = 4;
|
|
1162
|
+
|
|
1163
|
+
// The structured data of a SparkThrowable exception.
|
|
1164
|
+
optional SparkThrowable spark_throwable = 5;
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
message CheckpointCommandResult {
|
|
1169
|
+
// (Required) The logical plan checkpointed.
|
|
1170
|
+
CachedRemoteRelation relation = 1;
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1173
|
+
message CloneSessionRequest {
|
|
1174
|
+
// (Required)
|
|
1175
|
+
//
|
|
1176
|
+
// The session_id specifies a spark session for a user id (which is specified
|
|
1177
|
+
// by user_context.user_id). The session_id is set by the client to be able to
|
|
1178
|
+
// collate streaming responses from different queries within the dedicated session.
|
|
1179
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
|
|
1180
|
+
string session_id = 1;
|
|
1181
|
+
|
|
1182
|
+
// (Optional)
|
|
1183
|
+
//
|
|
1184
|
+
// Server-side generated idempotency key from the previous responses (if any). Server
|
|
1185
|
+
// can use this to validate that the server side session has not changed.
|
|
1186
|
+
optional string client_observed_server_side_session_id = 5;
|
|
1187
|
+
|
|
1188
|
+
// (Required) User context
|
|
1189
|
+
//
|
|
1190
|
+
// user_context.user_id and session_id both identify a unique remote spark session on the
|
|
1191
|
+
// server side.
|
|
1192
|
+
UserContext user_context = 2;
|
|
1193
|
+
|
|
1194
|
+
// Provides optional information about the client sending the request. This field
|
|
1195
|
+
// can be used for language or version specific information and is only intended for
|
|
1196
|
+
// logging purposes and will not be interpreted by the server.
|
|
1197
|
+
optional string client_type = 3;
|
|
1198
|
+
|
|
1199
|
+
// (Optional)
|
|
1200
|
+
// The session_id for the new cloned session. If not provided, a new UUID will be generated.
|
|
1201
|
+
// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
|
|
1202
|
+
optional string new_session_id = 4;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
// Next ID: 5
|
|
1206
|
+
message CloneSessionResponse {
|
|
1207
|
+
// Session id of the original session that was cloned.
|
|
1208
|
+
string session_id = 1;
|
|
1209
|
+
|
|
1210
|
+
// Server-side generated idempotency key that the client can use to assert that the server side
|
|
1211
|
+
// session (parent of the cloned session) has not changed.
|
|
1212
|
+
string server_side_session_id = 2;
|
|
1213
|
+
|
|
1214
|
+
// Session id of the new cloned session.
|
|
1215
|
+
string new_session_id = 3;
|
|
1216
|
+
|
|
1217
|
+
// Server-side session ID of the new cloned session.
|
|
1218
|
+
string new_server_side_session_id = 4;
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1221
|
+
// Main interface for the SparkConnect service.
|
|
1222
|
+
service SparkConnectService {
|
|
1223
|
+
|
|
1224
|
+
// Executes a request that contains the query and returns a stream of [[ExecutePlanResponse]].
|
|
1225
|
+
//
|
|
1226
|
+
// It is guaranteed that there is at least one ARROW batch returned even if the result set is empty.
|
|
1227
|
+
rpc ExecutePlan(ExecutePlanRequest) returns (stream ExecutePlanResponse) {}
|
|
1228
|
+
|
|
1229
|
+
// Analyzes a query and returns an [[AnalyzePlanResponse]] containing metadata about the query.
|
|
1230
|
+
rpc AnalyzePlan(AnalyzePlanRequest) returns (AnalyzePlanResponse) {}
|
|
1231
|
+
|
|
1232
|
+
// Update or fetch the configurations and returns a [[ConfigResponse]] containing the result.
|
|
1233
|
+
rpc Config(ConfigRequest) returns (ConfigResponse) {}
|
|
1234
|
+
|
|
1235
|
+
// Add artifacts to the session and returns an [[AddArtifactsResponse]] containing metadata about
|
|
1236
|
+
// the added artifacts.
|
|
1237
|
+
rpc AddArtifacts(stream AddArtifactsRequest) returns (AddArtifactsResponse) {}
|
|
1238
|
+
|
|
1239
|
+
// Check statuses of artifacts in the session and returns them in an [[ArtifactStatusesResponse]].
|
|
1240
|
+
rpc ArtifactStatus(ArtifactStatusesRequest) returns (ArtifactStatusesResponse) {}
|
|
1241
|
+
|
|
1242
|
+
// Interrupts running executions
|
|
1243
|
+
rpc Interrupt(InterruptRequest) returns (InterruptResponse) {}
|
|
1244
|
+
|
|
1245
|
+
// Reattach to an existing reattachable execution.
|
|
1246
|
+
// The ExecutePlan must have been started with ReattachOptions.reattachable=true.
|
|
1247
|
+
// If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to
|
|
1248
|
+
// continue. If there is a ResultComplete, the client should use ReleaseExecute to release the execution.
|
|
1249
|
+
rpc ReattachExecute(ReattachExecuteRequest) returns (stream ExecutePlanResponse) {}
|
|
1250
|
+
|
|
1251
|
+
// Release a reattachable execution, or parts thereof.
|
|
1252
|
+
// The ExecutePlan must have been started with ReattachOptions.reattachable=true.
|
|
1253
|
+
// Non reattachable executions are released automatically and immediately after the ExecutePlan
|
|
1254
|
+
// RPC and ReleaseExecute may not be used.
|
|
1255
|
+
rpc ReleaseExecute(ReleaseExecuteRequest) returns (ReleaseExecuteResponse) {}
|
|
1256
|
+
|
|
1257
|
+
// Release a session.
|
|
1258
|
+
// All the executions in the session will be released. Any further requests for the session with
|
|
1259
|
+
// that session_id for the given user_id will fail. If the session didn't exist or was already
|
|
1260
|
+
// released, this is a noop.
|
|
1261
|
+
rpc ReleaseSession(ReleaseSessionRequest) returns (ReleaseSessionResponse) {}
|
|
1262
|
+
|
|
1263
|
+
// FetchErrorDetails retrieves the matched exception with details based on a provided error id.
|
|
1264
|
+
rpc FetchErrorDetails(FetchErrorDetailsRequest) returns (FetchErrorDetailsResponse) {}
|
|
1265
|
+
|
|
1266
|
+
// Create a clone of a Spark Connect session on the server side. The server-side session
|
|
1267
|
+
// is cloned with all its current state (SQL configurations, temporary views, registered
|
|
1268
|
+
// functions, catalog state) copied over to a new independent session. The cloned session
|
|
1269
|
+
// is isolated from the source session - any subsequent changes to either session's
|
|
1270
|
+
// server-side state will not be reflected in the other.
|
|
1271
|
+
//
|
|
1272
|
+
// The request can optionally specify a custom session ID for the cloned session (must be
|
|
1273
|
+
// a valid UUID). If not provided, a new UUID will be generated automatically.
|
|
1274
|
+
rpc CloneSession(CloneSessionRequest) returns (CloneSessionResponse) {}
|
|
1275
|
+
}
|