spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
* contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
* this work for additional information regarding copyright ownership.
|
|
5
|
+
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
* (the "License"); you may not use this file except in compliance with
|
|
7
|
+
* the License. You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
syntax = 'proto3';
|
|
19
|
+
|
|
20
|
+
package spark.connect;
|
|
21
|
+
|
|
22
|
+
option java_multiple_files = true;
|
|
23
|
+
option java_package = "org.apache.spark.connect.proto";
|
|
24
|
+
option go_package = "internal/generated";
|
|
25
|
+
|
|
26
|
+
// StorageLevel for persisting Datasets/Tables.
|
|
27
|
+
message StorageLevel {
|
|
28
|
+
// (Required) Whether the cache should use disk or not.
|
|
29
|
+
bool use_disk = 1;
|
|
30
|
+
// (Required) Whether the cache should use memory or not.
|
|
31
|
+
bool use_memory = 2;
|
|
32
|
+
// (Required) Whether the cache should use off-heap or not.
|
|
33
|
+
bool use_off_heap = 3;
|
|
34
|
+
// (Required) Whether the cached data is deserialized or not.
|
|
35
|
+
bool deserialized = 4;
|
|
36
|
+
// (Required) The number of replicas.
|
|
37
|
+
int32 replication = 5;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
// ResourceInformation to hold information about a type of Resource.
|
|
42
|
+
// The corresponding class is 'org.apache.spark.resource.ResourceInformation'.
|
|
43
|
+
message ResourceInformation {
|
|
44
|
+
// (Required) The name of the resource.
|
|
45
|
+
string name = 1;
|
|
46
|
+
// (Required) An array of strings describing the addresses of the resource.
|
|
47
|
+
repeated string addresses = 2;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// An executor resource request.
|
|
51
|
+
message ExecutorResourceRequest {
|
|
52
|
+
// (Required) Name of the resource.
|
|
53
|
+
string resource_name = 1;
|
|
54
|
+
|
|
55
|
+
// (Required) Amount of the resource being requested.
|
|
56
|
+
int64 amount = 2;
|
|
57
|
+
|
|
58
|
+
// (Optional) Script used to discover the resources.
|
|
59
|
+
optional string discovery_script = 3;
|
|
60
|
+
|
|
61
|
+
// (Optional) Vendor, required for some cluster managers.
|
|
62
|
+
optional string vendor = 4;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// A task resource request.
|
|
66
|
+
message TaskResourceRequest {
|
|
67
|
+
// (Required) Name of the resource.
|
|
68
|
+
string resource_name = 1;
|
|
69
|
+
|
|
70
|
+
// (Required) Requested resource amount, as a double to support fractional
|
|
71
|
+
// resource requests.
|
|
72
|
+
double amount = 2;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
message ResourceProfile {
|
|
76
|
+
// (Optional) Resource requests for executors. Mapped from the resource name
|
|
77
|
+
// (e.g., cores, memory, CPU) to its specific request.
|
|
78
|
+
map<string, ExecutorResourceRequest> executor_resources = 1;
|
|
79
|
+
|
|
80
|
+
// (Optional) Resource requests for tasks. Mapped from the resource name
|
|
81
|
+
// (e.g., cores, memory, CPU) to its specific request.
|
|
82
|
+
map<string, TaskResourceRequest> task_resources = 2;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
message Origin {
|
|
86
|
+
// (Required) Indicates the origin type.
|
|
87
|
+
oneof function {
|
|
88
|
+
PythonOrigin python_origin = 1;
|
|
89
|
+
JvmOrigin jvm_origin = 2;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
message PythonOrigin {
|
|
94
|
+
// (Required) Name of the origin, for example, the name of the function.
|
|
95
|
+
string fragment = 1;
|
|
96
|
+
|
|
97
|
+
// (Required) Callsite to show to end users, for example, stacktrace.
|
|
98
|
+
string call_site = 2;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
message JvmOrigin {
|
|
102
|
+
// (Optional) Line number in the source file.
|
|
103
|
+
optional int32 line = 1;
|
|
104
|
+
|
|
105
|
+
// (Optional) Start position in the source file.
|
|
106
|
+
optional int32 start_position = 2;
|
|
107
|
+
|
|
108
|
+
// (Optional) Start index in the source file.
|
|
109
|
+
optional int32 start_index = 3;
|
|
110
|
+
|
|
111
|
+
// (Optional) Stop index in the source file.
|
|
112
|
+
optional int32 stop_index = 4;
|
|
113
|
+
|
|
114
|
+
// (Optional) SQL text.
|
|
115
|
+
optional string sql_text = 5;
|
|
116
|
+
|
|
117
|
+
// (Optional) Object type.
|
|
118
|
+
optional string object_type = 6;
|
|
119
|
+
|
|
120
|
+
// (Optional) Object name.
|
|
121
|
+
optional string object_name = 7;
|
|
122
|
+
|
|
123
|
+
// (Optional) Stack trace.
|
|
124
|
+
repeated StackTraceElement stack_trace = 8;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// A message to hold a [[java.lang.StackTraceElement]].
|
|
128
|
+
message StackTraceElement {
|
|
129
|
+
// (Optional) Class loader name
|
|
130
|
+
optional string class_loader_name = 1;
|
|
131
|
+
|
|
132
|
+
// (Optional) Module name
|
|
133
|
+
optional string module_name = 2;
|
|
134
|
+
|
|
135
|
+
// (Optional) Module version
|
|
136
|
+
optional string module_version = 3;
|
|
137
|
+
|
|
138
|
+
// (Required) Declaring class
|
|
139
|
+
string declaring_class = 4;
|
|
140
|
+
|
|
141
|
+
// (Required) Method name
|
|
142
|
+
string method_name = 5;
|
|
143
|
+
|
|
144
|
+
// (Optional) File name
|
|
145
|
+
optional string file_name = 6;
|
|
146
|
+
|
|
147
|
+
// (Required) Line number
|
|
148
|
+
int32 line_number = 7;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
message ResolvedIdentifier {
|
|
152
|
+
string catalog_name = 1;
|
|
153
|
+
repeated string namespace = 2;
|
|
154
|
+
string table_name = 3;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
message Bools {
|
|
158
|
+
repeated bool values = 1;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
message Ints {
|
|
162
|
+
repeated int32 values = 1;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
message Longs {
|
|
166
|
+
repeated int64 values = 1;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
message Floats {
|
|
170
|
+
repeated float values = 1;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
message Doubles {
|
|
174
|
+
repeated double values = 1;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
message Strings {
|
|
178
|
+
repeated string values = 1;
|
|
179
|
+
}
|