spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
* contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
* this work for additional information regarding copyright ownership.
|
|
5
|
+
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
* (the "License"); you may not use this file except in compliance with
|
|
7
|
+
* the License. You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
syntax = "proto3";

package spark.connect;

// Java code generation: emit generated classes into separate files, placed in
// the package named below.
option java_multiple_files = true;
option java_package = "org.apache.spark.connect.proto";

// Go code generation: import path of the generated package.
option go_package = "internal/generated";
|
|
25
|
+
|
|
26
|
+
// Describes the logical data type of a value. Only the type is described
// here; the value itself is never carried in this message.
//
// Exactly one member of `kind` is set, selecting which of the nested type
// messages below applies.
//
// NOTE: field numbers identify fields on the wire and are not in declaration
// order everywhere (e.g. `variant = 25` is declared next to 20-22, and
// `unparsed = 24` after 27). Do not renumber them.
message DataType {
  oneof kind {
    NULL null = 1;

    Binary binary = 2;

    Boolean boolean = 3;

    // Numeric types.
    Byte byte = 4;
    Short short = 5;
    Integer integer = 6;
    Long long = 7;

    Float float = 8;
    Double double = 9;
    Decimal decimal = 10;

    // String types.
    String string = 11;
    Char char = 12;
    VarChar var_char = 13;

    // Datetime types.
    Date date = 14;
    Timestamp timestamp = 15;
    TimestampNTZ timestamp_ntz = 16;

    // Interval types.
    CalendarInterval calendar_interval = 17;
    YearMonthInterval year_month_interval = 18;
    DayTimeInterval day_time_interval = 19;

    // Complex types.
    Array array = 20;
    Struct struct = 21;
    Map map = 22;
    Variant variant = 25;

    // User-defined type.
    UDT udt = 23;

    // Geospatial types.
    Geometry geometry = 26;

    Geography geography = 27;

    // Data type given as a not-yet-parsed string.
    Unparsed unparsed = 24;

    Time time = 28;
  }

  // ---------------------------------------------------------------------------
  // Simple types. Each carries a `type_variation_reference`.
  // NOTE(review): the meaning of `type_variation_reference` is not stated in
  // this file; confirm against the protocol documentation before relying on it.
  // ---------------------------------------------------------------------------

  // Boolean type.
  message Boolean {
    uint32 type_variation_reference = 1;
  }

  // Byte type.
  message Byte {
    uint32 type_variation_reference = 1;
  }

  // Short type.
  message Short {
    uint32 type_variation_reference = 1;
  }

  // Integer type.
  message Integer {
    uint32 type_variation_reference = 1;
  }

  // Long type.
  message Long {
    uint32 type_variation_reference = 1;
  }

  // Float type.
  message Float {
    uint32 type_variation_reference = 1;
  }

  // Double type.
  message Double {
    uint32 type_variation_reference = 1;
  }

  // String type, with a collation name.
  message String {
    uint32 type_variation_reference = 1;
    // Collation name. Without the `optional` label an empty string is
    // indistinguishable from "not set".
    string collation = 2;
  }

  // Binary type.
  message Binary {
    uint32 type_variation_reference = 1;
  }

  // Null type.
  message NULL {
    uint32 type_variation_reference = 1;
  }

  // Timestamp type.
  message Timestamp {
    uint32 type_variation_reference = 1;
  }

  // Date type.
  message Date {
    uint32 type_variation_reference = 1;
  }

  // Timestamp type declared as TimestampNTZ.
  // NOTE(review): presumably "no time zone"; confirm.
  message TimestampNTZ {
    uint32 type_variation_reference = 1;
  }

  // Time type with an explicitly-present (optional) precision.
  message Time {
    optional int32 precision = 1;
    uint32 type_variation_reference = 2;
  }

  // Calendar interval type.
  message CalendarInterval {
    uint32 type_variation_reference = 1;
  }

  // Year-month interval type. Both bounds have explicit presence, so an unset
  // bound can be told apart from the value 0.
  message YearMonthInterval {
    optional int32 start_field = 1;
    optional int32 end_field = 2;
    uint32 type_variation_reference = 3;
  }

  // Day-time interval type. Both bounds have explicit presence, so an unset
  // bound can be told apart from the value 0.
  message DayTimeInterval {
    optional int32 start_field = 1;
    optional int32 end_field = 2;
    uint32 type_variation_reference = 3;
  }

  // ---------------------------------------------------------------------------
  // Compound types start here.
  // ---------------------------------------------------------------------------

  // Char type with a length.
  message Char {
    int32 length = 1;
    uint32 type_variation_reference = 2;
  }

  // VarChar type with a length.
  message VarChar {
    int32 length = 1;
    uint32 type_variation_reference = 2;
  }

  // Decimal type. Note the declaration order: scale is field 1 and precision
  // is field 2; both have explicit presence.
  message Decimal {
    optional int32 scale = 1;
    optional int32 precision = 2;
    uint32 type_variation_reference = 3;
  }

  // One named field of a Struct.
  message StructField {
    string name = 1;
    DataType data_type = 2;
    bool nullable = 3;
    // NOTE(review): the encoding of this string is not stated here; confirm.
    optional string metadata = 4;
  }

  // Struct type: a list of fields.
  message Struct {
    repeated StructField fields = 1;
    uint32 type_variation_reference = 2;
  }

  // Array type: an element type plus a contains-null flag.
  message Array {
    DataType element_type = 1;
    bool contains_null = 2;
    uint32 type_variation_reference = 3;
  }

  // Map type: key and value types plus a flag for nulls among the values.
  message Map {
    DataType key_type = 1;
    DataType value_type = 2;
    bool value_contains_null = 3;
    uint32 type_variation_reference = 4;
  }

  // Geometry type.
  // NOTE(review): `srid` is presumably a spatial reference identifier; confirm.
  message Geometry {
    int32 srid = 1;
    uint32 type_variation_reference = 2;
  }

  // Geography type.
  // NOTE(review): `srid` is presumably a spatial reference identifier; confirm.
  message Geography {
    int32 srid = 1;
    uint32 type_variation_reference = 2;
  }

  // Variant type.
  message Variant {
    uint32 type_variation_reference = 1;
  }

  // User-defined type. Which of the optional fields must be populated depends
  // on the language the UDT comes from, as noted per field.
  message UDT {
    string type = 1;

    // Required for a Scala/Java UDT.
    optional string jvm_class = 2;

    // Required for a Python UDT.
    optional string python_class = 3;

    // Required for a Python UDT.
    optional string serialized_python_class = 4;

    // Required for a Python UDT.
    optional DataType sql_type = 5;
  }

  // A data type supplied as a string that has not been parsed.
  message Unparsed {
    // (Required) The unparsed data type string.
    string data_type_string = 1;
  }
}
|
metadata
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: spark-connect
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.2.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hyukjin Kwon
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: google-protobuf
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '3.25'
|
|
19
|
+
- - "<"
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '5.0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
requirements:
|
|
26
|
+
- - ">="
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
version: '3.25'
|
|
29
|
+
- - "<"
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: '5.0'
|
|
32
|
+
- !ruby/object:Gem::Dependency
|
|
33
|
+
name: grpc
|
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - "~>"
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '1.60'
|
|
39
|
+
type: :runtime
|
|
40
|
+
prerelease: false
|
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
42
|
+
requirements:
|
|
43
|
+
- - "~>"
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '1.60'
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
47
|
+
name: red-arrow
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - ">="
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '15.0'
|
|
53
|
+
type: :runtime
|
|
54
|
+
prerelease: false
|
|
55
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
56
|
+
requirements:
|
|
57
|
+
- - ">="
|
|
58
|
+
- !ruby/object:Gem::Version
|
|
59
|
+
version: '15.0'
|
|
60
|
+
description: |
|
|
61
|
+
spark-connect is a Ruby client for Apache Spark Connect, the gRPC-based
|
|
62
|
+
decoupled client-server protocol for Apache Spark. It provides a DataFrame
|
|
63
|
+
API closely modeled on PySpark, including SQL, relational operators,
|
|
64
|
+
column expressions, a comprehensive functions library, typed schemas, and
|
|
65
|
+
Apache Arrow-based result decoding.
|
|
66
|
+
email:
|
|
67
|
+
- gurwls223@apache.org
|
|
68
|
+
executables: []
|
|
69
|
+
extensions: []
|
|
70
|
+
extra_rdoc_files: []
|
|
71
|
+
files:
|
|
72
|
+
- CHANGELOG.md
|
|
73
|
+
- LICENSE
|
|
74
|
+
- NOTICE
|
|
75
|
+
- README.md
|
|
76
|
+
- lib/spark-connect.rb
|
|
77
|
+
- lib/spark_connect.rb
|
|
78
|
+
- lib/spark_connect/arrow.rb
|
|
79
|
+
- lib/spark_connect/catalog.rb
|
|
80
|
+
- lib/spark_connect/channel_builder.rb
|
|
81
|
+
- lib/spark_connect/client.rb
|
|
82
|
+
- lib/spark_connect/column.rb
|
|
83
|
+
- lib/spark_connect/conf.rb
|
|
84
|
+
- lib/spark_connect/data_frame.rb
|
|
85
|
+
- lib/spark_connect/errors.rb
|
|
86
|
+
- lib/spark_connect/functions.rb
|
|
87
|
+
- lib/spark_connect/grouped_data.rb
|
|
88
|
+
- lib/spark_connect/na_functions.rb
|
|
89
|
+
- lib/spark_connect/observation.rb
|
|
90
|
+
- lib/spark_connect/pipelines.rb
|
|
91
|
+
- lib/spark_connect/plan.rb
|
|
92
|
+
- lib/spark_connect/proto.rb
|
|
93
|
+
- lib/spark_connect/proto/spark/connect/base_pb.rb
|
|
94
|
+
- lib/spark_connect/proto/spark/connect/base_services_pb.rb
|
|
95
|
+
- lib/spark_connect/proto/spark/connect/catalog_pb.rb
|
|
96
|
+
- lib/spark_connect/proto/spark/connect/commands_pb.rb
|
|
97
|
+
- lib/spark_connect/proto/spark/connect/common_pb.rb
|
|
98
|
+
- lib/spark_connect/proto/spark/connect/expressions_pb.rb
|
|
99
|
+
- lib/spark_connect/proto/spark/connect/ml_common_pb.rb
|
|
100
|
+
- lib/spark_connect/proto/spark/connect/ml_pb.rb
|
|
101
|
+
- lib/spark_connect/proto/spark/connect/pipelines_pb.rb
|
|
102
|
+
- lib/spark_connect/proto/spark/connect/relations_pb.rb
|
|
103
|
+
- lib/spark_connect/proto/spark/connect/types_pb.rb
|
|
104
|
+
- lib/spark_connect/reader.rb
|
|
105
|
+
- lib/spark_connect/row.rb
|
|
106
|
+
- lib/spark_connect/session.rb
|
|
107
|
+
- lib/spark_connect/stat_functions.rb
|
|
108
|
+
- lib/spark_connect/streaming.rb
|
|
109
|
+
- lib/spark_connect/types.rb
|
|
110
|
+
- lib/spark_connect/version.rb
|
|
111
|
+
- lib/spark_connect/window.rb
|
|
112
|
+
- lib/spark_connect/writer.rb
|
|
113
|
+
- proto/spark/connect/base.proto
|
|
114
|
+
- proto/spark/connect/catalog.proto
|
|
115
|
+
- proto/spark/connect/commands.proto
|
|
116
|
+
- proto/spark/connect/common.proto
|
|
117
|
+
- proto/spark/connect/expressions.proto
|
|
118
|
+
- proto/spark/connect/ml.proto
|
|
119
|
+
- proto/spark/connect/ml_common.proto
|
|
120
|
+
- proto/spark/connect/pipelines.proto
|
|
121
|
+
- proto/spark/connect/relations.proto
|
|
122
|
+
- proto/spark/connect/types.proto
|
|
123
|
+
homepage: https://github.com/HyukjinKwon/spark-connect-ruby
|
|
124
|
+
licenses:
|
|
125
|
+
- Apache-2.0
|
|
126
|
+
metadata:
|
|
127
|
+
homepage_uri: https://github.com/HyukjinKwon/spark-connect-ruby
|
|
128
|
+
source_code_uri: https://github.com/HyukjinKwon/spark-connect-ruby
|
|
129
|
+
documentation_uri: https://hyukjinkwon.github.io/spark-connect-ruby/
|
|
130
|
+
bug_tracker_uri: https://github.com/HyukjinKwon/spark-connect-ruby/issues
|
|
131
|
+
changelog_uri: https://github.com/HyukjinKwon/spark-connect-ruby/blob/main/CHANGELOG.md
|
|
132
|
+
rdoc_options: []
|
|
133
|
+
require_paths:
|
|
134
|
+
- lib
|
|
135
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
136
|
+
requirements:
|
|
137
|
+
- - ">="
|
|
138
|
+
- !ruby/object:Gem::Version
|
|
139
|
+
version: 3.1.0
|
|
140
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
141
|
+
requirements:
|
|
142
|
+
- - ">="
|
|
143
|
+
- !ruby/object:Gem::Version
|
|
144
|
+
version: '0'
|
|
145
|
+
requirements: []
|
|
146
|
+
rubygems_version: 4.0.11
|
|
147
|
+
specification_version: 4
|
|
148
|
+
summary: A pure-Ruby client for Apache Spark Connect.
|
|
149
|
+
test_files: []
|