spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,227 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one or more
3
+ * contributor license agreements. See the NOTICE file distributed with
4
+ * this work for additional information regarding copyright ownership.
5
+ * The ASF licenses this file to You under the Apache License, Version 2.0
6
+ * (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ syntax = 'proto3';
19
+
20
+ package spark.connect;
21
+
22
+ option java_multiple_files = true;
23
+ option java_package = "org.apache.spark.connect.proto";
24
+ option go_package = "internal/generated";
25
+
26
+ // This message describes the logical [[DataType]] of something. It does not carry the value
27
+ // itself but only describes it.
28
+ message DataType {
29
+ oneof kind {
30
+ NULL null = 1;
31
+
32
+ Binary binary = 2;
33
+
34
+ Boolean boolean = 3;
35
+
36
+ // Numeric types
37
+ Byte byte = 4;
38
+ Short short = 5;
39
+ Integer integer = 6;
40
+ Long long = 7;
41
+
42
+ Float float = 8;
43
+ Double double = 9;
44
+ Decimal decimal = 10;
45
+
46
+ // String types
47
+ String string = 11;
48
+ Char char = 12;
49
+ VarChar var_char = 13;
50
+
51
+ // Datetime types
52
+ Date date = 14;
53
+ Timestamp timestamp = 15;
54
+ TimestampNTZ timestamp_ntz = 16;
55
+
56
+ // Interval types
57
+ CalendarInterval calendar_interval = 17;
58
+ YearMonthInterval year_month_interval = 18;
59
+ DayTimeInterval day_time_interval = 19;
60
+
61
+ // Complex types
62
+ Array array = 20;
63
+ Struct struct = 21;
64
+ Map map = 22;
65
+ Variant variant = 25;
66
+
67
+ // UserDefinedType
68
+ UDT udt = 23;
69
+
70
+ // Geospatial types
71
+ Geometry geometry = 26;
72
+
73
+ Geography geography = 27;
74
+
75
+ // UnparsedDataType
76
+ Unparsed unparsed = 24;
77
+
78
+ Time time = 28;
79
+ }
80
+
81
+ message Boolean {
82
+ uint32 type_variation_reference = 1;
83
+ }
84
+
85
+ message Byte {
86
+ uint32 type_variation_reference = 1;
87
+ }
88
+
89
+ message Short {
90
+ uint32 type_variation_reference = 1;
91
+ }
92
+
93
+ message Integer {
94
+ uint32 type_variation_reference = 1;
95
+ }
96
+
97
+ message Long {
98
+ uint32 type_variation_reference = 1;
99
+ }
100
+
101
+ message Float {
102
+ uint32 type_variation_reference = 1;
103
+ }
104
+
105
+ message Double {
106
+ uint32 type_variation_reference = 1;
107
+ }
108
+
109
+ message String {
110
+ uint32 type_variation_reference = 1;
111
+ string collation = 2;
112
+ }
113
+
114
+ message Binary {
115
+ uint32 type_variation_reference = 1;
116
+ }
117
+
118
+ message NULL {
119
+ uint32 type_variation_reference = 1;
120
+ }
121
+
122
+ message Timestamp {
123
+ uint32 type_variation_reference = 1;
124
+ }
125
+
126
+ message Date {
127
+ uint32 type_variation_reference = 1;
128
+ }
129
+
130
+ message TimestampNTZ {
131
+ uint32 type_variation_reference = 1;
132
+ }
133
+
134
+ message Time {
135
+ optional int32 precision = 1;
136
+ uint32 type_variation_reference = 2;
137
+ }
138
+
139
+ message CalendarInterval {
140
+ uint32 type_variation_reference = 1;
141
+ }
142
+
143
+ message YearMonthInterval {
144
+ optional int32 start_field = 1;
145
+ optional int32 end_field = 2;
146
+ uint32 type_variation_reference = 3;
147
+ }
148
+
149
+ message DayTimeInterval {
150
+ optional int32 start_field = 1;
151
+ optional int32 end_field = 2;
152
+ uint32 type_variation_reference = 3;
153
+ }
154
+
155
+ // Start compound types.
156
+ message Char {
157
+ int32 length = 1;
158
+ uint32 type_variation_reference = 2;
159
+ }
160
+
161
+ message VarChar {
162
+ int32 length = 1;
163
+ uint32 type_variation_reference = 2;
164
+ }
165
+
166
+ message Decimal {
167
+ optional int32 scale = 1;
168
+ optional int32 precision = 2;
169
+ uint32 type_variation_reference = 3;
170
+ }
171
+
172
+ message StructField {
173
+ string name = 1;
174
+ DataType data_type = 2;
175
+ bool nullable = 3;
176
+ optional string metadata = 4;
177
+ }
178
+
179
+ message Struct {
180
+ repeated StructField fields = 1;
181
+ uint32 type_variation_reference = 2;
182
+ }
183
+
184
+ message Array {
185
+ DataType element_type = 1;
186
+ bool contains_null = 2;
187
+ uint32 type_variation_reference = 3;
188
+ }
189
+
190
+ message Map {
191
+ DataType key_type = 1;
192
+ DataType value_type = 2;
193
+ bool value_contains_null = 3;
194
+ uint32 type_variation_reference = 4;
195
+ }
196
+
197
+ message Geometry {
198
+ int32 srid = 1;
199
+ uint32 type_variation_reference = 2;
200
+ }
201
+
202
+ message Geography {
203
+ int32 srid = 1;
204
+ uint32 type_variation_reference = 2;
205
+ }
206
+
207
+ message Variant {
208
+ uint32 type_variation_reference = 1;
209
+ }
210
+
211
+ message UDT {
212
+ string type = 1;
213
+ // Required for Scala/Java UDT
214
+ optional string jvm_class = 2;
215
+ // Required for Python UDT
216
+ optional string python_class = 3;
217
+ // Required for Python UDT
218
+ optional string serialized_python_class = 4;
219
+ // Required for Python UDT
220
+ optional DataType sql_type = 5;
221
+ }
222
+
223
+ message Unparsed {
224
+ // (Required) The unparsed data type string
225
+ string data_type_string = 1;
226
+ }
227
+ }
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spark-connect
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Hyukjin Kwon
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: google-protobuf
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '3.25'
19
+ - - "<"
20
+ - !ruby/object:Gem::Version
21
+ version: '5.0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: '3.25'
29
+ - - "<"
30
+ - !ruby/object:Gem::Version
31
+ version: '5.0'
32
+ - !ruby/object:Gem::Dependency
33
+ name: grpc
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - "~>"
37
+ - !ruby/object:Gem::Version
38
+ version: '1.60'
39
+ type: :runtime
40
+ prerelease: false
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - "~>"
44
+ - !ruby/object:Gem::Version
45
+ version: '1.60'
46
+ - !ruby/object:Gem::Dependency
47
+ name: red-arrow
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '15.0'
53
+ type: :runtime
54
+ prerelease: false
55
+ version_requirements: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '15.0'
60
+ description: |
61
+ spark-connect is a Ruby client for Apache Spark Connect, the gRPC-based
62
+ decoupled client-server protocol for Apache Spark. It provides a DataFrame
63
+ API closely modeled on PySpark, including SQL, relational operators,
64
+ column expressions, a comprehensive functions library, typed schemas, and
65
+ Apache Arrow-based result decoding.
66
+ email:
67
+ - gurwls223@apache.org
68
+ executables: []
69
+ extensions: []
70
+ extra_rdoc_files: []
71
+ files:
72
+ - CHANGELOG.md
73
+ - LICENSE
74
+ - NOTICE
75
+ - README.md
76
+ - lib/spark-connect.rb
77
+ - lib/spark_connect.rb
78
+ - lib/spark_connect/arrow.rb
79
+ - lib/spark_connect/catalog.rb
80
+ - lib/spark_connect/channel_builder.rb
81
+ - lib/spark_connect/client.rb
82
+ - lib/spark_connect/column.rb
83
+ - lib/spark_connect/conf.rb
84
+ - lib/spark_connect/data_frame.rb
85
+ - lib/spark_connect/errors.rb
86
+ - lib/spark_connect/functions.rb
87
+ - lib/spark_connect/grouped_data.rb
88
+ - lib/spark_connect/na_functions.rb
89
+ - lib/spark_connect/observation.rb
90
+ - lib/spark_connect/pipelines.rb
91
+ - lib/spark_connect/plan.rb
92
+ - lib/spark_connect/proto.rb
93
+ - lib/spark_connect/proto/spark/connect/base_pb.rb
94
+ - lib/spark_connect/proto/spark/connect/base_services_pb.rb
95
+ - lib/spark_connect/proto/spark/connect/catalog_pb.rb
96
+ - lib/spark_connect/proto/spark/connect/commands_pb.rb
97
+ - lib/spark_connect/proto/spark/connect/common_pb.rb
98
+ - lib/spark_connect/proto/spark/connect/expressions_pb.rb
99
+ - lib/spark_connect/proto/spark/connect/ml_common_pb.rb
100
+ - lib/spark_connect/proto/spark/connect/ml_pb.rb
101
+ - lib/spark_connect/proto/spark/connect/pipelines_pb.rb
102
+ - lib/spark_connect/proto/spark/connect/relations_pb.rb
103
+ - lib/spark_connect/proto/spark/connect/types_pb.rb
104
+ - lib/spark_connect/reader.rb
105
+ - lib/spark_connect/row.rb
106
+ - lib/spark_connect/session.rb
107
+ - lib/spark_connect/stat_functions.rb
108
+ - lib/spark_connect/streaming.rb
109
+ - lib/spark_connect/types.rb
110
+ - lib/spark_connect/version.rb
111
+ - lib/spark_connect/window.rb
112
+ - lib/spark_connect/writer.rb
113
+ - proto/spark/connect/base.proto
114
+ - proto/spark/connect/catalog.proto
115
+ - proto/spark/connect/commands.proto
116
+ - proto/spark/connect/common.proto
117
+ - proto/spark/connect/expressions.proto
118
+ - proto/spark/connect/ml.proto
119
+ - proto/spark/connect/ml_common.proto
120
+ - proto/spark/connect/pipelines.proto
121
+ - proto/spark/connect/relations.proto
122
+ - proto/spark/connect/types.proto
123
+ homepage: https://github.com/HyukjinKwon/spark-connect-ruby
124
+ licenses:
125
+ - Apache-2.0
126
+ metadata:
127
+ homepage_uri: https://github.com/HyukjinKwon/spark-connect-ruby
128
+ source_code_uri: https://github.com/HyukjinKwon/spark-connect-ruby
129
+ documentation_uri: https://hyukjinkwon.github.io/spark-connect-ruby/
130
+ bug_tracker_uri: https://github.com/HyukjinKwon/spark-connect-ruby/issues
131
+ changelog_uri: https://github.com/HyukjinKwon/spark-connect-ruby/blob/main/CHANGELOG.md
132
+ rdoc_options: []
133
+ require_paths:
134
+ - lib
135
+ required_ruby_version: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: 3.1.0
140
+ required_rubygems_version: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ requirements: []
146
+ rubygems_version: 4.0.11
147
+ specification_version: 4
148
+ summary: A pure-Ruby client for Apache Spark Connect.
149
+ test_files: []