spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,243 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one or more
3
+ * contributor license agreements. See the NOTICE file distributed with
4
+ * this work for additional information regarding copyright ownership.
5
+ * The ASF licenses this file to You under the Apache License, Version 2.0
6
+ * (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ syntax = 'proto3';
19
+
20
+ package spark.connect;
21
+
22
+ import "spark/connect/common.proto";
23
+ import "spark/connect/types.proto";
24
+
25
+ option java_multiple_files = true;
26
+ option java_package = "org.apache.spark.connect.proto";
27
+ option go_package = "internal/generated";
28
+
29
+ // Catalog messages are marked as unstable. `Catalog` wraps a single catalog operation: the `cat_type` oneof holds at most one of the operation messages listed below.
30
+ message Catalog {
31
+ oneof cat_type {
32
+ CurrentDatabase current_database = 1;
33
+ SetCurrentDatabase set_current_database = 2;
34
+ ListDatabases list_databases = 3;
35
+ ListTables list_tables = 4;
36
+ ListFunctions list_functions = 5;
37
+ ListColumns list_columns = 6;
38
+ GetDatabase get_database = 7;
39
+ GetTable get_table = 8;
40
+ GetFunction get_function = 9;
41
+ DatabaseExists database_exists = 10;
42
+ TableExists table_exists = 11;
43
+ FunctionExists function_exists = 12;
44
+ CreateExternalTable create_external_table = 13;
45
+ CreateTable create_table = 14;
46
+ DropTempView drop_temp_view = 15;
47
+ DropGlobalTempView drop_global_temp_view = 16;
48
+ RecoverPartitions recover_partitions = 17;
49
+ IsCached is_cached = 18;
50
+ CacheTable cache_table = 19;
51
+ UncacheTable uncache_table = 20;
52
+ ClearCache clear_cache = 21;
53
+ RefreshTable refresh_table = 22;
54
+ RefreshByPath refresh_by_path = 23;
55
+ CurrentCatalog current_catalog = 24;
56
+ SetCurrentCatalog set_current_catalog = 25;
57
+ ListCatalogs list_catalogs = 26;
58
+ }
59
+ }
60
+
61
+ // See `spark.catalog.currentDatabase`. The message carries no fields.
62
+ message CurrentDatabase { }
63
+
64
+ // See `spark.catalog.setCurrentDatabase`. Carries the single database name argument.
65
+ message SetCurrentDatabase {
66
+ // (Required) Database name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
67
+ string db_name = 1;
68
+ }
69
+
70
+ // See `spark.catalog.listDatabases`. The only argument is an optional name pattern.
71
+ message ListDatabases {
72
+ // (Optional) The pattern that the database name needs to match. Declared `optional`, so "not set" is distinguishable from an empty pattern; the pattern syntax is not defined in this file.
73
+ optional string pattern = 1;
74
+ }
75
+
76
+ // See `spark.catalog.listTables`. Both arguments are optional.
77
+ message ListTables {
78
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
79
+ optional string db_name = 1;
80
+ // (Optional) The pattern that the table name needs to match. The pattern syntax is not defined in this file.
81
+ optional string pattern = 2;
82
+ }
83
+
84
+ // See `spark.catalog.listFunctions`. Both arguments are optional.
85
+ message ListFunctions {
86
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
87
+ optional string db_name = 1;
88
+ // (Optional) The pattern that the function name needs to match. The pattern syntax is not defined in this file.
89
+ optional string pattern = 2;
90
+ }
91
+
92
+ // See `spark.catalog.listColumns`. Identifies a table by name, with an optional database name.
93
+ message ListColumns {
94
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
95
+ string table_name = 1;
96
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
97
+ optional string db_name = 2;
98
+ }
99
+
100
+ // See `spark.catalog.getDatabase`. Carries the single database name argument.
101
+ message GetDatabase {
102
+ // (Required) Database name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
103
+ string db_name = 1;
104
+ }
105
+
106
+ // See `spark.catalog.getTable`. Identifies a table by name, with an optional database name.
107
+ message GetTable {
108
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
109
+ string table_name = 1;
110
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
111
+ optional string db_name = 2;
112
+ }
113
+
114
+ // See `spark.catalog.getFunction`. Identifies a function by name, with an optional database name.
115
+ message GetFunction {
116
+ // (Required) Function name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
117
+ string function_name = 1;
118
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
119
+ optional string db_name = 2;
120
+ }
121
+
122
+ // See `spark.catalog.databaseExists`. Carries the single database name argument.
123
+ message DatabaseExists {
124
+ // (Required) Database name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
125
+ string db_name = 1;
126
+ }
127
+
128
+ // See `spark.catalog.tableExists`. Identifies a table by name, with an optional database name.
129
+ message TableExists {
130
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
131
+ string table_name = 1;
132
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
133
+ optional string db_name = 2;
134
+ }
135
+
136
+ // See `spark.catalog.functionExists`. Identifies a function by name, with an optional database name.
137
+ message FunctionExists {
138
+ // (Required) Function name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
139
+ string function_name = 1;
140
+ // (Optional) Database name. Declared `optional`, so "not set" is distinguishable from an empty string.
141
+ optional string db_name = 2;
142
+ }
143
+
144
+ // See `spark.catalog.createExternalTable`. Same fields as `CreateTable` except that there is no `description`, so the later field numbers differ.
145
+ message CreateExternalTable {
146
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
147
+ string table_name = 1;
148
+ // (Optional) Path for the table. NOTE(review): presumably the location of the table's data; confirm against `spark.catalog.createExternalTable`.
149
+ optional string path = 2;
150
+ // (Optional) Data source. NOTE(review): presumably the data source format name referred to by the `options` comment below; confirm.
151
+ optional string source = 3;
152
+ // (Optional) Table schema, expressed as a `DataType` message.
153
+ optional DataType schema = 4;
154
+ // Options may be empty for a valid data source format.
155
+ // The map key is case-insensitive.
156
+ map<string, string> options = 5;
157
+ }
158
+
159
+ // See `spark.catalog.createTable`. Same fields as `CreateExternalTable` plus `description` at field 4, which shifts `schema` and `options` to 5 and 6.
160
+ message CreateTable {
161
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
162
+ string table_name = 1;
163
+ // (Optional) Path for the table. NOTE(review): presumably the location of the table's data; confirm against `spark.catalog.createTable`.
164
+ optional string path = 2;
165
+ // (Optional) Data source. NOTE(review): presumably the data source format name referred to by the `options` comment below; confirm.
166
+ optional string source = 3;
167
+ // (Optional) Free-form description of the table.
168
+ optional string description = 4;
169
+ // (Optional) Table schema, expressed as a `DataType` message.
170
+ optional DataType schema = 5;
171
+ // Options may be empty for a valid data source format.
172
+ // The map key is case-insensitive.
173
+ map<string, string> options = 6;
174
+ }
175
+
176
+ // See `spark.catalog.dropTempView`. Carries the single view name argument.
177
+ message DropTempView {
178
+ // (Required) View name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
179
+ string view_name = 1;
180
+ }
181
+
182
+ // See `spark.catalog.dropGlobalTempView`. Carries the single view name argument.
183
+ message DropGlobalTempView {
184
+ // (Required) View name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
185
+ string view_name = 1;
186
+ }
187
+
188
+ // See `spark.catalog.recoverPartitions`. Carries the single table name argument.
189
+ message RecoverPartitions {
190
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
191
+ string table_name = 1;
192
+ }
193
+
194
+ // See `spark.catalog.isCached`. Carries the single table name argument.
195
+ message IsCached {
196
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
197
+ string table_name = 1;
198
+ }
199
+
200
+ // See `spark.catalog.cacheTable`. Identifies a table by name, with an optional storage level.
201
+ message CacheTable {
202
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
203
+ string table_name = 1;
204
+
205
+ // (Optional) Storage level to cache with. NOTE(review): `StorageLevel` is not defined in this file; presumably it comes from the imported spark/connect/common.proto; confirm.
206
+ optional StorageLevel storage_level = 2;
207
+ }
208
+
209
+ // See `spark.catalog.uncacheTable`. Carries the single table name argument.
210
+ message UncacheTable {
211
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
212
+ string table_name = 1;
213
+ }
214
+
215
+ // See `spark.catalog.clearCache`. The message carries no fields.
216
+ message ClearCache { }
217
+
218
+ // See `spark.catalog.refreshTable`. Carries the single table name argument.
219
+ message RefreshTable {
220
+ // (Required) Table name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
221
+ string table_name = 1;
222
+ }
223
+
224
+ // See `spark.catalog.refreshByPath`. Carries the single path argument.
225
+ message RefreshByPath {
226
+ // (Required) Path to refresh. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
227
+ string path = 1;
228
+ }
229
+
230
+ // See `spark.catalog.currentCatalog`. The message carries no fields.
231
+ message CurrentCatalog { }
232
+
233
+ // See `spark.catalog.setCurrentCatalog`. Carries the single catalog name argument.
234
+ message SetCurrentCatalog {
235
+ // (Required) Catalog name. Required by convention only: proto3 has no `required` label, so an unset value reads as "".
236
+ string catalog_name = 1;
237
+ }
238
+
239
+ // See `spark.catalog.listCatalogs`. The only argument is an optional name pattern.
240
+ message ListCatalogs {
241
+ // (Optional) The pattern that the catalog name needs to match. Declared `optional`, so "not set" is distinguishable from an empty pattern; the pattern syntax is not defined in this file.
242
+ optional string pattern = 1;
243
+ }