spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,179 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one or more
3
+ * contributor license agreements. See the NOTICE file distributed with
4
+ * this work for additional information regarding copyright ownership.
5
+ * The ASF licenses this file to You under the Apache License, Version 2.0
6
+ * (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ syntax = 'proto3'; // This file is written in proto3 syntax.
19
+
20
+ package spark.connect; // Proto package that namespaces every message declared in this file.
21
+
22
+ option java_multiple_files = true; // Generate a separate Java source file per top-level message.
23
+ option java_package = "org.apache.spark.connect.proto"; // Java package for the generated classes.
24
+ option go_package = "internal/generated"; // Go import path for the generated code.
25
+
26
+ // Storage level used when persisting Datasets/Tables: which storage the cache may use, whether data is kept deserialized, and the replica count.
27
+ message StorageLevel { // NOTE: (Required) is a documentation convention only; proto3 does not enforce it, and false/0 cannot be told apart from unset on these fields.
28
+ // (Required) Whether the cache should use disk.
29
+ bool use_disk = 1;
30
+ // (Required) Whether the cache should use memory.
31
+ bool use_memory = 2;
32
+ // (Required) Whether the cache should use off-heap storage.
33
+ bool use_off_heap = 3;
34
+ // (Required) Whether the cached data is kept in deserialized form.
35
+ bool deserialized = 4;
36
+ // (Required) The number of replicas.
37
+ int32 replication = 5;
38
+ }
39
+
40
+
41
+ // Holds information about one type of resource: its name and its addresses.
42
+ // The corresponding class is 'org.apache.spark.resource.ResourceInformation'.
43
+ message ResourceInformation {
44
+ // (Required) The name of the resource.
45
+ string name = 1;
46
+ // (Required) The addresses of the resource, one string per address.
47
+ repeated string addresses = 2;
48
+ }
49
+
50
+ // A request for a given amount of one named resource on an executor.
51
+ message ExecutorResourceRequest {
52
+ // (Required) Name of the resource being requested.
53
+ string resource_name = 1;
54
+
55
+ // (Required) Amount of the resource being requested.
56
+ int64 amount = 2;
57
+
58
+ // (Optional) Script used to discover the resources.
59
+ optional string discovery_script = 3;
60
+
61
+ // (Optional) Vendor of the resource; required for some cluster managers.
62
+ optional string vendor = 4;
63
+ }
64
+
65
+ // A request for a given amount of one named resource for a task.
66
+ message TaskResourceRequest {
67
+ // (Required) Name of the resource being requested.
68
+ string resource_name = 1;
69
+
70
+ // (Required) Amount of the resource being requested, expressed as a double
71
+ // so that fractional resource requests are supported.
72
+ double amount = 2;
73
+ }
74
+
75
+ message ResourceProfile { // Executor and task resource requests, each keyed by resource name. Map entries have no defined order.
76
+ // (Optional) Resource requests for executors, keyed by resource name
77
+ // (e.g., cores, memory, CPU); each value is the request for that resource.
78
+ map<string, ExecutorResourceRequest> executor_resources = 1;
79
+
80
+ // (Optional) Resource requests for tasks, keyed by resource name
81
+ // (e.g., cores, memory, CPU); each value is the request for that resource.
82
+ map<string, TaskResourceRequest> task_resources = 2;
83
+ }
84
+
85
+ message Origin { // Origin information, carried as either a PythonOrigin or a JvmOrigin.
86
+ // (Required) The origin type. As a oneof, at most one member is set at a time; setting one clears the other.
87
+ oneof function {
88
+ PythonOrigin python_origin = 1; // Set when the origin is described by a PythonOrigin.
89
+ JvmOrigin jvm_origin = 2; // Set when the origin is described by a JvmOrigin.
90
+ }
91
+ }
92
+
93
+ message PythonOrigin { // Python-side origin: a fragment name plus the call site to show to users.
94
+ // (Required) Name of the origin, for example, the name of the function.
95
+ string fragment = 1;
96
+
97
+ // (Required) Call site to show to end users, for example, a stack trace.
98
+ string call_site = 2;
99
+ }
100
+
101
+ message JvmOrigin { // JVM-side origin: source-file position, SQL text, object type/name, and stack trace. Every scalar field is optional and tracks presence.
102
+ // (Optional) Line number in the source file.
103
+ optional int32 line = 1;
104
+
105
+ // (Optional) Start position in the source file.
106
+ optional int32 start_position = 2;
107
+
108
+ // (Optional) Start index in the source file.
109
+ optional int32 start_index = 3;
110
+
111
+ // (Optional) Stop index in the source file.
112
+ optional int32 stop_index = 4;
113
+
114
+ // (Optional) SQL text.
115
+ optional string sql_text = 5;
116
+
117
+ // (Optional) Object type.
118
+ optional string object_type = 6;
119
+
120
+ // (Optional) Object name.
121
+ optional string object_name = 7;
122
+
123
+ // (Optional) Stack trace, as a list of StackTraceElement entries; empty when not provided.
124
+ repeated StackTraceElement stack_trace = 8;
125
+ }
126
+
127
+ // Holds the contents of one [[java.lang.StackTraceElement]], i.e. a single stack frame.
128
+ message StackTraceElement {
129
+ // (Optional) Class loader name.
130
+ optional string class_loader_name = 1;
131
+
132
+ // (Optional) Module name.
133
+ optional string module_name = 2;
134
+
135
+ // (Optional) Module version.
136
+ optional string module_version = 3;
137
+
138
+ // (Required) Declaring class.
139
+ string declaring_class = 4;
140
+
141
+ // (Required) Method name.
142
+ string method_name = 5;
143
+
144
+ // (Optional) File name.
145
+ optional string file_name = 6;
146
+
147
+ // (Required) Line number.
148
+ int32 line_number = 7;
149
+ }
150
+
151
+ message ResolvedIdentifier { // Identifier composed of a catalog name, a list of namespace strings, and a table name.
152
+ string catalog_name = 1; // Name of the catalog.
153
+ repeated string namespace = 2; // Namespace parts; NOTE(review): presumably ordered outermost first - confirm against callers.
154
+ string table_name = 3; // Name of the table.
155
+ }
156
+
157
+ message Bools { // Wrapper message holding a list of bool values.
158
+ repeated bool values = 1; // The bool values; an empty list is indistinguishable from unset.
159
+ }
160
+
161
+ message Ints { // Wrapper message holding a list of 32-bit integer values.
162
+ repeated int32 values = 1; // The int32 values; an empty list is indistinguishable from unset.
163
+ }
164
+
165
+ message Longs { // Wrapper message holding a list of 64-bit integer values.
166
+ repeated int64 values = 1; // The int64 values; an empty list is indistinguishable from unset.
167
+ }
168
+
169
+ message Floats { // Wrapper message holding a list of single-precision floating-point values.
170
+ repeated float values = 1; // The float values; an empty list is indistinguishable from unset.
171
+ }
172
+
173
+ message Doubles { // Wrapper message holding a list of double-precision floating-point values.
174
+ repeated double values = 1; // The double values; an empty list is indistinguishable from unset.
175
+ }
176
+
177
+ message Strings { // Wrapper message holding a list of string values.
178
+ repeated string values = 1; // The string values; an empty list is indistinguishable from unset.
179
+ }