spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module SparkConnect
  # Root of the spark-connect exception hierarchy. Every error class defined
  # in this module descends from it, so `rescue SparkConnect::Error` catches
  # any library-specific failure.
  class Error < StandardError; end

  # Raised when a connection string (`sc://...`) or builder configuration is
  # malformed.
  class ConnectionError < Error; end

  # Raised for invalid arguments passed to the public API before any request is
  # sent to the server (mirrors PySpark's analysis-time argument validation).
  class IllegalArgumentError < Error; end

  # Raised when a feature is recognised but not implemented by this client.
  #
  # NOTE: inside the `SparkConnect` namespace the bare constant
  # `NotImplementedError` resolves to this class, not to Ruby's built-in
  # `::NotImplementedError`. The built-in descends from `ScriptError`, while
  # this class is a `StandardError` (via {Error}), so a plain `rescue` does
  # catch it. Write `::NotImplementedError` to refer to the built-in.
  class NotImplementedError < Error; end

  # Wraps an error returned by the Spark Connect server (a gRPC failure carrying
  # a Spark error payload). The original gRPC exception is available via
  # {#cause}, and Spark's error class / SQL state are surfaced when present.
  class SparkConnectError < Error
    # @return [String, nil] Spark's canonical error class, e.g.
    #   `"TABLE_OR_VIEW_NOT_FOUND"`, when the server provided one.
    attr_reader :error_class

    # @return [String, nil] the ANSI SQL state, when present.
    attr_reader :sql_state

    # @return [String, nil] the gRPC status code name, e.g. `"UNAVAILABLE"`.
    attr_reader :grpc_code

    # @return [Array<String>] the server-side stack trace lines; empty when
    #   the server sent none.
    attr_reader :stack_trace

    # @param message [String] human-readable error message.
    # @param error_class [String, nil] Spark's canonical error class.
    # @param sql_state [String, nil] the ANSI SQL state.
    # @param grpc_code [String, nil] the gRPC status code name.
    # @param stack_trace [Array<String>, nil] server-side stack trace lines;
    #   `nil` is treated as "no trace" and stored as an empty array.
    def initialize(message, error_class: nil, sql_state: nil, grpc_code: nil, stack_trace: [])
      super(message)
      @error_class = error_class
      @sql_state = sql_state
      @grpc_code = grpc_code
      # Kernel#Array returns an Array argument unchanged and maps nil to [],
      # so the reader always honours its documented Array<String> contract.
      @stack_trace = Array(stack_trace)
    end
  end

  # Raised when an analysis-time error is reported by the server (e.g. an
  # unresolved column or an invalid plan).
  class AnalysisError < SparkConnectError; end

  # Raised when SQL parsing fails on the server.
  class ParseError < AnalysisError; end

  # Raised on a temporary/transient server or transport condition that the
  # client gave up retrying.
  class RetriesExceededError < SparkConnectError; end

  # Raised when the user (or a signal) interrupts a running operation.
  class OperationInterruptedError < SparkConnectError; end
end