spark-connect 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +82 -0
  3. data/LICENSE +202 -0
  4. data/NOTICE +16 -0
  5. data/README.md +166 -0
  6. data/lib/spark-connect.rb +5 -0
  7. data/lib/spark_connect/arrow.rb +115 -0
  8. data/lib/spark_connect/catalog.rb +190 -0
  9. data/lib/spark_connect/channel_builder.rb +134 -0
  10. data/lib/spark_connect/client.rb +264 -0
  11. data/lib/spark_connect/column.rb +379 -0
  12. data/lib/spark_connect/conf.rb +79 -0
  13. data/lib/spark_connect/data_frame.rb +828 -0
  14. data/lib/spark_connect/errors.rb +58 -0
  15. data/lib/spark_connect/functions.rb +903 -0
  16. data/lib/spark_connect/grouped_data.rb +101 -0
  17. data/lib/spark_connect/na_functions.rb +98 -0
  18. data/lib/spark_connect/observation.rb +61 -0
  19. data/lib/spark_connect/pipelines.rb +221 -0
  20. data/lib/spark_connect/plan.rb +39 -0
  21. data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
  22. data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
  23. data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
  24. data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
  25. data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
  26. data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
  27. data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
  28. data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
  29. data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
  30. data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
  31. data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
  32. data/lib/spark_connect/proto.rb +32 -0
  33. data/lib/spark_connect/reader.rb +98 -0
  34. data/lib/spark_connect/row.rb +105 -0
  35. data/lib/spark_connect/session.rb +317 -0
  36. data/lib/spark_connect/stat_functions.rb +109 -0
  37. data/lib/spark_connect/streaming.rb +351 -0
  38. data/lib/spark_connect/types.rb +490 -0
  39. data/lib/spark_connect/version.rb +11 -0
  40. data/lib/spark_connect/window.rb +119 -0
  41. data/lib/spark_connect/writer.rb +208 -0
  42. data/lib/spark_connect.rb +58 -0
  43. data/proto/spark/connect/base.proto +1275 -0
  44. data/proto/spark/connect/catalog.proto +243 -0
  45. data/proto/spark/connect/commands.proto +553 -0
  46. data/proto/spark/connect/common.proto +179 -0
  47. data/proto/spark/connect/expressions.proto +557 -0
  48. data/proto/spark/connect/ml.proto +147 -0
  49. data/proto/spark/connect/ml_common.proto +64 -0
  50. data/proto/spark/connect/pipelines.proto +307 -0
  51. data/proto/spark/connect/relations.proto +1252 -0
  52. data/proto/spark/connect/types.proto +227 -0
  53. metadata +149 -0
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: spark/connect/ml.proto
4
+
5
+ require 'google/protobuf'
6
+
7
+ require 'spark/connect/relations_pb'
8
+ require 'spark/connect/expressions_pb'
9
+ require 'spark/connect/ml_common_pb'
10
+
11
+
12
+ descriptor_data = "\n\x16spark/connect/ml.proto\x12\rspark.connect\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/ml_common.proto\"\xa6\x0c\n\tMlCommand\x12+\n\x03\x66it\x18\x01 \x01(\x0b\x32\x1c.spark.connect.MlCommand.FitH\x00\x12%\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00\x12\x31\n\x06\x64\x65lete\x18\x03 \x01(\x0b\x32\x1f.spark.connect.MlCommand.DeleteH\x00\x12/\n\x05write\x18\x04 \x01(\x0b\x32\x1e.spark.connect.MlCommand.WriteH\x00\x12-\n\x04read\x18\x05 \x01(\x0b\x32\x1d.spark.connect.MlCommand.ReadH\x00\x12\x35\n\x08\x65valuate\x18\x06 \x01(\x0b\x32!.spark.connect.MlCommand.EvaluateH\x00\x12:\n\x0b\x63lean_cache\x18\x07 \x01(\x0b\x32#.spark.connect.MlCommand.CleanCacheH\x00\x12?\n\x0eget_cache_info\x18\x08 \x01(\x0b\x32%.spark.connect.MlCommand.GetCacheInfoH\x00\x12@\n\x0e\x63reate_summary\x18\t \x01(\x0b\x32&.spark.connect.MlCommand.CreateSummaryH\x00\x12?\n\x0eget_model_size\x18\n \x01(\x0b\x32%.spark.connect.MlCommand.GetModelSizeH\x00\x1a\x96\x01\n\x03\x46it\x12,\n\testimator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12,\n\x06params\x18\x02 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x00\x88\x01\x01\x12(\n\x07\x64\x61taset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationB\t\n\x07_params\x1a\\\n\x06\x44\x65lete\x12*\n\x08obj_refs\x18\x01 \x03(\x0b\x32\x18.spark.connect.ObjectRef\x12\x17\n\nevict_only\x18\x02 \x01(\x08H\x00\x88\x01\x01\x42\r\n\x0b_evict_only\x1a\x0c\n\nCleanCache\x1a\x0e\n\x0cGetCacheInfo\x1a\xd4\x02\n\x05Write\x12-\n\x08operator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00\x12+\n\x07obj_ref\x18\x02 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12,\n\x06params\x18\x03 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x01\x88\x01\x01\x12\x0c\n\x04path\x18\x04 \x01(\t\x12\x1d\n\x10should_overwrite\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12<\n\x07options\x18\x06 
\x03(\x0b\x32+.spark.connect.MlCommand.Write.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x06\n\x04typeB\t\n\x07_paramsB\x13\n\x11_should_overwrite\x1a\x41\n\x04Read\x12+\n\x08operator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12\x0c\n\x04path\x18\x02 \x01(\t\x1a\x9b\x01\n\x08\x45valuate\x12,\n\tevaluator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12,\n\x06params\x18\x02 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x00\x88\x01\x01\x12(\n\x07\x64\x61taset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationB\t\n\x07_params\x1a\x66\n\rCreateSummary\x12+\n\tmodel_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRef\x12(\n\x07\x64\x61taset\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x1a;\n\x0cGetModelSize\x12+\n\tmodel_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefB\t\n\x07\x63ommand\"\x8c\x03\n\x0fMlCommandResult\x12\x32\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00\x12\x11\n\x07summary\x18\x02 \x01(\tH\x00\x12\x46\n\roperator_info\x18\x03 \x01(\x0b\x32-.spark.connect.MlCommandResult.MlOperatorInfoH\x00\x1a\xda\x01\n\x0eMlOperatorInfo\x12+\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12\x0e\n\x04name\x18\x02 \x01(\tH\x00\x12\x10\n\x03uid\x18\x03 \x01(\tH\x01\x88\x01\x01\x12,\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x02\x88\x01\x01\x12\x1c\n\x0fwarning_message\x18\x05 \x01(\tH\x03\x88\x01\x01\x42\x06\n\x04typeB\x06\n\x04_uidB\t\n\x07_paramsB\x12\n\x10_warning_messageB\r\n\x0bresult_typeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
13
+
14
+ pool = ::Google::Protobuf::DescriptorPool.generated_pool
15
+ pool.add_serialized_file(descriptor_data)
16
+
17
+ module Spark
18
+ module Connect
19
+ MlCommand = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand").msgclass
20
+ MlCommand::Fit = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Fit").msgclass
21
+ MlCommand::Delete = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Delete").msgclass
22
+ MlCommand::CleanCache = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.CleanCache").msgclass
23
+ MlCommand::GetCacheInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.GetCacheInfo").msgclass
24
+ MlCommand::Write = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Write").msgclass
25
+ MlCommand::Read = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Read").msgclass
26
+ MlCommand::Evaluate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Evaluate").msgclass
27
+ MlCommand::CreateSummary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.CreateSummary").msgclass
28
+ MlCommand::GetModelSize = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.GetModelSize").msgclass
29
+ MlCommandResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommandResult").msgclass
30
+ MlCommandResult::MlOperatorInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommandResult.MlOperatorInfo").msgclass
31
+ end
32
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: spark/connect/pipelines.proto
4
+
5
+ require 'google/protobuf'
6
+
7
+ require 'google/protobuf/any_pb'
8
+ require 'google/protobuf/timestamp_pb'
9
+ require 'spark/connect/common_pb'
10
+ require 'spark/connect/relations_pb'
11
+ require 'spark/connect/types_pb'
12
+
13
+
14
+ descriptor_data = "\n\x1dspark/connect/pipelines.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto\"\xa8\x1c\n\x0fPipelineCommand\x12S\n\x15\x63reate_dataflow_graph\x18\x01 \x01(\x0b\x32\x32.spark.connect.PipelineCommand.CreateDataflowGraphH\x00\x12\x44\n\rdefine_output\x18\x02 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00\x12@\n\x0b\x64\x65\x66ine_flow\x18\x03 \x01(\x0b\x32).spark.connect.PipelineCommand.DefineFlowH\x00\x12O\n\x13\x64rop_dataflow_graph\x18\x04 \x01(\x0b\x32\x30.spark.connect.PipelineCommand.DropDataflowGraphH\x00\x12<\n\tstart_run\x18\x05 \x01(\x0b\x32\'.spark.connect.PipelineCommand.StartRunH\x00\x12Z\n\x19\x64\x65\x66ine_sql_graph_elements\x18\x06 \x01(\x0b\x32\x35.spark.connect.PipelineCommand.DefineSqlGraphElementsH\x00\x12z\n*get_query_function_execution_signal_stream\x18\x07 \x01(\x0b\x32\x44.spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStreamH\x00\x12i\n!define_flow_query_function_result\x18\x08 \x01(\x0b\x32<.spark.connect.PipelineCommand.DefineFlowQueryFunctionResultH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x1a\xfe\x01\n\x13\x43reateDataflowGraph\x12\x1c\n\x0f\x64\x65\x66\x61ult_catalog\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65\x66\x61ult_database\x18\x02 \x01(\tH\x01\x88\x01\x01\x12Q\n\x08sql_conf\x18\x05 \x03(\x0b\x32?.spark.connect.PipelineCommand.CreateDataflowGraph.SqlConfEntry\x1a.\n\x0cSqlConfEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x12\n\x10_default_catalogB\x13\n\x11_default_database\x1aI\n\x11\x44ropDataflowGraph\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\xa4\x08\n\x0c\x44\x65\x66ineOutput\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 
\x01(\tH\x01\x88\x01\x01\x12\x18\n\x0boutput_name\x18\x02 \x01(\tH\x02\x88\x01\x01\x12\x33\n\x0boutput_type\x18\x03 \x01(\x0e\x32\x19.spark.connect.OutputTypeH\x03\x88\x01\x01\x12\x14\n\x07\x63omment\x18\x04 \x01(\tH\x04\x88\x01\x01\x12\x44\n\x14source_code_location\x18\x05 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05\x88\x01\x01\x12Q\n\rtable_details\x18\x06 \x01(\x0b\x32\x38.spark.connect.PipelineCommand.DefineOutput.TableDetailsH\x00\x12O\n\x0csink_details\x18\x07 \x01(\x0b\x32\x37.spark.connect.PipelineCommand.DefineOutput.SinkDetailsH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x1a\xdb\x02\n\x0cTableDetails\x12g\n\x10table_properties\x18\x01 \x03(\x0b\x32M.spark.connect.PipelineCommand.DefineOutput.TableDetails.TablePropertiesEntry\x12\x16\n\x0epartition_cols\x18\x02 \x03(\t\x12\x13\n\x06\x66ormat\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x33\n\x10schema_data_type\x18\x04 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x12\x17\n\rschema_string\x18\x05 \x01(\tH\x00\x12\x1a\n\x12\x63lustering_columns\x18\x06 \x03(\t\x1a\x36\n\x14TablePropertiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x08\n\x06schemaB\t\n\x07_format\x1a\xb4\x01\n\x0bSinkDetails\x12U\n\x07options\x18\x01 \x03(\x0b\x32\x44.spark.connect.PipelineCommand.DefineOutput.SinkDetails.OptionsEntry\x12\x13\n\x06\x66ormat\x18\x02 \x01(\tH\x00\x88\x01\x01\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0e\n\x0c_output_nameB\x0e\n\x0c_output_typeB\n\n\x08_commentB\x17\n\x15_source_code_location\x1a\xe4\x05\n\nDefineFlow\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tflow_name\x18\x02 \x01(\tH\x02\x88\x01\x01\x12 \n\x13target_dataset_name\x18\x03 \x01(\tH\x03\x88\x01\x01\x12H\n\x08sql_conf\x18\x04 
\x03(\x0b\x32\x36.spark.connect.PipelineCommand.DefineFlow.SqlConfEntry\x12\x16\n\tclient_id\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x44\n\x14source_code_location\x18\x06 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05\x88\x01\x01\x12\x63\n\x15relation_flow_details\x18\x07 \x01(\x0b\x32\x42.spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetailsH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12\x11\n\x04once\x18\x08 \x01(\x08H\x06\x88\x01\x01\x1a.\n\x0cSqlConfEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aW\n\x18WriteRelationFlowDetails\x12.\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00\x88\x01\x01\x42\x0b\n\t_relation\x1a\x30\n\x08Response\x12\x16\n\tflow_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0c\n\n_flow_nameB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0c\n\n_flow_nameB\x16\n\x14_target_dataset_nameB\x0c\n\n_client_idB\x17\n\x15_source_code_locationB\x07\n\x05_once\x1a\xeb\x01\n\x08StartRun\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1e\n\x16\x66ull_refresh_selection\x18\x02 \x03(\t\x12\x1d\n\x10\x66ull_refresh_all\x18\x03 \x01(\x08H\x01\x88\x01\x01\x12\x19\n\x11refresh_selection\x18\x04 \x03(\t\x12\x10\n\x03\x64ry\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x14\n\x07storage\x18\x06 \x01(\tH\x03\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x13\n\x11_full_refresh_allB\x06\n\x04_dryB\n\n\x08_storage\x1a\xa0\x01\n\x16\x44\x65\x66ineSqlGraphElements\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rsql_file_path\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08sql_text\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x10\n\x0e_sql_file_pathB\x0b\n\t_sql_text\x1a\x83\x01\n%GetQueryFunctionExecutionSignalStream\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tclient_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x0c\n\n_client_id\x1a\xb8\x01\n\x1d\x44\x65\x66ineFlowQueryFunctionResult\x12\x16\n\tflow_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x02 \x01(\tH\x01\x88\x01\x01\x12.\n\x08relation\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x02\x88\x01\x01\x42\x0c\n\n_flow_nameB\x14\n\x12_dataflow_graph_idB\x0b\n\t_relationB\x0e\n\x0c\x63ommand_type\"\xf3\x04\n\x15PipelineCommandResult\x12\x66\n\x1c\x63reate_dataflow_graph_result\x18\x01 \x01(\x0b\x32>.spark.connect.PipelineCommandResult.CreateDataflowGraphResultH\x00\x12W\n\x14\x64\x65\x66ine_output_result\x18\x02 \x01(\x0b\x32\x37.spark.connect.PipelineCommandResult.DefineOutputResultH\x00\x12S\n\x12\x64\x65\x66ine_flow_result\x18\x03 \x01(\x0b\x32\x35.spark.connect.PipelineCommandResult.DefineFlowResultH\x00\x1aQ\n\x19\x43reateDataflowGraphResult\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1aq\n\x12\x44\x65\x66ineOutputResult\x12\x43\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00\x88\x01\x01\x42\x16\n\x14_resolved_identifier\x1ao\n\x10\x44\x65\x66ineFlowResult\x12\x43\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00\x88\x01\x01\x42\x16\n\x14_resolved_identifierB\r\n\x0bresult_type\"B\n\x13PipelineEventResult\x12+\n\x05\x65vent\x18\x01 \x01(\x0b\x32\x1c.spark.connect.PipelineEvent\"`\n\rPipelineEvent\x12-\n\ttimestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x14\n\x07message\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\n\n\x08_message\"\xc0\x01\n\x12SourceCodeLocation\x12\x16\n\tfile_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0bline_number\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x1c\n\x0f\x64\x65\x66inition_path\x18\x03 \x01(\tH\x02\x88\x01\x01\x12(\n\textension\x18\xe7\x07 
\x03(\x0b\x32\x14.google.protobuf.AnyB\x0c\n\n_file_nameB\x0e\n\x0c_line_numberB\x12\n\x10_definition_path\":\n$PipelineQueryFunctionExecutionSignal\x12\x12\n\nflow_names\x18\x01 \x03(\t\"\xd1\x01\n\x17PipelineAnalysisContext\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1c\n\x0f\x64\x65\x66inition_path\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tflow_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12(\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyB\x14\n\x12_dataflow_graph_idB\x12\n\x10_definition_pathB\x0c\n\n_flow_name*i\n\nOutputType\x12\x1b\n\x17OUTPUT_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MATERIALIZED_VIEW\x10\x01\x12\t\n\x05TABLE\x10\x02\x12\x12\n\x0eTEMPORARY_VIEW\x10\x03\x12\x08\n\x04SINK\x10\x04\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
15
+
16
+ pool = ::Google::Protobuf::DescriptorPool.generated_pool
17
+ pool.add_serialized_file(descriptor_data)
18
+
19
+ module Spark
20
+ module Connect
21
+ PipelineCommand = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand").msgclass
22
+ PipelineCommand::CreateDataflowGraph = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.CreateDataflowGraph").msgclass
23
+ PipelineCommand::DropDataflowGraph = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DropDataflowGraph").msgclass
24
+ PipelineCommand::DefineOutput = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput").msgclass
25
+ PipelineCommand::DefineOutput::TableDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput.TableDetails").msgclass
26
+ PipelineCommand::DefineOutput::SinkDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput.SinkDetails").msgclass
27
+ PipelineCommand::DefineFlow = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow").msgclass
28
+ PipelineCommand::DefineFlow::WriteRelationFlowDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetails").msgclass
29
+ PipelineCommand::DefineFlow::Response = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow.Response").msgclass
30
+ PipelineCommand::StartRun = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.StartRun").msgclass
31
+ PipelineCommand::DefineSqlGraphElements = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineSqlGraphElements").msgclass
32
+ PipelineCommand::GetQueryFunctionExecutionSignalStream = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStream").msgclass
33
+ PipelineCommand::DefineFlowQueryFunctionResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlowQueryFunctionResult").msgclass
34
+ PipelineCommandResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult").msgclass
35
+ PipelineCommandResult::CreateDataflowGraphResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.CreateDataflowGraphResult").msgclass
36
+ PipelineCommandResult::DefineOutputResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.DefineOutputResult").msgclass
37
+ PipelineCommandResult::DefineFlowResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.DefineFlowResult").msgclass
38
+ PipelineEventResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineEventResult").msgclass
39
+ PipelineEvent = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineEvent").msgclass
40
+ SourceCodeLocation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SourceCodeLocation").msgclass
41
+ PipelineQueryFunctionExecutionSignal = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineQueryFunctionExecutionSignal").msgclass
42
+ PipelineAnalysisContext = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineAnalysisContext").msgclass
43
+ OutputType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.OutputType").enummodule
44
+ end
45
+ end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: spark/connect/relations.proto
4
+
5
+ require 'google/protobuf'
6
+
7
+ require 'google/protobuf/any_pb'
8
+ require 'spark/connect/expressions_pb'
9
+ require 'spark/connect/types_pb'
10
+ require 'spark/connect/catalog_pb'
11
+ require 'spark/connect/common_pb'
12
+ require 'spark/connect/ml_common_pb'
13
+
14
+
15
+ descriptor_data = "\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto\"\xa3\x18\n\x08Relation\x12-\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommon\x12#\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00\x12)\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00\x12\'\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00\x12#\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00\x12-\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00\x12#\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00\x12%\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00\x12-\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00\x12!\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00\x12\x36\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00\x12\'\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00\x12\'\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00\x12\x31\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00\x12%\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00\x12\x36\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00\x12\x31\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00\x12$\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00\x12\x41\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00\x12\x30\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00\x12#\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00\x12#\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00\x12\x32\n\x0cwith_columns\x18\x17 
\x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00\x12#\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00\x12)\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00\x12,\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00\x12K\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00\x12\x36\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00\x12\x38\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00\x12%\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00\x12,\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00\x12\x31\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00\x12\x36\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00\x12K\n\x1a\x61pply_in_pandas_with_state\x18\" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00\x12\x30\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00\x12\x43\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32\".spark.connect.CachedLocalRelationH\x00\x12\x45\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00\x12h\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00\x12-\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00\x12\x62\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00\x12\x36\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00\x12-\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00\x12X\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00\x12\x32\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00\x12R\n\x1d\x63hunked_cached_local_relation\x18- 
\x01(\x0b\x32).spark.connect.ChunkedCachedLocalRelationH\x00\x12(\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00\x12(\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00\x12+\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00\x12-\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00\x12/\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00\x12/\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00\x12%\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00\x12\'\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00\x12<\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00\x12\x32\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00\x12\x30\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00\x12*\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00\x12\x31\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00\x12*\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12*\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00\x42\n\n\x08rel_type\"\x99\x03\n\nMlRelation\x12\x38\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00\x12%\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00\x12;\n\x15model_summary_dataset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x01\x88\x01\x01\x1a\xc7\x01\n\tTransform\x12+\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12\x30\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00\x12&\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.Relation\x12\'\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset\"\x9e\x02\n\x05\x46\x65tch\x12)\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRef\x12,\n\x07methods\x18\x02 
\x03(\x0b\x32\x1b.spark.connect.Fetch.Method\x1a\xbb\x01\n\x06Method\x12\x0e\n\x06method\x18\x01 \x01(\t\x12.\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.Args\x1aq\n\x04\x41rgs\x12\x32\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00\x12(\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00\x42\x0b\n\targs_type\"\t\n\x07Unknown\"r\n\x0eRelationCommon\x12\x17\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01\x12\x14\n\x07plan_id\x18\x02 \x01(\x03H\x00\x88\x01\x01\x12%\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginB\n\n\x08_plan_id\"\x92\x03\n\x03SQL\x12\r\n\x05query\x18\x01 \x01(\t\x12.\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01\x12\x37\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01\x12?\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntry\x12\x30\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.Expression\x1aN\n\tArgsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x30\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.Literal:\x02\x38\x01\x1aP\n\x13NamedArgumentsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression:\x02\x38\x01\"c\n\rWithRelations\x12%\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12+\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.Relation\"\x91\x04\n\x04Read\x12\x35\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00\x12\x35\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00\x12\x14\n\x0cis_streaming\x18\x03 \x01(\x08\x1a\x97\x01\n\nNamedTable\x12\x1b\n\x13unparsed_identifier\x18\x01 \x01(\t\x12<\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xdd\x01\n\nDataSource\x12\x13\n\x06\x66ormat\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x13\n\x06schema\x18\x02 \x01(\tH\x01\x88\x01\x01\x12<\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntry\x12\r\n\x05paths\x18\x04 \x03(\t\x12\x12\n\npredicates\x18\x05 \x03(\t\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type\"a\n\x07Project\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12.\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"^\n\x06\x46ilter\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12,\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\"\xb6\x04\n\x04Join\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x31\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12/\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinType\x12\x15\n\rusing_columns\x18\x05 \x03(\t\x12=\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00\x88\x01\x01\x1a?\n\x0cJoinDataType\x12\x16\n\x0eis_left_struct\x18\x01 \x01(\x08\x12\x17\n\x0fis_right_struct\x18\x02 \x01(\x08\"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type\"\x99\x03\n\x0cSetOperation\x12+\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12,\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12:\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpType\x12\x13\n\x06is_all\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x14\n\x07\x62y_name\x18\x05 
\x01(\x08H\x01\x88\x01\x01\x12\"\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02\x88\x01\x01\"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns\">\n\x05Limit\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05limit\x18\x02 \x01(\x05\"@\n\x06Offset\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0e\n\x06offset\x18\x02 \x01(\x05\"=\n\x04Tail\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05limit\x18\x02 \x01(\x05\"\x92\x05\n\tAggregate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x36\n\ngroup_type\x18\x02 \x01(\x0e\x32\".spark.connect.Aggregate.GroupType\x12\x37\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x38\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12-\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.Pivot\x12<\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSets\x1a\x62\n\x05Pivot\x12&\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x31\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.Literal\x1a?\n\x0cGroupingSets\x12/\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05\"\x88\x01\n\x04Sort\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x32\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrder\x12\x16\n\tis_global\x18\x03 \x01(\x08H\x00\x88\x01\x01\x42\x0c\n\n_is_global\"p\n\x04\x44rop\x12&\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.Relation\x12*\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x14\n\x0c\x63olumn_names\x18\x03 \x03(\t\"\xb9\x01\n\x0b\x44\x65\x64uplicate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x14\n\x0c\x63olumn_names\x18\x02 \x03(\t\x12 \n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00\x88\x01\x01\x12\x1d\n\x10within_watermark\x18\x04 \x01(\x08H\x01\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark\"K\n\rLocalRelation\x12\x11\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00\x88\x01\x01\x12\x13\n\x06schema\x18\x02 \x01(\tH\x01\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema\"B\n\x13\x43\x61\x63hedLocalRelation\x12\x0c\n\x04hash\x18\x03 \x01(\tJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId\"X\n\x1a\x43hunkedCachedLocalRelation\x12\x12\n\ndataHashes\x18\x01 \x03(\t\x12\x17\n\nschemaHash\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\r\n\x0b_schemaHash\"+\n\x14\x43\x61\x63hedRemoteRelation\x12\x13\n\x0brelation_id\x18\x01 \x01(\t\"\xc7\x01\n\x06Sample\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x13\n\x0blower_bound\x18\x02 \x01(\x01\x12\x13\n\x0bupper_bound\x18\x03 \x01(\x01\x12\x1d\n\x10with_replacement\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x11\n\x04seed\x18\x05 \x01(\x03H\x01\x88\x01\x01\x12\x1b\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08\x42\x13\n\x11_with_replacementB\x07\n\x05_seed\"p\n\x05Range\x12\x12\n\x05start\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x03\x12\x0c\n\x04step\x18\x03 \x01(\x03\x12\x1b\n\x0enum_partitions\x18\x04 \x01(\x05H\x01\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions\"Y\n\rSubqueryAlias\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05\x61lias\x18\x02 \x01(\t\x12\x11\n\tqualifier\x18\x03 \x03(\t\"o\n\x0bRepartition\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x16\n\x0enum_partitions\x18\x02 \x01(\x05\x12\x14\n\x07shuffle\x18\x03 
\x01(\x08H\x00\x88\x01\x01\x42\n\n\x08_shuffle\"j\n\nShowString\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x10\n\x08num_rows\x18\x02 \x01(\x05\x12\x10\n\x08truncate\x18\x03 \x01(\x05\x12\x10\n\x08vertical\x18\x04 \x01(\x08\"X\n\nHtmlString\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x10\n\x08num_rows\x18\x02 \x01(\x05\x12\x10\n\x08truncate\x18\x03 \x01(\x05\"I\n\x0bStatSummary\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x12\n\nstatistics\x18\x02 \x03(\t\"D\n\x0cStatDescribe\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\"R\n\x0cStatCrosstab\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\"M\n\x07StatCov\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\"n\n\x08StatCorr\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\x12\x13\n\x06method\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\t\n\x07_method\"y\n\x12StatApproxQuantile\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x15\n\rprobabilities\x18\x03 \x03(\x01\x12\x16\n\x0erelative_error\x18\x04 \x01(\x01\"g\n\rStatFreqItems\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x14\n\x07support\x18\x03 \x01(\x01H\x00\x88\x01\x01\x42\n\n\x08_support\"\x85\x02\n\x0cStatSampleBy\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x37\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.Fraction\x12\x11\n\x04seed\x18\x05 \x01(\x03H\x00\x88\x01\x01\x1aP\n\x08\x46raction\x12\x32\n\x07stratum\x18\x01 
\x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x10\n\x08\x66raction\x18\x02 \x01(\x01\x42\x07\n\x05_seed\"q\n\x06NAFill\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x31\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.Literal\"l\n\x06NADrop\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x1a\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00\x88\x01\x01\x42\x10\n\x0e_min_non_nulls\"\xf8\x01\n\tNAReplace\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12:\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.Replacement\x1ay\n\x0bReplacement\x12\x34\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x34\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.Literal\"D\n\x04ToDF\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x14\n\x0c\x63olumn_names\x18\x02 \x03(\t\"\xbb\x02\n\x12WithColumnsRenamed\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12W\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01\x12\x39\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.Rename\x1a\x37\n\x15RenameColumnsMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x06Rename\x12\x10\n\x08\x63ol_name\x18\x01 \x01(\t\x12\x14\n\x0cnew_col_name\x18\x02 \x01(\t\"g\n\x0bWithColumns\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.Alias\"d\n\rWithWatermark\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x12\n\nevent_time\x18\x02 \x01(\t\x12\x17\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\t\"k\n\x04Hint\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04name\x18\x02 
\x01(\t\x12-\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"\x86\x02\n\x07Unpivot\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x32\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00\x88\x01\x01\x12\x1c\n\x14variable_column_name\x18\x04 \x01(\t\x12\x19\n\x11value_column_name\x18\x05 \x01(\t\x1a\x33\n\x06Values\x12)\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionB\t\n\x07_values\"e\n\tTranspose\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\"d\n\x1dUnresolvedTableValuedFunction\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\"[\n\x08ToSchema\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\'\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\"\xa5\x01\n\x17RepartitionByExpression\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x32\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x1b\n\x0enum_partitions\x18\x03 \x01(\x05H\x00\x88\x01\x01\x42\x11\n\x0f_num_partitions\"\xc5\x01\n\rMapPartitions\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12<\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x17\n\nis_barrier\x18\x03 \x01(\x08H\x00\x88\x01\x01\x12\x17\n\nprofile_id\x18\x04 \x01(\x05H\x01\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id\"\x9e\x05\n\x08GroupMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x37\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x36\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12.\n\rinitial_input\x18\x05 
\x01(\x0b\x32\x17.spark.connect.Relation\x12?\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.Expression\x12%\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00\x88\x01\x01\x12\x18\n\x0boutput_mode\x18\x08 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0ctimeout_conf\x18\t \x01(\tH\x02\x88\x01\x01\x12\x32\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03\x88\x01\x01\x12M\n\x19transform_with_state_info\x18\x0b \x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info\"\xb2\x01\n\x16TransformWithStateInfo\x12\x11\n\ttime_mode\x18\x01 \x01(\t\x12#\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x33\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema\"\x94\x03\n\nCoGroupMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12=\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12&\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.Relation\x12=\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12<\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.Expression\"\x8f\x02\n\x16\x41pplyInPandasWithState\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x37\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x15\n\routput_schema\x18\x04 \x01(\t\x12\x14\n\x0cstate_schema\x18\x05 \x01(\t\x12\x13\n\x0boutput_mode\x18\x06 \x01(\t\x12\x14\n\x0ctimeout_conf\x18\x07 
\x01(\t\"\xc0\x01\n$CommonInlineUserDefinedTableFunction\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x15\n\rdeterministic\x18\x02 \x01(\x08\x12,\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x30\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00\x42\n\n\x08\x66unction\"\x87\x01\n\nPythonUDTF\x12\x31\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x88\x01\x01\x12\x11\n\teval_type\x18\x02 \x01(\x05\x12\x0f\n\x07\x63ommand\x18\x03 \x01(\x0c\x12\x12\n\npython_ver\x18\x04 \x01(\tB\x0e\n\x0c_return_type\"\x7f\n!CommonInlineUserDefinedDataSource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12=\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00\x42\r\n\x0b\x64\x61ta_source\"7\n\x10PythonDataSource\x12\x0f\n\x07\x63ommand\x18\x01 \x01(\x0c\x12\x12\n\npython_ver\x18\x02 \x01(\t\"r\n\x0e\x43ollectMetrics\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04name\x18\x02 \x01(\t\x12*\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"\xd8\x02\n\x05Parse\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormat\x12,\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x88\x01\x01\x12\x32\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema\"\xee\x02\n\x08\x41sOfJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12-\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12.\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.Expression\x12,\n\tjoin_expr\x18\x05 
\x01(\x0b\x32\x19.spark.connect.Expression\x12\x15\n\rusing_columns\x18\x06 \x03(\t\x12\x11\n\tjoin_type\x18\x07 \x01(\t\x12,\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x1b\n\x13\x61llow_exact_matches\x18\t \x01(\x08\x12\x11\n\tdirection\x18\n \x01(\t\"\xc0\x01\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x31\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12/\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
16
+
17
+ pool = ::Google::Protobuf::DescriptorPool.generated_pool # the same pool every lookup in the module below reads from
18
+ pool.add_serialized_file(descriptor_data) # register the serialized relations.proto descriptor so the spark.connect.* names below can be resolved
19
+
20
+ module Spark # Ruby constants for the messages and enums declared in spark/connect/relations.proto.
21
+ module Connect # Matches the "spark.connect" package prefix used in every lookup string below.
22
+ Relation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Relation").msgclass # look the type up by fully-qualified proto name, then take its message class
23
+ MlRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlRelation").msgclass
24
+ MlRelation::Transform = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlRelation.Transform").msgclass # nested proto type: the constant is defined under its parent's class
25
+ Fetch = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch").msgclass
26
+ Fetch::Method = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch.Method").msgclass
27
+ Fetch::Method::Args = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch.Method.Args").msgclass
28
+ Unknown = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unknown").msgclass
29
+ RelationCommon = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.RelationCommon").msgclass
30
+ SQL = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SQL").msgclass # the ArgsEntry / NamedArgumentsEntry map-entry types in the descriptor get no constant here
31
+ WithRelations = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithRelations").msgclass
32
+ Read = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read").msgclass
33
+ Read::NamedTable = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read.NamedTable").msgclass
34
+ Read::DataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read.DataSource").msgclass
35
+ Project = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Project").msgclass
36
+ Filter = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Filter").msgclass
37
+ Join = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join").msgclass
38
+ Join::JoinDataType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join.JoinDataType").msgclass
39
+ Join::JoinType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join.JoinType").enummodule # proto enum, so .enummodule is used instead of .msgclass
40
+ SetOperation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SetOperation").msgclass
41
+ SetOperation::SetOpType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SetOperation.SetOpType").enummodule
42
+ Limit = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Limit").msgclass
43
+ Offset = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Offset").msgclass
44
+ Tail = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Tail").msgclass
45
+ Aggregate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate").msgclass
46
+ Aggregate::Pivot = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.Pivot").msgclass
47
+ Aggregate::GroupingSets = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.GroupingSets").msgclass
48
+ Aggregate::GroupType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.GroupType").enummodule
49
+ Sort = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Sort").msgclass
50
+ Drop = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Drop").msgclass
51
+ Deduplicate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Deduplicate").msgclass
52
+ LocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.LocalRelation").msgclass
53
+ CachedLocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CachedLocalRelation").msgclass
54
+ ChunkedCachedLocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ChunkedCachedLocalRelation").msgclass
55
+ CachedRemoteRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CachedRemoteRelation").msgclass
56
+ Sample = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Sample").msgclass
57
+ Range = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Range").msgclass # Spark::Connect::Range, scoped to this module
58
+ SubqueryAlias = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SubqueryAlias").msgclass
59
+ Repartition = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Repartition").msgclass
60
+ ShowString = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ShowString").msgclass
61
+ HtmlString = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.HtmlString").msgclass
62
+ StatSummary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSummary").msgclass
63
+ StatDescribe = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatDescribe").msgclass
64
+ StatCrosstab = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCrosstab").msgclass
65
+ StatCov = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCov").msgclass
66
+ StatCorr = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCorr").msgclass
67
+ StatApproxQuantile = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatApproxQuantile").msgclass
68
+ StatFreqItems = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatFreqItems").msgclass
69
+ StatSampleBy = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSampleBy").msgclass
70
+ StatSampleBy::Fraction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSampleBy.Fraction").msgclass
71
+ NAFill = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAFill").msgclass
72
+ NADrop = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NADrop").msgclass
73
+ NAReplace = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAReplace").msgclass
74
+ NAReplace::Replacement = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAReplace.Replacement").msgclass
75
+ ToDF = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ToDF").msgclass
76
+ WithColumnsRenamed = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumnsRenamed").msgclass
77
+ WithColumnsRenamed::Rename = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumnsRenamed.Rename").msgclass
78
+ WithColumns = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumns").msgclass
79
+ WithWatermark = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithWatermark").msgclass
80
+ Hint = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Hint").msgclass
81
+ Unpivot = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unpivot").msgclass
82
+ Unpivot::Values = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unpivot.Values").msgclass
83
+ Transpose = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Transpose").msgclass
84
+ UnresolvedTableValuedFunction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.UnresolvedTableValuedFunction").msgclass
85
+ ToSchema = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ToSchema").msgclass
86
+ RepartitionByExpression = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.RepartitionByExpression").msgclass
87
+ MapPartitions = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MapPartitions").msgclass
88
+ GroupMap = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.GroupMap").msgclass
89
+ TransformWithStateInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.TransformWithStateInfo").msgclass
90
+ CoGroupMap = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CoGroupMap").msgclass
91
+ ApplyInPandasWithState = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ApplyInPandasWithState").msgclass
92
+ CommonInlineUserDefinedTableFunction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CommonInlineUserDefinedTableFunction").msgclass
93
+ PythonUDTF = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PythonUDTF").msgclass
94
+ CommonInlineUserDefinedDataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CommonInlineUserDefinedDataSource").msgclass
95
+ PythonDataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PythonDataSource").msgclass
96
+ CollectMetrics = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CollectMetrics").msgclass
97
+ Parse = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Parse").msgclass
98
+ Parse::ParseFormat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Parse.ParseFormat").enummodule
99
+ AsOfJoin = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.AsOfJoin").msgclass
100
+ LateralJoin = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.LateralJoin").msgclass # its join_type field reuses spark.connect.Join.JoinType per the descriptor
101
+ end
102
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: spark/connect/types.proto
4
+
5
+ require 'google/protobuf'
6
+
7
+
8
+ descriptor_data = "\n\x19spark/connect/types.proto\x12\rspark.connect\"\xbc\x1c\n\x08\x44\x61taType\x12,\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00\x12\x30\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00\x12\x32\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00\x12,\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00\x12.\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00\x12\x32\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00\x12,\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00\x12.\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00\x12\x30\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00\x12\x32\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00\x12\x30\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00\x12,\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00\x12\x33\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00\x12,\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00\x12\x36\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00\x12=\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00\x12\x45\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00\x12H\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00\x12\x44\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32\'.spark.connect.DataType.DayTimeIntervalH\x00\x12.\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00\x12\x30\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00\x12*\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00\x12\x32\n\x07variant\x18\x19 
\x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00\x12*\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00\x12\x34\n\x08geometry\x18\x1a \x01(\x0b\x32 .spark.connect.DataType.GeometryH\x00\x12\x36\n\tgeography\x18\x1b \x01(\x0b\x32!.spark.connect.DataType.GeographyH\x00\x12\x34\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00\x12,\n\x04time\x18\x1c \x01(\x0b\x32\x1c.spark.connect.DataType.TimeH\x00\x1a+\n\x07\x42oolean\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04\x42yte\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a)\n\x05Short\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a+\n\x07Integer\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04Long\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a)\n\x05\x46loat\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a*\n\x06\x44ouble\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a=\n\x06String\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x12\x11\n\tcollation\x18\x02 \x01(\t\x1a*\n\x06\x42inary\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04NULL\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a-\n\tTimestamp\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04\x44\x61te\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\x30\n\x0cTimestampNTZ\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1aN\n\x04Time\x12\x16\n\tprecision\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x02 \x01(\rB\x0c\n\n_precision\x1a\x34\n\x10\x43\x61lendarInterval\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\x85\x01\n\x11YearMonthInterval\x12\x18\n\x0bstart_field\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\x83\x01\n\x0f\x44\x61yTimeInterval\x12\x18\n\x0bstart_field\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x02 
\x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\x38\n\x04\x43har\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a;\n\x07VarChar\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1ao\n\x07\x44\x65\x63imal\x12\x12\n\x05scale\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tprecision\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x08\n\x06_scaleB\x0c\n\n_precision\x1a}\n\x0bStructField\x12\x0c\n\x04name\x18\x01 \x01(\t\x12*\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x10\n\x08nullable\x18\x03 \x01(\x08\x12\x15\n\x08metadata\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_metadata\x1a_\n\x06Struct\x12\x33\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructField\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1ao\n\x05\x41rray\x12-\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x15\n\rcontains_null\x18\x02 \x01(\x08\x12 \n\x18type_variation_reference\x18\x03 \x01(\r\x1a\x9c\x01\n\x03Map\x12)\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataType\x12+\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x1b\n\x13value_contains_null\x18\x03 \x01(\x08\x12 \n\x18type_variation_reference\x18\x04 \x01(\r\x1a:\n\x08Geometry\x12\x0c\n\x04srid\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a;\n\tGeography\x12\x0c\n\x04srid\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a+\n\x07Variant\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\xe4\x01\n\x03UDT\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x16\n\tjvm_class\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x19\n\x0cpython_class\x18\x03 \x01(\tH\x01\x88\x01\x01\x12$\n\x17serialized_python_class\x18\x04 \x01(\tH\x02\x88\x01\x01\x12.\n\x08sql_type\x18\x05 
\x01(\x0b\x32\x17.spark.connect.DataTypeH\x03\x88\x01\x01\x42\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_classB\x0b\n\t_sql_type\x1a$\n\x08Unparsed\x12\x18\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tB\x06\n\x04kindB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
9
+
10
+ pool = ::Google::Protobuf::DescriptorPool.generated_pool
11
+ pool.add_serialized_file(descriptor_data)
12
+
13
+ module Spark
14
+ module Connect
15
+ DataType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType").msgclass
16
+ DataType::Boolean = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Boolean").msgclass
17
+ DataType::Byte = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Byte").msgclass
18
+ DataType::Short = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Short").msgclass
19
+ DataType::Integer = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Integer").msgclass
20
+ DataType::Long = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Long").msgclass
21
+ DataType::Float = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Float").msgclass
22
+ DataType::Double = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Double").msgclass
23
+ DataType::String = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.String").msgclass
24
+ DataType::Binary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Binary").msgclass
25
+ DataType::NULL = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.NULL").msgclass
26
+ DataType::Timestamp = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Timestamp").msgclass
27
+ DataType::Date = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Date").msgclass
28
+ DataType::TimestampNTZ = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.TimestampNTZ").msgclass
29
+ DataType::Time = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Time").msgclass
30
+ DataType::CalendarInterval = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.CalendarInterval").msgclass
31
+ DataType::YearMonthInterval = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.YearMonthInterval").msgclass
32
+ DataType::DayTimeInterval = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.DayTimeInterval").msgclass
33
+ DataType::Char = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Char").msgclass
34
+ DataType::VarChar = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.VarChar").msgclass
35
+ DataType::Decimal = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Decimal").msgclass
36
+ DataType::StructField = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.StructField").msgclass
37
+ DataType::Struct = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Struct").msgclass
38
+ DataType::Array = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Array").msgclass
39
+ DataType::Map = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Map").msgclass
40
+ DataType::Geometry = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Geometry").msgclass
41
+ DataType::Geography = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Geography").msgclass
42
+ DataType::Variant = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Variant").msgclass
43
+ DataType::UDT = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.UDT").msgclass
44
+ DataType::Unparsed = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.Unparsed").msgclass
45
+ end
46
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
# Loads the generated Spark Connect protobuf/gRPC stubs and exposes them under a
# convenient, stable alias.
#
# The stubs are generated by `grpc_tools_ruby_protoc` from the vendored
# `proto/spark/connect/*.proto` files (see the `proto:generate` Rake task). The
# generated files `require` each other using paths rooted at
# `lib/spark_connect/proto` (e.g. `require "spark/connect/base_pb"`), so that
# directory must be on the load path before they are required.
proto_root = File.expand_path("proto", __dir__)
$LOAD_PATH.unshift(proto_root) unless $LOAD_PATH.include?(proto_root)

# Every generated stub is required explicitly instead of relying on the stubs'
# own transitive `require`s, so the set of loaded message classes does not
# depend on the `.proto` import graph. `require` is idempotent, so listing a
# stub that another stub already pulled in costs nothing.
require "spark/connect/base_pb"
require "spark/connect/base_services_pb"
require "spark/connect/catalog_pb"
require "spark/connect/commands_pb"
require "spark/connect/common_pb"
require "spark/connect/expressions_pb"
require "spark/connect/relations_pb"
require "spark/connect/types_pb"
require "spark/connect/ml_pb"
require "spark/connect/ml_common_pb"
# Shipped alongside the stubs above but previously missing from this list.
require "spark/connect/pipelines_pb"

module SparkConnect
  # All generated Spark Connect protobuf message classes and the gRPC service
  # stub live under the `Spark::Connect` namespace (derived from the protobuf
  # package `spark.connect`). `SparkConnect::Proto` is an alias that keeps the
  # rest of the codebase decoupled from that detail and reads clearly, e.g.
  # `SparkConnect::Proto::Relation` or `SparkConnect::Proto::Expression`.
  Proto = ::Spark::Connect
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module SparkConnect
  # Loads data from external sources into a {DataFrame}. Returned by
  # {SparkSession#read}. Mirrors PySpark's `DataFrameReader`.
  #
  # The reader is a mutable builder: {#format}, {#schema}, {#option} and
  # {#options} record the setting on the receiver and return the receiver so
  # calls can be chained. {#load} and {#table} turn the recorded settings into
  # a `Read` relation wrapped in a {DataFrame}.
  #
  # @example
  #   spark.read.format("csv").option("header", true).load("data.csv")
  #   spark.read.json("events.json")
  #   spark.read.table("my_table")
  class DataFrameReader
    Proto = SparkConnect::Proto

    # @param session [SparkSession]
    def initialize(session)
      @session = session
      @format = nil
      @schema = nil
      @options = {}
    end

    # Set the input format (`"csv"`, `"json"`, `"parquet"`, `"orc"`, ...).
    #
    # @param source [#to_s] the format name.
    # @return [self]
    def format(source)
      @format = source.to_s
      self
    end

    # Set the input schema (a {Types::StructType} or DDL string).
    #
    # A {Types::StructType} is stored as its `simple_string`; any other value
    # is stored as `to_s`.
    #
    # @param schema [Types::StructType, #to_s]
    # @return [self]
    def schema(schema)
      @schema = schema.is_a?(Types::StructType) ? schema.simple_string : schema.to_s
      self
    end

    # Set a single read option. Key and value are both stored as strings.
    #
    # @param key [#to_s]
    # @param value [#to_s]
    # @return [self]
    def option(key, value)
      @options[key.to_s] = value.to_s
      self
    end

    # Set multiple read options. Later keys overwrite earlier ones.
    #
    # @param opts [Hash] option name to value; both are stored as strings.
    # @return [self]
    def options(opts)
      opts.each { |key, value| option(key, value) }
      self
    end

    # Load data from the given path(s) using the configured format.
    #
    # Nested arrays of paths are flattened and `nil` entries are dropped, so
    # `load(nil)` behaves like `load` instead of sending an empty-string path.
    #
    # @param paths [Array<String>]
    # @return [DataFrame]
    def load(*paths)
      path_list = paths.flatten.compact.map(&:to_s)
      ds = Proto::Read::DataSource.new(options: @options, paths: path_list)
      # Format and schema are only assigned when they were configured.
      ds.format = @format if @format
      ds.schema = @schema if @schema
      read_relation(data_source: ds)
    end

    # Read a registered table or view. The options recorded so far are sent
    # along with the table identifier.
    #
    # @param name [String]
    # @return [DataFrame]
    def table(name)
      nt = Proto::Read::NamedTable.new(unparsed_identifier: name.to_s, options: @options)
      read_relation(named_table: nt)
    end

    # @return [DataFrame] CSV at `paths`.
    def csv(*paths) = format("csv").load(*paths)
    # @return [DataFrame] JSON at `paths`.
    def json(*paths) = format("json").load(*paths)
    # @return [DataFrame] Parquet at `paths`.
    def parquet(*paths) = format("parquet").load(*paths)
    # @return [DataFrame] ORC at `paths`.
    def orc(*paths) = format("orc").load(*paths)
    # @return [DataFrame] text at `paths` (one `value` column per line).
    def text(*paths) = format("text").load(*paths)

    # Read from a JDBC source.
    #
    # The explicit `url` and `table` arguments always take precedence over
    # `"url"` / `"dbtable"` entries in `properties`, matching Spark's own
    # `DataFrameReader.jdbc`.
    #
    # @param url [String] the JDBC URL.
    # @param table [String] the table name (or subquery).
    # @param properties [Hash, nil] connection properties (`user`, `password`, ...).
    # @return [DataFrame]
    def jdbc(url, table, properties = {})
      opts = (properties || {}).transform_keys(&:to_s)
      # Merge the explicit arguments last so they cannot be overridden.
      opts = opts.merge("url" => url, "dbtable" => table)
      format("jdbc").options(opts).load
    end

    private

    # Wrap a `Proto::Read` built from `read_kw` in a relation and return it as
    # a {DataFrame} bound to this reader's session.
    def read_relation(**read_kw)
      read = Proto::Read.new(**read_kw)
      DataFrame.new(@session, PlanBuilder.relation(@session, read: read))
    end
  end
end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
module SparkConnect
  # An ordered collection of named fields representing a single row of a
  # {DataFrame}, returned by {DataFrame#collect}, {DataFrame#take}, etc.
  #
  # Fields are accessible positionally (`row[0]`), by name (`row["id"]` or
  # `row.id`), and the whole row converts cleanly to a Hash or Array.
  #
  # @example
  #   row = SparkConnect::Row.new({ "id" => 1, "name" => "alice" })
  #   row[0]       #=> 1
  #   row["name"]  #=> "alice"
  #   row.name     #=> "alice"
  #   row.to_h     #=> {"id"=>1, "name"=>"alice"}
  class Row
    include Enumerable

    # @return [Array<String>] the field names, in order.
    attr_reader :fields

    # @return [Array] the field values, in order.
    attr_reader :values

    # Field names are always stored as strings, whichever form is used.
    #
    # @overload initialize(hash)
    #   @param hash [Hash] an ordered mapping of field name to value.
    # @overload initialize(values, fields:)
    #   @param values [Array] positional values
    #   @param fields [Array<String>] field names
    def initialize(data = {}, fields: nil)
      if fields
        @fields = fields.map(&:to_s)
        @values = data
      else
        @fields = data.keys.map(&:to_s)
        @values = data.values
      end
    end

    # Look up a value by zero-based index or by field name.
    #
    # @param key [Integer, String, Symbol]
    # @return [Object, nil] `nil` when the index or name does not exist.
    def [](key)
      case key
      when Integer then @values[key]
      else
        idx = @fields.index(key.to_s)
        idx && @values[idx]
      end
    end

    # @return [Hash] an ordered Hash of field name to value.
    def to_h
      @fields.zip(@values).to_h
    end
    alias as_dict to_h

    # @return [Array] a copy of the row's values, in order.
    def to_a
      @values.dup
    end

    # Iterate over the values in order.
    def each(&block) = @values.each(&block)

    # @return [Integer] number of fields.
    def length = @values.length
    alias size length

    # @return [Object] the value for `name`, raising if the field is absent.
    # @raise [IllegalArgumentError] when no field is called `name`.
    def field(name)
      idx = @fields.index(name.to_s)
      raise IllegalArgumentError, "No such field: #{name}" unless idx

      @values[idx]
    end

    # Value equality: same field names and `==` values (so `1 == 1.0`).
    def ==(other)
      other.is_a?(Row) && other.fields == fields && other.values == values
    end

    # Strict equality used for Hash keys. Unlike {#==} it compares values with
    # `eql?` (so `1` and `1.0` differ), which keeps it consistent with {#hash}:
    # rows that are `eql?` always have the same hash code.
    def eql?(other)
      other.is_a?(Row) && other.fields.eql?(fields) && other.values.eql?(values)
    end

    def hash = [fields, values].hash

    # Allows `row.field_name` access for field names that are valid method names.
    #
    # Only reached when no real method of that name exists, so a field whose
    # name collides with a method Row already has (`fields`, `values`, `hash`,
    # Enumerable methods such as `count`, ...) must be read with `row["count"]`.
    def method_missing(name, *args)
      key = name.to_s
      if args.empty? && @fields.include?(key)
        self[key]
      else
        super
      end
    end

    def respond_to_missing?(name, include_private = false)
      @fields.include?(name.to_s) || super
    end

    # @return [String] e.g. `Row(id=1, name="alice")`.
    def to_s
      "Row(#{@fields.zip(@values).map { |k, v| "#{k}=#{v.inspect}" }.join(', ')})"
    end
    alias inspect to_s
  end
end