spark-connect 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +82 -0
- data/LICENSE +202 -0
- data/NOTICE +16 -0
- data/README.md +166 -0
- data/lib/spark-connect.rb +5 -0
- data/lib/spark_connect/arrow.rb +115 -0
- data/lib/spark_connect/catalog.rb +190 -0
- data/lib/spark_connect/channel_builder.rb +134 -0
- data/lib/spark_connect/client.rb +264 -0
- data/lib/spark_connect/column.rb +379 -0
- data/lib/spark_connect/conf.rb +79 -0
- data/lib/spark_connect/data_frame.rb +828 -0
- data/lib/spark_connect/errors.rb +58 -0
- data/lib/spark_connect/functions.rb +903 -0
- data/lib/spark_connect/grouped_data.rb +101 -0
- data/lib/spark_connect/na_functions.rb +98 -0
- data/lib/spark_connect/observation.rb +61 -0
- data/lib/spark_connect/pipelines.rb +221 -0
- data/lib/spark_connect/plan.rb +39 -0
- data/lib/spark_connect/proto/spark/connect/base_pb.rb +118 -0
- data/lib/spark_connect/proto/spark/connect/base_services_pb.rb +82 -0
- data/lib/spark_connect/proto/spark/connect/catalog_pb.rb +46 -0
- data/lib/spark_connect/proto/spark/connect/commands_pb.rb +67 -0
- data/lib/spark_connect/proto/spark/connect/common_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/expressions_pb.rb +63 -0
- data/lib/spark_connect/proto/spark/connect/ml_common_pb.rb +22 -0
- data/lib/spark_connect/proto/spark/connect/ml_pb.rb +32 -0
- data/lib/spark_connect/proto/spark/connect/pipelines_pb.rb +45 -0
- data/lib/spark_connect/proto/spark/connect/relations_pb.rb +102 -0
- data/lib/spark_connect/proto/spark/connect/types_pb.rb +46 -0
- data/lib/spark_connect/proto.rb +32 -0
- data/lib/spark_connect/reader.rb +98 -0
- data/lib/spark_connect/row.rb +105 -0
- data/lib/spark_connect/session.rb +317 -0
- data/lib/spark_connect/stat_functions.rb +109 -0
- data/lib/spark_connect/streaming.rb +351 -0
- data/lib/spark_connect/types.rb +490 -0
- data/lib/spark_connect/version.rb +11 -0
- data/lib/spark_connect/window.rb +119 -0
- data/lib/spark_connect/writer.rb +208 -0
- data/lib/spark_connect.rb +58 -0
- data/proto/spark/connect/base.proto +1275 -0
- data/proto/spark/connect/catalog.proto +243 -0
- data/proto/spark/connect/commands.proto +553 -0
- data/proto/spark/connect/common.proto +179 -0
- data/proto/spark/connect/expressions.proto +557 -0
- data/proto/spark/connect/ml.proto +147 -0
- data/proto/spark/connect/ml_common.proto +64 -0
- data/proto/spark/connect/pipelines.proto +307 -0
- data/proto/spark/connect/relations.proto +1252 -0
- data/proto/spark/connect/types.proto +227 -0
- metadata +149 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: spark/connect/ml.proto
|
|
4
|
+
|
|
5
|
+
require 'google/protobuf'
|
|
6
|
+
|
|
7
|
+
require 'spark/connect/relations_pb'
|
|
8
|
+
require 'spark/connect/expressions_pb'
|
|
9
|
+
require 'spark/connect/ml_common_pb'
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
descriptor_data = "\n\x16spark/connect/ml.proto\x12\rspark.connect\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/ml_common.proto\"\xa6\x0c\n\tMlCommand\x12+\n\x03\x66it\x18\x01 \x01(\x0b\x32\x1c.spark.connect.MlCommand.FitH\x00\x12%\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00\x12\x31\n\x06\x64\x65lete\x18\x03 \x01(\x0b\x32\x1f.spark.connect.MlCommand.DeleteH\x00\x12/\n\x05write\x18\x04 \x01(\x0b\x32\x1e.spark.connect.MlCommand.WriteH\x00\x12-\n\x04read\x18\x05 \x01(\x0b\x32\x1d.spark.connect.MlCommand.ReadH\x00\x12\x35\n\x08\x65valuate\x18\x06 \x01(\x0b\x32!.spark.connect.MlCommand.EvaluateH\x00\x12:\n\x0b\x63lean_cache\x18\x07 \x01(\x0b\x32#.spark.connect.MlCommand.CleanCacheH\x00\x12?\n\x0eget_cache_info\x18\x08 \x01(\x0b\x32%.spark.connect.MlCommand.GetCacheInfoH\x00\x12@\n\x0e\x63reate_summary\x18\t \x01(\x0b\x32&.spark.connect.MlCommand.CreateSummaryH\x00\x12?\n\x0eget_model_size\x18\n \x01(\x0b\x32%.spark.connect.MlCommand.GetModelSizeH\x00\x1a\x96\x01\n\x03\x46it\x12,\n\testimator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12,\n\x06params\x18\x02 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x00\x88\x01\x01\x12(\n\x07\x64\x61taset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationB\t\n\x07_params\x1a\\\n\x06\x44\x65lete\x12*\n\x08obj_refs\x18\x01 \x03(\x0b\x32\x18.spark.connect.ObjectRef\x12\x17\n\nevict_only\x18\x02 \x01(\x08H\x00\x88\x01\x01\x42\r\n\x0b_evict_only\x1a\x0c\n\nCleanCache\x1a\x0e\n\x0cGetCacheInfo\x1a\xd4\x02\n\x05Write\x12-\n\x08operator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00\x12+\n\x07obj_ref\x18\x02 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12,\n\x06params\x18\x03 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x01\x88\x01\x01\x12\x0c\n\x04path\x18\x04 \x01(\t\x12\x1d\n\x10should_overwrite\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12<\n\x07options\x18\x06 
\x03(\x0b\x32+.spark.connect.MlCommand.Write.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x06\n\x04typeB\t\n\x07_paramsB\x13\n\x11_should_overwrite\x1a\x41\n\x04Read\x12+\n\x08operator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12\x0c\n\x04path\x18\x02 \x01(\t\x1a\x9b\x01\n\x08\x45valuate\x12,\n\tevaluator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperator\x12,\n\x06params\x18\x02 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x00\x88\x01\x01\x12(\n\x07\x64\x61taset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationB\t\n\x07_params\x1a\x66\n\rCreateSummary\x12+\n\tmodel_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRef\x12(\n\x07\x64\x61taset\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x1a;\n\x0cGetModelSize\x12+\n\tmodel_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefB\t\n\x07\x63ommand\"\x8c\x03\n\x0fMlCommandResult\x12\x32\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00\x12\x11\n\x07summary\x18\x02 \x01(\tH\x00\x12\x46\n\roperator_info\x18\x03 \x01(\x0b\x32-.spark.connect.MlCommandResult.MlOperatorInfoH\x00\x1a\xda\x01\n\x0eMlOperatorInfo\x12+\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12\x0e\n\x04name\x18\x02 \x01(\tH\x00\x12\x10\n\x03uid\x18\x03 \x01(\tH\x01\x88\x01\x01\x12,\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsH\x02\x88\x01\x01\x12\x1c\n\x0fwarning_message\x18\x05 \x01(\tH\x03\x88\x01\x01\x42\x06\n\x04typeB\x06\n\x04_uidB\t\n\x07_paramsB\x12\n\x10_warning_messageB\r\n\x0bresult_typeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
|
|
13
|
+
|
|
14
|
+
pool = ::Google::Protobuf::DescriptorPool.generated_pool
|
|
15
|
+
pool.add_serialized_file(descriptor_data)
|
|
16
|
+
|
|
17
|
+
module Spark
|
|
18
|
+
module Connect
|
|
19
|
+
MlCommand = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand").msgclass
|
|
20
|
+
MlCommand::Fit = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Fit").msgclass
|
|
21
|
+
MlCommand::Delete = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Delete").msgclass
|
|
22
|
+
MlCommand::CleanCache = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.CleanCache").msgclass
|
|
23
|
+
MlCommand::GetCacheInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.GetCacheInfo").msgclass
|
|
24
|
+
MlCommand::Write = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Write").msgclass
|
|
25
|
+
MlCommand::Read = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Read").msgclass
|
|
26
|
+
MlCommand::Evaluate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.Evaluate").msgclass
|
|
27
|
+
MlCommand::CreateSummary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.CreateSummary").msgclass
|
|
28
|
+
MlCommand::GetModelSize = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommand.GetModelSize").msgclass
|
|
29
|
+
MlCommandResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommandResult").msgclass
|
|
30
|
+
MlCommandResult::MlOperatorInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlCommandResult.MlOperatorInfo").msgclass
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: spark/connect/pipelines.proto
|
|
4
|
+
|
|
5
|
+
require 'google/protobuf'
|
|
6
|
+
|
|
7
|
+
require 'google/protobuf/any_pb'
|
|
8
|
+
require 'google/protobuf/timestamp_pb'
|
|
9
|
+
require 'spark/connect/common_pb'
|
|
10
|
+
require 'spark/connect/relations_pb'
|
|
11
|
+
require 'spark/connect/types_pb'
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
descriptor_data = "\n\x1dspark/connect/pipelines.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto\"\xa8\x1c\n\x0fPipelineCommand\x12S\n\x15\x63reate_dataflow_graph\x18\x01 \x01(\x0b\x32\x32.spark.connect.PipelineCommand.CreateDataflowGraphH\x00\x12\x44\n\rdefine_output\x18\x02 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00\x12@\n\x0b\x64\x65\x66ine_flow\x18\x03 \x01(\x0b\x32).spark.connect.PipelineCommand.DefineFlowH\x00\x12O\n\x13\x64rop_dataflow_graph\x18\x04 \x01(\x0b\x32\x30.spark.connect.PipelineCommand.DropDataflowGraphH\x00\x12<\n\tstart_run\x18\x05 \x01(\x0b\x32\'.spark.connect.PipelineCommand.StartRunH\x00\x12Z\n\x19\x64\x65\x66ine_sql_graph_elements\x18\x06 \x01(\x0b\x32\x35.spark.connect.PipelineCommand.DefineSqlGraphElementsH\x00\x12z\n*get_query_function_execution_signal_stream\x18\x07 \x01(\x0b\x32\x44.spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStreamH\x00\x12i\n!define_flow_query_function_result\x18\x08 \x01(\x0b\x32<.spark.connect.PipelineCommand.DefineFlowQueryFunctionResultH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x1a\xfe\x01\n\x13\x43reateDataflowGraph\x12\x1c\n\x0f\x64\x65\x66\x61ult_catalog\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65\x66\x61ult_database\x18\x02 \x01(\tH\x01\x88\x01\x01\x12Q\n\x08sql_conf\x18\x05 \x03(\x0b\x32?.spark.connect.PipelineCommand.CreateDataflowGraph.SqlConfEntry\x1a.\n\x0cSqlConfEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x12\n\x10_default_catalogB\x13\n\x11_default_database\x1aI\n\x11\x44ropDataflowGraph\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\xa4\x08\n\x0c\x44\x65\x66ineOutput\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0boutput_name\x18\x02 
\x01(\tH\x02\x88\x01\x01\x12\x33\n\x0boutput_type\x18\x03 \x01(\x0e\x32\x19.spark.connect.OutputTypeH\x03\x88\x01\x01\x12\x14\n\x07\x63omment\x18\x04 \x01(\tH\x04\x88\x01\x01\x12\x44\n\x14source_code_location\x18\x05 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05\x88\x01\x01\x12Q\n\rtable_details\x18\x06 \x01(\x0b\x32\x38.spark.connect.PipelineCommand.DefineOutput.TableDetailsH\x00\x12O\n\x0csink_details\x18\x07 \x01(\x0b\x32\x37.spark.connect.PipelineCommand.DefineOutput.SinkDetailsH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x1a\xdb\x02\n\x0cTableDetails\x12g\n\x10table_properties\x18\x01 \x03(\x0b\x32M.spark.connect.PipelineCommand.DefineOutput.TableDetails.TablePropertiesEntry\x12\x16\n\x0epartition_cols\x18\x02 \x03(\t\x12\x13\n\x06\x66ormat\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x33\n\x10schema_data_type\x18\x04 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x12\x17\n\rschema_string\x18\x05 \x01(\tH\x00\x12\x1a\n\x12\x63lustering_columns\x18\x06 \x03(\t\x1a\x36\n\x14TablePropertiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x08\n\x06schemaB\t\n\x07_format\x1a\xb4\x01\n\x0bSinkDetails\x12U\n\x07options\x18\x01 \x03(\x0b\x32\x44.spark.connect.PipelineCommand.DefineOutput.SinkDetails.OptionsEntry\x12\x13\n\x06\x66ormat\x18\x02 \x01(\tH\x00\x88\x01\x01\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0e\n\x0c_output_nameB\x0e\n\x0c_output_typeB\n\n\x08_commentB\x17\n\x15_source_code_location\x1a\xe4\x05\n\nDefineFlow\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tflow_name\x18\x02 \x01(\tH\x02\x88\x01\x01\x12 \n\x13target_dataset_name\x18\x03 \x01(\tH\x03\x88\x01\x01\x12H\n\x08sql_conf\x18\x04 \x03(\x0b\x32\x36.spark.connect.PipelineCommand.DefineFlow.SqlConfEntry\x12\x16\n\tclient_id\x18\x05 
\x01(\tH\x04\x88\x01\x01\x12\x44\n\x14source_code_location\x18\x06 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05\x88\x01\x01\x12\x63\n\x15relation_flow_details\x18\x07 \x01(\x0b\x32\x42.spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetailsH\x00\x12*\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12\x11\n\x04once\x18\x08 \x01(\x08H\x06\x88\x01\x01\x1a.\n\x0cSqlConfEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aW\n\x18WriteRelationFlowDetails\x12.\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00\x88\x01\x01\x42\x0b\n\t_relation\x1a\x30\n\x08Response\x12\x16\n\tflow_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0c\n\n_flow_nameB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0c\n\n_flow_nameB\x16\n\x14_target_dataset_nameB\x0c\n\n_client_idB\x17\n\x15_source_code_locationB\x07\n\x05_once\x1a\xeb\x01\n\x08StartRun\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1e\n\x16\x66ull_refresh_selection\x18\x02 \x03(\t\x12\x1d\n\x10\x66ull_refresh_all\x18\x03 \x01(\x08H\x01\x88\x01\x01\x12\x19\n\x11refresh_selection\x18\x04 \x03(\t\x12\x10\n\x03\x64ry\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x14\n\x07storage\x18\x06 \x01(\tH\x03\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x13\n\x11_full_refresh_allB\x06\n\x04_dryB\n\n\x08_storage\x1a\xa0\x01\n\x16\x44\x65\x66ineSqlGraphElements\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rsql_file_path\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08sql_text\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x10\n\x0e_sql_file_pathB\x0b\n\t_sql_text\x1a\x83\x01\n%GetQueryFunctionExecutionSignalStream\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tclient_id\x18\x02 \x01(\tH\x01\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x0c\n\n_client_id\x1a\xb8\x01\n\x1d\x44\x65\x66ineFlowQueryFunctionResult\x12\x16\n\tflow_name\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x02 \x01(\tH\x01\x88\x01\x01\x12.\n\x08relation\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x02\x88\x01\x01\x42\x0c\n\n_flow_nameB\x14\n\x12_dataflow_graph_idB\x0b\n\t_relationB\x0e\n\x0c\x63ommand_type\"\xf3\x04\n\x15PipelineCommandResult\x12\x66\n\x1c\x63reate_dataflow_graph_result\x18\x01 \x01(\x0b\x32>.spark.connect.PipelineCommandResult.CreateDataflowGraphResultH\x00\x12W\n\x14\x64\x65\x66ine_output_result\x18\x02 \x01(\x0b\x32\x37.spark.connect.PipelineCommandResult.DefineOutputResultH\x00\x12S\n\x12\x64\x65\x66ine_flow_result\x18\x03 \x01(\x0b\x32\x35.spark.connect.PipelineCommandResult.DefineFlowResultH\x00\x1aQ\n\x19\x43reateDataflowGraphResult\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1aq\n\x12\x44\x65\x66ineOutputResult\x12\x43\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00\x88\x01\x01\x42\x16\n\x14_resolved_identifier\x1ao\n\x10\x44\x65\x66ineFlowResult\x12\x43\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00\x88\x01\x01\x42\x16\n\x14_resolved_identifierB\r\n\x0bresult_type\"B\n\x13PipelineEventResult\x12+\n\x05\x65vent\x18\x01 \x01(\x0b\x32\x1c.spark.connect.PipelineEvent\"`\n\rPipelineEvent\x12-\n\ttimestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x14\n\x07message\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\n\n\x08_message\"\xc0\x01\n\x12SourceCodeLocation\x12\x16\n\tfile_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0bline_number\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x1c\n\x0f\x64\x65\x66inition_path\x18\x03 \x01(\tH\x02\x88\x01\x01\x12(\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyB\x0c\n\n_file_nameB\x0e\n\x0c_line_numberB\x12\n\x10_definition_path\":\n$PipelineQueryFunctionExecutionSignal\x12\x12\n\nflow_names\x18\x01 \x03(\t\"\xd1\x01\n\x17PipelineAnalysisContext\x12\x1e\n\x11\x64\x61taflow_graph_id\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x1c\n\x0f\x64\x65\x66inition_path\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tflow_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12(\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyB\x14\n\x12_dataflow_graph_idB\x12\n\x10_definition_pathB\x0c\n\n_flow_name*i\n\nOutputType\x12\x1b\n\x17OUTPUT_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MATERIALIZED_VIEW\x10\x01\x12\t\n\x05TABLE\x10\x02\x12\x12\n\x0eTEMPORARY_VIEW\x10\x03\x12\x08\n\x04SINK\x10\x04\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
|
|
15
|
+
|
|
16
|
+
pool = ::Google::Protobuf::DescriptorPool.generated_pool
|
|
17
|
+
pool.add_serialized_file(descriptor_data)
|
|
18
|
+
|
|
19
|
+
module Spark
|
|
20
|
+
module Connect
|
|
21
|
+
PipelineCommand = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand").msgclass
|
|
22
|
+
PipelineCommand::CreateDataflowGraph = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.CreateDataflowGraph").msgclass
|
|
23
|
+
PipelineCommand::DropDataflowGraph = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DropDataflowGraph").msgclass
|
|
24
|
+
PipelineCommand::DefineOutput = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput").msgclass
|
|
25
|
+
PipelineCommand::DefineOutput::TableDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput.TableDetails").msgclass
|
|
26
|
+
PipelineCommand::DefineOutput::SinkDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineOutput.SinkDetails").msgclass
|
|
27
|
+
PipelineCommand::DefineFlow = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow").msgclass
|
|
28
|
+
PipelineCommand::DefineFlow::WriteRelationFlowDetails = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetails").msgclass
|
|
29
|
+
PipelineCommand::DefineFlow::Response = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlow.Response").msgclass
|
|
30
|
+
PipelineCommand::StartRun = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.StartRun").msgclass
|
|
31
|
+
PipelineCommand::DefineSqlGraphElements = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineSqlGraphElements").msgclass
|
|
32
|
+
PipelineCommand::GetQueryFunctionExecutionSignalStream = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStream").msgclass
|
|
33
|
+
PipelineCommand::DefineFlowQueryFunctionResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommand.DefineFlowQueryFunctionResult").msgclass
|
|
34
|
+
PipelineCommandResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult").msgclass
|
|
35
|
+
PipelineCommandResult::CreateDataflowGraphResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.CreateDataflowGraphResult").msgclass
|
|
36
|
+
PipelineCommandResult::DefineOutputResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.DefineOutputResult").msgclass
|
|
37
|
+
PipelineCommandResult::DefineFlowResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineCommandResult.DefineFlowResult").msgclass
|
|
38
|
+
PipelineEventResult = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineEventResult").msgclass
|
|
39
|
+
PipelineEvent = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineEvent").msgclass
|
|
40
|
+
SourceCodeLocation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SourceCodeLocation").msgclass
|
|
41
|
+
PipelineQueryFunctionExecutionSignal = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineQueryFunctionExecutionSignal").msgclass
|
|
42
|
+
PipelineAnalysisContext = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PipelineAnalysisContext").msgclass
|
|
43
|
+
OutputType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.OutputType").enummodule
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: spark/connect/relations.proto
|
|
4
|
+
|
|
5
|
+
require 'google/protobuf'
|
|
6
|
+
|
|
7
|
+
require 'google/protobuf/any_pb'
|
|
8
|
+
require 'spark/connect/expressions_pb'
|
|
9
|
+
require 'spark/connect/types_pb'
|
|
10
|
+
require 'spark/connect/catalog_pb'
|
|
11
|
+
require 'spark/connect/common_pb'
|
|
12
|
+
require 'spark/connect/ml_common_pb'
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
descriptor_data = "\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto\"\xa3\x18\n\x08Relation\x12-\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommon\x12#\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00\x12)\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00\x12\'\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00\x12#\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00\x12-\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00\x12#\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00\x12%\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00\x12-\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00\x12!\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00\x12\x36\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00\x12\'\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00\x12\'\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00\x12\x31\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00\x12%\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00\x12\x36\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00\x12\x31\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00\x12$\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00\x12\x41\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00\x12\x30\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00\x12#\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00\x12#\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00\x12\x32\n\x0cwith_columns\x18\x17 
\x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00\x12#\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00\x12)\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00\x12,\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00\x12K\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00\x12\x36\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00\x12\x38\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00\x12%\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00\x12,\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00\x12\x31\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00\x12\x36\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00\x12K\n\x1a\x61pply_in_pandas_with_state\x18\" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00\x12\x30\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00\x12\x43\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32\".spark.connect.CachedLocalRelationH\x00\x12\x45\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00\x12h\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00\x12-\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00\x12\x62\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00\x12\x36\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00\x12-\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00\x12X\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00\x12\x32\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00\x12R\n\x1d\x63hunked_cached_local_relation\x18- 
\x01(\x0b\x32).spark.connect.ChunkedCachedLocalRelationH\x00\x12(\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00\x12(\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00\x12+\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00\x12-\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00\x12/\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00\x12/\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00\x12%\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00\x12\'\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00\x12<\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00\x12\x32\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00\x12\x30\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00\x12*\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00\x12\x31\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00\x12*\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12*\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00\x42\n\n\x08rel_type\"\x99\x03\n\nMlRelation\x12\x38\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00\x12%\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00\x12;\n\x15model_summary_dataset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x01\x88\x01\x01\x1a\xc7\x01\n\tTransform\x12+\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00\x12\x30\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00\x12&\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.Relation\x12\'\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset\"\x9e\x02\n\x05\x46\x65tch\x12)\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRef\x12,\n\x07methods\x18\x02 
\x03(\x0b\x32\x1b.spark.connect.Fetch.Method\x1a\xbb\x01\n\x06Method\x12\x0e\n\x06method\x18\x01 \x01(\t\x12.\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.Args\x1aq\n\x04\x41rgs\x12\x32\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00\x12(\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00\x42\x0b\n\targs_type\"\t\n\x07Unknown\"r\n\x0eRelationCommon\x12\x17\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01\x12\x14\n\x07plan_id\x18\x02 \x01(\x03H\x00\x88\x01\x01\x12%\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginB\n\n\x08_plan_id\"\x92\x03\n\x03SQL\x12\r\n\x05query\x18\x01 \x01(\t\x12.\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01\x12\x37\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01\x12?\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntry\x12\x30\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.Expression\x1aN\n\tArgsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x30\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.Literal:\x02\x38\x01\x1aP\n\x13NamedArgumentsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression:\x02\x38\x01\"c\n\rWithRelations\x12%\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12+\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.Relation\"\x91\x04\n\x04Read\x12\x35\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00\x12\x35\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00\x12\x14\n\x0cis_streaming\x18\x03 \x01(\x08\x1a\x97\x01\n\nNamedTable\x12\x1b\n\x13unparsed_identifier\x18\x01 \x01(\t\x12<\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xdd\x01\n\nDataSource\x12\x13\n\x06\x66ormat\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x13\n\x06schema\x18\x02 \x01(\tH\x01\x88\x01\x01\x12<\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntry\x12\r\n\x05paths\x18\x04 \x03(\t\x12\x12\n\npredicates\x18\x05 \x03(\t\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type\"a\n\x07Project\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12.\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"^\n\x06\x46ilter\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12,\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\"\xb6\x04\n\x04Join\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x31\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12/\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinType\x12\x15\n\rusing_columns\x18\x05 \x03(\t\x12=\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00\x88\x01\x01\x1a?\n\x0cJoinDataType\x12\x16\n\x0eis_left_struct\x18\x01 \x01(\x08\x12\x17\n\x0fis_right_struct\x18\x02 \x01(\x08\"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type\"\x99\x03\n\x0cSetOperation\x12+\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12,\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12:\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpType\x12\x13\n\x06is_all\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x14\n\x07\x62y_name\x18\x05 
\x01(\x08H\x01\x88\x01\x01\x12\"\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02\x88\x01\x01\"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns\">\n\x05Limit\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05limit\x18\x02 \x01(\x05\"@\n\x06Offset\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0e\n\x06offset\x18\x02 \x01(\x05\"=\n\x04Tail\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05limit\x18\x02 \x01(\x05\"\x92\x05\n\tAggregate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x36\n\ngroup_type\x18\x02 \x01(\x0e\x32\".spark.connect.Aggregate.GroupType\x12\x37\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x38\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12-\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.Pivot\x12<\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSets\x1a\x62\n\x05Pivot\x12&\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x31\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.Literal\x1a?\n\x0cGroupingSets\x12/\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05\"\x88\x01\n\x04Sort\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x32\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrder\x12\x16\n\tis_global\x18\x03 \x01(\x08H\x00\x88\x01\x01\x42\x0c\n\n_is_global\"p\n\x04\x44rop\x12&\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.Relation\x12*\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x14\n\x0c\x63olumn_names\x18\x03 \x03(\t\"\xb9\x01\n\x0b\x44\x65\x64uplicate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x14\n\x0c\x63olumn_names\x18\x02 \x03(\t\x12 \n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00\x88\x01\x01\x12\x1d\n\x10within_watermark\x18\x04 \x01(\x08H\x01\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark\"K\n\rLocalRelation\x12\x11\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00\x88\x01\x01\x12\x13\n\x06schema\x18\x02 \x01(\tH\x01\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema\"B\n\x13\x43\x61\x63hedLocalRelation\x12\x0c\n\x04hash\x18\x03 \x01(\tJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId\"X\n\x1a\x43hunkedCachedLocalRelation\x12\x12\n\ndataHashes\x18\x01 \x03(\t\x12\x17\n\nschemaHash\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\r\n\x0b_schemaHash\"+\n\x14\x43\x61\x63hedRemoteRelation\x12\x13\n\x0brelation_id\x18\x01 \x01(\t\"\xc7\x01\n\x06Sample\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x13\n\x0blower_bound\x18\x02 \x01(\x01\x12\x13\n\x0bupper_bound\x18\x03 \x01(\x01\x12\x1d\n\x10with_replacement\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x11\n\x04seed\x18\x05 \x01(\x03H\x01\x88\x01\x01\x12\x1b\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08\x42\x13\n\x11_with_replacementB\x07\n\x05_seed\"p\n\x05Range\x12\x12\n\x05start\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x03\x12\x0c\n\x04step\x18\x03 \x01(\x03\x12\x1b\n\x0enum_partitions\x18\x04 \x01(\x05H\x01\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions\"Y\n\rSubqueryAlias\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\r\n\x05\x61lias\x18\x02 \x01(\t\x12\x11\n\tqualifier\x18\x03 \x03(\t\"o\n\x0bRepartition\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x16\n\x0enum_partitions\x18\x02 \x01(\x05\x12\x14\n\x07shuffle\x18\x03 
\x01(\x08H\x00\x88\x01\x01\x42\n\n\x08_shuffle\"j\n\nShowString\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x10\n\x08num_rows\x18\x02 \x01(\x05\x12\x10\n\x08truncate\x18\x03 \x01(\x05\x12\x10\n\x08vertical\x18\x04 \x01(\x08\"X\n\nHtmlString\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x10\n\x08num_rows\x18\x02 \x01(\x05\x12\x10\n\x08truncate\x18\x03 \x01(\x05\"I\n\x0bStatSummary\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x12\n\nstatistics\x18\x02 \x03(\t\"D\n\x0cStatDescribe\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\"R\n\x0cStatCrosstab\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\"M\n\x07StatCov\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\"n\n\x08StatCorr\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ol1\x18\x02 \x01(\t\x12\x0c\n\x04\x63ol2\x18\x03 \x01(\t\x12\x13\n\x06method\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\t\n\x07_method\"y\n\x12StatApproxQuantile\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x15\n\rprobabilities\x18\x03 \x03(\x01\x12\x16\n\x0erelative_error\x18\x04 \x01(\x01\"g\n\rStatFreqItems\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x14\n\x07support\x18\x03 \x01(\x01H\x00\x88\x01\x01\x42\n\n\x08_support\"\x85\x02\n\x0cStatSampleBy\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x37\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.Fraction\x12\x11\n\x04seed\x18\x05 \x01(\x03H\x00\x88\x01\x01\x1aP\n\x08\x46raction\x12\x32\n\x07stratum\x18\x01 
\x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x10\n\x08\x66raction\x18\x02 \x01(\x01\x42\x07\n\x05_seed\"q\n\x06NAFill\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x31\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.Literal\"l\n\x06NADrop\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12\x1a\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00\x88\x01\x01\x42\x10\n\x0e_min_non_nulls\"\xf8\x01\n\tNAReplace\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x63ols\x18\x02 \x03(\t\x12:\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.Replacement\x1ay\n\x0bReplacement\x12\x34\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x34\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.Literal\"D\n\x04ToDF\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x14\n\x0c\x63olumn_names\x18\x02 \x03(\t\"\xbb\x02\n\x12WithColumnsRenamed\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12W\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01\x12\x39\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.Rename\x1a\x37\n\x15RenameColumnsMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x06Rename\x12\x10\n\x08\x63ol_name\x18\x01 \x01(\t\x12\x14\n\x0cnew_col_name\x18\x02 \x01(\t\"g\n\x0bWithColumns\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.Alias\"d\n\rWithWatermark\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x12\n\nevent_time\x18\x02 \x01(\t\x12\x17\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\t\"k\n\x04Hint\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04name\x18\x02 
\x01(\t\x12-\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"\x86\x02\n\x07Unpivot\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x32\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00\x88\x01\x01\x12\x1c\n\x14variable_column_name\x18\x04 \x01(\t\x12\x19\n\x11value_column_name\x18\x05 \x01(\t\x1a\x33\n\x06Values\x12)\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionB\t\n\x07_values\"e\n\tTranspose\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\"d\n\x1dUnresolvedTableValuedFunction\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\"[\n\x08ToSchema\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\'\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\"\xa5\x01\n\x17RepartitionByExpression\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x32\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x1b\n\x0enum_partitions\x18\x03 \x01(\x05H\x00\x88\x01\x01\x42\x11\n\x0f_num_partitions\"\xc5\x01\n\rMapPartitions\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12<\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x17\n\nis_barrier\x18\x03 \x01(\x08H\x00\x88\x01\x01\x12\x17\n\nprofile_id\x18\x04 \x01(\x05H\x01\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id\"\x9e\x05\n\x08GroupMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x37\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x36\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12.\n\rinitial_input\x18\x05 
\x01(\x0b\x32\x17.spark.connect.Relation\x12?\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.Expression\x12%\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00\x88\x01\x01\x12\x18\n\x0boutput_mode\x18\x08 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0ctimeout_conf\x18\t \x01(\tH\x02\x88\x01\x01\x12\x32\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03\x88\x01\x01\x12M\n\x19transform_with_state_info\x18\x0b \x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info\"\xb2\x01\n\x16TransformWithStateInfo\x12\x11\n\ttime_mode\x18\x01 \x01(\t\x12#\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x33\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema\"\x94\x03\n\nCoGroupMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12=\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12&\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.Relation\x12=\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12<\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.Expression\"\x8f\x02\n\x16\x41pplyInPandasWithState\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x37\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12<\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunction\x12\x15\n\routput_schema\x18\x04 \x01(\t\x12\x14\n\x0cstate_schema\x18\x05 \x01(\t\x12\x13\n\x0boutput_mode\x18\x06 \x01(\t\x12\x14\n\x0ctimeout_conf\x18\x07 
\x01(\t\"\xc0\x01\n$CommonInlineUserDefinedTableFunction\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x15\n\rdeterministic\x18\x02 \x01(\x08\x12,\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x30\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00\x42\n\n\x08\x66unction\"\x87\x01\n\nPythonUDTF\x12\x31\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x88\x01\x01\x12\x11\n\teval_type\x18\x02 \x01(\x05\x12\x0f\n\x07\x63ommand\x18\x03 \x01(\x0c\x12\x12\n\npython_ver\x18\x04 \x01(\tB\x0e\n\x0c_return_type\"\x7f\n!CommonInlineUserDefinedDataSource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12=\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00\x42\r\n\x0b\x64\x61ta_source\"7\n\x10PythonDataSource\x12\x0f\n\x07\x63ommand\x18\x01 \x01(\x0c\x12\x12\n\npython_ver\x18\x02 \x01(\t\"r\n\x0e\x43ollectMetrics\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04name\x18\x02 \x01(\t\x12*\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\"\xd8\x02\n\x05Parse\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x30\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormat\x12,\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00\x88\x01\x01\x12\x32\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntry\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema\"\xee\x02\n\x08\x41sOfJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12-\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12.\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.Expression\x12,\n\tjoin_expr\x18\x05 
\x01(\x0b\x32\x19.spark.connect.Expression\x12\x15\n\rusing_columns\x18\x06 \x03(\t\x12\x11\n\tjoin_type\x18\x07 \x01(\t\x12,\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x1b\n\x13\x61llow_exact_matches\x18\t \x01(\x08\x12\x11\n\tdirection\x18\n \x01(\t\"\xc0\x01\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x31\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.Expression\x12/\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
|
|
16
|
+
|
|
17
|
+
pool = ::Google::Protobuf::DescriptorPool.generated_pool
|
|
18
|
+
pool.add_serialized_file(descriptor_data)
|
|
19
|
+
|
|
20
|
+
module Spark
|
|
21
|
+
module Connect
|
|
22
|
+
Relation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Relation").msgclass
|
|
23
|
+
MlRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlRelation").msgclass
|
|
24
|
+
MlRelation::Transform = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MlRelation.Transform").msgclass
|
|
25
|
+
Fetch = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch").msgclass
|
|
26
|
+
Fetch::Method = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch.Method").msgclass
|
|
27
|
+
Fetch::Method::Args = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Fetch.Method.Args").msgclass
|
|
28
|
+
Unknown = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unknown").msgclass
|
|
29
|
+
RelationCommon = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.RelationCommon").msgclass
|
|
30
|
+
SQL = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SQL").msgclass
|
|
31
|
+
WithRelations = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithRelations").msgclass
|
|
32
|
+
Read = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read").msgclass
|
|
33
|
+
Read::NamedTable = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read.NamedTable").msgclass
|
|
34
|
+
Read::DataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Read.DataSource").msgclass
|
|
35
|
+
Project = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Project").msgclass
|
|
36
|
+
Filter = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Filter").msgclass
|
|
37
|
+
Join = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join").msgclass
|
|
38
|
+
Join::JoinDataType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join.JoinDataType").msgclass
|
|
39
|
+
Join::JoinType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Join.JoinType").enummodule
|
|
40
|
+
SetOperation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SetOperation").msgclass
|
|
41
|
+
SetOperation::SetOpType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SetOperation.SetOpType").enummodule
|
|
42
|
+
Limit = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Limit").msgclass
|
|
43
|
+
Offset = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Offset").msgclass
|
|
44
|
+
Tail = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Tail").msgclass
|
|
45
|
+
Aggregate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate").msgclass
|
|
46
|
+
Aggregate::Pivot = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.Pivot").msgclass
|
|
47
|
+
Aggregate::GroupingSets = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.GroupingSets").msgclass
|
|
48
|
+
Aggregate::GroupType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Aggregate.GroupType").enummodule
|
|
49
|
+
Sort = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Sort").msgclass
|
|
50
|
+
Drop = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Drop").msgclass
|
|
51
|
+
Deduplicate = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Deduplicate").msgclass
|
|
52
|
+
LocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.LocalRelation").msgclass
|
|
53
|
+
CachedLocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CachedLocalRelation").msgclass
|
|
54
|
+
ChunkedCachedLocalRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ChunkedCachedLocalRelation").msgclass
|
|
55
|
+
CachedRemoteRelation = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CachedRemoteRelation").msgclass
|
|
56
|
+
Sample = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Sample").msgclass
|
|
57
|
+
Range = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Range").msgclass
|
|
58
|
+
SubqueryAlias = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.SubqueryAlias").msgclass
|
|
59
|
+
Repartition = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Repartition").msgclass
|
|
60
|
+
ShowString = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ShowString").msgclass
|
|
61
|
+
HtmlString = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.HtmlString").msgclass
|
|
62
|
+
StatSummary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSummary").msgclass
|
|
63
|
+
StatDescribe = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatDescribe").msgclass
|
|
64
|
+
StatCrosstab = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCrosstab").msgclass
|
|
65
|
+
StatCov = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCov").msgclass
|
|
66
|
+
StatCorr = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatCorr").msgclass
|
|
67
|
+
StatApproxQuantile = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatApproxQuantile").msgclass
|
|
68
|
+
StatFreqItems = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatFreqItems").msgclass
|
|
69
|
+
StatSampleBy = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSampleBy").msgclass
|
|
70
|
+
StatSampleBy::Fraction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.StatSampleBy.Fraction").msgclass
|
|
71
|
+
NAFill = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAFill").msgclass
|
|
72
|
+
NADrop = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NADrop").msgclass
|
|
73
|
+
NAReplace = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAReplace").msgclass
|
|
74
|
+
NAReplace::Replacement = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.NAReplace.Replacement").msgclass
|
|
75
|
+
ToDF = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ToDF").msgclass
|
|
76
|
+
WithColumnsRenamed = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumnsRenamed").msgclass
|
|
77
|
+
WithColumnsRenamed::Rename = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumnsRenamed.Rename").msgclass
|
|
78
|
+
WithColumns = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithColumns").msgclass
|
|
79
|
+
WithWatermark = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.WithWatermark").msgclass
|
|
80
|
+
Hint = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Hint").msgclass
|
|
81
|
+
Unpivot = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unpivot").msgclass
|
|
82
|
+
Unpivot::Values = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Unpivot.Values").msgclass
|
|
83
|
+
Transpose = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Transpose").msgclass
|
|
84
|
+
UnresolvedTableValuedFunction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.UnresolvedTableValuedFunction").msgclass
|
|
85
|
+
ToSchema = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ToSchema").msgclass
|
|
86
|
+
RepartitionByExpression = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.RepartitionByExpression").msgclass
|
|
87
|
+
MapPartitions = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.MapPartitions").msgclass
|
|
88
|
+
GroupMap = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.GroupMap").msgclass
|
|
89
|
+
TransformWithStateInfo = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.TransformWithStateInfo").msgclass
|
|
90
|
+
CoGroupMap = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CoGroupMap").msgclass
|
|
91
|
+
ApplyInPandasWithState = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.ApplyInPandasWithState").msgclass
|
|
92
|
+
CommonInlineUserDefinedTableFunction = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CommonInlineUserDefinedTableFunction").msgclass
|
|
93
|
+
PythonUDTF = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PythonUDTF").msgclass
|
|
94
|
+
CommonInlineUserDefinedDataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CommonInlineUserDefinedDataSource").msgclass
|
|
95
|
+
PythonDataSource = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.PythonDataSource").msgclass
|
|
96
|
+
CollectMetrics = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.CollectMetrics").msgclass
|
|
97
|
+
Parse = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Parse").msgclass
|
|
98
|
+
Parse::ParseFormat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.Parse.ParseFormat").enummodule
|
|
99
|
+
AsOfJoin = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.AsOfJoin").msgclass
|
|
100
|
+
LateralJoin = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.LateralJoin").msgclass
|
|
101
|
+
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: spark/connect/types.proto
|
|
4
|
+
|
|
5
|
+
require 'google/protobuf'
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
descriptor_data = "\n\x19spark/connect/types.proto\x12\rspark.connect\"\xbc\x1c\n\x08\x44\x61taType\x12,\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00\x12\x30\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00\x12\x32\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00\x12,\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00\x12.\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00\x12\x32\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00\x12,\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00\x12.\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00\x12\x30\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00\x12\x32\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00\x12\x30\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00\x12,\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00\x12\x33\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00\x12,\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00\x12\x36\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00\x12=\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00\x12\x45\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00\x12H\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00\x12\x44\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32\'.spark.connect.DataType.DayTimeIntervalH\x00\x12.\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00\x12\x30\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00\x12*\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00\x12\x32\n\x07variant\x18\x19 
\x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00\x12*\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00\x12\x34\n\x08geometry\x18\x1a \x01(\x0b\x32 .spark.connect.DataType.GeometryH\x00\x12\x36\n\tgeography\x18\x1b \x01(\x0b\x32!.spark.connect.DataType.GeographyH\x00\x12\x34\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00\x12,\n\x04time\x18\x1c \x01(\x0b\x32\x1c.spark.connect.DataType.TimeH\x00\x1a+\n\x07\x42oolean\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04\x42yte\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a)\n\x05Short\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a+\n\x07Integer\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04Long\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a)\n\x05\x46loat\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a*\n\x06\x44ouble\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a=\n\x06String\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x12\x11\n\tcollation\x18\x02 \x01(\t\x1a*\n\x06\x42inary\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04NULL\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a-\n\tTimestamp\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a(\n\x04\x44\x61te\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\x30\n\x0cTimestampNTZ\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1aN\n\x04Time\x12\x16\n\tprecision\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x02 \x01(\rB\x0c\n\n_precision\x1a\x34\n\x10\x43\x61lendarInterval\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\x85\x01\n\x11YearMonthInterval\x12\x18\n\x0bstart_field\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\x83\x01\n\x0f\x44\x61yTimeInterval\x12\x18\n\x0bstart_field\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x02 
\x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\x38\n\x04\x43har\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a;\n\x07VarChar\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1ao\n\x07\x44\x65\x63imal\x12\x12\n\x05scale\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tprecision\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12 \n\x18type_variation_reference\x18\x03 \x01(\rB\x08\n\x06_scaleB\x0c\n\n_precision\x1a}\n\x0bStructField\x12\x0c\n\x04name\x18\x01 \x01(\t\x12*\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x10\n\x08nullable\x18\x03 \x01(\x08\x12\x15\n\x08metadata\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_metadata\x1a_\n\x06Struct\x12\x33\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructField\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1ao\n\x05\x41rray\x12-\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x15\n\rcontains_null\x18\x02 \x01(\x08\x12 \n\x18type_variation_reference\x18\x03 \x01(\r\x1a\x9c\x01\n\x03Map\x12)\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataType\x12+\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataType\x12\x1b\n\x13value_contains_null\x18\x03 \x01(\x08\x12 \n\x18type_variation_reference\x18\x04 \x01(\r\x1a:\n\x08Geometry\x12\x0c\n\x04srid\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a;\n\tGeography\x12\x0c\n\x04srid\x18\x01 \x01(\x05\x12 \n\x18type_variation_reference\x18\x02 \x01(\r\x1a+\n\x07Variant\x12 \n\x18type_variation_reference\x18\x01 \x01(\r\x1a\xe4\x01\n\x03UDT\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x16\n\tjvm_class\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x19\n\x0cpython_class\x18\x03 \x01(\tH\x01\x88\x01\x01\x12$\n\x17serialized_python_class\x18\x04 \x01(\tH\x02\x88\x01\x01\x12.\n\x08sql_type\x18\x05 
\x01(\x0b\x32\x17.spark.connect.DataTypeH\x03\x88\x01\x01\x42\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_classB\x0b\n\t_sql_type\x1a$\n\x08Unparsed\x12\x18\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tB\x06\n\x04kindB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3"
|
|
9
|
+
|
|
10
|
+
# Register this file's serialized descriptor with the generated descriptor
# pool so the message types below can be looked up by their full names.
::Google::Protobuf::DescriptorPool.generated_pool.add_serialized_file(descriptor_data)

module Spark
  module Connect
    # Message class for `spark.connect.DataType`.
    DataType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType").msgclass

    # Each nested message `spark.connect.DataType.<Name>` is bound as the
    # constant `DataType::<Name>`, in the same order as the descriptor lists
    # them.
    %w[
      Boolean Byte Short Integer Long Float Double String Binary NULL
      Timestamp Date TimestampNTZ Time
      CalendarInterval YearMonthInterval DayTimeInterval
      Char VarChar Decimal
      StructField Struct Array Map
      Geometry Geography Variant UDT Unparsed
    ].each do |nested|
      descriptor = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("spark.connect.DataType.#{nested}")
      DataType.const_set(nested, descriptor.msgclass)
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Entry point for the generated Spark Connect protobuf/gRPC stubs: puts them on
# the load path, requires them, and publishes them under a convenient, stable
# alias.
#
# `grpc_tools_ruby_protoc` produces the stubs from the vendored
# `proto/spark/connect/*.proto` files (see the `proto:generate` Rake task).
# The generated files `require` one another with paths rooted at
# `lib/spark_connect/proto` (e.g. `require "spark/connect/base_pb"`), so that
# directory has to be on the load path before any of them is required.
generated_stub_dir = File.expand_path("proto", __dir__)
$LOAD_PATH.unshift(generated_stub_dir) unless $LOAD_PATH.include?(generated_stub_dir)

# Required in this exact order.
%w[
  base_pb
  base_services_pb
  catalog_pb
  commands_pb
  common_pb
  expressions_pb
  relations_pb
  types_pb
  ml_pb
  ml_common_pb
].each { |stub| require "spark/connect/#{stub}" }

module SparkConnect
  # The generated message classes and the gRPC service stub are defined under
  # `Spark::Connect`, a namespace derived from the protobuf package
  # `spark.connect`. Aliasing it as `SparkConnect::Proto` keeps the rest of the
  # codebase decoupled from that detail and reads clearly, e.g.
  # `SparkConnect::Proto::Relation` or `SparkConnect::Proto::Expression`.
  Proto = ::Spark::Connect
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SparkConnect
  # Builder that reads external data into a {DataFrame}. Obtained from
  # {SparkSession#read}; mirrors PySpark's `DataFrameReader`.
  #
  # The configuration methods ({#format}, {#schema}, {#option}, {#options})
  # mutate this reader and return it, so calls can be chained. The format
  # shortcuts ({#csv}, {#json}, ...) also set the format on this reader before
  # loading.
  #
  # @example
  #   spark.read.format("csv").option("header", true).load("data.csv")
  #   spark.read.json("events.json")
  #   spark.read.table("my_table")
  class DataFrameReader
    Proto = SparkConnect::Proto

    # @param session [SparkSession]
    def initialize(session)
      @session = session
      @format = nil
      @schema = nil
      @options = {}
    end

    # Choose the input format (`"csv"`, `"json"`, `"parquet"`, `"orc"`, ...).
    # The argument is stored as a String.
    #
    # @return [self]
    def format(source)
      @format = source.to_s
      self
    end

    # Choose the input schema. A {Types::StructType} is stored as its
    # `simple_string`; anything else (e.g. a DDL string) is stored via `to_s`.
    #
    # @return [self]
    def schema(schema)
      @schema =
        if schema.is_a?(Types::StructType)
          schema.simple_string
        else
          schema.to_s
        end
      self
    end

    # Record one read option. Both key and value are stored as Strings.
    #
    # @return [self]
    def option(key, value)
      @options[key.to_s] = value.to_s
      self
    end

    # Record several read options at once. Keys and values are stored as
    # Strings; entries given later replace earlier ones with the same key.
    #
    # @return [self]
    def options(opts)
      opts.each do |name, setting|
        @options[name.to_s] = setting.to_s
      end
      self
    end

    # Build a DataFrame that reads the given path(s) with the options
    # accumulated so far. Format and schema are only set on the request when
    # they have been configured.
    #
    # @param paths [Array<String>] paths; nested arrays are flattened.
    # @return [DataFrame]
    def load(*paths)
      locations = paths.flatten.map(&:to_s)
      source = Proto::Read::DataSource.new(options: @options, paths: locations)
      source.format = @format if @format
      source.schema = @schema if @schema
      read_relation(data_source: source)
    end

    # Build a DataFrame over a registered table or view. The accumulated
    # options are passed along with the identifier.
    #
    # @param name [String]
    # @return [DataFrame]
    def table(name)
      named = Proto::Read::NamedTable.new(unparsed_identifier: name.to_s, options: @options)
      read_relation(named_table: named)
    end

    # @return [DataFrame] CSV at `paths`.
    def csv(*paths)
      format("csv").load(*paths)
    end

    # @return [DataFrame] JSON at `paths`.
    def json(*paths)
      format("json").load(*paths)
    end

    # @return [DataFrame] Parquet at `paths`.
    def parquet(*paths)
      format("parquet").load(*paths)
    end

    # @return [DataFrame] ORC at `paths`.
    def orc(*paths)
      format("orc").load(*paths)
    end

    # @return [DataFrame] text at `paths` (one `value` column per line).
    def text(*paths)
      format("text").load(*paths)
    end

    # Read from a JDBC source. `url` and `table` are sent as the `"url"` and
    # `"dbtable"` options; entries in `properties` with those same keys take
    # precedence over the positional arguments.
    #
    # @param url [String] the JDBC URL.
    # @param table [String] the table name (or subquery).
    # @param properties [Hash] connection properties (`user`, `password`, ...).
    # @return [DataFrame]
    def jdbc(url, table, properties = {})
      connection = { "url" => url, "dbtable" => table }
      connection.merge!(properties.transform_keys(&:to_s))
      format("jdbc").options(connection).load
    end

    private

    # Wrap a `Read` message built from `read_kw` in a relation plan and return
    # the DataFrame for it.
    def read_relation(**read_kw)
      read = Proto::Read.new(**read_kw)
      plan = PlanBuilder.relation(@session, read: read)
      DataFrame.new(@session, plan)
    end
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SparkConnect
  # An ordered collection of named fields representing a single row of a
  # {DataFrame}, returned by {DataFrame#collect}, {DataFrame#take}, etc.
  #
  # Fields are accessible positionally (`row[0]`), by name (`row["id"]` or
  # `row.id`), and the whole row converts cleanly to a Hash or Array.
  #
  # Caveats:
  #
  # * Dot access (`row.id`) is served by `method_missing`, so it only works for
  #   field names that are not already methods of Row (including those mixed
  #   in by `Enumerable`, e.g. `count`, `first`, `min`). Use {#[]} or {#field}
  #   for such fields.
  # * When field names repeat, name lookups return the first matching field,
  #   while {#to_h} keeps the value of the last one.
  #
  # @example
  #   row = SparkConnect::Row.new({ "id" => 1, "name" => "alice" })
  #   row[0]        #=> 1
  #   row["name"]   #=> "alice"
  #   row.name      #=> "alice"
  #   row.to_h      #=> {"id"=>1, "name"=>"alice"}
  class Row
    include Enumerable

    # @return [Array<String>] the field names, in order.
    attr_reader :fields

    # @return [Array] the field values, in order.
    attr_reader :values

    # @overload initialize(hash)
    #   @param hash [Hash] an ordered mapping of field name to value.
    # @overload initialize(values, fields:)
    #   @param values [Array] positional values
    #   @param fields [Array<String>] field names
    def initialize(data = {}, fields: nil)
      if fields
        @fields = fields.map(&:to_s)
        # Shallow copy: the Hash form below already yields a fresh array, and
        # sharing the caller's array would let later mutation of it silently
        # change this row (and its #hash).
        @values = data.dup
      else
        @fields = data.keys.map(&:to_s)
        @values = data.values
      end
    end

    # Look up a value by zero-based index or by field name.
    #
    # @param key [Integer, String, Symbol]
    # @return [Object, nil] the value; nil when the index or name is absent.
    def [](key)
      case key
      when Integer then @values[key]
      else
        idx = @fields.index(key.to_s)
        idx && @values[idx]
      end
    end

    # @return [Hash] an ordered Hash of field name to value.
    def to_h
      @fields.zip(@values).to_h
    end
    alias as_dict to_h

    # @return [Array] a copy of the row's values, in order.
    def to_a
      @values.dup
    end

    # Iterate over the values in order.
    def each(&block)
      @values.each(&block)
    end

    # @return [Integer] number of fields.
    def length
      @values.length
    end
    alias size length

    # @return [Object] the value for `name`, raising if the field is absent.
    # @raise [IllegalArgumentError] when no field is called `name`.
    def field(name)
      idx = @fields.index(name.to_s)
      raise IllegalArgumentError, "No such field: #{name}" unless idx

      @values[idx]
    end

    # Value equality: same field names and `==`-equal values (so `1 == 1.0`).
    #
    # @return [Boolean]
    def ==(other)
      other.is_a?(Row) && other.fields == fields && other.values == values
    end

    # Strict equality used for Hash keys and Set membership. Values are
    # compared with `eql?` (so `1` and `1.0` differ), which keeps it
    # consistent with {#hash}: rows that are `eql?` always hash alike.
    #
    # @return [Boolean]
    def eql?(other)
      other.is_a?(Row) && fields.eql?(other.fields) && values.eql?(other.values)
    end

    # @return [Integer] hash code derived from the field names and values.
    def hash
      [fields, values].hash
    end

    # Allows `row.field_name` access for field names that are valid method names.
    def method_missing(name, *args)
      key = name.to_s
      if args.empty? && @fields.include?(key)
        self[key]
      else
        super
      end
    end

    # Reports field names as respondable so `respond_to?` agrees with
    # `method_missing`.
    def respond_to_missing?(name, include_private = false)
      @fields.include?(name.to_s) || super
    end

    # @return [String] e.g. `Row(id=1, name="alice")`.
    def to_s
      "Row(#{@fields.zip(@values).map { |k, v| "#{k}=#{v.inspect}" }.join(', ')})"
    end
    alias inspect to_s
  end
end
|