impala 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +28 -0
  5. data/Rakefile +15 -0
  6. data/impala.gemspec +25 -0
  7. data/lib/impala.rb +33 -0
  8. data/lib/impala/connection.rb +93 -0
  9. data/lib/impala/cursor.rb +86 -0
  10. data/lib/impala/protocol.rb +6 -0
  11. data/lib/impala/protocol/beeswax_constants.rb +14 -0
  12. data/lib/impala/protocol/beeswax_service.rb +747 -0
  13. data/lib/impala/protocol/beeswax_types.rb +192 -0
  14. data/lib/impala/protocol/data_constants.rb +12 -0
  15. data/lib/impala/protocol/data_sinks_constants.rb +12 -0
  16. data/lib/impala/protocol/data_sinks_types.rb +107 -0
  17. data/lib/impala/protocol/data_types.rb +77 -0
  18. data/lib/impala/protocol/descriptors_constants.rb +12 -0
  19. data/lib/impala/protocol/descriptors_types.rb +266 -0
  20. data/lib/impala/protocol/exprs_constants.rb +12 -0
  21. data/lib/impala/protocol/exprs_types.rb +345 -0
  22. data/lib/impala/protocol/facebook_service.rb +706 -0
  23. data/lib/impala/protocol/fb303_constants.rb +14 -0
  24. data/lib/impala/protocol/fb303_types.rb +24 -0
  25. data/lib/impala/protocol/frontend_constants.rb +12 -0
  26. data/lib/impala/protocol/frontend_types.rb +347 -0
  27. data/lib/impala/protocol/hive_metastore_constants.rb +52 -0
  28. data/lib/impala/protocol/hive_metastore_types.rb +697 -0
  29. data/lib/impala/protocol/impala_internal_service.rb +244 -0
  30. data/lib/impala/protocol/impala_internal_service_constants.rb +12 -0
  31. data/lib/impala/protocol/impala_internal_service_types.rb +362 -0
  32. data/lib/impala/protocol/impala_plan_service.rb +310 -0
  33. data/lib/impala/protocol/impala_plan_service_constants.rb +12 -0
  34. data/lib/impala/protocol/impala_plan_service_types.rb +36 -0
  35. data/lib/impala/protocol/impala_service.rb +260 -0
  36. data/lib/impala/protocol/impala_service_constants.rb +12 -0
  37. data/lib/impala/protocol/impala_service_types.rb +46 -0
  38. data/lib/impala/protocol/java_constants_constants.rb +42 -0
  39. data/lib/impala/protocol/java_constants_types.rb +14 -0
  40. data/lib/impala/protocol/opcodes_constants.rb +12 -0
  41. data/lib/impala/protocol/opcodes_types.rb +309 -0
  42. data/lib/impala/protocol/partitions_constants.rb +12 -0
  43. data/lib/impala/protocol/partitions_types.rb +44 -0
  44. data/lib/impala/protocol/plan_nodes_constants.rb +12 -0
  45. data/lib/impala/protocol/plan_nodes_types.rb +345 -0
  46. data/lib/impala/protocol/planner_constants.rb +12 -0
  47. data/lib/impala/protocol/planner_types.rb +78 -0
  48. data/lib/impala/protocol/runtime_profile_constants.rb +12 -0
  49. data/lib/impala/protocol/runtime_profile_types.rb +97 -0
  50. data/lib/impala/protocol/state_store_service.rb +244 -0
  51. data/lib/impala/protocol/state_store_service_constants.rb +12 -0
  52. data/lib/impala/protocol/state_store_service_types.rb +185 -0
  53. data/lib/impala/protocol/state_store_subscriber_service.rb +82 -0
  54. data/lib/impala/protocol/state_store_subscriber_service_constants.rb +12 -0
  55. data/lib/impala/protocol/state_store_subscriber_service_types.rb +67 -0
  56. data/lib/impala/protocol/statestore_types_constants.rb +12 -0
  57. data/lib/impala/protocol/statestore_types_types.rb +77 -0
  58. data/lib/impala/protocol/status_constants.rb +12 -0
  59. data/lib/impala/protocol/status_types.rb +44 -0
  60. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  61. data/lib/impala/protocol/types_constants.rb +12 -0
  62. data/lib/impala/protocol/types_types.rb +86 -0
  63. data/lib/impala/version.rb +3 -0
  64. data/thrift/Data.thrift +52 -0
  65. data/thrift/DataSinks.thrift +61 -0
  66. data/thrift/Descriptors.thrift +115 -0
  67. data/thrift/Exprs.thrift +134 -0
  68. data/thrift/Frontend.thrift +193 -0
  69. data/thrift/ImpalaInternalService.thrift +265 -0
  70. data/thrift/ImpalaPlanService.thrift +44 -0
  71. data/thrift/ImpalaService.thrift +105 -0
  72. data/thrift/JavaConstants.thrift +60 -0
  73. data/thrift/Opcodes.thrift +317 -0
  74. data/thrift/Partitions.thrift +41 -0
  75. data/thrift/PlanNodes.thrift +184 -0
  76. data/thrift/Planner.thrift +72 -0
  77. data/thrift/RuntimeProfile.thrift +58 -0
  78. data/thrift/StateStoreService.thrift +121 -0
  79. data/thrift/StateStoreSubscriberService.thrift +64 -0
  80. data/thrift/StatestoreTypes.thrift +50 -0
  81. data/thrift/Status.thrift +31 -0
  82. data/thrift/Types.thrift +71 -0
  83. data/thrift/beeswax.thrift +175 -0
  84. data/thrift/fb303.thrift +112 -0
  85. data/thrift/hive_metastore.thrift +528 -0
  86. metadata +206 -0
@@ -0,0 +1,193 @@
1
+ // Copyright 2012 Cloudera Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ namespace cpp impala
16
+ namespace java com.cloudera.impala.thrift
17
+ namespace rb Impala.Protocol
18
+
19
+ include "Types.thrift"
20
+ include "ImpalaInternalService.thrift"
21
+ include "PlanNodes.thrift"
22
+ include "Planner.thrift"
23
+ include "Descriptors.thrift"
24
+
25
+ // These are supporting structs for JniFrontend.java, which serves as the glue
26
+ // between our C++ execution environment and the Java frontend.
27
+
28
+ // Arguments to getTableNames, which returns a list of tables that match an
29
+ // optional pattern.
30
+ struct TGetTablesParams {
31
+ // If not set, match tables in all DBs
32
+ 1: optional string db
33
+
34
+ // If not set, match every table
35
+ 2: optional string pattern
36
+ }
37
+
38
+ // getTableNames returns a list of unqualified table names
39
+ struct TGetTablesResult {
40
+ 1: list<string> tables
41
+ }
42
+
43
+ // Arguments to getDbNames, which returns a list of dbs that match an optional
44
+ // pattern
45
+ struct TGetDbsParams {
46
+ // If not set, match every database
47
+ 1: optional string pattern
48
+ }
49
+
50
+ // getDbNames returns a list of database names
51
+ struct TGetDbsResult {
52
+ 1: list<string> dbs
53
+ }
54
+
55
+ struct TColumnDesc {
56
+ 1: required string columnName
57
+ 2: required Types.TPrimitiveType columnType
58
+ }
59
+
60
+ // Arguments to describeTable(), which returns a list of column descriptors for a
61
+ // given table
62
+ struct TDescribeTableParams {
63
+ 1: optional string db
64
+ 2: required string table_name
65
+ }
66
+
67
+ // Results of a call to describeTable()
68
+ struct TDescribeTableResult {
69
+ 1: required list<TColumnDesc> columns
70
+ }
71
+
72
+ // Per-client session state
73
+ struct TSessionState {
74
+ // The default database, changed by USE <database> queries.
75
+ 1: required string database
76
+ }
77
+
78
+ struct TClientRequest {
79
+ // SQL stmt to be executed
80
+ 1: required string stmt
81
+
82
+ // query options
83
+ 2: required ImpalaInternalService.TQueryOptions queryOptions
84
+
85
+ // session state
86
+ 3: required TSessionState sessionState;
87
+ }
88
+
89
+ struct TResultSetMetadata {
90
+ 1: required list<TColumnDesc> columnDescs
91
+ }
92
+
93
+ // Describes a set of changes to make to the metastore
94
+ struct TCatalogUpdate {
95
+ // Unqualified name of the table to change
96
+ 1: required string target_table;
97
+
98
+ // Database that the table belongs to
99
+ 2: required string db_name;
100
+
101
+ // List of partitions that are new and need to be created. May
102
+ // include the root partition (represented by the empty string).
103
+ 3: required set<string> created_partitions;
104
+ }
105
+
106
+ // Metadata required to finalize a query - that is, to clean up after the query is done.
107
+ // Only relevant for INSERT queries.
108
+ struct TFinalizeParams {
109
+ // True if the INSERT query was OVERWRITE, rather than INTO
110
+ 1: required bool is_overwrite;
111
+
112
+ // The base directory in hdfs of the table targeted by this INSERT
113
+ 2: required string hdfs_base_dir;
114
+
115
+ // The target table name
116
+ 3: required string table_name;
117
+
118
+ // The target table database
119
+ 4: required string table_db;
120
+ }
121
+
122
+ // Result of call to ImpalaPlanService/JniFrontend.CreateQueryRequest()
123
+ struct TQueryExecRequest {
124
+ // global descriptor tbl for all fragments
125
+ 1: optional Descriptors.TDescriptorTable desc_tbl
126
+
127
+ // fragments[i] may consume the output of fragments[j > i];
128
+ // fragments[0] is the root fragment and also the coordinator fragment, if
129
+ // it is unpartitioned.
130
+ 2: required list<Planner.TPlanFragment> fragments
131
+
132
+ // Specifies the destination fragment of the output of each fragment.
133
+ // dest_fragment_idx.size() == fragments.size() - 1 and
134
+ // fragments[i] sends its output to fragments[dest_fragment_idx[i-1]]
135
+ 3: optional list<i32> dest_fragment_idx
136
+
137
+ // A map from scan node ids to a list of scan range locations.
138
+ // The node ids refer to scan nodes in fragments[].plan_tree
139
+ 4: optional map<Types.TPlanNodeId, list<Planner.TScanRangeLocations>>
140
+ per_node_scan_ranges
141
+
142
+ // Metadata of the query result set (only for select)
143
+ 5: optional TResultSetMetadata result_set_metadata
144
+
145
+ // Set if the query needs finalization after it executes
146
+ 6: optional TFinalizeParams finalize_params
147
+
148
+ 7: required ImpalaInternalService.TQueryGlobals query_globals
149
+ }
150
+
151
+ enum TDdlType {
152
+ SHOW_TABLES,
153
+ SHOW_DBS,
154
+ USE,
155
+ DESCRIBE
156
+ }
157
+
158
+ struct TDdlExecRequest {
159
+ 1: required TDdlType ddl_type
160
+
161
+ // Used for USE and DESCRIBE
162
+ 2: optional string database;
163
+
164
+ // Table name (not fully-qualified) for DESCRIBE
165
+ 3: optional string describe_table;
166
+
167
+ // Patterns to match table names against for SHOW
168
+ 4: optional string show_pattern;
169
+ }
170
+
171
+ // Result of call to createExecRequest()
172
+ struct TExecRequest {
173
+ 1: required Types.TStmtType stmt_type;
174
+
175
+ 2: optional string sql_stmt;
176
+
177
+ // Globally unique id for this request. Assigned by the planner.
178
+ 3: required Types.TUniqueId request_id
179
+
180
+ // Copied from the corresponding TClientRequest
181
+ 4: required ImpalaInternalService.TQueryOptions query_options;
182
+
183
+ // TQueryExecRequest for the backend
184
+ // Set iff stmt_type is QUERY or DML
185
+ 5: optional TQueryExecRequest query_exec_request
186
+
187
+ // Set iff stmt_type is DDL
188
+ 6: optional TDdlExecRequest ddl_exec_request
189
+
190
+ // Metadata of the query result set (not set for DML)
191
+ 7: optional TResultSetMetadata result_set_metadata
192
+ }
193
+
@@ -0,0 +1,265 @@
1
+ // Copyright 2012 Cloudera Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // This file contains the details of the protocol between coordinators and backends.
17
+
18
+ namespace cpp impala
19
+ namespace java com.cloudera.impala.thrift
20
+ namespace rb Impala.Protocol
21
+
22
+ include "Status.thrift"
23
+ include "Types.thrift"
24
+ include "Exprs.thrift"
25
+ include "Descriptors.thrift"
26
+ include "PlanNodes.thrift"
27
+ include "Planner.thrift"
28
+ include "DataSinks.thrift"
29
+ include "Data.thrift"
30
+ include "RuntimeProfile.thrift"
31
+ include "ImpalaService.thrift"
32
+ include "JavaConstants.thrift"
33
+
34
+ // Query options that correspond to ImpalaService.ImpalaQueryOptions
35
+ // TODO: make all of these optional, otherwise it will be impossible to
36
+ // retire options and do rolling upgrades between releases
37
+ struct TQueryOptions {
38
+ 1: required bool abort_on_error = 0
39
+ 2: required i32 max_errors = 0
40
+ 3: required bool disable_codegen = 0
41
+ 4: required i32 batch_size = 0
42
+
43
+ // return_as_ascii is not listed in ImpalaService.ImpalaQueryOptions because Beeswax
44
+ // should only return ascii. This option is only for internal testing.
45
+ // If true, return query results in ASCII format (TColumnValue.stringVal),
46
+ // otherwise return results in their native format (each TColumnValue
47
+ // uses the field corresponding to the column's native type).
48
+ 5: required bool return_as_ascii = 1
49
+
50
+ 6: required i32 num_nodes = JavaConstants.NUM_NODES_ALL
51
+ 7: required i64 max_scan_range_length = 0
52
+ 8: required i32 num_scanner_threads = 0
53
+ 9: required i32 max_io_buffers = 0
54
+ 10: required bool allow_unsupported_formats = 0
55
+ 11: optional i64 default_order_by_limit = -1
56
+ }
57
+
58
+ // A scan range plus the parameters needed to execute that scan.
59
+ struct TScanRangeParams {
60
+ 1: required PlanNodes.TScanRange scan_range
61
+ 2: optional i32 volume_id = -1
62
+ }
63
+
64
+ // Specification of one output destination of a plan fragment
65
+ struct TPlanFragmentDestination {
66
+ // the globally unique fragment instance id
67
+ 1: required Types.TUniqueId fragment_instance_id
68
+
69
+ // ... which is being executed on this server
70
+ 2: required Types.THostPort server
71
+ }
72
+
73
+ // Parameters for a single execution instance of a particular TPlanFragment
74
+ // TODO: for range partitioning, we also need to specify the range boundaries
75
+ struct TPlanFragmentExecParams {
76
+ // a globally unique id assigned to the entire query
77
+ 1: required Types.TUniqueId query_id
78
+
79
+ // a globally unique id assigned to this particular execution instance of
80
+ // a TPlanFragment
81
+ 2: required Types.TUniqueId fragment_instance_id
82
+
83
+ // initial scan ranges for each scan node in TPlanFragment.plan_tree
84
+ 3: required map<Types.TPlanNodeId, list<TScanRangeParams>> per_node_scan_ranges
85
+
86
+ // number of senders for ExchangeNodes contained in TPlanFragment.plan_tree;
87
+ // needed to create a DataStreamRecvr
88
+ 4: required map<Types.TPlanNodeId, i32> per_exch_num_senders
89
+
90
+ // Output destinations, one per output partition.
91
+ // The partitioning of the output is specified by
92
+ // TPlanFragment.output_sink.output_partition.
93
+ // The number of output partitions is destinations.size().
94
+ 5: list<TPlanFragmentDestination> destinations
95
+ }
96
+
97
+ // Global query parameters assigned by the coordinator.
98
+ struct TQueryGlobals {
99
+ // String containing a timestamp set as the current time.
100
+ 1: required string now_string
101
+ }
102
+
103
+
104
+ // Service Protocol Details
105
+
106
+ enum ImpalaInternalServiceVersion {
107
+ V1
108
+ }
109
+
110
+
111
+ // ExecPlanFragment
112
+
113
+ struct TExecPlanFragmentParams {
114
+ 1: required ImpalaInternalServiceVersion protocol_version
115
+
116
+ // required in V1
117
+ 2: optional Planner.TPlanFragment fragment
118
+
119
+ // required in V1
120
+ 3: optional Descriptors.TDescriptorTable desc_tbl
121
+
122
+ // required in V1
123
+ 4: optional TPlanFragmentExecParams params
124
+
125
+ // Initiating coordinator.
126
+ // TODO: determine whether we can get this somehow via the Thrift rpc mechanism.
127
+ // required in V1
128
+ 5: optional Types.THostPort coord
129
+
130
+ // backend number assigned by coord to identify backend
131
+ // required in V1
132
+ 6: optional i32 backend_num
133
+
134
+ // Global query parameters assigned by coordinator.
135
+ // required in V1
136
+ 7: optional TQueryGlobals query_globals
137
+
138
+ // options for the query
139
+ // required in V1
140
+ 8: optional TQueryOptions query_options
141
+ }
142
+
143
+ struct TExecPlanFragmentResult {
144
+ // required in V1
145
+ 1: optional Status.TStatus status
146
+ }
147
+
148
+
149
+ // ReportExecStatus
150
+
151
+ // The results of an INSERT query, sent to the coordinator as part of
152
+ // TReportExecStatusParams
153
+ struct TInsertExecStatus {
154
+ // Number of rows appended by an INSERT, per-partition.
155
+ // The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the
156
+ // root in an unpartitioned table being the empty string.
157
+ // The target table name is recorded in the corresponding TQueryExecRequest
158
+ 1: optional map<string, i64> num_appended_rows
159
+
160
+ // A map from temporary absolute file path to final absolute destination. The
161
+ // coordinator performs these updates after the query completes.
162
+ 2: required map<string, string> files_to_move;
163
+ }
164
+
165
+ struct TReportExecStatusParams {
166
+ 1: required ImpalaInternalServiceVersion protocol_version
167
+
168
+ // required in V1
169
+ 2: optional Types.TUniqueId query_id
170
+
171
+ // passed into ExecPlanFragment() as TExecPlanFragmentParams.backend_num
172
+ // required in V1
173
+ 3: optional i32 backend_num
174
+
175
+ // required in V1
176
+ 4: optional Types.TUniqueId fragment_instance_id
177
+
178
+ // Status of fragment execution; any error status means it's done.
179
+ // required in V1
180
+ 5: optional Status.TStatus status
181
+
182
+ // If true, fragment finished executing.
183
+ // required in V1
184
+ 6: optional bool done
185
+
186
+ // cumulative profile
187
+ // required in V1
188
+ 7: optional RuntimeProfile.TRuntimeProfileTree profile
189
+
190
+ // Cumulative structural changes made by a table sink
191
+ // optional in V1
192
+ 8: optional TInsertExecStatus insert_exec_status;
193
+
194
+ // New errors that have not been reported to the coordinator
195
+ // optional in V1
196
+ 9: optional list<string> error_log
197
+ }
198
+
199
+ struct TReportExecStatusResult {
200
+ // required in V1
201
+ 1: optional Status.TStatus status
202
+ }
203
+
204
+
205
+ // CancelPlanFragment
206
+
207
+ struct TCancelPlanFragmentParams {
208
+ 1: required ImpalaInternalServiceVersion protocol_version
209
+
210
+ // required in V1
211
+ 2: optional Types.TUniqueId fragment_instance_id
212
+ }
213
+
214
+ struct TCancelPlanFragmentResult {
215
+ // required in V1
216
+ 1: optional Status.TStatus status
217
+ }
218
+
219
+
220
+ // TransmitData
221
+
222
+ struct TTransmitDataParams {
223
+ 1: required ImpalaInternalServiceVersion protocol_version
224
+
225
+ // required in V1
226
+ 2: optional Types.TUniqueId dest_fragment_instance_id
227
+
228
+ // for debugging purposes; currently ignored
229
+ //3: optional Types.TUniqueId src_fragment_instance_id
230
+
231
+ // required in V1
232
+ 4: optional Types.TPlanNodeId dest_node_id
233
+
234
+ // required in V1
235
+ 5: optional Data.TRowBatch row_batch
236
+
237
+ // if set to true, indicates that no more row batches will be sent
238
+ // for this dest_node_id
239
+ 6: optional bool eos
240
+ }
241
+
242
+ struct TTransmitDataResult {
243
+ // required in V1
244
+ 1: optional Status.TStatus status
245
+ }
246
+
247
+
248
+ service ImpalaInternalService {
249
+ // Called by coord to start asynchronous execution of plan fragment in backend.
250
+ // Returns as soon as all incoming data streams have been set up.
251
+ TExecPlanFragmentResult ExecPlanFragment(1:TExecPlanFragmentParams params);
252
+
253
+ // Periodically called by backend to report status of plan fragment execution
254
+ // back to coord; also called when execution is finished, for whatever reason.
255
+ TReportExecStatusResult ReportExecStatus(1:TReportExecStatusParams params);
256
+
257
+ // Called by coord to cancel execution of a single plan fragment, which this
258
+ // coordinator initiated with a prior call to ExecPlanFragment.
259
+ // Cancellation is asynchronous.
260
+ TCancelPlanFragmentResult CancelPlanFragment(1:TCancelPlanFragmentParams params);
261
+
262
+ // Called by sender to transmit single row batch. Returns error indication
263
+ // if params.dest_fragment_instance_id or params.dest_node_id are unknown or if data couldn't be read.
264
+ TTransmitDataResult TransmitData(1:TTransmitDataParams params);
265
+ }