impala 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +28 -0
  5. data/Rakefile +15 -0
  6. data/impala.gemspec +25 -0
  7. data/lib/impala.rb +33 -0
  8. data/lib/impala/connection.rb +93 -0
  9. data/lib/impala/cursor.rb +86 -0
  10. data/lib/impala/protocol.rb +6 -0
  11. data/lib/impala/protocol/beeswax_constants.rb +14 -0
  12. data/lib/impala/protocol/beeswax_service.rb +747 -0
  13. data/lib/impala/protocol/beeswax_types.rb +192 -0
  14. data/lib/impala/protocol/data_constants.rb +12 -0
  15. data/lib/impala/protocol/data_sinks_constants.rb +12 -0
  16. data/lib/impala/protocol/data_sinks_types.rb +107 -0
  17. data/lib/impala/protocol/data_types.rb +77 -0
  18. data/lib/impala/protocol/descriptors_constants.rb +12 -0
  19. data/lib/impala/protocol/descriptors_types.rb +266 -0
  20. data/lib/impala/protocol/exprs_constants.rb +12 -0
  21. data/lib/impala/protocol/exprs_types.rb +345 -0
  22. data/lib/impala/protocol/facebook_service.rb +706 -0
  23. data/lib/impala/protocol/fb303_constants.rb +14 -0
  24. data/lib/impala/protocol/fb303_types.rb +24 -0
  25. data/lib/impala/protocol/frontend_constants.rb +12 -0
  26. data/lib/impala/protocol/frontend_types.rb +347 -0
  27. data/lib/impala/protocol/hive_metastore_constants.rb +52 -0
  28. data/lib/impala/protocol/hive_metastore_types.rb +697 -0
  29. data/lib/impala/protocol/impala_internal_service.rb +244 -0
  30. data/lib/impala/protocol/impala_internal_service_constants.rb +12 -0
  31. data/lib/impala/protocol/impala_internal_service_types.rb +362 -0
  32. data/lib/impala/protocol/impala_plan_service.rb +310 -0
  33. data/lib/impala/protocol/impala_plan_service_constants.rb +12 -0
  34. data/lib/impala/protocol/impala_plan_service_types.rb +36 -0
  35. data/lib/impala/protocol/impala_service.rb +260 -0
  36. data/lib/impala/protocol/impala_service_constants.rb +12 -0
  37. data/lib/impala/protocol/impala_service_types.rb +46 -0
  38. data/lib/impala/protocol/java_constants_constants.rb +42 -0
  39. data/lib/impala/protocol/java_constants_types.rb +14 -0
  40. data/lib/impala/protocol/opcodes_constants.rb +12 -0
  41. data/lib/impala/protocol/opcodes_types.rb +309 -0
  42. data/lib/impala/protocol/partitions_constants.rb +12 -0
  43. data/lib/impala/protocol/partitions_types.rb +44 -0
  44. data/lib/impala/protocol/plan_nodes_constants.rb +12 -0
  45. data/lib/impala/protocol/plan_nodes_types.rb +345 -0
  46. data/lib/impala/protocol/planner_constants.rb +12 -0
  47. data/lib/impala/protocol/planner_types.rb +78 -0
  48. data/lib/impala/protocol/runtime_profile_constants.rb +12 -0
  49. data/lib/impala/protocol/runtime_profile_types.rb +97 -0
  50. data/lib/impala/protocol/state_store_service.rb +244 -0
  51. data/lib/impala/protocol/state_store_service_constants.rb +12 -0
  52. data/lib/impala/protocol/state_store_service_types.rb +185 -0
  53. data/lib/impala/protocol/state_store_subscriber_service.rb +82 -0
  54. data/lib/impala/protocol/state_store_subscriber_service_constants.rb +12 -0
  55. data/lib/impala/protocol/state_store_subscriber_service_types.rb +67 -0
  56. data/lib/impala/protocol/statestore_types_constants.rb +12 -0
  57. data/lib/impala/protocol/statestore_types_types.rb +77 -0
  58. data/lib/impala/protocol/status_constants.rb +12 -0
  59. data/lib/impala/protocol/status_types.rb +44 -0
  60. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  61. data/lib/impala/protocol/types_constants.rb +12 -0
  62. data/lib/impala/protocol/types_types.rb +86 -0
  63. data/lib/impala/version.rb +3 -0
  64. data/thrift/Data.thrift +52 -0
  65. data/thrift/DataSinks.thrift +61 -0
  66. data/thrift/Descriptors.thrift +115 -0
  67. data/thrift/Exprs.thrift +134 -0
  68. data/thrift/Frontend.thrift +193 -0
  69. data/thrift/ImpalaInternalService.thrift +265 -0
  70. data/thrift/ImpalaPlanService.thrift +44 -0
  71. data/thrift/ImpalaService.thrift +105 -0
  72. data/thrift/JavaConstants.thrift +60 -0
  73. data/thrift/Opcodes.thrift +317 -0
  74. data/thrift/Partitions.thrift +41 -0
  75. data/thrift/PlanNodes.thrift +184 -0
  76. data/thrift/Planner.thrift +72 -0
  77. data/thrift/RuntimeProfile.thrift +58 -0
  78. data/thrift/StateStoreService.thrift +121 -0
  79. data/thrift/StateStoreSubscriberService.thrift +64 -0
  80. data/thrift/StatestoreTypes.thrift +50 -0
  81. data/thrift/Status.thrift +31 -0
  82. data/thrift/Types.thrift +71 -0
  83. data/thrift/beeswax.thrift +175 -0
  84. data/thrift/fb303.thrift +112 -0
  85. data/thrift/hive_metastore.thrift +528 -0
  86. metadata +206 -0
@@ -0,0 +1,193 @@
1
+ // Copyright 2012 Cloudera Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ namespace cpp impala
16
+ namespace java com.cloudera.impala.thrift
17
+ namespace rb Impala.Protocol
18
+
19
+ include "Types.thrift"
20
+ include "ImpalaInternalService.thrift"
21
+ include "PlanNodes.thrift"
22
+ include "Planner.thrift"
23
+ include "Descriptors.thrift"
24
+
25
+ // These are supporting structs for JniFrontend.java, which serves as the glue
26
+ // between our C++ execution environment and the Java frontend.
27
+
28
+ // Arguments to getTableNames, which returns a list of tables that match an
29
+ // optional pattern, optionally restricted to a single database.
30
+ struct TGetTablesParams {
31
+ // Database to search. If not set, match tables in all DBs
32
+ 1: optional string db
33
+
34
+ // Pattern for table names. If not set, match every table
35
+ 2: optional string pattern
36
+ }
37
+
38
+ // Result of getTableNames: a list of unqualified table names
39
+ struct TGetTablesResult {
40
+ 1: list<string> tables // declared without an explicit required/optional keyword
41
+ }
42
+
43
+ // Arguments to getDbNames, which returns a list of databases that match an
44
+ // optional pattern.
45
+ struct TGetDbsParams {
46
+ // Pattern for database names. If not set, match every database
47
+ 1: optional string pattern
48
+ }
49
+
50
+ // Result of getDbNames: a list of database names
51
+ struct TGetDbsResult {
52
+ 1: list<string> dbs // declared without an explicit required/optional keyword
53
+ }
54
+
55
+ struct TColumnDesc { // Describes one column by its name and primitive type
56
+ 1: required string columnName // name of the column
57
+ 2: required Types.TPrimitiveType columnType // type of the column; TPrimitiveType comes from the included Types.thrift
58
+ }
59
+
60
+ // Arguments to DescribeTable, which returns a list of column descriptors for a
61
+ // given table.
62
+ struct TDescribeTableParams {
63
+ 1: optional string db // NOTE(review): which database is used when this is unset is not stated here — confirm
64
+ 2: required string table_name // name of the table to describe
65
+ }
66
+
67
+ // Results of a call to describeTable(): the column descriptors of the table
68
+ struct TDescribeTableResult {
69
+ 1: required list<TColumnDesc> columns
70
+ }
71
+
72
+ // Per-client session state; currently holds only the default database
73
+ struct TSessionState {
74
+ // The default database, changed by USE <database> queries.
75
+ 1: required string database
76
+ }
77
+
78
+ struct TClientRequest { // A client's statement together with its query options and session state
79
+ // text of the statement to be executed. NOTE(review): originally documented as a "select stmt"; confirm whether DML/DDL text is also accepted
80
+ 1: required string stmt
81
+
82
+ // query options (see ImpalaInternalService.TQueryOptions)
83
+ 2: required ImpalaInternalService.TQueryOptions queryOptions
84
+
85
+ // per-client session state (see TSessionState)
86
+ 3: required TSessionState sessionState;
87
+ }
88
+
89
+ struct TResultSetMetadata { // Metadata of a query result set
90
+ 1: required list<TColumnDesc> columnDescs // column descriptors of the result set
91
+ }
92
+
93
+ // Describes a set of changes to make to the metastore
94
+ struct TCatalogUpdate {
95
+ // Unqualified name of the table to change
96
+ 1: required string target_table;
97
+
98
+ // Database that the table belongs to
99
+ 2: required string db_name;
100
+
101
+ // Set of partitions that are new and need to be created. May
102
+ // include the root partition (represented by the empty string).
103
+ 3: required set<string> created_partitions;
104
+ }
105
+
106
+ // Metadata required to finalize a query - that is, to clean up after the query is done.
107
+ // Only relevant for INSERT queries.
108
+ struct TFinalizeParams {
109
+ // True if the INSERT query was OVERWRITE, rather than INTO
110
+ 1: required bool is_overwrite;
111
+
112
+ // The base directory in HDFS of the table targeted by this INSERT
113
+ 2: required string hdfs_base_dir;
114
+
115
+ // The name of the target table
116
+ 3: required string table_name;
117
+
118
+ // The database of the target table
119
+ 4: required string table_db;
120
+ }
121
+
122
+ // Result of call to ImpalaPlanService/JniFrontend.CreateQueryRequest()
123
+ struct TQueryExecRequest {
124
+ // global descriptor table for all fragments
125
+ 1: optional Descriptors.TDescriptorTable desc_tbl
126
+
127
+ // fragments[i] may consume the output of fragments[j > i];
128
+ // fragments[0] is the root fragment and also the coordinator fragment, if
129
+ // it is unpartitioned.
130
+ 2: required list<Planner.TPlanFragment> fragments
131
+
132
+ // Specifies the destination fragment of the output of each fragment.
133
+ // dest_fragment_idx.size() == fragments.size() - 1 and
134
+ // fragments[i] sends its output to fragments[dest_fragment_idx[i-1]]
135
+ 3: optional list<i32> dest_fragment_idx
136
+
137
+ // A map from scan node ids to a list of scan range locations.
138
+ // The node ids refer to scan nodes in fragments[].plan_tree
139
+ 4: optional map<Types.TPlanNodeId, list<Planner.TScanRangeLocations>>
140
+ per_node_scan_ranges
141
+
142
+ // Metadata of the query result set (only for select)
143
+ 5: optional TResultSetMetadata result_set_metadata
144
+
145
+ // Set if the query needs finalization after it executes
146
+ 6: optional TFinalizeParams finalize_params
147
+
148
+ 7: required ImpalaInternalService.TQueryGlobals query_globals // global query parameters (see ImpalaInternalService.TQueryGlobals)
149
+ }
150
+
151
+ enum TDdlType { // Kind of DDL statement; carried in TDdlExecRequest.ddl_type
152
+ SHOW_TABLES,
153
+ SHOW_DBS,
154
+ USE,
155
+ DESCRIBE
156
+ }
157
+
158
+ struct TDdlExecRequest { // Parameters of a single DDL statement
159
+ 1: required TDdlType ddl_type // kind of DDL statement; the comments below say which fields each kind uses
160
+
161
+ // Database name; used for USE and DESCRIBE
162
+ 2: optional string database;
163
+
164
+ // Table name (not fully-qualified) for DESCRIBE
165
+ 3: optional string describe_table;
166
+
167
+ // Pattern to match table names against for SHOW (NOTE(review): presumably also database names for SHOW_DBS — confirm)
168
+ 4: optional string show_pattern;
169
+ }
170
+
171
+ // Result of call to createExecRequest()
172
+ struct TExecRequest {
173
+ 1: required Types.TStmtType stmt_type; // kind of statement; determines which of fields 5 and 6 is set
174
+
175
+ 2: optional string sql_stmt; // NOTE(review): presumably the text of the statement — confirm
176
+
177
+ // Globally unique id for this request. Assigned by the planner.
178
+ 3: required Types.TUniqueId request_id
179
+
180
+ // Query options, copied from the corresponding TClientRequest
181
+ 4: required ImpalaInternalService.TQueryOptions query_options;
182
+
183
+ // TQueryExecRequest for the backend
184
+ // Set iff stmt_type is QUERY or DML
185
+ 5: optional TQueryExecRequest query_exec_request
186
+
187
+ // TDdlExecRequest describing the DDL statement
188
+ 6: optional TDdlExecRequest ddl_exec_request
189
+
190
+ // Metadata of the query result set (not set for DML)
191
+ 7: optional TResultSetMetadata result_set_metadata
192
+ }
193
+
@@ -0,0 +1,265 @@
1
+ // Copyright 2012 Cloudera Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // This file contains the details of the protocol between coordinators and backends.
17
+
18
+ namespace cpp impala
19
+ namespace java com.cloudera.impala.thrift
20
+ namespace rb Impala.Protocol
21
+
22
+ include "Status.thrift"
23
+ include "Types.thrift"
24
+ include "Exprs.thrift"
25
+ include "Descriptors.thrift"
26
+ include "PlanNodes.thrift"
27
+ include "Planner.thrift"
28
+ include "DataSinks.thrift"
29
+ include "Data.thrift"
30
+ include "RuntimeProfile.thrift"
31
+ include "ImpalaService.thrift"
32
+ include "JavaConstants.thrift"
33
+
34
+ // Query options that correspond to ImpalaService.ImpalaQueryOptions
35
+ // TODO: make all of these optional, otherwise it will be impossible to
36
+ // retire options and do rolling upgrades between releases
37
+ struct TQueryOptions {
38
+ 1: required bool abort_on_error = 0 // bool defaults in this struct are written as 0 (false) or 1 (true)
39
+ 2: required i32 max_errors = 0
40
+ 3: required bool disable_codegen = 0
41
+ 4: required i32 batch_size = 0
42
+
43
+ // return_as_ascii is not listed in ImpalaService.ImpalaQueryOptions because Beeswax
44
+ // should only return ASCII. This option is only for internal testing.
45
+ // If true, return query results in ASCII format (TColumnValue.stringVal),
46
+ // otherwise return results in their native format (each TColumnValue
47
+ // uses the field corresponding to the column's native type).
48
+ 5: required bool return_as_ascii = 1
49
+
50
+ 6: required i32 num_nodes = JavaConstants.NUM_NODES_ALL // default is a constant from the included JavaConstants.thrift
51
+ 7: required i64 max_scan_range_length = 0
52
+ 8: required i32 num_scanner_threads = 0
53
+ 9: required i32 max_io_buffers = 0
54
+ 10: required bool allow_unsupported_formats = 0
55
+ 11: optional i64 default_order_by_limit = -1 // the only optional field; NOTE(review): -1 presumably means "no default limit" — confirm
56
+ }
57
+
58
+ // A scan range plus the parameters needed to execute that scan.
59
+ struct TScanRangeParams {
60
+ 1: required PlanNodes.TScanRange scan_range // the range to scan; TScanRange comes from the included PlanNodes.thrift
61
+ 2: optional i32 volume_id = -1 // NOTE(review): -1 presumably means the volume is unknown — confirm
62
+ }
63
+
64
+ // Specification of one output destination of a plan fragment
65
+ struct TPlanFragmentDestination {
66
+ // the globally unique id of the destination fragment instance
67
+ 1: required Types.TUniqueId fragment_instance_id
68
+
69
+ // ... which is being executed on this server
70
+ 2: required Types.THostPort server
71
+ }
72
+
73
+ // Parameters for a single execution instance of a particular TPlanFragment
74
+ // TODO: for range partitioning, we also need to specify the range boundaries
75
+ struct TPlanFragmentExecParams {
76
+ // a globally unique id assigned to the entire query
77
+ 1: required Types.TUniqueId query_id
78
+
79
+ // a globally unique id assigned to this particular execution instance of
80
+ // a TPlanFragment
81
+ 2: required Types.TUniqueId fragment_instance_id
82
+
83
+ // initial scan ranges for each scan node in TPlanFragment.plan_tree, keyed by plan node id
84
+ 3: required map<Types.TPlanNodeId, list<TScanRangeParams>> per_node_scan_ranges
85
+
86
+ // number of senders for ExchangeNodes contained in TPlanFragment.plan_tree, keyed by plan node id;
87
+ // needed to create a DataStreamRecvr
88
+ 4: required map<Types.TPlanNodeId, i32> per_exch_num_senders
89
+
90
+ // Output destinations, one per output partition.
91
+ // The partitioning of the output is specified by
92
+ // TPlanFragment.output_sink.output_partition.
93
+ // The number of output partitions is destinations.size().
94
+ 5: list<TPlanFragmentDestination> destinations // declared without an explicit required/optional keyword, unlike fields 1-4
95
+ }
96
+
97
+ // Global query parameters assigned by the coordinator.
98
+ struct TQueryGlobals {
99
+ // String containing a timestamp set as the current time. NOTE(review): format and time zone are not specified here — confirm
100
+ 1: required string now_string
101
+ }
102
+
103
+
104
+ // Service Protocol Details
105
+
106
+ enum ImpalaInternalServiceVersion { // Protocol version tag; sent as protocol_version in each RPC parameter struct of ImpalaInternalService
107
+ V1
108
+ }
109
+
110
+
111
+ // ExecPlanFragment
112
+
113
+ struct TExecPlanFragmentParams { // Argument of ImpalaInternalService.ExecPlanFragment
114
+ 1: required ImpalaInternalServiceVersion protocol_version // the only Thrift-required field; the rest are optional in the IDL and annotated per protocol version
115
+
116
+ // the plan fragment; required in V1
117
+ 2: optional Planner.TPlanFragment fragment
118
+
119
+ // descriptor table; required in V1
120
+ 3: optional Descriptors.TDescriptorTable desc_tbl
121
+
122
+ // parameters for this execution instance (see TPlanFragmentExecParams); required in V1
123
+ 4: optional TPlanFragmentExecParams params
124
+
125
+ // Initiating coordinator.
126
+ // TODO: determine whether we can get this somehow via the Thrift rpc mechanism.
127
+ // required in V1
128
+ 5: optional Types.THostPort coord
129
+
130
+ // backend number assigned by coord to identify backend
131
+ // required in V1
132
+ 6: optional i32 backend_num
133
+
134
+ // Global query parameters assigned by coordinator.
135
+ // required in V1
136
+ 7: optional TQueryGlobals query_globals
137
+
138
+ // options for the query (see TQueryOptions)
139
+ // required in V1
140
+ 8: optional TQueryOptions query_options
141
+ }
142
+
143
+ struct TExecPlanFragmentResult { // Return value of ImpalaInternalService.ExecPlanFragment
144
+ // status of the call; required in V1
145
+ 1: optional Status.TStatus status
146
+ }
147
+
148
+
149
+ // ReportExecStatus
150
+
151
+ // The results of an INSERT query, sent to the coordinator as part of
152
+ // TReportExecStatusParams (field insert_exec_status)
153
+ struct TInsertExecStatus {
154
+ // Number of rows appended by an INSERT, per-partition.
155
+ // The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the
156
+ // root in an unpartitioned table being the empty string.
157
+ // The target table name is recorded in the corresponding TQueryExecRequest
158
+ 1: optional map<string, i64> num_appended_rows
159
+
160
+ // A map from temporary absolute file path to final absolute destination. The
161
+ // coordinator performs these moves after the query completes.
162
+ 2: required map<string, string> files_to_move;
163
+ }
164
+
165
+ struct TReportExecStatusParams { // Argument of ImpalaInternalService.ReportExecStatus
166
+ 1: required ImpalaInternalServiceVersion protocol_version // the only Thrift-required field; the rest are optional in the IDL and annotated per protocol version
167
+
168
+ // id of the query being reported on; required in V1
169
+ 2: optional Types.TUniqueId query_id
170
+
171
+ // passed into ExecPlanFragment() as TExecPlanFragmentParams.backend_num
172
+ // required in V1
173
+ 3: optional i32 backend_num
174
+
175
+ // id of the fragment instance being reported on; required in V1
176
+ 4: optional Types.TUniqueId fragment_instance_id
177
+
178
+ // Status of fragment execution; any error status means it's done.
179
+ // required in V1
180
+ 5: optional Status.TStatus status
181
+
182
+ // If true, fragment finished executing.
183
+ // required in V1
184
+ 6: optional bool done
185
+
186
+ // cumulative runtime profile
187
+ // required in V1
188
+ 7: optional RuntimeProfile.TRuntimeProfileTree profile
189
+
190
+ // Cumulative structural changes made by a table sink (see TInsertExecStatus)
191
+ // optional in V1
192
+ 8: optional TInsertExecStatus insert_exec_status;
193
+
194
+ // New errors that have not been reported to the coordinator
195
+ // optional in V1
196
+ 9: optional list<string> error_log
197
+ }
198
+
199
+ struct TReportExecStatusResult { // Return value of ImpalaInternalService.ReportExecStatus
200
+ // status of the call; required in V1
201
+ 1: optional Status.TStatus status
202
+ }
203
+
204
+
205
+ // CancelPlanFragment
206
+
207
+ struct TCancelPlanFragmentParams { // Argument of ImpalaInternalService.CancelPlanFragment
208
+ 1: required ImpalaInternalServiceVersion protocol_version
209
+
210
+ // id of the fragment instance to cancel; required in V1
211
+ 2: optional Types.TUniqueId fragment_instance_id
212
+ }
213
+
214
+ struct TCancelPlanFragmentResult { // Return value of ImpalaInternalService.CancelPlanFragment
215
+ // status of the call; required in V1
216
+ 1: optional Status.TStatus status
217
+ }
218
+
219
+
220
+ // TransmitData
221
+
222
+ struct TTransmitDataParams { // Argument of ImpalaInternalService.TransmitData
223
+ 1: required ImpalaInternalServiceVersion protocol_version
224
+
225
+ // id of the receiving fragment instance; required in V1
226
+ 2: optional Types.TUniqueId dest_fragment_instance_id
227
+
228
+ // for debugging purposes; currently ignored (the field below is commented out, so field id 3 is not declared)
229
+ //3: optional Types.TUniqueId src_fragment_instance_id
230
+
231
+ // id of the receiving plan node; required in V1
232
+ 4: optional Types.TPlanNodeId dest_node_id
233
+
234
+ // the row batch being transmitted; required in V1
235
+ 5: optional Data.TRowBatch row_batch
236
+
237
+ // if set to true, indicates that no more row batches will be sent
238
+ // for this dest_node_id
239
+ 6: optional bool eos
240
+ }
241
+
242
+ struct TTransmitDataResult { // Return value of ImpalaInternalService.TransmitData
243
+ // status of the call; required in V1
244
+ 1: optional Status.TStatus status
245
+ }
246
+
247
+
248
+ service ImpalaInternalService {
249
+ // Called by coord to start asynchronous execution of plan fragment in backend.
250
+ // Returns as soon as all incoming data streams have been set up.
251
+ TExecPlanFragmentResult ExecPlanFragment(1:TExecPlanFragmentParams params);
252
+
253
+ // Periodically called by backend to report status of plan fragment execution
254
+ // back to coord; also called when execution is finished, for whatever reason.
255
+ TReportExecStatusResult ReportExecStatus(1:TReportExecStatusParams params);
256
+
257
+ // Called by coord to cancel execution of a single plan fragment, which this
258
+ // coordinator initiated with a prior call to ExecPlanFragment.
259
+ // Cancellation is asynchronous.
260
+ TCancelPlanFragmentResult CancelPlanFragment(1:TCancelPlanFragmentParams params);
261
+
262
+ // Called by sender to transmit single row batch. Returns error indication if
263
+ // params.dest_fragment_instance_id or params.dest_node_id are unknown or if data couldn't be read.
264
+ TTransmitDataResult TransmitData(1:TTransmitDataParams params);
265
+ }