bigrecord-driver 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/bigrecord-driver +41 -40
- data/bin/hbase-driver +12 -12
- data/lib/big_record_driver.rb +13 -6
- data/lib/big_record_driver/client.rb +32 -29
- data/lib/big_record_driver/column_descriptor.rb +13 -16
- data/lib/big_record_driver/exceptions.rb +12 -10
- data/lib/big_record_driver/hbase_driver/server.rb +351 -320
- data/lib/big_record_driver/server.rb +123 -0
- data/lib/big_record_driver/version.rb +4 -2
- data/test/abstract_test_client.rb +5 -6
- data/test/test_client_hbase.rb +13 -17
- metadata +3 -5
- data/lib/big_record_driver/bigrecord_server.rb +0 -119
- data/lib/big_record_driver/driver_manager.rb +0 -34
- data/test/test_driver_manager.rb +0 -46
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.7
|
data/bin/bigrecord-driver
CHANGED
@@ -62,7 +62,7 @@ unset RUBYLIB
|
|
62
62
|
if [ "$DRIVERNAME" == "" ]; then
|
63
63
|
echo "Driver type was not specified"
|
64
64
|
exit 1
|
65
|
-
elif [ "$DRIVERNAME" != "hbase"
|
65
|
+
elif [ "$DRIVERNAME" != "hbase" ]; then
|
66
66
|
echo "Driver type \"$DRIVERNAME\" is invalid"
|
67
67
|
exit 1
|
68
68
|
fi
|
@@ -126,30 +126,30 @@ start() {
|
|
126
126
|
refresh_pids
|
127
127
|
|
128
128
|
if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
|
129
|
-
|
130
|
-
|
129
|
+
echo -e "\nAlready running (pid="$PID")."
|
130
|
+
exit 1
|
131
131
|
else
|
132
132
|
rm -f $LOGS_DIR/$PORT.log
|
133
133
|
nohup jruby $DRIVER $PORT >> $LOGS_DIR/$PORT.log 2>&1 < /dev/null &
|
134
134
|
PID=$!
|
135
135
|
if [ "$PID" != "" ] ; then
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
136
|
+
# monitor the log file for the message saying that the server is started
|
137
|
+
for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
|
138
|
+
sleep 1
|
139
|
+
echo -n "."
|
140
|
+
if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
|
141
|
+
break
|
142
|
+
fi
|
143
|
+
done
|
144
|
+
|
145
|
+
if [ "$i" == $STARTUP_TIMEOUT ] ; then
|
146
|
+
echo -e "\nStartup timeout: couldn't start the DRb server."
|
147
|
+
else
|
148
|
+
echo $PID > $PIDS_DIR/$PORT.pid
|
149
|
+
fi
|
150
|
+
echo ""
|
151
151
|
else
|
152
|
-
|
152
|
+
echo -e "\nAn error occured while starting the DRb server."
|
153
153
|
fi
|
154
154
|
fi
|
155
155
|
}
|
@@ -161,8 +161,8 @@ stop() {
|
|
161
161
|
|
162
162
|
if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
|
163
163
|
echo "Stopping driver (pid = $PID)." >> $LOGS_DIR/$PORT.log
|
164
|
-
|
165
|
-
|
164
|
+
kill $PID
|
165
|
+
rm $PIDS_DIR/$PORT.pid
|
166
166
|
else
|
167
167
|
echo "No $DRIVERNAME driver to kill."
|
168
168
|
fi
|
@@ -183,24 +183,25 @@ start_debug() {
|
|
183
183
|
}
|
184
184
|
|
185
185
|
case "$ACTION" in
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
186
|
+
start)
|
187
|
+
start
|
188
|
+
;;
|
189
|
+
stop)
|
190
|
+
stop
|
191
|
+
;;
|
192
|
+
restart)
|
193
|
+
stop
|
194
|
+
start
|
195
|
+
;;
|
196
|
+
status)
|
197
|
+
status
|
198
|
+
;;
|
199
|
+
start_debug)
|
200
|
+
start_debug
|
201
|
+
;;
|
202
|
+
*)
|
203
|
+
print_usage
|
204
|
+
exit 1
|
205
205
|
esac
|
206
|
+
|
206
207
|
exit 0
|
data/bin/hbase-driver
CHANGED
@@ -11,22 +11,22 @@ if ARGV.include?("-l")
|
|
11
11
|
|
12
12
|
# Make sure that the folder exists
|
13
13
|
if File.exists?(hbase_path)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
# We're only going to include the jar files we need.
|
15
|
+
required_jars = Dir[hbase_path+"/*.jar",
|
16
|
+
hbase_path+"/lib/commons-logging*.jar",
|
17
|
+
hbase_path+"/lib/zookeeper*.jar",
|
18
|
+
hbase_path+"/lib/log4j*.jar",
|
19
|
+
hbase_path+"/lib/hadoop*.jar"]
|
20
|
+
classpath = required_jars.join(":")
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
args.delete_at(switch_index)
|
23
|
+
args.delete_at(switch_index)
|
24
|
+
args = args + ["-c", '"'+classpath+'"']
|
25
25
|
|
26
26
|
# Otherwise we'll warn the user and quit
|
27
27
|
else
|
28
|
-
|
29
|
-
|
28
|
+
puts "Folder #{hbase_path} does not exist"
|
29
|
+
exit
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
data/lib/big_record_driver.rb
CHANGED
@@ -1,7 +1,14 @@
|
|
1
|
-
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
3
|
+
end
|
4
|
+
end
|
2
5
|
|
3
|
-
|
4
|
-
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
6
|
+
BRD_ROOT = File.dirname(__FILE__)
|
7
|
+
|
8
|
+
require BRD_ROOT + '/big_record_driver/client'
|
9
|
+
require BRD_ROOT + '/big_record_driver/exceptions'
|
10
|
+
require BRD_ROOT + '/big_record_driver/column_descriptor'
|
11
|
+
require BRD_ROOT + '/big_record_driver/version'
|
12
|
+
|
13
|
+
# Aliasing the old namespace
|
14
|
+
BigRecordDriver = BigRecord::Driver
|
@@ -1,36 +1,39 @@
|
|
1
|
-
require '
|
2
|
-
require 'activesupport'
|
1
|
+
require 'active_support'
|
3
2
|
require 'set'
|
4
3
|
require 'drb'
|
5
4
|
|
6
|
-
module
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
config
|
11
|
-
|
12
|
-
config
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
5
|
+
module BigRecord
|
6
|
+
module Driver
|
7
|
+
|
8
|
+
class Client
|
9
|
+
attr_accessor :config, :server
|
10
|
+
|
11
|
+
def initialize(config={}) # :nodoc:
|
12
|
+
config = config.symbolize_keys
|
13
|
+
config[:drb_host] ||= '127.0.0.1'
|
14
|
+
config[:drb_port] ||= 40000
|
15
|
+
|
16
|
+
@config = config
|
17
|
+
|
18
|
+
DRb.start_service nil
|
19
|
+
begin
|
20
|
+
@server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
|
21
|
+
rescue DRb::DRbConnError
|
22
|
+
raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
|
23
|
+
"at #{@config[:drb_host]}:#{@config[:drb_port]}."
|
24
|
+
end
|
25
|
+
@server.configure(@config)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Delegate the methods to the server
|
29
|
+
def method_missing(method, *args)
|
30
|
+
@server.send(method, *args)
|
31
|
+
end
|
32
|
+
|
33
|
+
def respond_to?(method)
|
34
|
+
super
|
22
35
|
end
|
23
|
-
@server.configure(@config)
|
24
|
-
end
|
25
|
-
|
26
|
-
# Delegate the methods to the server
|
27
|
-
def method_missing(method, *args)
|
28
|
-
@server.send(method, *args)
|
29
|
-
end
|
30
|
-
|
31
|
-
def respond_to?(method)
|
32
|
-
super
|
33
36
|
end
|
34
|
-
|
37
|
+
|
35
38
|
end
|
36
39
|
end
|
@@ -1,23 +1,20 @@
|
|
1
|
-
module
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
2
3
|
|
3
|
-
|
4
|
+
class ColumnDescriptor
|
5
|
+
attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
attr_accessor :in_memory
|
8
|
-
attr_accessor :bloom_filter
|
9
|
-
attr_accessor :compression
|
7
|
+
def initialize(name, options={})
|
8
|
+
raise ArgumentError, "name is mandatory" unless name
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@bloom_filter = options[:bloom_filter]
|
18
|
-
@compression = options[:compression]
|
10
|
+
@name = name.to_s
|
11
|
+
@versions = options[:versions]
|
12
|
+
@in_memory = options[:in_memory]
|
13
|
+
@bloom_filter = options[:bloom_filter]
|
14
|
+
@compression = options[:compression]
|
15
|
+
end
|
19
16
|
end
|
20
17
|
|
21
18
|
end
|
22
|
-
|
23
19
|
end
|
20
|
+
|
@@ -1,12 +1,14 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
3
|
+
class DriverError < StandardError
|
4
|
+
end
|
5
|
+
class TableNotFound < DriverError
|
6
|
+
end
|
7
|
+
class TableAlreadyExists < DriverError
|
8
|
+
end
|
9
|
+
class JavaError < DriverError
|
10
|
+
end
|
11
|
+
class ConnectionError < DriverError
|
12
|
+
end
|
11
13
|
end
|
12
14
|
end
|
@@ -1,396 +1,427 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../column_descriptor'
|
2
2
|
require File.dirname(__FILE__) + '/../exceptions'
|
3
|
-
require File.dirname(__FILE__) + '/../
|
3
|
+
require File.dirname(__FILE__) + '/../server'
|
4
4
|
|
5
|
-
module
|
5
|
+
module BigRecord
|
6
|
+
module Driver
|
6
7
|
|
7
|
-
class HbaseServer <
|
8
|
-
|
8
|
+
class HbaseServer < Server
|
9
|
+
java_import "java.util.TreeMap"
|
10
|
+
include_package "org.apache.hadoop.hbase.client"
|
11
|
+
java_import "org.apache.hadoop.hbase.KeyValue"
|
12
|
+
java_import "org.apache.hadoop.hbase.io.hfile.Compression"
|
13
|
+
java_import "org.apache.hadoop.hbase.HBaseConfiguration"
|
14
|
+
java_import "org.apache.hadoop.hbase.HTableDescriptor"
|
15
|
+
java_import "org.apache.hadoop.hbase.HColumnDescriptor"
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
include_class "org.apache.hadoop.hbase.HBaseConfiguration"
|
15
|
-
include_class "org.apache.hadoop.hbase.HConstants"
|
16
|
-
include_class "org.apache.hadoop.hbase.HStoreKey"
|
17
|
-
include_class "org.apache.hadoop.hbase.HTableDescriptor"
|
18
|
-
include_class "org.apache.hadoop.hbase.HColumnDescriptor"
|
17
|
+
# Establish the connection with HBase with the given configuration parameters.
|
18
|
+
def configure(config = {})
|
19
|
+
config[:zookeeper_quorum] ||= 'localhost'
|
20
|
+
config[:zookeeper_client_port] ||= '2181'
|
19
21
|
|
20
|
-
|
22
|
+
@config = config
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
config[:zookeeper_quorum] ||= 'localhost'
|
25
|
-
config[:zookeeper_client_port] ||= '2181'
|
24
|
+
init_connection
|
25
|
+
end
|
26
26
|
|
27
|
-
|
27
|
+
# Atomic row insertion/update. Example:
|
28
|
+
# update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
|
29
|
+
# 'attribute:travel_rank' => "--- 0.90124565\n"})
|
30
|
+
# => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
|
31
|
+
def update(table_name, row, values, timestamp=nil)
|
32
|
+
safe_exec do
|
33
|
+
return nil unless row
|
28
34
|
|
29
|
-
|
30
|
-
|
35
|
+
table = connect_table(table_name)
|
36
|
+
row_lock = table.lockRow(row.to_bytes)
|
31
37
|
|
32
|
-
|
33
|
-
|
34
|
-
# 'attribute:travel_rank' => "--- 0.90124565\n"})
|
35
|
-
# => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
|
36
|
-
def update(table_name, row, values, timestamp=nil)
|
37
|
-
safe_exec do
|
38
|
-
return nil unless row
|
39
|
-
table = connect_table(table_name)
|
38
|
+
put = generate_put(row, values, timestamp, row_lock)
|
39
|
+
table.put(put)
|
40
40
|
|
41
|
-
|
41
|
+
table.unlockRow(row_lock)
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
row
|
44
|
+
end
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
47
|
+
# Returns a column of a row. Example:
|
48
|
+
# get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
|
49
|
+
# => "--- 0.90124565\n"
|
50
|
+
#
|
51
|
+
# valid options:
|
52
|
+
# :timestamp => integer corresponding to the time when the record was saved in hbase
|
53
|
+
# :versions => number of versions to retreive, starting at the specified timestamp (or the latest)
|
54
|
+
def get(table_name, row, column, options={})
|
55
|
+
safe_exec do
|
56
|
+
return nil unless row
|
57
|
+
|
58
|
+
table = connect_table(table_name)
|
59
|
+
|
60
|
+
# Grab the version number if the client's using the old API,
|
61
|
+
# or retrieve only the lastest version by default
|
62
|
+
options[:versions] ||= options[:num_versions]
|
63
|
+
options[:versions] ||= 1
|
64
|
+
|
65
|
+
# validate the arguments
|
66
|
+
raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
|
67
|
+
|
68
|
+
get = generate_get(row, column, options)
|
69
|
+
result = table.get(get)
|
70
|
+
|
71
|
+
if (result.nil? || result.isEmpty)
|
72
|
+
return (options[:versions] == 1 ? nil : [])
|
73
|
+
else
|
74
|
+
output = result.list.collect do |keyvalue|
|
75
|
+
to_ruby_string(keyvalue.getValue)
|
76
|
+
end
|
51
77
|
|
52
|
-
|
53
|
-
|
54
|
-
# => "--- 0.90124565\n"
|
55
|
-
#
|
56
|
-
# valid options:
|
57
|
-
# :timestamp => integer corresponding to the time when the record was saved in hbase
|
58
|
-
# :versions => number of versions to retreive, starting at the specified timestamp (or the latest)
|
59
|
-
def get(table_name, row, column, options={})
|
60
|
-
safe_exec do
|
61
|
-
return nil unless row
|
62
|
-
table = connect_table(table_name)
|
63
|
-
|
64
|
-
# Retreive only the last version by default
|
65
|
-
options[:versions] ||= options[:num_versions]
|
66
|
-
options[:versions] ||= 1
|
67
|
-
|
68
|
-
# validate the arguments
|
69
|
-
raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
|
70
|
-
|
71
|
-
# get the raw data from hbase
|
72
|
-
unless options[:timestamp]
|
73
|
-
if options[:versions] == 1
|
74
|
-
raw_data = table.get(row, column)
|
75
|
-
else
|
76
|
-
raw_data = table.get(row,
|
77
|
-
column,
|
78
|
-
options[:versions])
|
78
|
+
return (options[:versions] == 1 ? output[0] : output)
|
79
|
+
end
|
79
80
|
end
|
80
|
-
else
|
81
|
-
raw_data = table.get(row,
|
82
|
-
column,
|
83
|
-
options[:timestamp],
|
84
|
-
options[:versions])
|
85
81
|
end
|
86
82
|
|
87
|
-
#
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
83
|
+
# Returns the last version of the given columns of the given row. The columns works with
|
84
|
+
# regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
|
85
|
+
# get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
|
86
|
+
# => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
|
87
|
+
def get_columns(table_name, row, columns, options={})
|
88
|
+
safe_exec do
|
89
|
+
return nil unless row
|
90
|
+
|
91
|
+
table_name = table_name.to_s
|
92
|
+
table = connect_table(table_name)
|
93
|
+
|
94
|
+
get = generate_get(row, columns, options)
|
95
|
+
result = table.get(get)
|
96
|
+
|
97
|
+
begin
|
98
|
+
parse_result(result)
|
99
|
+
rescue
|
100
|
+
nil
|
101
|
+
end
|
96
102
|
end
|
97
103
|
end
|
98
|
-
end
|
99
|
-
end
|
100
104
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
table_name = table_name.to_s
|
109
|
-
table = connect_table(table_name)
|
110
|
-
|
111
|
-
java_cols = Java::String[columns.size].new
|
112
|
-
columns.each_with_index do |col, i|
|
113
|
-
java_cols[i] = Java::String.new(col)
|
114
|
-
end
|
105
|
+
# Get consecutive rows. Example to get 100 records starting with the one specified and get all the
|
106
|
+
# columns in the column family 'attribute:' :
|
107
|
+
# get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
|
108
|
+
def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
|
109
|
+
safe_exec do
|
110
|
+
table_name = table_name.to_s
|
111
|
+
table = connect_table(table_name)
|
115
112
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
113
|
+
scan = Scan.new
|
114
|
+
scan.setStartRow(start_row.to_bytes) if start_row
|
115
|
+
scan.setStopRow(stop_row.to_bytes) if stop_row
|
116
|
+
|
117
|
+
columns.each do |column|
|
118
|
+
(column[-1,1] == ":") ?
|
119
|
+
scan.addFamily(column.gsub(":", "").to_bytes) :
|
120
|
+
scan.addColumn(column.to_bytes)
|
121
|
+
end
|
122
|
+
|
123
|
+
scanner = table.getScanner(scan)
|
122
124
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
if limit
|
126
|
+
results = scanner.next(limit)
|
127
|
+
else
|
128
|
+
results = []
|
129
|
+
while (row_result = scanner.next) != nil
|
130
|
+
results << row_result
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
output = []
|
135
|
+
results.each do |result|
|
136
|
+
output << parse_result(result)
|
137
|
+
end
|
138
|
+
scanner.close
|
139
|
+
|
140
|
+
return output
|
128
141
|
end
|
129
|
-
values["id"] = row
|
130
|
-
values
|
131
|
-
else
|
132
|
-
nil
|
133
142
|
end
|
134
|
-
end
|
135
|
-
end
|
136
143
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
144
|
+
# Delete a whole row.
|
145
|
+
def delete(table_name, row, timestamp = nil)
|
146
|
+
safe_exec do
|
147
|
+
table = connect_table(table_name)
|
148
|
+
|
149
|
+
if timestamp
|
150
|
+
row_lock = table.lockRow(row.to_bytes)
|
151
|
+
table.delete(Delete.new(row.to_bytes, timestamp, row_lock))
|
152
|
+
table.unlockRow(row_lock)
|
153
|
+
else
|
154
|
+
table.delete(Delete.new(row.to_bytes))
|
155
|
+
end
|
156
|
+
end
|
148
157
|
end
|
149
158
|
|
150
|
-
|
151
|
-
|
159
|
+
# Create a table
|
160
|
+
def create_table(table_name, column_descriptors)
|
161
|
+
safe_exec do
|
162
|
+
table_name = table_name.to_s
|
163
|
+
unless table_exists?(table_name)
|
164
|
+
tdesc = HTableDescriptor.new(table_name)
|
152
165
|
|
153
|
-
|
154
|
-
|
155
|
-
if stop_row
|
156
|
-
scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
|
157
|
-
else
|
158
|
-
scanner = table.getScanner(java_cols, start_row)
|
159
|
-
end
|
166
|
+
column_descriptors.each do |cd|
|
167
|
+
cdesc = generate_column_descriptor(cd)
|
160
168
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
end
|
168
|
-
values = {}
|
169
|
-
row_result.entrySet.each do |entry|
|
170
|
-
column_name = Java::String.new(entry.getKey).to_s
|
171
|
-
data = to_ruby_string(entry.getValue)
|
172
|
-
values[column_name] = data
|
169
|
+
tdesc.addFamily(cdesc)
|
170
|
+
end
|
171
|
+
@admin.createTable(tdesc)
|
172
|
+
else
|
173
|
+
raise TableAlreadyExists, table_name
|
174
|
+
end
|
173
175
|
end
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
176
|
+
end
|
177
|
+
|
178
|
+
# Delete a table
|
179
|
+
def drop_table(table_name)
|
180
|
+
safe_exec do
|
181
|
+
table_name = table_name.to_s
|
182
|
+
|
183
|
+
if @admin.tableExists(table_name)
|
184
|
+
@admin.disableTable(table_name)
|
185
|
+
@admin.deleteTable(table_name)
|
186
|
+
|
187
|
+
# Remove the table connection from the cache
|
188
|
+
@tables.delete(table_name) if @tables.has_key?(table_name)
|
189
|
+
else
|
190
|
+
raise TableNotFound, table_name
|
191
|
+
end
|
178
192
|
end
|
179
193
|
end
|
180
|
-
scanner.close
|
181
|
-
result
|
182
|
-
end
|
183
|
-
end
|
184
194
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
table = connect_table(table_name)
|
189
|
-
timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
|
190
|
-
end
|
191
|
-
end
|
195
|
+
def add_column(table_name, column_descriptor)
|
196
|
+
safe_exec do
|
197
|
+
table_name = table_name.to_s
|
192
198
|
|
193
|
-
|
194
|
-
|
195
|
-
safe_exec do
|
196
|
-
table_name = table_name.to_s
|
197
|
-
unless table_exists?(table_name)
|
198
|
-
tdesc = HTableDescriptor.new(table_name)
|
199
|
+
if @admin.tableExists(table_name)
|
200
|
+
@admin.disableTable(table_name)
|
199
201
|
|
200
|
-
|
201
|
-
|
202
|
+
cdesc = generate_column_descriptor(column_descriptor)
|
203
|
+
@admin.addColumn(table_name, cdesc)
|
202
204
|
|
203
|
-
|
205
|
+
@admin.enableTable(table_name)
|
206
|
+
else
|
207
|
+
raise TableNotFound, table_name
|
208
|
+
end
|
204
209
|
end
|
205
|
-
@admin.createTable(tdesc)
|
206
|
-
else
|
207
|
-
raise BigRecordDriver::TableAlreadyExists, table_name
|
208
210
|
end
|
209
|
-
end
|
210
|
-
end
|
211
211
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
212
|
+
def remove_column(table_name, column_name)
|
213
|
+
safe_exec do
|
214
|
+
table_name = table_name.to_s
|
215
|
+
column_name = column_name.to_s
|
216
|
+
|
217
|
+
if @admin.tableExists(table_name)
|
218
|
+
@admin.disableTable(table_name)
|
216
219
|
|
217
|
-
|
218
|
-
|
219
|
-
@admin.deleteTable(table_name)
|
220
|
+
column_name << ":" unless column_name =~ /:$/
|
221
|
+
@admin.deleteColumn(table_name, column_name)
|
220
222
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
223
|
+
@admin.enableTable(table_name)
|
224
|
+
else
|
225
|
+
raise TableNotFound, table_name
|
226
|
+
end
|
227
|
+
end
|
225
228
|
end
|
226
|
-
end
|
227
|
-
end
|
228
229
|
|
229
|
-
|
230
|
-
|
231
|
-
|
230
|
+
def modify_column(table_name, column_descriptor)
|
231
|
+
safe_exec do
|
232
|
+
table_name = table_name.to_s
|
232
233
|
|
233
|
-
|
234
|
-
|
234
|
+
if @admin.tableExists(table_name)
|
235
|
+
@admin.disableTable(table_name)
|
235
236
|
|
236
|
-
|
237
|
-
|
237
|
+
cdesc = generate_column_descriptor(column_descriptor)
|
238
|
+
@admin.modifyColumn(table_name, column_descriptor.name, cdesc)
|
238
239
|
|
239
|
-
|
240
|
-
|
241
|
-
|
240
|
+
@admin.enableTable(table_name)
|
241
|
+
else
|
242
|
+
raise TableNotFound, table_name
|
243
|
+
end
|
244
|
+
end
|
242
245
|
end
|
243
|
-
end
|
244
|
-
end
|
245
246
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
247
|
+
def truncate_table(table_name)
|
248
|
+
safe_exec do
|
249
|
+
table_name = table_name.to_s
|
250
|
+
table = connect_table(table_name)
|
251
|
+
tableDescriptor = table.getTableDescriptor
|
252
|
+
drop_table(table_name)
|
253
|
+
@admin.createTable(tableDescriptor)
|
254
|
+
end
|
255
|
+
end
|
250
256
|
|
251
|
-
|
252
|
-
|
257
|
+
def ping
|
258
|
+
safe_exec do
|
259
|
+
@admin.isMasterRunning
|
260
|
+
end
|
261
|
+
end
|
253
262
|
|
254
|
-
|
255
|
-
|
263
|
+
def table_exists?(table_name)
|
264
|
+
safe_exec do
|
265
|
+
@admin.tableExists(table_name.to_s)
|
266
|
+
end
|
267
|
+
end
|
256
268
|
|
257
|
-
|
258
|
-
|
259
|
-
|
269
|
+
def table_names
|
270
|
+
safe_exec do
|
271
|
+
@admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
|
272
|
+
end
|
260
273
|
end
|
261
|
-
end
|
262
|
-
end
|
263
274
|
|
264
|
-
|
265
|
-
safe_exec do
|
266
|
-
table_name = table_name.to_s
|
267
|
-
column_name = column_name.to_s
|
275
|
+
private
|
268
276
|
|
269
|
-
|
270
|
-
|
277
|
+
def init_connection
|
278
|
+
safe_exec do
|
279
|
+
@conf = HBaseConfiguration.new
|
280
|
+
@conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
|
281
|
+
@conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
|
282
|
+
@admin = HBaseAdmin.new(@conf)
|
283
|
+
@tables = {}
|
284
|
+
end
|
285
|
+
end
|
271
286
|
|
272
|
-
|
273
|
-
|
287
|
+
# Create a connection to an HBase table and keep it in memory.
|
288
|
+
def connect_table(table_name)
|
289
|
+
safe_exec do
|
290
|
+
table_name = table_name.to_s
|
291
|
+
return @tables[table_name] if @tables.has_key?(table_name)
|
274
292
|
|
275
|
-
|
276
|
-
|
277
|
-
|
293
|
+
if table_exists?(table_name)
|
294
|
+
@tables[table_name] = HTable.new(@conf, table_name)
|
295
|
+
else
|
296
|
+
if table_name and !table_name.empty?
|
297
|
+
raise TableNotFound, table_name
|
298
|
+
else
|
299
|
+
raise ArgumentError, "Table name not specified"
|
300
|
+
end
|
301
|
+
end
|
302
|
+
@tables[table_name]
|
303
|
+
end
|
278
304
|
end
|
279
|
-
end
|
280
|
-
end
|
281
305
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
306
|
+
# Create a Get object given parameters.
|
307
|
+
#
|
308
|
+
# @param [String] row
|
309
|
+
# @param [Array, String] A single (or collection) of strings
|
310
|
+
# fully qualified column name or column family (ends with ':').
|
311
|
+
# @param [Hash] options
|
312
|
+
#
|
313
|
+
# @return [Get] org.apache.hadoop.hbase.client.Get object
|
314
|
+
# corresponding to the arguments passed.
|
315
|
+
def generate_get(row, columns, options = {})
|
316
|
+
columns = [columns].flatten
|
317
|
+
|
318
|
+
get = Get.new(row.to_bytes)
|
319
|
+
|
320
|
+
columns.each do |column|
|
321
|
+
# If the column name ends with ':' then it's a column family.
|
322
|
+
(column[-1,1] == ":") ?
|
323
|
+
get.addFamily(column.gsub(":", "").to_bytes) :
|
324
|
+
get.addColumn(column.to_bytes)
|
325
|
+
end
|
291
326
|
|
292
|
-
|
293
|
-
safe_exec do
|
294
|
-
@admin.isMasterRunning
|
295
|
-
end
|
296
|
-
end
|
327
|
+
get.setMaxVersions(options[:versions]) if options[:versions]
|
297
328
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
end
|
302
|
-
end
|
329
|
+
# Need to add 1 to the timestamp due to the the API sillyness, i.e. min timestamp
|
330
|
+
# is inclusive while max timestamp is exclusive.
|
331
|
+
get.setTimeRange(java.lang.Long::MIN_VALUE, options[:timestamp]+1) if options[:timestamp]
|
303
332
|
|
304
|
-
|
305
|
-
|
306
|
-
@admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
|
307
|
-
end
|
308
|
-
end
|
333
|
+
return get
|
334
|
+
end
|
309
335
|
|
310
|
-
#
|
311
|
-
#
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
else
|
329
|
-
raise ArgumentError, "Table name not specified"
|
336
|
+
# Create a Put object given parameters.
|
337
|
+
#
|
338
|
+
# @param [String] row
|
339
|
+
# @param [Hash] Keys as the fully qualified column names and
|
340
|
+
# their associated values.
|
341
|
+
# @param [Integer] timestamp
|
342
|
+
# @param [org.apache.hadoop.hbase.client.RowLock] row_lock
|
343
|
+
#
|
344
|
+
# @return [Put] org.apache.hadoop.hbase.client.Put object
|
345
|
+
# corresponding to the arguments passed.
|
346
|
+
def generate_put(row, columns = {}, timestamp = nil, row_lock = nil)
|
347
|
+
put = row_lock ? Put.new(row.to_bytes, row_lock) : Put.new(row.to_bytes)
|
348
|
+
|
349
|
+
columns.each do |name, value|
|
350
|
+
family, qualifier = name.split(":")
|
351
|
+
timestamp ?
|
352
|
+
put.add(family.to_bytes, qualifier.to_bytes, timestamp, value.to_bytes) :
|
353
|
+
put.add(family.to_bytes, qualifier.to_bytes, value.to_bytes)
|
330
354
|
end
|
355
|
+
|
356
|
+
return put
|
331
357
|
end
|
332
|
-
@tables[table_name]
|
333
|
-
end
|
334
|
-
end
|
335
358
|
|
336
|
-
|
337
|
-
|
338
|
-
@
|
339
|
-
|
340
|
-
@
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
359
|
+
# Parse a Result object into a Hash.
|
360
|
+
#
|
361
|
+
# @param [Result] result
|
362
|
+
#
|
363
|
+
# @return [Hash] Fully qualified column names as keys
|
364
|
+
# and their corresponding values.
|
365
|
+
def parse_result(result)
|
366
|
+
output = {}
|
367
|
+
|
368
|
+
result.list.each do |keyvalue|
|
369
|
+
output[to_ruby_string(keyvalue.getColumn)] = to_ruby_string(keyvalue.getValue)
|
370
|
+
end
|
345
371
|
|
346
|
-
|
347
|
-
raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
|
348
|
-
raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
|
372
|
+
output["id"] = to_ruby_string(result.getRow)
|
349
373
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
374
|
+
return output
|
375
|
+
end
|
376
|
+
|
377
|
+
def generate_column_descriptor(column_descriptor)
|
378
|
+
raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
|
379
|
+
raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
|
380
|
+
|
381
|
+
if column_descriptor.compression
|
382
|
+
compression =
|
383
|
+
case column_descriptor.compression.to_s
|
384
|
+
when 'none'; Compression::Algorithm::NONE.getName()
|
385
|
+
when 'gz'; Compression::Algorithm::GZ.getName()
|
386
|
+
when 'lzo'; Compression::Algorithm::LZO.getName()
|
387
|
+
else
|
388
|
+
raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
|
389
|
+
end
|
358
390
|
end
|
359
|
-
end
|
360
391
|
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
392
|
+
n_versions = column_descriptor.versions
|
393
|
+
in_memory = column_descriptor.in_memory
|
394
|
+
|
395
|
+
# set the default values of the missing parameters
|
396
|
+
n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
|
397
|
+
compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
|
398
|
+
in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
|
399
|
+
block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
|
400
|
+
block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
|
401
|
+
bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
|
402
|
+
ttl ||= HColumnDescriptor::DEFAULT_TTL
|
403
|
+
|
404
|
+
# add the ':' at the end if the user didn't specify it
|
405
|
+
column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
|
406
|
+
|
407
|
+
cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
|
408
|
+
n_versions,
|
409
|
+
compression,
|
410
|
+
in_memory,
|
411
|
+
block_cache,
|
412
|
+
block_size,
|
413
|
+
ttl,
|
414
|
+
bloomfilter)
|
415
|
+
|
416
|
+
return cdesc
|
417
|
+
end
|
387
418
|
|
388
|
-
end
|
419
|
+
end
|
389
420
|
|
421
|
+
end
|
390
422
|
end
|
391
423
|
|
392
|
-
port = ARGV[0]
|
393
|
-
port
|
394
|
-
DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
|
424
|
+
port = ARGV[0] || 40000
|
425
|
+
DRb.start_service("druby://:#{port}", BigRecord::Driver::HbaseServer.new)
|
395
426
|
puts "Started drb server on port #{port}."
|
396
427
|
DRb.thread.join
|