bigrecord-driver 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/bigrecord-driver +41 -40
- data/bin/hbase-driver +12 -12
- data/lib/big_record_driver.rb +13 -6
- data/lib/big_record_driver/client.rb +32 -29
- data/lib/big_record_driver/column_descriptor.rb +13 -16
- data/lib/big_record_driver/exceptions.rb +12 -10
- data/lib/big_record_driver/hbase_driver/server.rb +351 -320
- data/lib/big_record_driver/server.rb +123 -0
- data/lib/big_record_driver/version.rb +4 -2
- data/test/abstract_test_client.rb +5 -6
- data/test/test_client_hbase.rb +13 -17
- metadata +3 -5
- data/lib/big_record_driver/bigrecord_server.rb +0 -119
- data/lib/big_record_driver/driver_manager.rb +0 -34
- data/test/test_driver_manager.rb +0 -46
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
data/bin/bigrecord-driver
CHANGED
@@ -62,7 +62,7 @@ unset RUBYLIB
|
|
62
62
|
if [ "$DRIVERNAME" == "" ]; then
|
63
63
|
echo "Driver type was not specified"
|
64
64
|
exit 1
|
65
|
-
elif [ "$DRIVERNAME" != "hbase"
|
65
|
+
elif [ "$DRIVERNAME" != "hbase" ]; then
|
66
66
|
echo "Driver type \"$DRIVERNAME\" is invalid"
|
67
67
|
exit 1
|
68
68
|
fi
|
@@ -126,30 +126,30 @@ start() {
|
|
126
126
|
refresh_pids
|
127
127
|
|
128
128
|
if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
|
129
|
-
|
130
|
-
|
129
|
+
echo -e "\nAlready running (pid="$PID")."
|
130
|
+
exit 1
|
131
131
|
else
|
132
132
|
rm -f $LOGS_DIR/$PORT.log
|
133
133
|
nohup jruby $DRIVER $PORT >> $LOGS_DIR/$PORT.log 2>&1 < /dev/null &
|
134
134
|
PID=$!
|
135
135
|
if [ "$PID" != "" ] ; then
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
136
|
+
# monitor the log file for the message saying that the server is started
|
137
|
+
for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
|
138
|
+
sleep 1
|
139
|
+
echo -n "."
|
140
|
+
if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
|
141
|
+
break
|
142
|
+
fi
|
143
|
+
done
|
144
|
+
|
145
|
+
if [ "$i" == $STARTUP_TIMEOUT ] ; then
|
146
|
+
echo -e "\nStartup timeout: couldn't start the DRb server."
|
147
|
+
else
|
148
|
+
echo $PID > $PIDS_DIR/$PORT.pid
|
149
|
+
fi
|
150
|
+
echo ""
|
151
151
|
else
|
152
|
-
|
152
|
+
echo -e "\nAn error occured while starting the DRb server."
|
153
153
|
fi
|
154
154
|
fi
|
155
155
|
}
|
@@ -161,8 +161,8 @@ stop() {
|
|
161
161
|
|
162
162
|
if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
|
163
163
|
echo "Stopping driver (pid = $PID)." >> $LOGS_DIR/$PORT.log
|
164
|
-
|
165
|
-
|
164
|
+
kill $PID
|
165
|
+
rm $PIDS_DIR/$PORT.pid
|
166
166
|
else
|
167
167
|
echo "No $DRIVERNAME driver to kill."
|
168
168
|
fi
|
@@ -183,24 +183,25 @@ start_debug() {
|
|
183
183
|
}
|
184
184
|
|
185
185
|
case "$ACTION" in
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
186
|
+
start)
|
187
|
+
start
|
188
|
+
;;
|
189
|
+
stop)
|
190
|
+
stop
|
191
|
+
;;
|
192
|
+
restart)
|
193
|
+
stop
|
194
|
+
start
|
195
|
+
;;
|
196
|
+
status)
|
197
|
+
status
|
198
|
+
;;
|
199
|
+
start_debug)
|
200
|
+
start_debug
|
201
|
+
;;
|
202
|
+
*)
|
203
|
+
print_usage
|
204
|
+
exit 1
|
205
205
|
esac
|
206
|
+
|
206
207
|
exit 0
|
data/bin/hbase-driver
CHANGED
@@ -11,22 +11,22 @@ if ARGV.include?("-l")
|
|
11
11
|
|
12
12
|
# Make sure that the folder exists
|
13
13
|
if File.exists?(hbase_path)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
# We're only going to include the jar files we need.
|
15
|
+
required_jars = Dir[hbase_path+"/*.jar",
|
16
|
+
hbase_path+"/lib/commons-logging*.jar",
|
17
|
+
hbase_path+"/lib/zookeeper*.jar",
|
18
|
+
hbase_path+"/lib/log4j*.jar",
|
19
|
+
hbase_path+"/lib/hadoop*.jar"]
|
20
|
+
classpath = required_jars.join(":")
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
args.delete_at(switch_index)
|
23
|
+
args.delete_at(switch_index)
|
24
|
+
args = args + ["-c", '"'+classpath+'"']
|
25
25
|
|
26
26
|
# Otherwise we'll warn the user and quit
|
27
27
|
else
|
28
|
-
|
29
|
-
|
28
|
+
puts "Folder #{hbase_path} does not exist"
|
29
|
+
exit
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
data/lib/big_record_driver.rb
CHANGED
@@ -1,7 +1,14 @@
|
|
1
|
-
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
3
|
+
end
|
4
|
+
end
|
2
5
|
|
3
|
-
|
4
|
-
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
6
|
+
BRD_ROOT = File.dirname(__FILE__)
|
7
|
+
|
8
|
+
require BRD_ROOT + '/big_record_driver/client'
|
9
|
+
require BRD_ROOT + '/big_record_driver/exceptions'
|
10
|
+
require BRD_ROOT + '/big_record_driver/column_descriptor'
|
11
|
+
require BRD_ROOT + '/big_record_driver/version'
|
12
|
+
|
13
|
+
# Aliasing the old namespace
|
14
|
+
BigRecordDriver = BigRecord::Driver
|
@@ -1,36 +1,39 @@
|
|
1
|
-
require '
|
2
|
-
require 'activesupport'
|
1
|
+
require 'active_support'
|
3
2
|
require 'set'
|
4
3
|
require 'drb'
|
5
4
|
|
6
|
-
module
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
config
|
11
|
-
|
12
|
-
config
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
5
|
+
module BigRecord
|
6
|
+
module Driver
|
7
|
+
|
8
|
+
class Client
|
9
|
+
attr_accessor :config, :server
|
10
|
+
|
11
|
+
def initialize(config={}) # :nodoc:
|
12
|
+
config = config.symbolize_keys
|
13
|
+
config[:drb_host] ||= '127.0.0.1'
|
14
|
+
config[:drb_port] ||= 40000
|
15
|
+
|
16
|
+
@config = config
|
17
|
+
|
18
|
+
DRb.start_service nil
|
19
|
+
begin
|
20
|
+
@server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
|
21
|
+
rescue DRb::DRbConnError
|
22
|
+
raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
|
23
|
+
"at #{@config[:drb_host]}:#{@config[:drb_port]}."
|
24
|
+
end
|
25
|
+
@server.configure(@config)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Delegate the methods to the server
|
29
|
+
def method_missing(method, *args)
|
30
|
+
@server.send(method, *args)
|
31
|
+
end
|
32
|
+
|
33
|
+
def respond_to?(method)
|
34
|
+
super
|
22
35
|
end
|
23
|
-
@server.configure(@config)
|
24
|
-
end
|
25
|
-
|
26
|
-
# Delegate the methods to the server
|
27
|
-
def method_missing(method, *args)
|
28
|
-
@server.send(method, *args)
|
29
|
-
end
|
30
|
-
|
31
|
-
def respond_to?(method)
|
32
|
-
super
|
33
36
|
end
|
34
|
-
|
37
|
+
|
35
38
|
end
|
36
39
|
end
|
@@ -1,23 +1,20 @@
|
|
1
|
-
module
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
2
3
|
|
3
|
-
|
4
|
+
class ColumnDescriptor
|
5
|
+
attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
attr_accessor :in_memory
|
8
|
-
attr_accessor :bloom_filter
|
9
|
-
attr_accessor :compression
|
7
|
+
def initialize(name, options={})
|
8
|
+
raise ArgumentError, "name is mandatory" unless name
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@bloom_filter = options[:bloom_filter]
|
18
|
-
@compression = options[:compression]
|
10
|
+
@name = name.to_s
|
11
|
+
@versions = options[:versions]
|
12
|
+
@in_memory = options[:in_memory]
|
13
|
+
@bloom_filter = options[:bloom_filter]
|
14
|
+
@compression = options[:compression]
|
15
|
+
end
|
19
16
|
end
|
20
17
|
|
21
18
|
end
|
22
|
-
|
23
19
|
end
|
20
|
+
|
@@ -1,12 +1,14 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
module BigRecord
|
2
|
+
module Driver
|
3
|
+
class DriverError < StandardError
|
4
|
+
end
|
5
|
+
class TableNotFound < DriverError
|
6
|
+
end
|
7
|
+
class TableAlreadyExists < DriverError
|
8
|
+
end
|
9
|
+
class JavaError < DriverError
|
10
|
+
end
|
11
|
+
class ConnectionError < DriverError
|
12
|
+
end
|
11
13
|
end
|
12
14
|
end
|
@@ -1,396 +1,427 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../column_descriptor'
|
2
2
|
require File.dirname(__FILE__) + '/../exceptions'
|
3
|
-
require File.dirname(__FILE__) + '/../
|
3
|
+
require File.dirname(__FILE__) + '/../server'
|
4
4
|
|
5
|
-
module
|
5
|
+
module BigRecord
|
6
|
+
module Driver
|
6
7
|
|
7
|
-
class HbaseServer <
|
8
|
-
|
8
|
+
class HbaseServer < Server
|
9
|
+
java_import "java.util.TreeMap"
|
10
|
+
include_package "org.apache.hadoop.hbase.client"
|
11
|
+
java_import "org.apache.hadoop.hbase.KeyValue"
|
12
|
+
java_import "org.apache.hadoop.hbase.io.hfile.Compression"
|
13
|
+
java_import "org.apache.hadoop.hbase.HBaseConfiguration"
|
14
|
+
java_import "org.apache.hadoop.hbase.HTableDescriptor"
|
15
|
+
java_import "org.apache.hadoop.hbase.HColumnDescriptor"
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
include_class "org.apache.hadoop.hbase.HBaseConfiguration"
|
15
|
-
include_class "org.apache.hadoop.hbase.HConstants"
|
16
|
-
include_class "org.apache.hadoop.hbase.HStoreKey"
|
17
|
-
include_class "org.apache.hadoop.hbase.HTableDescriptor"
|
18
|
-
include_class "org.apache.hadoop.hbase.HColumnDescriptor"
|
17
|
+
# Establish the connection with HBase with the given configuration parameters.
|
18
|
+
def configure(config = {})
|
19
|
+
config[:zookeeper_quorum] ||= 'localhost'
|
20
|
+
config[:zookeeper_client_port] ||= '2181'
|
19
21
|
|
20
|
-
|
22
|
+
@config = config
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
config[:zookeeper_quorum] ||= 'localhost'
|
25
|
-
config[:zookeeper_client_port] ||= '2181'
|
24
|
+
init_connection
|
25
|
+
end
|
26
26
|
|
27
|
-
|
27
|
+
# Atomic row insertion/update. Example:
|
28
|
+
# update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
|
29
|
+
# 'attribute:travel_rank' => "--- 0.90124565\n"})
|
30
|
+
# => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
|
31
|
+
def update(table_name, row, values, timestamp=nil)
|
32
|
+
safe_exec do
|
33
|
+
return nil unless row
|
28
34
|
|
29
|
-
|
30
|
-
|
35
|
+
table = connect_table(table_name)
|
36
|
+
row_lock = table.lockRow(row.to_bytes)
|
31
37
|
|
32
|
-
|
33
|
-
|
34
|
-
# 'attribute:travel_rank' => "--- 0.90124565\n"})
|
35
|
-
# => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
|
36
|
-
def update(table_name, row, values, timestamp=nil)
|
37
|
-
safe_exec do
|
38
|
-
return nil unless row
|
39
|
-
table = connect_table(table_name)
|
38
|
+
put = generate_put(row, values, timestamp, row_lock)
|
39
|
+
table.put(put)
|
40
40
|
|
41
|
-
|
41
|
+
table.unlockRow(row_lock)
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
row
|
44
|
+
end
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
47
|
+
# Returns a column of a row. Example:
|
48
|
+
# get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
|
49
|
+
# => "--- 0.90124565\n"
|
50
|
+
#
|
51
|
+
# valid options:
|
52
|
+
# :timestamp => integer corresponding to the time when the record was saved in hbase
|
53
|
+
# :versions => number of versions to retrieve, starting at the specified timestamp (or the latest)
|
54
|
+
def get(table_name, row, column, options={})
|
55
|
+
safe_exec do
|
56
|
+
return nil unless row
|
57
|
+
|
58
|
+
table = connect_table(table_name)
|
59
|
+
|
60
|
+
# Grab the version number if the client's using the old API,
|
61
|
+
# or retrieve only the latest version by default
|
62
|
+
options[:versions] ||= options[:num_versions]
|
63
|
+
options[:versions] ||= 1
|
64
|
+
|
65
|
+
# validate the arguments
|
66
|
+
raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
|
67
|
+
|
68
|
+
get = generate_get(row, column, options)
|
69
|
+
result = table.get(get)
|
70
|
+
|
71
|
+
if (result.nil? || result.isEmpty)
|
72
|
+
return (options[:versions] == 1 ? nil : [])
|
73
|
+
else
|
74
|
+
output = result.list.collect do |keyvalue|
|
75
|
+
to_ruby_string(keyvalue.getValue)
|
76
|
+
end
|
51
77
|
|
52
|
-
|
53
|
-
|
54
|
-
# => "--- 0.90124565\n"
|
55
|
-
#
|
56
|
-
# valid options:
|
57
|
-
# :timestamp => integer corresponding to the time when the record was saved in hbase
|
58
|
-
# :versions => number of versions to retrieve, starting at the specified timestamp (or the latest)
|
59
|
-
def get(table_name, row, column, options={})
|
60
|
-
safe_exec do
|
61
|
-
return nil unless row
|
62
|
-
table = connect_table(table_name)
|
63
|
-
|
64
|
-
# Retreive only the last version by default
|
65
|
-
options[:versions] ||= options[:num_versions]
|
66
|
-
options[:versions] ||= 1
|
67
|
-
|
68
|
-
# validate the arguments
|
69
|
-
raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
|
70
|
-
|
71
|
-
# get the raw data from hbase
|
72
|
-
unless options[:timestamp]
|
73
|
-
if options[:versions] == 1
|
74
|
-
raw_data = table.get(row, column)
|
75
|
-
else
|
76
|
-
raw_data = table.get(row,
|
77
|
-
column,
|
78
|
-
options[:versions])
|
78
|
+
return (options[:versions] == 1 ? output[0] : output)
|
79
|
+
end
|
79
80
|
end
|
80
|
-
else
|
81
|
-
raw_data = table.get(row,
|
82
|
-
column,
|
83
|
-
options[:timestamp],
|
84
|
-
options[:versions])
|
85
81
|
end
|
86
82
|
|
87
|
-
#
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
83
|
+
# Returns the last version of the given columns of the given row. The columns work with
|
84
|
+
# regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
|
85
|
+
# get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
|
86
|
+
# => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
|
87
|
+
def get_columns(table_name, row, columns, options={})
|
88
|
+
safe_exec do
|
89
|
+
return nil unless row
|
90
|
+
|
91
|
+
table_name = table_name.to_s
|
92
|
+
table = connect_table(table_name)
|
93
|
+
|
94
|
+
get = generate_get(row, columns, options)
|
95
|
+
result = table.get(get)
|
96
|
+
|
97
|
+
begin
|
98
|
+
parse_result(result)
|
99
|
+
rescue
|
100
|
+
nil
|
101
|
+
end
|
96
102
|
end
|
97
103
|
end
|
98
|
-
end
|
99
|
-
end
|
100
104
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
table_name = table_name.to_s
|
109
|
-
table = connect_table(table_name)
|
110
|
-
|
111
|
-
java_cols = Java::String[columns.size].new
|
112
|
-
columns.each_with_index do |col, i|
|
113
|
-
java_cols[i] = Java::String.new(col)
|
114
|
-
end
|
105
|
+
# Get consecutive rows. Example to get 100 records starting with the one specified and get all the
|
106
|
+
# columns in the column family 'attribute:' :
|
107
|
+
# get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
|
108
|
+
def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
|
109
|
+
safe_exec do
|
110
|
+
table_name = table_name.to_s
|
111
|
+
table = connect_table(table_name)
|
115
112
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
113
|
+
scan = Scan.new
|
114
|
+
scan.setStartRow(start_row.to_bytes) if start_row
|
115
|
+
scan.setStopRow(stop_row.to_bytes) if stop_row
|
116
|
+
|
117
|
+
columns.each do |column|
|
118
|
+
(column[-1,1] == ":") ?
|
119
|
+
scan.addFamily(column.gsub(":", "").to_bytes) :
|
120
|
+
scan.addColumn(column.to_bytes)
|
121
|
+
end
|
122
|
+
|
123
|
+
scanner = table.getScanner(scan)
|
122
124
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
if limit
|
126
|
+
results = scanner.next(limit)
|
127
|
+
else
|
128
|
+
results = []
|
129
|
+
while (row_result = scanner.next) != nil
|
130
|
+
results << row_result
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
output = []
|
135
|
+
results.each do |result|
|
136
|
+
output << parse_result(result)
|
137
|
+
end
|
138
|
+
scanner.close
|
139
|
+
|
140
|
+
return output
|
128
141
|
end
|
129
|
-
values["id"] = row
|
130
|
-
values
|
131
|
-
else
|
132
|
-
nil
|
133
142
|
end
|
134
|
-
end
|
135
|
-
end
|
136
143
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
144
|
+
# Delete a whole row.
|
145
|
+
def delete(table_name, row, timestamp = nil)
|
146
|
+
safe_exec do
|
147
|
+
table = connect_table(table_name)
|
148
|
+
|
149
|
+
if timestamp
|
150
|
+
row_lock = table.lockRow(row.to_bytes)
|
151
|
+
table.delete(Delete.new(row.to_bytes, timestamp, row_lock))
|
152
|
+
table.unlockRow(row_lock)
|
153
|
+
else
|
154
|
+
table.delete(Delete.new(row.to_bytes))
|
155
|
+
end
|
156
|
+
end
|
148
157
|
end
|
149
158
|
|
150
|
-
|
151
|
-
|
159
|
+
# Create a table
|
160
|
+
def create_table(table_name, column_descriptors)
|
161
|
+
safe_exec do
|
162
|
+
table_name = table_name.to_s
|
163
|
+
unless table_exists?(table_name)
|
164
|
+
tdesc = HTableDescriptor.new(table_name)
|
152
165
|
|
153
|
-
|
154
|
-
|
155
|
-
if stop_row
|
156
|
-
scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
|
157
|
-
else
|
158
|
-
scanner = table.getScanner(java_cols, start_row)
|
159
|
-
end
|
166
|
+
column_descriptors.each do |cd|
|
167
|
+
cdesc = generate_column_descriptor(cd)
|
160
168
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
end
|
168
|
-
values = {}
|
169
|
-
row_result.entrySet.each do |entry|
|
170
|
-
column_name = Java::String.new(entry.getKey).to_s
|
171
|
-
data = to_ruby_string(entry.getValue)
|
172
|
-
values[column_name] = data
|
169
|
+
tdesc.addFamily(cdesc)
|
170
|
+
end
|
171
|
+
@admin.createTable(tdesc)
|
172
|
+
else
|
173
|
+
raise TableAlreadyExists, table_name
|
174
|
+
end
|
173
175
|
end
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
176
|
+
end
|
177
|
+
|
178
|
+
# Delete a table
|
179
|
+
def drop_table(table_name)
|
180
|
+
safe_exec do
|
181
|
+
table_name = table_name.to_s
|
182
|
+
|
183
|
+
if @admin.tableExists(table_name)
|
184
|
+
@admin.disableTable(table_name)
|
185
|
+
@admin.deleteTable(table_name)
|
186
|
+
|
187
|
+
# Remove the table connection from the cache
|
188
|
+
@tables.delete(table_name) if @tables.has_key?(table_name)
|
189
|
+
else
|
190
|
+
raise TableNotFound, table_name
|
191
|
+
end
|
178
192
|
end
|
179
193
|
end
|
180
|
-
scanner.close
|
181
|
-
result
|
182
|
-
end
|
183
|
-
end
|
184
194
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
table = connect_table(table_name)
|
189
|
-
timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
|
190
|
-
end
|
191
|
-
end
|
195
|
+
def add_column(table_name, column_descriptor)
|
196
|
+
safe_exec do
|
197
|
+
table_name = table_name.to_s
|
192
198
|
|
193
|
-
|
194
|
-
|
195
|
-
safe_exec do
|
196
|
-
table_name = table_name.to_s
|
197
|
-
unless table_exists?(table_name)
|
198
|
-
tdesc = HTableDescriptor.new(table_name)
|
199
|
+
if @admin.tableExists(table_name)
|
200
|
+
@admin.disableTable(table_name)
|
199
201
|
|
200
|
-
|
201
|
-
|
202
|
+
cdesc = generate_column_descriptor(column_descriptor)
|
203
|
+
@admin.addColumn(table_name, cdesc)
|
202
204
|
|
203
|
-
|
205
|
+
@admin.enableTable(table_name)
|
206
|
+
else
|
207
|
+
raise TableNotFound, table_name
|
208
|
+
end
|
204
209
|
end
|
205
|
-
@admin.createTable(tdesc)
|
206
|
-
else
|
207
|
-
raise BigRecordDriver::TableAlreadyExists, table_name
|
208
210
|
end
|
209
|
-
end
|
210
|
-
end
|
211
211
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
212
|
+
def remove_column(table_name, column_name)
|
213
|
+
safe_exec do
|
214
|
+
table_name = table_name.to_s
|
215
|
+
column_name = column_name.to_s
|
216
|
+
|
217
|
+
if @admin.tableExists(table_name)
|
218
|
+
@admin.disableTable(table_name)
|
216
219
|
|
217
|
-
|
218
|
-
|
219
|
-
@admin.deleteTable(table_name)
|
220
|
+
column_name << ":" unless column_name =~ /:$/
|
221
|
+
@admin.deleteColumn(table_name, column_name)
|
220
222
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
223
|
+
@admin.enableTable(table_name)
|
224
|
+
else
|
225
|
+
raise TableNotFound, table_name
|
226
|
+
end
|
227
|
+
end
|
225
228
|
end
|
226
|
-
end
|
227
|
-
end
|
228
229
|
|
229
|
-
|
230
|
-
|
231
|
-
|
230
|
+
def modify_column(table_name, column_descriptor)
|
231
|
+
safe_exec do
|
232
|
+
table_name = table_name.to_s
|
232
233
|
|
233
|
-
|
234
|
-
|
234
|
+
if @admin.tableExists(table_name)
|
235
|
+
@admin.disableTable(table_name)
|
235
236
|
|
236
|
-
|
237
|
-
|
237
|
+
cdesc = generate_column_descriptor(column_descriptor)
|
238
|
+
@admin.modifyColumn(table_name, column_descriptor.name, cdesc)
|
238
239
|
|
239
|
-
|
240
|
-
|
241
|
-
|
240
|
+
@admin.enableTable(table_name)
|
241
|
+
else
|
242
|
+
raise TableNotFound, table_name
|
243
|
+
end
|
244
|
+
end
|
242
245
|
end
|
243
|
-
end
|
244
|
-
end
|
245
246
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
247
|
+
def truncate_table(table_name)
|
248
|
+
safe_exec do
|
249
|
+
table_name = table_name.to_s
|
250
|
+
table = connect_table(table_name)
|
251
|
+
tableDescriptor = table.getTableDescriptor
|
252
|
+
drop_table(table_name)
|
253
|
+
@admin.createTable(tableDescriptor)
|
254
|
+
end
|
255
|
+
end
|
250
256
|
|
251
|
-
|
252
|
-
|
257
|
+
def ping
|
258
|
+
safe_exec do
|
259
|
+
@admin.isMasterRunning
|
260
|
+
end
|
261
|
+
end
|
253
262
|
|
254
|
-
|
255
|
-
|
263
|
+
def table_exists?(table_name)
|
264
|
+
safe_exec do
|
265
|
+
@admin.tableExists(table_name.to_s)
|
266
|
+
end
|
267
|
+
end
|
256
268
|
|
257
|
-
|
258
|
-
|
259
|
-
|
269
|
+
def table_names
|
270
|
+
safe_exec do
|
271
|
+
@admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
|
272
|
+
end
|
260
273
|
end
|
261
|
-
end
|
262
|
-
end
|
263
274
|
|
264
|
-
|
265
|
-
safe_exec do
|
266
|
-
table_name = table_name.to_s
|
267
|
-
column_name = column_name.to_s
|
275
|
+
private
|
268
276
|
|
269
|
-
|
270
|
-
|
277
|
+
def init_connection
|
278
|
+
safe_exec do
|
279
|
+
@conf = HBaseConfiguration.new
|
280
|
+
@conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
|
281
|
+
@conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
|
282
|
+
@admin = HBaseAdmin.new(@conf)
|
283
|
+
@tables = {}
|
284
|
+
end
|
285
|
+
end
|
271
286
|
|
272
|
-
|
273
|
-
|
287
|
+
# Create a connection to an HBase table and keep it in memory.
|
288
|
+
def connect_table(table_name)
|
289
|
+
safe_exec do
|
290
|
+
table_name = table_name.to_s
|
291
|
+
return @tables[table_name] if @tables.has_key?(table_name)
|
274
292
|
|
275
|
-
|
276
|
-
|
277
|
-
|
293
|
+
if table_exists?(table_name)
|
294
|
+
@tables[table_name] = HTable.new(@conf, table_name)
|
295
|
+
else
|
296
|
+
if table_name and !table_name.empty?
|
297
|
+
raise TableNotFound, table_name
|
298
|
+
else
|
299
|
+
raise ArgumentError, "Table name not specified"
|
300
|
+
end
|
301
|
+
end
|
302
|
+
@tables[table_name]
|
303
|
+
end
|
278
304
|
end
|
279
|
-
end
|
280
|
-
end
|
281
305
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
306
|
+
# Create a Get object given parameters.
|
307
|
+
#
|
308
|
+
# @param [String] row
|
309
|
+
# @param [Array, String] columns A single string (or a collection of strings), each a
|
310
|
+
# fully qualified column name or column family (ends with ':').
|
311
|
+
# @param [Hash] options
|
312
|
+
#
|
313
|
+
# @return [Get] org.apache.hadoop.hbase.client.Get object
|
314
|
+
# corresponding to the arguments passed.
|
315
|
+
def generate_get(row, columns, options = {})
|
316
|
+
columns = [columns].flatten
|
317
|
+
|
318
|
+
get = Get.new(row.to_bytes)
|
319
|
+
|
320
|
+
columns.each do |column|
|
321
|
+
# If the column name ends with ':' then it's a column family.
|
322
|
+
(column[-1,1] == ":") ?
|
323
|
+
get.addFamily(column.gsub(":", "").to_bytes) :
|
324
|
+
get.addColumn(column.to_bytes)
|
325
|
+
end
|
291
326
|
|
292
|
-
|
293
|
-
safe_exec do
|
294
|
-
@admin.isMasterRunning
|
295
|
-
end
|
296
|
-
end
|
327
|
+
get.setMaxVersions(options[:versions]) if options[:versions]
|
297
328
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
end
|
302
|
-
end
|
329
|
+
# Need to add 1 to the timestamp due to the API silliness, i.e. min timestamp
|
330
|
+
# is inclusive while max timestamp is exclusive.
|
331
|
+
get.setTimeRange(java.lang.Long::MIN_VALUE, options[:timestamp]+1) if options[:timestamp]
|
303
332
|
|
304
|
-
|
305
|
-
|
306
|
-
@admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
|
307
|
-
end
|
308
|
-
end
|
333
|
+
return get
|
334
|
+
end
|
309
335
|
|
310
|
-
#
|
311
|
-
#
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
else
|
329
|
-
raise ArgumentError, "Table name not specified"
|
336
|
+
# Create a Put object given parameters.
|
337
|
+
#
|
338
|
+
# @param [String] row
|
339
|
+
# @param [Hash] columns Keys are the fully qualified column names, mapped to
|
340
|
+
# their associated values.
|
341
|
+
# @param [Integer] timestamp
|
342
|
+
# @param [org.apache.hadoop.hbase.client.RowLock] row_lock
|
343
|
+
#
|
344
|
+
# @return [Put] org.apache.hadoop.hbase.client.Put object
|
345
|
+
# corresponding to the arguments passed.
|
346
|
+
def generate_put(row, columns = {}, timestamp = nil, row_lock = nil)
|
347
|
+
put = row_lock ? Put.new(row.to_bytes, row_lock) : Put.new(row.to_bytes)
|
348
|
+
|
349
|
+
columns.each do |name, value|
|
350
|
+
family, qualifier = name.split(":")
|
351
|
+
timestamp ?
|
352
|
+
put.add(family.to_bytes, qualifier.to_bytes, timestamp, value.to_bytes) :
|
353
|
+
put.add(family.to_bytes, qualifier.to_bytes, value.to_bytes)
|
330
354
|
end
|
355
|
+
|
356
|
+
return put
|
331
357
|
end
|
332
|
-
@tables[table_name]
|
333
|
-
end
|
334
|
-
end
|
335
358
|
|
336
|
-
|
337
|
-
|
338
|
-
@
|
339
|
-
|
340
|
-
@
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
359
|
+
# Parse a Result object into a Hash.
|
360
|
+
#
|
361
|
+
# @param [Result] result
|
362
|
+
#
|
363
|
+
# @return [Hash] Fully qualified column names as keys
|
364
|
+
# and their corresponding values.
|
365
|
+
def parse_result(result)
|
366
|
+
output = {}
|
367
|
+
|
368
|
+
result.list.each do |keyvalue|
|
369
|
+
output[to_ruby_string(keyvalue.getColumn)] = to_ruby_string(keyvalue.getValue)
|
370
|
+
end
|
345
371
|
|
346
|
-
|
347
|
-
raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
|
348
|
-
raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
|
372
|
+
output["id"] = to_ruby_string(result.getRow)
|
349
373
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
374
|
+
return output
|
375
|
+
end
|
376
|
+
|
377
|
+
def generate_column_descriptor(column_descriptor)
|
378
|
+
raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
|
379
|
+
raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
|
380
|
+
|
381
|
+
if column_descriptor.compression
|
382
|
+
compression =
|
383
|
+
case column_descriptor.compression.to_s
|
384
|
+
when 'none'; Compression::Algorithm::NONE.getName()
|
385
|
+
when 'gz'; Compression::Algorithm::GZ.getName()
|
386
|
+
when 'lzo'; Compression::Algorithm::LZO.getName()
|
387
|
+
else
|
388
|
+
raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
|
389
|
+
end
|
358
390
|
end
|
359
|
-
end
|
360
391
|
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
392
|
+
n_versions = column_descriptor.versions
|
393
|
+
in_memory = column_descriptor.in_memory
|
394
|
+
|
395
|
+
# set the default values of the missing parameters
|
396
|
+
n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
|
397
|
+
compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
|
398
|
+
in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
|
399
|
+
block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
|
400
|
+
block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
|
401
|
+
bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
|
402
|
+
ttl ||= HColumnDescriptor::DEFAULT_TTL
|
403
|
+
|
404
|
+
# add the ':' at the end if the user didn't specify it
|
405
|
+
column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
|
406
|
+
|
407
|
+
cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
|
408
|
+
n_versions,
|
409
|
+
compression,
|
410
|
+
in_memory,
|
411
|
+
block_cache,
|
412
|
+
block_size,
|
413
|
+
ttl,
|
414
|
+
bloomfilter)
|
415
|
+
|
416
|
+
return cdesc
|
417
|
+
end
|
387
418
|
|
388
|
-
end
|
419
|
+
end
|
389
420
|
|
421
|
+
end
|
390
422
|
end
|
391
423
|
|
392
|
-
port = ARGV[0]
|
393
|
-
port
|
394
|
-
DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
|
424
|
+
port = ARGV[0] || 40000
|
425
|
+
DRb.start_service("druby://:#{port}", BigRecord::Driver::HbaseServer.new)
|
395
426
|
puts "Started drb server on port #{port}."
|
396
427
|
DRb.thread.join
|