bigrecord-driver 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
data/bin/bigrecord-driver CHANGED
@@ -62,7 +62,7 @@ unset RUBYLIB
62
62
  if [ "$DRIVERNAME" == "" ]; then
63
63
  echo "Driver type was not specified"
64
64
  exit 1
65
- elif [ "$DRIVERNAME" != "hbase" -a "$DRIVERNAME" != "cassandra" ]; then
65
+ elif [ "$DRIVERNAME" != "hbase" ]; then
66
66
  echo "Driver type \"$DRIVERNAME\" is invalid"
67
67
  exit 1
68
68
  fi
@@ -126,30 +126,30 @@ start() {
126
126
  refresh_pids
127
127
 
128
128
  if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
129
- echo -e "\nAlready running (pid="$PID")."
130
- exit 1
129
+ echo -e "\nAlready running (pid="$PID")."
130
+ exit 1
131
131
  else
132
132
  rm -f $LOGS_DIR/$PORT.log
133
133
  nohup jruby $DRIVER $PORT >> $LOGS_DIR/$PORT.log 2>&1 < /dev/null &
134
134
  PID=$!
135
135
  if [ "$PID" != "" ] ; then
136
- # monitor the log file for the message saying that the server is started
137
- for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
138
- sleep 1
139
- echo -n "."
140
- if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
141
- break
142
- fi
143
- done
144
-
145
- if [ "$i" == $STARTUP_TIMEOUT ] ; then
146
- echo -e "\nStartup timeout: couldn't start the DRb server."
147
- else
148
- echo $PID > $PIDS_DIR/$PORT.pid
149
- fi
150
- echo ""
136
+ # monitor the log file for the message saying that the server is started
137
+ for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
138
+ sleep 1
139
+ echo -n "."
140
+ if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
141
+ break
142
+ fi
143
+ done
144
+
145
+ if [ "$i" == $STARTUP_TIMEOUT ] ; then
146
+ echo -e "\nStartup timeout: couldn't start the DRb server."
147
+ else
148
+ echo $PID > $PIDS_DIR/$PORT.pid
149
+ fi
150
+ echo ""
151
151
  else
152
- echo -e "\nAn error occured while starting the DRb server."
152
+ echo -e "\nAn error occured while starting the DRb server."
153
153
  fi
154
154
  fi
155
155
  }
@@ -161,8 +161,8 @@ stop() {
161
161
 
162
162
  if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
163
163
  echo "Stopping driver (pid = $PID)." >> $LOGS_DIR/$PORT.log
164
- kill $PID
165
- rm $PIDS_DIR/$PORT.pid
164
+ kill $PID
165
+ rm $PIDS_DIR/$PORT.pid
166
166
  else
167
167
  echo "No $DRIVERNAME driver to kill."
168
168
  fi
@@ -183,24 +183,25 @@ start_debug() {
183
183
  }
184
184
 
185
185
  case "$ACTION" in
186
- start)
187
- start
188
- ;;
189
- stop)
190
- stop
191
- ;;
192
- restart)
193
- stop
194
- start
195
- ;;
196
- status)
197
- status
198
- ;;
199
- start_debug)
200
- start_debug
201
- ;;
202
- *)
203
- print_usage
204
- exit 1
186
+ start)
187
+ start
188
+ ;;
189
+ stop)
190
+ stop
191
+ ;;
192
+ restart)
193
+ stop
194
+ start
195
+ ;;
196
+ status)
197
+ status
198
+ ;;
199
+ start_debug)
200
+ start_debug
201
+ ;;
202
+ *)
203
+ print_usage
204
+ exit 1
205
205
  esac
206
+
206
207
  exit 0
data/bin/hbase-driver CHANGED
@@ -11,22 +11,22 @@ if ARGV.include?("-l")
11
11
 
12
12
  # Make sure that the folder exists
13
13
  if File.exists?(hbase_path)
14
- # We're only going to include the jar files we need.
15
- required_jars = Dir[hbase_path+"/*.jar",
16
- hbase_path+"/lib/commons-logging*.jar",
17
- hbase_path+"/lib/zookeeper*.jar",
18
- hbase_path+"/lib/log4j*.jar",
19
- hbase_path+"/lib/hadoop*.jar"]
20
- classpath = required_jars.join(":")
14
+ # We're only going to include the jar files we need.
15
+ required_jars = Dir[hbase_path+"/*.jar",
16
+ hbase_path+"/lib/commons-logging*.jar",
17
+ hbase_path+"/lib/zookeeper*.jar",
18
+ hbase_path+"/lib/log4j*.jar",
19
+ hbase_path+"/lib/hadoop*.jar"]
20
+ classpath = required_jars.join(":")
21
21
 
22
- args.delete_at(switch_index)
23
- args.delete_at(switch_index)
24
- args = args + ["-c", '"'+classpath+'"']
22
+ args.delete_at(switch_index)
23
+ args.delete_at(switch_index)
24
+ args = args + ["-c", '"'+classpath+'"']
25
25
 
26
26
  # Otherwise we'll warn the user and quit
27
27
  else
28
- puts "Folder #{hbase_path} does not exist"
29
- exit
28
+ puts "Folder #{hbase_path} does not exist"
29
+ exit
30
30
  end
31
31
  end
32
32
 
@@ -1,7 +1,14 @@
1
- LIB_ROOT = File.dirname(__FILE__)
1
+ module BigRecord
2
+ module Driver
3
+ end
4
+ end
2
5
 
3
- require LIB_ROOT + '/big_record_driver/client'
4
- require LIB_ROOT + '/big_record_driver/exceptions'
5
- require LIB_ROOT + '/big_record_driver/column_descriptor'
6
- require LIB_ROOT + '/big_record_driver/driver_manager'
7
- require LIB_ROOT + '/big_record_driver/version'
6
+ BRD_ROOT = File.dirname(__FILE__)
7
+
8
+ require BRD_ROOT + '/big_record_driver/client'
9
+ require BRD_ROOT + '/big_record_driver/exceptions'
10
+ require BRD_ROOT + '/big_record_driver/column_descriptor'
11
+ require BRD_ROOT + '/big_record_driver/version'
12
+
13
+ # Aliasing the old namespace
14
+ BigRecordDriver = BigRecord::Driver
@@ -1,36 +1,39 @@
1
- require 'rubygems'
2
- require 'activesupport'
1
+ require 'active_support'
3
2
  require 'set'
4
3
  require 'drb'
5
4
 
6
- module BigRecordDriver
7
- class Client
8
-
9
- def initialize(config={}) # :nodoc:
10
- config = config.symbolize_keys
11
- config[:drb_host] ||= '127.0.0.1'
12
- config[:drb_port] ||= 40000
13
-
14
- @config = config
15
-
16
- DRb.start_service('druby://127.0.0.1:0')
17
- begin
18
- @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
19
- rescue DRb::DRbConnError
20
- raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
21
- "at #{@config[:drb_host]}:#{@config[:drb_port]}."
5
+ module BigRecord
6
+ module Driver
7
+
8
+ class Client
9
+ attr_accessor :config, :server
10
+
11
+ def initialize(config={}) # :nodoc:
12
+ config = config.symbolize_keys
13
+ config[:drb_host] ||= '127.0.0.1'
14
+ config[:drb_port] ||= 40000
15
+
16
+ @config = config
17
+
18
+ DRb.start_service nil
19
+ begin
20
+ @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
21
+ rescue DRb::DRbConnError
22
+ raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
23
+ "at #{@config[:drb_host]}:#{@config[:drb_port]}."
24
+ end
25
+ @server.configure(@config)
26
+ end
27
+
28
+ # Delegate the methods to the server
29
+ def method_missing(method, *args)
30
+ @server.send(method, *args)
31
+ end
32
+
33
+ def respond_to?(method)
34
+ super
22
35
  end
23
- @server.configure(@config)
24
- end
25
-
26
- # Delegate the methods to the server
27
- def method_missing(method, *args)
28
- @server.send(method, *args)
29
- end
30
-
31
- def respond_to?(method)
32
- super
33
36
  end
34
-
37
+
35
38
  end
36
39
  end
@@ -1,23 +1,20 @@
1
- module BigRecordDriver
1
+ module BigRecord
2
+ module Driver
2
3
 
3
- class ColumnDescriptor
4
+ class ColumnDescriptor
5
+ attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression
4
6
 
5
- attr_accessor :name
6
- attr_accessor :versions
7
- attr_accessor :in_memory
8
- attr_accessor :bloom_filter
9
- attr_accessor :compression
7
+ def initialize(name, options={})
8
+ raise ArgumentError, "name is mandatory" unless name
10
9
 
11
- def initialize(name, options={})
12
- raise ArgumentError, "name is mandatory" unless name
13
-
14
- @name = name.to_s
15
- @versions = options[:versions]
16
- @in_memory = options[:in_memory]
17
- @bloom_filter = options[:bloom_filter]
18
- @compression = options[:compression]
10
+ @name = name.to_s
11
+ @versions = options[:versions]
12
+ @in_memory = options[:in_memory]
13
+ @bloom_filter = options[:bloom_filter]
14
+ @compression = options[:compression]
15
+ end
19
16
  end
20
17
 
21
18
  end
22
-
23
19
  end
20
+
@@ -1,12 +1,14 @@
1
- module BigRecordDriver
2
- class BigDBError < StandardError
3
- end
4
- class TableNotFound < BigDBError
5
- end
6
- class TableAlreadyExists < BigDBError
7
- end
8
- class JavaError < BigDBError
9
- end
10
- class ConnectionError < BigDBError
1
+ module BigRecord
2
+ module Driver
3
+ class DriverError < StandardError
4
+ end
5
+ class TableNotFound < DriverError
6
+ end
7
+ class TableAlreadyExists < DriverError
8
+ end
9
+ class JavaError < DriverError
10
+ end
11
+ class ConnectionError < DriverError
12
+ end
11
13
  end
12
14
  end
@@ -1,396 +1,427 @@
1
1
  require File.dirname(__FILE__) + '/../column_descriptor'
2
2
  require File.dirname(__FILE__) + '/../exceptions'
3
- require File.dirname(__FILE__) + '/../bigrecord_server'
3
+ require File.dirname(__FILE__) + '/../server'
4
4
 
5
- module BigRecordDriver
5
+ module BigRecord
6
+ module Driver
6
7
 
7
- class HbaseServer < BigRecordServer
8
- include_class "java.util.TreeMap"
8
+ class HbaseServer < Server
9
+ java_import "java.util.TreeMap"
10
+ include_package "org.apache.hadoop.hbase.client"
11
+ java_import "org.apache.hadoop.hbase.KeyValue"
12
+ java_import "org.apache.hadoop.hbase.io.hfile.Compression"
13
+ java_import "org.apache.hadoop.hbase.HBaseConfiguration"
14
+ java_import "org.apache.hadoop.hbase.HTableDescriptor"
15
+ java_import "org.apache.hadoop.hbase.HColumnDescriptor"
9
16
 
10
- include_class "org.apache.hadoop.hbase.client.HTable"
11
- include_class "org.apache.hadoop.hbase.client.HBaseAdmin"
12
- include_class "org.apache.hadoop.hbase.io.BatchUpdate"
13
- include_class "org.apache.hadoop.hbase.io.hfile.Compression"
14
- include_class "org.apache.hadoop.hbase.HBaseConfiguration"
15
- include_class "org.apache.hadoop.hbase.HConstants"
16
- include_class "org.apache.hadoop.hbase.HStoreKey"
17
- include_class "org.apache.hadoop.hbase.HTableDescriptor"
18
- include_class "org.apache.hadoop.hbase.HColumnDescriptor"
17
+ # Establish the connection with HBase with the given configuration parameters.
18
+ def configure(config = {})
19
+ config[:zookeeper_quorum] ||= 'localhost'
20
+ config[:zookeeper_client_port] ||= '2181'
19
21
 
20
- include_class "org.apache.hadoop.io.Writable"
22
+ @config = config
21
23
 
22
- # Establish the connection with HBase with the given configuration parameters.
23
- def configure(config = {})
24
- config[:zookeeper_quorum] ||= 'localhost'
25
- config[:zookeeper_client_port] ||= '2181'
24
+ init_connection
25
+ end
26
26
 
27
- @config = config
27
+ # Atomic row insertion/update. Example:
28
+ # update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
29
+ # 'attribute:travel_rank' => "--- 0.90124565\n"})
30
+ # => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
31
+ def update(table_name, row, values, timestamp=nil)
32
+ safe_exec do
33
+ return nil unless row
28
34
 
29
- init_connection
30
- end
35
+ table = connect_table(table_name)
36
+ row_lock = table.lockRow(row.to_bytes)
31
37
 
32
- # Atomic row insertion/update. Example:
33
- # update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
34
- # 'attribute:travel_rank' => "--- 0.90124565\n"})
35
- # => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
36
- def update(table_name, row, values, timestamp=nil)
37
- safe_exec do
38
- return nil unless row
39
- table = connect_table(table_name)
38
+ put = generate_put(row, values, timestamp, row_lock)
39
+ table.put(put)
40
40
 
41
- batch = timestamp ? BatchUpdate.new(row, timestamp) : BatchUpdate.new(row)
41
+ table.unlockRow(row_lock)
42
42
 
43
- values.each do |column, value|
44
- batch.put(column, value.to_bytes)
43
+ row
44
+ end
45
45
  end
46
46
 
47
- table.commit(batch)
48
- row
49
- end
50
- end
47
+ # Returns a column of a row. Example:
48
+ # get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
49
+ # => "--- 0.90124565\n"
50
+ #
51
+ # valid options:
52
+ # :timestamp => integer corresponding to the time when the record was saved in hbase
53
+ # :versions => number of versions to retrieve, starting at the specified timestamp (or the latest)
54
+ def get(table_name, row, column, options={})
55
+ safe_exec do
56
+ return nil unless row
57
+
58
+ table = connect_table(table_name)
59
+
60
+ # Grab the version number if the client's using the old API,
61
+ # or retrieve only the latest version by default
62
+ options[:versions] ||= options[:num_versions]
63
+ options[:versions] ||= 1
64
+
65
+ # validate the arguments
66
+ raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
67
+
68
+ get = generate_get(row, column, options)
69
+ result = table.get(get)
70
+
71
+ if (result.nil? || result.isEmpty)
72
+ return (options[:versions] == 1 ? nil : [])
73
+ else
74
+ output = result.list.collect do |keyvalue|
75
+ to_ruby_string(keyvalue.getValue)
76
+ end
51
77
 
52
- # Returns a column of a row. Example:
53
- # get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
54
- # => "--- 0.90124565\n"
55
- #
56
- # valid options:
57
- # :timestamp => integer corresponding to the time when the record was saved in hbase
58
- # :versions => number of versions to retreive, starting at the specified timestamp (or the latest)
59
- def get(table_name, row, column, options={})
60
- safe_exec do
61
- return nil unless row
62
- table = connect_table(table_name)
63
-
64
- # Retreive only the last version by default
65
- options[:versions] ||= options[:num_versions]
66
- options[:versions] ||= 1
67
-
68
- # validate the arguments
69
- raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
70
-
71
- # get the raw data from hbase
72
- unless options[:timestamp]
73
- if options[:versions] == 1
74
- raw_data = table.get(row, column)
75
- else
76
- raw_data = table.get(row,
77
- column,
78
- options[:versions])
78
+ return (options[:versions] == 1 ? output[0] : output)
79
+ end
79
80
  end
80
- else
81
- raw_data = table.get(row,
82
- column,
83
- options[:timestamp],
84
- options[:versions])
85
81
  end
86
82
 
87
- # Return either a single value or an array, depending on the number of version that have been requested
88
- if options[:versions] == 1
89
- return nil unless raw_data
90
- raw_data = raw_data[0] if options[:timestamp]
91
- to_ruby_string(raw_data)
92
- else
93
- return [] unless raw_data
94
- raw_data.collect do |raw_data_version|
95
- to_ruby_string(raw_data_version)
83
+ # Returns the last version of the given columns of the given row. The columns work with
84
+ # regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
85
+ # get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
86
+ # => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
87
+ def get_columns(table_name, row, columns, options={})
88
+ safe_exec do
89
+ return nil unless row
90
+
91
+ table_name = table_name.to_s
92
+ table = connect_table(table_name)
93
+
94
+ get = generate_get(row, columns, options)
95
+ result = table.get(get)
96
+
97
+ begin
98
+ parse_result(result)
99
+ rescue
100
+ nil
101
+ end
96
102
  end
97
103
  end
98
- end
99
- end
100
104
 
101
- # Returns the last version of the given columns of the given row. The columns works with
102
- # regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
103
- # get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
104
- # => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
105
- def get_columns(table_name, row, columns, options={})
106
- safe_exec do
107
- return nil unless row
108
- table_name = table_name.to_s
109
- table = connect_table(table_name)
110
-
111
- java_cols = Java::String[columns.size].new
112
- columns.each_with_index do |col, i|
113
- java_cols[i] = Java::String.new(col)
114
- end
105
+ # Get consecutive rows. Example to get 100 records starting with the one specified and get all the
106
+ # columns in the column family 'attribute:' :
107
+ # get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
108
+ def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
109
+ safe_exec do
110
+ table_name = table_name.to_s
111
+ table = connect_table(table_name)
115
112
 
116
- result =
117
- if options[:timestamp]
118
- table.getRow(row, java_cols, options[:timestamp])
119
- else
120
- table.getRow(row, java_cols)
121
- end
113
+ scan = Scan.new
114
+ scan.setStartRow(start_row.to_bytes) if start_row
115
+ scan.setStopRow(stop_row.to_bytes) if stop_row
116
+
117
+ columns.each do |column|
118
+ (column[-1,1] == ":") ?
119
+ scan.addFamily(column.gsub(":", "").to_bytes) :
120
+ scan.addColumn(column.to_bytes)
121
+ end
122
+
123
+ scanner = table.getScanner(scan)
122
124
 
123
- unless !result or result.isEmpty
124
- values = {}
125
- result.entrySet.each do |entry|
126
- column_name = Java::String.new(entry.getKey).to_s
127
- values[column_name] = to_ruby_string(entry.getValue)
125
+ if limit
126
+ results = scanner.next(limit)
127
+ else
128
+ results = []
129
+ while (row_result = scanner.next) != nil
130
+ results << row_result
131
+ end
132
+ end
133
+
134
+ output = []
135
+ results.each do |result|
136
+ output << parse_result(result)
137
+ end
138
+ scanner.close
139
+
140
+ return output
128
141
  end
129
- values["id"] = row
130
- values
131
- else
132
- nil
133
142
  end
134
- end
135
- end
136
143
 
137
- # Get consecutive rows. Example to get 100 records starting with the one specified and get all the
138
- # columns in the column family 'attribute:' :
139
- # get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
140
- def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
141
- safe_exec do
142
- table_name = table_name.to_s
143
- table = connect_table(table_name)
144
-
145
- java_cols = Java::String[columns.size].new
146
- columns.each_with_index do |col, i|
147
- java_cols[i] = Java::String.new(col)
144
+ # Delete a whole row.
145
+ def delete(table_name, row, timestamp = nil)
146
+ safe_exec do
147
+ table = connect_table(table_name)
148
+
149
+ if timestamp
150
+ row_lock = table.lockRow(row.to_bytes)
151
+ table.delete(Delete.new(row.to_bytes, timestamp, row_lock))
152
+ table.unlockRow(row_lock)
153
+ else
154
+ table.delete(Delete.new(row.to_bytes))
155
+ end
156
+ end
148
157
  end
149
158
 
150
- start_row ||= ""
151
- start_row = start_row.to_s
159
+ # Create a table
160
+ def create_table(table_name, column_descriptors)
161
+ safe_exec do
162
+ table_name = table_name.to_s
163
+ unless table_exists?(table_name)
164
+ tdesc = HTableDescriptor.new(table_name)
152
165
 
153
- # We cannot set stop_row like start_row because a
154
- # default stop row would have to be the biggest value possible
155
- if stop_row
156
- scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
157
- else
158
- scanner = table.getScanner(java_cols, start_row)
159
- end
166
+ column_descriptors.each do |cd|
167
+ cdesc = generate_column_descriptor(cd)
160
168
 
161
- row_count = 0 if limit
162
- result = []
163
- while (row_result = scanner.next) != nil
164
- if limit
165
- break if row_count == limit
166
- row_count += 1
167
- end
168
- values = {}
169
- row_result.entrySet.each do |entry|
170
- column_name = Java::String.new(entry.getKey).to_s
171
- data = to_ruby_string(entry.getValue)
172
- values[column_name] = data
169
+ tdesc.addFamily(cdesc)
170
+ end
171
+ @admin.createTable(tdesc)
172
+ else
173
+ raise TableAlreadyExists, table_name
174
+ end
173
175
  end
174
- unless values.empty?
175
- # TODO: is this really supposed to be hard coded?
176
- values['id'] = Java::String.new(row_result.getRow).to_s
177
- result << values
176
+ end
177
+
178
+ # Delete a table
179
+ def drop_table(table_name)
180
+ safe_exec do
181
+ table_name = table_name.to_s
182
+
183
+ if @admin.tableExists(table_name)
184
+ @admin.disableTable(table_name)
185
+ @admin.deleteTable(table_name)
186
+
187
+ # Remove the table connection from the cache
188
+ @tables.delete(table_name) if @tables.has_key?(table_name)
189
+ else
190
+ raise TableNotFound, table_name
191
+ end
178
192
  end
179
193
  end
180
- scanner.close
181
- result
182
- end
183
- end
184
194
 
185
- # Delete a whole row.
186
- def delete(table_name, row, timestamp = nil)
187
- safe_exec do
188
- table = connect_table(table_name)
189
- timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
190
- end
191
- end
195
+ def add_column(table_name, column_descriptor)
196
+ safe_exec do
197
+ table_name = table_name.to_s
192
198
 
193
- # Create a table
194
- def create_table(table_name, column_descriptors)
195
- safe_exec do
196
- table_name = table_name.to_s
197
- unless table_exists?(table_name)
198
- tdesc = HTableDescriptor.new(table_name)
199
+ if @admin.tableExists(table_name)
200
+ @admin.disableTable(table_name)
199
201
 
200
- column_descriptors.each do |cd|
201
- cdesc = generate_column_descriptor(cd)
202
+ cdesc = generate_column_descriptor(column_descriptor)
203
+ @admin.addColumn(table_name, cdesc)
202
204
 
203
- tdesc.addFamily(cdesc)
205
+ @admin.enableTable(table_name)
206
+ else
207
+ raise TableNotFound, table_name
208
+ end
204
209
  end
205
- @admin.createTable(tdesc)
206
- else
207
- raise BigRecordDriver::TableAlreadyExists, table_name
208
210
  end
209
- end
210
- end
211
211
 
212
- # Delete a table
213
- def drop_table(table_name)
214
- safe_exec do
215
- table_name = table_name.to_s
212
+ def remove_column(table_name, column_name)
213
+ safe_exec do
214
+ table_name = table_name.to_s
215
+ column_name = column_name.to_s
216
+
217
+ if @admin.tableExists(table_name)
218
+ @admin.disableTable(table_name)
216
219
 
217
- if @admin.tableExists(table_name)
218
- @admin.disableTable(table_name)
219
- @admin.deleteTable(table_name)
220
+ column_name << ":" unless column_name =~ /:$/
221
+ @admin.deleteColumn(table_name, column_name)
220
222
 
221
- # Remove the table connection from the cache
222
- @tables.delete(table_name) if @tables.has_key?(table_name)
223
- else
224
- raise BigRecordDriver::TableNotFound, table_name
223
+ @admin.enableTable(table_name)
224
+ else
225
+ raise TableNotFound, table_name
226
+ end
227
+ end
225
228
  end
226
- end
227
- end
228
229
 
229
- def add_column(table_name, column_descriptor)
230
- safe_exec do
231
- table_name = table_name.to_s
230
+ def modify_column(table_name, column_descriptor)
231
+ safe_exec do
232
+ table_name = table_name.to_s
232
233
 
233
- if @admin.tableExists(table_name)
234
- @admin.disableTable(table_name)
234
+ if @admin.tableExists(table_name)
235
+ @admin.disableTable(table_name)
235
236
 
236
- cdesc = generate_column_descriptor(column_descriptor)
237
- @admin.addColumn(table_name, cdesc)
237
+ cdesc = generate_column_descriptor(column_descriptor)
238
+ @admin.modifyColumn(table_name, column_descriptor.name, cdesc)
238
239
 
239
- @admin.enableTable(table_name)
240
- else
241
- raise BigRecordDriver::TableNotFound, table_name
240
+ @admin.enableTable(table_name)
241
+ else
242
+ raise TableNotFound, table_name
243
+ end
244
+ end
242
245
  end
243
- end
244
- end
245
246
 
246
- def remove_column(table_name, column_name)
247
- safe_exec do
248
- table_name = table_name.to_s
249
- column_name = column_name.to_s
247
+ def truncate_table(table_name)
248
+ safe_exec do
249
+ table_name = table_name.to_s
250
+ table = connect_table(table_name)
251
+ tableDescriptor = table.getTableDescriptor
252
+ drop_table(table_name)
253
+ @admin.createTable(tableDescriptor)
254
+ end
255
+ end
250
256
 
251
- if @admin.tableExists(table_name)
252
- @admin.disableTable(table_name)
257
+ def ping
258
+ safe_exec do
259
+ @admin.isMasterRunning
260
+ end
261
+ end
253
262
 
254
- column_name << ":" unless column_name =~ /:$/
255
- @admin.deleteColumn(table_name, column_name)
263
+ def table_exists?(table_name)
264
+ safe_exec do
265
+ @admin.tableExists(table_name.to_s)
266
+ end
267
+ end
256
268
 
257
- @admin.enableTable(table_name)
258
- else
259
- raise BigRecordDriver::TableNotFound, table_name
269
+ def table_names
270
+ safe_exec do
271
+ @admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
272
+ end
260
273
  end
261
- end
262
- end
263
274
 
264
- def modify_column(table_name, column_descriptor)
265
- safe_exec do
266
- table_name = table_name.to_s
267
- column_name = column_name.to_s
275
+ private
268
276
 
269
- if @admin.tableExists(table_name)
270
- @admin.disableTable(table_name)
277
+ def init_connection
278
+ safe_exec do
279
+ @conf = HBaseConfiguration.new
280
+ @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
281
+ @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
282
+ @admin = HBaseAdmin.new(@conf)
283
+ @tables = {}
284
+ end
285
+ end
271
286
 
272
- cdesc = generate_column_descriptor(column_descriptor)
273
- @admin.modifyColumn(table_name, column_descriptor.name, cdesc)
287
+ # Create a connection to an HBase table and keep it in memory.
288
+ def connect_table(table_name)
289
+ safe_exec do
290
+ table_name = table_name.to_s
291
+ return @tables[table_name] if @tables.has_key?(table_name)
274
292
 
275
- @admin.enableTable(table_name)
276
- else
277
- raise BigRecordDriver::TableNotFound, table_name
293
+ if table_exists?(table_name)
294
+ @tables[table_name] = HTable.new(@conf, table_name)
295
+ else
296
+ if table_name and !table_name.empty?
297
+ raise TableNotFound, table_name
298
+ else
299
+ raise ArgumentError, "Table name not specified"
300
+ end
301
+ end
302
+ @tables[table_name]
303
+ end
278
304
  end
279
- end
280
- end
281
305
 
282
- def truncate_table(table_name)
283
- safe_exec do
284
- table_name = table_name.to_s
285
- table = connect_table(table_name)
286
- tableDescriptor = table.getTableDescriptor
287
- drop_table(table_name)
288
- @admin.createTable(tableDescriptor)
289
- end
290
- end
306
+ # Create a Get object given parameters.
307
+ #
308
+ # @param [String] row
309
+ # @param [Array, String] A single (or collection) of strings
310
+ # fully qualified column name or column family (ends with ':').
311
+ # @param [Hash] options
312
+ #
313
+ # @return [Get] org.apache.hadoop.hbase.client.Get object
314
+ # corresponding to the arguments passed.
315
+ def generate_get(row, columns, options = {})
316
+ columns = [columns].flatten
317
+
318
+ get = Get.new(row.to_bytes)
319
+
320
+ columns.each do |column|
321
+ # If the column name ends with ':' then it's a column family.
322
+ (column[-1,1] == ":") ?
323
+ get.addFamily(column.gsub(":", "").to_bytes) :
324
+ get.addColumn(column.to_bytes)
325
+ end
291
326
 
292
- def ping
293
- safe_exec do
294
- @admin.isMasterRunning
295
- end
296
- end
327
+ get.setMaxVersions(options[:versions]) if options[:versions]
297
328
 
298
- def table_exists?(table_name)
299
- safe_exec do
300
- @admin.tableExists(table_name.to_s)
301
- end
302
- end
329
+ # Need to add 1 to the timestamp due to the API silliness, i.e. min timestamp
330
+ # is inclusive while max timestamp is exclusive.
331
+ get.setTimeRange(java.lang.Long::MIN_VALUE, options[:timestamp]+1) if options[:timestamp]
303
332
 
304
- def table_names
305
- safe_exec do
306
- @admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
307
- end
308
- end
333
+ return get
334
+ end
309
335
 
310
- # def const_missing(const)
311
- # super
312
- # rescue NameError => ex
313
- # raise NameError, "uninitialized constant #{const}"
314
- # end
315
-
316
- private
317
- # Create a connection to a Hbase table and keep it in memory.
318
- def connect_table(table_name)
319
- safe_exec do
320
- table_name = table_name.to_s
321
- return @tables[table_name] if @tables.has_key?(table_name)
322
-
323
- if table_exists?(table_name)
324
- @tables[table_name] = HTable.new(@conf, table_name)
325
- else
326
- if table_name and !table_name.empty?
327
- raise BigRecordDriver::TableNotFound, table_name
328
- else
329
- raise ArgumentError, "Table name not specified"
336
+ # Create a Put object given parameters.
337
+ #
338
+ # @param [String] row
339
+ # @param [Hash] Keys as the fully qualified column names and
340
+ # their associated values.
341
+ # @param [Integer] timestamp
342
+ # @param [org.apache.hadoop.hbase.client.RowLock] row_lock
343
+ #
344
+ # @return [Put] org.apache.hadoop.hbase.client.Put object
345
+ # corresponding to the arguments passed.
346
+ def generate_put(row, columns = {}, timestamp = nil, row_lock = nil)
347
+ put = row_lock ? Put.new(row.to_bytes, row_lock) : Put.new(row.to_bytes)
348
+
349
+ columns.each do |name, value|
350
+ family, qualifier = name.split(":")
351
+ timestamp ?
352
+ put.add(family.to_bytes, qualifier.to_bytes, timestamp, value.to_bytes) :
353
+ put.add(family.to_bytes, qualifier.to_bytes, value.to_bytes)
330
354
  end
355
+
356
+ return put
331
357
  end
332
- @tables[table_name]
333
- end
334
- end
335
358
 
336
- def init_connection
337
- safe_exec do
338
- @conf = HBaseConfiguration.new
339
- @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
340
- @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
341
- @admin = HBaseAdmin.new(@conf)
342
- @tables = {}
343
- end
344
- end
359
+ # Parse a Result object into a Hash.
360
+ #
361
+ # @param [Result] result
362
+ #
363
+ # @return [Hash] Fully qualified column names as keys
364
+ # and their corresponding values.
365
+ def parse_result(result)
366
+ output = {}
367
+
368
+ result.list.each do |keyvalue|
369
+ output[to_ruby_string(keyvalue.getColumn)] = to_ruby_string(keyvalue.getValue)
370
+ end
345
371
 
346
- def generate_column_descriptor(column_descriptor)
347
- raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
348
- raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
372
+ output["id"] = to_ruby_string(result.getRow)
349
373
 
350
- if column_descriptor.compression
351
- compression =
352
- case column_descriptor.compression.to_s
353
- when 'none'; Compression::Algorithm::NONE.getName()
354
- when 'gz'; Compression::Algorithm::GZ.getName()
355
- when 'lzo'; Compression::Algorithm::LZO.getName()
356
- else
357
- raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
374
+ return output
375
+ end
376
+
377
+ def generate_column_descriptor(column_descriptor)
378
+ raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
379
+ raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
380
+
381
+ if column_descriptor.compression
382
+ compression =
383
+ case column_descriptor.compression.to_s
384
+ when 'none'; Compression::Algorithm::NONE.getName()
385
+ when 'gz'; Compression::Algorithm::GZ.getName()
386
+ when 'lzo'; Compression::Algorithm::LZO.getName()
387
+ else
388
+ raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
389
+ end
358
390
  end
359
- end
360
391
 
361
- n_versions = column_descriptor.versions
362
- in_memory = column_descriptor.in_memory
363
-
364
- # set the default values of the missing parameters
365
- n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
366
- compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
367
- in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
368
- block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
369
- block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
370
- bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
371
- ttl ||= HColumnDescriptor::DEFAULT_TTL
372
-
373
- # add the ':' at the end if the user didn't specify it
374
- column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
375
-
376
- cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
377
- n_versions,
378
- compression,
379
- in_memory,
380
- block_cache,
381
- block_size,
382
- ttl,
383
- bloomfilter)
384
-
385
- return cdesc
386
- end
392
+ n_versions = column_descriptor.versions
393
+ in_memory = column_descriptor.in_memory
394
+
395
+ # set the default values of the missing parameters
396
+ n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
397
+ compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
398
+ in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
399
+ block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
400
+ block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
401
+ bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
402
+ ttl ||= HColumnDescriptor::DEFAULT_TTL
403
+
404
+ # add the ':' at the end if the user didn't specify it
405
+ column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
406
+
407
+ cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
408
+ n_versions,
409
+ compression,
410
+ in_memory,
411
+ block_cache,
412
+ block_size,
413
+ ttl,
414
+ bloomfilter)
415
+
416
+ return cdesc
417
+ end
387
418
 
388
- end
419
+ end
389
420
 
421
+ end
390
422
  end
391
423
 
392
- port = ARGV[0]
393
- port ||= 40000
394
- DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
424
+ port = ARGV[0] || 40000
425
+ DRb.start_service("druby://:#{port}", BigRecord::Driver::HbaseServer.new)
395
426
  puts "Started drb server on port #{port}."
396
427
  DRb.thread.join