bigrecord-driver 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
data/bin/bigrecord-driver CHANGED
@@ -62,7 +62,7 @@ unset RUBYLIB
62
62
  if [ "$DRIVERNAME" == "" ]; then
63
63
  echo "Driver type was not specified"
64
64
  exit 1
65
- elif [ "$DRIVERNAME" != "hbase" -a "$DRIVERNAME" != "cassandra" ]; then
65
+ elif [ "$DRIVERNAME" != "hbase" ]; then
66
66
  echo "Driver type \"$DRIVERNAME\" is invalid"
67
67
  exit 1
68
68
  fi
@@ -126,30 +126,30 @@ start() {
126
126
  refresh_pids
127
127
 
128
128
  if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
129
- echo -e "\nAlready running (pid="$PID")."
130
- exit 1
129
+ echo -e "\nAlready running (pid="$PID")."
130
+ exit 1
131
131
  else
132
132
  rm -f $LOGS_DIR/$PORT.log
133
133
  nohup jruby $DRIVER $PORT >> $LOGS_DIR/$PORT.log 2>&1 < /dev/null &
134
134
  PID=$!
135
135
  if [ "$PID" != "" ] ; then
136
- # monitor the log file for the message saying that the server is started
137
- for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
138
- sleep 1
139
- echo -n "."
140
- if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
141
- break
142
- fi
143
- done
144
-
145
- if [ "$i" == $STARTUP_TIMEOUT ] ; then
146
- echo -e "\nStartup timeout: couldn't start the DRb server."
147
- else
148
- echo $PID > $PIDS_DIR/$PORT.pid
149
- fi
150
- echo ""
136
+ # monitor the log file for the message saying that the server is started
137
+ for ((i=0; i<$STARTUP_TIMEOUT; i+=1)); do
138
+ sleep 1
139
+ echo -n "."
140
+ if [ "$(cat $LOGS_DIR/$PORT.log | grep 'Started drb server')" != "" ] ; then
141
+ break
142
+ fi
143
+ done
144
+
145
+ if [ "$i" == $STARTUP_TIMEOUT ] ; then
146
+ echo -e "\nStartup timeout: couldn't start the DRb server."
147
+ else
148
+ echo $PID > $PIDS_DIR/$PORT.pid
149
+ fi
150
+ echo ""
151
151
  else
152
- echo -e "\nAn error occured while starting the DRb server."
152
+ echo -e "\nAn error occured while starting the DRb server."
153
153
  fi
154
154
  fi
155
155
  }
@@ -161,8 +161,8 @@ stop() {
161
161
 
162
162
  if [ -f "$PIDS_DIR/$PORT.pid" -a "$PID" != "" ] ; then
163
163
  echo "Stopping driver (pid = $PID)." >> $LOGS_DIR/$PORT.log
164
- kill $PID
165
- rm $PIDS_DIR/$PORT.pid
164
+ kill $PID
165
+ rm $PIDS_DIR/$PORT.pid
166
166
  else
167
167
  echo "No $DRIVERNAME driver to kill."
168
168
  fi
@@ -183,24 +183,25 @@ start_debug() {
183
183
  }
184
184
 
185
185
  case "$ACTION" in
186
- start)
187
- start
188
- ;;
189
- stop)
190
- stop
191
- ;;
192
- restart)
193
- stop
194
- start
195
- ;;
196
- status)
197
- status
198
- ;;
199
- start_debug)
200
- start_debug
201
- ;;
202
- *)
203
- print_usage
204
- exit 1
186
+ start)
187
+ start
188
+ ;;
189
+ stop)
190
+ stop
191
+ ;;
192
+ restart)
193
+ stop
194
+ start
195
+ ;;
196
+ status)
197
+ status
198
+ ;;
199
+ start_debug)
200
+ start_debug
201
+ ;;
202
+ *)
203
+ print_usage
204
+ exit 1
205
205
  esac
206
+
206
207
  exit 0
data/bin/hbase-driver CHANGED
@@ -11,22 +11,22 @@ if ARGV.include?("-l")
11
11
 
12
12
  # Make sure that the folder exists
13
13
  if File.exists?(hbase_path)
14
- # We're only going to include the jar files we need.
15
- required_jars = Dir[hbase_path+"/*.jar",
16
- hbase_path+"/lib/commons-logging*.jar",
17
- hbase_path+"/lib/zookeeper*.jar",
18
- hbase_path+"/lib/log4j*.jar",
19
- hbase_path+"/lib/hadoop*.jar"]
20
- classpath = required_jars.join(":")
14
+ # We're only going to include the jar files we need.
15
+ required_jars = Dir[hbase_path+"/*.jar",
16
+ hbase_path+"/lib/commons-logging*.jar",
17
+ hbase_path+"/lib/zookeeper*.jar",
18
+ hbase_path+"/lib/log4j*.jar",
19
+ hbase_path+"/lib/hadoop*.jar"]
20
+ classpath = required_jars.join(":")
21
21
 
22
- args.delete_at(switch_index)
23
- args.delete_at(switch_index)
24
- args = args + ["-c", '"'+classpath+'"']
22
+ args.delete_at(switch_index)
23
+ args.delete_at(switch_index)
24
+ args = args + ["-c", '"'+classpath+'"']
25
25
 
26
26
  # Otherwise we'll warn the user and quit
27
27
  else
28
- puts "Folder #{hbase_path} does not exist"
29
- exit
28
+ puts "Folder #{hbase_path} does not exist"
29
+ exit
30
30
  end
31
31
  end
32
32
 
@@ -1,7 +1,14 @@
1
- LIB_ROOT = File.dirname(__FILE__)
1
+ module BigRecord
2
+ module Driver
3
+ end
4
+ end
2
5
 
3
- require LIB_ROOT + '/big_record_driver/client'
4
- require LIB_ROOT + '/big_record_driver/exceptions'
5
- require LIB_ROOT + '/big_record_driver/column_descriptor'
6
- require LIB_ROOT + '/big_record_driver/driver_manager'
7
- require LIB_ROOT + '/big_record_driver/version'
6
+ BRD_ROOT = File.dirname(__FILE__)
7
+
8
+ require BRD_ROOT + '/big_record_driver/client'
9
+ require BRD_ROOT + '/big_record_driver/exceptions'
10
+ require BRD_ROOT + '/big_record_driver/column_descriptor'
11
+ require BRD_ROOT + '/big_record_driver/version'
12
+
13
+ # Aliasing the old namespace
14
+ BigRecordDriver = BigRecord::Driver
@@ -1,36 +1,39 @@
1
- require 'rubygems'
2
- require 'activesupport'
1
+ require 'active_support'
3
2
  require 'set'
4
3
  require 'drb'
5
4
 
6
- module BigRecordDriver
7
- class Client
8
-
9
- def initialize(config={}) # :nodoc:
10
- config = config.symbolize_keys
11
- config[:drb_host] ||= '127.0.0.1'
12
- config[:drb_port] ||= 40000
13
-
14
- @config = config
15
-
16
- DRb.start_service('druby://127.0.0.1:0')
17
- begin
18
- @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
19
- rescue DRb::DRbConnError
20
- raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
21
- "at #{@config[:drb_host]}:#{@config[:drb_port]}."
5
+ module BigRecord
6
+ module Driver
7
+
8
+ class Client
9
+ attr_accessor :config, :server
10
+
11
+ def initialize(config={}) # :nodoc:
12
+ config = config.symbolize_keys
13
+ config[:drb_host] ||= '127.0.0.1'
14
+ config[:drb_port] ||= 40000
15
+
16
+ @config = config
17
+
18
+ DRb.start_service nil
19
+ begin
20
+ @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
21
+ rescue DRb::DRbConnError
22
+ raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
23
+ "at #{@config[:drb_host]}:#{@config[:drb_port]}."
24
+ end
25
+ @server.configure(@config)
26
+ end
27
+
28
+ # Delegate the methods to the server
29
+ def method_missing(method, *args)
30
+ @server.send(method, *args)
31
+ end
32
+
33
+ def respond_to?(method)
34
+ super
22
35
  end
23
- @server.configure(@config)
24
- end
25
-
26
- # Delegate the methods to the server
27
- def method_missing(method, *args)
28
- @server.send(method, *args)
29
- end
30
-
31
- def respond_to?(method)
32
- super
33
36
  end
34
-
37
+
35
38
  end
36
39
  end
@@ -1,23 +1,20 @@
1
- module BigRecordDriver
1
+ module BigRecord
2
+ module Driver
2
3
 
3
- class ColumnDescriptor
4
+ class ColumnDescriptor
5
+ attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression
4
6
 
5
- attr_accessor :name
6
- attr_accessor :versions
7
- attr_accessor :in_memory
8
- attr_accessor :bloom_filter
9
- attr_accessor :compression
7
+ def initialize(name, options={})
8
+ raise ArgumentError, "name is mandatory" unless name
10
9
 
11
- def initialize(name, options={})
12
- raise ArgumentError, "name is mandatory" unless name
13
-
14
- @name = name.to_s
15
- @versions = options[:versions]
16
- @in_memory = options[:in_memory]
17
- @bloom_filter = options[:bloom_filter]
18
- @compression = options[:compression]
10
+ @name = name.to_s
11
+ @versions = options[:versions]
12
+ @in_memory = options[:in_memory]
13
+ @bloom_filter = options[:bloom_filter]
14
+ @compression = options[:compression]
15
+ end
19
16
  end
20
17
 
21
18
  end
22
-
23
19
  end
20
+
@@ -1,12 +1,14 @@
1
- module BigRecordDriver
2
- class BigDBError < StandardError
3
- end
4
- class TableNotFound < BigDBError
5
- end
6
- class TableAlreadyExists < BigDBError
7
- end
8
- class JavaError < BigDBError
9
- end
10
- class ConnectionError < BigDBError
1
+ module BigRecord
2
+ module Driver
3
+ class DriverError < StandardError
4
+ end
5
+ class TableNotFound < DriverError
6
+ end
7
+ class TableAlreadyExists < DriverError
8
+ end
9
+ class JavaError < DriverError
10
+ end
11
+ class ConnectionError < DriverError
12
+ end
11
13
  end
12
14
  end
@@ -1,396 +1,427 @@
1
1
  require File.dirname(__FILE__) + '/../column_descriptor'
2
2
  require File.dirname(__FILE__) + '/../exceptions'
3
- require File.dirname(__FILE__) + '/../bigrecord_server'
3
+ require File.dirname(__FILE__) + '/../server'
4
4
 
5
- module BigRecordDriver
5
+ module BigRecord
6
+ module Driver
6
7
 
7
- class HbaseServer < BigRecordServer
8
- include_class "java.util.TreeMap"
8
+ class HbaseServer < Server
9
+ java_import "java.util.TreeMap"
10
+ include_package "org.apache.hadoop.hbase.client"
11
+ java_import "org.apache.hadoop.hbase.KeyValue"
12
+ java_import "org.apache.hadoop.hbase.io.hfile.Compression"
13
+ java_import "org.apache.hadoop.hbase.HBaseConfiguration"
14
+ java_import "org.apache.hadoop.hbase.HTableDescriptor"
15
+ java_import "org.apache.hadoop.hbase.HColumnDescriptor"
9
16
 
10
- include_class "org.apache.hadoop.hbase.client.HTable"
11
- include_class "org.apache.hadoop.hbase.client.HBaseAdmin"
12
- include_class "org.apache.hadoop.hbase.io.BatchUpdate"
13
- include_class "org.apache.hadoop.hbase.io.hfile.Compression"
14
- include_class "org.apache.hadoop.hbase.HBaseConfiguration"
15
- include_class "org.apache.hadoop.hbase.HConstants"
16
- include_class "org.apache.hadoop.hbase.HStoreKey"
17
- include_class "org.apache.hadoop.hbase.HTableDescriptor"
18
- include_class "org.apache.hadoop.hbase.HColumnDescriptor"
17
+ # Establish the connection with HBase with the given configuration parameters.
18
+ def configure(config = {})
19
+ config[:zookeeper_quorum] ||= 'localhost'
20
+ config[:zookeeper_client_port] ||= '2181'
19
21
 
20
- include_class "org.apache.hadoop.io.Writable"
22
+ @config = config
21
23
 
22
- # Establish the connection with HBase with the given configuration parameters.
23
- def configure(config = {})
24
- config[:zookeeper_quorum] ||= 'localhost'
25
- config[:zookeeper_client_port] ||= '2181'
24
+ init_connection
25
+ end
26
26
 
27
- @config = config
27
+ # Atomic row insertion/update. Example:
28
+ # update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
29
+ # 'attribute:travel_rank' => "--- 0.90124565\n"})
30
+ # => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
31
+ def update(table_name, row, values, timestamp=nil)
32
+ safe_exec do
33
+ return nil unless row
28
34
 
29
- init_connection
30
- end
35
+ table = connect_table(table_name)
36
+ row_lock = table.lockRow(row.to_bytes)
31
37
 
32
- # Atomic row insertion/update. Example:
33
- # update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', {'attribute:name' => "--- Oahu\n",
34
- # 'attribute:travel_rank' => "--- 0.90124565\n"})
35
- # => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
36
- def update(table_name, row, values, timestamp=nil)
37
- safe_exec do
38
- return nil unless row
39
- table = connect_table(table_name)
38
+ put = generate_put(row, values, timestamp, row_lock)
39
+ table.put(put)
40
40
 
41
- batch = timestamp ? BatchUpdate.new(row, timestamp) : BatchUpdate.new(row)
41
+ table.unlockRow(row_lock)
42
42
 
43
- values.each do |column, value|
44
- batch.put(column, value.to_bytes)
43
+ row
44
+ end
45
45
  end
46
46
 
47
- table.commit(batch)
48
- row
49
- end
50
- end
47
+ # Returns a column of a row. Example:
48
+ # get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
49
+ # => "--- 0.90124565\n"
50
+ #
51
+ # valid options:
52
+ # :timestamp => integer corresponding to the time when the record was saved in hbase
53
+ # :versions => number of versions to retrieve, starting at the specified timestamp (or the latest)
54
+ def get(table_name, row, column, options={})
55
+ safe_exec do
56
+ return nil unless row
57
+
58
+ table = connect_table(table_name)
59
+
60
+ # Grab the version number if the client's using the old API,
61
+ # or retrieve only the latest version by default
62
+ options[:versions] ||= options[:num_versions]
63
+ options[:versions] ||= 1
64
+
65
+ # validate the arguments
66
+ raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
67
+
68
+ get = generate_get(row, column, options)
69
+ result = table.get(get)
70
+
71
+ if (result.nil? || result.isEmpty)
72
+ return (options[:versions] == 1 ? nil : [])
73
+ else
74
+ output = result.list.collect do |keyvalue|
75
+ to_ruby_string(keyvalue.getValue)
76
+ end
51
77
 
52
- # Returns a column of a row. Example:
53
- # get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
54
- # => "--- 0.90124565\n"
55
- #
56
- # valid options:
57
- # :timestamp => integer corresponding to the time when the record was saved in hbase
58
- # :versions => number of versions to retreive, starting at the specified timestamp (or the latest)
59
- def get(table_name, row, column, options={})
60
- safe_exec do
61
- return nil unless row
62
- table = connect_table(table_name)
63
-
64
- # Retreive only the last version by default
65
- options[:versions] ||= options[:num_versions]
66
- options[:versions] ||= 1
67
-
68
- # validate the arguments
69
- raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1
70
-
71
- # get the raw data from hbase
72
- unless options[:timestamp]
73
- if options[:versions] == 1
74
- raw_data = table.get(row, column)
75
- else
76
- raw_data = table.get(row,
77
- column,
78
- options[:versions])
78
+ return (options[:versions] == 1 ? output[0] : output)
79
+ end
79
80
  end
80
- else
81
- raw_data = table.get(row,
82
- column,
83
- options[:timestamp],
84
- options[:versions])
85
81
  end
86
82
 
87
- # Return either a single value or an array, depending on the number of version that have been requested
88
- if options[:versions] == 1
89
- return nil unless raw_data
90
- raw_data = raw_data[0] if options[:timestamp]
91
- to_ruby_string(raw_data)
92
- else
93
- return [] unless raw_data
94
- raw_data.collect do |raw_data_version|
95
- to_ruby_string(raw_data_version)
83
+ # Returns the last version of the given columns of the given row. The columns work with
84
+ # regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
85
+ # get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
86
+ # => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
87
+ def get_columns(table_name, row, columns, options={})
88
+ safe_exec do
89
+ return nil unless row
90
+
91
+ table_name = table_name.to_s
92
+ table = connect_table(table_name)
93
+
94
+ get = generate_get(row, columns, options)
95
+ result = table.get(get)
96
+
97
+ begin
98
+ parse_result(result)
99
+ rescue
100
+ nil
101
+ end
96
102
  end
97
103
  end
98
- end
99
- end
100
104
 
101
- # Returns the last version of the given columns of the given row. The columns works with
102
- # regular expressions (e.g. 'attribute:' matches all attributes columns). Example:
103
- # get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
104
- # => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", etc...}
105
- def get_columns(table_name, row, columns, options={})
106
- safe_exec do
107
- return nil unless row
108
- table_name = table_name.to_s
109
- table = connect_table(table_name)
110
-
111
- java_cols = Java::String[columns.size].new
112
- columns.each_with_index do |col, i|
113
- java_cols[i] = Java::String.new(col)
114
- end
105
+ # Get consecutive rows. Example to get 100 records starting with the one specified and get all the
106
+ # columns in the column family 'attribute:' :
107
+ # get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
108
+ def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
109
+ safe_exec do
110
+ table_name = table_name.to_s
111
+ table = connect_table(table_name)
115
112
 
116
- result =
117
- if options[:timestamp]
118
- table.getRow(row, java_cols, options[:timestamp])
119
- else
120
- table.getRow(row, java_cols)
121
- end
113
+ scan = Scan.new
114
+ scan.setStartRow(start_row.to_bytes) if start_row
115
+ scan.setStopRow(stop_row.to_bytes) if stop_row
116
+
117
+ columns.each do |column|
118
+ (column[-1,1] == ":") ?
119
+ scan.addFamily(column.gsub(":", "").to_bytes) :
120
+ scan.addColumn(column.to_bytes)
121
+ end
122
+
123
+ scanner = table.getScanner(scan)
122
124
 
123
- unless !result or result.isEmpty
124
- values = {}
125
- result.entrySet.each do |entry|
126
- column_name = Java::String.new(entry.getKey).to_s
127
- values[column_name] = to_ruby_string(entry.getValue)
125
+ if limit
126
+ results = scanner.next(limit)
127
+ else
128
+ results = []
129
+ while (row_result = scanner.next) != nil
130
+ results << row_result
131
+ end
132
+ end
133
+
134
+ output = []
135
+ results.each do |result|
136
+ output << parse_result(result)
137
+ end
138
+ scanner.close
139
+
140
+ return output
128
141
  end
129
- values["id"] = row
130
- values
131
- else
132
- nil
133
142
  end
134
- end
135
- end
136
143
 
137
- # Get consecutive rows. Example to get 100 records starting with the one specified and get all the
138
- # columns in the column family 'attribute:' :
139
- # get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
140
- def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
141
- safe_exec do
142
- table_name = table_name.to_s
143
- table = connect_table(table_name)
144
-
145
- java_cols = Java::String[columns.size].new
146
- columns.each_with_index do |col, i|
147
- java_cols[i] = Java::String.new(col)
144
+ # Delete a whole row.
145
+ def delete(table_name, row, timestamp = nil)
146
+ safe_exec do
147
+ table = connect_table(table_name)
148
+
149
+ if timestamp
150
+ row_lock = table.lockRow(row.to_bytes)
151
+ table.delete(Delete.new(row.to_bytes, timestamp, row_lock))
152
+ table.unlockRow(row_lock)
153
+ else
154
+ table.delete(Delete.new(row.to_bytes))
155
+ end
156
+ end
148
157
  end
149
158
 
150
- start_row ||= ""
151
- start_row = start_row.to_s
159
+ # Create a table
160
+ def create_table(table_name, column_descriptors)
161
+ safe_exec do
162
+ table_name = table_name.to_s
163
+ unless table_exists?(table_name)
164
+ tdesc = HTableDescriptor.new(table_name)
152
165
 
153
- # We cannot set stop_row like start_row because a
154
- # default stop row would have to be the biggest value possible
155
- if stop_row
156
- scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
157
- else
158
- scanner = table.getScanner(java_cols, start_row)
159
- end
166
+ column_descriptors.each do |cd|
167
+ cdesc = generate_column_descriptor(cd)
160
168
 
161
- row_count = 0 if limit
162
- result = []
163
- while (row_result = scanner.next) != nil
164
- if limit
165
- break if row_count == limit
166
- row_count += 1
167
- end
168
- values = {}
169
- row_result.entrySet.each do |entry|
170
- column_name = Java::String.new(entry.getKey).to_s
171
- data = to_ruby_string(entry.getValue)
172
- values[column_name] = data
169
+ tdesc.addFamily(cdesc)
170
+ end
171
+ @admin.createTable(tdesc)
172
+ else
173
+ raise TableAlreadyExists, table_name
174
+ end
173
175
  end
174
- unless values.empty?
175
- # TODO: is this really supposed to be hard coded?
176
- values['id'] = Java::String.new(row_result.getRow).to_s
177
- result << values
176
+ end
177
+
178
+ # Delete a table
179
+ def drop_table(table_name)
180
+ safe_exec do
181
+ table_name = table_name.to_s
182
+
183
+ if @admin.tableExists(table_name)
184
+ @admin.disableTable(table_name)
185
+ @admin.deleteTable(table_name)
186
+
187
+ # Remove the table connection from the cache
188
+ @tables.delete(table_name) if @tables.has_key?(table_name)
189
+ else
190
+ raise TableNotFound, table_name
191
+ end
178
192
  end
179
193
  end
180
- scanner.close
181
- result
182
- end
183
- end
184
194
 
185
- # Delete a whole row.
186
- def delete(table_name, row, timestamp = nil)
187
- safe_exec do
188
- table = connect_table(table_name)
189
- timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
190
- end
191
- end
195
+ def add_column(table_name, column_descriptor)
196
+ safe_exec do
197
+ table_name = table_name.to_s
192
198
 
193
- # Create a table
194
- def create_table(table_name, column_descriptors)
195
- safe_exec do
196
- table_name = table_name.to_s
197
- unless table_exists?(table_name)
198
- tdesc = HTableDescriptor.new(table_name)
199
+ if @admin.tableExists(table_name)
200
+ @admin.disableTable(table_name)
199
201
 
200
- column_descriptors.each do |cd|
201
- cdesc = generate_column_descriptor(cd)
202
+ cdesc = generate_column_descriptor(column_descriptor)
203
+ @admin.addColumn(table_name, cdesc)
202
204
 
203
- tdesc.addFamily(cdesc)
205
+ @admin.enableTable(table_name)
206
+ else
207
+ raise TableNotFound, table_name
208
+ end
204
209
  end
205
- @admin.createTable(tdesc)
206
- else
207
- raise BigRecordDriver::TableAlreadyExists, table_name
208
210
  end
209
- end
210
- end
211
211
 
212
- # Delete a table
213
- def drop_table(table_name)
214
- safe_exec do
215
- table_name = table_name.to_s
212
+ def remove_column(table_name, column_name)
213
+ safe_exec do
214
+ table_name = table_name.to_s
215
+ column_name = column_name.to_s
216
+
217
+ if @admin.tableExists(table_name)
218
+ @admin.disableTable(table_name)
216
219
 
217
- if @admin.tableExists(table_name)
218
- @admin.disableTable(table_name)
219
- @admin.deleteTable(table_name)
220
+ column_name << ":" unless column_name =~ /:$/
221
+ @admin.deleteColumn(table_name, column_name)
220
222
 
221
- # Remove the table connection from the cache
222
- @tables.delete(table_name) if @tables.has_key?(table_name)
223
- else
224
- raise BigRecordDriver::TableNotFound, table_name
223
+ @admin.enableTable(table_name)
224
+ else
225
+ raise TableNotFound, table_name
226
+ end
227
+ end
225
228
  end
226
- end
227
- end
228
229
 
229
- def add_column(table_name, column_descriptor)
230
- safe_exec do
231
- table_name = table_name.to_s
230
+ def modify_column(table_name, column_descriptor)
231
+ safe_exec do
232
+ table_name = table_name.to_s
232
233
 
233
- if @admin.tableExists(table_name)
234
- @admin.disableTable(table_name)
234
+ if @admin.tableExists(table_name)
235
+ @admin.disableTable(table_name)
235
236
 
236
- cdesc = generate_column_descriptor(column_descriptor)
237
- @admin.addColumn(table_name, cdesc)
237
+ cdesc = generate_column_descriptor(column_descriptor)
238
+ @admin.modifyColumn(table_name, column_descriptor.name, cdesc)
238
239
 
239
- @admin.enableTable(table_name)
240
- else
241
- raise BigRecordDriver::TableNotFound, table_name
240
+ @admin.enableTable(table_name)
241
+ else
242
+ raise TableNotFound, table_name
243
+ end
244
+ end
242
245
  end
243
- end
244
- end
245
246
 
246
- def remove_column(table_name, column_name)
247
- safe_exec do
248
- table_name = table_name.to_s
249
- column_name = column_name.to_s
247
+ def truncate_table(table_name)
248
+ safe_exec do
249
+ table_name = table_name.to_s
250
+ table = connect_table(table_name)
251
+ tableDescriptor = table.getTableDescriptor
252
+ drop_table(table_name)
253
+ @admin.createTable(tableDescriptor)
254
+ end
255
+ end
250
256
 
251
- if @admin.tableExists(table_name)
252
- @admin.disableTable(table_name)
257
+ def ping
258
+ safe_exec do
259
+ @admin.isMasterRunning
260
+ end
261
+ end
253
262
 
254
- column_name << ":" unless column_name =~ /:$/
255
- @admin.deleteColumn(table_name, column_name)
263
+ def table_exists?(table_name)
264
+ safe_exec do
265
+ @admin.tableExists(table_name.to_s)
266
+ end
267
+ end
256
268
 
257
- @admin.enableTable(table_name)
258
- else
259
- raise BigRecordDriver::TableNotFound, table_name
269
+ def table_names
270
+ safe_exec do
271
+ @admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
272
+ end
260
273
  end
261
- end
262
- end
263
274
 
264
- def modify_column(table_name, column_descriptor)
265
- safe_exec do
266
- table_name = table_name.to_s
267
- column_name = column_name.to_s
275
+ private
268
276
 
269
- if @admin.tableExists(table_name)
270
- @admin.disableTable(table_name)
277
+ def init_connection
278
+ safe_exec do
279
+ @conf = HBaseConfiguration.new
280
+ @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
281
+ @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
282
+ @admin = HBaseAdmin.new(@conf)
283
+ @tables = {}
284
+ end
285
+ end
271
286
 
272
- cdesc = generate_column_descriptor(column_descriptor)
273
- @admin.modifyColumn(table_name, column_descriptor.name, cdesc)
287
+ # Create a connection to an HBase table and keep it in memory.
288
+ def connect_table(table_name)
289
+ safe_exec do
290
+ table_name = table_name.to_s
291
+ return @tables[table_name] if @tables.has_key?(table_name)
274
292
 
275
- @admin.enableTable(table_name)
276
- else
277
- raise BigRecordDriver::TableNotFound, table_name
293
+ if table_exists?(table_name)
294
+ @tables[table_name] = HTable.new(@conf, table_name)
295
+ else
296
+ if table_name and !table_name.empty?
297
+ raise TableNotFound, table_name
298
+ else
299
+ raise ArgumentError, "Table name not specified"
300
+ end
301
+ end
302
+ @tables[table_name]
303
+ end
278
304
  end
279
- end
280
- end
281
305
 
282
- def truncate_table(table_name)
283
- safe_exec do
284
- table_name = table_name.to_s
285
- table = connect_table(table_name)
286
- tableDescriptor = table.getTableDescriptor
287
- drop_table(table_name)
288
- @admin.createTable(tableDescriptor)
289
- end
290
- end
306
+ # Create a Get object given parameters.
307
+ #
308
+ # @param [String] row
309
+ # @param [Array, String] columns A single string (or a collection of strings), each a
310
+ # fully qualified column name or column family (ends with ':').
311
+ # @param [Hash] options
312
+ #
313
+ # @return [Get] org.apache.hadoop.hbase.client.Get object
314
+ # corresponding to the arguments passed.
315
+ def generate_get(row, columns, options = {})
316
+ columns = [columns].flatten
317
+
318
+ get = Get.new(row.to_bytes)
319
+
320
+ columns.each do |column|
321
+ # If the column name ends with ':' then it's a column family.
322
+ (column[-1,1] == ":") ?
323
+ get.addFamily(column.gsub(":", "").to_bytes) :
324
+ get.addColumn(column.to_bytes)
325
+ end
291
326
 
292
- def ping
293
- safe_exec do
294
- @admin.isMasterRunning
295
- end
296
- end
327
+ get.setMaxVersions(options[:versions]) if options[:versions]
297
328
 
298
- def table_exists?(table_name)
299
- safe_exec do
300
- @admin.tableExists(table_name.to_s)
301
- end
302
- end
329
+ # Need to add 1 to the timestamp due to the API silliness, i.e. min timestamp
330
+ # is inclusive while max timestamp is exclusive.
331
+ get.setTimeRange(java.lang.Long::MIN_VALUE, options[:timestamp]+1) if options[:timestamp]
303
332
 
304
- def table_names
305
- safe_exec do
306
- @admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
307
- end
308
- end
333
+ return get
334
+ end
309
335
 
310
- # def const_missing(const)
311
- # super
312
- # rescue NameError => ex
313
- # raise NameError, "uninitialized constant #{const}"
314
- # end
315
-
316
- private
317
- # Create a connection to a Hbase table and keep it in memory.
318
- def connect_table(table_name)
319
- safe_exec do
320
- table_name = table_name.to_s
321
- return @tables[table_name] if @tables.has_key?(table_name)
322
-
323
- if table_exists?(table_name)
324
- @tables[table_name] = HTable.new(@conf, table_name)
325
- else
326
- if table_name and !table_name.empty?
327
- raise BigRecordDriver::TableNotFound, table_name
328
- else
329
- raise ArgumentError, "Table name not specified"
336
+ # Create a Put object given parameters.
337
+ #
338
+ # @param [String] row
339
+ # @param [Hash] columns Maps the fully qualified column names to
340
+ # their associated values.
341
+ # @param [Integer] timestamp
342
+ # @param [org.apache.hadoop.hbase.client.RowLock] row_lock
343
+ #
344
+ # @return [Put] org.apache.hadoop.hbase.client.Put object
345
+ # corresponding to the arguments passed.
346
+ def generate_put(row, columns = {}, timestamp = nil, row_lock = nil)
347
+ put = row_lock ? Put.new(row.to_bytes, row_lock) : Put.new(row.to_bytes)
348
+
349
+ columns.each do |name, value|
350
+ family, qualifier = name.split(":")
351
+ timestamp ?
352
+ put.add(family.to_bytes, qualifier.to_bytes, timestamp, value.to_bytes) :
353
+ put.add(family.to_bytes, qualifier.to_bytes, value.to_bytes)
330
354
  end
355
+
356
+ return put
331
357
  end
332
- @tables[table_name]
333
- end
334
- end
335
358
 
336
- def init_connection
337
- safe_exec do
338
- @conf = HBaseConfiguration.new
339
- @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
340
- @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
341
- @admin = HBaseAdmin.new(@conf)
342
- @tables = {}
343
- end
344
- end
359
+ # Parse a Result object into a Hash.
360
+ #
361
+ # @param [Result] result
362
+ #
363
+ # @return [Hash] Fully qualified column names as keys
364
+ # and their corresponding values.
365
+ def parse_result(result)
366
+ output = {}
367
+
368
+ result.list.each do |keyvalue|
369
+ output[to_ruby_string(keyvalue.getColumn)] = to_ruby_string(keyvalue.getValue)
370
+ end
345
371
 
346
- def generate_column_descriptor(column_descriptor)
347
- raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
348
- raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
372
+ output["id"] = to_ruby_string(result.getRow)
349
373
 
350
- if column_descriptor.compression
351
- compression =
352
- case column_descriptor.compression.to_s
353
- when 'none'; Compression::Algorithm::NONE.getName()
354
- when 'gz'; Compression::Algorithm::GZ.getName()
355
- when 'lzo'; Compression::Algorithm::LZO.getName()
356
- else
357
- raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
374
+ return output
375
+ end
376
+
377
+ def generate_column_descriptor(column_descriptor)
378
+ raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
379
+ raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter
380
+
381
+ if column_descriptor.compression
382
+ compression =
383
+ case column_descriptor.compression.to_s
384
+ when 'none'; Compression::Algorithm::NONE.getName()
385
+ when 'gz'; Compression::Algorithm::GZ.getName()
386
+ when 'lzo'; Compression::Algorithm::LZO.getName()
387
+ else
388
+ raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
389
+ end
358
390
  end
359
- end
360
391
 
361
- n_versions = column_descriptor.versions
362
- in_memory = column_descriptor.in_memory
363
-
364
- # set the default values of the missing parameters
365
- n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
366
- compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
367
- in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
368
- block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
369
- block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
370
- bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
371
- ttl ||= HColumnDescriptor::DEFAULT_TTL
372
-
373
- # add the ':' at the end if the user didn't specify it
374
- column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
375
-
376
- cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
377
- n_versions,
378
- compression,
379
- in_memory,
380
- block_cache,
381
- block_size,
382
- ttl,
383
- bloomfilter)
384
-
385
- return cdesc
386
- end
392
+ n_versions = column_descriptor.versions
393
+ in_memory = column_descriptor.in_memory
394
+
395
+ # set the default values of the missing parameters
396
+ n_versions ||= HColumnDescriptor::DEFAULT_VERSIONS
397
+ compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
398
+ in_memory ||= HColumnDescriptor::DEFAULT_IN_MEMORY
399
+ block_cache ||= HColumnDescriptor::DEFAULT_BLOCKCACHE
400
+ block_size ||= HColumnDescriptor::DEFAULT_BLOCKSIZE
401
+ bloomfilter ||= HColumnDescriptor::DEFAULT_BLOOMFILTER
402
+ ttl ||= HColumnDescriptor::DEFAULT_TTL
403
+
404
+ # add the ':' at the end if the user didn't specify it
405
+ column_descriptor.name << ":" unless column_descriptor.name =~ /:$/
406
+
407
+ cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
408
+ n_versions,
409
+ compression,
410
+ in_memory,
411
+ block_cache,
412
+ block_size,
413
+ ttl,
414
+ bloomfilter)
415
+
416
+ return cdesc
417
+ end
387
418
 
388
- end
419
+ end
389
420
 
421
+ end
390
422
  end
391
423
 
392
- port = ARGV[0]
393
- port ||= 40000
394
- DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
424
+ port = ARGV[0] || 40000
425
+ DRb.start_service("druby://:#{port}", BigRecord::Driver::HbaseServer.new)
395
426
  puts "Started drb server on port #{port}."
396
427
  DRb.thread.join