bigrecord-driver 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
File without changes
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line entry point for the Cassandra driver.
# Replaces this process with the shared bash `launcher` script located next to
# this file, passing "cassandra" as the driver name followed by the arguments
# given on the command line.

launcher_script = File.join(File.dirname(__FILE__), 'launcher')
exec('bash', launcher_script, 'cassandra', *ARGV)
data/bin/hbase-driver ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line entry point for the HBase driver.
# The real work is done by the bash `launcher` script that lives in the same
# directory; this wrapper only prepends the driver name to the arguments.

bin_dir = File.dirname(__FILE__)
launcher_args = ['hbase', *ARGV]
exec 'bash', File.join(bin_dir, 'launcher'), *launcher_args
data/bin/launcher ADDED
@@ -0,0 +1,155 @@
1
#! /bin/bash
# Shared launcher for the bigrecord drivers.
# Usage: launcher <driver-name> {start|stop|restart|status|start_debug} [-p <port>]
#
# This first section only prepares the environment (paths, pid/log
# directories, PATH and CLASSPATH) used by the functions defined below.
DRIVERNAME=$1
# Fix for Amazon EC2 instances
unset RUBYLIB

# Needs the $JRUBY_HOME environment variable to be set
if [ -z "$JRUBY_HOME" ]; then
  echo "\$JRUBY_HOME is not set. Please set it in your ~/.bashrc file."
  exit 1
fi

# Resolve the sibling directories relative to this script. `&&` (rather than
# `;`) keeps a failed `cd` from silently reporting the caller's current
# directory as the vendor/conf directory; the variable is left empty instead.
BIN_DIR=$(dirname "$0")
VENDOR_DIR=$(cd "$BIN_DIR/../vendor" && pwd)
CONF_DIR=$(cd "$BIN_DIR/../conf" && pwd)

# Ruby file that contains the drb server
DRIVER="$VENDOR_DIR/../lib/big_record_driver/${DRIVERNAME}_driver/server.rb"

# location where the pids are stored
PIDS_DIR="/tmp/$DRIVERNAME-driver/run"
mkdir -p "$PIDS_DIR"

# maximum number of seconds to wait for the server to report that it started
STARTUP_TIMEOUT=60

# location where the log files are stored
LOGS_DIR="/tmp/$DRIVERNAME-driver/log"
mkdir -p "$LOGS_DIR"

# set the environment to use jruby by default
PATH="$JRUBY_HOME/bin:$PATH"
for f in "$VENDOR_DIR/java/$DRIVERNAME"/*.jar; do
  if [ -f "$f" ]; then
    # ${CLASSPATH:+...} avoids a leading ':' when CLASSPATH starts out empty;
    # an empty classpath entry would put the current directory on the classpath.
    CLASSPATH="${CLASSPATH:+$CLASSPATH:}$f"
  fi
done
#CLASSPATH=$CLASSPATH:$VENDOR_DIR/java/hadoop-0.19.1-core.jar:$VENDOR_DIR/java/hbase-0.19.1.jar:$VENDOR_DIR/java/commons-logging-1.0.4.jar:$VENDOR_DIR/java/commons-logging-api-1.0.4.jar:$VENDOR_DIR/java/log4j-1.2.13.jar:$CONF_DIR/log4j.properties

export PATH CLASSPATH

# TODO: find a way to use the return value of refresh_pids instead of
# manipulating this global variable
PID=""
46
+
47
# Load the pid recorded for $PORT into the global PID.
# PID is left empty when there is no pid file, when its content is not a
# plain number, or when no such process exists any more, so that callers
# treat a stale pid file exactly like "not running".
refresh_pids() {
  local pid_file="$PIDS_DIR/$PORT.pid"

  PID=""
  if [ ! -f "$pid_file" ]; then
    return
  fi

  PID=$(cat "$pid_file")
  case "$PID" in
    ''|*[!0-9]*)
      # Not a pid at all; never hand garbage to kill.
      PID=""
      return
      ;;
  esac

  # kill -0 only probes for the process; ps is the fallback for a process
  # owned by another user, for which kill -0 fails with a permission error.
  if ! kill -0 "$PID" 2>/dev/null && ! ps -p "$PID" > /dev/null 2>&1; then
    PID=""
  fi
}
54
+
55
# Start the driver in the background on $PORT.
# The server is considered started once its log contains the line
# 'Started drb server'; only then is the pid file written.
# Exits the script with status 1 when a driver is already recorded for this
# port, or when the server did not come up.
start() {
  echo -n "Starting $DRIVERNAME driver on port $PORT."

  refresh_pids

  if [ -f "$PIDS_DIR/$PORT.pid" ] && [ -n "$PID" ]; then
    echo -e "\nAlready running (pid=$PID)."
    exit 1
  fi

  local log_file="$LOGS_DIR/$PORT.log"
  rm -f "$log_file"
  nohup jruby "$DRIVER" "$PORT" >> "$log_file" 2>&1 < /dev/null &
  # $! is always set after a background command, so no empty-pid branch is needed.
  PID=$!

  # monitor the log file for the message saying that the server is started
  local started=0
  local alive=1
  local i
  for ((i = 0; i < STARTUP_TIMEOUT; i += 1)); do
    sleep 1
    echo -n "."
    if grep -q 'Started drb server' "$log_file" 2>/dev/null; then
      started=1
      break
    fi
    # Stop waiting as soon as the process is gone instead of burning the
    # whole timeout on a server that already crashed.
    if ! kill -0 "$PID" 2>/dev/null; then
      alive=0
      break
    fi
  done

  if [ "$started" -eq 1 ]; then
    echo "$PID" > "$PIDS_DIR/$PORT.pid"
    echo ""
    return
  fi

  if [ "$alive" -eq 1 ]; then
    echo -e "\nStartup timeout: couldn't start the DRb server."
    # Do not leave an untracked server behind: without a pid file nothing
    # could stop it later and it would keep the port busy.
    kill "$PID" 2>/dev/null
  else
    echo -e "\nThe DRb server exited during startup. See $log_file."
  fi
  echo ""
  PID=""
  exit 1
}
88
+
89
# Stop the driver recorded for $PORT, if any, and remove its pid file.
# Waits a bounded time for the process to exit so that a following start()
# (the `restart` command) does not race with the old server for the port.
stop() {
  echo "Stopping $DRIVERNAME driver on port $PORT."

  refresh_pids

  local pid_file="$PIDS_DIR/$PORT.pid"
  local stop_timeout=10
  local waited=0

  if [ -f "$pid_file" ] && [ -n "$PID" ]; then
    echo "Stopping driver (pid = $PID)." >> "$LOGS_DIR/$PORT.log"
    kill "$PID"
    while kill -0 "$PID" 2>/dev/null && [ "$waited" -lt "$stop_timeout" ]; do
      sleep 1
      waited=$((waited + 1))
    done
    rm -f "$pid_file"
  else
    echo "No $DRIVERNAME driver to kill."
    # Drop a leftover pid file that no longer points at a usable pid.
    rm -f "$pid_file"
  fi
}
102
+
103
# Print "Running." when a pid is recorded for $PORT, "Stopped." otherwise.
# The exact strings are parsed by callers, so they must not change.
status() {
  refresh_pids

  if [ ! -f "$PIDS_DIR/$PORT.pid" ] || [ -z "$PID" ]; then
    echo "Stopped."
    return
  fi

  echo "Running."
}
112
+
113
# Run the driver in the foreground on $PORT (no nohup, no pid file, output on
# the terminal). Quoted so an install path containing spaces still works.
start_debug() {
  jruby "$DRIVER" "$PORT"
}
116
+
117
# Print the one-line command synopsis for the current driver.
print_usage() {
  printf '%s\n' "Usage: $DRIVERNAME-driver {start|stop|restart|status|start_debug} [-p <port>]"
}
120
+
121
# Set the global PORT from the script arguments.
# Expects the script's full argument list: $1 driver name, $2 command,
# optionally followed by "-p <port>" as $3 and $4. Defaults to 40000.
# The port ends up in pid/log file names, so anything that is not a plain
# number is rejected (exit status 1) instead of being used as a path fragment.
set_port() {
  PORT=40000

  if [ "$3" = "-p" ] && [ -n "$4" ]; then
    case "$4" in
      *[!0-9]*)
        echo "Invalid port: $4" >&2
        exit 1
        ;;
    esac
    PORT="$4"
  fi
}
128
+
129
# Dispatch on the command given as second argument.
# "$@" is quoted so arguments are passed to set_port exactly as received
# (no word splitting, no glob expansion).
case "$2" in
  start|stop|status|start_debug)
    set_port "$@"
    # The pattern above guarantees $2 is the name of one of the functions.
    "$2"
    ;;
  restart)
    set_port "$@"
    stop
    start
    ;;
  *)
    print_usage
    exit 1
    ;;
esac
exit 0
@@ -0,0 +1,48 @@
1
+ # Define some default values that can be overridden by system properties
2
+ bigrecord.root.logger=INFO,DRFA
3
+ bigrecord.log.dir=/tmp/hbase-driver/log
4
+ bigrecord.log.file=hbase.log
5
+
6
+ # Set the root logger from the "bigrecord.root.logger" property defined above.
7
+ log4j.rootLogger=${bigrecord.root.logger}
8
+
9
+ # Logging Threshold
10
+ log4j.threshhold=ALL
11
+
12
+ #
13
+ # Daily Rolling File Appender
14
+ #
15
+ log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
16
+ log4j.appender.DRFA.File=${bigrecord.log.dir}/${bigrecord.log.file}
17
+
18
+ # Rollover at midnight
19
+ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
20
+
21
+ # 30-day backup
22
+ #log4j.appender.DRFA.MaxBackupIndex=30
23
+ log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
24
+
25
+ # Pattern format: Date LogLevel LoggerName LogMessage
26
+ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
27
+
28
+ # Debugging Pattern format
29
+ #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
30
+
31
+
32
+ #
33
+ # console
34
+ # Add "console" to rootlogger above if you want to use this
35
+ #
36
+ log4j.appender.console=org.apache.log4j.ConsoleAppender
37
+ log4j.appender.console.target=System.err
38
+ log4j.appender.console.layout=org.apache.log4j.PatternLayout
39
+ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
40
+
41
+ # Custom Logging levels
42
+
43
+ #log4j.logger.com.bigrecord=TRACE
44
+ #log4j.logger.com.bigrecord.util.hbaserecord=DEBUG
45
+ #log4j.logger.com.bigrecord.util.jactiverecord=DEBUG
46
+ #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
47
+ log4j.logger.org.apache.hadoop.hbase=DEBUG
48
+ #log4j.logger.org.apache.hadoop.dfs=DEBUG
@@ -0,0 +1,119 @@
1
+ require File.dirname(__FILE__) + '/exceptions'
2
+ require File.dirname(__FILE__) + '/column_descriptor'
3
+ require 'drb'
4
# The Java String and Exception classes share their names with Ruby's own
# classes, so they are imported into the Java module and referenced as
# Java::String / Java::Exception.
module Java
  %w[java.lang.String java.lang.Exception].each do |java_class_name|
    include_class java_class_name
  end
end
10
+
11
class String
  # Returns the bytes of this string as produced by java.lang.String#getBytes
  # (called without a charset argument).
  def to_bytes
    java_string = Java::String.new(self)
    java_string.getBytes
  end
end
16
+
17
+
18
module BigRecordDriver

  # Abstract base class of the database drivers. It declares the driver API
  # (every operation raises NotImplementedError until a subclass overrides
  # it) and provides the helpers shared by the subclasses.
  #
  # Subclasses are expected to define +init_connection+, which safe_exec
  # calls to re-establish the connection after a java.io.IOException.
  class BigRecordServer
    include_class "java.io.IOException"

    def configure(config = {})
      raise NotImplementedError
    end

    def update(table_name, row, values, timestamp=nil)
      raise NotImplementedError
    end

    def get(table_name, row, column, options={})
      raise NotImplementedError
    end

    def get_columns(table_name, row, columns, options={})
      raise NotImplementedError
    end

    def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
      raise NotImplementedError
    end

    def delete(table_name, row)
      raise NotImplementedError
    end

    def create_table(table_name, column_descriptors)
      raise NotImplementedError
    end

    def drop_table(table_name)
      raise NotImplementedError
    end

    def truncate_table(table_name)
      raise NotImplementedError
    end

    def ping
      raise NotImplementedError
    end

    def table_exists?(table_name)
      raise NotImplementedError
    end

    def table_names
      raise NotImplementedError
    end

    # Re-raises NoMethodError with a message that names the missing method
    # and the class of the receiver.
    def method_missing(method, *args)
      super
    rescue NoMethodError
      raise NoMethodError, "undefined method `#{method}' for \"#{self}\":#{self.class}"
    end

    # Accepts the standard optional second argument so that callers using
    # respond_to?(name, true) do not get an ArgumentError.
    def respond_to?(method, include_private = false)
      super
    end

    protected

    # Converts the value of +cell+ (obtained through +getValue+) into a Ruby
    # string.
    def to_ruby_string(cell)
      Java::String.new(cell.getValue).to_s
    end

    # Try to recover from network related exceptions. e.g. hbase has been restarted and the
    # cached connections in @tables are no longer valid. Every method in this class (except connect_table)
    # should have its code wrapped by a call to this method.
    #
    # Runs the block and returns its value. On a java.io.IOException the
    # connection is re-initialised and the block is run once more; the value
    # of that second run is returned. Any other exception is logged and
    # re-raised. A failure during recovery is logged and re-raised, as
    # BigRecordDriver::JavaError when it is a NativeException.
    def safe_exec
      yield
    rescue IOException => e
      puts "A network error occured: #{e.message}. Trying to recover..."
      begin
        # Reconnecting is part of the recovery attempt: a failure here is
        # reported and translated like a failure of the retried block.
        init_connection
        result = yield
      rescue Exception, Java::Exception => e2
        if e2.class == e.class
          puts "Failed to recover the connection."
        else
          puts "Failed to recover the connection but got a different error this time: #{e2.message}."
        end
        puts "Stack trace:"
        puts e2.backtrace.join("\n")

        if e2.kind_of?(NativeException)
          raise BigRecordDriver::JavaError, e2.message
        else
          raise e2
        end
      end
      puts "Connection recovered successfully..."
      # Return the retried block's value, not the nil returned by puts.
      result
    rescue Exception => e
      puts "\n#{e.class.name}: #{e.message}"
      puts e.backtrace.join("\n")
      raise e
    end
  end

end
@@ -0,0 +1,135 @@
1
+ require File.dirname(__FILE__) + '/../column_descriptor'
2
+ require File.dirname(__FILE__) + '/../exceptions'
3
+ require File.dirname(__FILE__) + '/../bigrecord_server'
4
+
5
module BigRecordDriver
  # Driver implementation that talks to Cassandra through its Thrift client.
  #
  # NOTE(review): column_t is accessed both through a +value+ field and a
  # +getColumnName+ getter, as in the original code; confirm against the
  # Thrift-generated class of the bundled Cassandra jar.
  class CassandraServer < BigRecordServer
    include_class "org.apache.cassandra.service.Cassandra"
    include_class "org.apache.cassandra.service.InvalidRequestException"
    include_class "org.apache.cassandra.service.NotFoundException"
    include_class "org.apache.cassandra.service.UnavailableException"
    include_class "org.apache.cassandra.service.column_t"
    include_class "org.apache.thrift.TException"
    include_class "org.apache.thrift.protocol.TBinaryProtocol"
    include_class "org.apache.thrift.transport.TSocket"
    include_class "org.apache.thrift.transport.TTransport"

    # Stores the connection settings and opens the connection.
    # config[:adr] defaults to 'localhost' and config[:port] to 9160.
    def configure(config = {})
      # Work on a copy so the caller's hash is not modified.
      config = config.dup
      config[:adr] ||= 'localhost'
      config[:port] ||= 9160
      @config = config
      init_connection
    end

    # Inserts every column/value pair of +values+ for +row+ and returns
    # +row+. Returns nil when +row+ is nil.
    def update(table_name, row, values, timestamp=nil)
      safe_exec do
        return nil unless row
        # NOTE(review): every write without an explicit timestamp uses 0;
        # confirm this is what the callers expect.
        timestamp ||= 0
        values.each do |column, value|
          @cassandraClient.insert(table_name.to_s, row, column, value.to_bytes, timestamp, true)
        end
        row
      end
    end

    # Reads one column of +row+.
    #
    # Without options[:timestamp]: returns the column value as a string, or
    # nil when it is not found.
    # With options[:timestamp]: returns an array holding at most
    # options[:num_versions] (default 1) values, or [] when nothing is found.
    #
    # Raises ArgumentError when options[:num_versions] is lower than 1.
    def get(table_name, row, column, options={})
      safe_exec do
        return nil unless row
        # Retrieve only the last version by default
        num_versions = options[:num_versions] || 1

        # validate the arguments
        raise ArgumentError, "num_versions must be >= 1" unless num_versions >= 1

        raw_data = nil
        begin
          if options[:timestamp]
            raw_data = @cassandraClient.get_columns_since(table_name.to_s, row, column, options[:timestamp])
          else
            raw_data = @cassandraClient.get_column(table_name.to_s, row, column)
          end
        rescue NotFoundException => e2
          puts e2.message
          puts e2.class
        end

        if options[:timestamp]
          return [] unless raw_data
          raw_data.to_a.first(num_versions).map do |versioned_column|
            Java::String.new(versioned_column.value).to_s
          end
        else
          return nil unless raw_data
          Java::String.new(raw_data.value).to_s
        end
      end
    end

    # Reads several columns of +row+ and returns them as a hash of
    # column name => value, with the row key stored under "attribute:id".
    # A column name ending with ':' is fetched with get_slice, any other name
    # with get_column. Returns nil when +row+ is nil or nothing was found.
    #
    # Raises ArgumentError when options[:timestamp] is given.
    def get_columns(table_name, row, columns, options={})
      safe_exec do
        return nil unless row
        raise ArgumentError, "timestamp on get_columns is not currently supported with cassandra" if options[:timestamp]

        found = []
        columns.each do |col|
          begin
            if col[-1,1] == ':'
              found.concat(@cassandraClient.get_slice(table_name.to_s, row, col, -1, -1).to_a)
            else
              found << @cassandraClient.get_column(table_name.to_s, row, col)
            end
          rescue NotFoundException => e2
            puts e2.message
            puts e2.class
          end
        end
        found = found.compact

        if found.empty?
          nil
        else
          values = {}
          found.each do |cell|
            values[cell.getColumnName.to_s] = Java::String.new(cell.value).to_s
          end
          values["attribute:id"] = row
          values
        end
      end
    end

    ## It's currently impossible to have compliant delete with cassandra,
    ## you would have to do it family by family
    # def delete(table_name, row)
    #   safe_exec do
    #     table.remove(table_name, row, ??, ??, true)
    #   end
    # end

    # Returns whether the Thrift socket is open.
    def ping
      safe_exec do
        @socket.isOpen
      end
    end

    def table_names
      safe_exec do
        @cassandraClient.getStringListProperty("tables") #.collect{|td| Java::String.new(td.getName).to_s}
      end
    end

    # True unless the table description returned by Cassandra contains
    # "not found.".
    def table_exists?(table_name)
      safe_exec do
        !@cassandraClient.describeTable(table_name.to_s).include?("not found.")
      end
    end

    private

    # (Re)creates the Thrift socket and client from @config and opens the
    # socket. A socket left open by a previous connection is closed first so
    # that reconnecting does not leak it.
    def init_connection
      @socket.close if @socket && @socket.isOpen
      @socket = TSocket.new(@config[:adr], @config[:port])
      binary_protocol = TBinaryProtocol.new(@socket, false, false)
      @cassandraClient = Cassandra::Client.new(binary_protocol)
      @socket.open
    end
  end
end
130
+
131
# Start the DRb service for the Cassandra driver and block until it stops.
# The port is taken from the first command-line argument (default 45000).

# Flush every write immediately: the launcher script greps the redirected
# log for the "Started drb server" line below to detect a successful start.
$stdout.sync = true

port = ARGV[0] || 45000
# NOTE(review): "druby://:<port>" gives no host, so the service is presumably
# reachable from other machines; confirm this is intended.
DRb.start_service("druby://:#{port}", BigRecordDriver::CassandraServer.new)
puts "Started drb server on port #{port}."
DRb.thread.join
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'set'
4
+ require 'drb'
5
+
6
module BigRecordDriver
  # Proxy to a driver server reached over DRb. Any method that is not defined
  # here is forwarded to the remote server object.
  class Client

    # Connects to the DRb server and sends it the configuration.
    #
    # config[:drb_host] defaults to '127.0.0.1' and config[:drb_port] to
    # 40000; keys may be given as strings or symbols. The whole (symbolized)
    # hash is passed to the server's +configure+.
    #
    # Raises ConnectionError when the server cannot be reached.
    def initialize(config={}) # :nodoc:
      config = config.symbolize_keys
      config[:drb_host] ||= '127.0.0.1'
      config[:drb_port] ||= 40000

      @config = config

      DRb.start_service('druby://127.0.0.1:0')
      begin
        @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
        # DRbObject.new does not open a connection; the first remote call
        # does, so it must be inside the rescue for the error to be translated.
        @server.configure(@config)
      rescue DRb::DRbConnError
        raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
                                "at #{@config[:drb_host]}:#{@config[:drb_port]}."
      end
    end

    # Delegate the methods to the server
    def method_missing(method, *args, &block)
      @server.send(method, *args, &block)
    end

    # Accepts the standard optional second argument so that callers using
    # respond_to?(name, true) do not get an ArgumentError.
    def respond_to?(method, include_private = false)
      super
    end

  end
end
@@ -0,0 +1,23 @@
1
module BigRecordDriver

  # Plain value holder describing a column: its name plus the optional
  # :versions, :in_memory, :bloom_filter and :compression settings.
  class ColumnDescriptor

    attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression

    # name    - mandatory; stored as a string.
    # options - hash read for :versions, :in_memory, :bloom_filter and
    #           :compression; missing keys leave the attribute nil.
    #
    # Raises ArgumentError when name is nil or false.
    def initialize(name, options={})
      raise ArgumentError, "name is mandatory" unless name

      @name = name.to_s
      @versions, @in_memory, @bloom_filter, @compression =
        options.values_at(:versions, :in_memory, :bloom_filter, :compression)
    end

  end

end
@@ -0,0 +1,34 @@
1
require 'shellwords'

module BigRecordDriver

  # Starts, stops and queries a driver server by running the matching
  # bin/<db>-driver script with the `ruby` command.
  class DriverManager
    class << self

      # Selects the driver script to run ('hbase' by default) and returns its
      # path. The path is kept in the @@CMD class variable, as before, so the
      # selection stays shared with any subclass.
      def set_cmd(db = 'hbase')
        @@CMD = File.dirname(__FILE__) + "/../../bin/#{db}-driver"
      end

      # Each command returns the output printed by the driver script.
      def start(port = 40005)
        run_driver('start', port)
      end

      def restart(port = 40005)
        run_driver('restart', port)
      end

      def stop(port = 40005)
        run_driver('stop', port)
      end

      # True when the script's status output is exactly "Running.\n".
      def running?(port = 40005)
        run_driver('status', port) == "Running.\n"
      end

      # Starts the driver only when it is not reported as running.
      def silent_start(port = 40005)
        start(port) unless running?(port)
      end

      private

      # Runs the driver script with +action+ for +port+ and returns its
      # standard output. The script path and the port are shell-escaped so
      # that whitespace or shell metacharacters cannot alter the command.
      def run_driver(action, port)
        `ruby #{Shellwords.escape(@@CMD)} #{action} -p #{Shellwords.escape(port.to_s)}`
      end
    end

    # Select the default (hbase) driver when the class is loaded.
    set_cmd

  end

end
@@ -0,0 +1,12 @@
1
module BigRecordDriver
  # Common ancestor of the errors defined by the driver.
  class BigDBError < StandardError; end

  class TableNotFound < BigDBError; end

  class TableAlreadyExists < BigDBError; end

  # Raised in place of a native Java exception.
  class JavaError < BigDBError; end

  # Raised when the DRb server cannot be reached.
  class ConnectionError < BigDBError; end
end