bigrecord-driver 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
File without changes
@@ -0,0 +1,5 @@
#!/usr/bin/env ruby

# Command-line entry point for the Cassandra driver.
# Replaces the current process with the bash "launcher" script that sits in
# the same directory as this file, passing "cassandra" as the driver name
# followed by every argument given on the command line.
launcher_path = File.join(File.dirname(__FILE__), 'launcher')
exec('bash', launcher_path, 'cassandra', *ARGV)
data/bin/hbase-driver ADDED
@@ -0,0 +1,5 @@
#!/usr/bin/env ruby

# Command-line entry point for the HBase driver.
# Replaces the current process with the bash "launcher" script that sits in
# the same directory as this file, passing "hbase" as the driver name
# followed by every argument given on the command line.
launcher_path = File.join(File.dirname(__FILE__), 'launcher')
exec('bash', launcher_path, 'hbase', *ARGV)
data/bin/launcher ADDED
@@ -0,0 +1,155 @@
#! /bin/bash
#
# Control script shared by the <name>-driver wrappers.
#
# Usage: launcher <drivername> {start|stop|restart|status|start_debug} [-p <port>]
#
# Starts/stops the jruby DRb server for the given driver and keeps one pid
# file and one log file per port under /tmp/<drivername>-driver/.

DRIVERNAME=$1
# Fix for Amazon EC2 instances
unset RUBYLIB

# Needs the $JRUBY_HOME environment variable to be set
if [ -z "$JRUBY_HOME" ]; then
  echo "\$JRUBY_HOME is not set. Please set it in your ~/.bashrc file."
  exit 1
fi

# Root of the installed package (the parent of the directory holding this script)
ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
VENDOR_DIR=$ROOT_DIR/vendor
CONF_DIR=$ROOT_DIR/conf

# Ruby file that contains the drb server
DRIVER=$ROOT_DIR/lib/big_record_driver/${DRIVERNAME}_driver/server.rb

# location where the pids are stored
PIDS_DIR=/tmp/$DRIVERNAME-driver/run
mkdir -p "$PIDS_DIR"

# maximum number of seconds to wait for the server to report that it started
STARTUP_TIMEOUT=60

# maximum number of seconds to wait for a killed server to exit
STOP_TIMEOUT=10

# location where the log files are stored
LOGS_DIR=/tmp/$DRIVERNAME-driver/log
mkdir -p "$LOGS_DIR"

# set the environment to use jruby by default
PATH=$JRUBY_HOME/bin:$PATH
for f in "$VENDOR_DIR/java/$DRIVERNAME"/*.jar; do
  if [ -f "$f" ]; then
    CLASSPATH=${CLASSPATH}:$f
  fi
done
#CLASSPATH=$CLASSPATH:$VENDOR_DIR/java/hadoop-0.19.1-core.jar:$VENDOR_DIR/java/hbase-0.19.1.jar:$VENDOR_DIR/java/commons-logging-1.0.4.jar:$VENDOR_DIR/java/commons-logging-api-1.0.4.jar:$VENDOR_DIR/java/log4j-1.2.13.jar:$CONF_DIR/log4j.properties

export PATH CLASSPATH

# TODO: find a way to use the return value of refresh_pids instead of
# manipulating this global variable
PID=""

# Succeeds when a process with the given pid exists and can be signalled.
is_alive() {
  [ -n "$1" ] && kill -0 "$1" 2>/dev/null
}

# Loads the pid recorded for $PORT into $PID ("" when nothing is running).
# A pid file whose process no longer exists is stale: it is removed so that
# "status" does not report a dead server and "start" is not blocked by it.
refresh_pids() {
  PID=""
  if [ -f "$PIDS_DIR/$PORT.pid" ]; then
    PID=$(cat "$PIDS_DIR/$PORT.pid")
    if ! is_alive "$PID"; then
      rm -f "$PIDS_DIR/$PORT.pid"
      PID=""
    fi
  fi
}

# Starts the server in the background and waits until it logs its readiness
# message. Returns non-zero when the server could not be started.
start() {
  echo -n "Starting $DRIVERNAME driver on port $PORT."

  refresh_pids

  if [ -n "$PID" ]; then
    echo -e "\nAlready running (pid=$PID)."
    exit 1
  fi

  rm -f "$LOGS_DIR/$PORT.log"
  nohup jruby "$DRIVER" "$PORT" >> "$LOGS_DIR/$PORT.log" 2>&1 < /dev/null &
  PID=$!
  if [ -z "$PID" ]; then
    echo -e "\nAn error occured while starting the DRb server."
    return 1
  fi

  # monitor the log file for the message saying that the server is started
  local started=0
  local i
  for ((i = 0; i < STARTUP_TIMEOUT; i += 1)); do
    sleep 1
    echo -n "."
    if grep -q 'Started drb server' "$LOGS_DIR/$PORT.log" 2>/dev/null; then
      started=1
      break
    fi
    # no point in waiting for the full timeout if the process already died
    if ! is_alive "$PID"; then
      break
    fi
  done

  if [ "$started" -eq 1 ]; then
    echo "$PID" > "$PIDS_DIR/$PORT.pid"
    echo ""
    return 0
  fi

  echo -e "\nStartup timeout: couldn't start the DRb server."
  # Do not leave behind a half-started process that has no pid file and
  # therefore could never be stopped through this script.
  kill "$PID" 2>/dev/null
  echo ""
  return 1
}

# Kills the server recorded for $PORT and waits (up to STOP_TIMEOUT seconds)
# for it to exit, so that a following start does not race with it.
stop() {
  echo "Stopping $DRIVERNAME driver on port $PORT."

  refresh_pids

  if [ -n "$PID" ]; then
    echo "Stopping driver (pid = $PID)." >> "$LOGS_DIR/$PORT.log"
    kill "$PID"
    local i
    for ((i = 0; i < STOP_TIMEOUT; i += 1)); do
      is_alive "$PID" || break
      sleep 1
    done
    rm -f "$PIDS_DIR/$PORT.pid"
  else
    echo "No $DRIVERNAME driver to kill."
  fi
}

# Prints "Running." or "Stopped." (callers compare against this exact output).
status() {
  refresh_pids

  if [ -n "$PID" ]; then
    echo "Running."
  else
    echo "Stopped."
  fi
}

# Runs the server in the foreground, attached to the terminal.
start_debug() {
  jruby "$DRIVER" "$PORT"
}

print_usage() {
  echo "Usage: $DRIVERNAME-driver {start|stop|restart|status|start_debug} [-p <port>]"
}

# Reads the port from "<drivername> <command> -p <port>"; defaults to 40000.
set_port() {
  if [ "$3" == "-p" ] && [ -n "$4" ]; then
    PORT="$4"
  else
    PORT=40000
  fi
}

case "$2" in
  start)
    set_port "$@"
    start || exit 1
    ;;
  stop)
    set_port "$@"
    stop
    ;;
  restart)
    set_port "$@"
    stop
    start || exit 1
    ;;
  status)
    set_port "$@"
    status
    ;;
  start_debug)
    set_port "$@"
    start_debug
    ;;
  *)
    print_usage
    exit 1
esac
exit 0
@@ -0,0 +1,48 @@
1
+ # Define some default values that can be overridden by system properties
2
+ bigrecord.root.logger=INFO,DRFA
3
+ bigrecord.log.dir=/tmp/hbase-driver/log
4
+ bigrecord.log.file=hbase.log
5
+
6
+ # Define the root logger to the system property "bigrecord.root.logger".
7
+ log4j.rootLogger=${bigrecord.root.logger}
8
+
9
+ # Logging Threshold
10
+ log4j.threshhold=ALL
11
+
12
+ #
13
+ # Daily Rolling File Appender
14
+ #
15
+ log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
16
+ log4j.appender.DRFA.File=${bigrecord.log.dir}/${bigrecord.log.file}
17
+
18
+ # Rollover at midnight
19
+ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
20
+
21
+ # 30-day backup
22
+ #log4j.appender.DRFA.MaxBackupIndex=30
23
+ log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
24
+
25
+ # Pattern format: Date LogLevel LoggerName LogMessage
26
+ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
27
+
28
+ # Debugging Pattern format
29
+ #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
30
+
31
+
32
+ #
33
+ # console
34
+ # Add "console" to rootlogger above if you want to use this
35
+ #
36
+ log4j.appender.console=org.apache.log4j.ConsoleAppender
37
+ log4j.appender.console.target=System.err
38
+ log4j.appender.console.layout=org.apache.log4j.PatternLayout
39
+ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
40
+
41
+ # Custom Logging levels
42
+
43
+ #log4j.logger.com.bigrecord=TRACE
44
+ #log4j.logger.com.bigrecord.util.hbaserecord=DEBUG
45
+ #log4j.logger.com.bigrecord.util.jactiverecord=DEBUG
46
+ #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
47
+ log4j.logger.org.apache.hadoop.hbase=DEBUG
48
+ #log4j.logger.org.apache.hadoop.dfs=DEBUG
@@ -0,0 +1,119 @@
1
+ require File.dirname(__FILE__) + '/exceptions'
2
+ require File.dirname(__FILE__) + '/column_descriptor'
3
+ require 'drb'
# The name of the java String class conflicts with ruby's String class, so the
# imported Java classes are only reachable through this module
# (Java::String, Java::Exception).
module Java
  %w[java.lang.String java.lang.Exception].each do |java_class_name|
    include_class java_class_name
  end
end
10
+
class String
  # Converts this string to a Java byte array by wrapping it in a
  # java.lang.String and calling getBytes on it.
  def to_bytes
    java_string = Java::String.new(self)
    java_string.getBytes
  end
end
16
+
17
+
module BigRecordDriver

  # Base class of the driver servers. It declares the operations a driver has
  # to provide (each one raises NotImplementedError here and is meant to be
  # overridden) and offers two protected helpers, #to_ruby_string and
  # #safe_exec, to the subclasses.
  class BigRecordServer
    include_class "java.io.IOException"

    def configure(config = {})
      raise NotImplementedError
    end

    def update(table_name, row, values, timestamp=nil)
      raise NotImplementedError
    end

    def get(table_name, row, column, options={})
      raise NotImplementedError
    end

    def get_columns(table_name, row, columns, options={})
      raise NotImplementedError
    end

    def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
      raise NotImplementedError
    end

    def delete(table_name, row)
      raise NotImplementedError
    end

    def create_table(table_name, column_descriptors)
      raise NotImplementedError
    end

    def drop_table(table_name)
      raise NotImplementedError
    end

    def truncate_table(table_name)
      raise NotImplementedError
    end

    def ping
      raise NotImplementedError
    end

    def table_exists?(table_name)
      raise NotImplementedError
    end

    def table_names
      raise NotImplementedError
    end

    # Re-raises NoMethodError with a message that names the receiver and its class.
    def method_missing(method, *args)
      super
    rescue NoMethodError
      raise NoMethodError, "undefined method `#{method}' for \"#{self}\":#{self.class}"
    end

    # Same contract as Object#respond_to?. The optional second argument must be
    # accepted: callers (DRbObject#respond_to? among them) pass two arguments,
    # which raised ArgumentError with a one-parameter override.
    def respond_to?(method, include_private = false)
      super
    end

    protected

    # Converts the value of +cell+ (read through cell.getValue) to a Ruby string.
    def to_ruby_string(cell)
      Java::String.new(cell.getValue).to_s
    end

    # Try to recover from network related exceptions. e.g. hbase has been restarted and the
    # cached connections in @tables are no longer valid. Every method in this class (except connect_table)
    # should have its code wrapped by a call to this method.
    #
    # Runs the given block and returns its value. When the block raises a
    # java.io.IOException, init_connection is called (it is not defined in this
    # class; subclasses are expected to provide it) and the block is run a
    # second time. If the second attempt fails too, the error is printed and
    # re-raised, as BigRecordDriver::JavaError when it is a NativeException.
    # Any other exception raised by the first attempt is printed and re-raised.
    def safe_exec
      yield
    rescue IOException => e
      puts "A network error occured: #{e.message}. Trying to recover..."
      init_connection
      begin
        # Keep the value of the retried block: it is the result of the call.
        result = yield
      rescue Exception, Java::Exception => e2
        if e2.class == e.class
          puts "Failed to recover the connection."
        else
          puts "Failed to recover the connection but got a different error this time: #{e2.message}."
        end
        puts "Stack trace:"
        puts e2.backtrace.join("\n")

        if e2.kind_of?(NativeException)
          raise BigRecordDriver::JavaError, e2.message
        else
          raise e2
        end
      end
      puts "Connection recovered successfully..."
      result
    rescue Exception => e
      puts "\n#{e.class.name}: #{e.message}"
      puts e.backtrace.join("\n")
      raise e
    end
  end

end
@@ -0,0 +1,135 @@
1
+ require File.dirname(__FILE__) + '/../column_descriptor'
2
+ require File.dirname(__FILE__) + '/../exceptions'
3
+ require File.dirname(__FILE__) + '/../bigrecord_server'
4
+
module BigRecordDriver
  # Driver server backed by Cassandra, reached through its Thrift client.
  # The connection is opened by #configure and re-opened by
  # BigRecordServer#safe_exec (through #init_connection) after an IOException.
  class CassandraServer < BigRecordServer
    include_class "org.apache.cassandra.service.Cassandra"
    include_class "org.apache.cassandra.service.InvalidRequestException"
    include_class "org.apache.cassandra.service.NotFoundException"
    include_class "org.apache.cassandra.service.UnavailableException"
    include_class "org.apache.cassandra.service.column_t"
    include_class "org.apache.thrift.TException"
    include_class "org.apache.thrift.protocol.TBinaryProtocol"
    include_class "org.apache.thrift.transport.TSocket"
    include_class "org.apache.thrift.transport.TTransport"

    # Stores +config+ (:adr defaults to 'localhost', :port to 9160) and opens
    # the connection.
    def configure(config = {})
      config[:adr] ||= 'localhost'
      config[:port] ||= 9160
      @config = config
      init_connection
    end

    # Inserts every column/value pair of +values+ for +row+ and returns +row+.
    # Returns nil without doing anything when +row+ is nil. +timestamp+
    # defaults to 0.
    def update(table_name, row, values, timestamp=nil)
      safe_exec do
        return nil unless row
        timestamp ||= 0
        values.each do |column, value|
          @cassandraClient.insert(table_name.to_s, row, column, value.to_bytes, timestamp, true)
        end
        row
      end
    end

    # Reads one column of +row+.
    #
    # Without options[:timestamp] the value of the column is returned as a
    # string, or nil when it was not found. With options[:timestamp] an array
    # holding at most options[:num_versions] (default 1) values is returned,
    # [] when nothing was found. Returns nil when +row+ is nil.
    #
    # Raises ArgumentError when options[:num_versions] is lower than 1.
    def get(table_name, row, column, options={})
      safe_exec do
        return nil unless row
        # Retrieve only the last version by default
        num_versions = options[:num_versions] || 1

        # validate the arguments
        raise ArgumentError, "num_versions must be >= 1" unless num_versions >= 1

        raw_data = nil
        begin
          if options[:timestamp]
            raw_data = @cassandraClient.get_columns_since(table_name.to_s, row, column, options[:timestamp])
          else
            raw_data = @cassandraClient.get_column(table_name.to_s, row, column)
          end
        rescue NotFoundException => e2
          # A missing column is reported on stdout and treated as "no data"
          puts e2.message
          puts e2.class
        end

        # Return either a single value or an array, depending on the number of version that have been requested
        if options[:timestamp]
          return [] unless raw_data
          raw_data.to_a.first(num_versions).map do |versioned_column|
            Java::String.new(versioned_column.value).to_s
          end
        else
          return nil unless raw_data
          Java::String.new(raw_data.value).to_s
        end
      end
    end

    # Reads several columns of +row+. A column name ending with ':' is fetched
    # with get_slice, any other name with get_column.
    #
    # Returns a hash of column name => value (plus "attribute:id" => row), or
    # nil when +row+ is nil or nothing was found.
    #
    # Raises ArgumentError when options[:timestamp] is given.
    def get_columns(table_name, row, columns, options={})
      safe_exec do
        return nil unless row
        raise ArgumentError, "timestamp on get_columns is not currently supported with cassandra" if options[:timestamp]

        found = []
        columns.each do |col|
          begin
            if col[-1,1] == ':'
              found.concat(@cassandraClient.get_slice(table_name.to_s, row, col, -1, -1).to_a)
            else
              found << @cassandraClient.get_column(table_name.to_s, row, col)
            end
          rescue NotFoundException => e2
            # A missing column is reported on stdout and skipped
            puts e2.message
            puts e2.class
          end
        end
        found.compact!

        unless found.empty?
          values = {}
          found.each do |cell|
            # NOTE(review): getColumnName is taken over from the original code;
            # confirm it matches the accessor exposed by the Thrift column_t.
            values[cell.getColumnName.to_s] = Java::String.new(cell.value).to_s
          end
          values["attribute:id"] = row
          values
        end
      end
    end

    ## It's currently impossible to have compliant delete with cassandra,
    ## you would have to do it family by family
    # def delete(table_name, row)
    #   safe_exec do
    #     table.remove(table_name, row, ??, ??, true)
    #   end
    # end

    # Returns whether the socket is open.
    def ping
      safe_exec do
        @socket.isOpen
      end
    end

    def table_names
      safe_exec do
        @cassandraClient.getStringListProperty("tables") #.collect{|td| Java::String.new(td.getName).to_s}
      end
    end

    # True unless the description returned for +table_name+ contains "not found.".
    def table_exists?(table_name)
      safe_exec do
        !@cassandraClient.describeTable(table_name.to_s).include?("not found.")
      end
    end

    private

    # (Re)creates the socket and the Thrift client, then opens the socket.
    def init_connection
      # On a reconnect, release the previous socket instead of leaking it.
      @socket.close if @socket && @socket.isOpen
      @socket = TSocket.new(@config[:adr], @config[:port])
      binary_protocol = TBinaryProtocol.new(@socket, false, false)
      @cassandraClient = Cassandra::Client.new(binary_protocol)
      @socket.open
    end
  end
end
130
+
# Start the DRb service that exposes a CassandraServer on the port given as
# first command-line argument (45000 when none is given), then block until
# the DRb thread ends.
port = ARGV[0]
port ||= 45000
DRb.start_service("druby://:#{port}", BigRecordDriver::CassandraServer.new)
puts "Started drb server on port #{port}."
# The launcher script polls the log file for the message above; flush it so
# that it is not held back in a buffer when stdout is redirected to a file.
$stdout.flush
DRb.thread.join
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'set'
4
+ require 'drb'
5
+
module BigRecordDriver
  # Proxy to a driver server reached over DRb. Every method that is not
  # defined here is forwarded to the remote server object.
  class Client

    # config keys (strings or symbols):
    #   :drb_host - host of the DRb server (default '127.0.0.1')
    #   :drb_port - port of the DRb server (default 40000)
    # The whole config is passed to the server's #configure.
    #
    # Raises ConnectionError when the server cannot be reached.
    # NOTE(review): ConnectionError is not defined in this block; it is assumed
    # to be loaded (BigRecordDriver::ConnectionError) before a client is built.
    def initialize(config={}) # :nodoc:
      config = symbolize_config_keys(config || {})
      config[:drb_host] ||= '127.0.0.1'
      config[:drb_port] ||= 40000

      @config = config

      # One local service per process is enough; starting a new one for every
      # client would leave a listening socket and a thread behind each time.
      DRb.start_service('druby://127.0.0.1:0') unless local_service_alive?

      # DRbObject.new does not open any connection: the first remote call
      # does. That call is therefore the one that has to be guarded.
      @server = DRbObject.new(nil, "druby://#{@config[:drb_host]}:#{@config[:drb_port]}")
      begin
        @server.configure(@config)
      rescue DRb::DRbConnError
        raise ConnectionError, "Failed to connect to the DRb server (jruby) " +
                               "at #{@config[:drb_host]}:#{@config[:drb_port]}."
      end
    end

    # Delegate the methods to the server
    def method_missing(method, *args, &block)
      @server.send(method, *args, &block)
    end

    # Same contract as Object#respond_to? (only the methods defined locally
    # are reported). The optional second argument must be accepted, a
    # one-parameter override raises ArgumentError for two-argument calls.
    def respond_to?(method, include_private = false)
      super
    end

    private

    # Copy of +config+ whose keys are converted to symbols when possible.
    def symbolize_config_keys(config)
      config.inject({}) do |symbolized, (key, value)|
        symbolized[key.respond_to?(:to_sym) ? key.to_sym : key] = value
        symbolized
      end
    end

    # Whether this process already runs a usable primary DRb service.
    def local_service_alive?
      primary = DRb.primary_server
      primary ? primary.alive? : false
    end

  end
end
@@ -0,0 +1,23 @@
module BigRecordDriver

  # Describes one column: its name plus the optional settings given at
  # construction time.
  class ColumnDescriptor

    attr_accessor :name, :versions, :in_memory, :bloom_filter, :compression

    # name    - mandatory; stored as a string (name.to_s)
    # options - optional hash; the keys :versions, :in_memory, :bloom_filter
    #           and :compression are read, missing keys leave the attribute nil.
    #           nil is accepted and treated like an empty hash.
    #
    # Raises ArgumentError when +name+ is nil or false.
    def initialize(name, options={})
      raise ArgumentError, "name is mandatory" unless name

      options ||= {}

      @name         = name.to_s
      @versions     = options[:versions]
      @in_memory    = options[:in_memory]
      @bloom_filter = options[:bloom_filter]
      @compression  = options[:compression]
    end

  end

end
@@ -0,0 +1,34 @@
require 'shellwords'

module BigRecordDriver

  # Starts, stops and queries a driver server by running the
  # bin/<db>-driver script through `ruby` in a sub-shell.
  #
  # NOTE(review): the default port used here (40005) differs from the default
  # of the launcher script and of the client (40000) - confirm this is intended.
  class DriverManager
    class << self

      # Selects the driver script to run (bin/<db>-driver, relative to this
      # file) and returns its path. 'hbase' is selected when the class is loaded.
      def set_cmd(db = 'hbase')
        @@CMD = File.dirname(__FILE__) + "/../../bin/#{db}-driver"
      end
      DriverManager.set_cmd

      # Each of the following returns the standard output of the driver script.
      def start(port = 40005)
        run_driver('start', port)
      end

      def restart(port = 40005)
        run_driver('restart', port)
      end

      def stop(port = 40005)
        run_driver('stop', port)
      end

      # True when the driver script reports exactly "Running.\n" for +port+.
      def running?(port = 40005)
        run_driver('status', port) == "Running.\n"
      end

      # Starts the driver unless it is already running.
      def silent_start(port = 40005)
        start(port) unless running?(port)
      end

      private

      # Runs "<driver script> <action> -p <port>" and returns its output.
      # The command goes through a shell, so the script path is escaped (the
      # install directory may contain spaces) and the port must be all digits.
      #
      # Raises ArgumentError when +port+ is not a sequence of digits.
      def run_driver(action, port)
        port_arg = port.to_s
        raise ArgumentError, "invalid port: #{port.inspect}" unless port_arg =~ /\A\d+\z/

        `ruby #{Shellwords.shellescape(@@CMD)} #{action} -p #{port_arg}`
      end
    end

  end

end
@@ -0,0 +1,12 @@
module BigRecordDriver
  # Root of the driver's error hierarchy.
  class BigDBError < StandardError; end

  # The specific errors below all inherit from BigDBError, so rescuing
  # BigDBError catches any of them.
  class TableNotFound < BigDBError; end
  class TableAlreadyExists < BigDBError; end
  class JavaError < BigDBError; end
  class ConnectionError < BigDBError; end
end
+ end