scbi_mapreduce 0.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +49 -0
- data/Manifest.txt +46 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +295 -0
- data/Rakefile +28 -0
- data/bin/scbi_mapreduce +52 -0
- data/lib/scbi_mapreduce.rb +15 -0
- data/lib/scbi_mapreduce/error_handler.rb +15 -0
- data/lib/scbi_mapreduce/main_worker.rb +50 -0
- data/lib/scbi_mapreduce/manager.rb +110 -0
- data/lib/scbi_mapreduce/work_manager.rb +405 -0
- data/lib/scbi_mapreduce/worker.rb +163 -0
- data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
- data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/skeleton/dummy_calcs/README.txt +25 -0
- data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
- data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
- data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
- data/skeleton/dummy_calcs/main.rb +67 -0
- data/skeleton/dummy_calcs/my_worker.rb +56 -0
- data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
- data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
- data/skeleton/remove_mids/README.txt +30 -0
- data/skeleton/remove_mids/launch_only_workers.rb +29 -0
- data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
- data/skeleton/remove_mids/lib/find_mids.rb +191 -0
- data/skeleton/remove_mids/lib/global_match.rb +97 -0
- data/skeleton/remove_mids/linear_implementation.rb +87 -0
- data/skeleton/remove_mids/main.rb +89 -0
- data/skeleton/remove_mids/my_worker.rb +59 -0
- data/skeleton/remove_mids/my_worker_manager.rb +68 -0
- data/skeleton/simple/README.txt +16 -0
- data/skeleton/simple/main.rb +41 -0
- data/skeleton/simple/my_worker.rb +53 -0
- data/skeleton/simple/my_worker_manager.rb +55 -0
- data/test/drb_test/main.rb +31 -0
- data/test/drb_test/my_worker.rb +36 -0
- data/test/drb_test/my_worker_manager.rb +41 -0
- data/test/drb_test/scbi_drb_checkpoint +1 -0
- data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
- data/test/test_helper.rb +3 -0
- data/test/test_scbi_drb.rb +11 -0
- metadata +127 -0
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'eventmachine'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
# require 'error_handler'
|
7
|
+
|
8
|
+
require 'fileutils'

module ScbiMapreduce

  # Client side of the map/reduce protocol: an EventMachine connection that
  # receives objects from a server, hands their data to +process_object+ and
  # sends the result back. Object framing (+receive_object+ / +send_object+)
  # comes from the included EM::P::ObjectProtocol.
  #
  # Concrete workers are expected to subclass this class and override the
  # hook methods declared first (all of them are no-ops here).
  class Worker < EventMachine::Connection
    include EM::P::ObjectProtocol

    # Hook: receives the :initial_config entry of the first object sent by
    # the server (not called when that object is :no_initial_config).
    def receive_initial_config(obj)
    end

    # Hook: receives the +data+ of each work object. Its return value is
    # stored back into the work object before it is sent to the server.
    def process_object(obj)
    end

    # Hook: called once, right after the first received object (the initial
    # configuration message) has been handled.
    def starting_worker
    end

    # Hook: called from +post_init+.
    def worker_connected
    end

    # Hook: called at the end of +stop_worker+.
    def closing_worker
    end

    ######################

    def initialize(*args)
      super
    end

    # EventMachine callback. Logs the connection and runs the
    # +worker_connected+ hook; any failure is logged and swallowed.
    def post_init
      $WORKER_LOG.info('WORKER CONNECTED')

      worker_connected
    rescue Exception => e
      # NOTE(review): rescuing Exception is kept on purpose to preserve the
      # original behaviour (the error is logged and not re-raised).
      $WORKER_LOG.error("Exiting worker #{@@worker_id} due to exception:\n" + e.message + "\n" + e.backtrace.join("\n"))
      #raise e
    end

    # Dispatches every object received from the server:
    # * the first one (while @@count is still negative) is the initial
    #   configuration message,
    # * :quit stops the worker,
    # * anything else is a work object whose +data+ is processed and sent back.
    def receive_object(obj)
      if @@count < 0
        @@count += 1

        # receive initial config
        if obj != :no_initial_config
          receive_initial_config(obj[:initial_config])
          $WORKER_LOG.info('Initial config: received')
        else
          $WORKER_LOG.info('Initial config: empty config')
        end

        # At first iteration, start worker
        starting_worker
      elsif obj == :quit
        stop_worker
      else
        @@count += 1

        # NOTE - THE MODIFIED OBJECT MUST BE PASSED
        # operation = proc {
        #   # calculations
        #   obj=process_object(obj)
        #   #puts '.' + obj.seq_name
        #   #return obj
        # }
        #
        # callback = proc { |modified_obj|
        #   send_object(modified_obj)
        # }
        #
        # EventMachine.defer(operation, callback)
        #send_object(obj)

        begin
          obj.data = process_object(obj.data)
          send_object(obj)
        rescue Exception => e
          # Any failure while processing is logged and reported to the server
          # wrapped in a WorkerError instead of being raised locally.
          $WORKER_LOG.error("Error processing object\n" + e.message + ":\n" + e.backtrace.join("\n"))
          send_object(WorkerError.new('Message', e, @@worker_id, obj))
        end
      end
    end

    # EventMachine callback for a closed connection: stops the event loop so
    # that +start_worker+ can return.
    def unbind
      $WORKER_LOG.info "EXITING WORKER"
      EventMachine::stop_event_loop
    end

    # Closes the connection, stops the event loop and runs the
    # +closing_worker+ hook.
    def stop_worker
      close_connection
      EventMachine::stop_event_loop
      closing_worker
    end

    # Connects a worker of this class to ip:port and blocks inside the
    # EventMachine loop until it is stopped; afterwards logs how many objects
    # were processed and at which rate.
    #
    # worker_id:: identifier used in log messages and in the default log name
    #             (may be a String or an Integer).
    # ip, port::  address of the server to connect to.
    # log_file::  path or IO-like object handed to Logger.new; defaults to
    #             logs/worker<ID>_<hostname>_log.txt.
    def self.start_worker(worker_id, ip, port, log_file = nil)
      #puts "NEW WORKER - INIIIIIIIIIIIIIIIIIIIIT #{self}"

      # -1 marks "initial configuration not received yet" (see receive_object)
      @@count = -1
      @@worker_id = worker_id

      if log_file.nil?
        # Interpolation (instead of String#+) also accepts a numeric worker_id.
        log_file = "logs/worker#{worker_id}_#{`hostname`.chomp}_log.txt"
      end

      # Only paths need a directory; anything that can be written to (STDOUT,
      # STDERR, StringIO, ...) is passed to Logger untouched.
      FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.respond_to?(:write)

      $WORKER_LOG = Logger.new(log_file)
      $WORKER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

      $LOG = $WORKER_LOG

      started_at = Time.now

      EM.error_handler { |e|
        $WORKER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
      }

      EventMachine::run {
        EventMachine::connect ip, port, self
        $WORKER_LOG.info "Worker connected to #{ip}:#{port}"
      }

      total_seconds = Time.now - started_at
      $WORKER_LOG.info "Client #{@@worker_id} processed: #{@@count} objs"
      $WORKER_LOG.info "Client #{@@worker_id} proc rate: #{@@count/total_seconds.to_f} objects/seg"
    end

  end

end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'fileutils'
require 'logger'

module ScbiMapreduce

  # Command used to run main_worker.rb, both locally and through ssh.
  INTERPRETER='ruby'

  # Starts worker processes that connect to a server at
  # server_ip:server_port, either as local forked processes
  # (+launch_workers+) or on other machines through ssh
  # (+launch_external_workers+).
  class WorkerLauncher

    attr_accessor :server_ip, :server_port

    # server_ip, server_port:: address the workers are told to connect to.
    # workers::       number of local workers started by +launch_workers+.
    # worker_file::   passed as the last argument to main_worker.rb.
    # log_file::      path or IO-like object handed to Logger.new; defaults
    #                 to logs/launcher_log.txt.
    # init_env_file:: optional shell file that is sourced on the remote side
    #                 before starting an external worker (only if it exists
    #                 locally, see +launch_external_workers+).
    def initialize(server_ip, server_port, workers, worker_file, log_file = nil, init_env_file = nil)
      @server_ip = server_ip
      @server_port = server_port
      @worker_file = worker_file
      @workers = workers
      @init_env_file = init_env_file

      log_file = 'logs/launcher_log.txt' if log_file.nil?

      # Only paths need a directory; anything that can be written to (STDOUT,
      # STDERR, StringIO, ...) is passed to Logger untouched.
      FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.respond_to?(:write)

      $LAUNCHER_LOG = Logger.new(log_file)
      $LAUNCHER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Launches the local workers and blocks until every child process of the
    # current process has finished.
    def launch_workers_and_wait
      launch_workers
      Process.waitall
    end

    # Forks one child process per local worker. Does nothing (and returns
    # nil) when the configured number of workers is not positive.
    def launch_workers
      return unless @workers > 0

      $LAUNCHER_LOG.info "Connecting #{@workers} local workers to #{@server_ip}:#{@server_port}"

      @workers.times do |i|
        fork { launch_worker(i, @server_ip, @server_port) }

        # Logged from the parent: the default launch_worker replaces the
        # child with exec, so nothing written after it inside the child
        # would ever run.
        $LAUNCHER_LOG.info "Worker #{i} launched [#{@server_ip}:#{@server_port}]"
      end

      #Process.waitall
      $LAUNCHER_LOG.info "All workers launched"
    end

    # override this
    #
    # Default implementation: replaces the current process (the forked
    # child) with the interpreter running main_worker.rb.
    # NOTE(review): the command is a single shell string, so arguments
    # containing spaces or shell metacharacters are not escaped.
    def launch_worker(worker_id, server_ip, server_port)
      cmd = "#{INTERPRETER} #{File.join(File.dirname(__FILE__),'main_worker.rb')} #{worker_id.to_s} #{server_ip} #{server_port} #{@worker_file}"
      puts cmd
      exec(cmd)
    end

    # Starts one worker per entry of +workers+ (host names given to ssh).
    # Each entry gets a consecutive worker id starting at 0 and its own
    # forked process running the ssh command. Returns +workers+.
    def launch_external_workers(workers)
      puts "Launching #{workers.count} external workers: #{workers}"

      # Optional environment initialisation, sourced on the remote shell.
      init = ''
      if @init_env_file
        path = File.expand_path(@init_env_file)
        # path = File.join($ROOT_PATH)
        # puts "init_env file: #{path}"
        if File.exist?(path)
          puts "File #{path} exists, using it"
          init = ". #{path}; "
        end
      end

      # The remote command first changes to the launcher's working directory.
      pwd = `pwd`.chomp
      cd = File.exist?(pwd) ? "cd #{pwd}; " : ''

      workers.each_with_index do |machine, worker_id|
        cmd = "ssh #{machine} \"#{init} #{cd} #{INTERPRETER} #{File.join(File.dirname(__FILE__),'main_worker.rb')} #{worker_id.to_s} #{@server_ip} #{@server_port} #{@worker_file}\""
        $LAUNCHER_LOG.info cmd

        fork { exec(cmd) }
      end
    end

  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# A serializer class that provides compression
|
5
|
+
#
|
6
|
+
# To use this instead of the default Marshal serializer, redefine the serializer method in your worker and worker_manager as follows:
|
7
|
+
#
|
8
|
+
# def serializer
|
9
|
+
# ZlibSerializer
|
10
|
+
#
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
|
14
|
+
# Serializer that marshals an object and compresses the result with zlib.
# It exposes the same two class methods as Marshal (+dump+ / +load+).
class ZlibSerializer

  # Returns a String with the zlib-compressed Marshal dump of +data+.
  # Raises whatever Marshal.dump raises for objects that cannot be dumped.
  def self.dump(data)
    input = Marshal.dump(data)
    # best compression level, 15 window bits, memory level 9
    zipper = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, 15, 9)

    begin
      zipper.deflate(input, Zlib::FINISH)
    ensure
      # release the zlib stream even if deflate raises
      zipper.close
    end
  end

  # Inverse of +dump+: inflates +input+ and unmarshals the result.
  # Raises a Zlib::Error subclass when +input+ is not valid zlib data.
  #
  # WARNING: this ends in Marshal.load, which can instantiate arbitrary
  # objects; only feed it data coming from a trusted peer.
  def self.load(input)
    unzipper = Zlib::Inflate.new(15)

    inflated = begin
      unzipper.inflate(input)
    ensure
      # release the zlib stream even if the input is corrupt
      unzipper.close
    end

    Marshal.load(inflated)
  end
end
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has irb/completion and
# lib/scbi_mapreduce.rb preloaded.

# On mswin/mingw platforms irb is invoked as irb.bat.
# (?: ... ) is a non-capturing group; the previous "(:?" spelling was a typo
# that made the group match an optional leading colon instead.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/scbi_mapreduce.rb'}"
puts "Loading scbi_mapreduce gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script with the arguments given on the
# command line.

# Absolute path of the directory that contains script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try rubigen directly; if it cannot be found, load rubygems and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed over.
help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script with the arguments given on the
# command line.

# Absolute path of the directory that contains script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try rubigen directly; if it cannot be found, load rubygems and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed over.
help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Comparison of workers with scbi_mapreduce vs ruby-threads
|
2
|
+
=========================================================
|
3
|
+
|
4
|
+
This application is only useful for testing. You can modify the files
|
5
|
+
to perform other tasks. There are other templates available, you
|
6
|
+
can list them by issuing this command:
|
7
|
+
|
8
|
+
scbi_mapreduce
|
9
|
+
|
10
|
+
You can launch the test application right now with the following command:
|
11
|
+
|
12
|
+
time ruby main.rb
|
13
|
+
|
14
|
+
|
15
|
+
This launches 4 workers that do some simple calculations (only to keep busy
|
16
|
+
the processor), to demonstrate the speed gain against threads. 4 workers are
|
17
|
+
used for a quad-core processor. Adjust this number to match your processor's core count.
|
18
|
+
|
19
|
+
|
20
|
+
To launch the threaded version of the application, you can do:
|
21
|
+
|
22
|
+
time ruby threads_implementation.rb
|
23
|
+
|
24
|
+
You can compare the two times obtained. The threaded version will take about the same time with 1 thread as with 100.
|
25
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# CPU-bound busy work used to compare execution strategies.
# Meant to be mixed in (both methods are instance methods).
module Calculations

  # Increments a counter +iterations+ times and prints the elapsed time in
  # seconds to standard output. Returns nil (the result of +puts+).
  #
  # iterations:: number of loop passes; defaults to the original fixed
  #              value of 20 million.
  def do_dummy_calculations(iterations = 20_000_000)
    started = Time.now
    x = 0
    iterations.times { x += 1 }
    puts Time.now - started
  end

  # Performs +number_of_calcs+ additions of a Fibonacci-style sequence
  # (each value is the sum of the previous two, starting from 1 and 1).
  # The computed values are discarded; the method returns
  # +number_of_calcs+ (the result of Integer#times).
  #
  # number_of_calcs:: number of additions; defaults to the original fixed
  #                   value of 250000.
  def do_dummy_calculations2(number_of_calcs = 250_000)
    # t=Time.now

    x1 = 1
    x2 = 1

    # do a loop with calculations
    number_of_calcs.times do
      x1, x2 = x2, x1 + x2

      # puts some info at regular intervals
      # if (i % 100000)==0
      #   puts "Calculated #{i}"
      # end
    end
    # puts Time.now-t
  end

end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require "thread.rb"
|
2
|
+
|
3
|
+
######################################
|
4
|
+
# This class creates a thread's pool
|
5
|
+
######################################
|
6
|
+
|
7
|
+
# A pool of at most +max_size+ long-lived threads. Blocks passed to
# +process+ are executed by the first idle worker; new workers are created
# on demand until the limit is reached.
class ThreadPool

  # One pool thread. It polls for an assigned block, runs it and then marks
  # itself as free again.
  class Worker
    @@count=0

    def initialize
      @identifier = @@count
      @@count += 1

      # NOTE: this is a process-wide setting, not limited to pool threads.
      Thread.abort_on_exception = true
      @mutex = Mutex.new
      @thread = Thread.new do
        loop do
          sleep 0.001
          block = get_block
          next unless block

          begin
            block.call
          rescue Exception => e
            # report and re-raise; with abort_on_exception the error is
            # propagated out of this thread
            puts "In thread: " + e.message
            raise e
          end

          reset_block
        end
      end
    end

    # Returns the block currently assigned to this worker, or nil.
    def get_block
      @mutex.synchronize { @block }
    end

    # Assigns +block+ to this worker.
    # Raises RuntimeError if a block is already assigned.
    def set_block(block)
      # puts "set block #{@identifier}"
      @mutex.synchronize do
        raise RuntimeError, "Thread already busy." if @block
        @block = block
      end
    end

    # Marks this worker as free.
    def reset_block
      @mutex.synchronize { @block = nil }
    end

    # True while a block is assigned (pending or running).
    def busy?
      @mutex.synchronize { !@block.nil? }
    end
  end

  attr_accessor :max_size
  attr_reader :workers

  # Defines the max number of threads that will be able to exist
  def initialize(max_size = 10)
    @max_size = max_size
    @workers = []
    @mutex = Mutex.new
  end

  # Number of worker threads created so far.
  def size
    @mutex.synchronize { @workers.size }
  end

  # True while at least one worker has a block assigned.
  def busy?
    @mutex.synchronize { @workers.any? { |w| w.busy? } }
  end

  # Blocks the caller until every assigned block has been executed.
  def join
    sleep 0.01 while busy?
  end

  # Schedules +block+ on an idle worker, waiting for one if necessary.
  # Call +join+ afterwards to wait for completion. Returns the block.
  def process(&block)
    wait_for_worker(block)
    block
  end

  # Waits (polling every 10 ms) until a worker is available and returns it.
  # When +block+ is given, the returned worker already has it assigned.
  def wait_for_worker(block = nil)
    loop do
      worker = find_available_worker(block)
      return worker if worker
      sleep 0.01
    end
  end

  # Returns an idle or newly created worker, or nil when all are busy and
  # the pool is full. When +block+ is given it is assigned while the pool
  # mutex is still held, so that two concurrent callers can never pick the
  # same idle worker.
  def find_available_worker(block = nil)
    @mutex.synchronize do
      worker = free_worker || create_worker
      worker.set_block(block) if worker && block
      worker
    end
  end

  # First worker without an assigned block, or nil.
  def free_worker
    @workers.find { |w| !w.busy? }
  end

  # Adds a new worker unless the pool already holds +max_size+ of them.
  def create_worker
    return nil if @workers.size >= @max_size
    worker = Worker.new
    @workers << worker
    worker
  end

  private :wait_for_worker , :find_available_worker , :free_worker , :create_worker
end
|