scbi_mapreduce 0.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +49 -0
- data/Manifest.txt +46 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +295 -0
- data/Rakefile +28 -0
- data/bin/scbi_mapreduce +52 -0
- data/lib/scbi_mapreduce.rb +15 -0
- data/lib/scbi_mapreduce/error_handler.rb +15 -0
- data/lib/scbi_mapreduce/main_worker.rb +50 -0
- data/lib/scbi_mapreduce/manager.rb +110 -0
- data/lib/scbi_mapreduce/work_manager.rb +405 -0
- data/lib/scbi_mapreduce/worker.rb +163 -0
- data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
- data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/skeleton/dummy_calcs/README.txt +25 -0
- data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
- data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
- data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
- data/skeleton/dummy_calcs/main.rb +67 -0
- data/skeleton/dummy_calcs/my_worker.rb +56 -0
- data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
- data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
- data/skeleton/remove_mids/README.txt +30 -0
- data/skeleton/remove_mids/launch_only_workers.rb +29 -0
- data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
- data/skeleton/remove_mids/lib/find_mids.rb +191 -0
- data/skeleton/remove_mids/lib/global_match.rb +97 -0
- data/skeleton/remove_mids/linear_implementation.rb +87 -0
- data/skeleton/remove_mids/main.rb +89 -0
- data/skeleton/remove_mids/my_worker.rb +59 -0
- data/skeleton/remove_mids/my_worker_manager.rb +68 -0
- data/skeleton/simple/README.txt +16 -0
- data/skeleton/simple/main.rb +41 -0
- data/skeleton/simple/my_worker.rb +53 -0
- data/skeleton/simple/my_worker_manager.rb +55 -0
- data/test/drb_test/main.rb +31 -0
- data/test/drb_test/my_worker.rb +36 -0
- data/test/drb_test/my_worker_manager.rb +41 -0
- data/test/drb_test/scbi_drb_checkpoint +1 -0
- data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
- data/test/test_helper.rb +3 -0
- data/test/test_scbi_drb.rb +11 -0
- metadata +127 -0
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'eventmachine'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
# require 'error_handler'
|
7
|
+
|
8
|
+
module ScbiMapreduce
|
9
|
+
|
10
|
+
# Client side of a map/reduce job: an EventMachine connection that receives
# objects from a server, hands them to +process_object+ and sends the result
# back over the same connection.
#
# The five methods at the top (+receive_initial_config+, +process_object+,
# +starting_worker+, +worker_connected+, +closing_worker+) are empty here;
# presumably they are the hooks a concrete worker overrides.
class Worker < EventMachine::Connection
  include EM::P::ObjectProtocol

  # Called with the initial configuration carried by the first object
  # received, unless that object is +:no_initial_config+.
  def receive_initial_config(obj)
  end

  # Called with the +data+ of every work object received. The return value
  # replaces +data+ before the object is sent back.
  def process_object(obj)
  end

  # Called once, right after the first object (the initial config) has been
  # handled.
  def starting_worker
  end

  # Called from +post_init+.
  def worker_connected
  end

  # Called from +stop_worker+, after the connection close and the event loop
  # stop have been requested.
  def closing_worker
  end

  ######################

  def initialize(*args)
    super
  end

  # EventMachine callback. Logs the connection and runs the
  # +worker_connected+ hook; any failure is logged and not re-raised.
  def post_init
    $WORKER_LOG.info('WORKER CONNECTED')

    worker_connected
  rescue Exception => e
    # Deliberately broad: the original code logs every failure of the hook
    # and keeps the connection alive.
    $WORKER_LOG.error("Exiting worker #{@@worker_id} due to exception:\n" + e.message + "\n" + Array(e.backtrace).join("\n"))
  end

  # ObjectProtocol callback, invoked for every object received.
  #
  # * The first object is the initial configuration (or the symbol
  #   +:no_initial_config+); after handling it +starting_worker+ is called.
  # * +:quit+ stops the worker.
  # * Anything else is a work object: its +data+ is processed, replaced by
  #   the result, and the object is sent back. If processing raises, a
  #   WorkerError (defined outside this block) is sent back instead.
  def receive_object(obj)
    if @@count < 0
      # @@count starts at -1, so this branch runs only for the first object.
      @@count += 1

      if obj != :no_initial_config
        receive_initial_config(obj[:initial_config])
        $WORKER_LOG.info('Initial config: received')
      else
        $WORKER_LOG.info('Initial config: empty config')
      end

      # At first iteration, start worker
      starting_worker
    elsif obj == :quit
      stop_worker
    else
      @@count += 1

      # NOTE: an EventMachine.defer based variant was left commented out in
      # the original with the remark that the *modified* object must be the
      # one passed back; processing is done synchronously instead.
      begin
        modified_data = process_object(obj.data)
        obj.data = modified_data

        send_object(obj)
      rescue Exception => e
        # Deliberately broad so that a reply is sent for the object even
        # when processing fails.
        $WORKER_LOG.error("Error processing object\n" + e.message + ":\n" + Array(e.backtrace).join("\n"))
        exception = WorkerError.new('Message', e, @@worker_id, obj)
        send_object(exception)
      end
    end
  end

  # EventMachine callback for a closed connection: stops the event loop so
  # that +start_worker+ can return.
  def unbind
    $WORKER_LOG.info "EXITING WORKER"
    EventMachine::stop_event_loop
  end

  # Closes the connection, stops the event loop and runs the
  # +closing_worker+ hook.
  def stop_worker
    close_connection
    EventMachine::stop_event_loop
    closing_worker
  end

  # Connects a worker of this class to ip:port and blocks until the event
  # loop stops, then logs how many objects were processed and at what rate.
  #
  # worker_id:: identifier used in log messages and in the default log name
  # ip, port::  address of the server to connect to
  # log_file::  path or IO passed to Logger.new; defaults to
  #             logs/worker<id>_<hostname>_log.txt
  def self.start_worker(worker_id, ip, port, log_file = nil)
    # Loaded here because this method is the only user of FileUtils and the
    # file itself never requires it.
    require 'fileutils'

    @@count = -1
    @@worker_id = worker_id

    if log_file.nil?
      # Interpolation instead of String#+ so a numeric worker_id also works.
      log_file = "logs/worker#{worker_id}_#{`hostname`.chomp}_log.txt"
    end

    # mkdir_p is a no-op for an existing directory; IO targets such as
    # STDOUT have no directory to create.
    FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.is_a?(IO)

    $WORKER_LOG = Logger.new(log_file)
    $WORKER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

    $LOG = $WORKER_LOG

    started_at = Time.now

    EM.error_handler { |e|
      $WORKER_LOG.error(e.message + ' => ' + Array(e.backtrace).join("\n"))
    }

    EventMachine::run {
      EventMachine::connect ip, port, self
      $WORKER_LOG.info "Worker connected to #{ip}:#{port}"
    }

    total_seconds = Time.now - started_at
    $WORKER_LOG.info "Client #{@@worker_id} processed: #{@@count} objs"
    $WORKER_LOG.info "Client #{@@worker_id} proc rate: #{@@count/total_seconds.to_f} objects/seg"
  end
end
|
162
|
+
|
163
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module ScbiMapreduce
|
2
|
+
|
3
|
+
INTERPRETER='ruby'
|
4
|
+
|
5
|
+
# Starts worker processes, either locally (fork + exec of main_worker.rb)
# or on other machines through ssh, and points them at a server ip/port.
class WorkerLauncher
  # This file uses FileUtils and Logger but does not require them itself.
  require 'fileutils'
  require 'logger'

  attr_accessor :server_ip, :server_port

  # server_ip, server_port:: address handed to every launched worker
  # workers::       number of local workers started by +launch_workers+
  # worker_file::   passed as the last argument to main_worker.rb
  # log_file::      path or IO for the launcher log; defaults to
  #                 logs/launcher_log.txt
  # init_env_file:: optional shell file sourced before starting each
  #                 external worker (used only if the file exists)
  def initialize(server_ip, server_port, workers, worker_file, log_file = nil, init_env_file = nil)
    @server_ip = server_ip
    @server_port = server_port
    @worker_file = worker_file
    @workers = workers
    @init_env_file = init_env_file

    log_file = 'logs/launcher_log.txt' if log_file.nil?

    # mkdir_p is a no-op for an existing directory; IO targets such as
    # STDOUT have no directory to create.
    FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.is_a?(IO)

    $LAUNCHER_LOG = Logger.new(log_file)
    $LAUNCHER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
  end

  # Launches the local workers and blocks until every child process exits.
  def launch_workers_and_wait
    launch_workers
    Process.waitall
  end

  # Forks one child per local worker; each child runs +launch_worker+.
  # Does nothing when the configured number of workers is not positive.
  def launch_workers
    return unless @workers > 0

    $LAUNCHER_LOG.info "Connecting #{@workers} local workers to #{@server_ip}:#{@server_port}"

    @workers.times do |i|
      fork { launch_worker(i, @server_ip, @server_port) }

      # Logged by the parent: the default launch_worker replaces the child
      # with exec, so nothing written after it in the child would ever run.
      $LAUNCHER_LOG.info "Worker #{i} launched [#{@server_ip}:#{@server_port}]"
    end

    $LAUNCHER_LOG.info "All workers launched"
  end

  # override this
  #
  # Default implementation: replaces the current process with the
  # interpreter running main_worker.rb (located next to this file).
  def launch_worker(worker_id, server_ip, server_port)
    cmd = "#{INTERPRETER} #{File.join(File.dirname(__FILE__),'main_worker.rb')} #{worker_id.to_s} #{server_ip} #{server_port} #{@worker_file}"
    puts cmd
    exec(cmd)
  end

  # Starts one worker per entry of +workers+ (machine names handed to ssh).
  # Each remote command optionally sources the init_env file, changes to the
  # local working directory and runs main_worker.rb with a worker id equal
  # to the machine's position in the list.
  #
  # NOTE(review): the command line is built by plain interpolation, so
  # machine names and paths containing spaces or shell metacharacters are
  # not escaped.
  def launch_external_workers(workers)
    puts "Launching #{workers.count} external workers: #{workers}"

    init = ''
    if @init_env_file
      path = File.expand_path(@init_env_file)
      if File.exist?(path)
        puts "File #{path} exists, using it"
        init = ". #{path}; "
      end
    end

    pwd = `pwd`.chomp
    cd = File.exist?(pwd) ? "cd #{pwd}; " : ''

    worker_script = File.join(File.dirname(__FILE__), 'main_worker.rb')

    workers.each_with_index do |machine, worker_id|
      cmd = "ssh #{machine} \"#{init} #{cd} #{INTERPRETER} #{worker_script} #{worker_id} #{@server_ip} #{@server_port} #{@worker_file}\""
      $LAUNCHER_LOG.info cmd

      fork { exec(cmd) }
    end
  end
end
|
96
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# A serializer class that provides compression
|
5
|
+
#
|
6
|
+
# To use this instead of the default Marshal serializer, redefine the serializer method in your worker and worker_manager as this:
|
7
|
+
#
|
8
|
+
# def serializer
|
9
|
+
# ZlibSerializer
|
10
|
+
#
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
|
14
|
+
# Serializer with the same dump/load interface as Marshal that additionally
# compresses the marshalled bytes with zlib.
class ZlibSerializer

  # Marshals +data+ and returns it deflated (best compression, window
  # bits 15, memory level 9) as a binary String.
  def self.dump(data)
    input = Marshal.dump(data)
    zipper = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, 15, 9)
    begin
      zipper.deflate(input, Zlib::FINISH)
    ensure
      # Release the zlib stream even when deflate raises.
      zipper.close
    end
  end

  # Inflates +input+ (as produced by +dump+) and returns the unmarshalled
  # object. Raises Zlib::Error when +input+ is not valid zlib data.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects; only feed
  # this data that comes from a trusted peer.
  def self.load(input)
    unzipper = Zlib::Inflate.new(15)
    begin
      res = unzipper.inflate(input)
    ensure
      # Release the zlib stream even when inflate raises.
      unzipper.close
    end

    Marshal.load(res)
  end
end
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has irb/completion and
# lib/scbi_mapreduce.rb preloaded.

# irb.bat is used when RUBY_PLATFORM mentions mswin or mingw.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/scbi_mapreduce.rb'}"
puts "Loading scbi_mapreduce gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script with this project's component sources.

# Absolute path of the directory one level above this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load RubyGems first and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading --help / -h argument is dropped before the arguments are
# handed to RubiGen.
help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script with this project's component sources.

# Absolute path of the directory one level above this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load RubyGems first and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading --help / -h argument is dropped before the arguments are
# handed to RubiGen.
help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Comparison of workers with scbi_mapreduce vs ruby-threads
|
2
|
+
=========================================================
|
3
|
+
|
4
|
+
This application is only useful for testing. You can modify the files
|
5
|
+
to perform other tasks. There are other templates available, you
|
6
|
+
can list them by issuing this command:
|
7
|
+
|
8
|
+
scbi_mapreduce
|
9
|
+
|
10
|
+
You can launch the test application right now with the following command:
|
11
|
+
|
12
|
+
time ruby main.rb
|
13
|
+
|
14
|
+
|
15
|
+
This launches 4 workers that do some simple calculations (only to keep busy
|
16
|
+
the processor), to demonstrate the speed gain against threads. 4 workers are
|
17
|
+
used for a quad-core processor. Adjust it according to your processor cores.
|
18
|
+
|
19
|
+
|
20
|
+
To launch the threaded version of the application, you can do:
|
21
|
+
|
22
|
+
time ruby threads_implementation.rb
|
23
|
+
|
24
|
+
You can compare the two times obtained. The threaded version will take the same time with 1 thread or with 100.
|
25
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# CPU-bound busy work with no useful result.
module Calculations

  # Increments a counter +iterations+ times (20 million by default) and
  # prints the elapsed wall-clock time in seconds. Returns nil.
  def do_dummy_calculations(iterations = 20_000_000)
    t = Time.now
    x = 0
    iterations.times do
      x += 1
    end
    puts Time.now - t
  end

  # Runs +number_of_calcs+ steps (250000 by default) of the recurrence
  # x = x1 + x2 starting from 1, 1. The computed values are discarded; the
  # method returns +number_of_calcs+.
  def do_dummy_calculations2(number_of_calcs = 250_000)
    x1 = 1
    x2 = 1

    # do a loop with calculations
    number_of_calcs.times do
      x = x1 + x2

      x1 = x2
      x2 = x
    end
  end

end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require "thread.rb"
|
2
|
+
|
3
|
+
######################################
|
4
|
+
# This class creates a thread's pool
|
5
|
+
######################################
|
6
|
+
|
7
|
+
# A pool of at most +max_size+ long-lived threads. Work is submitted as a
# block through +process+; +join+ waits until no worker is busy.
class ThreadPool
  # One pool thread. It polls for a block, runs it, then clears it so the
  # worker counts as free again.
  class Worker
    @@count=0

    def initialize
      @identifier = @@count
      @@count+=1

      # Global setting: an exception in any thread aborts the process.
      Thread.abort_on_exception = true
      @mutex = Mutex.new
      @thread = Thread.new do
        # Kept as `while true`: Kernel#loop would swallow a StopIteration
        # raised by a submitted block.
        while true
          sleep 0.001
          block = get_block
          next unless block

          begin
            block.call
          rescue Exception => e
            puts "In thread: " + e.message
            raise
          end

          reset_block
        end
      end
    end

    # Returns the block currently assigned, or nil.
    def get_block
      @mutex.synchronize {@block}
    end

    # Assigns +block+ to this worker. Raises RuntimeError when a block is
    # already assigned.
    def set_block(block)
      @mutex.synchronize do
        raise RuntimeError, "Thread already busy." if @block
        @block = block
      end
    end

    # Marks the worker as free.
    def reset_block
      @mutex.synchronize {@block = nil}
    end

    # True while a block is assigned.
    def busy?
      @mutex.synchronize {!@block.nil?}
    end
  end

  attr_accessor :max_size
  attr_reader :workers

  # Defines the max number of threads that will be able to exist
  def initialize(max_size = 10)
    @max_size = max_size
    @workers = []
    @mutex = Mutex.new
  end

  # Number of workers created so far.
  def size
    @mutex.synchronize {@workers.size}
  end

  # True while at least one worker has a block assigned.
  def busy?
    @mutex.synchronize {@workers.any? {|w| w.busy?}}
  end

  # Blocks the caller until no worker is busy.
  def join
    sleep 0.01 while busy?
  end

  # Hands +block+ to a free worker, creating one if the pool is not full,
  # and waits (polling every 10 ms) while none is available. Call +join+
  # afterwards to wait for completion. Returns the block.
  def process(&block)
    sleep 0.01 until assign_to_available_worker(block)
    block
  end

  # Returns a free worker, waiting until one is available. Not used by
  # +process+ any more; kept for code that relies on it.
  def wait_for_worker
    while true
      worker = find_available_worker
      return worker if worker
      sleep 0.01
    end
  end

  # Returns a free or newly created worker, or nil when the pool is full
  # and every worker is busy.
  def find_available_worker
    @mutex.synchronize {free_worker || create_worker}
  end

  # First worker without an assigned block, or nil.
  def free_worker
    @workers.find {|w| !w.busy?}
  end

  # Adds a worker unless the pool already holds +max_size+ of them.
  def create_worker
    return nil if @workers.size >= @max_size
    worker = Worker.new
    @workers << worker
    worker
  end

  # Picks a worker and assigns +block+ to it while holding the pool mutex,
  # so two concurrent callers can never claim the same free worker.
  # Returns the worker, or nil when none is available.
  def assign_to_available_worker(block)
    @mutex.synchronize do
      worker = free_worker || create_worker
      worker.set_block(block) if worker
      worker
    end
  end

  private :wait_for_worker , :find_available_worker , :free_worker , :create_worker , :assign_to_available_worker
end
|