scbi_mapreduce 0.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/History.txt +49 -0
  2. data/Manifest.txt +46 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +295 -0
  5. data/Rakefile +28 -0
  6. data/bin/scbi_mapreduce +52 -0
  7. data/lib/scbi_mapreduce.rb +15 -0
  8. data/lib/scbi_mapreduce/error_handler.rb +15 -0
  9. data/lib/scbi_mapreduce/main_worker.rb +50 -0
  10. data/lib/scbi_mapreduce/manager.rb +110 -0
  11. data/lib/scbi_mapreduce/work_manager.rb +405 -0
  12. data/lib/scbi_mapreduce/worker.rb +163 -0
  13. data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
  14. data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
  15. data/script/console +10 -0
  16. data/script/destroy +14 -0
  17. data/script/generate +14 -0
  18. data/skeleton/dummy_calcs/README.txt +25 -0
  19. data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
  20. data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
  21. data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
  22. data/skeleton/dummy_calcs/main.rb +67 -0
  23. data/skeleton/dummy_calcs/my_worker.rb +56 -0
  24. data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
  25. data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
  26. data/skeleton/remove_mids/README.txt +30 -0
  27. data/skeleton/remove_mids/launch_only_workers.rb +29 -0
  28. data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
  29. data/skeleton/remove_mids/lib/find_mids.rb +191 -0
  30. data/skeleton/remove_mids/lib/global_match.rb +97 -0
  31. data/skeleton/remove_mids/linear_implementation.rb +87 -0
  32. data/skeleton/remove_mids/main.rb +89 -0
  33. data/skeleton/remove_mids/my_worker.rb +59 -0
  34. data/skeleton/remove_mids/my_worker_manager.rb +68 -0
  35. data/skeleton/simple/README.txt +16 -0
  36. data/skeleton/simple/main.rb +41 -0
  37. data/skeleton/simple/my_worker.rb +53 -0
  38. data/skeleton/simple/my_worker_manager.rb +55 -0
  39. data/test/drb_test/main.rb +31 -0
  40. data/test/drb_test/my_worker.rb +36 -0
  41. data/test/drb_test/my_worker_manager.rb +41 -0
  42. data/test/drb_test/scbi_drb_checkpoint +1 -0
  43. data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
  44. data/test/test_helper.rb +3 -0
  45. data/test/test_scbi_drb.rb +11 -0
  46. metadata +127 -0
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ require 'logger'
5
+
6
+ # require 'error_handler'
7
+
8
require 'fileutils'

module ScbiMapreduce

  # EventMachine connection that receives objects from a server, runs
  # +process_object+ on their data and sends the objects back.
  #
  # The first object received on the connection is treated as the initial
  # configuration; the symbol +:quit+ stops the worker; every other object is
  # treated as a unit of work.
  #
  # Subclasses customise behaviour by overriding the hook methods below, which
  # are all no-ops in this base class.
  #
  # State is kept in class variables (+@@count+, +@@worker_id+) and in the
  # globals +$WORKER_LOG+ / +$LOG+, all of which are set by +start_worker+.
  class Worker < EventMachine::Connection
    include EM::P::ObjectProtocol

    # Hook: called with the value stored under +:initial_config+ in the first
    # object received, unless that object is +:no_initial_config+.
    def receive_initial_config(obj)
    end

    # Hook: called with the +data+ of every work object. Its return value
    # replaces +obj.data+ before the object is sent back.
    def process_object(obj)
    end

    # Hook: called once, right after the initial configuration was handled.
    def starting_worker
    end

    # Hook: called from +post_init+ when the connection object is set up.
    def worker_connected
    end

    # Hook: called from +stop_worker+ after the connection was asked to close.
    def closing_worker
    end

    ######################

    def initialize(*args)
      super
    end

    # EventMachine callback. Logs the connection and runs the
    # +worker_connected+ hook; any exception raised there is logged and
    # swallowed.
    #
    # NOTE(review): this rescues Exception (not only StandardError), so even
    # SignalException/SystemExit raised by the hook are swallowed - confirm
    # that this is intended.
    def post_init
      $WORKER_LOG.info('WORKER CONNECTED')

      worker_connected
    rescue Exception => e
      $WORKER_LOG.error("Exiting worker #{@@worker_id} due to exception:\n" + e.message+"\n"+e.backtrace.join("\n"))
      #raise e
    end

    # ObjectProtocol callback, invoked for every object received.
    #
    # * While +@@count+ is negative (i.e. for the first object) +obj+ is the
    #   initial configuration, or +:no_initial_config+.
    # * +:quit+ stops the worker.
    # * Anything else is a work object: +obj.data+ is replaced by the result
    #   of +process_object+ and +obj+ is sent back. If processing raises, a
    #   +WorkerError+ is sent back instead (+WorkerError+ is not defined in
    #   this file).
    def receive_object(obj)
      if @@count < 0
        @@count += 1

        # receive initial config
        if obj != :no_initial_config
          receive_initial_config(obj[:initial_config])
          $WORKER_LOG.info('Initial config: received')
        else
          $WORKER_LOG.info('Initial config: empty config')
        end

        # At first iteration, start worker
        starting_worker
      elsif obj == :quit
        stop_worker
      else
        @@count += 1

        # NOTE - THE MODIFIED OBJECT MUST BE PASSED ON
        # (disabled attempt at running the work through EventMachine.defer)
        # operation = proc {
        #   # calculations
        #   obj=process_object(obj)
        #   #puts '.' + obj.seq_name
        #   #return obj
        # }
        #
        # callback = proc { |modified_obj|
        #   send_object(modified_obj)
        # }
        #
        # EventMachine.defer(operation, callback)
        #send_object(obj)

        begin
          obj.data = process_object(obj.data)
          send_object(obj)
        rescue Exception => e
          # NOTE(review): Exception is rescued so that every failure is
          # reported to the other end as a WorkerError; this also captures
          # signals/exit requests raised while processing - confirm intent.
          $WORKER_LOG.error("Error processing object\n" + e.message + ":\n" + e.backtrace.join("\n"))
          exception = WorkerError.new('Message', e, @@worker_id, obj)
          send_object(exception)
        end
      end
    end

    # EventMachine callback for a closed connection: stops the event loop so
    # that +start_worker+ can return.
    def unbind
      $WORKER_LOG.info "EXITING WORKER"
      EventMachine::stop_event_loop
    end

    # Closes the connection, stops the event loop and runs the
    # +closing_worker+ hook.
    def stop_worker
      close_connection
      EventMachine::stop_event_loop
      closing_worker
    end

    # Connects a worker of this class to +ip+:+port+ and blocks until the
    # event loop stops, then logs how many objects were processed.
    #
    # worker_id:: identifier used in log messages and in the default log name
    #             (String or Integer).
    # ip, port::  address handed to +EventMachine::connect+.
    # log_file::  path of the log file, or an IO such as STDOUT. Defaults to
    #             <tt>logs/worker<id>_<hostname>_log.txt</tt>.
    def self.start_worker(worker_id, ip, port, log_file = nil)
      #puts "NEW WORKER - INIIIIIIIIIIIIIIIIIIIIT #{self}"
      # -1 marks "initial configuration not received yet"
      @@count = -1
      @@worker_id = worker_id

      if log_file.nil?
        # Interpolation (instead of String#+) also accepts an Integer id.
        log_file = "logs/worker#{worker_id}_#{`hostname`.chomp}_log.txt"
      end

      # Only paths need a directory; IO objects (STDOUT, STDERR, ...) do not.
      # mkdir_p is a no-op when the directory already exists.
      FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.is_a?(IO)

      $WORKER_LOG = Logger.new(log_file)
      $WORKER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

      $LOG = $WORKER_LOG

      started_at = Time.now

      EM.error_handler{ |e|
        $WORKER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
      }

      EventMachine::run {
        EventMachine::connect ip, port, self
        $WORKER_LOG.info "Worker connected to #{ip}:#{port}"
      }

      total_seconds = Time.now - started_at
      $WORKER_LOG.info "Client #{@@worker_id} processed: #{@@count} objs"
      $WORKER_LOG.info "Client #{@@worker_id} proc rate: #{@@count/total_seconds.to_f} objects/seg"
    end

  end

end
@@ -0,0 +1,96 @@
1
require 'fileutils'
require 'logger'

module ScbiMapreduce

  # Command used to run the worker script.
  INTERPRETER='ruby'

  # Starts worker processes that connect to a server at
  # +server_ip+:+server_port+, either locally (via fork) or on other
  # machines (via ssh). Activity is logged to the global +$LAUNCHER_LOG+.
  class WorkerLauncher

    attr_accessor :server_ip, :server_port

    # server_ip, server_port:: address the workers are told to connect to.
    # workers::       number of local workers started by +launch_workers+.
    # worker_file::   passed as last argument to the worker script.
    # log_file::      path of the launcher log, or an IO such as STDOUT.
    #                 Defaults to <tt>logs/launcher_log.txt</tt>.
    # init_env_file:: optional shell file sourced on remote machines before
    #                 the worker is started (see +launch_external_workers+).
    def initialize(server_ip, server_port, workers, worker_file, log_file = nil, init_env_file = nil)
      @server_ip = server_ip
      @server_port = server_port
      @worker_file = worker_file
      @workers = workers
      @init_env_file = init_env_file

      log_file = 'logs/launcher_log.txt' if log_file.nil?

      # Only paths need a directory; IO objects (STDOUT, STDERR, ...) do not.
      # mkdir_p is a no-op when the directory already exists.
      FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.is_a?(IO)

      $LAUNCHER_LOG = Logger.new(log_file)
      $LAUNCHER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Starts the local workers and blocks until every child process exited.
    def launch_workers_and_wait
      launch_workers
      Process.waitall
    end

    # Forks one child per local worker; each child runs +launch_worker+.
    # Returns nil without doing anything when no local workers are configured.
    def launch_workers
      return unless @workers > 0

      $LAUNCHER_LOG.info "Connecting #{@workers} local workers to #{@server_ip}:#{@server_port}"

      @workers.times do |i|
        fork { launch_worker(i, @server_ip, @server_port) }

        # Logged by the parent: the default launch_worker replaces the child
        # with exec, so nothing placed after it in the child would ever run.
        $LAUNCHER_LOG.info "Worker #{i} launched [#{@server_ip}:#{@server_port}]"
      end

      #Process.waitall
      $LAUNCHER_LOG.info "All workers launched"
    end

    # Replaces the current process with the worker script (main_worker.rb,
    # located next to this file). Runs inside the forked child.
    #
    # override this
    def launch_worker(worker_id, server_ip, server_port)
      cmd = "#{INTERPRETER} #{File.join(File.dirname(__FILE__),'main_worker.rb')} #{worker_id.to_s} #{server_ip} #{server_port} #{@worker_file}"
      puts cmd
      exec(cmd)
    end

    # Starts one worker per entry of +workers+ through ssh. Worker ids are
    # the positions in the list, starting at 0.
    #
    # The remote command optionally sources +init_env_file+ (when it exists
    # locally) and changes to the current working directory (when it exists
    # locally) before running the worker script.
    #
    # NOTE(review): the ssh command is assembled by plain string
    # interpolation, so machine names and paths must not contain shell
    # metacharacters or spaces.
    def launch_external_workers(workers)
      puts "Launching #{workers.count} external workers: #{workers}"

      init = ''
      if @init_env_file
        path = File.expand_path(@init_env_file)
        # path = File.join($ROOT_PATH)
        # puts "init_env file: #{path}"
        if File.exist?(path)
          puts "File #{path} exists, using it"
          init = ". #{path}; "
        end
      end

      pwd = `pwd`.chomp
      cd = File.exist?(pwd) ? "cd #{pwd}; " : ''

      worker_script = File.join(File.dirname(__FILE__), 'main_worker.rb')

      workers.each_with_index do |machine, worker_id|
        cmd = "ssh #{machine} \"#{init} #{cd} #{INTERPRETER} #{worker_script} #{worker_id.to_s} #{@server_ip} #{@server_port} #{@worker_file}\""
        $LAUNCHER_LOG.info cmd

        fork { exec(cmd) }
      end
    end

  end
end
@@ -0,0 +1,32 @@
1
+ require 'zlib'
2
+ require 'json'
3
+
4
+ # A serializer class that provides compression
5
+ #
6
+ # To use this instead of the default Marshal serializer, redefine the serializer method in your worker and worker_manager as this:
7
+ #
8
+ # def serializer
9
+ # ZlibSerializer
10
+ #
11
+ # end
12
+ #
13
+
14
# Serializer with the same +dump+/+load+ interface as Marshal that
# additionally compresses the marshalled bytes with zlib.
class ZlibSerializer

  # Marshals +data+ and returns the zlib-compressed bytes as a String.
  #
  # Raises whatever +Marshal.dump+ raises for objects that cannot be
  # marshalled.
  def self.dump(data)
    input = Marshal.dump(data)
    # level = best compression, window bits = 15, memory level = 9
    zipper = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, 15, 9)
    begin
      zipper.deflate(input, Zlib::FINISH)
    ensure
      # Release the stream even when deflate raises.
      zipper.close
    end
  end

  # Inflates +input+ (a String produced by +dump+) and returns the
  # unmarshalled object.
  #
  # Raises Zlib::Error when +input+ is not valid zlib data.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects; only feed this
  # method data coming from a trusted peer.
  def self.load(input)
    unzipper = Zlib::Inflate.new(15)
    inflated = begin
      unzipper.inflate(input)
    ensure
      # Release the stream even when inflate raises.
      unzipper.close
    end

    Marshal.load(inflated)
  end
end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has completion enabled and
# lib/scbi_mapreduce.rb already loaded.

# Windows builds of Ruby ship irb as a batch file.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/scbi_mapreduce.rb'}"
puts "Loading scbi_mapreduce gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script for this project with the command-line
# arguments given to this file.

# Project root: the parent of the directory that contains this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; fall back to loading rubygems first when it is not
# on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments reach the script runner.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script for this project with the command-line
# arguments given to this file.

# Project root: the parent of the directory that contains this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; fall back to loading rubygems first when it is not
# on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments reach the script runner.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,25 @@
1
+ Comparison of workers with scbi_mapreduce vs ruby-threads
2
+ =========================================================
3
+
4
+ This application is only useful for testing. You can modify the files
5
+ to perform other tasks. There are other templates available, you
6
+ can list them by issuing this command:
7
+
8
+ scbi_mapreduce
9
+
10
+ You can launch the test application right now with the following command:
11
+
12
+ time ruby main.rb
13
+
14
+
15
+ This launches 4 workers that do some simple calculations (only to keep busy
16
+ the processor), to demonstrate the speed gain against threads. 4 workers are
17
+ used for a quad-core processor. Adjust it accordingly to your processor cores.
18
+
19
+
20
+ To launch the threaded version of the application, you can do:
21
+
22
+ time ruby threads_implementation.rb
23
+
24
+ You can compare the two times obtained. The threaded version will take the same time with 1 thread or with 100.
25
+
@@ -0,0 +1,37 @@
1
# CPU-bound dummy workloads, only meant to keep the processor busy.
module Calculations

  # Increments a counter +iterations+ times and prints the elapsed time in
  # seconds to standard output.
  #
  # iterations:: number of loop iterations (defaults to the original
  #              hard-coded 20 million).
  #
  # Returns nil (the result of +puts+).
  def do_dummy_calculations(iterations = 20_000_000)
    # Monotonic clock: not affected by wall-clock adjustments.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    x = 0
    iterations.times { x += 1 }
    puts Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end

  # Repeatedly replaces a pair of integers (starting at 1, 1) by
  # (second, first + second), +number_of_calcs+ times. The computed values
  # are discarded.
  #
  # number_of_calcs:: number of iterations (defaults to the original
  #                   hard-coded 250 000).
  #
  # Returns +number_of_calcs+ (the result of Integer#times).
  def do_dummy_calculations2(number_of_calcs = 250_000)
    x1 = 1
    x2 = 1

    # do a loop with calculations
    number_of_calcs.times do
      x1, x2 = x2, x1 + x2
    end
  end

end
@@ -0,0 +1,107 @@
1
+ require "thread.rb"
2
+
3
+ ######################################
4
+ # This class creates a thread's pool
5
+ ######################################
6
+
7
# Pool of at most +max_size+ threads. Blocks handed to +process+ are run by
# the pooled threads; +join+ waits until none of them is running a block.
class ThreadPool

  # A single pooled thread. It polls for a block to run, calls it and then
  # clears it again.
  class Worker
    @@count=0

    # Gives the worker a sequential identifier and starts its polling thread.
    # Note that this turns on Thread.abort_on_exception for the whole process.
    def initialize
      @identifier = @@count
      @@count += 1

      Thread.abort_on_exception = true
      @mutex = Mutex.new
      @thread = Thread.new do
        while true
          sleep 0.001
          job = get_block
          next unless job

          begin
            job.call
          rescue Exception => error
            puts "In thread: " + error.message
            raise error
          end

          reset_block
        end
      end
    end

    # Returns the block currently assigned to this worker, or nil.
    def get_block
      @mutex.synchronize { @block }
    end

    # Assigns +block+ to this worker.
    # Raises RuntimeError when a block is already assigned.
    def set_block(block)
      # puts "set block #{@identifier}"
      @mutex.synchronize do
        raise RuntimeError, "Thread already busy." if @block

        @block = block
      end
    end

    # Clears the assigned block.
    def reset_block
      @mutex.synchronize { @block = nil }
    end

    # True while a block is assigned to this worker.
    def busy?
      @mutex.synchronize { !@block.nil? }
    end
  end

  attr_accessor :max_size
  attr_reader :workers

  # max_size:: upper bound for the number of workers the pool creates.
  def initialize(max_size = 10)
    @max_size = max_size
    @workers = []
    @mutex = Mutex.new
  end

  # Number of workers created so far.
  def size
    @mutex.synchronize { @workers.size }
  end

  # True while at least one worker has a block assigned.
  def busy?
    @mutex.synchronize { @workers.any?(&:busy?) }
  end

  # Blocks the caller until no worker is busy any more.
  def join
    sleep 0.01 while busy?
  end

  # Hands +block+ to a worker, waiting for one to become available first.
  # Call +join+ afterwards to wait for completion.
  def process(&block)
    wait_for_worker.set_block(block)
  end

  private

  # Polls every 10 ms until a worker is available and returns it.
  def wait_for_worker
    candidate = find_available_worker
    until candidate
      sleep 0.01
      candidate = find_available_worker
    end
    candidate
  end

  # Returns an idle worker, or a newly created one, or nil when every worker
  # is busy and the pool is full.
  def find_available_worker
    @mutex.synchronize { free_worker || create_worker }
  end

  # First worker that is not busy, or nil.
  def free_worker
    @workers.find { |candidate| !candidate.busy? }
  end

  # Creates and registers a new worker unless the pool already holds
  # +max_size+ workers, in which case nil is returned.
  def create_worker
    return nil if @workers.size >= @max_size

    Worker.new.tap { |created| @workers << created }
  end
end