scbi_mapreduce 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/History.txt +49 -0
  2. data/Manifest.txt +46 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +295 -0
  5. data/Rakefile +28 -0
  6. data/bin/scbi_mapreduce +52 -0
  7. data/lib/scbi_mapreduce.rb +15 -0
  8. data/lib/scbi_mapreduce/error_handler.rb +15 -0
  9. data/lib/scbi_mapreduce/main_worker.rb +50 -0
  10. data/lib/scbi_mapreduce/manager.rb +110 -0
  11. data/lib/scbi_mapreduce/work_manager.rb +405 -0
  12. data/lib/scbi_mapreduce/worker.rb +163 -0
  13. data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
  14. data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
  15. data/script/console +10 -0
  16. data/script/destroy +14 -0
  17. data/script/generate +14 -0
  18. data/skeleton/dummy_calcs/README.txt +25 -0
  19. data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
  20. data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
  21. data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
  22. data/skeleton/dummy_calcs/main.rb +67 -0
  23. data/skeleton/dummy_calcs/my_worker.rb +56 -0
  24. data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
  25. data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
  26. data/skeleton/remove_mids/README.txt +30 -0
  27. data/skeleton/remove_mids/launch_only_workers.rb +29 -0
  28. data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
  29. data/skeleton/remove_mids/lib/find_mids.rb +191 -0
  30. data/skeleton/remove_mids/lib/global_match.rb +97 -0
  31. data/skeleton/remove_mids/linear_implementation.rb +87 -0
  32. data/skeleton/remove_mids/main.rb +89 -0
  33. data/skeleton/remove_mids/my_worker.rb +59 -0
  34. data/skeleton/remove_mids/my_worker_manager.rb +68 -0
  35. data/skeleton/simple/README.txt +16 -0
  36. data/skeleton/simple/main.rb +41 -0
  37. data/skeleton/simple/my_worker.rb +53 -0
  38. data/skeleton/simple/my_worker_manager.rb +55 -0
  39. data/test/drb_test/main.rb +31 -0
  40. data/test/drb_test/my_worker.rb +36 -0
  41. data/test/drb_test/my_worker_manager.rb +41 -0
  42. data/test/drb_test/scbi_drb_checkpoint +1 -0
  43. data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
  44. data/test/test_helper.rb +3 -0
  45. data/test/test_scbi_drb.rb +11 -0
  46. metadata +127 -0
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ require 'logger'
5
+
6
+ # require 'error_handler'
7
+
8
+ module ScbiMapreduce
9
+
10
# EventMachine connection that receives work objects from a server, runs
# +process_object+ on each one and sends the result back over the same
# connection using EM's ObjectProtocol.
#
# State shared through class variables (set by +Worker.start_worker+):
#   @@worker_id - identifier given to this worker process
#   @@count     - starts at -1; becomes 0 once the initial configuration
#                 message has been consumed, then counts processed objects
class Worker < EventMachine::Connection
  include EM::P::ObjectProtocol

  # ---- Extension points ------------------------------------------------
  # The methods below are intentionally empty; they are called by the
  # connection callbacks further down.

  # Called with the :initial_config entry of the first message received,
  # unless that message is :no_initial_config.
  def receive_initial_config(obj)
  end

  # Called with the +data+ attribute of every work object. The return
  # value is stored back into the object before it is sent to the server.
  def process_object(obj)
  end

  # Called once, right after the initial configuration message is handled.
  def starting_worker
  end

  # Called from +post_init+.
  def worker_connected
  end

  # Called from +stop_worker+ after the connection and event loop have been
  # asked to stop.
  def closing_worker
  end

  ######################

  def initialize(*args)
    super
  end

  # EventMachine callback. Logs the connection and runs the
  # +worker_connected+ hook; any exception raised by the hook is logged
  # and swallowed so that it does not propagate into the reactor.
  def post_init
    $WORKER_LOG.info('WORKER CONNECTED')

    worker_connected
  rescue Exception => e
    $WORKER_LOG.error("Exiting worker #{@@worker_id} due to exception:\n#{e.message}\n#{e.backtrace.join("\n")}")
  end

  # ObjectProtocol callback, invoked once per object received.
  #
  # * First object (while @@count is negative): treated as the initial
  #   configuration message, then +starting_worker+ is run.
  # * :quit: stops the worker.
  # * Anything else: a work object responding to +data+ / +data=+; it is
  #   processed and sent back. If processing raises, a WorkerError wrapping
  #   the exception is sent back instead.
  def receive_object(obj)
    if @@count < 0
      @@count += 1

      if obj != :no_initial_config
        receive_initial_config(obj[:initial_config])
        $WORKER_LOG.info('Initial config: received')
      else
        $WORKER_LOG.info('Initial config: empty config')
      end

      # At first iteration, start worker
      starting_worker
      return
    end

    if obj == :quit
      stop_worker
      return
    end

    @@count += 1

    # NOTE - THE MODIFIED OBJECT HAS TO BE PASSED ALONG
    # Deferred alternative, kept for reference:
    # operation = proc {
    #   # calculations
    #   obj=process_object(obj)
    # }
    #
    # callback = proc { |modified_obj|
    #   send_object(modified_obj)
    # }
    #
    # EventMachine.defer(operation, callback)

    begin
      obj.data = process_object(obj.data)
      send_object(obj)
    rescue Exception => e
      # Every failure is reported to the server rather than raised here.
      $WORKER_LOG.error("Error processing object\n#{e.message}:\n#{e.backtrace.join("\n")}")
      send_object(WorkerError.new('Message', e, @@worker_id, obj))
    end
  end

  # EventMachine callback run when the connection is closed; stops the
  # event loop so that +start_worker+ can return.
  def unbind
    $WORKER_LOG.info "EXITING WORKER"
    EventMachine::stop_event_loop
  end

  # Closes the connection, stops the event loop and runs the
  # +closing_worker+ hook.
  def stop_worker
    close_connection
    EventMachine::stop_event_loop
    closing_worker
  end

  # Configures logging, connects to ip:port and runs the event loop until
  # it is stopped; afterwards logs how many objects were processed.
  #
  # worker_id - identifier of this worker (String or Integer)
  # ip, port  - address passed to EventMachine::connect
  # log_file  - file name or IO-like object for Logger; defaults to
  #             logs/worker<id>_<hostname>_log.txt
  def self.start_worker(worker_id, ip, port, log_file = nil)
    @@count = -1
    @@worker_id = worker_id

    # Interpolation (rather than String#+) also accepts an Integer worker_id.
    log_file = "logs/worker#{worker_id}_#{`hostname`.chomp}_log.txt" if log_file.nil?

    # Only file names need a directory; IO-like targets (STDOUT, STDERR,
    # StringIO...) are handed to Logger untouched. mkdir_p does nothing when
    # the directory already exists.
    FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.respond_to?(:write)

    $WORKER_LOG = Logger.new(log_file)
    $WORKER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

    $LOG = $WORKER_LOG

    started_at = Time.now

    EM.error_handler { |e|
      $WORKER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
    }

    EventMachine::run {
      EventMachine::connect ip, port, self
      $WORKER_LOG.info "Worker connected to #{ip}:#{port}"
    }

    total_seconds = Time.now - started_at
    $WORKER_LOG.info "Client #{@@worker_id} processed: #{@@count} objs"
    $WORKER_LOG.info "Client #{@@worker_id} proc rate: #{@@count/total_seconds.to_f} objects/seg"
  end
end
162
+
163
+ end
@@ -0,0 +1,96 @@
1
require 'fileutils'
require 'logger'

module ScbiMapreduce

  # Command used to run main_worker.rb, both locally and over ssh.
  INTERPRETER='ruby'

  # Starts worker processes that connect back to a server: local ones via
  # fork, remote ones via ssh.
  class WorkerLauncher

    attr_accessor :server_ip, :server_port

    # server_ip, server_port - address handed to every worker
    # workers                - number of local workers for +launch_workers+
    # worker_file            - passed as last argument to main_worker.rb
    # log_file               - file name or IO-like object for Logger;
    #                          defaults to logs/launcher_log.txt
    # init_env_file          - optional shell file sourced on remote machines
    #                          before the worker is started
    def initialize(server_ip, server_port, workers, worker_file, log_file = nil, init_env_file = nil)
      @server_ip = server_ip
      @server_port = server_port
      @worker_file = worker_file
      @workers = workers
      @init_env_file = init_env_file

      log_file = 'logs/launcher_log.txt' if log_file.nil?

      # Only file names need a directory; IO-like targets (STDOUT, STDERR,
      # StringIO...) go straight to Logger. mkdir_p does nothing when the
      # directory already exists.
      FileUtils.mkdir_p(File.dirname(log_file)) unless log_file.respond_to?(:write)

      $LAUNCHER_LOG = Logger.new(log_file)
      $LAUNCHER_LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Launches the local workers and blocks until every child process exits.
    def launch_workers_and_wait
      launch_workers
      Process.waitall
    end

    # Forks one child per local worker; does nothing when the configured
    # number of workers is not positive.
    def launch_workers
      return unless @workers > 0

      $LAUNCHER_LOG.info "Connecting #{@workers} local workers to #{@server_ip}:#{@server_port}"
      @workers.times do |i|
        fork do
          launch_worker(i, @server_ip, @server_port)
          # Not reached with the default launch_worker, which replaces the
          # child process through exec.
          $LAUNCHER_LOG.info "Worker #{i} launched [#{@server_ip}:#{@server_port}]"
        end
      end
      $LAUNCHER_LOG.info "All workers launched"
    end

    # override this
    #
    # Replaces the current process with the worker command, so it is meant
    # to be called inside a forked child.
    def launch_worker(worker_id, server_ip, server_port)
      cmd = "#{INTERPRETER} #{File.join(File.dirname(__FILE__),'main_worker.rb')} #{worker_id.to_s} #{server_ip} #{server_port} #{@worker_file}"
      puts cmd
      exec(cmd)
    end

    # Starts one worker per entry of +workers+ through ssh. Each entry is
    # used as the ssh destination; worker ids are assigned from 0 in order.
    def launch_external_workers(workers)
      puts "Launching #{workers.count} external workers: #{workers}"

      # Optional environment set-up sourced before the worker command.
      init = ''
      if @init_env_file
        path = File.expand_path(@init_env_file)
        if File.exist?(path)
          puts "File #{path} exists, using it"
          init = ". #{path}; "
        end
      end

      # The remote command first changes to the launcher's working directory.
      pwd = `pwd`.chomp
      cd = File.exist?(pwd) ? "cd #{pwd}; " : ''

      worker_script = File.join(File.dirname(__FILE__), 'main_worker.rb')

      workers.each_with_index do |machine, worker_id|
        cmd = "ssh #{machine} \"#{init} #{cd} #{INTERPRETER} #{worker_script} #{worker_id} #{@server_ip} #{@server_port} #{@worker_file}\""
        $LAUNCHER_LOG.info cmd

        fork { exec(cmd) }
      end
    end

  end
end
@@ -0,0 +1,32 @@
1
+ require 'zlib'
2
+ require 'json'
3
+
4
+ # A serializer class that provides compression
5
+ #
6
+ # To use this instead of the default Marshal serializer, redefine the serializer method in your worker and worker_manager like this:
7
+ #
8
+ # def serializer
9
+ # ZlibSerializer
10
+ #
11
+ # end
12
+ #
13
+
14
# Serializer that Marshal-encodes an object and compresses the bytes with
# zlib. +dump+ and +load+ are inverses of each other.
class ZlibSerializer

  # zlib window size (log2) used for both compression and decompression.
  WINDOW_BITS = 15
  # zlib memory level used for compression.
  MEM_LEVEL = 9

  # Returns +data+ marshalled and deflated at best compression, as a String.
  # Raises whatever Marshal.dump raises for objects it cannot serialize.
  def self.dump(data)
    input = Marshal.dump(data)
    zipper = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, WINDOW_BITS, MEM_LEVEL)
    begin
      zipper.deflate(input, Zlib::FINISH)
    ensure
      # Release the zlib stream even when deflate raises.
      zipper.close
    end
  end

  # Inflates +input+ (a String produced by +dump+) and returns the
  # unmarshalled object. Raises Zlib::Error when +input+ is not valid zlib
  # data.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects; only feed it
  # data coming from a trusted peer.
  def self.load(input)
    unzipper = Zlib::Inflate.new(WINDOW_BITS)
    inflated = begin
      unzipper.inflate(input)
    ensure
      # Release the zlib stream even when inflate raises.
      unzipper.close
    end

    Marshal.load(inflated)
  end
end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has irb/completion and
# lib/scbi_mapreduce.rb preloaded.

# On mswin/mingw platforms the irb launcher is named irb.bat.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

# Each entry is a " -r <library>" option for irb.
# Perhaps use a console_lib to store any extra methods I may want available in the console
# (e.g. " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}")
libs = [
  " -r irb/completion",
  " -r #{File.dirname(__FILE__) + '/../lib/scbi_mapreduce.rb'}"
].join

puts "Loading scbi_mapreduce gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's Destroy script with the command-line arguments.

# Absolute path of the directory above this script's directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load rubygems first and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading --help / -h flag is dropped before the arguments are passed on.
if %w[--help -h].include?(ARGV.first)
  ARGV.shift
end

component_sources = %i[rubygems newgem newgem_theme test_unit]
RubiGen::Base.use_component_sources!(component_sources)

destroyer = RubiGen::Scripts::Destroy.new
destroyer.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's Generate script with the command-line arguments.

# Absolute path of the directory above this script's directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load rubygems first and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading --help / -h flag is dropped before the arguments are passed on.
if %w[--help -h].include?(ARGV.first)
  ARGV.shift
end

component_sources = %i[rubygems newgem newgem_theme test_unit]
RubiGen::Base.use_component_sources!(component_sources)

generator = RubiGen::Scripts::Generate.new
generator.run(ARGV)
@@ -0,0 +1,25 @@
1
+ Comparison of workers with scbi_mapreduce vs ruby-threads
2
+ =========================================================
3
+
4
+ This application is only useful for testing. You can modify the files
5
+ to perform other tasks. There are other templates available, you
6
+ can list them by issuing this command:
7
+
8
+ scbi_mapreduce
9
+
10
+ You can launch the test application right now with the following command:
11
+
12
+ time ruby main.rb
13
+
14
+
15
+ This launches 4 workers that do some simple calculations (only to keep
16
+ the processor busy), to demonstrate the speed gain against threads. 4 workers are
17
+ used for a quad-core processor. Adjust the number according to your processor cores.
18
+
19
+
20
+ To launch the threaded version of the application, you can do:
21
+
22
+ time ruby threads_implementation.rb
23
+
24
+ You can compare the two times obtained. The threaded version takes the same time with 1 thread or with 100.
25
+
@@ -0,0 +1,37 @@
1
# CPU-bound busy work used to compare execution strategies.
module Calculations

  # Increments a counter +iterations+ times and prints the elapsed seconds.
  #
  # iterations - number of loop iterations (default: 20_000_000)
  #
  # Returns nil.
  def do_dummy_calculations(iterations = 20_000_000)
    # A monotonic clock is not affected by wall-clock adjustments.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    x = 0
    iterations.times { x += 1 }
    puts Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end

  # Runs +iterations+ steps of an additive recurrence (each new value is the
  # sum of the previous two, starting from 1 and 1). The values themselves
  # are discarded.
  #
  # iterations - number of steps (default: 250_000)
  #
  # Returns +iterations+.
  def do_dummy_calculations2(iterations = 250_000)
    x1 = 1
    x2 = 1

    # do a loop with calculations
    iterations.times do
      x1, x2 = x2, x1 + x2
    end
  end

end
@@ -0,0 +1,107 @@
1
+ require "thread.rb"
2
+
3
+ ######################################
4
+ # This class creates a thread's pool
5
+ ######################################
6
+
7
# Pool of at most +max_size+ threads. Workers are created on demand by
# +process+ and poll for a block to run.
class ThreadPool

  # One pool thread plus the single block it is currently assigned.
  class Worker
    @@count=0

    # Takes the next identifier from the class-wide counter, enables
    # Thread.abort_on_exception globally and starts the polling thread.
    def initialize
      @identifier = @@count
      @@count += 1

      Thread.abort_on_exception = true
      @mutex = Mutex.new
      @thread = Thread.new { poll_for_work }
    end

    # Block currently assigned to this worker, or nil.
    def get_block
      @mutex.synchronize do
        @block
      end
    end

    # Assigns +block+ to this worker. Raises RuntimeError when a block is
    # already assigned.
    def set_block(block)
      @mutex.synchronize do
        raise "Thread already busy." if @block

        @block = block
      end
    end

    # Clears the assigned block, marking the worker as free.
    def reset_block
      @mutex.synchronize do
        @block = nil
      end
    end

    # True while a block is assigned.
    def busy?
      !get_block.nil?
    end

    # Thread body: every millisecond checks for an assigned block, runs it
    # and then clears it.
    def poll_for_work
      while true
        sleep 0.001
        task = get_block
        next unless task

        run_task(task)
        reset_block
      end
    end

    # Calls +task+; on any exception prints its message and re-raises it.
    def run_task(task)
      task.call
    rescue Exception => e
      puts "In thread: " + e.message
      raise
    end

    private :poll_for_work, :run_task
  end

  attr_accessor :max_size
  attr_reader :workers

  # max_size - maximum number of worker threads the pool will create
  def initialize(max_size = 10)
    @max_size = max_size
    @workers = []
    @mutex = Mutex.new
  end

  # Number of workers created so far.
  def size
    @mutex.synchronize do
      @workers.size
    end
  end

  # True while at least one worker has a block assigned.
  def busy?
    @mutex.synchronize do
      @workers.any?(&:busy?)
    end
  end

  # Blocks the caller until no worker is busy, so the main program does not
  # finish before the submitted blocks have run.
  def join
    while busy?
      sleep 0.01
    end
  end

  # Hands +block+ to a free worker, waiting for one if necessary. Call
  # +join+ afterwards to wait for completion.
  def process(&block)
    worker = wait_for_worker
    worker.set_block(block)
  end

  private

  # Polls every 10 ms until a worker is available and returns it.
  def wait_for_worker
    until (worker = find_available_worker)
      sleep 0.01
    end
    worker
  end

  # A free worker, a newly created one, or nil when the pool is full and
  # every worker is busy.
  def find_available_worker
    @mutex.synchronize do
      free_worker || create_worker
    end
  end

  # First worker without an assigned block, or nil.
  def free_worker
    @workers.find { |candidate| !candidate.busy? }
  end

  # Adds a worker unless the pool already holds +max_size+ of them.
  def create_worker
    return nil unless @workers.size < @max_size

    Worker.new.tap { |created| @workers << created }
  end
end