scbi_mapreduce 0.0.40 → 0.0.45

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/{README.rdoc → README.md} +0 -0
  6. data/Rakefile +8 -28
  7. data/lib/scbi_mapreduce.rb +2 -10
  8. data/lib/scbi_mapreduce/main_worker.rb +20 -6
  9. data/lib/scbi_mapreduce/manager.rb +4 -0
  10. data/lib/scbi_mapreduce/version.rb +3 -0
  11. data/lib/scbi_mapreduce/work_manager.rb +22 -0
  12. data/lib/scbi_mapreduce/worker_launcher.rb +34 -0
  13. data/scbi_mapreduce.gemspec +27 -0
  14. data/skeleton/.DS_Store +0 -0
  15. data/skeleton/dummy_calcs/.DS_Store +0 -0
  16. data/skeleton/old/dummy_calculations/README.txt +25 -0
  17. data/skeleton/old/dummy_calculations/lib/calculations.rb +37 -0
  18. data/skeleton/old/dummy_calculations/lib/thread_pool.rb +107 -0
  19. data/skeleton/old/dummy_calculations/main.rb +59 -0
  20. data/skeleton/old/dummy_calculations/my_worker.rb +56 -0
  21. data/skeleton/old/dummy_calculations/my_worker_manager.rb +52 -0
  22. data/skeleton/old/dummy_calculations/threads_implementation.rb +29 -0
  23. data/skeleton/old/sequences_blast/README.txt +31 -0
  24. data/{test/drb_test/main.rb → skeleton/old/sequences_blast/launch_only_workers.rb} +6 -10
  25. data/skeleton/old/sequences_blast/lib/db/mids.fasta +64 -0
  26. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nhr +0 -0
  27. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nin +0 -0
  28. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nog +0 -0
  29. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsd +48 -0
  30. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsi +0 -0
  31. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsq +0 -0
  32. data/skeleton/old/sequences_blast/lib/find_mids.rb +134 -0
  33. data/skeleton/old/sequences_blast/lib/thread_pool.rb +107 -0
  34. data/skeleton/old/sequences_blast/linear_implementation.rb +86 -0
  35. data/skeleton/old/sequences_blast/logs/worker0_osiris-2.local_log.txt +13 -0
  36. data/skeleton/old/sequences_blast/logs/worker1_osiris-2.local_log.txt +13 -0
  37. data/skeleton/old/sequences_blast/main.rb +63 -0
  38. data/skeleton/old/sequences_blast/my_worker.rb +58 -0
  39. data/skeleton/old/sequences_blast/my_worker_manager.rb +60 -0
  40. data/skeleton/old/sequences_blast/results.fastq +3996 -0
  41. data/skeleton/old/sequences_blast/test_threads.rb +32 -0
  42. data/skeleton/old/sequences_blast/threads_implementation.rb +108 -0
  43. data/skeleton/remove_mids/lib/db/mids.fasta.nhr +0 -0
  44. data/skeleton/remove_mids/lib/db/mids.fasta.nin +0 -0
  45. data/skeleton/remove_mids/lib/db/mids.fasta.nog +0 -0
  46. data/skeleton/remove_mids/lib/db/mids.fasta.nsd +120 -0
  47. data/skeleton/remove_mids/lib/db/mids.fasta.nsi +0 -0
  48. data/skeleton/remove_mids/lib/db/mids.fasta.nsq +0 -0
  49. data/{.gemtest → skeleton/remove_mids/results.fastq558292} +0 -0
  50. data/skeleton/remove_mids/results.fastq662870 +3996 -0
  51. data/skeleton/simple/launch_only_workers.rb +29 -0
  52. metadata +102 -110
  53. data/History.txt +0 -93
  54. data/Manifest.txt +0 -47
  55. data/PostInstall.txt +0 -7
  56. data/script/console +0 -10
  57. data/script/destroy +0 -14
  58. data/script/generate +0 -14
  59. data/test/drb_test/my_worker.rb +0 -36
  60. data/test/drb_test/my_worker_manager.rb +0 -41
  61. data/test/drb_test/scbi_drb_checkpoint +0 -1
  62. data/test/drb_test/scbi_mapreduce_checkpoint +0 -1
  63. data/test/test_helper.rb +0 -3
  64. data/test/test_scbi_drb.rb +0 -11
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4e90686d178555e506b72d838c6124daa917ce3f
4
+ data.tar.gz: e67e2f68ea90141d054662425c9be5fba4001ca8
5
+ SHA512:
6
+ metadata.gz: a4ab49d1a862ca5432744b2c1b6039dfd6de3c6b391e163cf92713f2b49ea647b2aecc6561b76ca27523fa234f515d37f4ad3aa764db7ef219e15667ce2790ab
7
+ data.tar.gz: 414bd290d851360a7b2d8e4cfd6439d4aa7b46e3eb06798f856f6265992dd94186f69ddbed6dc3faa4b758d821aa35752b1b86fad45b32ea29fd28830ac74fbf
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scbi_mapreduce.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
File without changes
data/Rakefile CHANGED
@@ -1,28 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/scbi_mapreduce'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'scbi_mapreduce' do
14
- self.developer 'Dario Guerrero', 'dariogf@gmail.com'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = [['activesupport','>= 2.0.2']]
18
- self.extra_deps = [['eventmachine','>= 0.12.0'],['json','>= 0']]
19
-
20
-
21
- end
22
-
23
- require 'newgem/tasks'
24
- Dir['tasks/**/*.rake'].each { |t| load t }
25
-
26
- # TODO - want other tests/tasks run by default? Add them to the list
27
- # remove_task :default
28
- # task :default => [:spec, :features]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.pattern = "test/*_test.rb"
8
+ end
@@ -1,15 +1,7 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
2
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
-
4
- # $: << File.join(File.dirname(__FILE__),File.basename(__FILE__,File.extname(__FILE__)))
5
-
6
- $: << File.expand_path('scbi_mapreduce')
7
- # puts $:
1
+ require "scbi_mapreduce/version"
8
2
 
9
3
  module ScbiMapreduce
10
- VERSION = '0.0.40'
11
-
12
-
4
+ # Your code goes here...
13
5
  end
14
6
 
15
7
  class Time
@@ -35,17 +35,31 @@ ip = ARGV[1]
35
35
  port = ARGV[2].to_i
36
36
  custom_worker_file = ARGV[3]
37
37
 
38
- puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
38
+ using_slurm=false
39
39
 
40
- #$: << File.expand_path(File.dirname(custom_worker_file))
40
+ if worker_id.upcase == 'AUTO'
41
+ worker_id = ENV['SLURM_PROCID']
42
+ using_slurm=true
43
+ end
44
+
45
+ if worker_id.to_i == 0 && using_slurm
46
+ puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
47
+ puts "Ignoring first worker in manager node worker_id:#{worker_id}"
48
+ else
49
+
50
+ puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
41
51
 
42
- require custom_worker_file
52
+ #$: << File.expand_path(File.dirname(custom_worker_file))
43
53
 
44
- klass_name = File.basename(custom_worker_file,File.extname(custom_worker_file)).camelize
54
+ require custom_worker_file
45
55
 
46
- worker_class = Object.const_get(klass_name)
56
+ klass_name = File.basename(custom_worker_file,File.extname(custom_worker_file)).camelize
47
57
 
48
- worker_class.start_worker(worker_id,ip,port)
58
+ worker_class = Object.const_get(klass_name)
59
+
60
+ worker_class.start_worker(worker_id,ip,port)
61
+
62
+ end
49
63
 
50
64
  puts "FINISH WORKER"
51
65
 
@@ -96,12 +96,16 @@ module ScbiMapreduce
96
96
 
97
97
  end
98
98
 
99
+
100
+
101
+
99
102
  # Start a EventMachine loop acting as a server for incoming workers connections
100
103
  def start_server
101
104
 
102
105
  # set a custom error handler, otherwise errors are silently ignored when they occurs inside a callback.
103
106
  EM.error_handler{ |e|
104
107
  $SERVER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
108
+ @work_manager_class.global_error_received(e)
105
109
  }
106
110
 
107
111
  # $SERVER_LOG.info("Installing INT and TERM traps in #{@work_manager_class}")
@@ -0,0 +1,3 @@
1
+ module ScbiMapreduce
2
+ VERSION = "0.0.45"
3
+ end
@@ -148,6 +148,14 @@ module ScbiMapreduce
148
148
 
149
149
  end
150
150
 
151
+ def self.work_manager_finished
152
+
153
+ end
154
+
155
+ def self.global_error_received(error_exception)
156
+
157
+ end
158
+
151
159
  def next_work
152
160
 
153
161
  end
@@ -385,6 +393,7 @@ module ScbiMapreduce
385
393
 
386
394
  t=Time.now_us
387
395
 
396
+ begin
388
397
  # prepare new data
389
398
  @@chunk_size.times do
390
399
  obj=next_work
@@ -395,6 +404,13 @@ module ScbiMapreduce
395
404
  objs << obj
396
405
  end
397
406
  end
407
+ rescue Exception => e
408
+ $SERVER_LOG.error("Exception creating next_work. Worker, quit!")
409
+ send_object(:sleep)
410
+ self.class.global_error_received(e)
411
+
412
+ #raise e
413
+ end
398
414
 
399
415
  @@total_read_time+=(Time.now_us - t)
400
416
 
@@ -648,8 +664,10 @@ module ScbiMapreduce
648
664
  EM.stop
649
665
  $SERVER_LOG.info "Exiting server"
650
666
 
667
+
651
668
  self.class.end_work_manager
652
669
 
670
+
653
671
  @@total_seconds = (Time.now_us-@@total_seconds)
654
672
  @@total_manager_time= @@total_manager_time
655
673
 
@@ -679,6 +697,8 @@ module ScbiMapreduce
679
697
  @@stats[:connected_workers]=@@max_workers
680
698
  @@stats[:each_transmission_time]=@@each_transmission_time
681
699
  @@stats[:each_worker_time]=@@each_worker_time
700
+
701
+
682
702
 
683
703
 
684
704
  $SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
@@ -703,6 +723,8 @@ module ScbiMapreduce
703
723
  $SERVER_LOG.info "Chunk size: #{@@chunk_size}"
704
724
  $SERVER_LOG.info "Total connected workers: #{@@max_workers}"
705
725
 
726
+ self.class.work_manager_finished
727
+
706
728
  end
707
729
 
708
730
  end
@@ -40,6 +40,34 @@ module ScbiMapreduce
40
40
  end
41
41
 
42
42
  def launch_workers
43
+
44
+ if system("which srun > /dev/null 2>&1") && (!ENV['SLURM_PROCID'].to_s.empty?)
45
+ $LAUNCHER_LOG.info "SLURM DETECTED"
46
+ $LAUNCHER_LOG.info "Launching #{@workers} workers via srun"
47
+ launch_workers_srun
48
+ else
49
+ $LAUNCHER_LOG.info "Launching #{@workers} workers via SSH"
50
+ launch_workers_ssh
51
+ end
52
+
53
+ end
54
+
55
+ def launch_workers_srun
56
+ # TODO - if something fails here, it does not crash; it hangs
57
+ $LAUNCHER_LOG.info "Launching #{@workers} srun workers"
58
+
59
+ pid=fork{
60
+ $LAUNCHER_LOG.info "Connecting #{@workers} srun workers to #{@server_ip}:#{@server_port}"
61
+ cmd = "srun #{File.join(File.dirname(__FILE__),'main_worker.rb')} auto #{server_ip} #{server_port} #{@worker_file}"
62
+ $LAUNCHER_LOG.info cmd
63
+ exec(cmd)
64
+ }
65
+
66
+ $LAUNCHER_LOG.info "All workers launched"
67
+
68
+ end
69
+
70
+ def launch_workers_ssh
43
71
  # TODO - if something fails here, it does not crash; it hangs
44
72
  $LAUNCHER_LOG.info "Launching #{@workers} local workers"
45
73
  if @workers > 0
@@ -111,6 +139,12 @@ module ScbiMapreduce
111
139
 
112
140
 
113
141
  def launch_external_workers(workers)
142
+
143
+ #skip if slurm detected
144
+ if system("which srun > /dev/null 2>&1")
145
+ return
146
+ end
147
+
114
148
  puts "Launching #{workers.count} external workers: #{workers}"
115
149
  puts "INIT_ENV_FILE: #{@init_env_file}"
116
150
 
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scbi_mapreduce/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scbi_mapreduce"
8
+ spec.version = ScbiMapreduce::VERSION
9
+ spec.authors = ["dariogf"]
10
+ spec.email = ["dariogf@gmail.com"]
11
+ spec.summary = %q{scbi_mapreduce brings parallel and distributed computing capabilities to your code.}
12
+ spec.description = %q{scbi_mapreduce brings parallel and distributed computing capabilities to your code, with a very easy to use framework that allows you to exploit your clustered or cloud computational resources.}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+
24
+ spec.add_runtime_dependency 'eventmachine','>=0.12.0'
25
+ spec.add_runtime_dependency 'json','>=0'
26
+
27
+ end
Binary file
@@ -0,0 +1,25 @@
1
+ Comparison of workers with scbi_mapreduce vs ruby-threads
2
+ =========================================================
3
+
4
+ This application is only useful for testing. You can modify the files
5
+ to perform other tasks. There are other templates available; you
6
+ can list them by issuing this command:
7
+
8
+ scbi_mapreduce
9
+
10
+ You can launch the tests application right now with the following command:
11
+
12
+ time ruby main.rb
13
+
14
+
15
+ This launches 4 workers that do some simple calculations (only to keep the
16
+ processor busy), to demonstrate the speed gain against threads. 4 workers are
17
+ used for a quad-core processor. Adjust the number according to your processor cores.
18
+
19
+
20
+ To launch the threaded version of the application, you can do:
21
+
22
+ time ruby threads_implementation.rb
23
+
24
+ You can compare the two times obtained. The threaded version will take the same time with 1 thread or with 100.
25
+
@@ -0,0 +1,37 @@
1
+ module Calculations
2
+
3
+
4
+ def do_dummy_calculations
5
+ t=Time.now
6
+ x=0
7
+ 20000000.times do |i|
8
+ x+=1
9
+ end
10
+ puts Time.now-t
11
+ end
12
+
13
+ def do_dummy_calculations2
14
+ numer_of_calcs=250000
15
+
16
+ # t=Time.now
17
+
18
+ x1=1
19
+ x2=1
20
+
21
+ # do a loop with calculations
22
+ numer_of_calcs.times do |i|
23
+ x=x1+x2
24
+
25
+ x1=x2
26
+ x2=x
27
+
28
+ # puts some info at regular intervals
29
+ # if (i % 100000)==0
30
+ # puts "Calculated #{i}"
31
+ # end
32
+ end
33
+ # puts Time.now-t
34
+
35
+ end
36
+
37
+ end
@@ -0,0 +1,107 @@
1
+ require "thread.rb"
2
+
3
+ ######################################
4
+ # This class creates a thread's pool
5
+ ######################################
6
+
7
+ class ThreadPool
8
+ class Worker
9
+ @@count=0
10
+ def initialize
11
+
12
+ @identifier = @@count
13
+ @@count+=1
14
+
15
+ Thread.abort_on_exception = true
16
+ @mutex = Mutex.new
17
+ @thread = Thread.new do
18
+ while true
19
+ sleep 0.001
20
+ block = get_block
21
+ if block
22
+ begin
23
+ block.call
24
+ rescue Exception => e
25
+ puts "In thread: " + e.message
26
+ raise e
27
+ end
28
+
29
+ reset_block
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+ def get_block
36
+ @mutex.synchronize {@block}
37
+ end
38
+
39
+ def set_block(block)
40
+ # puts "set block #{@identifier}"
41
+ @mutex.synchronize do
42
+ raise RuntimeError, "Thread already busy." if @block
43
+ @block = block
44
+ end
45
+ end
46
+
47
+ def reset_block
48
+ @mutex.synchronize {@block = nil}
49
+ end
50
+
51
+ def busy?
52
+ @mutex.synchronize {!@block.nil?}
53
+ end
54
+ end
55
+
56
+ attr_accessor :max_size
57
+ attr_reader :workers
58
+
59
+ # Defines the max number of threads that will be able to exist
60
+ def initialize(max_size = 10)
61
+ @max_size = max_size
62
+ @workers = []
63
+ @mutex = Mutex.new
64
+ end
65
+
66
+ def size
67
+ @mutex.synchronize {@workers.size}
68
+ end
69
+
70
+ def busy?
71
+ @mutex.synchronize {@workers.any? {|w| w.busy?}}
72
+ end
73
+
74
+ # Keeps the caller waiting until no worker in the pool is busy
75
+ def join
76
+ sleep 0.01 while busy?
77
+ end
78
+
79
+ # Start processing the block. After calling this method, call "join" to wait for completion
80
+ def process(&block)
81
+ wait_for_worker.set_block(block)
82
+ end
83
+
84
+ def wait_for_worker
85
+ while true
86
+ worker = find_available_worker
87
+ return worker if worker
88
+ sleep 0.01
89
+ end
90
+ end
91
+
92
+ def find_available_worker
93
+ @mutex.synchronize {free_worker || create_worker}
94
+ end
95
+
96
+ def free_worker
97
+ @workers.each {|w| return w unless w.busy?}; nil
98
+ end
99
+
100
+ def create_worker
101
+ return nil if @workers.size >= @max_size
102
+ worker = Worker.new
103
+ @workers << worker
104
+ worker
105
+ end
106
+ private :wait_for_worker , :find_available_worker , :free_worker , :create_worker
107
+ end