scbi_mapreduce 0.0.40 → 0.0.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/{README.rdoc → README.md} +0 -0
  6. data/Rakefile +8 -28
  7. data/lib/scbi_mapreduce.rb +2 -10
  8. data/lib/scbi_mapreduce/main_worker.rb +20 -6
  9. data/lib/scbi_mapreduce/manager.rb +4 -0
  10. data/lib/scbi_mapreduce/version.rb +3 -0
  11. data/lib/scbi_mapreduce/work_manager.rb +22 -0
  12. data/lib/scbi_mapreduce/worker_launcher.rb +34 -0
  13. data/scbi_mapreduce.gemspec +27 -0
  14. data/skeleton/.DS_Store +0 -0
  15. data/skeleton/dummy_calcs/.DS_Store +0 -0
  16. data/skeleton/old/dummy_calculations/README.txt +25 -0
  17. data/skeleton/old/dummy_calculations/lib/calculations.rb +37 -0
  18. data/skeleton/old/dummy_calculations/lib/thread_pool.rb +107 -0
  19. data/skeleton/old/dummy_calculations/main.rb +59 -0
  20. data/skeleton/old/dummy_calculations/my_worker.rb +56 -0
  21. data/skeleton/old/dummy_calculations/my_worker_manager.rb +52 -0
  22. data/skeleton/old/dummy_calculations/threads_implementation.rb +29 -0
  23. data/skeleton/old/sequences_blast/README.txt +31 -0
  24. data/{test/drb_test/main.rb → skeleton/old/sequences_blast/launch_only_workers.rb} +6 -10
  25. data/skeleton/old/sequences_blast/lib/db/mids.fasta +64 -0
  26. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nhr +0 -0
  27. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nin +0 -0
  28. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nog +0 -0
  29. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsd +48 -0
  30. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsi +0 -0
  31. data/skeleton/old/sequences_blast/lib/db/mids.fasta.nsq +0 -0
  32. data/skeleton/old/sequences_blast/lib/find_mids.rb +134 -0
  33. data/skeleton/old/sequences_blast/lib/thread_pool.rb +107 -0
  34. data/skeleton/old/sequences_blast/linear_implementation.rb +86 -0
  35. data/skeleton/old/sequences_blast/logs/worker0_osiris-2.local_log.txt +13 -0
  36. data/skeleton/old/sequences_blast/logs/worker1_osiris-2.local_log.txt +13 -0
  37. data/skeleton/old/sequences_blast/main.rb +63 -0
  38. data/skeleton/old/sequences_blast/my_worker.rb +58 -0
  39. data/skeleton/old/sequences_blast/my_worker_manager.rb +60 -0
  40. data/skeleton/old/sequences_blast/results.fastq +3996 -0
  41. data/skeleton/old/sequences_blast/test_threads.rb +32 -0
  42. data/skeleton/old/sequences_blast/threads_implementation.rb +108 -0
  43. data/skeleton/remove_mids/lib/db/mids.fasta.nhr +0 -0
  44. data/skeleton/remove_mids/lib/db/mids.fasta.nin +0 -0
  45. data/skeleton/remove_mids/lib/db/mids.fasta.nog +0 -0
  46. data/skeleton/remove_mids/lib/db/mids.fasta.nsd +120 -0
  47. data/skeleton/remove_mids/lib/db/mids.fasta.nsi +0 -0
  48. data/skeleton/remove_mids/lib/db/mids.fasta.nsq +0 -0
  49. data/{.gemtest → skeleton/remove_mids/results.fastq558292} +0 -0
  50. data/skeleton/remove_mids/results.fastq662870 +3996 -0
  51. data/skeleton/simple/launch_only_workers.rb +29 -0
  52. metadata +102 -110
  53. data/History.txt +0 -93
  54. data/Manifest.txt +0 -47
  55. data/PostInstall.txt +0 -7
  56. data/script/console +0 -10
  57. data/script/destroy +0 -14
  58. data/script/generate +0 -14
  59. data/test/drb_test/my_worker.rb +0 -36
  60. data/test/drb_test/my_worker_manager.rb +0 -41
  61. data/test/drb_test/scbi_drb_checkpoint +0 -1
  62. data/test/drb_test/scbi_mapreduce_checkpoint +0 -1
  63. data/test/test_helper.rb +0 -3
  64. data/test/test_scbi_drb.rb +0 -11
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4e90686d178555e506b72d838c6124daa917ce3f
4
+ data.tar.gz: e67e2f68ea90141d054662425c9be5fba4001ca8
5
+ SHA512:
6
+ metadata.gz: a4ab49d1a862ca5432744b2c1b6039dfd6de3c6b391e163cf92713f2b49ea647b2aecc6561b76ca27523fa234f515d37f4ad3aa764db7ef219e15667ce2790ab
7
+ data.tar.gz: 414bd290d851360a7b2d8e4cfd6439d4aa7b46e3eb06798f856f6265992dd94186f69ddbed6dc3faa4b758d821aa35752b1b86fad45b32ea29fd28830ac74fbf
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scbi_mapreduce.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
File without changes
data/Rakefile CHANGED
@@ -1,28 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/scbi_mapreduce'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'scbi_mapreduce' do
14
- self.developer 'Dario Guerrero', 'dariogf@gmail.com'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = [['activesupport','>= 2.0.2']]
18
- self.extra_deps = [['eventmachine','>= 0.12.0'],['json','>= 0']]
19
-
20
-
21
- end
22
-
23
- require 'newgem/tasks'
24
- Dir['tasks/**/*.rake'].each { |t| load t }
25
-
26
- # TODO - want other tests/tasks run by default? Add them to the list
27
- # remove_task :default
28
- # task :default => [:spec, :features]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.pattern = "test/*_test.rb"
8
+ end
@@ -1,15 +1,7 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
2
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
-
4
- # $: << File.join(File.dirname(__FILE__),File.basename(__FILE__,File.extname(__FILE__)))
5
-
6
- $: << File.expand_path('scbi_mapreduce')
7
- # puts $:
1
+ require "scbi_mapreduce/version"
8
2
 
9
3
  module ScbiMapreduce
10
- VERSION = '0.0.40'
11
-
12
-
4
+ # Your code goes here...
13
5
  end
14
6
 
15
7
  class Time
@@ -35,17 +35,31 @@ ip = ARGV[1]
35
35
  port = ARGV[2].to_i
36
36
  custom_worker_file = ARGV[3]
37
37
 
38
- puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
38
+ using_slurm=false
39
39
 
40
- #$: << File.expand_path(File.dirname(custom_worker_file))
40
+ if worker_id.upcase == 'AUTO'
41
+ worker_id = ENV['SLURM_PROCID']
42
+ using_slurm=true
43
+ end
44
+
45
+ if worker_id.to_i == 0 && using_slurm
46
+ puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
47
+ puts "Ignoring first worker in manager node worker_id:#{worker_id}"
48
+ else
49
+
50
+ puts "Launching worker with: worker_id:#{worker_id}, ip:#{ip}, port:#{port}, worker_file:#{custom_worker_file}"
41
51
 
42
- require custom_worker_file
52
+ #$: << File.expand_path(File.dirname(custom_worker_file))
43
53
 
44
- klass_name = File.basename(custom_worker_file,File.extname(custom_worker_file)).camelize
54
+ require custom_worker_file
45
55
 
46
- worker_class = Object.const_get(klass_name)
56
+ klass_name = File.basename(custom_worker_file,File.extname(custom_worker_file)).camelize
47
57
 
48
- worker_class.start_worker(worker_id,ip,port)
58
+ worker_class = Object.const_get(klass_name)
59
+
60
+ worker_class.start_worker(worker_id,ip,port)
61
+
62
+ end
49
63
 
50
64
  puts "FINISH WORKER"
51
65
 
@@ -96,12 +96,16 @@ module ScbiMapreduce
96
96
 
97
97
  end
98
98
 
99
+
100
+
101
+
99
102
  # Start a EventMachine loop acting as a server for incoming workers connections
100
103
  def start_server
101
104
 
102
105
  # set a custom error handler, otherwise errors are silently ignored when they occurs inside a callback.
103
106
  EM.error_handler{ |e|
104
107
  $SERVER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
108
+ @work_manager_class.global_error_received(e)
105
109
  }
106
110
 
107
111
  # $SERVER_LOG.info("Installing INT and TERM traps in #{@work_manager_class}")
@@ -0,0 +1,3 @@
1
+ module ScbiMapreduce
2
+ VERSION = "0.0.45"
3
+ end
@@ -148,6 +148,14 @@ module ScbiMapreduce
148
148
 
149
149
  end
150
150
 
151
+ def self.work_manager_finished
152
+
153
+ end
154
+
155
+ def self.global_error_received(error_exception)
156
+
157
+ end
158
+
151
159
  def next_work
152
160
 
153
161
  end
@@ -385,6 +393,7 @@ module ScbiMapreduce
385
393
 
386
394
  t=Time.now_us
387
395
 
396
+ begin
388
397
  # prepare new data
389
398
  @@chunk_size.times do
390
399
  obj=next_work
@@ -395,6 +404,13 @@ module ScbiMapreduce
395
404
  objs << obj
396
405
  end
397
406
  end
407
+ rescue Exception => e
408
+ $SERVER_LOG.error("Exception creating next_work. Worker, quit!")
409
+ send_object(:sleep)
410
+ self.class.global_error_received(e)
411
+
412
+ #raise e
413
+ end
398
414
 
399
415
  @@total_read_time+=(Time.now_us - t)
400
416
 
@@ -648,8 +664,10 @@ module ScbiMapreduce
648
664
  EM.stop
649
665
  $SERVER_LOG.info "Exiting server"
650
666
 
667
+
651
668
  self.class.end_work_manager
652
669
 
670
+
653
671
  @@total_seconds = (Time.now_us-@@total_seconds)
654
672
  @@total_manager_time= @@total_manager_time
655
673
 
@@ -679,6 +697,8 @@ module ScbiMapreduce
679
697
  @@stats[:connected_workers]=@@max_workers
680
698
  @@stats[:each_transmission_time]=@@each_transmission_time
681
699
  @@stats[:each_worker_time]=@@each_worker_time
700
+
701
+
682
702
 
683
703
 
684
704
  $SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
@@ -703,6 +723,8 @@ module ScbiMapreduce
703
723
  $SERVER_LOG.info "Chunk size: #{@@chunk_size}"
704
724
  $SERVER_LOG.info "Total connected workers: #{@@max_workers}"
705
725
 
726
+ self.class.work_manager_finished
727
+
706
728
  end
707
729
 
708
730
  end
@@ -40,6 +40,34 @@ module ScbiMapreduce
40
40
  end
41
41
 
42
42
  def launch_workers
43
+
44
+ if system("which srun > /dev/null 2>&1") && (!ENV['SLURM_PROCID'].to_s.empty?)
45
+ $LAUNCHER_LOG.info "SLURM DETECTED"
46
+ $LAUNCHER_LOG.info "Launching #{@workers} workers via srun"
47
+ launch_workers_srun
48
+ else
49
+ $LAUNCHER_LOG.info "Launching #{@workers} workers via SSH"
50
+ launch_workers_ssh
51
+ end
52
+
53
+ end
54
+
55
+ def launch_workers_srun
56
+ # TODO - si aqui falla algo, no peta, se bloquea
57
+ $LAUNCHER_LOG.info "Launching #{@workers} srun workers"
58
+
59
+ pid=fork{
60
+ $LAUNCHER_LOG.info "Connecting #{@workers} srun workers to #{@server_ip}:#{@server_port}"
61
+ cmd = "srun #{File.join(File.dirname(__FILE__),'main_worker.rb')} auto #{server_ip} #{server_port} #{@worker_file}"
62
+ $LAUNCHER_LOG.info cmd
63
+ exec(cmd)
64
+ }
65
+
66
+ $LAUNCHER_LOG.info "All workers launched"
67
+
68
+ end
69
+
70
+ def launch_workers_ssh
43
71
  # TODO - si aqui falla algo, no peta, se bloquea
44
72
  $LAUNCHER_LOG.info "Launching #{@workers} local workers"
45
73
  if @workers > 0
@@ -111,6 +139,12 @@ module ScbiMapreduce
111
139
 
112
140
 
113
141
  def launch_external_workers(workers)
142
+
143
+ #skip if slurm detected
144
+ if system("which srun > /dev/null 2>&1")
145
+ return
146
+ end
147
+
114
148
  puts "Launching #{workers.count} external workers: #{workers}"
115
149
  puts "INIT_ENV_FILE: #{@init_env_file}"
116
150
 
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scbi_mapreduce/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scbi_mapreduce"
8
+ spec.version = ScbiMapreduce::VERSION
9
+ spec.authors = ["dariogf"]
10
+ spec.email = ["dariogf@gmail.com"]
11
+ spec.summary = %q{scbi_mapreduce brings parallel and distributed computing capabilities to your code.}
12
+ spec.description = %q{scbi_mapreduce brings parallel and distributed computing capabilities to your code, with a very easy to use framework that allows you to exploit your clustered or cloud computational resources.}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+
24
+ spec.add_runtime_dependency 'eventmachine','>=0.12.0'
25
+ spec.add_runtime_dependency 'json','>=0'
26
+
27
+ end
Binary file
@@ -0,0 +1,25 @@
1
+ Comparison of workers with scbi_mapreduce vs ruby-threads
2
+ =========================================================
3
+
4
+ This application is only useful for testing. You can modify the files
5
+ to perform other tasks. There are other templates available, you
6
+ can list them by issuing this command:
7
+
8
+ scbi_mapreduce
9
+
10
+ You can launch the test application right now with the following command:
11
+
12
+ time ruby main.rb
13
+
14
+
15
+ This launches 4 workers that do some simple calculations (only to keep the
16
+ processor busy), to demonstrate the speed gain against threads. 4 workers are
17
+ used for a quad-core processor. Adjust the number according to your processor cores.
18
+
19
+
20
+ To launch the threaded version of the application, you can do:
21
+
22
+ time ruby threads_implementation.rb
23
+
24
+ You can compare the two times obtained. The threaded version will take the same time with 1 thread or with 100.
25
+
@@ -0,0 +1,37 @@
1
+ module Calculations
2
+
3
+
4
+ def do_dummy_calculations
5
+ t=Time.now
6
+ x=0
7
+ 20000000.times do |i|
8
+ x+=1
9
+ end
10
+ puts Time.now-t
11
+ end
12
+
13
+ def do_dummy_calculations2
14
+ numer_of_calcs=250000
15
+
16
+ # t=Time.now
17
+
18
+ x1=1
19
+ x2=1
20
+
21
+ # do a loop with calculations
22
+ numer_of_calcs.times do |i|
23
+ x=x1+x2
24
+
25
+ x1=x2
26
+ x2=x
27
+
28
+ # puts some info at regular intervals
29
+ # if (i % 100000)==0
30
+ # puts "Calculated #{i}"
31
+ # end
32
+ end
33
+ # puts Time.now-t
34
+
35
+ end
36
+
37
+ end
@@ -0,0 +1,107 @@
1
+ require "thread.rb"
2
+
3
+ ######################################
4
+ # This class creates a thread's pool
5
+ ######################################
6
+
7
+ class ThreadPool
8
+ class Worker
9
+ @@count=0
10
+ def initialize
11
+
12
+ @identifier = @@count
13
+ @@count+=1
14
+
15
+ Thread.abort_on_exception = true
16
+ @mutex = Mutex.new
17
+ @thread = Thread.new do
18
+ while true
19
+ sleep 0.001
20
+ block = get_block
21
+ if block
22
+ begin
23
+ block.call
24
+ rescue Exception => e
25
+ puts "In thread: " + e.message
26
+ raise e
27
+ end
28
+
29
+ reset_block
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+ def get_block
36
+ @mutex.synchronize {@block}
37
+ end
38
+
39
+ def set_block(block)
40
+ # puts "set block #{@identifier}"
41
+ @mutex.synchronize do
42
+ raise RuntimeError, "Thread already busy." if @block
43
+ @block = block
44
+ end
45
+ end
46
+
47
+ def reset_block
48
+ @mutex.synchronize {@block = nil}
49
+ end
50
+
51
+ def busy?
52
+ @mutex.synchronize {!@block.nil?}
53
+ end
54
+ end
55
+
56
+ attr_accessor :max_size
57
+ attr_reader :workers
58
+
59
+ # Defines the max number of threads that will be able to exist
60
+ def initialize(max_size = 10)
61
+ @max_size = max_size
62
+ @workers = []
63
+ @mutex = Mutex.new
64
+ end
65
+
66
+ def size
67
+ @mutex.synchronize {@workers.size}
68
+ end
69
+
70
+ def busy?
71
+ @mutex.synchronize {@workers.any? {|w| w.busy?}}
72
+ end
73
+
74
+ #Allows that main program doesn't finish until the thread have been executed
75
+ def join
76
+ sleep 0.01 while busy?
77
+ end
78
+
79
+ # Begin the block's processing. After using this method, will call to "join"
80
+ def process(&block)
81
+ wait_for_worker.set_block(block)
82
+ end
83
+
84
+ def wait_for_worker
85
+ while true
86
+ worker = find_available_worker
87
+ return worker if worker
88
+ sleep 0.01
89
+ end
90
+ end
91
+
92
+ def find_available_worker
93
+ @mutex.synchronize {free_worker || create_worker}
94
+ end
95
+
96
+ def free_worker
97
+ @workers.each {|w| return w unless w.busy?}; nil
98
+ end
99
+
100
+ def create_worker
101
+ return nil if @workers.size >= @max_size
102
+ worker = Worker.new
103
+ @workers << worker
104
+ worker
105
+ end
106
+ private :wait_for_worker , :find_available_worker , :free_worker , :create_worker
107
+ end