scbi_mapreduce 0.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +49 -0
- data/Manifest.txt +46 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +295 -0
- data/Rakefile +28 -0
- data/bin/scbi_mapreduce +52 -0
- data/lib/scbi_mapreduce.rb +15 -0
- data/lib/scbi_mapreduce/error_handler.rb +15 -0
- data/lib/scbi_mapreduce/main_worker.rb +50 -0
- data/lib/scbi_mapreduce/manager.rb +110 -0
- data/lib/scbi_mapreduce/work_manager.rb +405 -0
- data/lib/scbi_mapreduce/worker.rb +163 -0
- data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
- data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/skeleton/dummy_calcs/README.txt +25 -0
- data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
- data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
- data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
- data/skeleton/dummy_calcs/main.rb +67 -0
- data/skeleton/dummy_calcs/my_worker.rb +56 -0
- data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
- data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
- data/skeleton/remove_mids/README.txt +30 -0
- data/skeleton/remove_mids/launch_only_workers.rb +29 -0
- data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
- data/skeleton/remove_mids/lib/find_mids.rb +191 -0
- data/skeleton/remove_mids/lib/global_match.rb +97 -0
- data/skeleton/remove_mids/linear_implementation.rb +87 -0
- data/skeleton/remove_mids/main.rb +89 -0
- data/skeleton/remove_mids/my_worker.rb +59 -0
- data/skeleton/remove_mids/my_worker_manager.rb +68 -0
- data/skeleton/simple/README.txt +16 -0
- data/skeleton/simple/main.rb +41 -0
- data/skeleton/simple/my_worker.rb +53 -0
- data/skeleton/simple/my_worker_manager.rb +55 -0
- data/test/drb_test/main.rb +31 -0
- data/test/drb_test/my_worker.rb +36 -0
- data/test/drb_test/my_worker_manager.rb +41 -0
- data/test/drb_test/scbi_drb_checkpoint +1 -0
- data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
- data/test/test_helper.rb +3 -0
- data/test/test_scbi_drb.rb +11 -0
- metadata +127 -0
@@ -0,0 +1,53 @@
|
|
1
|
+
# MyWorker describes what each worker does.
# The actual processing of the data happens in this class.
class MyWorker < ScbiMapreduce::Worker
  # Invoked a single time at initialization; use it to set up
  # whatever variables the worker needs.
  def starting_worker
    # The worker log can be used anywhere, for example:
    # $WORKER_LOG.info "Starting a worker"
  end

  # Invoked only once, right after the first connection, when the
  # initial parameters arrive from the manager.
  def receive_initial_config(parameters)
    # Read the parameters here.
    #
    # The worker log can be used anywhere, for example:
    # $WORKER_LOG.info "Params received"
    #
    # Store the received parameters, if any:
    # @params = parameters
  end

  # Invoked for every received batch of work.
  # +objs+ is always an array, so iterate over it when the elements
  # have to be handled independently.
  #
  # Whatever is returned here is handed to the work_received method
  # of the worker_manager subclass.
  def process_object(objs)
    # upcase every received element in place
    objs.each(&:upcase!)

    # hand the (now modified) array back to the manager
    objs
  end

  # Invoked once, when the worker is about to be closed.
  def closing_worker; end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# MyWorkerManager implements the methods used to send data to the
# workers and to receive their results.
class MyWorkerManager < ScbiMapreduce::WorkManager
  # Runs at the start, before any processing takes place.
  # Use it to initialize global state, open files, etc.
  # An instance of MyWorkerManager is created for each worker
  # connection, so shared state is kept in class variables (@@).
  def self.init_work_manager
    # every work unit is a string of 1024 characters
    @@basic_string = 'a' * 1024

    # number of strings still to be handed out
    @@remaining_data = 200_000
  end

  # Runs at the end, once all processing is done.
  # Use it to close files opened in init_work_manager.
  def self.end_work_manager; end

  # Supplies the initial parameters sent to workers; executed once
  # per worker. Nothing is sent here (returns nil).
  def worker_initial_config; end

  # Called every time a worker asks for new work (data could be read
  # from disk here). Returns the work data, or nil when no more data
  # is available.
  def next_work
    @@remaining_data -= 1
    return nil if @@remaining_data < 0

    @@basic_string
  end

  # Called each time a worker has finished a job. Results can be
  # written to disk, aggregated into statistics, etc.
  def work_received(results)
    # write_data_to_disk(results)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Test launcher: sets up logging, initializes MyWorkerManager and
# starts a ScbiMapreduce::Manager server.

# let files that sit next to this script be required by name
$LOAD_PATH << File.dirname(__FILE__)

require "logger"

# $LOAD_PATH << '/Users/dariogf/progs/ruby/gems/scbi_mapreduce/lib'

require 'scbi_mapreduce'
require 'my_worker_manager'

$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# values handed to ScbiMapreduce::Manager.new below
server_ip = '0.0.0.0'
server_port = 50000
worker_count = 8
worker_script = File.join(File.dirname(__FILE__), 'my_worker.rb')

$LOG.info 'Starting server'

MyWorkerManager.init_work_manager

# launch processor server
server = ScbiMapreduce::Manager.new(server_ip, server_port, worker_count,
                                    MyWorkerManager, worker_script, STDOUT)
# server.checkpointing=false
# server.keep_order=true
server.start_server

$LOG.info 'Closing server'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# require 'scbi_mapreduce'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# Minimal worker used by the drb test: it stores the initial
# configuration and returns every received object unchanged.
class MyWorker < ScbiMapreduce::Worker
  # Startup hook. It currently performs no work; if setup code added
  # here fails, the error is printed to stdout instead of propagating.
  def starting_worker
    # $WORKER_LOG.info "Loading actions"
  rescue StandardError => e
    # Rescue StandardError rather than Exception so that signals and
    # exit requests are not swallowed. The message and each backtrace
    # line are printed on separate lines; the splat tolerates a nil
    # backtrace.
    puts [e.message, *e.backtrace].join("\n")
  end

  # Stores the initial parameters received in +obj+ in @params.
  def receive_initial_config(obj)
    # $WORKER_LOG.info "Params received: #{obj.to_json}"
    @params = obj
  end

  # Returns the received object untouched.
  def process_object(obj)
    # Alternatives kept for manual experiments:
    # return `echo #{obj} | tr aeiou AEIOU`.chomp
    # sleep 1
    obj
  end

  # Shutdown hook; nothing to clean up.
  def closing_worker; end
end
|
36
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Work manager used by the drb test: it hands out a decreasing
# integer counter as work and ignores the results.
class MyWorkerManager < ScbiMapreduce::WorkManager
  # Sets the parameters sent to workers and the work counter.
  def self.init_work_manager
    @@params = { algo: 5 }
    @@datos = 200_000
  end

  # Nothing to release at the end.
  def self.end_work_manager; end

  # Returns the parameters sent to each worker.
  def worker_initial_config
    @@params
  end

  # Called every time a worker needs new work.
  # Returns the work data, or nil if no more data is available.
  def next_work
    @@datos -= 1

    # progress trace every 10000 work units
    puts "=> #{@@datos}" if (@@datos % 10_000).zero?

    @@datos < 0 ? nil : @@datos
  end

  # Receives the result of a finished job; currently discards it.
  def work_received(obj)
    # puts obj
    # write_seq_to_disk(seq)
  end
end
|
41
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
95598
|
@@ -0,0 +1 @@
|
|
1
|
+
199909
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scbi_mapreduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.29
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dario Guerrero
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-06-13 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: eventmachine
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.12.0
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: hoe
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 2.8.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id002
|
37
|
+
description: scbi_mapreduce brings parallel and distributed computing capabilities to your code, with a very easy to use framework that allows you to exploit your clustered or cloud computational resources.
|
38
|
+
email:
|
39
|
+
- dariogf@gmail.com
|
40
|
+
executables:
|
41
|
+
- scbi_mapreduce
|
42
|
+
extensions: []
|
43
|
+
|
44
|
+
extra_rdoc_files:
|
45
|
+
- History.txt
|
46
|
+
- Manifest.txt
|
47
|
+
- PostInstall.txt
|
48
|
+
- skeleton/simple/README.txt
|
49
|
+
- skeleton/remove_mids/README.txt
|
50
|
+
- skeleton/dummy_calcs/README.txt
|
51
|
+
files:
|
52
|
+
- History.txt
|
53
|
+
- lib/scbi_mapreduce/error_handler.rb
|
54
|
+
- lib/scbi_mapreduce/main_worker.rb
|
55
|
+
- lib/scbi_mapreduce/manager.rb
|
56
|
+
- lib/scbi_mapreduce/work_manager.rb
|
57
|
+
- lib/scbi_mapreduce/worker.rb
|
58
|
+
- lib/scbi_mapreduce/zlib_serializer.rb
|
59
|
+
- lib/scbi_mapreduce/worker_launcher.rb
|
60
|
+
- lib/scbi_mapreduce.rb
|
61
|
+
- Manifest.txt
|
62
|
+
- PostInstall.txt
|
63
|
+
- Rakefile
|
64
|
+
- README.rdoc
|
65
|
+
- script/console
|
66
|
+
- script/destroy
|
67
|
+
- script/generate
|
68
|
+
- test/drb_test/main.rb
|
69
|
+
- test/drb_test/my_worker.rb
|
70
|
+
- test/drb_test/my_worker_manager.rb
|
71
|
+
- test/drb_test/scbi_drb_checkpoint
|
72
|
+
- test/drb_test/scbi_mapreduce_checkpoint
|
73
|
+
- test/test_helper.rb
|
74
|
+
- test/test_scbi_drb.rb
|
75
|
+
- bin/scbi_mapreduce
|
76
|
+
- skeleton/simple/main.rb
|
77
|
+
- skeleton/simple/my_worker.rb
|
78
|
+
- skeleton/simple/my_worker_manager.rb
|
79
|
+
- skeleton/simple/README.txt
|
80
|
+
- skeleton/remove_mids/launch_only_workers.rb
|
81
|
+
- skeleton/remove_mids/lib/db/mids.fasta
|
82
|
+
- skeleton/remove_mids/lib/find_mids.rb
|
83
|
+
- skeleton/remove_mids/lib/global_match.rb
|
84
|
+
- skeleton/remove_mids/linear_implementation.rb
|
85
|
+
- skeleton/remove_mids/main.rb
|
86
|
+
- skeleton/remove_mids/my_worker.rb
|
87
|
+
- skeleton/remove_mids/my_worker_manager.rb
|
88
|
+
- skeleton/remove_mids/README.txt
|
89
|
+
- skeleton/dummy_calcs/lib/calculations.rb
|
90
|
+
- skeleton/dummy_calcs/lib/thread_pool.rb
|
91
|
+
- skeleton/dummy_calcs/linear_implementation.rb
|
92
|
+
- skeleton/dummy_calcs/main.rb
|
93
|
+
- skeleton/dummy_calcs/my_worker.rb
|
94
|
+
- skeleton/dummy_calcs/my_worker_manager.rb
|
95
|
+
- skeleton/dummy_calcs/README.txt
|
96
|
+
- skeleton/dummy_calcs/threads_implementation.rb
|
97
|
+
homepage: http://www.scbi.uma.es/downloads
|
98
|
+
licenses: []
|
99
|
+
|
100
|
+
post_install_message: PostInstall.txt
|
101
|
+
rdoc_options:
|
102
|
+
- --main
|
103
|
+
- README.rdoc
|
104
|
+
require_paths:
|
105
|
+
- lib
|
106
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: "0"
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: "0"
|
118
|
+
requirements: []
|
119
|
+
|
120
|
+
rubyforge_project: scbi_mapreduce
|
121
|
+
rubygems_version: 1.7.2
|
122
|
+
signing_key:
|
123
|
+
specification_version: 3
|
124
|
+
summary: scbi_mapreduce brings parallel and distributed computing capabilities to your code, with a very easy to use framework that allows you to exploit your clustered or cloud computational resources.
|
125
|
+
test_files:
|
126
|
+
- test/test_helper.rb
|
127
|
+
- test/test_scbi_drb.rb
|