pmux 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/README.md +36 -0
- data/Rakefile +4 -0
- data/bin/pmux +5 -0
- data/lib/pmux/application.rb +166 -0
- data/lib/pmux/cleaner.rb +28 -0
- data/lib/pmux/fiber18.rb +64 -0
- data/lib/pmux/fixcmd.rb +25 -0
- data/lib/pmux/gatherer.rb +23 -0
- data/lib/pmux/handler.rb +262 -0
- data/lib/pmux/job.rb +101 -0
- data/lib/pmux/joblogger.rb +46 -0
- data/lib/pmux/mapper.rb +151 -0
- data/lib/pmux/mros.rb +207 -0
- data/lib/pmux/multi_session.rb +309 -0
- data/lib/pmux/pipeio.rb +19 -0
- data/lib/pmux/plugin.rb +23 -0
- data/lib/pmux/q.rb +3 -0
- data/lib/pmux/reducer.rb +90 -0
- data/lib/pmux/storage_adapter.rb +105 -0
- data/lib/pmux/task_dispatcher.rb +167 -0
- data/lib/pmux/task_queue.rb +11 -0
- data/lib/pmux/task_scheduler.rb +166 -0
- data/lib/pmux/util_daemon.rb +18 -0
- data/lib/pmux/util_logger.rb +137 -0
- data/lib/pmux/version.rb +3 -0
- data/lib/pmux/worker.rb +91 -0
- data/lib/pmux/writer.rb +19 -0
- data/lib/pmux.rb +27 -0
- data/pmux.gemspec +24 -0
- data/test/mock_mros.rb +284 -0
- data/test/mock_pipeio.rb +26 -0
- data/test/mock_world.rb +193 -0
- data/test/mock_xattr.rb +10 -0
- data/test/runner.rb +10 -0
- data/test/test_application.rb +13 -0
- data/test/test_fixcmd.rb +17 -0
- data/test/test_handler.rb +15 -0
- data/test/test_i_mapreduce.rb +169 -0
- data/test/test_i_mros.rb +28 -0
- data/test/test_i_msession.rb +27 -0
- data/test/test_job.rb +35 -0
- data/test/test_joblogger.rb +16 -0
- data/test/test_mapper.rb +60 -0
- data/test/test_pipeio.rb +24 -0
- data/test/test_storage_adapter.rb +63 -0
- data/test/test_task_queue.rb +87 -0
- data/test/test_task_scheduler.rb +39 -0
- data/test/txt/0.log +105 -0
- data/test/txt/1.log +105 -0
- data/test/txt/2.log +105 -0
- data/test/txt/3.log +105 -0
- data/test/txt/4.log +105 -0
- data/test/txt/5.log +105 -0
- data/test/txt/6.log +105 -0
- data/test/txt/7.log +105 -0
- data/test/txt/8.log +105 -0
- data/test/unittest_helper.rb +57 -0
- metadata +153 -0
data/.gitignore
ADDED
data/README.md
ADDED
@@ -0,0 +1,36 @@
# Pmux: pipeline multiplexer

Pmux is a lightweight file-based MapReduce system, written in Ruby.
Applying the philosophy of Unix pipeline processing to distributed
computing on a GlusterFS cluster, pmux provides a tool capable of
handling large amounts of data stored in files.

## Requirements
* ruby 1.9.2 or higher
* msgpack-rpc
* net-ssh, net-scp
* gflocator
* GlusterFS 3.3.0 or higher, native client (FUSE)

## Install

on all GlusterFS server nodes:

    gem install pmux

on the GlusterFS client node

    gem install pmux
    gem install gflocator
    sudo gflocator

## Usage

show status

    $ pmux --status
    host0.example.com: pmux 0.1.0, num_cpu=8, ruby 1.9.3

distributed grep

    $ pmux --mapper="grep PATTERN" /glusterfs/xxx/*.log
data/Rakefile
ADDED
data/bin/pmux
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'optparse'

module Pmux
  # Command-line entry point for pmux.  Parses options, prepares the
  # per-user working directories, then dispatches to one of four modes:
  # --server (worker-side RPC server), --status, --lookup, or the
  # default map-reduce run.
  class Application
    # Shared default options hash; mutated in place by #run.
    OPTS = {}

    # Main entry point: fill +options+ with defaults (user name, root /
    # tmp / log directories), load plugins, resolve the --hosts list to
    # IP addresses, and dispatch on the selected mode.
    def run options=OPTS
      optparser = optparse options
      optparser.parse!
      options[:program_name] = optparser.program_name
      # Fall back through the usual environment variables to find the
      # login user.  NOTE(review): Etc is not required in this file —
      # presumably loaded elsewhere in the gem; confirm.
      options[:user] ||=
        (ENV['USER'] || ENV['LOGNAME'] || Etc.getlogin || Etc.getpwuid.name)

      root_dir = (options[:root_dir] ||=
        File.expand_path "~/.#{options[:program_name]}")
      tmp_dir = (options[:tmp_dir] ||= root_dir + '/tmp')
      log_dir = (options[:log_dir] ||= root_dir + '/log')
      [root_dir, tmp_dir, log_dir].each {|dir|
        Dir.mkdir dir unless File.exist? dir
      }

      Plugin.load_plugins unless options[:disable_plugins]
      addrs = (options[:hosts] || '').split(',').map {|host| getaddr host}

      case
      when options[:server]
        # Make the UNIX socket path unique per process by appending the
        # pid (mutates the stored string in place).
        (options[:sock_path] ||= '/tmp/.pmuxsock') << ".#{$$}"
        run_server options
      when options[:status]
        show_status addrs, options
      when options[:lookup]
        lookup addrs, options
      else
        run_mr addrs, options
      end
    end

    # Resolve a hostname to its IP address string via sockaddr
    # pack/unpack (port 0 is a dummy; only the address part is kept).
    def getaddr host
      sa = Socket.pack_sockaddr_in 0, host
      port, addr = Socket.unpack_sockaddr_in sa
      addr
    end

    # Worker mode: serve RPC requests over stdio (for ssh-spawned
    # workers) and over a local UNIX socket.  The socket file is
    # removed on the way out, even on error.
    def run_server options
      STDOUT.sync = true
      server = MR::Server.new
      handler = Pmux::Handler.new server, options
      pipe_transport = MR::PipeTransport.new STDIN, STDOUT, STDERR
      server.listen pipe_transport, handler
      unix_transport = MR::UNIXServerTransport.new options[:sock_path]
      server.listen unix_transport, handler
      server.run
    rescue SystemCallError
      # deliberately swallowed (e.g. socket setup failure); cleanup below
    ensure
      File.unlink options[:sock_path] rescue nil
    end

    # --status mode: connect to every node, print one line of
    # version/cpu information per host, then ask the remote servers to
    # quit.
    def show_status addrs, options
      addrs = ['localhost'] if addrs.empty?
      adapter = StorageAdapter.create options[:storage_name], addrs
      msession = MRSession.new addrs, options
      msession.on_error {|addr, err| $stderr.printf "%s: %s\n", addr, err.to_s}
      msession.connect

      mf = msession.multicast_call_async :get_properties
      templ = "%s: %s %s, num_cpu=%s, ruby %s\n"
      mf.on_success {|f|
        props = f.get
        print templ % [props['hostname'],
          props['program_name'], props['VERSION'],
          props['num_cpu'], props['RUBY_VERSION']]
      }
      mf.on_error {|f| printf "%s: ERROR: %s\n", f.addr, f.error}
      mf.join_all

      # Suppress connection errors while shutting the remotes down.
      msession.on_error {}
      mf = msession.multicast_call_async :quit
      mf.join_all
    end

    # --lookup mode: print every (host, path) replica location of the
    # named file as reported by the storage locator.
    def lookup addrs, options
      adapter = StorageAdapter.create options[:storage_name], addrs
      name = options[:lookup]
      locator_host = options[:locator_host] || addrs.first || 'localhost'
      locator_port = options[:locator_port]
      adapter.connect_to_storage locator_host, locator_port
      adapter.get_files [name]
      locations = adapter.lookup_file name
      $stderr.puts "name: #{name}"
      for host, path in locations
        $stderr.puts "location: #{host}:#{path}"
      end
    end

    # Default mode: run a map-reduce job over the files named in +argv+.
    def run_mr addrs, options, argv=ARGV
      options[:__start_time] = Time.now
      if options[:storage_name] == 'local' and addrs.empty?
        addrs = ['localhost']
      end
      adapter = StorageAdapter.create options[:storage_name], addrs, options
      locator_host = options[:locator_host] || addrs.first || 'localhost'
      locator_port = options[:locator_port]

      puts "storage: #{options[:storage_name]}" if options[:verbose]
      begin
        adapter.connect_to_storage locator_host, locator_port
        files = adapter.get_files argv, options[:expand_glob]
        options[:__get_files_time] = Time.now
      rescue
        # Any storage failure aborts the whole run with a message.
        STDERR.puts "Storage Error: #{$!}"
        return
      end
      raise RuntimeError, "no hostname specified" if adapter.addrs.empty?

      msession = MRSession.new adapter.addrs, options
      msession.on_error {|addr, err|
        $stderr.printf "%s: %s\n", addr, err.inspect if err
      }
      msession.connect

      # Specifying a reducer implies at least one reduce partition.
      if options[:reducer]
        options[:num_r] ||= 1
      end
      dispatcher = TaskDispatcher.new options, adapter, msession
      job = Job.new options, files
      job.mk_reducer_addrs adapter.addrs
      dispatcher.run job
    end

    # Build the OptionParser.  Handlers write straight into +opts+; the
    # parser is also given an +options+ accessor (singleton class) so
    # callers can reach the hash through the parser object.
    def optparse opts
      op = OptionParser.new
      op.on('--debug') {$debug = true; STDOUT.sync = true}
      op.on('--server') {opts[:server] = true}
      op.on('--argv=FILES') {}
      op.on('--disable-plugins') {opts[:disable_plugins] = true}
      op.on('--expand-glob') {opts[:expand_glob] = true}
      op.on('--ff=FF', Integer) {|arg| opts[:ff] = arg}
      op.on('--hosts=HOST,HOST,...') {|arg| opts[:hosts] = arg}
      op.on('--ipaddr=ADDR') {|arg| opts[:ipaddr] = arg}
      op.on('--locator-host=HOST') {|arg| opts[:locator_host] = arg}
      op.on('--locator-port=PORT', Integer) {|arg|
        opts[:locator_port] = arg}
      op.on('--lookup=FILE') {|arg| opts[:lookup] = arg}
      op.on('--mapper=CMD') {|arg| opts[:mapper] = arg}
      op.on('--reducer=CMD') {|arg| opts[:reducer] = arg}
      op.on('--num-r=NUM', Integer) {|arg| opts[:num_r] = arg}
      op.on('--root-dir=DIR') {|arg| opts[:root_dir] = arg}
      op.on('--sec', '--connect-secondary') {opts[:connect_secondary] = true}
      op.on('--ship-file=FILE', '--file=FILE') {|arg|
        (opts[:ship_files] ||= []).push arg}
      op.on('--status') {opts[:status] = true}
      op.on('--storage=STORAGE_NAME') {|arg|
        opts[:storage_name] = arg}
      op.on('--verbose') {opts[:verbose] = true}
      op.on('--version') {
        puts "#{op.program_name} #{VERSION}"
        exit
      }
      class <<op
        attr_accessor :options
      end
      op.options = opts
      op
    end
  end
end
|
data/lib/pmux/cleaner.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'fileutils'

module Pmux
  # Removes stale temporary files and directories matching a glob
  # pattern, optionally from a detached background process.
  class Cleaner
    # glob_pat:: Dir.glob pattern selecting candidate paths for removal.
    def initialize glob_pat
      @glob_pat = glob_pat
    end

    # Run the cleanup in a detached grandchild process (double fork, so
    # the orphaned grandchild is reparented to init and never needs
    # reaping here), then reap the intermediate child.
    def run lim_time=nil
      fork {fork {clean @glob_pat, lim_time}}
      Process.wait
    end

    # Delete every path matching the glob whose mtime is older than the
    # limit.  Defaults: the pattern given at construction, and 24 hours
    # before now.
    def clean glob_pat=nil, lim_time=nil
      pattern = glob_pat || @glob_pat
      threshold = lim_time || Time.now - 3600*24
      Dir.glob(pattern).each do |entry|
        # The entry may have vanished since the glob was taken.
        next unless File.exist? entry
        FileUtils.rm_rf entry if File.mtime(entry) < threshold
      end
    end
  end
end
|
data/lib/pmux/fiber18.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Thread based Fiber implementation for Ruby 1.8 by Aman Gupta
# Vendored compatibility shim: defined only when the interpreter lacks
# native fibers; on 1.9+ the stdlib 'fiber' library is loaded instead
# (see the else branch at the bottom).
unless defined? Fiber
  require 'thread'
  require 'singleton'
  class FiberError < StandardError; end
  class Fiber
    def initialize
      raise ArgumentError, 'new Fiber requires a block' unless block_given?

      # Two queues implement the resume/yield rendezvous between the
      # caller's thread and the fiber's backing thread.
      @yield = Queue.new
      @resume = Queue.new

      # The backing thread blocks on @resume.pop until the first
      # #resume, then runs the block; the block's final value is pushed
      # as the fiber's last yielded value.
      @thread = Thread.new{ @yield.push [ *yield(*@resume.pop) ] }
      @thread.abort_on_exception = true
      @thread[:fiber] = self
    end
    attr_reader :thread

    # Hand +args+ to the fiber and block until it yields (or finishes).
    # A single yielded value is unwrapped; multiple values come back as
    # an array, mirroring native Fiber#resume.
    def resume *args
      raise FiberError, 'dead fiber called' unless @thread.alive?
      @resume.push(args)
      result = @yield.pop
      result.size > 1 ? result : result.first
    end

    # Called from inside the fiber: hand +args+ back to the resumer and
    # block until the next #resume.
    def yield *args
      @yield.push(args)
      result = @resume.pop
      result.size > 1 ? result : result.first
    end

    # Class-level Fiber.yield delegates to the fiber bound to the
    # current thread (set in #initialize).
    def self.yield *args
      raise FiberError, "can't yield from root fiber" unless fiber = Thread.current[:fiber]
      fiber.yield(*args)
    end

    def self.current
      Thread.current[:fiber] or raise FiberError, 'not inside a fiber'
    end

    def inspect
      "#<#{self.class}:0x#{self.object_id.to_s(16)}>"
    end
  end

  # Pseudo-fiber representing the main thread: it can neither be
  # resumed nor yielded from.  Overrides skip Fiber#initialize so no
  # backing thread is created.
  class RootFiber < Fiber
    include Singleton
    def initialize
    end

    def resume *args
      raise FiberError, "can't resume root fiber"
    end

    def yield *args
      raise FiberError, "can't yield from root fiber"
    end
  end

  #attach the root fiber to the main thread
  Thread.main[:fiber] = RootFiber.instance
else
  require 'fiber'
end
|
data/lib/pmux/fixcmd.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Pmux
  # Mixin that rewrites a user-supplied shell pipeline into the exact
  # command line executed on a worker node: the input file is appended
  # to the first command, stderr of the first command is redirected,
  # the output of the last command is appended to out_path, and
  # executables shipped into tmp_dir take precedence over PATH.
  module FixCmdLine
    # cmd_line:: pipeline such as "grep PATTERN | sort"
    # in_path::  input file appended as an argument to the first command
    # out_path:: when given, " >>out_path" is appended to the last command
    # err_path:: stderr redirect target for the first command
    #            (defaults to /dev/null)
    # tmp_dir::  directory holding shipped executables; when
    #            tmp_dir/<cmd> is executable, the command is invoked
    #            from there instead of relying on PATH
    # Returns the fixed pipeline joined with '|'.
    def fix_cmd_line cmd_line, in_path=nil, out_path=nil, err_path=nil, tmp_dir=nil
      res = []
      cmds = cmd_line.split(/\s*\|\s*/)
      cmds.each_with_index do |cmd, n|
        c, = cmd.split
        # Prefer an executable shipped with the job over one on PATH.
        if tmp_dir and File.executable?("#{tmp_dir}/#{c}")
          cmd = "#{tmp_dir}/#{cmd}"
        end
        if n == 0
          # Only the head of the pipeline reads the input file and has
          # its stderr redirected.
          cmd = "#{cmd} #{in_path}" if in_path
          cmd = "#{cmd} 2>#{err_path || '/dev/null'}"
        end
        res.push cmd
      end
      # Append the output redirection to the *last* command explicitly.
      # (The original relied on the loop variable still aliasing the
      # last pushed string, which raised NoMethodError on an empty
      # pipeline.)
      res[-1] += " >>#{out_path}" if out_path and !res.empty?
      res.join '|'
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Pmux
  # Gathers output files from remote nodes: each file is fetched
  # asynchronously over scp and handed to +writer+ as soon as its
  # download completes.
  class Gatherer
    attr_accessor :writer
    attr_reader :mf

    def initialize writer=nil
      @writer = writer
      @mf = MR::MultiFuture.new
      # Feed each successfully downloaded local path straight into the
      # writer (future.get yields the :set_result value, i.e. the
      # local path passed to scp_download).
      @mf.on_success do |future|
        writer.write future.get
      end
    end

    # Start an asynchronous scp download of +remote+ on +node_addr+ to
    # +local+ and track its future in the multi-future.
    def gather msession, node_addr, remote, local, options={}
      download = msession.scp_download(node_addr, remote, local,
        :set_result=>local)
      @mf.add download
    end

    # Block until every tracked download has finished, then finalize
    # the writer.
    def join_all
      @mf.join_all
      @writer.finish
    end
  end
end
|
data/lib/pmux/handler.rb
ADDED
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'fileutils'

module Pmux
  # Worker-side RPC handler.  One instance serves a single pmux worker
  # process: it prepares per-job directories, executes streaming map /
  # reduce tasks inside fibers (so the event loop stays responsive),
  # and moves intermediate files between nodes for the reduce phase.
  class Handler
    attr_reader :options
    attr_accessor :server

    def initialize server=nil, options={}
      @server = server
      @options = options
      @ipaddr = options[:ipaddr]

      @wtq = []        # waiting-task queue (used only by the disabled pool code below)
      @wq = []         # idle-worker AsyncResults (disabled pool code below)
      @ases = {}       # pending AsyncResults keyed by "[r]<job_id>-<task_id>"
      @msession = nil  # lazily-created session for remote scp downloads
      @seqid = 0
    end

    # RPC: create the per-job working directory and a job-local log
    # file; report the directory and this node's cpu count back to the
    # dispatcher.
    def init_job job_id
      path = "#{options[:tmp_dir]}/#{job_id}"
      Dir.mkdir path
      options[:job_dir] = path
      Log.init "#{path}/worker.log", :log_level=>'debug'

      num_cpu = get_num_cpu
      #fork_worker num_cpu, options

      {
        'job_dir' => path,
        'num_cpu' => num_cpu,
      }
    end

    # Count processors by grepping /proc/cpuinfo; falls back to 2 on
    # systems without procfs (e.g. non-Linux).
    def get_num_cpu
      cpuinfo_path = '/proc/cpuinfo'
      if File.exist? cpuinfo_path
        lines = File.readlines(cpuinfo_path).grep(/^processor/)
        lines.size
      else
        2
      end
    end

    # execute a task and return the result
    #
    # RPC: run one (possibly fused) streaming task asynchronously.  An
    # MR::AsyncResult is returned immediately; it is completed from
    # inside a Fiber when the underlying mapper/reducer finishes.  A
    # task carrying 'task_keys' is a fusion of several sub-tasks run
    # sequentially; only the last sub-task's result hash survives the
    # loop and is reported, augmented with the fusion id and wall-clock
    # elapse.
    def exec_streaming_task task
      start_time = Time.now
      as = MR::AsyncResult.new
      if (task_keys = task['task_keys'])
        error_ids = []
        fusion_id = task['task_id']
        # NOTE: `fiber` is captured by the block before assignment;
        # Ruby resolves it lazily, so by the time the block runs on
        # resume the local holds the Fiber object.
        fiber = Fiber.new {
          for task_id, file in task_keys
            ntask = task.merge 'fusion_id'=>fusion_id,
              'task_id'=>task_id, 'path'=>file
            result = do_one_task ntask, fiber
          end
          # `result` leaks out of the for-loop: it is the last
          # sub-task's result hash.
          result.update :task_id=>fusion_id, :task_keys=>task_keys,
            :welapse=>(Time.now - start_time)
          as.result result
        }
      else
        fiber = Fiber.new {
          result = do_one_task(task, fiber)
          result[:welapse] = Time.now - start_time
          as.result result
        }
      end
      fiber.resume
      as
    end

    # Run a single map or reduce task.  The mapper/reducer callbacks
    # resume +fiber+ with the result hash; the Fiber.yield at the end
    # suspends until one of them fires, and its resumed value becomes
    # this method's return value.
    def do_one_task task, fiber=nil
      job_id, task_id, node_addr =
        task.values_at 'job_id', 'task_id', 'node_addr'
      num_r = task['num_r'].to_i
      result = {:job_id=>job_id, :task_id=>task_id, :node_addr=>node_addr}
      exception = nil
      error_level = nil
      if @options and (otmp_dir = @options[:tmp_dir])
        tmp_dir = "#{otmp_dir}/#{job_id}"
      else
        tmp_dir = "/var/tmp/#{job_id}"
      end
      if task['pindex']
        # reduce task
        result[:reduce] = true
        reducer = StreamingReducer.new task, tmp_dir, @server.loop
        reducer.on_success {
          result[:output_path] = reducer.output_path
          fiber.resume result if fiber
        }
        reducer.on_error {|e|
          # On failure the result carries the exception class, message
          # and the first backtrace frame.
          result.update :error=>e.class.to_s,
            :error_message=>e.message, :backtrace=>e.backtrace[0]
          fiber.resume result if fiber
        }
        reducer.do_streaming_reduce_task
      else
        # map task
        result[:map] = true
        mapper = StreamingMapper.new task, tmp_dir, @server.loop
        #result[:ifbase] = mapper.do_map_task
        mapper.on_success {
          result[:ifbase] = mapper.ifbase
          if num_r.zero?
            # Map-only job: inline small results (<1 KiB) directly in
            # the RPC reply instead of requiring a download.
            if (rsize = mapper.result_size)
              result[:result_body] = mapper.result_body if rsize < 1024
            else
              result[:result_body] = ''
            end
          end
          fiber.resume result if fiber
        }
        mapper.on_error {|e|
          result.update :error=>e.class.to_s,
            :error_message=>e.message, :backtrace=>e.backtrace[0]
          fiber.resume result if fiber
        }
        mapper.do_streaming_map_task
      end
      Fiber.yield
    end
    private :do_one_task

    # RPC: a mapper node announces that intermediate file
    # "<ifbase>-<pindex>" is ready for this reducer.  Local files are
    # renamed into the job dir (the leading 'm' of the basename becomes
    # 't'); remote files are scp-downloaded and an AsyncResult keyed
    # "r<job>-<task>" is completed from the download callback.
    # NOTE(review): the AsyncResult is registered *after*
    # attach_callback — presumably safe because the event loop is
    # single-threaded; confirm.
    def notify_reduce params
      job_id, task_id, pindex, node_addr, ifbase =
        params.values_at 'job_id', 'task_id', 'pindex', 'node_addr', 'ifbase'
      ifpath = "#{ifbase}-#{pindex}"
      Log.debug "H: notify_reduce #{job_id}-#{task_id} #{ifbase}"

      if @ipaddr == node_addr
        # local
        local = "#{options[:job_dir]}/#{File.basename(ifpath).sub(/^m/, 't')}"
        File.rename ifpath, local
        {:job_id=>job_id, :task_id=>task_id, :ifbase=>ifbase}
      else
        # remote
        @msession ||=
          MultiSession.new([], {:user=>@options[:user]}, @server.loop)
        @msession.connect_to_addr node_addr
        local = "#{options[:job_dir]}/#{File.basename(ifpath).sub(/^m/, 't')}"
        future = @msession.scp_download node_addr, ifpath, local
        future.attach_callback {|f|
          if (as = @ases.delete "r#{job_id}-#{task_id}")
            as.result :job_id=>job_id, :task_id=>task_id, :ifbase=>ifbase
          end
        }
        @ases["r#{job_id}-#{task_id}"] = MR::AsyncResult.new
      end
    #rescue Exception
    end


    # RPC: set up the scan/diffuser mode — job-local log plus a pmuxfs
    # storage adapter rooted at options[:fs_dir].
    def init_scan addrs
      log_path = "#{options[:log_dir]}/diffuser.log"
      Log.init log_path, :log_level=>'debug'
      @adapter = StorageAdapter.create 'pmuxfs', addrs
      @fs_dir = options[:fs_dir]
      @adapter.set_fs_dir @fs_dir
      @fs_dir
    end

    # RPC: one scan pass over the adapter's filesystem.
    def scan_once
      files = @adapter.find
    end

    # RPC: close the scp channel to +node_addr+ (no-op when no session
    # was ever opened).
    def close_download_channel node_addr
      @msession.close_channel node_addr if @msession
      @msession.class.to_s
    end


    # RPC: status tuples as [name, value, type].
    def get_status
      [
        ['ruby_version', RUBY_VERSION, :string],
        ['hoge', 1, :gauge],
      ]
    end

    # RPC: static node properties reported to `pmux --status`.
    def get_properties
      {
        'hostname' => Socket.gethostname,
        'program_name' => options[:program_name],
        'root_dir' => options[:root_dir],
        'tmp_dir' => options[:tmp_dir],
        'VERSION' => VERSION,
        'RUBY_VERSION' => RUBY_VERSION,
        'num_cpu' => get_num_cpu,
      }
    end

    # RPC: liveness probe.
    def hello
      'hello'
    end

    # RPC: stop the event loop and kick off background cleanup of old
    # per-job temp directories.
    def quit
      @server.loop.stop
      cleaner = Cleaner.new "#{options[:tmp_dir]}/[0-9]*"
      cleaner.run
      nil
    end

=begin
    private
    def fork_worker num_worker, options
      for n in 1..num_worker
        pid = fork {
          @server.loop.stop
          begin
            worker = Worker.new options
          rescue Timeout::Error
            Log.info "worker #{$$}: initialization timeout"
            worker = nil
          end
          if worker
            worker.run
            Log.puts 'worker exit'
          end
          exit
        }
      end
    end

    def enq_task task
      Log.debug "H: enq_task #{task['job_id']}-#{task['task_id']}"
      @wtq.push task
      process_task_queue
      if $test and @fiber
        @fiber.resume
      end
      @ases["#{task['job_id']}-#{task['task_id']}"] = MR::AsyncResult.new
    end

    def get_task
      Log.debug "H: get_task"
      if @wtq.empty?
        as = MR::AsyncResult.new
        @wq.push as
        as
      else
        @wtq.shift
      end
    end

    def put_result result
      Log.debug "H: put_result #{result['task_id']}"
      if (as = @ases.delete "#{result['job_id']}-#{result['task_id']}")
        Log.debug "H: return as.result"
        as.result result
      end
      nil
    end

    def process_task_queue
      while !@wtq.empty? and (as = @wq.shift)
        as.result @wtq.shift
      end
    end
=end
  end
end
|