pmux 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/README.md +36 -0
- data/Rakefile +4 -0
- data/bin/pmux +5 -0
- data/lib/pmux/application.rb +166 -0
- data/lib/pmux/cleaner.rb +28 -0
- data/lib/pmux/fiber18.rb +64 -0
- data/lib/pmux/fixcmd.rb +25 -0
- data/lib/pmux/gatherer.rb +23 -0
- data/lib/pmux/handler.rb +262 -0
- data/lib/pmux/job.rb +101 -0
- data/lib/pmux/joblogger.rb +46 -0
- data/lib/pmux/mapper.rb +151 -0
- data/lib/pmux/mros.rb +207 -0
- data/lib/pmux/multi_session.rb +309 -0
- data/lib/pmux/pipeio.rb +19 -0
- data/lib/pmux/plugin.rb +23 -0
- data/lib/pmux/q.rb +3 -0
- data/lib/pmux/reducer.rb +90 -0
- data/lib/pmux/storage_adapter.rb +105 -0
- data/lib/pmux/task_dispatcher.rb +167 -0
- data/lib/pmux/task_queue.rb +11 -0
- data/lib/pmux/task_scheduler.rb +166 -0
- data/lib/pmux/util_daemon.rb +18 -0
- data/lib/pmux/util_logger.rb +137 -0
- data/lib/pmux/version.rb +3 -0
- data/lib/pmux/worker.rb +91 -0
- data/lib/pmux/writer.rb +19 -0
- data/lib/pmux.rb +27 -0
- data/pmux.gemspec +24 -0
- data/test/mock_mros.rb +284 -0
- data/test/mock_pipeio.rb +26 -0
- data/test/mock_world.rb +193 -0
- data/test/mock_xattr.rb +10 -0
- data/test/runner.rb +10 -0
- data/test/test_application.rb +13 -0
- data/test/test_fixcmd.rb +17 -0
- data/test/test_handler.rb +15 -0
- data/test/test_i_mapreduce.rb +169 -0
- data/test/test_i_mros.rb +28 -0
- data/test/test_i_msession.rb +27 -0
- data/test/test_job.rb +35 -0
- data/test/test_joblogger.rb +16 -0
- data/test/test_mapper.rb +60 -0
- data/test/test_pipeio.rb +24 -0
- data/test/test_storage_adapter.rb +63 -0
- data/test/test_task_queue.rb +87 -0
- data/test/test_task_scheduler.rb +39 -0
- data/test/txt/0.log +105 -0
- data/test/txt/1.log +105 -0
- data/test/txt/2.log +105 -0
- data/test/txt/3.log +105 -0
- data/test/txt/4.log +105 -0
- data/test/txt/5.log +105 -0
- data/test/txt/6.log +105 -0
- data/test/txt/7.log +105 -0
- data/test/txt/8.log +105 -0
- data/test/unittest_helper.rb +57 -0
- metadata +153 -0
data/.gitignore
ADDED
data/README.md
ADDED
@@ -0,0 +1,36 @@
# Pmux: pipeline multiplexer

Pmux is a lightweight file-based MapReduce system, written in Ruby.
Applying the philosophy of Unix pipeline processing to distributed
computing on a GlusterFS cluster, pmux provides a tool capable of
handling large amounts of data stored in files.

## Requirements
* ruby 1.9.2 or higher
* msgpack-rpc
* net-ssh, net-scp
* gflocator
* GlusterFS 3.3.0 or higher, native client (FUSE)

## Install

on all GlusterFS server nodes:

    gem install pmux

on the GlusterFS client node:

    gem install pmux
    gem install gflocator
    sudo gflocator

## Usage

show status

    $ pmux --status
    host0.example.com: pmux 0.1.0, num_cpu=8, ruby 1.9.3

distributed grep

    $ pmux --mapper="grep PATTERN" /glusterfs/xxx/*.log
data/Rakefile
ADDED
data/bin/pmux
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'optparse'

module Pmux
  # Command-line entry point for pmux. Parses options, prepares the
  # working directories and dispatches to one of the operating modes:
  # RPC server, cluster status, file lookup, or a map-reduce run.
  class Application
    OPTS = {}

    # Parse ARGV into +options+, create root/tmp/log directories if
    # missing, load plugins, and dispatch on the selected mode.
    def run options=OPTS
      parser = optparse options
      parser.parse!
      options[:program_name] = parser.program_name
      # Fall back through the usual ways of discovering the login name.
      options[:user] ||=
        (ENV['USER'] || ENV['LOGNAME'] || Etc.getlogin || Etc.getpwuid.name)

      root_dir = (options[:root_dir] ||=
        File.expand_path "~/.#{options[:program_name]}")
      tmp_dir = (options[:tmp_dir] ||= root_dir + '/tmp')
      log_dir = (options[:log_dir] ||= root_dir + '/log')
      [root_dir, tmp_dir, log_dir].each do |dir|
        Dir.mkdir dir unless File.exist? dir
      end

      Plugin.load_plugins unless options[:disable_plugins]
      addrs = (options[:hosts] || '').split(',').map {|h| getaddr h}

      if options[:server]
        # Make the socket path unique per process.
        (options[:sock_path] ||= '/tmp/.pmuxsock') << ".#{$$}"
        run_server options
      elsif options[:status]
        show_status addrs, options
      elsif options[:lookup]
        lookup addrs, options
      else
        run_mr addrs, options
      end
    end

    # Resolve +host+ to its dotted-quad IP address string.
    def getaddr host
      packed = Socket.pack_sockaddr_in 0, host
      _port, addr = Socket.unpack_sockaddr_in packed
      addr
    end

    # Run the pmux RPC server, listening both on stdio (for ssh-spawned
    # workers) and on a unix domain socket. The socket file is removed
    # on the way out, even on error.
    def run_server options
      STDOUT.sync = true
      server = MR::Server.new
      handler = Pmux::Handler.new server, options
      server.listen MR::PipeTransport.new(STDIN, STDOUT, STDERR), handler
      server.listen MR::UNIXServerTransport.new(options[:sock_path]), handler
      server.run
    rescue SystemCallError
    ensure
      File.unlink options[:sock_path] rescue nil
    end

    # Ask every node for its properties and print one status line each,
    # then tell the nodes to quit.
    def show_status addrs, options
      addrs = ['localhost'] if addrs.empty?
      adapter = StorageAdapter.create options[:storage_name], addrs
      msession = MRSession.new addrs, options
      msession.on_error do |addr, err|
        $stderr.printf "%s: %s\n", addr, err.to_s
      end
      msession.connect

      mf = msession.multicast_call_async :get_properties
      fmt = "%s: %s %s, num_cpu=%s, ruby %s\n"
      mf.on_success do |f|
        props = f.get
        print fmt % [props['hostname'],
                     props['program_name'], props['VERSION'],
                     props['num_cpu'], props['RUBY_VERSION']]
      end
      mf.on_error {|f| printf "%s: ERROR: %s\n", f.addr, f.error}
      mf.join_all

      # Ignore errors while shutting the nodes down.
      msession.on_error {}
      msession.multicast_call_async(:quit).join_all
    end

    # Print to stderr the storage locations of the file named by
    # the --lookup option.
    def lookup addrs, options
      adapter = StorageAdapter.create options[:storage_name], addrs
      name = options[:lookup]
      locator_host = options[:locator_host] || addrs.first || 'localhost'
      adapter.connect_to_storage locator_host, options[:locator_port]
      adapter.get_files [name]
      locations = adapter.lookup_file name
      $stderr.puts "name: #{name}"
      locations.each do |host, path|
        $stderr.puts "location: #{host}:#{path}"
      end
    end

    # Run a full map-reduce job over the files given on the command
    # line (+argv+).
    def run_mr addrs, options, argv=ARGV
      options[:__start_time] = Time.now
      addrs = ['localhost'] if options[:storage_name] == 'local' and addrs.empty?
      adapter = StorageAdapter.create options[:storage_name], addrs, options
      locator_host = options[:locator_host] || addrs.first || 'localhost'
      locator_port = options[:locator_port]

      puts "storage: #{options[:storage_name]}" if options[:verbose]
      begin
        adapter.connect_to_storage locator_host, locator_port
        files = adapter.get_files argv, options[:expand_glob]
        options[:__get_files_time] = Time.now
      rescue
        STDERR.puts "Storage Error: #{$!}"
        return
      end
      raise RuntimeError, "no hostname specified" if adapter.addrs.empty?

      msession = MRSession.new adapter.addrs, options
      msession.on_error do |addr, err|
        $stderr.printf "%s: %s\n", addr, err.inspect if err
      end
      msession.connect

      # A reducer implies at least one reduce partition.
      options[:num_r] ||= 1 if options[:reducer]
      dispatcher = TaskDispatcher.new options, adapter, msession
      job = Job.new options, files
      job.mk_reducer_addrs adapter.addrs
      dispatcher.run job
    end

    # Build the OptionParser. Recognized options are written directly
    # into +opts+, which is also exposed as the parser's #options.
    def optparse opts
      op = OptionParser.new
      op.on('--debug') {$debug = true; STDOUT.sync = true}
      op.on('--server') {opts[:server] = true}
      op.on('--argv=FILES') {}
      op.on('--disable-plugins') {opts[:disable_plugins] = true}
      op.on('--expand-glob') {opts[:expand_glob] = true}
      op.on('--ff=FF', Integer) {|arg| opts[:ff] = arg}
      op.on('--hosts=HOST,HOST,...') {|arg| opts[:hosts] = arg}
      op.on('--ipaddr=ADDR') {|arg| opts[:ipaddr] = arg}
      op.on('--locator-host=HOST') {|arg| opts[:locator_host] = arg}
      op.on('--locator-port=PORT', Integer) {|arg| opts[:locator_port] = arg}
      op.on('--lookup=FILE') {|arg| opts[:lookup] = arg}
      op.on('--mapper=CMD') {|arg| opts[:mapper] = arg}
      op.on('--reducer=CMD') {|arg| opts[:reducer] = arg}
      op.on('--num-r=NUM', Integer) {|arg| opts[:num_r] = arg}
      op.on('--root-dir=DIR') {|arg| opts[:root_dir] = arg}
      op.on('--sec', '--connect-secondary') {opts[:connect_secondary] = true}
      op.on('--ship-file=FILE', '--file=FILE') {|arg|
        (opts[:ship_files] ||= []).push arg}
      op.on('--status') {opts[:status] = true}
      op.on('--storage=STORAGE_NAME') {|arg| opts[:storage_name] = arg}
      op.on('--verbose') {opts[:verbose] = true}
      op.on('--version') {
        puts "#{op.program_name} #{VERSION}"
        exit
      }
      class <<op
        attr_accessor :options
      end
      op.options = opts
      op
    end
  end
end
|
data/lib/pmux/cleaner.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'fileutils'

module Pmux
  # Sweeps stale temporary files and directories matching a glob pattern.
  class Cleaner
    # glob_pat:: shell glob of the paths that may be removed.
    def initialize glob_pat
      @glob_pat = glob_pat
    end

    # Run #clean in a detached grandchild process (double fork), so the
    # sweep continues in the background while the caller only reaps the
    # short-lived intermediate child.
    def run lim_time=nil
      fork {fork {clean @glob_pat, lim_time}}
      Process.wait
    end

    # Remove every path matching the glob whose mtime is older than
    # +lim_time+ (default: 24 hours ago). Paths that vanish between the
    # glob and the stat are skipped.
    def clean glob_pat=nil, lim_time=nil
      pattern = glob_pat || @glob_pat
      limit = lim_time || Time.now - 3600*24
      Dir.glob(pattern).each do |path|
        next unless File.exist? path
        FileUtils.rm_rf path if File.mtime(path) < limit
      end
    end
  end
end
|
data/lib/pmux/fiber18.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Thread based Fiber implementation for Ruby 1.8 by Aman Gupta
#
# Emulates the Ruby 1.9 Fiber API on top of a dedicated Thread per
# fiber, using two Queues as rendezvous points: @resume carries the
# arguments of each #resume call into the fiber, @yield carries the
# values of each #yield (and the block's final value) back out.
# On rubies where Fiber already exists, only 'fiber' is required.
unless defined? Fiber
  require 'thread'
  require 'singleton'
  class FiberError < StandardError; end
  class Fiber
    # Creates a suspended fiber from the given block. The backing
    # thread immediately blocks on @resume.pop until #resume is called.
    def initialize
      raise ArgumentError, 'new Fiber requires a block' unless block_given?

      @yield = Queue.new
      @resume = Queue.new

      # Splat so the block's result is always pushed as an Array.
      @thread = Thread.new{ @yield.push [ *yield(*@resume.pop) ] }
      @thread.abort_on_exception = true
      # Let Fiber.current / Fiber.yield find this fiber from its thread.
      @thread[:fiber] = self
    end
    attr_reader :thread

    # Transfers control into the fiber, passing +args+, and blocks until
    # the fiber yields (or finishes). Raises FiberError once the backing
    # thread has terminated.
    def resume *args
      raise FiberError, 'dead fiber called' unless @thread.alive?
      @resume.push(args)
      result = @yield.pop
      # Mirror 1.9 semantics: multiple values come back as an Array,
      # a single value is unwrapped.
      result.size > 1 ? result : result.first
    end

    # Transfers control back to the resumer, handing over +args+, and
    # blocks until the next #resume.
    def yield *args
      @yield.push(args)
      result = @resume.pop
      result.size > 1 ? result : result.first
    end

    # Yields from whatever fiber the current thread belongs to.
    def self.yield *args
      raise FiberError, "can't yield from root fiber" unless fiber = Thread.current[:fiber]
      fiber.yield(*args)
    end

    # The fiber owning the current thread, or FiberError outside one.
    def self.current
      Thread.current[:fiber] or raise FiberError, 'not inside a fiber'
    end

    def inspect
      "#<#{self.class}:0x#{self.object_id.to_s(16)}>"
    end
  end

  # Sentinel fiber representing the main thread: it can neither be
  # resumed nor yielded from, matching 1.9's root-fiber behavior.
  class RootFiber < Fiber
    include Singleton
    # Deliberately skips Fiber#initialize: the root fiber has no block
    # and no backing thread of its own.
    def initialize
    end

    def resume *args
      raise FiberError, "can't resume root fiber"
    end

    def yield *args
      raise FiberError, "can't yield from root fiber"
    end
  end

  #attach the root fiber to the main thread
  Thread.main[:fiber] = RootFiber.instance
else
  require 'fiber'
end
|
data/lib/pmux/fixcmd.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Pmux
  # Mixin that rewrites a user-supplied shell pipeline so each stage
  # runs on a worker node with the proper paths and redirections.
  module FixCmdLine
    # Returns a shell command line derived from +cmd_line+:
    # * +in_path+ (when given) is appended to the first stage,
    # * stderr of the first stage is redirected to +err_path+
    #   (default /dev/null),
    # * stdout of the last stage is appended (>>) to +out_path+
    #   when given,
    # * any stage whose executable was shipped into +tmp_dir+ is
    #   invoked from there.
    def fix_cmd_line cmd_line, in_path=nil, out_path=nil, err_path=nil, tmp_dir=nil
      stages = cmd_line.split(/\s*\|\s*/)
      fixed = stages.each_with_index.map {|stage, idx|
        prog, = stage.split
        # Prefer the shipped copy of the executable if one exists.
        if tmp_dir and File.executable?("#{tmp_dir}/#{prog}")
          stage = "#{tmp_dir}/#{stage}"
        end
        if idx == 0
          stage += " #{in_path}" if in_path
          stage += " 2>#{err_path || '/dev/null'}"
        end
        stage
      }
      fixed.last << " >>#{out_path}" if out_path
      fixed.join '|'
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Pmux
  # Collects result files from remote nodes via scp downloads tracked
  # through a single MultiFuture, feeding each downloaded file to a
  # writer as it arrives.
  class Gatherer
    attr_accessor :writer
    attr_reader :mf

    def initialize writer=nil
      @writer = writer
      @mf = MR::MultiFuture.new
      # Each successful download resolves to the local path
      # (:set_result below); hand it straight to the writer.
      @mf.on_success {|f| writer.write f.get}
    end

    # Kick off an asynchronous scp download of +remote+ on +node_addr+
    # into +local+ and track its future. +options+ is currently unused.
    def gather msession, node_addr, remote, local, options={}
      @mf.add(msession.scp_download(node_addr, remote, local,
                                    :set_result=>local))
    end

    # Block until every download has completed, then let the writer
    # finish (flush/close).
    def join_all
      @mf.join_all
      @writer.finish
    end
  end
end
|
data/lib/pmux/handler.rb
ADDED
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'fileutils'

module Pmux
  # RPC handler run on each worker node. Serves job setup, task
  # execution (map and reduce via Fibers), intermediate-file transfer
  # for the shuffle phase, and node introspection calls.
  class Handler
    attr_reader :options
    attr_accessor :server

    def initialize server=nil, options={}
      @server = server
      @options = options
      @ipaddr = options[:ipaddr]

      @wtq = []       # task queue (used by the commented-out worker pool below)
      @wq = []        # waiting AsyncResults for get_task
      @ases = {}      # pending AsyncResults keyed by "[r]<job_id>-<task_id>"
      @msession = nil # lazy multi-session for remote downloads
      @seqid = 0
    end

    # Create the per-job working directory under tmp_dir, start the
    # job log, and report the directory and CPU count back to the caller.
    def init_job job_id
      path = "#{options[:tmp_dir]}/#{job_id}"
      Dir.mkdir path
      options[:job_dir] = path
      Log.init "#{path}/worker.log", :log_level=>'debug'

      num_cpu = get_num_cpu
      #fork_worker num_cpu, options

      {
        'job_dir' => path,
        'num_cpu' => num_cpu,
      }
    end

    # Count processors by scanning /proc/cpuinfo; falls back to 2 on
    # systems without procfs (e.g. non-Linux).
    def get_num_cpu
      cpuinfo_path = '/proc/cpuinfo'
      if File.exist? cpuinfo_path
        lines = File.readlines(cpuinfo_path).grep(/^processor/)
        lines.size
      else
        2
      end
    end

    # execute a task and return the result
    #
    # Runs the task inside a Fiber so the event loop stays responsive;
    # completion is delivered through the returned MR::AsyncResult.
    # A task carrying 'task_keys' is a fused task: its sub-tasks are
    # executed sequentially in one fiber and reported as a single
    # result under the fusion id.
    def exec_streaming_task task
      start_time = Time.now
      as = MR::AsyncResult.new
      if (task_keys = task['task_keys'])
        error_ids = []
        fusion_id = task['task_id']
        fiber = Fiber.new {
          # NOTE(review): `result` is assigned inside the `for` body and
          # read after the loop — `for` shares scope, so this is the last
          # sub-task's result; presumably task_keys is never empty.
          for task_id, file in task_keys
            ntask = task.merge 'fusion_id'=>fusion_id,
              'task_id'=>task_id, 'path'=>file
            result = do_one_task ntask, fiber
          end
          result.update :task_id=>fusion_id, :task_keys=>task_keys,
            :welapse=>(Time.now - start_time)
          as.result result
        }
      else
        fiber = Fiber.new {
          result = do_one_task(task, fiber)
          result[:welapse] = Time.now - start_time
          as.result result
        }
      end
      fiber.resume
      as
    end

    # Execute one map or reduce task. Must be called from inside a
    # Fiber: it starts the streaming mapper/reducer, suspends with
    # Fiber.yield, and is resumed with the result hash by the
    # on_success/on_error callbacks.
    def do_one_task task, fiber=nil
      job_id, task_id, node_addr =
        task.values_at 'job_id', 'task_id', 'node_addr'
      num_r = task['num_r'].to_i
      result = {:job_id=>job_id, :task_id=>task_id, :node_addr=>node_addr}
      exception = nil
      error_level = nil
      if @options and (otmp_dir = @options[:tmp_dir])
        tmp_dir = "#{otmp_dir}/#{job_id}"
      else
        tmp_dir = "/var/tmp/#{job_id}"
      end
      if task['pindex']
        # reduce task
        result[:reduce] = true
        reducer = StreamingReducer.new task, tmp_dir, @server.loop
        reducer.on_success {
          result[:output_path] = reducer.output_path
          fiber.resume result if fiber
        }
        reducer.on_error {|e|
          result.update :error=>e.class.to_s,
            :error_message=>e.message, :backtrace=>e.backtrace[0]
          fiber.resume result if fiber
        }
        reducer.do_streaming_reduce_task
      else
        # map task
        result[:map] = true
        mapper = StreamingMapper.new task, tmp_dir, @server.loop
        #result[:ifbase] = mapper.do_map_task
        mapper.on_success {
          result[:ifbase] = mapper.ifbase
          if num_r.zero?
            # Map-only job: ship small results (< 1KB) inline with the
            # RPC reply instead of requiring a separate download.
            if (rsize = mapper.result_size)
              result[:result_body] = mapper.result_body if rsize < 1024
            else
              result[:result_body] = ''
            end
          end
          fiber.resume result if fiber
        }
        mapper.on_error {|e|
          result.update :error=>e.class.to_s,
            :error_message=>e.message, :backtrace=>e.backtrace[0]
          fiber.resume result if fiber
        }
        mapper.do_streaming_map_task
      end
      # Suspend until one of the callbacks above resumes us with `result`.
      Fiber.yield
    end
    private :do_one_task

    # Shuffle-phase RPC: fetch the intermediate file for partition
    # +pindex+ produced on +node_addr+. Local files are renamed into
    # the job dir (m* -> t* prefix); remote ones are scp'd
    # asynchronously, with completion signalled through an AsyncResult
    # registered under "r<job_id>-<task_id>".
    def notify_reduce params
      job_id, task_id, pindex, node_addr, ifbase =
        params.values_at 'job_id', 'task_id', 'pindex', 'node_addr', 'ifbase'
      ifpath = "#{ifbase}-#{pindex}"
      Log.debug "H: notify_reduce #{job_id}-#{task_id} #{ifbase}"

      if @ipaddr == node_addr
        # local
        local = "#{options[:job_dir]}/#{File.basename(ifpath).sub(/^m/, 't')}"
        File.rename ifpath, local
        {:job_id=>job_id, :task_id=>task_id, :ifbase=>ifbase}
      else
        # remote
        @msession ||=
          MultiSession.new([], {:user=>@options[:user]}, @server.loop)
        @msession.connect_to_addr node_addr
        local = "#{options[:job_dir]}/#{File.basename(ifpath).sub(/^m/, 't')}"
        future = @msession.scp_download node_addr, ifpath, local
        future.attach_callback {|f|
          if (as = @ases.delete "r#{job_id}-#{task_id}")
            as.result :job_id=>job_id, :task_id=>task_id, :ifbase=>ifbase
          end
        }
        @ases["r#{job_id}-#{task_id}"] = MR::AsyncResult.new
      end
    #rescue Exception
    end


    # Prepare this node for filesystem scanning: set up the scan log
    # and a 'pmuxfs' storage adapter rooted at options[:fs_dir].
    def init_scan addrs
      log_path = "#{options[:log_dir]}/diffuser.log"
      Log.init log_path, :log_level=>'debug'
      @adapter = StorageAdapter.create 'pmuxfs', addrs
      @fs_dir = options[:fs_dir]
      @adapter.set_fs_dir @fs_dir
      @fs_dir
    end

    # One scan pass over the adapter; returns whatever the adapter's
    # find yields.
    def scan_once
      files = @adapter.find
    end

    # Close the scp channel to +node_addr+ if a download session exists.
    def close_download_channel node_addr
      @msession.close_channel node_addr if @msession
      @msession.class.to_s
    end


    # Status triples: [name, value, type].
    def get_status
      [
        ['ruby_version', RUBY_VERSION, :string],
        ['hoge', 1, :gauge],
      ]
    end

    # Node properties reported to `pmux --status`.
    def get_properties
      {
        'hostname' => Socket.gethostname,
        'program_name' => options[:program_name],
        'root_dir' => options[:root_dir],
        'tmp_dir' => options[:tmp_dir],
        'VERSION' => VERSION,
        'RUBY_VERSION' => RUBY_VERSION,
        'num_cpu' => get_num_cpu,
      }
    end

    # Trivial liveness probe.
    def hello
      'hello'
    end

    # Stop the event loop and sweep this node's stale job directories
    # in a detached background process.
    def quit
      @server.loop.stop
      cleaner = Cleaner.new "#{options[:tmp_dir]}/[0-9]*"
      cleaner.run
      nil
    end

=begin
    private
    def fork_worker num_worker, options
      for n in 1..num_worker
        pid = fork {
          @server.loop.stop
          begin
            worker = Worker.new options
          rescue Timeout::Error
            Log.info "worker #{$$}: initialization timeout"
            worker = nil
          end
          if worker
            worker.run
            Log.puts 'worker exit'
          end
          exit
        }
      end
    end

    def enq_task task
      Log.debug "H: enq_task #{task['job_id']}-#{task['task_id']}"
      @wtq.push task
      process_task_queue
      if $test and @fiber
        @fiber.resume
      end
      @ases["#{task['job_id']}-#{task['task_id']}"] = MR::AsyncResult.new
    end

    def get_task
      Log.debug "H: get_task"
      if @wtq.empty?
        as = MR::AsyncResult.new
        @wq.push as
        as
      else
        @wtq.shift
      end
    end

    def put_result result
      Log.debug "H: put_result #{result['task_id']}"
      if (as = @ases.delete "#{result['job_id']}-#{result['task_id']}")
        Log.debug "H: return as.result"
        as.result result
      end
      nil
    end

    def process_task_queue
      while !@wtq.empty? and (as = @wq.shift)
        as.result @wtq.shift
      end
    end
=end
  end
end
|