pmux 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/.gitignore +8 -0
  2. data/README.md +36 -0
  3. data/Rakefile +4 -0
  4. data/bin/pmux +5 -0
  5. data/lib/pmux/application.rb +166 -0
  6. data/lib/pmux/cleaner.rb +28 -0
  7. data/lib/pmux/fiber18.rb +64 -0
  8. data/lib/pmux/fixcmd.rb +25 -0
  9. data/lib/pmux/gatherer.rb +23 -0
  10. data/lib/pmux/handler.rb +262 -0
  11. data/lib/pmux/job.rb +101 -0
  12. data/lib/pmux/joblogger.rb +46 -0
  13. data/lib/pmux/mapper.rb +151 -0
  14. data/lib/pmux/mros.rb +207 -0
  15. data/lib/pmux/multi_session.rb +309 -0
  16. data/lib/pmux/pipeio.rb +19 -0
  17. data/lib/pmux/plugin.rb +23 -0
  18. data/lib/pmux/q.rb +3 -0
  19. data/lib/pmux/reducer.rb +90 -0
  20. data/lib/pmux/storage_adapter.rb +105 -0
  21. data/lib/pmux/task_dispatcher.rb +167 -0
  22. data/lib/pmux/task_queue.rb +11 -0
  23. data/lib/pmux/task_scheduler.rb +166 -0
  24. data/lib/pmux/util_daemon.rb +18 -0
  25. data/lib/pmux/util_logger.rb +137 -0
  26. data/lib/pmux/version.rb +3 -0
  27. data/lib/pmux/worker.rb +91 -0
  28. data/lib/pmux/writer.rb +19 -0
  29. data/lib/pmux.rb +27 -0
  30. data/pmux.gemspec +24 -0
  31. data/test/mock_mros.rb +284 -0
  32. data/test/mock_pipeio.rb +26 -0
  33. data/test/mock_world.rb +193 -0
  34. data/test/mock_xattr.rb +10 -0
  35. data/test/runner.rb +10 -0
  36. data/test/test_application.rb +13 -0
  37. data/test/test_fixcmd.rb +17 -0
  38. data/test/test_handler.rb +15 -0
  39. data/test/test_i_mapreduce.rb +169 -0
  40. data/test/test_i_mros.rb +28 -0
  41. data/test/test_i_msession.rb +27 -0
  42. data/test/test_job.rb +35 -0
  43. data/test/test_joblogger.rb +16 -0
  44. data/test/test_mapper.rb +60 -0
  45. data/test/test_pipeio.rb +24 -0
  46. data/test/test_storage_adapter.rb +63 -0
  47. data/test/test_task_queue.rb +87 -0
  48. data/test/test_task_scheduler.rb +39 -0
  49. data/test/txt/0.log +105 -0
  50. data/test/txt/1.log +105 -0
  51. data/test/txt/2.log +105 -0
  52. data/test/txt/3.log +105 -0
  53. data/test/txt/4.log +105 -0
  54. data/test/txt/5.log +105 -0
  55. data/test/txt/6.log +105 -0
  56. data/test/txt/7.log +105 -0
  57. data/test/txt/8.log +105 -0
  58. data/test/unittest_helper.rb +57 -0
  59. metadata +153 -0
@@ -0,0 +1,11 @@
1
module Pmux
  # Array-backed queue of pending tasks. Items are looked up by their
  # :job_id entry when a whole job is withdrawn.
  class TaskQueue < Array
    # Appends every element of +tasks+ to the tail of the queue.
    # Returns the queue itself.
    def inject_tasks(tasks)
      concat(tasks)
    end

    # Removes every queued item whose :job_id equals +job_id+.
    # Returns the queue itself, whether or not anything was removed.
    def delete_job(job_id)
      delete_if do |queued|
        queued[:job_id] == job_id
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
+
2
module Pmux
  # Hands queued tasks to worker slots on nodes.
  #
  # The object passed to #initialize is used both as the storage adapter
  # (+lookup_file+) and as the node table (+[]+ keyed by node address).
  # A node only receives tasks once its address has a truthy entry in
  # #shipped. Successful allocations made during one #process_queue pass
  # are reported through the callback registered with
  # #attach_flush_callback.
  class TaskScheduler
    # Hash keyed by node address; a truthy value marks the node as eligible
    # for task allocation (see #allocate_task_to_slot).
    attr_reader :shipped

    # adapter:: object answering +[]+ (node lookup) and +lookup_file+.
    def initialize(adapter = nil)
      @adapter = adapter
      @node_table = adapter
      @job_table = {}
      @task_queue = TaskQueue.new
      @allocated_tasks = {}
      @shipped = {}
      @flush_callback = nil
    end

    # Registers +job+ under its id and queues all of its tasks.
    def push_job(job)
      @job_table[job.id] = job
      @task_queue.inject_tasks job.tasks
    end

    # Queues +tasks+ without registering a job.
    def inject_tasks(tasks)
      @task_queue.inject_tasks tasks
    end

    # Frees the slot held by +task+ on +node_addr+ (defaults to the task's
    # own :node_addr) and removes the task from +job+.
    def delete_task_from_job(job, task, node_addr = nil)
      node_addr ||= task[:node_addr]
      remove_allocated_task node_addr, job.id, task[:task_id]
      job.delete_task_by_id task[:task_id]
    end

    # Makes one pass over the tasks that are queued on entry, trying to
    # allocate each of them exactly once. Tasks that could not be placed are
    # put back at the head of the queue in their original order. Everything
    # allocated in this pass is then reported via #flush_job_map.
    def process_queue
      job_map = {}
      deferred = []

      # Bounded by the size on entry so each task is examined at most once.
      @task_queue.size.times do
        task = @task_queue.shift
        break unless task

        allocated =
          if task[:pindex] && task[:node_addr]
            allocate_reduce_task_to_node job_map, task
          else
            # Resolve candidate locations lazily, once per task.
            task[:node_addrs] ||= @adapter.lookup_file(task[:file])
            allocate_map_task_to_node job_map, task
          end
        deferred.push task unless allocated
      end
      @task_queue.replace deferred + @task_queue

      flush_job_map job_map
    end

    # Registers the block invoked as +block.call(node_addr, task)+ for each
    # allocation reported by #flush_job_map.
    def attach_flush_callback(&block)
      @flush_callback = block
    end

    # Reports every slot group in +job_map+ ({job_id => {node_addr =>
    # [[task, ...], ...]}}) to the flush callback. A group holding several
    # fused tasks is sent as a copy of its first task carrying a :task_keys
    # hash of {task_id => path} for all members.
    def flush_job_map(job_map)
      job_map.each do |_job_id, nt_map|
        nt_map.each do |node_addr, fslots|
          fslots.each do |fslot|
            task = fslot.first
            if fslot.size > 1
              # Build from pairs directly; flattening would break on
              # array-valued ids or paths.
              task_keys = Hash[fslot.map { |t| [t[:task_id], t[:path]] }]
              task = task.merge(:task_keys => task_keys)
            end
            @flush_callback.call node_addr, task if @flush_callback
          end
        end
      end
    end

    # Tries to place +task+ on +node_addr+, recording the result in
    # +job_map+. Returns true on success, false otherwise.
    def allocate_task_to_node(job_map, task, node_addr)
      job_id = task[:job_id]

      if (ff = task[:ff]) && !task[:pindex]
        # Task fusion: join a group already opened on this node during the
        # current pass if it still holds fewer than :ff tasks.
        nt_map = job_map[job_id]
        fslots = nt_map && nt_map[node_addr]
        open_group = fslots && fslots.find { |fslot| fslot.size < ff }
        if open_group
          open_group.push task
          return true
        end
      end

      return false unless allocate_task_to_slot(node_addr, task)

      nt_map = (job_map[job_id] ||= {})
      task[:alloc_time] = Time.now
      (nt_map[node_addr] ||= []).push [task]
      true
    end

    # Reserves a worker slot for +task+ on +node_addr+.
    # Returns true when reserved, false when the node has no free slot
    # (limit: the node's :num_workers, default 2) or already holds this
    # task, and nil when the node is unknown or not yet marked in #shipped.
    def allocate_task_to_slot(node_addr, task)
      node = @node_table[node_addr]
      return unless node && @shipped[node_addr]

      slot = (@allocated_tasks[node_addr] ||= [])
      num_workers = node[:num_workers] || 2
      return false if slot.size >= num_workers
      return false if slot.include? task

      task[:node_addr] = node_addr
      slot.push task
      true
    end

    # Releases the slot entries on +node_addr+ matching +job_id+ and
    # +task_id+. Returns nil when nothing was ever allocated on that node.
    def remove_allocated_task(node_addr, job_id, task_id)
      if (slot = @allocated_tasks[node_addr])
        slot.delete_if { |t| t[:job_id] == job_id && t[:task_id] == task_id }
      end
    end

    # Walks the (node_addr, path) candidates in task[:node_addrs] and places
    # the task on the first known node that accepts it. The chosen candidate
    # is removed from task[:node_addrs]. Returns true/false.
    def allocate_map_task_to_node(job_map, task)
      # Iterate over a copy because the original is modified on success.
      task[:node_addrs].dup.each do |node_addr, path|
        next unless @node_table[node_addr]
        next unless path
        task[:path] = path
        if allocate_task_to_node job_map, task, node_addr
          task[:node_addrs].delete [node_addr, path]
          return true
        end
      end
      false
    end

    # Places a reduce task on the node named by task[:node_addr].
    # Returns true/false.
    def allocate_reduce_task_to_node(job_map, task)
      node_addr = task[:node_addr]
      return false unless node_addr
      allocate_task_to_node(job_map, task, node_addr) ? true : false
    end
  end
end
@@ -0,0 +1,18 @@
1
module Process
  # Fallback for Ruby builds whose Process module has no +daemon+ method.
  # When Process.daemon already exists nothing is defined here.
  unless respond_to?(:daemon)
    # Detaches the current process: fork, start a new session, fork again,
    # then (unless +nochdir+) change directory to "/" and (unless +noclose+)
    # point the three standard streams at /dev/null. Returns 0.
    def self.daemon(nochdir = nil, noclose = nil)
      exit!(0) if Process.fork   # first parent leaves
      Process.setsid
      exit!(0) if Process.fork   # session leader leaves
      Dir.chdir("/") unless nochdir
      unless noclose
        [[STDIN, "r"], [STDOUT, "w"], [STDERR, "w"]].each do |stream, mode|
          stream.reopen("/dev/null", mode)
        end
      end
      0
    end
  end
end
@@ -0,0 +1,137 @@
1
+ require 'logger'
2
+
3
+ module Log
4
+ IOBUF = {}
5
+
6
+ class Formatter
7
+ Format = "%s %s\n"
8
+ attr_accessor :datetime_format
9
+
10
+ def call severity, time, progname, msg
11
+ tstr = time.strftime("%Y-%m-%d %H:%M:%S")
12
+ Format % [tstr, msg2str(msg)]
13
+ end
14
+
15
+ def msg2str(msg)
16
+ case msg
17
+ when ::String
18
+ msg
19
+ when ::Exception
20
+ "#{ msg.message } (#{ msg.class })\n" <<
21
+ (msg.backtrace || []).join("\n")
22
+ else
23
+ msg.inspect
24
+ end
25
+ end
26
+ end
27
+
28
+ class << self
29
+ attr_accessor :logger
30
+ end
31
+
32
+ module_function
33
+
34
+ def init logdev, options={}
35
+ shift_age = options[:log_shift_age] || 8
36
+ shift_size = options[:log_shift_size] || 100_000_000
37
+ logger = Logger.new logdev, shift_age, shift_size
38
+ logger.formatter = Log::Formatter.new
39
+ logger.level = {'fatal'=>Logger::FATAL, 'error'=>Logger::ERROR,
40
+ 'warn'=>Logger::WARN, 'info'=>Logger::INFO, 'debug'=>Logger::DEBUG,
41
+ }[(options[:log_level] || '').downcase] || Logger::INFO
42
+ Log.logger = logger
43
+ end
44
+
45
+ def open logdev
46
+ if logdev == '-'
47
+ logdev = STDOUT
48
+ end
49
+ init logdev
50
+ end
51
+
52
+ def close grp=nil
53
+ if @logger
54
+ @logger.close
55
+ else
56
+ if grp
57
+ if (io = IOBUF[grp])
58
+ io.close
59
+ end
60
+ else
61
+ for io in IOBUF.values
62
+ io.close
63
+ end
64
+ end
65
+ end
66
+ end
67
+
68
+ def write grp, str, severity=nil
69
+ if @logger
70
+ if severity.kind_of? Symbol
71
+ severity = @logger.class.const_get severity
72
+ else
73
+ severity ||= @logger.class::INFO
74
+ end
75
+ begin
76
+ logger.add severity, str.chomp
77
+ rescue Logger::Error
78
+ disable
79
+ end
80
+ else
81
+ io = IOBUF[grp]
82
+ if io
83
+ if io.kind_of? File
84
+ #mtime = io.mtime
85
+ tstr = Time.now.strftime("%Y-%m-%d %H:%M:%S ")
86
+ gstr = (grp == :default) ? '' : "#{grp}: "
87
+ io.print tstr + gstr + str
88
+ else
89
+ io.print str
90
+ io.print "\n" if severity #XXX
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ def puts(*args)
97
+ if args.size >= 2 and args[0].kind_of?(Symbol)
98
+ grp = args.shift
99
+ else
100
+ grp = :default
101
+ end
102
+ str = args.join('').chomp + "\n"
103
+ self.write grp, str
104
+ end
105
+
106
+ def pp(*args)
107
+ if args.size >= 2 and args[0].kind_of?(Symbol)
108
+ self.puts args[0], args[1..-1].map {|e| e.inspect}.join(', ')
109
+ else
110
+ self.puts args.map {|e| e.inspect}.join(', ')
111
+ end
112
+ end
113
+
114
+ def debug(*args, &block)
115
+ write :default, args.first, :DEBUG
116
+ end
117
+ def info(*args, &block)
118
+ write :default, args.first, :INFO
119
+ end
120
+ def warn(*args, &block)
121
+ write :default, args.first, :WARN
122
+ end
123
+ def error(*args, &block)
124
+ write :default, args.first, :ERROR
125
+ end
126
+ def fatal(*args, &block)
127
+ write :default, args.first, :FATAL
128
+ end
129
+
130
+ def null(*args, &block)
131
+ end
132
+
133
+ def disable
134
+ @logger = nil
135
+ IOBUF.clear
136
+ end
137
+ end
@@ -0,0 +1,3 @@
1
module Pmux
  # Release version of the pmux gem. Frozen so the shared constant cannot be
  # modified in place by callers.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,91 @@
1
+ require 'timeout'
2
+
3
+ module Pmux
4
+ class Worker
5
+ def initialize options, client=nil
6
+ @options = options
7
+ unless (@client = client)
8
+ timeout(3) {
9
+ @client = init_client(options[:sock_path])
10
+ @client.timeout = 3600
11
+ }
12
+ end
13
+ end
14
+
15
+ def init_client sock_path
16
+ transport = MR::UNIXTransport.new
17
+ MR::Client.new transport, sock_path
18
+ end
19
+
20
+ def run
21
+ Log.debug "W#{$$}: run"
22
+ while true
23
+ task = @client.call :get_task
24
+ result = exec_task task
25
+ @client.call :put_result, result
26
+ end
27
+ Log.debug "W#{$$}: end"
28
+ rescue MR::TimeoutError
29
+ Log.debug "W#{$$}: request timed out"
30
+ rescue MR::TransportError
31
+ Log.debug "W#{$$}: transport closed"
32
+ end
33
+
34
+ def exec_task task
35
+ start_time = Time.now
36
+ if task['task_keys']
37
+ error_ids = []
38
+ fusion_id = task['task_id']
39
+ for task_id, file in task['task_keys']
40
+ ntask = task.merge 'fusion_id'=>fusion_id,
41
+ 'task_id'=>task_id, 'path'=>file
42
+ result = do_one_task ntask
43
+ result.update :task_id=>fusion_id, :task_keys=>task['task_keys']
44
+ end
45
+ else
46
+ result = do_one_task task
47
+ end
48
+ result[:welapse] = Time.now - start_time
49
+ result
50
+ end
51
+
52
+ def do_one_task task
53
+ job_id, task_id, node_addr =
54
+ task.values_at 'job_id', 'task_id', 'node_addr'
55
+ num_r = task['num_r'].to_i
56
+ result = {:job_id=>job_id, :task_id=>task_id, :node_addr=>node_addr}
57
+ exception = nil
58
+ error_level = nil
59
+ if @options and (otmp_dir = @options[:tmp_dir])
60
+ tmp_dir = "#{otmp_dir}/#{job_id}"
61
+ else
62
+ tmp_dir = "/var/tmp/#{job_id}"
63
+ end
64
+ begin
65
+ if task['pindex']
66
+ # reduce task
67
+ result[:reduce] = true
68
+ reducer = StreamingReducer.new task, tmp_dir
69
+ result[:output_path] = reducer.do_reduce_task
70
+ else
71
+ # map task
72
+ result[:map] = true
73
+ mapper = StreamingMapper.new task, tmp_dir
74
+ result[:ifbase] = mapper.do_map_task
75
+ if num_r.zero?
76
+ if (rsize = mapper.result_size)
77
+ result[:result_body] = mapper.result_body if rsize < 1024
78
+ else
79
+ result[:result_body] = ''
80
+ end
81
+ end
82
+ end
83
+ rescue StandardError => e
84
+ #q $!, $@[0] if $test
85
+ result.update :error=>e.class.to_s,
86
+ :error_message=>e.message, :backtrace=>e.backtrace[0]
87
+ end
88
+ result
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,19 @@
1
module Pmux
  # Base class for output writers. Both hooks are no-ops here.
  class Writer
    # Number of bytes read per chunk by subclasses.
    CHUNK_SIZE = 8192

    # Accepts the same optional +path+ as subclasses so that a plain Writer
    # can be used wherever a concrete writer is expected.
    def write(path = nil); end
    def finish; end
  end

  # Copies files to standard output.
  class STDOUTWriter < Writer
    # Streams the file at +path+ to STDOUT in CHUNK_SIZE pieces.
    # Raises the usual Errno::* errors when the file cannot be opened.
    def write(path)
      # File.open, not Kernel#open: the latter would execute +path+ as a
      # command when it starts with "|". Binary mode copies bytes verbatim.
      File.open(path, 'rb') do |f|
        while (data = f.read(CHUNK_SIZE))
          STDOUT.write data
        end
      end
    end
  end
end
data/lib/pmux.rb ADDED
@@ -0,0 +1,27 @@
1
# Process-wide setup performed when this file is loaded, ahead of the
# require list that follows.
# Set the process file-creation mask to 0022.
+ File.umask 0022
2
# Set LC_ALL to 'C' in this process's environment.
+ ENV['LC_ALL'] = 'C'
3
# Make 'ascii-8bit' the default external encoding when the RUBY_VERSION
# string compares greater than '1.9'.
+ Encoding.default_external = 'ascii-8bit' if RUBY_VERSION > '1.9'
4
+
5
+ require 'pmux/version'
6
+ require 'pmux/q'
7
+ require 'pmux/util_logger'
8
+ require 'pmux/fiber18'
9
+ require 'pmux/plugin'
10
+ require 'pmux/mros'
11
+ require 'pmux/pipeio'
12
+ require 'pmux/storage_adapter'
13
+ require 'pmux/job'
14
+ require 'pmux/joblogger'
15
+ require 'pmux/fixcmd'
16
+ require 'pmux/mapper'
17
+ require 'pmux/reducer'
18
+ require 'pmux/worker'
19
+ require 'pmux/handler'
20
+ require 'pmux/writer'
21
+ require 'pmux/gatherer'
22
+ require 'pmux/task_queue'
23
+ require 'pmux/task_scheduler'
24
+ require 'pmux/task_dispatcher'
25
+ require 'pmux/multi_session'
26
+ require 'pmux/cleaner'
27
+ require 'pmux/application'
data/pmux.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "pmux/version"
4
+
5
# Gem specification for pmux. File lists are taken from `git ls-files`, so
# this must be evaluated inside a git checkout.
Gem::Specification.new do |spec|
  spec.name        = "pmux"
  spec.version     = Pmux::VERSION
  spec.authors     = ["maebashi"]
  spec.homepage    = ""
  spec.summary     = %q{lightweight file-based MapReduce system}
  spec.description = %q{lightweight file-based MapReduce system}

  spec.rubyforge_project = "pmux"

  # Every tracked file except those whose path starts with "tmp".
  spec.files         = `git ls-files`.split("\n").reject { |path| /^tmp/ =~ path }
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name only.
  spec.executables   = `git ls-files -- bin/*`.split("\n").map do |path|
    File.basename(path)
  end
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_runtime_dependency "msgpack-rpc"
  spec.add_runtime_dependency "net-scp"
end