costan-tem_mr_search 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/LICENSE +21 -0
- data/Manifest +32 -0
- data/README +4 -0
- data/Rakefile +23 -0
- data/bin/tem_mr_search_server +13 -0
- data/lib/tem_mr_search/client.rb +52 -0
- data/lib/tem_mr_search/client_query.rb +21 -0
- data/lib/tem_mr_search/db.rb +26 -0
- data/lib/tem_mr_search/map_reduce_executor.rb +115 -0
- data/lib/tem_mr_search/map_reduce_job.rb +108 -0
- data/lib/tem_mr_search/map_reduce_planner.rb +169 -0
- data/lib/tem_mr_search/query_builder.rb +167 -0
- data/lib/tem_mr_search/server.rb +90 -0
- data/lib/tem_mr_search/web_client_query_builder.rb +50 -0
- data/lib/tem_mr_search.rb +20 -0
- data/tem_mr_search.gemspec +39 -0
- data/test/mr_test_case.rb +36 -0
- data/test/test_client_server.rb +57 -0
- data/test/test_db.rb +16 -0
- data/test/test_map_reduce_executor.rb +40 -0
- data/test/test_map_reduce_job.rb +65 -0
- data/test/test_map_reduce_planner.rb +55 -0
- data/test/test_query_builders.rb +40 -0
- data/testdata/cluster.yml +3 -0
- data/testdata/empty_cluster.yml +2 -0
- data/testdata/fares.yml +57 -0
- data/testdata/parallel_plan_431.yml +52 -0
- data/testdata/parallel_plan_740.yml +87 -0
- data/testdata/serial_plan_410.yml +36 -0
- data/testdata/serial_plan_431.yml +56 -0
- data/testdata/serial_plan_740.yml +93 -0
- metadata +126 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2009 Massachusetts Institute of Technology
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/Manifest
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
bin/tem_mr_search_server
|
|
2
|
+
CHANGELOG
|
|
3
|
+
lib/tem_mr_search/client.rb
|
|
4
|
+
lib/tem_mr_search/client_query.rb
|
|
5
|
+
lib/tem_mr_search/db.rb
|
|
6
|
+
lib/tem_mr_search/map_reduce_executor.rb
|
|
7
|
+
lib/tem_mr_search/map_reduce_job.rb
|
|
8
|
+
lib/tem_mr_search/map_reduce_planner.rb
|
|
9
|
+
lib/tem_mr_search/query_builder.rb
|
|
10
|
+
lib/tem_mr_search/server.rb
|
|
11
|
+
lib/tem_mr_search/web_client_query_builder.rb
|
|
12
|
+
lib/tem_mr_search.rb
|
|
13
|
+
LICENSE
|
|
14
|
+
Manifest
|
|
15
|
+
Rakefile
|
|
16
|
+
README
|
|
17
|
+
tem_mr_search.gemspec
|
|
18
|
+
test/mr_test_case.rb
|
|
19
|
+
test/test_client_server.rb
|
|
20
|
+
test/test_db.rb
|
|
21
|
+
test/test_map_reduce_executor.rb
|
|
22
|
+
test/test_map_reduce_job.rb
|
|
23
|
+
test/test_map_reduce_planner.rb
|
|
24
|
+
test/test_query_builders.rb
|
|
25
|
+
testdata/cluster.yml
|
|
26
|
+
testdata/empty_cluster.yml
|
|
27
|
+
testdata/fares.yml
|
|
28
|
+
testdata/parallel_plan_431.yml
|
|
29
|
+
testdata/parallel_plan_740.yml
|
|
30
|
+
testdata/serial_plan_410.yml
|
|
31
|
+
testdata/serial_plan_431.yml
|
|
32
|
+
testdata/serial_plan_740.yml
|
data/README
ADDED
data/Rakefile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Build script for the tem_mr_search gem; packaging tasks come from Echoe.
require 'rubygems'
gem 'echoe'
require 'echoe'

Echoe.new('tem_mr_search') do |gem_spec|
  # RubyForge project that hosts the gem and its documentation.
  gem_spec.project = 'tem'
  gem_spec.docs_host = "costan@rubyforge.org:/var/www/gforge-projects/tem/rdoc/"

  gem_spec.author = 'Victor Costan'
  gem_spec.email = 'victor@costan.us'
  gem_spec.summary = 'Tem Map-Reduce proof of concept: database search.'
  gem_spec.url = 'http://tem.rubyforge.org'
  gem_spec.dependencies = ['tem_ruby >=0.11.2', 'tem_multi_proxy >=0.2']

  # Archive packages are only requested on non-Windows platforms.
  build_archives = !Platform.windows?
  gem_spec.need_tar_gz = build_archives
  gem_spec.need_zip = build_archives
  gem_spec.rdoc_pattern = /^(lib|bin|tasks|ext)|^BUILD|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
end

# Lets the file be run directly ("ruby Rakefile") as well as through rake.
if $0 == __FILE__
  Rake.application = Rake::Application.new
  Rake.application.run
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line launcher for the Map-Reduce search server.
#
# Usage: tem_mr_search_server db_file cluster_file [server_port]

require 'rubygems'
require 'tem_mr_search'

# db_file and cluster_file are mandatory, server_port is optional, so both 2
# and 3 arguments are valid. (The previous check rejected the 3-argument form
# that the usage message advertises.)
if ARGV.length < 2 || ARGV.length > 3
  print "Usage: #{$0} db_file cluster_file [server_port]\n"
  exit
end

# server_port is nil when it is not given on the command line.
db_file, cluster_file, server_port = ARGV[0], ARGV[1], ARGV[2]
server = Tem::Mr::Search::Server.new db_file, cluster_file, server_port
server.serve_loop
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# :nodoc: namespace
|
|
2
|
+
module Tem::Mr::Search
|
|
3
|
+
|
|
4
|
+
# Client-side entry points for talking to a Map-Reduce search server.
#
# Each request opens a socket to the server, sends one object, reads one
# object back, and closes the socket (see Client.issue_request).
class Client
  OP = Zerg::Support::Protocols::ObjectProtocol
  OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP

  # Performs a private database search using a Map-Reduce.
  #
  # Returns the server's output run through client_query.unpack_output, or nil
  # when the server returned nothing or the request failed.
  def self.search(server_addr, client_query)
    output = issue_request server_addr, :type => :search, :root_tem => 0,
                           :map_reduce => client_query.to_hash
    output ? client_query.unpack_output(output) : nil
  end

  # Asks for an item in the server's database.
  #
  # In production, there should be per-client rate-limiting on this request.
  def self.fetch_item(server_addr, item_id)
    issue_request server_addr, :type => :fetch, :id => item_id
  end

  # Terminates the server.
  #
  # In production, normal clients wouldn't have access to this.
  def self.shutdown_server(server_addr)
    issue_request server_addr, :type => :shutdown
  end

  # Dumps the server database.
  #
  # In production, normal clients wouldn't have access to this.
  def self.dump_database(server_addr)
    issue_request server_addr, :type => :db_dump
  end

  # Issues a request against a Map-Reduce server and returns the response.
  #
  # Returns nil if sending the request or receiving the response raises a
  # StandardError. The socket is closed in all cases.
  def self.issue_request(server_addr, request)
    socket = Zerg::Support::SocketFactory.socket :out_addr => server_addr,
        :out_port => Server::DEFAULT_PORT
    socket.extend OPAdapter
    begin
      socket.send_object request
      # The original code read "response = socket.recv_object response", which
      # passes the not-yet-assigned local (i.e. nil). The nil is kept explicit
      # so the call made to the adapter is unchanged.
      # NOTE(review): the argument looks unintended; confirm against the
      # adapter's recv_object signature before dropping it.
      socket.recv_object nil
    rescue
      nil
    ensure
      # Best-effort close; runs even when a non-StandardError propagates.
      socket.close rescue nil
    end
  end
end
|
|
51
|
+
|
|
52
|
+
end # namespace Tem::Mr::Search
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# :nodoc: namespace
|
|
2
|
+
module Tem::Mr::Search
|
|
3
|
+
|
|
4
|
+
# A Map-Reduce job as seen by the client that issues it.
class ClientQuery < MapReduceJob
  # Builds the query from the same attribute hash MapReduceJob accepts, and
  # additionally remembers the value stored under :key.
  def initialize(attributes)
    super(attributes)
    @query_key = attributes[:key]
  end

  # Unpacks a reduce output into its components.
  #
  # This is expected to be called with the encrypted output returned by the
  # search provider.
  def unpack_output(output)
    # TODO(costan): decrypt output once we enable encryption
    # Until then the output is handed over unchanged.
    unpack_decrypted_output output
  end
end
|
|
20
|
+
|
|
21
|
+
end # namespace Tem::Mr::Search
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# :nodoc: namespace
|
|
2
|
+
module Tem::Mr::Search
|
|
3
|
+
|
|
4
|
+
# In-memory database backed by a YAML file.
#
# The loaded document is indexed with integers (item) and searched with find
# (item_by_id), so it is expected to be a list of hash-like records.
class Db
  # The records loaded from the YAML file.
  attr_reader :data
  # Name of the record field that item_by_id compares against.
  attr_reader :id_attribute

  # Loads the database from a YAML file.
  #
  # Arguments:
  #   path:: path to the YAML file holding the records
  #   id_attribute:: (optional) record field used as the item ID; defaults to
  #                  'flight', the value that used to be hard-coded
  def initialize(path, id_attribute = 'flight')
    # NOTE(review): YAML.load can instantiate arbitrary objects on older
    # Rubies; only point this at trusted database files.
    loaded = File.open(path, 'r') { |f| YAML.load f }
    # An empty file loads as false/nil; treat it as an empty database so that
    # length and item_by_id keep working.
    @data = loaded || []
    @id_attribute = id_attribute
  end

  # Number of records in the database.
  def length
    @data.length
  end

  # The record at the given position.
  def item(item_index)
    @data[item_index]
  end

  # The first record whose ID field equals item_id, or nil if there is none.
  def item_by_id(item_id)
    @data.find { |item| item[@id_attribute] == item_id }
  end
end
|
|
25
|
+
|
|
26
|
+
end # namespace Tem::Mr::Search
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
require 'thread'
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# :nodoc: namespace
|
|
5
|
+
module Tem::Mr::Search
|
|
6
|
+
|
|
7
|
+
# Runs a Map-Reduce job by asking a planner for actions and dispatching each
# action to a worker thread dedicated to the TEM that must perform it.
class MapReduceExecutor
  # Creates an executor for a Map-Reduce job.
  #
  # Arguments:
  #   job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   db:: the database to run Map-Reduce over
  #   tems:: sessions to the available TEMs
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  #   planner_class:: (optional) replacement for the default planner strategy
  def initialize(job, db, tems, root_tem, planner_class = nil)
    planner_class ||= MapReducePlanner

    @job = job    # Writable only in main thread.
    @db = db      # Writable only in main thread.
    @tems = tems  # Writable only in main thread.

    # Writable only in main thread.
    # The planner receives the job argument; the previous code passed @job
    # before it was ever assigned, so planners always saw nil.
    @planner = planner_class.new job, db.length, tems.length, root_tem

    # Protected by @lock
    @tem_parts = { :mapper => { root_tem => job.mapper },
                   :reducer => { root_tem => job.reducer },
                   :finalizer => { root_tem => job.finalizer } }
    # Protected by @lock
    @outputs = {}

    # Thread-safe.
    @thread_queues = tems.map { Queue.new }
    @main_queue = Queue.new
    @lock = Mutex.new
  end

  # Executes the job.
  #
  # Returns the output stored under the planner's final output ID.
  def execute
    # Spawn TEM threads.
    @tems.each_index do |tem_index|
      Thread.new(tem_index) { |index| executor_thread index }
    end

    begin
      until @planner.done?
        actions = @planner.next_actions!
        @lock.synchronize do
          actions.each { |action| @thread_queues[action[:with]] << action }
        end

        # Wait for one outstanding action to complete before re-planning.
        action = @main_queue.pop
        @planner.action_done action
      end
    ensure
      # A nil entry ends the pop loop in executor_thread. Without it every
      # worker stays blocked on its queue forever after the job completes.
      @thread_queues.each { |queue| queue << nil }
    end

    @outputs[@planner.output_id]
  end

  # Main method for thread in charge of a TEM.
  #
  # Processes queued actions until a nil entry is popped.
  def executor_thread(tem_index)
    queue = @thread_queues[tem_index]
    while action = queue.pop
      execute_action action, tem_index
      @main_queue << action
    end
  end

  # Executes a Map-Reduce planner action.
  #
  # This method is called on the thread corresponding to the TEM that the action
  # is supposed to execute on.
  def execute_action(action, tem_index)
    case action[:action]
    when :migrate
      in_part = @lock.synchronize { @tem_parts[action[:secpack]][tem_index] }
      out_part = in_part  # TODO(costan): actual migration
      @lock.synchronize do
        @tem_parts[action[:secpack]][action[:to]] = out_part
      end

    when :map
      mapper, item = nil, nil
      @lock.synchronize do
        mapper = @tem_parts[:mapper][tem_index]
        item = @db.item(action[:item])
      end
      # The TEM call happens outside the lock.
      output = mapper.map_object item, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :reduce
      reducer, output1, output2 = nil, nil, nil
      @lock.synchronize do
        reducer = @tem_parts[:reducer][tem_index]
        output1 = @outputs[action[:output1_id]]
        output2 = @outputs[action[:output2_id]]
      end
      output = reducer.reduce_outputs output1, output2, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :finalize
      # Both locals are declared up front so the block below assigns them
      # instead of creating block-local variables.
      finalizer, output = nil, nil
      @lock.synchronize do
        finalizer = @tem_parts[:finalizer][tem_index]
        output = @outputs[action[:output_id]]
      end
      final_output = finalizer.finalize_output output, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:final_id]] = final_output
      end
    end
  end
end
|
|
114
|
+
|
|
115
|
+
end # namespace Tem::Mr::Search
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# :nodoc: namespace
|
|
2
|
+
module Tem::Mr::Search
|
|
3
|
+
|
|
4
|
+
# A Map-Reduce job: the map, reduce and finalize SECpacks plus the description
# of the item attributes that get bound into the map SECpack.
class MapReduceJob
  attr_reader :mapper, :reducer, :finalizer, :attributes, :id_attribute

  # Creates a job from a hash, typically one produced by to_hash.
  #
  # Recognized keys:
  #   :attributes:: list of hashes with :name and :type, one per item attribute
  #   :id_attribute:: name of the item field holding the item's ID
  #   :map, :reduce, :finalize:: the SECpacks, either Tem::SecPack instances or
  #                              their array form; nil means "no SECpack"
  def initialize(attributes)
    @attributes = attributes[:attributes]
    @id_attribute = attributes[:id_attribute]

    # The parts read attributes / id_attribute from the job, so those must be
    # assigned before the parts are built.
    @mapper = Mapper.new attributes[:map], self
    @reducer = Reducer.new attributes[:reduce], self
    @finalizer = Finalizer.new attributes[:finalize], self
  end

  # Unpacks a decrypted output into its components.
  #
  # Returns a hash with :id (bytes 0-7), :score (TEM short at offset 8) and
  # :check (3 bytes starting at offset 13).
  def unpack_decrypted_output(output)
    {
      # The 8 ID bytes are reversed before being read as a native 64-bit int.
      :id => output[0, 8].reverse.pack('C*').unpack('q').first,
      :score => Tem::Abi.read_tem_short(output, 8),
      :check => output[13, 3]
    }
  end

  # Serializes a job to a hash.
  #
  # Useful in conjunction with ObjectProtocol in ZergSupport, for sending jobs
  # across the wire. De-serialize with MapReduceJob#new
  def to_hash
    { :attributes => @attributes, :id_attribute => @id_attribute,
      :map => @mapper.to_plain_object, :reduce => @reducer.to_plain_object,
      :finalize => @finalizer.to_plain_object }
  end

  # Base class for the Map-Reduce SECpack wrappers.
  class JobPart
    # Wraps a SECpack given as nil, a Tem::SecPack, or the SECpack's array form.
    def initialize(secpack, job)
      unless secpack.nil? || secpack.kind_of?(Tem::SecPack)
        secpack = Tem::SecPack.new_from_array secpack
      end
      @secpack = secpack
    end

    # The array form of the wrapped SECpack, or nil if there is none.
    def to_plain_object
      return nil if @secpack.nil?
      @secpack.to_array
    end
  end

  # Wrapper for the map SECpack.
  class Mapper < JobPart
    def initialize(secpack, job)
      super
      @attributes = job.attributes
      @id_attribute = job.id_attribute
    end

    # Returns a SECpack for mapping the given object data into the query.
    #
    # Returns nil when the job has no map SECpack.
    def map_for_object(object_data)
      return nil unless @secpack
      item_id = object_data[@id_attribute.to_s]
      # Work on a copy so the template SECpack is never modified.
      new_secpack = Tem::SecPack.new_from_array @secpack.to_array
      new_secpack.set_bytes :_id, [item_id].pack('q').unpack('C*').reverse
      @attributes.each do |attribute|
        name, type = attribute[:name], attribute[:type]
        new_secpack.set_value name.to_sym, type, object_data[name.to_s]
      end
      new_secpack
    end

    # Maps the given object into the query.
    #
    # Without a map SECpack, the object data itself is the map output.
    def map_object(object_data, tem)
      secpack = map_for_object object_data
      secpack ? tem.execute(secpack) : object_data
    end
  end

  # Wrapper for the reduce SECpack.
  class Reducer < JobPart
    # Returns a SECpack for reducing two inputs coming from maps or other reduces.
    def reduce_for_outputs(output1, output2)
      new_secpack = Tem::SecPack.new_from_array @secpack.to_array

      new_secpack.set_bytes :_output1, output1
      new_secpack.set_bytes :_output2, output2
      new_secpack
    end

    # Reduces two inputs coming from maps or other reduces.
    def reduce_outputs(output1, output2, tem)
      secpack = reduce_for_outputs output1, output2
      tem.execute secpack
    end
  end

  # Wrapper for the finalize SECpack.
  class Finalizer < JobPart
    # Converts a map/reduce output into the final result for the operation.
    #
    # Without a finalize SECpack, the output is returned unchanged.
    def finalize_output(output, tem)
      return output unless @secpack
      # JobPart stores the SECpack in @secpack; the previous code read
      # @finalize_secpack, which is never assigned and therefore always nil.
      secpack = Tem::SecPack.new_from_array @secpack.to_array
      secpack.set_bytes :_output, output
      tem.execute secpack
    end
  end
end
|
|
107
|
+
|
|
108
|
+
end # namespace Tem::Mr::Search
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
require 'rbtree'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
# :nodoc: namespace
|
|
5
|
+
module Tem::Mr::Search
|
|
6
|
+
|
|
7
|
+
# Decides which Map-Reduce actions (migrate, map, reduce, finalize) can be
# started on which TEM, given the actions that have completed so far.
#
# The planner only does bookkeeping: callers obtain actions from next_actions!,
# carry them out, and report each completed action through action_done.
class MapReducePlanner
  # Creates a planner for a Map-Reduce job.
  #
  # Arguments:
  #   job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   num_items:: how many data items does the Map-Reduce run over
  #   num_tems:: how many TEMs are available
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  def initialize(job, num_items, num_tems, root_tem)
    @job = job
    @root_tem = root_tem

    # TEMs still missing / already holding each SECpack type.
    @without = { :mapper => RBTree.new, :reducer => RBTree.new }
    @with = { :mapper => Set.new([root_tem]),
              :reducer => Set.new([root_tem]) }
    # TEMs that are not currently executing an action.
    @free_tems = RBTree.new
    0.upto(num_tems - 1) do |tem|
      @free_tems[tem] = true
      next if tem == root_tem
      @without.each_value { |tems| tems[tem] = true }
    end

    # Reversed so that pop hands out items in increasing index order.
    @unmapped_items = (0...num_items).to_a.reverse
    # IDs of outputs waiting to be reduced.
    @reduce_queue = RBTree.new
    @last_output_id = 0
    # Output IDs are consecutive: num_items maps, then num_items - 1 reduces,
    # so the last reduce produces ID 2 * num_items - 2.
    @last_reduce_id = 2 * num_items - 2
    @done_reducing, @output_id = false, nil
  end

  # Generates migrating actions for a SECpack type that are possible now.
  def migrate_actions(sec_type)
    actions = []
    return actions if @without[sec_type].length == 0
    free_tems_with_sec(sec_type).each do |source_tem|
      break if @without[sec_type].length == 0
      target_tem = @without[sec_type].min.first
      @without[sec_type].delete target_tem
      # The source TEM is busy until the migration is reported done.
      @free_tems.delete source_tem
      actions.push :action => :migrate, :secpack => sec_type,
                   :with => source_tem, :to => target_tem
    end
    actions
  end
  private :migrate_actions

  # Informs the planner that a SECpack migration has completed.
  def done_migrating(action)
    @free_tems[action[:with]] = true
    @with[action[:secpack]] << action[:to]
  end
  private :done_migrating

  # A sorted array of the free TEMs that have a SECpack type.
  def free_tems_with_sec(sec_type)
    tems = []
    @free_tems.each do |tem, _|
      tems << tem if @with[sec_type].include? tem
    end
    tems
  end

  # A unique output_id.
  def next_output_id
    next_id = @last_output_id
    @last_output_id += 1
    next_id
  end

  # Generates mapping actions possible right now.
  def map_actions
    actions = []
    return actions if @unmapped_items.empty?
    free_tems_with_sec(:mapper).each do |tem|
      break unless item = @unmapped_items.pop
      @free_tems.delete tem
      actions.push :action => :map, :item => item, :with => tem,
                   :output_id => next_output_id
    end
    actions
  end
  private :map_actions

  # Informs the planner that a data mapping has completed.
  def done_mapping(action)
    @free_tems[action[:with]] = true
    # With a single item there is nothing to reduce: the lone map output
    # already carries the final reduce ID. Without this check such a job
    # would never reach the finalize step.
    if action[:output_id] == @last_reduce_id
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_mapping

  # Generates reducing actions possible right now.
  def reduce_actions
    actions = []
    return actions if @reduce_queue.length <= 1
    free_tems_with_sec(:reducer).each do |tem|
      break if @reduce_queue.length <= 1
      # Take the two smallest pending output IDs.
      output1_id, output2_id = (0..1).map do
        output_id = @reduce_queue.min.first
        @reduce_queue.delete output_id
        output_id
      end
      @free_tems.delete tem
      actions.push :action => :reduce, :with => tem, :output1_id => output1_id,
                   :output2_id => output2_id, :output_id => next_output_id
    end
    actions
  end
  private :reduce_actions

  # Informs the planner that a data reduction has completed.
  def done_reducing(action)
    @free_tems[action[:with]] = true
    if action[:output_id] == @last_reduce_id
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_reducing

  # Generates finalizing actions possible right now.
  def finalize_actions
    return [] unless @done_reducing && !@output_id && @free_tems[@root_tem]
    # Mark the root TEM busy, like every other action does. Otherwise a second
    # next_actions! call made before the finalize completes would issue the
    # finalize action again.
    @free_tems.delete @root_tem
    [{ :action => :finalize, :with => @root_tem,
       :output_id => @last_reduce_id, :final_id => next_output_id }]
  end
  private :finalize_actions

  # Informs the planner that the finalize step has completed.
  def done_finalizing(action)
    @free_tems[action[:with]] = true
    @output_id = action[:final_id]
  end
  private :done_finalizing

  # True when the Map-Reduce job is complete.
  def done?
    !@output_id.nil?
  end

  # The output ID of the Map-Reduce's final result.
  attr_reader :output_id

  # Informs the planner that an action issued by next_actions was completed.
  def action_done(action)
    dispatch = { :migrate => :done_migrating, :map => :done_mapping,
                 :reduce => :done_reducing, :finalize => :done_finalizing }
    # send is used because the done_* handlers are private.
    self.send dispatch[action[:action]], action
  end

  # Issues a set of actions that can be performed right now.
  #
  # May return an empty array when every possible action is waiting on one
  # that has not been reported done yet.
  def next_actions!
    actions = migrate_actions :mapper
    actions += migrate_actions :reducer
    actions += map_actions
    actions += reduce_actions
    actions += finalize_actions
    actions
  end
end
|
|
168
|
+
|
|
169
|
+
end # namespace Tem::Mr::Search
|