tem_mr_search 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -0
- data/Manifest +8 -1
- data/bin/tem_mr_search_server +13 -0
- data/lib/tem_mr_search/client.rb +44 -0
- data/lib/tem_mr_search/db.rb +7 -0
- data/lib/tem_mr_search/map_reduce_executor.rb +17 -16
- data/lib/tem_mr_search/map_reduce_job.rb +85 -44
- data/lib/tem_mr_search/server.rb +74 -0
- data/lib/tem_mr_search/web_client_query_builder.rb +50 -0
- data/lib/tem_mr_search.rb +3 -0
- data/tem_mr_search.gemspec +7 -5
- data/test/mr_test_case.rb +11 -33
- data/test/test_client_server.rb +44 -0
- data/test/test_db.rb +9 -3
- data/test/test_map_reduce_executor.rb +8 -9
- data/test/test_map_reduce_job.rb +24 -4
- data/test/{test_query_builder.rb → test_query_builders.rb} +3 -3
- data/testdata/cluster.yml +3 -0
- data/testdata/empty_cluster.yml +2 -0
- data/testdata/fares.yml +44 -36
- metadata +18 -6
data/CHANGELOG
CHANGED
data/Manifest
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
+
bin/tem_mr_search_server
|
1
2
|
CHANGELOG
|
3
|
+
lib/tem_mr_search/client.rb
|
2
4
|
lib/tem_mr_search/client_query.rb
|
3
5
|
lib/tem_mr_search/db.rb
|
4
6
|
lib/tem_mr_search/map_reduce_executor.rb
|
5
7
|
lib/tem_mr_search/map_reduce_job.rb
|
6
8
|
lib/tem_mr_search/map_reduce_planner.rb
|
7
9
|
lib/tem_mr_search/query_builder.rb
|
10
|
+
lib/tem_mr_search/server.rb
|
11
|
+
lib/tem_mr_search/web_client_query_builder.rb
|
8
12
|
lib/tem_mr_search.rb
|
9
13
|
LICENSE
|
10
14
|
Manifest
|
@@ -12,11 +16,14 @@ Rakefile
|
|
12
16
|
README
|
13
17
|
tem_mr_search.gemspec
|
14
18
|
test/mr_test_case.rb
|
19
|
+
test/test_client_server.rb
|
15
20
|
test/test_db.rb
|
16
21
|
test/test_map_reduce_executor.rb
|
17
22
|
test/test_map_reduce_job.rb
|
18
23
|
test/test_map_reduce_planner.rb
|
19
|
-
test/
|
24
|
+
test/test_query_builders.rb
|
25
|
+
testdata/cluster.yml
|
26
|
+
testdata/empty_cluster.yml
|
20
27
|
testdata/fares.yml
|
21
28
|
testdata/parallel_plan_431.yml
|
22
29
|
testdata/parallel_plan_740.yml
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line launcher for the Tem Map-Reduce search server.
#
# Usage: tem_mr_search_server db_file cluster_file [server_port]

require 'rubygems'
require 'tem_mr_search'

# db_file and cluster_file are required and server_port is optional, so both
# 2-argument and 3-argument invocations are valid. Anything else prints the
# usage line and exits.
unless (2..3).include? ARGV.length
  print "Usage: #{$0} db_file cluster_file [server_port]\n"
  exit
end

# ARGV[2] is nil when the port is omitted; it is handed to the server as-is.
# NOTE(review): when given, the port is passed on as a String -- confirm that
# the socket factory used by the server accepts that form.
db_file, cluster_file, server_port = ARGV[0], ARGV[1], ARGV[2]
server = Tem::Mr::Search::Server.new db_file, cluster_file, server_port
server.serve_loop
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
# Client side of the Map-Reduce search server protocol.
#
# Every request opens a new socket to the server, sends one request object,
# reads one response object, and closes the socket.
class Client
  OP = Zerg::Support::Protocols::ObjectProtocol
  OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP

  # Performs a private database search using a Map-Reduce.
  #
  # Arguments:
  #   server_addr:: the address of the Map-Reduce server
  #   client_query:: the query to run; it is serialized with to_hash, and the
  #                  server's response is decoded with its unpack_output
  #
  # Returns whatever client_query.unpack_output produces for the response.
  def self.search(server_addr, client_query)
    output = issue_request server_addr, :type => :search, :root_tem => 0,
                           :map_reduce => client_query.to_hash
    client_query.unpack_output output
  end

  # Asks for an item in the server's database.
  #
  # In production, there should be per-client rate-limiting on this request.
  #
  # Returns the server's response, or nil if the exchange failed.
  def self.fetch_item(server_addr, item_id)
    issue_request server_addr, :type => :fetch, :id => item_id
  end

  # Terminates the server.
  #
  # In production, normal clients wouldn't have access to this.
  #
  # Returns the server's response, or nil if the exchange failed.
  def self.shutdown_server(server_addr)
    issue_request server_addr, :type => :shutdown
  end

  # Issues a request against a Map-Reduce server and returns the response.
  #
  # Arguments:
  #   server_addr:: the address of the Map-Reduce server
  #   request:: the request object (a Hash with a :type key) to be sent
  #
  # Returns the object received from the server, or nil if sending or
  # receiving raised an error.
  def self.issue_request(server_addr, request)
    socket = Zerg::Support::SocketFactory.socket :out_addr => server_addr
    socket.extend OPAdapter
    begin
      socket.send_object request
      # recv_object is called without arguments, the same way the server reads
      # requests. Passing anything here would only hand it a stray nil.
      socket.recv_object
    rescue
      # Best-effort: a failed exchange is reported to the caller as nil.
      nil
    ensure
      # Always release the socket; errors while closing are ignored.
      socket.close rescue nil
    end
  end
end
|
43
|
+
|
44
|
+
end # namespace Tem::Mr::Search
|
data/lib/tem_mr_search/db.rb
CHANGED
@@ -3,8 +3,11 @@ module Tem::Mr::Search
|
|
3
3
|
|
4
4
|
class Db
|
5
5
|
attr_reader :data
|
6
|
+
attr_reader :id_attribute
|
7
|
+
|
6
8
|
def initialize(path)
|
7
9
|
@data = File.open(path, 'r') { |f| YAML.load f }
|
10
|
+
@id_attribute = 'flight'
|
8
11
|
end
|
9
12
|
|
10
13
|
def length
|
@@ -14,6 +17,10 @@ class Db
|
|
14
17
|
def item(item_index)
|
15
18
|
@data[item_index]
|
16
19
|
end
|
20
|
+
|
21
|
+
def item_by_id(item_id)
|
22
|
+
@data.find { |item| item[@id_attribute] == item_id }
|
23
|
+
end
|
17
24
|
end
|
18
25
|
|
19
26
|
end # namespace Tem::Mr::Search
|
@@ -8,12 +8,12 @@ class MapReduceExecutor
|
|
8
8
|
# Creates an executor for a Map-Reduce job.
|
9
9
|
#
|
10
10
|
# Arguments:
|
11
|
-
#
|
11
|
+
# job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
|
12
12
|
# db:: the database to run Map-Reduce over
|
13
13
|
# tems:: sessions to the available TEMs
|
14
14
|
# root_tem:: the index of the TEM that has the root mapper and reducer
|
15
15
|
# planner_class:: (optional) replacement for the default planner strategy
|
16
|
-
def initialize(
|
16
|
+
def initialize(job, db, tems, root_tem, planner_class = nil)
|
17
17
|
planner_class ||= MapReducePlanner
|
18
18
|
|
19
19
|
@db = db # Writable only in main thread.
|
@@ -23,9 +23,9 @@ class MapReduceExecutor
|
|
23
23
|
@planner = planner_class.new @job, db.length, tems.length, root_tem
|
24
24
|
|
25
25
|
# Protected by @lock
|
26
|
-
@
|
27
|
-
|
28
|
-
|
26
|
+
@tem_parts = { :mapper => { root_tem => job.mapper },
|
27
|
+
:reducer => { root_tem => job.reducer },
|
28
|
+
:finalizer => { root_tem => job.finalizer } }
|
29
29
|
# Protected by @lock
|
30
30
|
@outputs = {}
|
31
31
|
|
@@ -69,41 +69,42 @@ class MapReduceExecutor
|
|
69
69
|
def execute_action(action, tem_index)
|
70
70
|
case action[:action]
|
71
71
|
when :migrate
|
72
|
-
|
73
|
-
|
72
|
+
in_part = @lock.synchronize { @tem_parts[action[:secpack]][tem_index] }
|
73
|
+
out_part = in_part # TODO(costan): actual migration
|
74
74
|
@lock.synchronize do
|
75
|
-
@
|
75
|
+
@tem_parts[action[:secpack]][action[:to]] = out_part
|
76
76
|
end
|
77
77
|
|
78
78
|
when :map
|
79
|
-
|
79
|
+
mapper, item = nil, nil
|
80
80
|
@lock.synchronize do
|
81
|
-
|
81
|
+
mapper = @tem_parts[:mapper][tem_index]
|
82
82
|
item = @db.item(action[:item])
|
83
83
|
end
|
84
|
-
output =
|
84
|
+
output = mapper.map_object item, @tems[tem_index]
|
85
85
|
@lock.synchronize do
|
86
86
|
@outputs[action[:output_id]] = output
|
87
87
|
end
|
88
88
|
|
89
89
|
when :reduce
|
90
|
-
|
90
|
+
reducer, output1, output2 = nil, nil, nil
|
91
91
|
@lock.synchronize do
|
92
|
-
|
92
|
+
reducer = @tem_parts[:reducer][tem_index]
|
93
93
|
output1 = @outputs[action[:output1_id]]
|
94
94
|
output2 = @outputs[action[:output2_id]]
|
95
95
|
end
|
96
|
-
output =
|
96
|
+
output = reducer.reduce_outputs output1, output2, @tems[tem_index]
|
97
97
|
@lock.synchronize do
|
98
98
|
@outputs[action[:output_id]] = output
|
99
99
|
end
|
100
100
|
|
101
101
|
when :finalize
|
102
|
+
finalizer = nil
|
102
103
|
@lock.synchronize do
|
103
|
-
|
104
|
+
finalizer = @tem_parts[:finalizer][tem_index]
|
104
105
|
output = @outputs[action[:output_id]]
|
105
106
|
end
|
106
|
-
final_output =
|
107
|
+
final_output = finalizer.finalize_output output, @tems[tem_index]
|
107
108
|
@lock.synchronize do
|
108
109
|
@outputs[action[:final_id]] = final_output
|
109
110
|
end
|
@@ -2,66 +2,107 @@
|
|
2
2
|
module Tem::Mr::Search
|
3
3
|
|
4
4
|
class MapReduceJob
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :mapper, :reducer, :finalizer, :attributes, :id_attribute
|
6
6
|
|
7
7
|
def initialize(attributes)
|
8
|
-
@map_secpack = attributes[:map]
|
9
|
-
@reduce_secpack = attributes[:reduce]
|
10
|
-
@finalize_secpack = attributes[:finalize]
|
11
8
|
@attributes = attributes[:attributes]
|
12
9
|
@id_attribute = attributes[:id_attribute]
|
10
|
+
|
11
|
+
@mapper = Mapper.new attributes[:map], self
|
12
|
+
@reducer = Reducer.new attributes[:reduce], self
|
13
|
+
@finalizer = Finalizer.new attributes[:finalize], self
|
13
14
|
end
|
14
15
|
|
15
|
-
#
|
16
|
-
def
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
name, type = attribute[:name], attribute[:type]
|
23
|
-
secpack.set_value name.to_sym, type, object_data[name.to_s]
|
24
|
-
end
|
25
|
-
secpack
|
16
|
+
# Unpacks a decrypted output into its components.
|
17
|
+
def unpack_decrypted_output(output)
|
18
|
+
{
|
19
|
+
:id => output[0, 8].reverse.pack('C*').unpack('q').first,
|
20
|
+
:score => Tem::Abi.read_tem_short(output, 8),
|
21
|
+
:check => output[13, 3]
|
22
|
+
}
|
26
23
|
end
|
27
24
|
|
28
|
-
#
|
29
|
-
|
30
|
-
|
31
|
-
|
25
|
+
# Serializes a job to a hash.
|
26
|
+
#
|
27
|
+
# Useful in conjunction with ObjectProtocol in ZergSupport, for sending jobs
|
28
|
+
# across the wire. De-serialize with MapReduceJob#new
|
29
|
+
def to_hash
|
30
|
+
{ :attributes => @attributes, :id_attribute => @id_attribute,
|
31
|
+
:map => @mapper.to_plain_object, :reduce => @reducer.to_plain_object,
|
32
|
+
:finalize => @finalizer.to_plain_object }
|
32
33
|
end
|
33
34
|
|
34
|
-
#
|
35
|
-
|
36
|
-
secpack
|
35
|
+
# Base class for the Map-Reduce SECpack wrappers.
|
36
|
+
class JobPart
|
37
|
+
def initialize(secpack, job)
|
38
|
+
unless secpack.nil? or secpack.kind_of? Tem::SecPack
|
39
|
+
secpack = Tem::SecPack.new_from_array secpack
|
40
|
+
end
|
41
|
+
@secpack = secpack
|
42
|
+
end
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
def to_plain_object
|
45
|
+
return nil if @secpack.nil?
|
46
|
+
@secpack.to_array
|
47
|
+
end
|
41
48
|
end
|
49
|
+
|
50
|
+
# Wrapper for the map SECpack.
|
51
|
+
class Mapper < JobPart
|
52
|
+
def initialize(secpack, job)
|
53
|
+
super
|
54
|
+
@attributes = job.attributes
|
55
|
+
@id_attribute = job.id_attribute
|
56
|
+
end
|
57
|
+
|
58
|
+
# Returns a SECpack for mapping the given object data into the query.
|
59
|
+
def map_for_object(object_data)
|
60
|
+
return nil unless @secpack
|
61
|
+
object_id = object_data[@id_attribute.to_s]
|
62
|
+
new_secpack = Tem::SecPack.new_from_array @secpack.to_array
|
63
|
+
new_secpack.set_bytes :_id, [object_id].pack('q').unpack('C*').reverse
|
64
|
+
@attributes.each do |attribute|
|
65
|
+
name, type = attribute[:name], attribute[:type]
|
66
|
+
new_secpack.set_value name.to_sym, type, object_data[name.to_s]
|
67
|
+
end
|
68
|
+
new_secpack
|
69
|
+
end
|
70
|
+
|
71
|
+
# Maps the given object into the query.
|
72
|
+
def map_object(object_data, tem)
|
73
|
+
secpack = map_for_object object_data
|
74
|
+
secpack ? tem.execute(secpack) : object_data
|
75
|
+
end
|
76
|
+
end
|
42
77
|
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
78
|
+
# Wrapper for the reduce SECpack.
|
79
|
+
class Reducer < JobPart
|
80
|
+
# Returns a SECpack for reducing two inputs coming from maps or other reduces.
|
81
|
+
def reduce_for_outputs(output1, output2)
|
82
|
+
new_secpack = Tem::SecPack.new_from_array @secpack.to_array
|
83
|
+
|
84
|
+
new_secpack.set_bytes :_output1, output1
|
85
|
+
new_secpack.set_bytes :_output2, output2
|
86
|
+
new_secpack
|
87
|
+
end
|
48
88
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
tem.execute secpack
|
89
|
+
# Reduces two inputs coming from maps or other reduces.
|
90
|
+
def reduce_outputs(output1, output2, tem)
|
91
|
+
secpack = reduce_for_outputs output1, output2
|
92
|
+
tem.execute secpack
|
93
|
+
end
|
55
94
|
end
|
56
95
|
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
96
|
+
# Wrapper for the finalize SECpack.
class Finalizer < JobPart
  # Converts a map/reduce output into the final result for the operation.
  #
  # Arguments:
  #   output:: the map/reduce output to be finalized
  #   tem:: the TEM session that executes the finalize SECpack
  #
  # Returns output unchanged when the job has no finalize SECpack; otherwise
  # returns the result of executing the SECpack on the given TEM.
  def finalize_output(output, tem)
    return output unless @secpack

    # JobPart stores the SECpack in @secpack. A fresh SECpack is built from its
    # array form, and the output bytes are set on that new object.
    secpack = Tem::SecPack.new_from_array @secpack.to_array
    secpack.set_bytes :_output, output
    tem.execute secpack
  end
end
|
65
106
|
end
|
66
107
|
|
67
108
|
end # namespace Tem::Mr::Search
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# :nodoc: namespace
|
5
|
+
module Tem::Mr::Search
|
6
|
+
|
7
|
+
class Server
|
8
|
+
OP = Zerg::Support::Protocols::ObjectProtocol
|
9
|
+
OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP
|
10
|
+
|
11
|
+
# Creates a new Map-Reduce server (master).
|
12
|
+
def initialize(db_file, cluster_file, port)
|
13
|
+
@logger = Logger.new STDERR
|
14
|
+
@db = Db.new db_file
|
15
|
+
@tems = Server.tems_from_cluster_file cluster_file
|
16
|
+
@port = port
|
17
|
+
end
|
18
|
+
|
19
|
+
# This server's loop.
|
20
|
+
def serve_loop
|
21
|
+
listen_socket = Zerg::Support::SocketFactory.socket :in_port => @port
|
22
|
+
listen_socket.listen
|
23
|
+
shutdown_received = false
|
24
|
+
until shutdown_received
|
25
|
+
begin
|
26
|
+
client_socket, client_addr = listen_socket.accept
|
27
|
+
client_socket.extend OPAdapter
|
28
|
+
request = client_socket.recv_object
|
29
|
+
begin
|
30
|
+
response = process_request request
|
31
|
+
rescue Exception => e
|
32
|
+
@logger.error e
|
33
|
+
response = nil
|
34
|
+
end
|
35
|
+
client_socket.send_object response if response
|
36
|
+
shutdown_received = true if response == :shutdown
|
37
|
+
rescue Exception => e
|
38
|
+
@logger.error e
|
39
|
+
end
|
40
|
+
client_socket.close rescue nil
|
41
|
+
end
|
42
|
+
listen_socket.close
|
43
|
+
end
|
44
|
+
|
45
|
+
# Computes the response of a single request.
|
46
|
+
def process_request(request)
|
47
|
+
case request[:type]
|
48
|
+
when :search
|
49
|
+
job = MapReduceJob.new request[:map_reduce]
|
50
|
+
root_tem = request[:root_tem]
|
51
|
+
executor = MapReduceExecutor.new job, @db, @tems, root_tem
|
52
|
+
return executor.execute
|
53
|
+
when :fetch
|
54
|
+
return @db.item_by_id(request[:id]) || :not_found
|
55
|
+
when :shutdown
|
56
|
+
return :shutdown
|
57
|
+
else
|
58
|
+
return :unknown
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Creates sessions to all the TEMs in a cluster.
|
63
|
+
def self.tems_from_cluster_file(cluster_file)
|
64
|
+
cluster_hosts = File.open(cluster_file, 'r') { |f| YAML.load f }
|
65
|
+
cluster_configs = cluster_hosts.map { |host|
|
66
|
+
Tem::MultiProxy::Client.query_tems host
|
67
|
+
}.flatten
|
68
|
+
cluster_configs.reject { |config| config.nil? }.map do |config|
|
69
|
+
Tem::Session.new Tem::Transport::AutoConfigurator.try_transport(config)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
end # namespace Tem::Mr::Search
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
class WebClientQueryBuilder < MapReduceJob
|
5
|
+
# Builds a client query covering preferences expressed in the Web UI.
|
6
|
+
#
|
7
|
+
# The supported (and required) options are:
|
8
|
+
# layovers_cost:: the cost of each layover
|
9
|
+
# start_time_cost:: the cost of the flight's departure time, in minutes
|
10
|
+
# duration_cost:: the cost of each minute of flying
|
11
|
+
def self.query(options)
|
12
|
+
QueryBuilder.query { |q|
|
13
|
+
q.attributes :price => :tem_short, :start_time => :tem_short,
|
14
|
+
:end_time => :tem_short, :layovers => :tem_short
|
15
|
+
q.id_attribute :flight
|
16
|
+
|
17
|
+
# Score: 20000 - price - layovers_cost * layovers -
|
18
|
+
# start_time * start_time_cost -
|
19
|
+
# (end_time - start_time) * duration_cost
|
20
|
+
q.map { |s|
|
21
|
+
s.ldwc 20000
|
22
|
+
s.ldw :price
|
23
|
+
s.sub
|
24
|
+
s.ldw :end_time
|
25
|
+
s.ldw :start_time
|
26
|
+
s.sub
|
27
|
+
s.ldwc options[:duration_cost]
|
28
|
+
s.mul
|
29
|
+
s.sub
|
30
|
+
[:start_time, :layovers].each do |factor|
|
31
|
+
s.ldw factor
|
32
|
+
s.ldwc options[:"#{factor}_cost"]
|
33
|
+
s.mul
|
34
|
+
s.sub
|
35
|
+
end
|
36
|
+
s.stw :score
|
37
|
+
}
|
38
|
+
|
39
|
+
# The greater score wins.
|
40
|
+
q.reduce { |s|
|
41
|
+
s.ldw :score1
|
42
|
+
s.ldw :score2
|
43
|
+
s.cmp
|
44
|
+
s.stw :comparison
|
45
|
+
}
|
46
|
+
}
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end # namespace Tem::Mr::Search
|
data/lib/tem_mr_search.rb
CHANGED
@@ -9,9 +9,12 @@ end
|
|
9
9
|
module Tem::Mr::Search
|
10
10
|
end
|
11
11
|
|
12
|
+
require 'tem_mr_search/client.rb'
|
12
13
|
require 'tem_mr_search/db.rb'
|
13
14
|
require 'tem_mr_search/map_reduce_executor.rb'
|
14
15
|
require 'tem_mr_search/map_reduce_job.rb'
|
15
16
|
require 'tem_mr_search/map_reduce_planner.rb'
|
16
17
|
require 'tem_mr_search/query_builder.rb'
|
17
18
|
require 'tem_mr_search/client_query.rb'
|
19
|
+
require 'tem_mr_search/server.rb'
|
20
|
+
require 'tem_mr_search/web_client_query_builder.rb'
|
data/tem_mr_search.gemspec
CHANGED
@@ -2,22 +2,24 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{tem_mr_search}
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Victor Costan"]
|
9
|
-
s.date = %q{2009-06-
|
9
|
+
s.date = %q{2009-06-04}
|
10
|
+
s.default_executable = %q{tem_mr_search_server}
|
10
11
|
s.description = %q{Tem Map-Reduce proof of concept: database search.}
|
11
12
|
s.email = %q{victor@costan.us}
|
12
|
-
s.
|
13
|
-
s.
|
13
|
+
s.executables = ["tem_mr_search_server"]
|
14
|
+
s.extra_rdoc_files = ["bin/tem_mr_search_server", "CHANGELOG", "lib/tem_mr_search/client.rb", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search/server.rb", "lib/tem_mr_search/web_client_query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "README"]
|
15
|
+
s.files = ["bin/tem_mr_search_server", "CHANGELOG", "lib/tem_mr_search/client.rb", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search/server.rb", "lib/tem_mr_search/web_client_query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "Manifest", "Rakefile", "README", "tem_mr_search.gemspec", "test/mr_test_case.rb", "test/test_client_server.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builders.rb", "testdata/cluster.yml", "testdata/empty_cluster.yml", "testdata/fares.yml", "testdata/parallel_plan_431.yml", "testdata/parallel_plan_740.yml", "testdata/serial_plan_410.yml", "testdata/serial_plan_431.yml", "testdata/serial_plan_740.yml"]
|
14
16
|
s.homepage = %q{http://tem.rubyforge.org}
|
15
17
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Tem_mr_search", "--main", "README"]
|
16
18
|
s.require_paths = ["lib"]
|
17
19
|
s.rubyforge_project = %q{tem}
|
18
20
|
s.rubygems_version = %q{1.3.4}
|
19
21
|
s.summary = %q{Tem Map-Reduce proof of concept: database search.}
|
20
|
-
s.test_files = ["test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/
|
22
|
+
s.test_files = ["test/test_client_server.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builders.rb"]
|
21
23
|
|
22
24
|
if s.respond_to? :specification_version then
|
23
25
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
data/test/mr_test_case.rb
CHANGED
@@ -9,43 +9,21 @@ class MrTestCase < Test::Unit::TestCase
|
|
9
9
|
|
10
10
|
Thread.abort_on_exception = true
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
@db_path = File.join File.dirname(__FILE__), "..", "testdata", "fares.yml"
|
13
|
+
@cluster_file = File.join File.dirname(__FILE__), "..", "testdata",
|
14
|
+
"cluster.yml"
|
15
|
+
@empty_cluster_file = File.join File.dirname(__FILE__), "..", "testdata",
|
16
|
+
"empty_cluster.yml"
|
17
|
+
@db = Db.new @db_path
|
15
18
|
|
16
|
-
@client_query =
|
17
|
-
|
18
|
-
|
19
|
-
q.id_attribute :flight
|
20
|
-
|
21
|
-
# Score: 200 + start / 100 - duration - price
|
22
|
-
q.map { |s|
|
23
|
-
s.ldwc 200
|
24
|
-
s.ldw :start
|
25
|
-
s.ldbc 100
|
26
|
-
s.div
|
27
|
-
s.add
|
28
|
-
s.ldw :end
|
29
|
-
s.ldw :start
|
30
|
-
s.sub
|
31
|
-
s.sub
|
32
|
-
s.ldw :price
|
33
|
-
s.sub
|
34
|
-
s.stw :score
|
35
|
-
}
|
36
|
-
|
37
|
-
# The greater score wins.
|
38
|
-
q.reduce { |s|
|
39
|
-
s.ldw :score1
|
40
|
-
s.ldw :score2
|
41
|
-
s.cmp
|
42
|
-
s.stw :comparison
|
43
|
-
}
|
44
|
-
}
|
19
|
+
@client_query = WebClientQueryBuilder.query :layovers_cost => 1000,
|
20
|
+
:start_time_cost => -1,
|
21
|
+
:duration_cost => 1
|
45
22
|
end
|
46
23
|
|
47
24
|
def fare_score(fare)
|
48
|
-
|
25
|
+
20000 + fare['start_time'] - fare['price'] - (fare['end_time'] -
|
26
|
+
fare['start_time']) - fare['layovers'] * 1000
|
49
27
|
end
|
50
28
|
|
51
29
|
def fare_id(fare)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'test/mr_test_case'
|
2
|
+
require 'flexmock/test_unit'
|
3
|
+
|
4
|
+
|
5
|
+
class ClientServerTest < MrTestCase
|
6
|
+
Client = Tem::Mr::Search::Client
|
7
|
+
Server = Tem::Mr::Search::Server
|
8
|
+
|
9
|
+
def setup
|
10
|
+
super
|
11
|
+
@server_port = 29552
|
12
|
+
end
|
13
|
+
|
14
|
+
def _test_request
|
15
|
+
Thread.new do
|
16
|
+
Server.new(@db_path, @empty_cluster_file, @server_port).serve_loop
|
17
|
+
end
|
18
|
+
sleep 0.1
|
19
|
+
yield "localhost:#{@server_port}"
|
20
|
+
Client.shutdown_server "localhost:#{@server_port}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_fetch_item
|
24
|
+
_test_request do |server_addr|
|
25
|
+
fetched_item = Client.fetch_item server_addr, fare_id(@db.item(3))
|
26
|
+
assert_equal @db.item(3), fetched_item, 'Fetch fail'
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_query
|
31
|
+
Tem.auto_conf
|
32
|
+
@server_port = 29553
|
33
|
+
flexmock(Server).should_receive(:tems_from_cluster_file).
|
34
|
+
with(@empty_cluster_file).and_return [$tem]
|
35
|
+
_test_request do |server_addr|
|
36
|
+
result = Client.search server_addr, @client_query
|
37
|
+
gold_item = @db.item 5
|
38
|
+
assert_equal fare_id(gold_item), result[:id],
|
39
|
+
'Incorrect Map-Reduce result (ID)'
|
40
|
+
assert_equal fare_score(gold_item), result[:score],
|
41
|
+
'Incorrect Map-Reduce result (score)'
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/test/test_db.rb
CHANGED
@@ -2,9 +2,15 @@ require 'test/mr_test_case'
|
|
2
2
|
|
3
3
|
class DbTest < MrTestCase
|
4
4
|
def test_loading
|
5
|
-
assert_equal
|
6
|
-
gold_item = {
|
7
|
-
|
5
|
+
assert_equal 8, @db.length, 'Number of items in the database'
|
6
|
+
gold_item = {"price" => 2500, "from" => "BOS", "to" => "TPE",
|
7
|
+
"flight" => 15, "layovers"=>2, "end_time"=>2100,
|
8
|
+
"start_time"=>900}
|
8
9
|
assert_equal gold_item, @db.item(0), 'First database item'
|
9
10
|
end
|
11
|
+
|
12
|
+
def test_by_id
|
13
|
+
assert_equal 18, @db.item_by_id(18)['flight'], 'Finding existing item by ID'
|
14
|
+
assert_equal nil, @db.item_by_id(5), 'Finding non-existing item'
|
15
|
+
end
|
10
16
|
end
|
@@ -14,21 +14,20 @@ class MapReduceExecutorTest < MrTestCase
|
|
14
14
|
executor = MRExecutor.new @client_query, @db, tems, root_tem
|
15
15
|
packed_output = executor.execute
|
16
16
|
result = @client_query.unpack_output packed_output
|
17
|
-
|
17
|
+
gold_item = @db.item 5
|
18
|
+
assert_equal fare_id(gold_item), result[:id],
|
19
|
+
'Incorrect Map-Reduce result (ID)'
|
20
|
+
assert_equal fare_score(gold_item), result[:score],
|
21
|
+
'Incorrect Map-Reduce result (score)'
|
18
22
|
end
|
19
23
|
|
20
24
|
def test_executor_with_autoconf
|
21
25
|
_test_executor [$tem], 0
|
22
26
|
end
|
23
27
|
|
24
|
-
def test_executor_with_cluster
|
25
|
-
|
26
|
-
|
27
|
-
}.flatten
|
28
|
-
assert_equal 8, cluster_config.length, 'Incorrect cluster setup'
|
29
|
-
tems = cluster_config.map do |config|
|
30
|
-
Tem::Session.new Tem::Transport::AutoConfigurator.try_transport(config)
|
31
|
-
end
|
28
|
+
def test_executor_with_cluster
|
29
|
+
tems = Tem::Mr::Search::Server.tems_from_cluster_file @cluster_file
|
30
|
+
assert_equal 8, tems.length, 'Incorrect cluster setup'
|
32
31
|
|
33
32
|
tems.each { |tem| tem.activate; tem.emit }
|
34
33
|
|
data/test/test_map_reduce_job.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'test/mr_test_case'
|
2
2
|
|
3
3
|
class MapReduceJobTest < MrTestCase
|
4
|
+
MapReduceJob = Tem::Mr::Search::MapReduceJob
|
5
|
+
|
4
6
|
def setup
|
5
7
|
super
|
6
8
|
|
@@ -12,20 +14,24 @@ class MapReduceJobTest < MrTestCase
|
|
12
14
|
|
13
15
|
def test_map_for_object
|
14
16
|
obj = @obj1.merge 'flight' => 0x12345678
|
15
|
-
secpack = @client_query.map_for_object obj
|
17
|
+
secpack = @client_query.mapper.map_for_object obj
|
16
18
|
|
17
19
|
assert_equal [0, 0, 0, 0, 0x12, 0x34, 0x56, 0x78],
|
18
20
|
secpack.get_bytes(:_id, 8), 'Object ID embedded incorrectly'
|
19
21
|
assert_equal @obj1['price'], secpack.get_value(:price, :tem_short),
|
20
22
|
'Price embedded incorrectly'
|
21
|
-
assert_equal @obj1['
|
23
|
+
assert_equal @obj1['layovers'], secpack.get_value(:layovers, :tem_short),
|
24
|
+
'Layover count embedded incorrectly'
|
25
|
+
assert_equal @obj1['start_time'],
|
26
|
+
secpack.get_value(:start_time, :tem_short),
|
22
27
|
'Starting time embedded incorrectly'
|
23
|
-
assert_equal @obj1['
|
28
|
+
assert_equal @obj1['end_time'],
|
29
|
+
secpack.get_value(:end_time, :tem_short),
|
24
30
|
'Ending time embedded incorrectly'
|
25
31
|
end
|
26
32
|
|
27
33
|
def test_reduce_for_outputs
|
28
|
-
secpack = @client_query.reduce_for_outputs @output1, @output2
|
34
|
+
secpack = @client_query.reducer.reduce_for_outputs @output1, @output2
|
29
35
|
|
30
36
|
assert_equal @output1, secpack.get_bytes(:_output1, 16),
|
31
37
|
'Output1 embedded incorrectly'
|
@@ -41,5 +47,19 @@ class MapReduceJobTest < MrTestCase
|
|
41
47
|
assert_equal 0x12345678, output[:id], 'ID incorrectly unpacked'
|
42
48
|
assert_equal 0x3141, output[:score], 'Score incorrectly unpacked'
|
43
49
|
assert_equal [0xBE, 0xEF, 0xFE], output[:check], 'Check bytes'
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
class SerializedMapReduceJobTest < MapReduceJobTest
|
54
|
+
# Serialization is successful if a de-serialized job can pass all the tests.
|
55
|
+
|
56
|
+
def setup
|
57
|
+
super
|
58
|
+
@hash = @client_query.to_hash
|
59
|
+
@client_query = MapReduceJob.new @hash
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_serialized_form
|
63
|
+
assert_equal Hash, @hash.class, 'Serialization did not produce a plain hash'
|
44
64
|
end
|
45
65
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'test/mr_test_case'
|
2
2
|
|
3
|
-
class
|
3
|
+
class QueryBuildersTest < MrTestCase
|
4
4
|
def setup
|
5
5
|
super
|
6
6
|
Tem.auto_conf
|
@@ -9,7 +9,7 @@ class QueryBuilderTest < MrTestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def _test_map_fare(fare)
|
12
|
-
enc_output = @client_query.map_object fare, $tem
|
12
|
+
enc_output = @client_query.mapper.map_object fare, $tem
|
13
13
|
output = @client_query.unpack_output enc_output
|
14
14
|
assert_equal fare_id(fare), output[:id], 'Object ID incorrectly encoded'
|
15
15
|
assert_equal fare_score(fare), output[:score],
|
@@ -26,7 +26,7 @@ class QueryBuilderTest < MrTestCase
|
|
26
26
|
win_fare = (fare_score(fare1) > fare_score(fare2)) ? fare1 : fare2
|
27
27
|
# Try both permutations to ensure all branches of the reduce code work.
|
28
28
|
[[output1, output2], [output2, output1]].each do |o1, o2|
|
29
|
-
enc_output = @client_query.reduce_outputs o1, o2, $tem
|
29
|
+
enc_output = @client_query.reducer.reduce_outputs o1, o2, $tem
|
30
30
|
output = @client_query.unpack_output enc_output
|
31
31
|
assert_equal fare_id(win_fare), output[:id], 'The wrong fare won (bad ID)'
|
32
32
|
assert_equal fare_score(win_fare), output[:score],
|
data/testdata/fares.yml
CHANGED
@@ -1,49 +1,57 @@
|
|
1
1
|
---
|
2
2
|
- from: BOS
|
3
|
-
to:
|
4
|
-
price:
|
5
|
-
|
6
|
-
|
3
|
+
to: TPE
|
4
|
+
price: 2500
|
5
|
+
start_time: 900
|
6
|
+
end_time: 2100
|
7
|
+
layovers: 2
|
7
8
|
flight: 15
|
8
9
|
- from: BOS
|
9
|
-
to:
|
10
|
-
price:
|
11
|
-
|
12
|
-
|
10
|
+
to: TPE
|
11
|
+
price: 2500
|
12
|
+
start_time: 1100
|
13
|
+
end_time: 2300
|
14
|
+
layovers: 2
|
13
15
|
flight: 16
|
14
16
|
- from: BOS
|
15
|
-
to:
|
16
|
-
price:
|
17
|
-
|
18
|
-
|
17
|
+
to: TPE
|
18
|
+
price: 3000
|
19
|
+
start_time: 900
|
20
|
+
end_time: 1900
|
21
|
+
layovers: 1
|
19
22
|
flight: 17
|
20
23
|
- from: BOS
|
21
|
-
to:
|
22
|
-
price:
|
23
|
-
|
24
|
-
|
24
|
+
to: TPE
|
25
|
+
price: 3000
|
26
|
+
start_time: 1100
|
27
|
+
end_time: 2100
|
28
|
+
layovers: 1
|
25
29
|
flight: 18
|
26
30
|
- from: BOS
|
27
|
-
to:
|
28
|
-
price:
|
29
|
-
|
30
|
-
|
31
|
+
to: TPE
|
32
|
+
price: 4000
|
33
|
+
start_time: 900
|
34
|
+
end_time: 1700
|
35
|
+
layovers: 0
|
36
|
+
flight: 19
|
37
|
+
- from: BOS
|
38
|
+
to: TPE
|
39
|
+
price: 4000
|
40
|
+
start_time: 1100
|
41
|
+
end_time: 1900
|
42
|
+
layovers: 0
|
43
|
+
flight: 20
|
44
|
+
- from: BOS
|
45
|
+
to: TPE
|
46
|
+
price: 2400
|
47
|
+
start_time: 900
|
48
|
+
end_time: 2100
|
49
|
+
layovers: 2
|
31
50
|
flight: 21
|
32
51
|
- from: BOS
|
33
|
-
to:
|
34
|
-
price:
|
35
|
-
|
36
|
-
|
52
|
+
to: TPE
|
53
|
+
price: 2400
|
54
|
+
start_time: 1100
|
55
|
+
end_time: 2300
|
56
|
+
layovers: 2
|
37
57
|
flight: 22
|
38
|
-
- from: BOS
|
39
|
-
to: NYC
|
40
|
-
price: 210
|
41
|
-
start: 1200
|
42
|
-
end: 1245
|
43
|
-
flight: 23
|
44
|
-
- from: BOS
|
45
|
-
to: NYC
|
46
|
-
price: 210
|
47
|
-
start: 1700
|
48
|
-
end: 1745
|
49
|
-
flight: 24
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tem_mr_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Costan
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-06-
|
12
|
+
date: 2009-06-04 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -34,29 +34,37 @@ dependencies:
|
|
34
34
|
version:
|
35
35
|
description: "Tem Map-Reduce proof of concept: database search."
|
36
36
|
email: victor@costan.us
|
37
|
-
executables:
|
38
|
-
|
37
|
+
executables:
|
38
|
+
- tem_mr_search_server
|
39
39
|
extensions: []
|
40
40
|
|
41
41
|
extra_rdoc_files:
|
42
|
+
- bin/tem_mr_search_server
|
42
43
|
- CHANGELOG
|
44
|
+
- lib/tem_mr_search/client.rb
|
43
45
|
- lib/tem_mr_search/client_query.rb
|
44
46
|
- lib/tem_mr_search/db.rb
|
45
47
|
- lib/tem_mr_search/map_reduce_executor.rb
|
46
48
|
- lib/tem_mr_search/map_reduce_job.rb
|
47
49
|
- lib/tem_mr_search/map_reduce_planner.rb
|
48
50
|
- lib/tem_mr_search/query_builder.rb
|
51
|
+
- lib/tem_mr_search/server.rb
|
52
|
+
- lib/tem_mr_search/web_client_query_builder.rb
|
49
53
|
- lib/tem_mr_search.rb
|
50
54
|
- LICENSE
|
51
55
|
- README
|
52
56
|
files:
|
57
|
+
- bin/tem_mr_search_server
|
53
58
|
- CHANGELOG
|
59
|
+
- lib/tem_mr_search/client.rb
|
54
60
|
- lib/tem_mr_search/client_query.rb
|
55
61
|
- lib/tem_mr_search/db.rb
|
56
62
|
- lib/tem_mr_search/map_reduce_executor.rb
|
57
63
|
- lib/tem_mr_search/map_reduce_job.rb
|
58
64
|
- lib/tem_mr_search/map_reduce_planner.rb
|
59
65
|
- lib/tem_mr_search/query_builder.rb
|
66
|
+
- lib/tem_mr_search/server.rb
|
67
|
+
- lib/tem_mr_search/web_client_query_builder.rb
|
60
68
|
- lib/tem_mr_search.rb
|
61
69
|
- LICENSE
|
62
70
|
- Manifest
|
@@ -64,11 +72,14 @@ files:
|
|
64
72
|
- README
|
65
73
|
- tem_mr_search.gemspec
|
66
74
|
- test/mr_test_case.rb
|
75
|
+
- test/test_client_server.rb
|
67
76
|
- test/test_db.rb
|
68
77
|
- test/test_map_reduce_executor.rb
|
69
78
|
- test/test_map_reduce_job.rb
|
70
79
|
- test/test_map_reduce_planner.rb
|
71
|
-
- test/
|
80
|
+
- test/test_query_builders.rb
|
81
|
+
- testdata/cluster.yml
|
82
|
+
- testdata/empty_cluster.yml
|
72
83
|
- testdata/fares.yml
|
73
84
|
- testdata/parallel_plan_431.yml
|
74
85
|
- testdata/parallel_plan_740.yml
|
@@ -109,8 +120,9 @@ signing_key:
|
|
109
120
|
specification_version: 3
|
110
121
|
summary: "Tem Map-Reduce proof of concept: database search."
|
111
122
|
test_files:
|
123
|
+
- test/test_client_server.rb
|
112
124
|
- test/test_db.rb
|
113
125
|
- test/test_map_reduce_executor.rb
|
114
126
|
- test/test_map_reduce_job.rb
|
115
127
|
- test/test_map_reduce_planner.rb
|
116
|
-
- test/
|
128
|
+
- test/test_query_builders.rb
|