tem_mr_search 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1 +1,3 @@
1
+ v0.2. Implemented remote queries.
2
+
1
3
  v0.1. Initial release.
data/Manifest CHANGED
@@ -1,10 +1,14 @@
1
+ bin/tem_mr_search_server
1
2
  CHANGELOG
3
+ lib/tem_mr_search/client.rb
2
4
  lib/tem_mr_search/client_query.rb
3
5
  lib/tem_mr_search/db.rb
4
6
  lib/tem_mr_search/map_reduce_executor.rb
5
7
  lib/tem_mr_search/map_reduce_job.rb
6
8
  lib/tem_mr_search/map_reduce_planner.rb
7
9
  lib/tem_mr_search/query_builder.rb
10
+ lib/tem_mr_search/server.rb
11
+ lib/tem_mr_search/web_client_query_builder.rb
8
12
  lib/tem_mr_search.rb
9
13
  LICENSE
10
14
  Manifest
@@ -12,11 +16,14 @@ Rakefile
12
16
  README
13
17
  tem_mr_search.gemspec
14
18
  test/mr_test_case.rb
19
+ test/test_client_server.rb
15
20
  test/test_db.rb
16
21
  test/test_map_reduce_executor.rb
17
22
  test/test_map_reduce_job.rb
18
23
  test/test_map_reduce_planner.rb
19
- test/test_query_builder.rb
24
+ test/test_query_builders.rb
25
+ testdata/cluster.yml
26
+ testdata/empty_cluster.yml
20
27
  testdata/fares.yml
21
28
  testdata/parallel_plan_431.yml
22
29
  testdata/parallel_plan_740.yml
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'tem_mr_search'
5
+
6
+ if ARGV.length < 2 || ARGV.length >= 3
7
+ print "Usage: #{$0} db_file cluster_file [server_port]\n"
8
+ exit
9
+ end
10
+
11
+ db_file, cluster_file, server_port = ARGV[0], ARGV[1], ARGV[2]
12
+ server = Tem::Mr::Search::Server.new db_file, cluster_file, server_port
13
+ server.serve_loop
@@ -0,0 +1,44 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class Client
5
+ OP = Zerg::Support::Protocols::ObjectProtocol
6
+ OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP
7
+
8
+ # Performs a private database search using a Map-Reduce.
9
+ def self.search(server_addr, client_query)
10
+ output = issue_request server_addr, :type => :search, :root_tem => 0,
11
+ :map_reduce => client_query.to_hash
12
+ client_query.unpack_output output
13
+ end
14
+
15
+ # Asks for an item in the server's database.
16
+ #
17
+ # In production, there should be per-client rate-limiting on this request.
18
+ def self.fetch_item(server_addr, item_id)
19
+ issue_request server_addr, :type => :fetch, :id => item_id
20
+ end
21
+
22
+ # Terminates the server.
23
+ #
24
+ # In production, normal clients wouldn't have access to this.
25
+ def self.shutdown_server(server_addr)
26
+ issue_request server_addr, :type => :shutdown
27
+ end
28
+
29
+ # Issues a request against a Map-Reduce server and returns the response.
30
+ def self.issue_request(server_addr, request)
31
+ socket = Zerg::Support::SocketFactory.socket :out_addr => server_addr
32
+ socket.extend OPAdapter
33
+ begin
34
+ socket.send_object request
35
+ response = socket.recv_object response
36
+ rescue
37
+ response = nil
38
+ end
39
+ socket.close rescue nil
40
+ response
41
+ end
42
+ end
43
+
44
+ end # namespace Tem::Mr::Search
@@ -3,8 +3,11 @@ module Tem::Mr::Search
3
3
 
4
4
  class Db
5
5
  attr_reader :data
6
+ attr_reader :id_attribute
7
+
6
8
  def initialize(path)
7
9
  @data = File.open(path, 'r') { |f| YAML.load f }
10
+ @id_attribute = 'flight'
8
11
  end
9
12
 
10
13
  def length
@@ -14,6 +17,10 @@ class Db
14
17
  def item(item_index)
15
18
  @data[item_index]
16
19
  end
20
+
21
+ def item_by_id(item_id)
22
+ @data.find { |item| item[@id_attribute] == item_id }
23
+ end
17
24
  end
18
25
 
19
26
  end # namespace Tem::Mr::search
@@ -8,12 +8,12 @@ class MapReduceExecutor
8
8
  # Creates an executor for a Map-Reduce job.
9
9
  #
10
10
  # Arguments:
11
- # root_job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
11
+ # job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
12
12
  # db:: the database to run Map-Reduce over
13
13
  # tems:: sessions to the available TEMs
14
14
  # root_tem:: the index of the TEM that has the root mapper and reducer
15
15
  # planner_class:: (optional) replacement for the default planner strategy
16
- def initialize(root_job, db, tems, root_tem, planner_class = nil)
16
+ def initialize(job, db, tems, root_tem, planner_class = nil)
17
17
  planner_class ||= MapReducePlanner
18
18
 
19
19
  @db = db # Writable only in main thread.
@@ -23,9 +23,9 @@ class MapReduceExecutor
23
23
  @planner = planner_class.new @job, db.length, tems.length, root_tem
24
24
 
25
25
  # Protected by @lock
26
- @tem_jobs = { :mapper => { root_tem => root_job },
27
- :reducer => { root_tem => root_job },
28
- :finalizer => { root_tem => root_job } }
26
+ @tem_parts = { :mapper => { root_tem => job.mapper },
27
+ :reducer => { root_tem => job.reducer },
28
+ :finalizer => { root_tem => job.finalizer } }
29
29
  # Protected by @lock
30
30
  @outputs = {}
31
31
 
@@ -69,41 +69,42 @@ class MapReduceExecutor
69
69
  def execute_action(action, tem_index)
70
70
  case action[:action]
71
71
  when :migrate
72
- in_job = @lock.synchronize { @tem_jobs[action[:secpack]][tem_index] }
73
- out_job = in_job # TODO(costan): actual migration
72
+ in_part = @lock.synchronize { @tem_parts[action[:secpack]][tem_index] }
73
+ out_part = in_part # TODO(costan): actual migration
74
74
  @lock.synchronize do
75
- @tem_jobs[action[:secpack]][action[:to]] = out_job
75
+ @tem_parts[action[:secpack]][action[:to]] = out_part
76
76
  end
77
77
 
78
78
  when :map
79
- job, item = nil, nil
79
+ mapper, item = nil, nil
80
80
  @lock.synchronize do
81
- job = @tem_jobs[:mapper][tem_index]
81
+ mapper = @tem_parts[:mapper][tem_index]
82
82
  item = @db.item(action[:item])
83
83
  end
84
- output = job.map_object item, @tems[tem_index]
84
+ output = mapper.map_object item, @tems[tem_index]
85
85
  @lock.synchronize do
86
86
  @outputs[action[:output_id]] = output
87
87
  end
88
88
 
89
89
  when :reduce
90
- job, output1, output2 = nil, nil, nil
90
+ reducer, output1, output2 = nil, nil, nil
91
91
  @lock.synchronize do
92
- job = @tem_jobs[:reducer][tem_index]
92
+ reducer = @tem_parts[:reducer][tem_index]
93
93
  output1 = @outputs[action[:output1_id]]
94
94
  output2 = @outputs[action[:output2_id]]
95
95
  end
96
- output = job.reduce_outputs output1, output2, @tems[tem_index]
96
+ output = reducer.reduce_outputs output1, output2, @tems[tem_index]
97
97
  @lock.synchronize do
98
98
  @outputs[action[:output_id]] = output
99
99
  end
100
100
 
101
101
  when :finalize
102
+ finalizer = nil
102
103
  @lock.synchronize do
103
- job = @tem_jobs[:finalizer][tem_index]
104
+ finalizer = @tem_parts[:finalizer][tem_index]
104
105
  output = @outputs[action[:output_id]]
105
106
  end
106
- final_output = job.finalize_output output, @tems[tem_index]
107
+ final_output = finalizer.finalize_output output, @tems[tem_index]
107
108
  @lock.synchronize do
108
109
  @outputs[action[:final_id]] = final_output
109
110
  end
@@ -2,66 +2,107 @@
2
2
  module Tem::Mr::Search
3
3
 
4
4
  class MapReduceJob
5
- attr_reader :map_secpack, :reduce_secpack, :attributes, :id_attribute
5
+ attr_reader :mapper, :reducer, :finalizer, :attributes, :id_attribute
6
6
 
7
7
  def initialize(attributes)
8
- @map_secpack = attributes[:map]
9
- @reduce_secpack = attributes[:reduce]
10
- @finalize_secpack = attributes[:finalize]
11
8
  @attributes = attributes[:attributes]
12
9
  @id_attribute = attributes[:id_attribute]
10
+
11
+ @mapper = Mapper.new attributes[:map], self
12
+ @reducer = Reducer.new attributes[:reduce], self
13
+ @finalizer = Finalizer.new attributes[:finalize], self
13
14
  end
14
15
 
15
- # Returns a SECpack for mapping the given object data into the query.
16
- def map_for_object(object_data)
17
- return nil unless @map_secpack
18
- object_id = object_data[id_attribute.to_s]
19
- secpack = Tem::SecPack.new_from_array @map_secpack.to_array
20
- secpack.set_bytes :_id, [object_id].pack('q').unpack('C*').reverse
21
- attributes.each do |attribute|
22
- name, type = attribute[:name], attribute[:type]
23
- secpack.set_value name.to_sym, type, object_data[name.to_s]
24
- end
25
- secpack
16
+ # Unpacks a decrypted output into its components.
17
+ def unpack_decrypted_output(output)
18
+ {
19
+ :id => output[0, 8].reverse.pack('C*').unpack('q').first,
20
+ :score => Tem::Abi.read_tem_short(output, 8),
21
+ :check => output[13, 3]
22
+ }
26
23
  end
27
24
 
28
- # Maps the given object into the query.
29
- def map_object(object_data, tem)
30
- secpack = map_for_object object_data
31
- secpack ? tem.execute(secpack) : object_data
25
+ # Serializes a job to a hash.
26
+ #
27
+ # Useful in conjunction with ObjectProtocol in ZergSupport, for sending jobs
28
+ # across the wire. De-serialize with MapReduceJob#new
29
+ def to_hash
30
+ { :attributes => @attributes, :id_attribute => @id_attribute,
31
+ :map => @mapper.to_plain_object, :reduce => @reducer.to_plain_object,
32
+ :finalize => @finalizer.to_plain_object }
32
33
  end
33
34
 
34
- # Returns a SECpack for reducing two inputs coming from maps or other reduces.
35
- def reduce_for_outputs(output1, output2)
36
- secpack = Tem::SecPack.new_from_array @reduce_secpack.to_array
35
+ # Base class for the Map-Reduce SECpack wrappers.
36
+ class JobPart
37
+ def initialize(secpack, job)
38
+ unless secpack.nil? or secpack.kind_of? Tem::SecPack
39
+ secpack = Tem::SecPack.new_from_array secpack
40
+ end
41
+ @secpack = secpack
42
+ end
37
43
 
38
- secpack.set_bytes :_output1, output1
39
- secpack.set_bytes :_output2, output2
40
- secpack
44
+ def to_plain_object
45
+ return nil if @secpack.nil?
46
+ @secpack.to_array
47
+ end
41
48
  end
49
+
50
+ # Wrapper for the map SECpack.
51
+ class Mapper < JobPart
52
+ def initialize(secpack, job)
53
+ super
54
+ @attributes = job.attributes
55
+ @id_attribute = job.id_attribute
56
+ end
57
+
58
+ # Returns a SECpack for mapping the given object data into the query.
59
+ def map_for_object(object_data)
60
+ return nil unless @secpack
61
+ object_id = object_data[@id_attribute.to_s]
62
+ new_secpack = Tem::SecPack.new_from_array @secpack.to_array
63
+ new_secpack.set_bytes :_id, [object_id].pack('q').unpack('C*').reverse
64
+ @attributes.each do |attribute|
65
+ name, type = attribute[:name], attribute[:type]
66
+ new_secpack.set_value name.to_sym, type, object_data[name.to_s]
67
+ end
68
+ new_secpack
69
+ end
70
+
71
+ # Maps the given object into the query.
72
+ def map_object(object_data, tem)
73
+ secpack = map_for_object object_data
74
+ secpack ? tem.execute(secpack) : object_data
75
+ end
76
+ end
42
77
 
43
- # Reduces two inputs coming from maps or other reduces.
44
- def reduce_outputs(output1, output2, tem)
45
- secpack = reduce_for_outputs output1, output2
46
- tem.execute secpack
47
- end
78
+ # Wrapper for the reduce SECpack.
79
+ class Reducer < JobPart
80
+ # Returns a SECpack for reducing two inputs coming from maps or other reduces.
81
+ def reduce_for_outputs(output1, output2)
82
+ new_secpack = Tem::SecPack.new_from_array @secpack.to_array
83
+
84
+ new_secpack.set_bytes :_output1, output1
85
+ new_secpack.set_bytes :_output2, output2
86
+ new_secpack
87
+ end
48
88
 
49
- # Converts a map/reduce output into the final result for the operation.
50
- def finalize_output(output, tem)
51
- return output unless @finalize_secpack
52
- secpack = Tem::SecPack.new_from_array @finalize_secpack.to_array
53
- secpack.set_bytes :_output, output
54
- tem.execute secpack
89
+ # Reduces two inputs coming from maps or other reduces.
90
+ def reduce_outputs(output1, output2, tem)
91
+ secpack = reduce_for_outputs output1, output2
92
+ tem.execute secpack
93
+ end
55
94
  end
56
95
 
57
- # Unpacks a decrypted output into its components.
58
- def unpack_decrypted_output(output)
59
- {
60
- :id => output[0, 8].reverse.pack('C*').unpack('q').first,
61
- :score => Tem::Abi.read_tem_short(output, 8),
62
- :check => output[13, 3]
63
- }
64
- end
96
+ # Wrapper for the finalize SECpack.
97
+ class Finalizer < JobPart
98
+ # Converts a map/reduce output into the final result for the operation.
99
+ def finalize_output(output, tem)
100
+ return output unless @secpack
101
+ secpack = Tem::SecPack.new_from_array @finalize_secpack.to_array
102
+ secpack.set_bytes :_output, output
103
+ tem.execute secpack
104
+ end
105
+ end
65
106
  end
66
107
 
67
108
  end # namespace Tem::Mr::search
@@ -0,0 +1,74 @@
1
+ require 'logger'
2
+ require 'yaml'
3
+
4
+ # :nodoc: namespace
5
+ module Tem::Mr::Search
6
+
7
+ class Server
8
+ OP = Zerg::Support::Protocols::ObjectProtocol
9
+ OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP
10
+
11
+ # Creates a new Map-Reduce server (master).
12
+ def initialize(db_file, cluster_file, port)
13
+ @logger = Logger.new STDERR
14
+ @db = Db.new db_file
15
+ @tems = Server.tems_from_cluster_file cluster_file
16
+ @port = port
17
+ end
18
+
19
+ # This server's loop.
20
+ def serve_loop
21
+ listen_socket = Zerg::Support::SocketFactory.socket :in_port => @port
22
+ listen_socket.listen
23
+ shutdown_received = false
24
+ until shutdown_received
25
+ begin
26
+ client_socket, client_addr = listen_socket.accept
27
+ client_socket.extend OPAdapter
28
+ request = client_socket.recv_object
29
+ begin
30
+ response = process_request request
31
+ rescue Exception => e
32
+ @logger.error e
33
+ response = nil
34
+ end
35
+ client_socket.send_object response if response
36
+ shutdown_received = true if response == :shutdown
37
+ rescue Exception => e
38
+ @logger.error e
39
+ end
40
+ client_socket.close rescue nil
41
+ end
42
+ listen_socket.close
43
+ end
44
+
45
+ # Computes the response of a single request.
46
+ def process_request(request)
47
+ case request[:type]
48
+ when :search
49
+ job = MapReduceJob.new request[:map_reduce]
50
+ root_tem = request[:root_tem]
51
+ executor = MapReduceExecutor.new job, @db, @tems, root_tem
52
+ return executor.execute
53
+ when :fetch
54
+ return @db.item_by_id(request[:id]) || :not_found
55
+ when :shutdown
56
+ return :shutdown
57
+ else
58
+ return :unknown
59
+ end
60
+ end
61
+
62
+ # Creates sessions to all the TEMs in a cluster.
63
+ def self.tems_from_cluster_file(cluster_file)
64
+ cluster_hosts = File.open(cluster_file, 'r') { |f| YAML.load f }
65
+ cluster_configs = cluster_hosts.map { |host|
66
+ Tem::MultiProxy::Client.query_tems host
67
+ }.flatten
68
+ cluster_configs.reject { |config| config.nil? }.map do |config|
69
+ Tem::Session.new Tem::Transport::AutoConfigurator.try_transport(config)
70
+ end
71
+ end
72
+ end
73
+
74
+ end # namespace Tem::Mr::Search
@@ -0,0 +1,50 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class WebClientQueryBuilder < MapReduceJob
5
+ # Builds a client query covering preferences expressed in the Web UI.
6
+ #
7
+ # The supported (and required) options are:
8
+ # layovers_cost:: the cost of each layover
9
+ # start_time_cost:: the cost of the flight's departure time, in minutes
10
+ # duration_cost:: the cost of each minute of flying
11
+ def self.query(options)
12
+ QueryBuilder.query { |q|
13
+ q.attributes :price => :tem_short, :start_time => :tem_short,
14
+ :end_time => :tem_short, :layovers => :tem_short
15
+ q.id_attribute :flight
16
+
17
+ # Score: 20000 - price - layover_cost * layovers -
18
+ # start_time * start_time_cost -
19
+ # (end_time - start_time) * duration_cost
20
+ q.map { |s|
21
+ s.ldwc 20000
22
+ s.ldw :price
23
+ s.sub
24
+ s.ldw :end_time
25
+ s.ldw :start_time
26
+ s.sub
27
+ s.ldwc options[:duration_cost]
28
+ s.mul
29
+ s.sub
30
+ [:start_time, :layovers].each do |factor|
31
+ s.ldw factor
32
+ s.ldwc options[:"#{factor}_cost"]
33
+ s.mul
34
+ s.sub
35
+ end
36
+ s.stw :score
37
+ }
38
+
39
+ # The greater score wins.
40
+ q.reduce { |s|
41
+ s.ldw :score1
42
+ s.ldw :score2
43
+ s.cmp
44
+ s.stw :comparison
45
+ }
46
+ }
47
+ end
48
+ end
49
+
50
+ end # namespace Tem::Mr::search
data/lib/tem_mr_search.rb CHANGED
@@ -9,9 +9,12 @@ end
9
9
  module Tem::Mr::Search
10
10
  end
11
11
 
12
+ require 'tem_mr_search/client.rb'
12
13
  require 'tem_mr_search/db.rb'
13
14
  require 'tem_mr_search/map_reduce_executor.rb'
14
15
  require 'tem_mr_search/map_reduce_job.rb'
15
16
  require 'tem_mr_search/map_reduce_planner.rb'
16
17
  require 'tem_mr_search/query_builder.rb'
17
18
  require 'tem_mr_search/client_query.rb'
19
+ require 'tem_mr_search/server.rb'
20
+ require 'tem_mr_search/web_client_query_builder.rb'
@@ -2,22 +2,24 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{tem_mr_search}
5
- s.version = "0.1"
5
+ s.version = "0.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Victor Costan"]
9
- s.date = %q{2009-06-02}
9
+ s.date = %q{2009-06-04}
10
+ s.default_executable = %q{tem_mr_search_server}
10
11
  s.description = %q{Tem Map-Reduce proof of concept: database search.}
11
12
  s.email = %q{victor@costan.us}
12
- s.extra_rdoc_files = ["CHANGELOG", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "README"]
13
- s.files = ["CHANGELOG", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "Manifest", "Rakefile", "README", "tem_mr_search.gemspec", "test/mr_test_case.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builder.rb", "testdata/fares.yml", "testdata/parallel_plan_431.yml", "testdata/parallel_plan_740.yml", "testdata/serial_plan_410.yml", "testdata/serial_plan_431.yml", "testdata/serial_plan_740.yml"]
13
+ s.executables = ["tem_mr_search_server"]
14
+ s.extra_rdoc_files = ["bin/tem_mr_search_server", "CHANGELOG", "lib/tem_mr_search/client.rb", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search/server.rb", "lib/tem_mr_search/web_client_query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "README"]
15
+ s.files = ["bin/tem_mr_search_server", "CHANGELOG", "lib/tem_mr_search/client.rb", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search/server.rb", "lib/tem_mr_search/web_client_query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "Manifest", "Rakefile", "README", "tem_mr_search.gemspec", "test/mr_test_case.rb", "test/test_client_server.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builders.rb", "testdata/cluster.yml", "testdata/empty_cluster.yml", "testdata/fares.yml", "testdata/parallel_plan_431.yml", "testdata/parallel_plan_740.yml", "testdata/serial_plan_410.yml", "testdata/serial_plan_431.yml", "testdata/serial_plan_740.yml"]
14
16
  s.homepage = %q{http://tem.rubyforge.org}
15
17
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Tem_mr_search", "--main", "README"]
16
18
  s.require_paths = ["lib"]
17
19
  s.rubyforge_project = %q{tem}
18
20
  s.rubygems_version = %q{1.3.4}
19
21
  s.summary = %q{Tem Map-Reduce proof of concept: database search.}
20
- s.test_files = ["test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builder.rb"]
22
+ s.test_files = ["test/test_client_server.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builders.rb"]
21
23
 
22
24
  if s.respond_to? :specification_version then
23
25
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
data/test/mr_test_case.rb CHANGED
@@ -9,43 +9,21 @@ class MrTestCase < Test::Unit::TestCase
9
9
 
10
10
  Thread.abort_on_exception = true
11
11
 
12
- testdb_path = File.join File.dirname(__FILE__), "..", "testdata",
13
- "fares.yml"
14
- @db = Db.new testdb_path
12
+ @db_path = File.join File.dirname(__FILE__), "..", "testdata", "fares.yml"
13
+ @cluster_file = File.join File.dirname(__FILE__), "..", "testdata",
14
+ "cluster.yml"
15
+ @empty_cluster_file = File.join File.dirname(__FILE__), "..", "testdata",
16
+ "empty_cluster.yml"
17
+ @db = Db.new @db_path
15
18
 
16
- @client_query = QueryBuilder.query { |q|
17
- q.attributes :price => :tem_short, :start => :tem_short,
18
- :end => :tem_short
19
- q.id_attribute :flight
20
-
21
- # Score: 200 + start / 100 - duration - price
22
- q.map { |s|
23
- s.ldwc 200
24
- s.ldw :start
25
- s.ldbc 100
26
- s.div
27
- s.add
28
- s.ldw :end
29
- s.ldw :start
30
- s.sub
31
- s.sub
32
- s.ldw :price
33
- s.sub
34
- s.stw :score
35
- }
36
-
37
- # The greater score wins.
38
- q.reduce { |s|
39
- s.ldw :score1
40
- s.ldw :score2
41
- s.cmp
42
- s.stw :comparison
43
- }
44
- }
19
+ @client_query = WebClientQueryBuilder.query :layovers_cost => 1000,
20
+ :start_time_cost => -1,
21
+ :duration_cost => 1
45
22
  end
46
23
 
47
24
  def fare_score(fare)
48
- 200 + fare['start'] / 100 - fare['price'] - (fare['end'] - fare['start'])
25
+ 20000 + fare['start_time'] - fare['price'] - (fare['end_time'] -
26
+ fare['start_time']) - fare['layovers'] * 1000
49
27
  end
50
28
 
51
29
  def fare_id(fare)
@@ -0,0 +1,44 @@
1
+ require 'test/mr_test_case'
2
+ require 'flexmock/test_unit'
3
+
4
+
5
+ class ClientServerTest < MrTestCase
6
+ Client = Tem::Mr::Search::Client
7
+ Server = Tem::Mr::Search::Server
8
+
9
+ def setup
10
+ super
11
+ @server_port = 29552
12
+ end
13
+
14
+ def _test_request
15
+ Thread.new do
16
+ Server.new(@db_path, @empty_cluster_file, @server_port).serve_loop
17
+ end
18
+ sleep 0.1
19
+ yield "localhost:#{@server_port}"
20
+ Client.shutdown_server "localhost:#{@server_port}"
21
+ end
22
+
23
+ def test_fetch_item
24
+ _test_request do |server_addr|
25
+ fetched_item = Client.fetch_item server_addr, fare_id(@db.item(3))
26
+ assert_equal @db.item(3), fetched_item, 'Fetch fail'
27
+ end
28
+ end
29
+
30
+ def test_query
31
+ Tem.auto_conf
32
+ @server_port = 29553
33
+ flexmock(Server).should_receive(:tems_from_cluster_file).
34
+ with(@empty_cluster_file).and_return [$tem]
35
+ _test_request do |server_addr|
36
+ result = Client.search server_addr, @client_query
37
+ gold_item = @db.item 5
38
+ assert_equal fare_id(gold_item), result[:id],
39
+ 'Incorrect Map-Reduce result (ID)'
40
+ assert_equal fare_score(gold_item), result[:score],
41
+ 'Incorrect Map-Reduce result (score)'
42
+ end
43
+ end
44
+ end
data/test/test_db.rb CHANGED
@@ -2,9 +2,15 @@ require 'test/mr_test_case'
2
2
 
3
3
  class DbTest < MrTestCase
4
4
  def test_loading
5
- assert_equal 4, @db.length, 'Number of items in the database'
6
- gold_item = {'from' => 'BOS', 'to' => 'NYC', 'price' => 150, 'start' => 900,
7
- 'end' => 1000, 'flight' => 15 }
5
+ assert_equal 8, @db.length, 'Number of items in the database'
6
+ gold_item = {"price" => 2500, "from" => "BOS", "to" => "TPE",
7
+ "flight" => 15, "layovers"=>2, "end_time"=>2100,
8
+ "start_time"=>900}
8
9
  assert_equal gold_item, @db.item(0), 'First database item'
9
10
  end
11
+
12
+ def test_by_id
13
+ assert_equal 18, @db.item_by_id(18)['flight'], 'Finding existing item by ID'
14
+ assert_equal nil, @db.item_by_id(5), 'Finding non-existing item'
15
+ end
10
16
  end
@@ -14,21 +14,20 @@ class MapReduceExecutorTest < MrTestCase
14
14
  executor = MRExecutor.new @client_query, @db, tems, root_tem
15
15
  packed_output = executor.execute
16
16
  result = @client_query.unpack_output packed_output
17
- assert_equal 18, result[:id], 'Incorrect Map-Reduce result'
17
+ gold_item = @db.item 5
18
+ assert_equal fare_id(gold_item), result[:id],
19
+ 'Incorrect Map-Reduce result (ID)'
20
+ assert_equal fare_score(gold_item), result[:score],
21
+ 'Incorrect Map-Reduce result (score)'
18
22
  end
19
23
 
20
24
  def test_executor_with_autoconf
21
25
  _test_executor [$tem], 0
22
26
  end
23
27
 
24
- def test_executor_with_cluster
25
- cluster_config = ['lightbulb2.local', 'darkbulb.local'].map { |host|
26
- Tem::MultiProxy::Client.query_tems host
27
- }.flatten
28
- assert_equal 8, cluster_config.length, 'Incorrect cluster setup'
29
- tems = cluster_config.map do |config|
30
- Tem::Session.new Tem::Transport::AutoConfigurator.try_transport(config)
31
- end
28
+ def test_executor_with_cluster
29
+ tems = Tem::Mr::Search::Server.tems_from_cluster_file @cluster_file
30
+ assert_equal 8, tems.length, 'Incorrect cluster setup'
32
31
 
33
32
  tems.each { |tem| tem.activate; tem.emit }
34
33
 
@@ -1,6 +1,8 @@
1
1
  require 'test/mr_test_case'
2
2
 
3
3
  class MapReduceJobTest < MrTestCase
4
+ MapReduceJob = Tem::Mr::Search::MapReduceJob
5
+
4
6
  def setup
5
7
  super
6
8
 
@@ -12,20 +14,24 @@ class MapReduceJobTest < MrTestCase
12
14
 
13
15
  def test_map_for_object
14
16
  obj = @obj1.merge 'flight' => 0x12345678
15
- secpack = @client_query.map_for_object obj
17
+ secpack = @client_query.mapper.map_for_object obj
16
18
 
17
19
  assert_equal [0, 0, 0, 0, 0x12, 0x34, 0x56, 0x78],
18
20
  secpack.get_bytes(:_id, 8), 'Object ID embedded incorrectly'
19
21
  assert_equal @obj1['price'], secpack.get_value(:price, :tem_short),
20
22
  'Price embedded incorrectly'
21
- assert_equal @obj1['start'], secpack.get_value(:start, :tem_short),
23
+ assert_equal @obj1['layovers'], secpack.get_value(:layovers, :tem_short),
24
+ 'Layover count embedded incorrectly'
25
+ assert_equal @obj1['start_time'],
26
+ secpack.get_value(:start_time, :tem_short),
22
27
  'Starting time embedded incorrectly'
23
- assert_equal @obj1['end'], secpack.get_value(:end, :tem_short),
28
+ assert_equal @obj1['end_time'],
29
+ secpack.get_value(:end_time, :tem_short),
24
30
  'Ending time embedded incorrectly'
25
31
  end
26
32
 
27
33
  def test_reduce_for_outputs
28
- secpack = @client_query.reduce_for_outputs @output1, @output2
34
+ secpack = @client_query.reducer.reduce_for_outputs @output1, @output2
29
35
 
30
36
  assert_equal @output1, secpack.get_bytes(:_output1, 16),
31
37
  'Output1 embedded incorrectly'
@@ -41,5 +47,19 @@ class MapReduceJobTest < MrTestCase
41
47
  assert_equal 0x12345678, output[:id], 'ID incorrectly unpacked'
42
48
  assert_equal 0x3141, output[:score], 'Score incorrectly unpacked'
43
49
  assert_equal [0xBE, 0xEF, 0xFE], output[:check], 'Check bytes'
50
+ end
51
+ end
52
+
53
+ class SerializedMapReduceJobTest < MapReduceJobTest
54
+ # Serialization is successful if a de-serialized job can pass all the tests.
55
+
56
+ def setup
57
+ super
58
+ @hash = @client_query.to_hash
59
+ @client_query = MapReduceJob.new @hash
60
+ end
61
+
62
+ def test_serialized_form
63
+ assert_equal Hash, @hash.class, 'Serialization did not produce a plain hash'
44
64
  end
45
65
  end
@@ -1,6 +1,6 @@
1
1
  require 'test/mr_test_case'
2
2
 
3
- class QueryBuilderTest < MrTestCase
3
+ class QueryBuildersTest < MrTestCase
4
4
  def setup
5
5
  super
6
6
  Tem.auto_conf
@@ -9,7 +9,7 @@ class QueryBuilderTest < MrTestCase
9
9
  end
10
10
 
11
11
  def _test_map_fare(fare)
12
- enc_output = @client_query.map_object fare, $tem
12
+ enc_output = @client_query.mapper.map_object fare, $tem
13
13
  output = @client_query.unpack_output enc_output
14
14
  assert_equal fare_id(fare), output[:id], 'Object ID incorrectly encoded'
15
15
  assert_equal fare_score(fare), output[:score],
@@ -26,7 +26,7 @@ class QueryBuilderTest < MrTestCase
26
26
  win_fare = (fare_score(fare1) > fare_score(fare2)) ? fare1 : fare2
27
27
  # Try both permutations to ensure all branches of the reduce code work.
28
28
  [[output1, output2], [output2, output1]].each do |o1, o2|
29
- enc_output = @client_query.reduce_outputs o1, o2, $tem
29
+ enc_output = @client_query.reducer.reduce_outputs o1, o2, $tem
30
30
  output = @client_query.unpack_output enc_output
31
31
  assert_equal fare_id(win_fare), output[:id], 'The wrong fare won (bad ID)'
32
32
  assert_equal fare_score(win_fare), output[:score],
@@ -0,0 +1,3 @@
1
+ ---
2
+ - darkbulb2.local
3
+ - lightbulb2.local
@@ -0,0 +1,2 @@
1
+ --- []
2
+
data/testdata/fares.yml CHANGED
@@ -1,49 +1,57 @@
1
1
  ---
2
2
  - from: BOS
3
- to: NYC
4
- price: 150
5
- start: 900
6
- end: 1000
3
+ to: TPE
4
+ price: 2500
5
+ start_time: 900
6
+ end_time: 2100
7
+ layovers: 2
7
8
  flight: 15
8
9
  - from: BOS
9
- to: NYC
10
- price: 150
11
- start: 1800
12
- end: 1900
10
+ to: TPE
11
+ price: 2500
12
+ start_time: 1100
13
+ end_time: 2300
14
+ layovers: 2
13
15
  flight: 16
14
16
  - from: BOS
15
- to: NYC
16
- price: 200
17
- start: 1200
18
- end: 1245
17
+ to: TPE
18
+ price: 3000
19
+ start_time: 900
20
+ end_time: 1900
21
+ layovers: 1
19
22
  flight: 17
20
23
  - from: BOS
21
- to: NYC
22
- price: 200
23
- start: 1700
24
- end: 1745
24
+ to: TPE
25
+ price: 3000
26
+ start_time: 1100
27
+ end_time: 2100
28
+ layovers: 1
25
29
  flight: 18
26
30
  - from: BOS
27
- to: NYC
28
- price: 160
29
- start: 900
30
- end: 1000
31
+ to: TPE
32
+ price: 4000
33
+ start_time: 900
34
+ end_time: 1700
35
+ layovers: 0
36
+ flight: 19
37
+ - from: BOS
38
+ to: TPE
39
+ price: 4000
40
+ start_time: 1100
41
+ end_time: 1900
42
+ layovers: 0
43
+ flight: 20
44
+ - from: BOS
45
+ to: TPE
46
+ price: 2400
47
+ start_time: 900
48
+ end_time: 2100
49
+ layovers: 2
31
50
  flight: 21
32
51
  - from: BOS
33
- to: NYC
34
- price: 160
35
- start: 1800
36
- end: 1900
52
+ to: TPE
53
+ price: 2400
54
+ start_time: 1100
55
+ end_time: 2300
56
+ layovers: 2
37
57
  flight: 22
38
- - from: BOS
39
- to: NYC
40
- price: 210
41
- start: 1200
42
- end: 1245
43
- flight: 23
44
- - from: BOS
45
- to: NYC
46
- price: 210
47
- start: 1700
48
- end: 1745
49
- flight: 24
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tem_mr_search
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Costan
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-02 00:00:00 -04:00
12
+ date: 2009-06-04 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -34,29 +34,37 @@ dependencies:
34
34
  version:
35
35
  description: "Tem Map-Reduce proof of concept: database search."
36
36
  email: victor@costan.us
37
- executables: []
38
-
37
+ executables:
38
+ - tem_mr_search_server
39
39
  extensions: []
40
40
 
41
41
  extra_rdoc_files:
42
+ - bin/tem_mr_search_server
42
43
  - CHANGELOG
44
+ - lib/tem_mr_search/client.rb
43
45
  - lib/tem_mr_search/client_query.rb
44
46
  - lib/tem_mr_search/db.rb
45
47
  - lib/tem_mr_search/map_reduce_executor.rb
46
48
  - lib/tem_mr_search/map_reduce_job.rb
47
49
  - lib/tem_mr_search/map_reduce_planner.rb
48
50
  - lib/tem_mr_search/query_builder.rb
51
+ - lib/tem_mr_search/server.rb
52
+ - lib/tem_mr_search/web_client_query_builder.rb
49
53
  - lib/tem_mr_search.rb
50
54
  - LICENSE
51
55
  - README
52
56
  files:
57
+ - bin/tem_mr_search_server
53
58
  - CHANGELOG
59
+ - lib/tem_mr_search/client.rb
54
60
  - lib/tem_mr_search/client_query.rb
55
61
  - lib/tem_mr_search/db.rb
56
62
  - lib/tem_mr_search/map_reduce_executor.rb
57
63
  - lib/tem_mr_search/map_reduce_job.rb
58
64
  - lib/tem_mr_search/map_reduce_planner.rb
59
65
  - lib/tem_mr_search/query_builder.rb
66
+ - lib/tem_mr_search/server.rb
67
+ - lib/tem_mr_search/web_client_query_builder.rb
60
68
  - lib/tem_mr_search.rb
61
69
  - LICENSE
62
70
  - Manifest
@@ -64,11 +72,14 @@ files:
64
72
  - README
65
73
  - tem_mr_search.gemspec
66
74
  - test/mr_test_case.rb
75
+ - test/test_client_server.rb
67
76
  - test/test_db.rb
68
77
  - test/test_map_reduce_executor.rb
69
78
  - test/test_map_reduce_job.rb
70
79
  - test/test_map_reduce_planner.rb
71
- - test/test_query_builder.rb
80
+ - test/test_query_builders.rb
81
+ - testdata/cluster.yml
82
+ - testdata/empty_cluster.yml
72
83
  - testdata/fares.yml
73
84
  - testdata/parallel_plan_431.yml
74
85
  - testdata/parallel_plan_740.yml
@@ -109,8 +120,9 @@ signing_key:
109
120
  specification_version: 3
110
121
  summary: "Tem Map-Reduce proof of concept: database search."
111
122
  test_files:
123
+ - test/test_client_server.rb
112
124
  - test/test_db.rb
113
125
  - test/test_map_reduce_executor.rb
114
126
  - test/test_map_reduce_job.rb
115
127
  - test/test_map_reduce_planner.rb
116
- - test/test_query_builder.rb
128
+ - test/test_query_builders.rb