costan-tem_mr_search 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,5 @@
1
+ v0.2.1. Check for cluster configuration on each query.
2
+
3
+ v0.2. Implemented remote queries.
4
+
5
+ v0.1. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Massachusetts Institute of Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/Manifest ADDED
@@ -0,0 +1,32 @@
1
+ bin/tem_mr_search_server
2
+ CHANGELOG
3
+ lib/tem_mr_search/client.rb
4
+ lib/tem_mr_search/client_query.rb
5
+ lib/tem_mr_search/db.rb
6
+ lib/tem_mr_search/map_reduce_executor.rb
7
+ lib/tem_mr_search/map_reduce_job.rb
8
+ lib/tem_mr_search/map_reduce_planner.rb
9
+ lib/tem_mr_search/query_builder.rb
10
+ lib/tem_mr_search/server.rb
11
+ lib/tem_mr_search/web_client_query_builder.rb
12
+ lib/tem_mr_search.rb
13
+ LICENSE
14
+ Manifest
15
+ Rakefile
16
+ README
17
+ tem_mr_search.gemspec
18
+ test/mr_test_case.rb
19
+ test/test_client_server.rb
20
+ test/test_db.rb
21
+ test/test_map_reduce_executor.rb
22
+ test/test_map_reduce_job.rb
23
+ test/test_map_reduce_planner.rb
24
+ test/test_query_builders.rb
25
+ testdata/cluster.yml
26
+ testdata/empty_cluster.yml
27
+ testdata/fares.yml
28
+ testdata/parallel_plan_431.yml
29
+ testdata/parallel_plan_740.yml
30
+ testdata/serial_plan_410.yml
31
+ testdata/serial_plan_431.yml
32
+ testdata/serial_plan_740.yml
data/README ADDED
@@ -0,0 +1,4 @@
1
+ This is a proof of concept of Map-Reduce on a cluster of Trusted Execution
2
+ Modules (TEMs). The Map-Reduce cluster performs queries consisting of finding
3
+ the best item in a homogeneous database using secret ranking criteria.
4
+
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
require 'rubygems'
gem 'echoe'
require 'echoe'

# Packaging and release configuration for the tem_mr_search gem, via Echoe.
Echoe.new('tem_mr_search') do |spec|
  # Publishing targets.
  spec.project = 'tem' # rubyforge project
  spec.docs_host = "costan@rubyforge.org:/var/www/gforge-projects/tem/rdoc/"

  # Gem metadata.
  spec.author = 'Victor Costan'
  spec.email = 'victor@costan.us'
  spec.summary = 'Tem Map-Reduce proof of concept: database search.'
  spec.url = 'http://tem.rubyforge.org'
  spec.dependencies = [
    'tem_ruby >=0.11.2',
    'tem_multi_proxy >=0.2'
  ]

  # Archive packages are only requested when not running on Windows.
  spec.need_tar_gz = !Platform.windows?
  spec.need_zip = !Platform.windows?
  spec.rdoc_pattern = /^(lib|bin|tasks|ext)|^BUILD|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
end

# Lets the Rakefile be executed directly (ruby Rakefile) as well as via rake.
if __FILE__ == $0
  Rake.application = Rake::Application.new
  Rake.application.run
end
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby
#
# Command-line launcher for the Map-Reduce search server.
#
# Usage: tem_mr_search_server db_file cluster_file [server_port]

require 'rubygems'
require 'tem_mr_search'

# server_port is optional, so both two and three arguments are valid. The
# previous check (length >= 3) rejected every invocation that supplied a port.
unless (2..3).include? ARGV.length
  print "Usage: #{$0} db_file cluster_file [server_port]\n"
  exit
end

# When the port is omitted, ARGV[2] is nil and is handed to the server as-is.
db_file, cluster_file, server_port = ARGV[0], ARGV[1], ARGV[2]
server = Tem::Mr::Search::Server.new db_file, cluster_file, server_port
server.serve_loop
@@ -0,0 +1,52 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class Client
5
+ OP = Zerg::Support::Protocols::ObjectProtocol
6
+ OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP
7
+
8
+ # Performs a private database search using a Map-Reduce.
9
+ def self.search(server_addr, client_query)
10
+ output = issue_request server_addr, :type => :search, :root_tem => 0,
11
+ :map_reduce => client_query.to_hash
12
+ output ? client_query.unpack_output(output) : nil
13
+ end
14
+
15
+ # Asks for an item in the server's database.
16
+ #
17
+ # In production, there should be per-client rate-limiting on this request.
18
+ def self.fetch_item(server_addr, item_id)
19
+ issue_request server_addr, :type => :fetch, :id => item_id
20
+ end
21
+
22
+ # Terminates the server.
23
+ #
24
+ # In production, normal clients wouldn't have access to this.
25
+ def self.shutdown_server(server_addr)
26
+ issue_request server_addr, :type => :shutdown
27
+ end
28
+
29
+ # Dumps the server database.
30
+ #
31
+ # In production, normal clients wouldn't have access to this.
32
+ def self.dump_database(server_addr)
33
+ issue_request server_addr, :type => :db_dump
34
+ end
35
+
36
+ # Issues a request against a Map-Reduce server and returns the response.
37
+ def self.issue_request(server_addr, request)
38
+ socket = Zerg::Support::SocketFactory.socket :out_addr => server_addr,
39
+ :out_port => Server::DEFAULT_PORT
40
+ socket.extend OPAdapter
41
+ begin
42
+ socket.send_object request
43
+ response = socket.recv_object response
44
+ rescue
45
+ response = nil
46
+ end
47
+ socket.close rescue nil
48
+ response
49
+ end
50
+ end
51
+
52
+ end # namespace Tem::Mr::Search
@@ -0,0 +1,21 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class ClientQuery < MapReduceJob
5
+ def initialize(attributes)
6
+ super
7
+ @query_key = attributes[:key]
8
+ end
9
+
10
+ # Unpacks a reduce output into its components.
11
+ #
12
+ # This is expected to be called with the encrypted output returned by the
13
+ # search provider.
14
+ def unpack_output(output)
15
+ # TODO(costan): decrypt output once we enable encryption
16
+ decrypted_output = output
17
+ unpack_decrypted_output decrypted_output
18
+ end
19
+ end
20
+
21
+ end # namespace Tem::Mr::search
@@ -0,0 +1,26 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class Db
5
+ attr_reader :data
6
+ attr_reader :id_attribute
7
+
8
+ def initialize(path)
9
+ @data = File.open(path, 'r') { |f| YAML.load f }
10
+ @id_attribute = 'flight'
11
+ end
12
+
13
+ def length
14
+ @data.length
15
+ end
16
+
17
+ def item(item_index)
18
+ @data[item_index]
19
+ end
20
+
21
+ def item_by_id(item_id)
22
+ @data.find { |item| item[@id_attribute] == item_id }
23
+ end
24
+ end
25
+
26
+ end # namespace Tem::Mr::search
@@ -0,0 +1,115 @@
1
+ require 'thread'
2
+
3
+
4
+ # :nodoc: namespace
5
+ module Tem::Mr::Search
6
+
7
+ class MapReduceExecutor
8
+ # Creates an executor for a Map-Reduce job.
9
+ #
10
+ # Arguments:
11
+ # job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
12
+ # db:: the database to run Map-Reduce over
13
+ # tems:: sessions to the available TEMs
14
+ # root_tem:: the index of the TEM that has the root mapper and reducer
15
+ # planner_class:: (optional) replacement for the default planner strategy
16
+ def initialize(job, db, tems, root_tem, planner_class = nil)
17
+ planner_class ||= MapReducePlanner
18
+
19
+ @db = db # Writable only in main thread.
20
+ @tems = tems # Writable only in main thread.
21
+
22
+ # Writable only in main thread.
23
+ @planner = planner_class.new @job, db.length, tems.length, root_tem
24
+
25
+ # Protected by @lock
26
+ @tem_parts = { :mapper => { root_tem => job.mapper },
27
+ :reducer => { root_tem => job.reducer },
28
+ :finalizer => { root_tem => job.finalizer } }
29
+ # Protected by @lock
30
+ @outputs = {}
31
+
32
+ # Thread-safe.
33
+ @thread_queues = tems.map { |tem| Queue.new }
34
+ @main_queue = Queue.new
35
+ @lock = Mutex.new
36
+ end
37
+
38
+ # Executes the job.
39
+ def execute
40
+ # Spawn TEM threads.
41
+ @tems.each_index { |i| Thread.new(i) { |i| executor_thread i } }
42
+
43
+ until @planner.done?
44
+ actions = @planner.next_actions!
45
+ @lock.synchronize do
46
+ actions.each { |action| @thread_queues[action[:with]] << action }
47
+ end
48
+
49
+ action = @main_queue.pop
50
+ @planner.action_done action
51
+ end
52
+
53
+ return @outputs[@planner.output_id]
54
+ end
55
+
56
+ # Main method for thread in charge of a TEM.
57
+ def executor_thread(tem_index)
58
+ queue = @thread_queues[tem_index]
59
+ while action = queue.pop
60
+ execute_action action, tem_index
61
+ @main_queue << action
62
+ end
63
+ end
64
+
65
+ # Executes a Map-Reduce planner action.
66
+ #
67
+ # This method is called on the thread corresponding to the TEM that the action
68
+ # is supposed to execute on.
69
+ def execute_action(action, tem_index)
70
+ case action[:action]
71
+ when :migrate
72
+ in_part = @lock.synchronize { @tem_parts[action[:secpack]][tem_index] }
73
+ out_part = in_part # TODO(costan): actual migration
74
+ @lock.synchronize do
75
+ @tem_parts[action[:secpack]][action[:to]] = out_part
76
+ end
77
+
78
+ when :map
79
+ mapper, item = nil, nil
80
+ @lock.synchronize do
81
+ mapper = @tem_parts[:mapper][tem_index]
82
+ item = @db.item(action[:item])
83
+ end
84
+ output = mapper.map_object item, @tems[tem_index]
85
+ @lock.synchronize do
86
+ @outputs[action[:output_id]] = output
87
+ end
88
+
89
+ when :reduce
90
+ reducer, output1, output2 = nil, nil, nil
91
+ @lock.synchronize do
92
+ reducer = @tem_parts[:reducer][tem_index]
93
+ output1 = @outputs[action[:output1_id]]
94
+ output2 = @outputs[action[:output2_id]]
95
+ end
96
+ output = reducer.reduce_outputs output1, output2, @tems[tem_index]
97
+ @lock.synchronize do
98
+ @outputs[action[:output_id]] = output
99
+ end
100
+
101
+ when :finalize
102
+ finalizer = nil
103
+ @lock.synchronize do
104
+ finalizer = @tem_parts[:finalizer][tem_index]
105
+ output = @outputs[action[:output_id]]
106
+ end
107
+ final_output = finalizer.finalize_output output, @tems[tem_index]
108
+ @lock.synchronize do
109
+ @outputs[action[:final_id]] = final_output
110
+ end
111
+ end
112
+ end
113
+ end
114
+
115
+ end # namespace Tem::Mr::Search
@@ -0,0 +1,108 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
+ class MapReduceJob
5
+ attr_reader :mapper, :reducer, :finalizer, :attributes, :id_attribute
6
+
7
+ def initialize(attributes)
8
+ @attributes = attributes[:attributes]
9
+ @id_attribute = attributes[:id_attribute]
10
+
11
+ @mapper = Mapper.new attributes[:map], self
12
+ @reducer = Reducer.new attributes[:reduce], self
13
+ @finalizer = Finalizer.new attributes[:finalize], self
14
+ end
15
+
16
+ # Unpacks a decrypted output into its components.
17
+ def unpack_decrypted_output(output)
18
+ {
19
+ :id => output[0, 8].reverse.pack('C*').unpack('q').first,
20
+ :score => Tem::Abi.read_tem_short(output, 8),
21
+ :check => output[13, 3]
22
+ }
23
+ end
24
+
25
+ # Serializes a job to a hash.
26
+ #
27
+ # Useful in conjunction with ObjectProtocol in ZergSupport, for sending jobs
28
+ # across the wire. De-serialize with MapReduceJob#new
29
+ def to_hash
30
+ { :attributes => @attributes, :id_attribute => @id_attribute,
31
+ :map => @mapper.to_plain_object, :reduce => @reducer.to_plain_object,
32
+ :finalize => @finalizer.to_plain_object }
33
+ end
34
+
35
+ # Base class for the Map-Reduce SECpack wrappers.
36
+ class JobPart
37
+ def initialize(secpack, job)
38
+ unless secpack.nil? or secpack.kind_of? Tem::SecPack
39
+ secpack = Tem::SecPack.new_from_array secpack
40
+ end
41
+ @secpack = secpack
42
+ end
43
+
44
+ def to_plain_object
45
+ return nil if @secpack.nil?
46
+ @secpack.to_array
47
+ end
48
+ end
49
+
50
+ # Wrapper for the map SECpack.
51
+ class Mapper < JobPart
52
+ def initialize(secpack, job)
53
+ super
54
+ @attributes = job.attributes
55
+ @id_attribute = job.id_attribute
56
+ end
57
+
58
+ # Returns a SECpack for mapping the given object data into the query.
59
+ def map_for_object(object_data)
60
+ return nil unless @secpack
61
+ object_id = object_data[@id_attribute.to_s]
62
+ new_secpack = Tem::SecPack.new_from_array @secpack.to_array
63
+ new_secpack.set_bytes :_id, [object_id].pack('q').unpack('C*').reverse
64
+ @attributes.each do |attribute|
65
+ name, type = attribute[:name], attribute[:type]
66
+ new_secpack.set_value name.to_sym, type, object_data[name.to_s]
67
+ end
68
+ new_secpack
69
+ end
70
+
71
+ # Maps the given object into the query.
72
+ def map_object(object_data, tem)
73
+ secpack = map_for_object object_data
74
+ secpack ? tem.execute(secpack) : object_data
75
+ end
76
+ end
77
+
78
+ # Wrapper for the reduce SECpack.
79
+ class Reducer < JobPart
80
+ # Returns a SECpack for reducing two inputs coming from maps or other reduces.
81
+ def reduce_for_outputs(output1, output2)
82
+ new_secpack = Tem::SecPack.new_from_array @secpack.to_array
83
+
84
+ new_secpack.set_bytes :_output1, output1
85
+ new_secpack.set_bytes :_output2, output2
86
+ new_secpack
87
+ end
88
+
89
+ # Reduces two inputs coming from maps or other reduces.
90
+ def reduce_outputs(output1, output2, tem)
91
+ secpack = reduce_for_outputs output1, output2
92
+ tem.execute secpack
93
+ end
94
+ end
95
+
96
+ # Wrapper for the finalize SECpack.
97
+ class Finalizer < JobPart
98
+ # Converts a map/reduce output into the final result for the operation.
99
+ def finalize_output(output, tem)
100
+ return output unless @secpack
101
+ secpack = Tem::SecPack.new_from_array @finalize_secpack.to_array
102
+ secpack.set_bytes :_output, output
103
+ tem.execute secpack
104
+ end
105
+ end
106
+ end
107
+
108
+ end # namespace Tem::Mr::search
@@ -0,0 +1,169 @@
1
+ require 'rbtree'
2
+ require 'set'
3
+
4
+ # :nodoc: namespace
5
+ module Tem::Mr::Search
6
+
7
+ class MapReducePlanner
8
+ # Creates a planner for a Map-Reduce job.
9
+ #
10
+ # Arguments:
11
+ # job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
12
+ # num_items: how many data items does the Map-Reduce run over
13
+ # num_tems:: how many TEMs are available
14
+ # root_tem:: the index of the TEM that has the root mapper and reducer
15
+ def initialize(job, num_items, num_tems, root_tem)
16
+ @job = job
17
+ @root_tem = root_tem
18
+
19
+ @without = { :mapper => RBTree.new, :reducer => RBTree.new }
20
+ @with = { :mapper => Set.new([root_tem]),
21
+ :reducer => Set.new([root_tem]) }
22
+ @free_tems = RBTree.new
23
+ 0.upto(num_tems - 1) do |tem|
24
+ @free_tems[tem] = true
25
+ next if tem == root_tem
26
+ @without.each { |k, v| v[tem] = true }
27
+ end
28
+
29
+ @unmapped_items = (0...num_items).to_a.reverse
30
+ @reduce_queue = RBTree.new
31
+ @last_output_id = 0
32
+ @last_reduce_id = 2 * num_items - 2
33
+ @done_reducing, @output_id = false, nil
34
+ end
35
+
36
+ # Generates migrating actions for a SECpack type that are possible now.
37
+ def migrate_actions(sec_type)
38
+ actions = []
39
+ return actions if @without[sec_type].length == 0
40
+ free_tems = free_tems_with_sec sec_type
41
+ free_tems.each do |source_tem|
42
+ break if @without[sec_type].length == 0
43
+ target_tem = @without[sec_type].min.first
44
+ @without[sec_type].delete target_tem
45
+ @free_tems.delete source_tem
46
+ actions.push :action => :migrate, :secpack => sec_type,
47
+ :with => source_tem, :to => target_tem
48
+ end
49
+ actions
50
+ end
51
+ private :migrate_actions
52
+
53
+ # Informs the planner that a SECpack migration has completed.
54
+ def done_migrating(action)
55
+ @free_tems[action[:with]] = true
56
+ @with[action[:secpack]] << action[:to]
57
+ end
58
+ private :done_migrating
59
+
60
+ # A sorted array of the free TEMs that have a SECpack type.
61
+ def free_tems_with_sec(sec_type)
62
+ tems = []
63
+ @free_tems.each do |tem, true_value|
64
+ tems << tem if @with[sec_type].include? tem
65
+ end
66
+ tems
67
+ end
68
+
69
+ # A unique output_id.
70
+ def next_output_id
71
+ next_id = @last_output_id
72
+ @last_output_id += 1
73
+ next_id
74
+ end
75
+
76
+ # Generates mapping actions possible right now.
77
+ def map_actions
78
+ actions = []
79
+ return actions if @unmapped_items.empty?
80
+ free_tems_with_sec(:mapper).each do |tem|
81
+ break unless item = @unmapped_items.pop
82
+ @free_tems.delete tem
83
+ actions.push :action => :map, :item => item, :with => tem,
84
+ :output_id => next_output_id
85
+ end
86
+ actions
87
+ end
88
+ private :map_actions
89
+
90
+ # Informs the planner that a data mapping has completed.
91
+ def done_mapping(action)
92
+ @free_tems[action[:with]] = true
93
+ @reduce_queue[action[:output_id]] = true
94
+ end
95
+ private :done_mapping
96
+
97
+ # Generates reducing actions possible right now.
98
+ def reduce_actions
99
+ actions = []
100
+ return actions if @reduce_queue.length <= 1
101
+ free_tems_with_sec(:reducer).each do |tem|
102
+ break if @reduce_queue.length <= 1
103
+ output1_id, output2_id = *[0, 1].map do |i|
104
+ output_id = @reduce_queue.min.first
105
+ @reduce_queue.delete output_id
106
+ output_id
107
+ end
108
+ @free_tems.delete tem
109
+ actions.push :action => :reduce, :with => tem, :output1_id => output1_id,
110
+ :output2_id => output2_id, :output_id => next_output_id
111
+ end
112
+ actions
113
+ end
114
+ private :reduce_actions
115
+
116
+ # Informs the planner that a data reduction has completed.
117
+ def done_reducing(action)
118
+ @free_tems[action[:with]] = true
119
+ if action[:output_id] == @last_reduce_id
120
+ @done_reducing = true
121
+ return
122
+ end
123
+ @reduce_queue[action[:output_id]] = true
124
+ end
125
+ private :done_reducing
126
+
127
+ # Generates finalizing actions possible right now.
128
+ def finalize_actions
129
+ return [] unless @done_reducing and !@output_id and @free_tems[@root_tem]
130
+ @finalize_ready = false
131
+ return [ :action => :finalize, :with => @root_tem,
132
+ :output_id => @last_reduce_id, :final_id => next_output_id ]
133
+ end
134
+ private :finalize_actions
135
+
136
+ # Informs the planner that an action issued by next_action was done.
137
+ def done_finalizing(action)
138
+ @free_tems[action[:with]] = true
139
+ @output_id = action[:final_id]
140
+ end
141
+ private :done_finalizing
142
+
143
+ # True when the Map-Reduce job is complete.
144
+ def done?
145
+ !@output_id.nil?
146
+ end
147
+
148
+ # The output ID of the Map-Reduce's final result.
149
+ attr_reader :output_id
150
+
151
+ # Informs the planner that an action issued by next_actions was completed.
152
+ def action_done(action)
153
+ dispatch = { :migrate => :done_migrating, :map => :done_mapping, :reduce =>
154
+ :done_reducing, :finalize => :done_finalizing }
155
+ self.send dispatch[action[:action]], action
156
+ end
157
+
158
+ # Issues a set of actions that can be performed right now.
159
+ def next_actions!
160
+ actions = migrate_actions :mapper
161
+ actions += migrate_actions :reducer
162
+ actions += map_actions
163
+ actions += reduce_actions
164
+ actions += finalize_actions
165
+ actions
166
+ end
167
+ end
168
+
169
+ end # namespace Tem::Mr::search