costan-tem_mr_search 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,5 @@
1
+ v0.2.1. Check for cluster configuration on each query.
2
+
3
+ v0.2. Implemented remote queries.
4
+
5
+ v0.1. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Massachusetts Institute of Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/Manifest ADDED
@@ -0,0 +1,32 @@
1
+ bin/tem_mr_search_server
2
+ CHANGELOG
3
+ lib/tem_mr_search/client.rb
4
+ lib/tem_mr_search/client_query.rb
5
+ lib/tem_mr_search/db.rb
6
+ lib/tem_mr_search/map_reduce_executor.rb
7
+ lib/tem_mr_search/map_reduce_job.rb
8
+ lib/tem_mr_search/map_reduce_planner.rb
9
+ lib/tem_mr_search/query_builder.rb
10
+ lib/tem_mr_search/server.rb
11
+ lib/tem_mr_search/web_client_query_builder.rb
12
+ lib/tem_mr_search.rb
13
+ LICENSE
14
+ Manifest
15
+ Rakefile
16
+ README
17
+ tem_mr_search.gemspec
18
+ test/mr_test_case.rb
19
+ test/test_client_server.rb
20
+ test/test_db.rb
21
+ test/test_map_reduce_executor.rb
22
+ test/test_map_reduce_job.rb
23
+ test/test_map_reduce_planner.rb
24
+ test/test_query_builders.rb
25
+ testdata/cluster.yml
26
+ testdata/empty_cluster.yml
27
+ testdata/fares.yml
28
+ testdata/parallel_plan_431.yml
29
+ testdata/parallel_plan_740.yml
30
+ testdata/serial_plan_410.yml
31
+ testdata/serial_plan_431.yml
32
+ testdata/serial_plan_740.yml
data/README ADDED
@@ -0,0 +1,4 @@
1
+ This is a proof of concept of Map-Reduce on a cluster of Trusted Execution
2
+ Modules (TEMs). The Map-Reduce cluster performs queries consisting of finding
3
+ the best item in a homogeneous database using secret ranking criteria.
4
+
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
# Build script for the tem_mr_search gem. The packaging and release tasks are
# generated by the Echoe helper gem from the settings below.
require 'rubygems'
gem 'echoe'
require 'echoe'

Echoe.new('tem_mr_search') do |spec|
  spec.project = 'tem' # rubyforge project
  spec.docs_host = "costan@rubyforge.org:/var/www/gforge-projects/tem/rdoc/"

  spec.author = 'Victor Costan'
  spec.email = 'victor@costan.us'
  spec.summary = 'Tem Map-Reduce proof of concept: database search.'
  spec.url = 'http://tem.rubyforge.org'
  spec.dependencies = ['tem_ruby >=0.11.2', 'tem_multi_proxy >=0.2']

  # The tar.gz and zip archives are only requested when not on Windows.
  build_archives = !Platform.windows?
  spec.need_tar_gz = build_archives
  spec.need_zip = build_archives
  spec.rdoc_pattern = /^(lib|bin|tasks|ext)|^BUILD|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
end

# Lets the file be executed directly with ruby, not only through rake.
if __FILE__ == $0
  Rake.application = Rake::Application.new
  Rake.application.run
end
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby
# Command-line launcher for the Map-Reduce search server.
#
# Usage: tem_mr_search_server db_file cluster_file [server_port]

require 'rubygems'
require 'tem_mr_search'

# server_port is optional, so both two and three arguments are accepted.
# (The previous check rejected the three-argument form that the usage message
# advertises.)
unless (2..3).include? ARGV.length
  print "Usage: #{$0} db_file cluster_file [server_port]\n"
  exit
end

# server_port is nil when the third argument is omitted.
db_file, cluster_file, server_port = ARGV[0], ARGV[1], ARGV[2]
server = Tem::Mr::Search::Server.new db_file, cluster_file, server_port
server.serve_loop
@@ -0,0 +1,52 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
class Client
  OP = Zerg::Support::Protocols::ObjectProtocol
  OPAdapter = Zerg::Support::Sockets::ProtocolAdapter.adapter_module OP

  # Runs a private database search as a Map-Reduce on the server.
  #
  # Sends client_query (serialized with to_hash) to the server at server_addr
  # and returns the server's output after client_query.unpack_output, or nil
  # when the request yielded no output.
  def self.search(server_addr, client_query)
    request = { :type => :search, :root_tem => 0,
                :map_reduce => client_query.to_hash }
    output = issue_request server_addr, request
    return nil unless output
    client_query.unpack_output output
  end

  # Requests the item with the given ID from the server's database.
  #
  # In production, there should be per-client rate-limiting on this request.
  def self.fetch_item(server_addr, item_id)
    issue_request server_addr, { :type => :fetch, :id => item_id }
  end

  # Asks the server to terminate.
  #
  # In production, normal clients wouldn't have access to this.
  def self.shutdown_server(server_addr)
    issue_request server_addr, { :type => :shutdown }
  end

  # Asks the server for a dump of its database.
  #
  # In production, normal clients wouldn't have access to this.
  def self.dump_database(server_addr)
    issue_request server_addr, { :type => :db_dump }
  end

  # Sends one request to a Map-Reduce server and returns its response.
  #
  # Returns nil if sending the request or receiving the response raises a
  # StandardError. The socket is closed on a best-effort basis afterwards.
  def self.issue_request(server_addr, request)
    socket = Zerg::Support::SocketFactory.socket :out_addr => server_addr,
        :out_port => Server::DEFAULT_PORT
    socket.extend OPAdapter

    reply = nil
    begin
      socket.send_object request
      # NOTE(review): the original code passed its own not-yet-assigned
      # result variable (i.e. nil) to recv_object; the nil argument is kept to
      # preserve behavior -- confirm whether recv_object expects an argument.
      reply = socket.recv_object nil
    rescue
      reply = nil
    end

    socket.close rescue nil
    reply
  end
end
51
+
52
+ end # namespace Tem::Mr::Search
@@ -0,0 +1,21 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
class ClientQuery < MapReduceJob
  # Builds a query from a job description hash.
  #
  # The hash is handed to MapReduceJob unchanged; its :key entry is
  # additionally remembered as the query key.
  def initialize(attributes)
    super(attributes)
    @query_key = attributes[:key]
  end

  # Splits a reduce output into its components.
  #
  # The argument is expected to be the encrypted output returned by the
  # search provider.
  def unpack_output(output)
    # TODO(costan): decrypt output once we enable encryption
    unpack_decrypted_output output
  end
end
20
+
21
+ end # namespace Tem::Mr::search
@@ -0,0 +1,26 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
# Read-only item database loaded from a YAML file.
class Db
  # The loaded items, in file order.
  attr_reader :data
  # Name of the item attribute used as the item's ID by item_by_id.
  attr_reader :id_attribute

  # Loads the database stored in the YAML file at path.
  #
  # An empty YAML document loads as a falsy value; it is treated as an empty
  # database so that length, item and item_by_id keep working.
  def initialize(path)
    @data = File.open(path, 'r') { |f| YAML.load f } || []
    @id_attribute = 'flight'
  end

  # The number of items in the database.
  def length
    @data.length
  end

  # The item at the given position (nil if there is no such position).
  def item(item_index)
    @data[item_index]
  end

  # The first item whose ID attribute equals item_id (nil if none matches).
  def item_by_id(item_id)
    @data.find { |item| item[@id_attribute] == item_id }
  end
end
25
+
26
+ end # namespace Tem::Mr::search
@@ -0,0 +1,115 @@
1
+ require 'thread'
2
+
3
+
4
+ # :nodoc: namespace
5
+ module Tem::Mr::Search
6
+
7
# Runs a Map-Reduce job on a set of TEMs, following the actions issued by a
# planner. One worker thread is used per TEM; the calling thread drives the
# planner.
class MapReduceExecutor
  # Creates an executor for a Map-Reduce job.
  #
  # Arguments:
  #   job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   db:: the database to run Map-Reduce over
  #   tems:: sessions to the available TEMs
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  #   planner_class:: (optional) replacement for the default planner strategy
  def initialize(job, db, tems, root_tem, planner_class = nil)
    planner_class ||= MapReducePlanner

    @db = db      # Writable only in main thread.
    @tems = tems  # Writable only in main thread.

    # Writable only in main thread.
    # The planner receives the job itself (the previous code passed an
    # instance variable that was never assigned, i.e. always nil).
    @planner = planner_class.new job, db.length, tems.length, root_tem

    # Protected by @lock
    @tem_parts = { :mapper => { root_tem => job.mapper },
                   :reducer => { root_tem => job.reducer },
                   :finalizer => { root_tem => job.finalizer } }
    # Protected by @lock
    @outputs = {}

    # Thread-safe.
    @thread_queues = tems.map { Queue.new }
    @main_queue = Queue.new
    @lock = Mutex.new
  end

  # Executes the job and returns the output stored under the planner's
  # output_id.
  def execute
    # Spawn TEM threads.
    @tems.each_index do |index|
      Thread.new(index) { |tem_index| executor_thread tem_index }
    end

    begin
      until @planner.done?
        actions = @planner.next_actions!
        @lock.synchronize do
          actions.each { |action| @thread_queues[action[:with]] << action }
        end

        # Wait for any one action to complete before planning again.
        action = @main_queue.pop
        @planner.action_done action
      end
    ensure
      # A nil entry ends the loop in executor_thread, so the worker threads
      # exit instead of blocking on their queues forever.
      @thread_queues.each { |queue| queue << nil }
    end

    @lock.synchronize { @outputs[@planner.output_id] }
  end

  # Main method for thread in charge of a TEM.
  #
  # Executes the actions queued for the TEM until a nil entry is received,
  # reporting each completed action on the main queue.
  def executor_thread(tem_index)
    queue = @thread_queues[tem_index]
    while action = queue.pop
      execute_action action, tem_index
      @main_queue << action
    end
  end

  # Executes a Map-Reduce planner action.
  #
  # This method is called on the thread corresponding to the TEM that the
  # action is supposed to execute on. Shared state is only touched while
  # holding @lock; the TEM work itself happens outside the lock.
  def execute_action(action, tem_index)
    case action[:action]
    when :migrate
      in_part = @lock.synchronize { @tem_parts[action[:secpack]][tem_index] }
      out_part = in_part  # TODO(costan): actual migration
      @lock.synchronize do
        @tem_parts[action[:secpack]][action[:to]] = out_part
      end

    when :map
      mapper, item = nil, nil
      @lock.synchronize do
        mapper = @tem_parts[:mapper][tem_index]
        item = @db.item(action[:item])
      end
      output = mapper.map_object item, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :reduce
      reducer, output1, output2 = nil, nil, nil
      @lock.synchronize do
        reducer = @tem_parts[:reducer][tem_index]
        output1 = @outputs[action[:output1_id]]
        output2 = @outputs[action[:output2_id]]
      end
      output = reducer.reduce_outputs output1, output2, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :finalize
      # Both locals are declared here so the block below assigns to them
      # rather than relying on a variable defined by an earlier branch.
      finalizer, output = nil, nil
      @lock.synchronize do
        finalizer = @tem_parts[:finalizer][tem_index]
        output = @outputs[action[:output_id]]
      end
      final_output = finalizer.finalize_output output, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:final_id]] = final_output
      end
    end
  end
end
114
+
115
+ end # namespace Tem::Mr::Search
@@ -0,0 +1,108 @@
1
+ # :nodoc: namespace
2
+ module Tem::Mr::Search
3
+
4
# Describes a Map-Reduce job: the item attributes it reads and the SECpacks
# implementing its map, reduce and finalize steps.
class MapReduceJob
  attr_reader :mapper, :reducer, :finalizer, :attributes, :id_attribute

  # Creates a job from a description hash.
  #
  # The hash entries read are :attributes, :id_attribute, :map, :reduce and
  # :finalize. The last three are SECpacks, either as Tem::SecPack instances
  # or in the array form produced by to_hash; they may also be nil.
  def initialize(attributes)
    @attributes = attributes[:attributes]
    @id_attribute = attributes[:id_attribute]

    @mapper = Mapper.new attributes[:map], self
    @reducer = Reducer.new attributes[:reduce], self
    @finalizer = Finalizer.new attributes[:finalize], self
  end

  # Unpacks a decrypted output into its components.
  #
  # Returns a hash with :id (decoded from the first 8 bytes, mirroring the
  # encoding used by Mapper#map_for_object), :score (read with
  # Tem::Abi.read_tem_short at offset 8) and :check (3 bytes from offset 13).
  def unpack_decrypted_output(output)
    {
      :id => output[0, 8].reverse.pack('C*').unpack('q').first,
      :score => Tem::Abi.read_tem_short(output, 8),
      :check => output[13, 3]
    }
  end

  # Serializes a job to a hash.
  #
  # Useful in conjunction with ObjectProtocol in ZergSupport, for sending jobs
  # across the wire. De-serialize with MapReduceJob#new
  def to_hash
    { :attributes => @attributes, :id_attribute => @id_attribute,
      :map => @mapper.to_plain_object, :reduce => @reducer.to_plain_object,
      :finalize => @finalizer.to_plain_object }
  end

  # Base class for the Map-Reduce SECpack wrappers.
  class JobPart
    # Wraps a SECpack given as nil, a Tem::SecPack, or the array form accepted
    # by Tem::SecPack.new_from_array. The job argument is not used here;
    # subclasses may read it.
    def initialize(secpack, job)
      unless secpack.nil? || secpack.kind_of?(Tem::SecPack)
        secpack = Tem::SecPack.new_from_array secpack
      end
      @secpack = secpack
    end

    # The wrapped SECpack in array form, or nil when there is no SECpack.
    def to_plain_object
      return nil if @secpack.nil?
      @secpack.to_array
    end
  end

  # Wrapper for the map SECpack.
  class Mapper < JobPart
    def initialize(secpack, job)
      super
      @attributes = job.attributes
      @id_attribute = job.id_attribute
    end

    # Returns a SECpack for mapping the given object data into the query.
    #
    # The result is a copy of the map SECpack with the object's ID and each
    # of the job's attributes filled in. Returns nil when the job has no map
    # SECpack.
    def map_for_object(object_data)
      return nil unless @secpack
      item_id = object_data[@id_attribute.to_s]
      new_secpack = Tem::SecPack.new_from_array @secpack.to_array
      # The ID is stored as 8 bytes; unpack_decrypted_output reverses this.
      new_secpack.set_bytes :_id, [item_id].pack('q').unpack('C*').reverse
      @attributes.each do |attribute|
        name, type = attribute[:name], attribute[:type]
        new_secpack.set_value name.to_sym, type, object_data[name.to_s]
      end
      new_secpack
    end

    # Maps the given object into the query.
    #
    # Without a map SECpack, the object data is returned unchanged.
    def map_object(object_data, tem)
      secpack = map_for_object object_data
      secpack ? tem.execute(secpack) : object_data
    end
  end

  # Wrapper for the reduce SECpack.
  class Reducer < JobPart
    # Returns a SECpack for reducing two inputs coming from maps or other
    # reduces. Unlike the map and finalize steps, this requires the job to
    # have a reduce SECpack.
    def reduce_for_outputs(output1, output2)
      new_secpack = Tem::SecPack.new_from_array @secpack.to_array

      new_secpack.set_bytes :_output1, output1
      new_secpack.set_bytes :_output2, output2
      new_secpack
    end

    # Reduces two inputs coming from maps or other reduces.
    def reduce_outputs(output1, output2, tem)
      secpack = reduce_for_outputs output1, output2
      tem.execute secpack
    end
  end

  # Wrapper for the finalize SECpack.
  class Finalizer < JobPart
    # Converts a map/reduce output into the final result for the operation.
    #
    # Without a finalize SECpack, the output is returned unchanged.
    def finalize_output(output, tem)
      return output unless @secpack
      # Work on a copy of the wrapped SECpack. (The previous code read an
      # instance variable that is never set, so this path always raised.)
      secpack = Tem::SecPack.new_from_array @secpack.to_array
      secpack.set_bytes :_output, output
      tem.execute secpack
    end
  end
end
107
+
108
+ end # namespace Tem::Mr::search
@@ -0,0 +1,169 @@
1
+ require 'rbtree'
2
+ require 'set'
3
+
4
+ # :nodoc: namespace
5
+ module Tem::Mr::Search
6
+
7
# Decides which actions (SECpack migrations, maps, reduces and the final
# finalize) can run on which TEM at any point of a Map-Reduce job.
#
# The executor calls next_actions! to obtain work and action_done whenever an
# issued action completes, until done? is true.
class MapReducePlanner
  # Maps an action type to the private method recording its completion.
  DONE_HANDLERS = { :migrate => :done_migrating, :map => :done_mapping,
                    :reduce => :done_reducing,
                    :finalize => :done_finalizing }.freeze

  # Creates a planner for a Map-Reduce job.
  #
  # Arguments:
  #   job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   num_items:: how many data items does the Map-Reduce run over
  #   num_tems:: how many TEMs are available
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  def initialize(job, num_items, num_tems, root_tem)
    @job = job
    @root_tem = root_tem

    # TEMs that still lack / already hold each SECpack type.
    @without = { :mapper => RBTree.new, :reducer => RBTree.new }
    @with = { :mapper => Set.new([root_tem]),
              :reducer => Set.new([root_tem]) }
    # TEMs that are not executing an action right now.
    @free_tems = RBTree.new
    num_tems.times do |tem|
      @free_tems[tem] = true
      next if tem == root_tem
      @without.each_value { |tems| tems[tem] = true }
    end

    @unmapped_items = (0...num_items).to_a.reverse
    # Output IDs that are ready to be fed into a reduce.
    @reduce_queue = RBTree.new
    @last_output_id = 0
    # num_items maps plus (num_items - 1) reduces get sequential IDs, so this
    # is the ID of the output that holds the fully reduced result.
    @last_reduce_id = 2 * num_items - 2
    @done_reducing, @output_id = false, nil

    # With no items there is nothing to map, reduce or finalize. The job is
    # complete right away; no output is ever stored under its output_id.
    @output_id = next_output_id if num_items == 0
  end

  # Generates migrating actions for a SECpack type that are possible now.
  def migrate_actions(sec_type)
    actions = []
    lacking = @without[sec_type]
    return actions if lacking.length == 0
    free_tems_with_sec(sec_type).each do |source_tem|
      break if lacking.length == 0
      target_tem = lacking.min.first
      lacking.delete target_tem
      @free_tems.delete source_tem
      actions.push :action => :migrate, :secpack => sec_type,
                   :with => source_tem, :to => target_tem
    end
    actions
  end
  private :migrate_actions

  # Informs the planner that a SECpack migration has completed.
  def done_migrating(action)
    @free_tems[action[:with]] = true
    @with[action[:secpack]] << action[:to]
  end
  private :done_migrating

  # A sorted array of the free TEMs that have a SECpack type.
  def free_tems_with_sec(sec_type)
    holders = @with[sec_type]
    tems = []
    @free_tems.each do |tem, _|
      tems << tem if holders.include? tem
    end
    tems
  end

  # A unique output_id.
  def next_output_id
    next_id = @last_output_id
    @last_output_id += 1
    next_id
  end

  # Generates mapping actions possible right now.
  def map_actions
    actions = []
    return actions if @unmapped_items.empty?
    free_tems_with_sec(:mapper).each do |tem|
      item = @unmapped_items.pop
      break if item.nil?
      @free_tems.delete tem
      actions.push :action => :map, :item => item, :with => tem,
                   :output_id => next_output_id
    end
    actions
  end
  private :map_actions

  # Informs the planner that a data mapping has completed.
  def done_mapping(action)
    @free_tems[action[:with]] = true
    if action[:output_id] == @last_reduce_id
      # Only happens for a one-item job: the single map output is already the
      # fully reduced result, and no reduce will ever run to say so.
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_mapping

  # Generates reducing actions possible right now.
  def reduce_actions
    actions = []
    return actions if @reduce_queue.length <= 1
    free_tems_with_sec(:reducer).each do |tem|
      break if @reduce_queue.length <= 1
      # Take the two smallest pending output IDs.
      output1_id, output2_id = *[0, 1].map do
        output_id = @reduce_queue.min.first
        @reduce_queue.delete output_id
        output_id
      end
      @free_tems.delete tem
      actions.push :action => :reduce, :with => tem, :output1_id => output1_id,
                   :output2_id => output2_id, :output_id => next_output_id
    end
    actions
  end
  private :reduce_actions

  # Informs the planner that a data reduction has completed.
  def done_reducing(action)
    @free_tems[action[:with]] = true
    if action[:output_id] == @last_reduce_id
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_reducing

  # Generates finalizing actions possible right now.
  def finalize_actions
    return [] unless @done_reducing && @output_id.nil? && @free_tems[@root_tem]
    # Mark the root TEM busy, so the finalize action is issued only once even
    # if other actions complete before it does.
    @free_tems.delete @root_tem
    [{ :action => :finalize, :with => @root_tem,
       :output_id => @last_reduce_id, :final_id => next_output_id }]
  end
  private :finalize_actions

  # Informs the planner that the finalize action has completed.
  def done_finalizing(action)
    @free_tems[action[:with]] = true
    @output_id = action[:final_id]
  end
  private :done_finalizing

  # True when the Map-Reduce job is complete.
  def done?
    !@output_id.nil?
  end

  # The output ID of the Map-Reduce's final result.
  attr_reader :output_id

  # Informs the planner that an action issued by next_actions! was completed.
  #
  # Raises KeyError for an action type the planner does not know.
  def action_done(action)
    send DONE_HANDLERS.fetch(action[:action]), action
  end

  # Issues a set of actions that can be performed right now.
  def next_actions!
    actions = migrate_actions :mapper
    actions += migrate_actions :reducer
    actions += map_actions
    actions += reduce_actions
    actions += finalize_actions
    actions
  end
end
168
+
169
+ end # namespace Tem::Mr::search