tem_mr_search 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +1 -0
- data/LICENSE +21 -0
- data/Manifest +25 -0
- data/README +4 -0
- data/Rakefile +23 -0
- data/lib/tem_mr_search/client_query.rb +21 -0
- data/lib/tem_mr_search/db.rb +19 -0
- data/lib/tem_mr_search/map_reduce_executor.rb +114 -0
- data/lib/tem_mr_search/map_reduce_job.rb +67 -0
- data/lib/tem_mr_search/map_reduce_planner.rb +169 -0
- data/lib/tem_mr_search/query_builder.rb +167 -0
- data/lib/tem_mr_search.rb +17 -0
- data/tem_mr_search.gemspec +37 -0
- data/test/mr_test_case.rb +58 -0
- data/test/test_db.rb +10 -0
- data/test/test_map_reduce_executor.rb +37 -0
- data/test/test_map_reduce_job.rb +45 -0
- data/test/test_map_reduce_planner.rb +55 -0
- data/test/test_query_builder.rb +40 -0
- data/testdata/fares.yml +49 -0
- data/testdata/parallel_plan_431.yml +52 -0
- data/testdata/parallel_plan_740.yml +87 -0
- data/testdata/serial_plan_410.yml +36 -0
- data/testdata/serial_plan_431.yml +56 -0
- data/testdata/serial_plan_740.yml +93 -0
- metadata +116 -0
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.1. Initial release.
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2009 Massachusetts Institute of Technology
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/Manifest
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
CHANGELOG
|
2
|
+
lib/tem_mr_search/client_query.rb
|
3
|
+
lib/tem_mr_search/db.rb
|
4
|
+
lib/tem_mr_search/map_reduce_executor.rb
|
5
|
+
lib/tem_mr_search/map_reduce_job.rb
|
6
|
+
lib/tem_mr_search/map_reduce_planner.rb
|
7
|
+
lib/tem_mr_search/query_builder.rb
|
8
|
+
lib/tem_mr_search.rb
|
9
|
+
LICENSE
|
10
|
+
Manifest
|
11
|
+
Rakefile
|
12
|
+
README
|
13
|
+
tem_mr_search.gemspec
|
14
|
+
test/mr_test_case.rb
|
15
|
+
test/test_db.rb
|
16
|
+
test/test_map_reduce_executor.rb
|
17
|
+
test/test_map_reduce_job.rb
|
18
|
+
test/test_map_reduce_planner.rb
|
19
|
+
test/test_query_builder.rb
|
20
|
+
testdata/fares.yml
|
21
|
+
testdata/parallel_plan_431.yml
|
22
|
+
testdata/parallel_plan_740.yml
|
23
|
+
testdata/serial_plan_410.yml
|
24
|
+
testdata/serial_plan_431.yml
|
25
|
+
testdata/serial_plan_740.yml
|
data/README
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'echoe'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('tem_mr_search') do |p|
|
6
|
+
p.project = 'tem' # rubyforge project
|
7
|
+
p.docs_host = "costan@rubyforge.org:/var/www/gforge-projects/tem/rdoc/"
|
8
|
+
|
9
|
+
p.author = 'Victor Costan'
|
10
|
+
p.email = 'victor@costan.us'
|
11
|
+
p.summary = 'Tem Map-Reduce proof of concept: database search.'
|
12
|
+
p.url = 'http://tem.rubyforge.org'
|
13
|
+
p.dependencies = ['tem_ruby >=0.11.2', 'tem_multi_proxy >=0.2']
|
14
|
+
|
15
|
+
p.need_tar_gz = !Platform.windows?
|
16
|
+
p.need_zip = !Platform.windows?
|
17
|
+
p.rdoc_pattern = /^(lib|bin|tasks|ext)|^BUILD|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
|
18
|
+
end
|
19
|
+
|
20
|
+
if $0 == __FILE__
|
21
|
+
Rake.application = Rake::Application.new
|
22
|
+
Rake.application.run
|
23
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
# A Map-Reduce job as seen by the client that issued the query.
#
# In addition to the MapReduceJob state, it remembers the query key given
# under :key in the attributes hash.
class ClientQuery < MapReduceJob
  # Builds the query from an attributes hash; the whole hash is also handed to
  # the MapReduceJob constructor.
  def initialize(attributes)
    super(attributes)
    @query_key = attributes[:key]
  end

  # Unpacks a reduce output into its components.
  #
  # This is expected to be called with the encrypted output returned by the
  # search provider.
  def unpack_output(output)
    # TODO(costan): decrypt output once we enable encryption
    # Until then the output is passed through unchanged.
    unpack_decrypted_output(output)
  end
end
|
20
|
+
|
21
|
+
end # namespace Tem::Mr::search
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
# Read-only view over a database stored in a YAML file.
class Db
  # The object produced by parsing the YAML file.
  attr_reader :data

  # Loads the database from the YAML file at the given path.
  def initialize(path)
    @data = File.open(path, 'r') do |stream|
      YAML.load(stream)
    end
  end

  # The length of the loaded data.
  def length
    data.length
  end

  # The entry stored at the given index.
  def item(item_index)
    data[item_index]
  end
end
|
18
|
+
|
19
|
+
end # namespace Tem::Mr::search
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
|
4
|
+
# :nodoc: namespace
|
5
|
+
module Tem::Mr::Search
|
6
|
+
|
7
|
+
# Runs a Map-Reduce job by driving a planner and one worker thread per TEM.
#
# The main thread asks the planner for actions, routes each action to the queue
# of the TEM named in action[:with], and reports every completed action back to
# the planner until the planner says the job is done.
class MapReduceExecutor
  # Creates an executor for a Map-Reduce job.
  #
  # Arguments:
  #   root_job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   db:: the database to run Map-Reduce over
  #   tems:: sessions to the available TEMs
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  #   planner_class:: (optional) replacement for the default planner strategy
  def initialize(root_job, db, tems, root_tem, planner_class = nil)
    planner_class ||= MapReducePlanner

    @db = db      # Writable only in main thread.
    @tems = tems  # Writable only in main thread.

    # Writable only in main thread.
    # The planner is handed the root job itself.
    @planner = planner_class.new root_job, db.length, tems.length, root_tem

    # Protected by @lock
    @tem_jobs = { :mapper => { root_tem => root_job },
                  :reducer => { root_tem => root_job },
                  :finalizer => { root_tem => root_job } }
    # Protected by @lock
    @outputs = {}

    # Thread-safe.
    @thread_queues = tems.map { |tem| Queue.new }
    @main_queue = Queue.new
    @lock = Mutex.new
  end

  # Executes the job.
  #
  # Returns the output stored under the planner's final output ID. An exception
  # raised while a worker thread executes an action is re-raised here. The
  # worker threads are always stopped and joined before this method returns.
  def execute
    # Spawn TEM threads.
    workers = (0...@tems.length).map do |i|
      Thread.new(i) { |tem_index| executor_thread tem_index }
    end

    begin
      until @planner.done?
        actions = @planner.next_actions!
        @lock.synchronize do
          actions.each { |action| @thread_queues[action[:with]] << action }
        end

        finished = @main_queue.pop
        # Workers report failures by sending the exception instead of the action.
        raise finished if finished.is_a?(Exception)
        @planner.action_done finished
      end
    ensure
      # A nil entry ends each worker's loop, so no thread stays blocked on pop.
      @thread_queues.each { |queue| queue << nil }
      workers.each { |worker| worker.join }
    end

    @outputs[@planner.output_id]
  end

  # Main method for thread in charge of a TEM.
  #
  # Pops actions off the TEM's queue until it receives nil. Each completed
  # action is pushed on the main queue; if executing an action raises a
  # StandardError, the exception is pushed instead so the main thread does not
  # wait forever for a completion that will never come.
  def executor_thread(tem_index)
    queue = @thread_queues[tem_index]
    while (action = queue.pop)
      begin
        execute_action action, tem_index
        @main_queue << action
      rescue StandardError => error
        @main_queue << error
      end
    end
  end

  # Executes a Map-Reduce planner action.
  #
  # This method is called on the thread corresponding to the TEM that the action
  # is supposed to execute on. Shared state (@tem_jobs, @outputs, @db) is only
  # touched while holding @lock; the job's own work runs outside the lock.
  def execute_action(action, tem_index)
    case action[:action]
    when :migrate
      in_job = @lock.synchronize { @tem_jobs[action[:secpack]][tem_index] }
      out_job = in_job  # TODO(costan): actual migration
      @lock.synchronize do
        @tem_jobs[action[:secpack]][action[:to]] = out_job
      end

    when :map
      job, item = nil, nil
      @lock.synchronize do
        job = @tem_jobs[:mapper][tem_index]
        item = @db.item(action[:item])
      end
      output = job.map_object item, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :reduce
      job, output1, output2 = nil, nil, nil
      @lock.synchronize do
        job = @tem_jobs[:reducer][tem_index]
        output1 = @outputs[action[:output1_id]]
        output2 = @outputs[action[:output2_id]]
      end
      output = job.reduce_outputs output1, output2, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:output_id]] = output
      end

    when :finalize
      # Declared up front so the assignments inside the block are visible below.
      job, output = nil, nil
      @lock.synchronize do
        job = @tem_jobs[:finalizer][tem_index]
        output = @outputs[action[:output_id]]
      end
      final_output = job.finalize_output output, @tems[tem_index]
      @lock.synchronize do
        @outputs[action[:final_id]] = final_output
      end
    end
  end
end
|
113
|
+
|
114
|
+
end # namespace Tem::Mr::Search
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
# A Map-Reduce job: the SECpacks for the map, reduce and finalize steps, plus
# the description of the object attributes fed into the mapper.
class MapReduceJob
  attr_reader :map_secpack, :reduce_secpack, :finalize_secpack, :attributes,
              :id_attribute

  # Creates a job from a hash with the keys :map, :reduce, :finalize (SECpacks),
  # :attributes (array of hashes with :name and :type) and :id_attribute.
  def initialize(attributes)
    @map_secpack = attributes[:map]
    @reduce_secpack = attributes[:reduce]
    @finalize_secpack = attributes[:finalize]
    @attributes = attributes[:attributes]
    @id_attribute = attributes[:id_attribute]
  end

  # Returns a SECpack for mapping the given object data into the query.
  #
  # Returns nil if the job has no map SECpack.
  def map_for_object(object_data)
    return nil unless @map_secpack
    id = object_data[id_attribute.to_s]
    secpack = Tem::SecPack.new_from_array @map_secpack.to_array
    # 'q>' produces the 8 ID bytes most-significant first on every host.
    secpack.set_bytes :_id, [id].pack('q>').unpack('C*')
    attributes.each do |attribute|
      name, type = attribute[:name], attribute[:type]
      secpack.set_value name.to_sym, type, object_data[name.to_s]
    end
    secpack
  end

  # Maps the given object into the query.
  #
  # Without a map SECpack, the object data is returned unchanged.
  def map_object(object_data, tem)
    secpack = map_for_object object_data
    secpack ? tem.execute(secpack) : object_data
  end

  # Returns a SECpack for reducing two inputs coming from maps or other reduces.
  def reduce_for_outputs(output1, output2)
    secpack = Tem::SecPack.new_from_array @reduce_secpack.to_array

    secpack.set_bytes :_output1, output1
    secpack.set_bytes :_output2, output2
    secpack
  end

  # Reduces two inputs coming from maps or other reduces.
  def reduce_outputs(output1, output2, tem)
    secpack = reduce_for_outputs output1, output2
    tem.execute secpack
  end

  # Converts a map/reduce output into the final result for the operation.
  #
  # Without a finalize SECpack, the output is returned unchanged.
  def finalize_output(output, tem)
    return output unless @finalize_secpack
    secpack = Tem::SecPack.new_from_array @finalize_secpack.to_array
    secpack.set_bytes :_output, output
    tem.execute secpack
  end

  # Unpacks a decrypted output into its components.
  #
  # Returns a hash with :id (signed 64-bit integer read from bytes 0-7, most
  # significant byte first), :score (TEM short at offset 8) and :check (the 3
  # bytes starting at offset 13).
  def unpack_decrypted_output(output)
    {
      :id => output[0, 8].pack('C*').unpack('q>').first,
      :score => Tem::Abi.read_tem_short(output, 8),
      :check => output[13, 3]
    }
  end
end
|
66
|
+
|
67
|
+
end # namespace Tem::Mr::search
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'rbtree'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
# :nodoc: namespace
|
5
|
+
module Tem::Mr::Search
|
6
|
+
|
7
|
+
# Decides which Map-Reduce actions can be issued at any given moment.
#
# The planner tracks which TEMs are free, which TEMs hold the mapper and reducer
# SECpacks, which items are still unmapped and which outputs await reduction.
# Callers alternate between next_actions! and action_done until done? is true.
class MapReducePlanner
  # Creates a planner for a Map-Reduce job.
  #
  # Arguments:
  #   job:: the Map-Reduce job (see Tem::Mr::Search::MapReduceJob)
  #   num_items:: how many data items does the Map-Reduce run over
  #   num_tems:: how many TEMs are available
  #   root_tem:: the index of the TEM that has the root mapper and reducer
  #
  # NOTE: with num_items == 0 no action is ever issued that could mark the job
  # as reduced, so done? never becomes true.
  def initialize(job, num_items, num_tems, root_tem)
    @job = job
    @root_tem = root_tem

    # TEMs that still lack / already hold each SECpack type.
    @without = { :mapper => RBTree.new, :reducer => RBTree.new }
    @with = { :mapper => Set.new([root_tem]),
              :reducer => Set.new([root_tem]) }
    @free_tems = RBTree.new
    0.upto(num_tems - 1) do |tem|
      @free_tems[tem] = true
      next if tem == root_tem
      @without.each { |k, v| v[tem] = true }
    end

    # Reversed so that pop hands out items in increasing index order.
    @unmapped_items = (0...num_items).to_a.reverse
    @reduce_queue = RBTree.new
    @last_output_id = 0
    # num_items maps plus (num_items - 1) reduces consume IDs 0..2*num_items-2;
    # the last of those IDs belongs to the output that covers every item.
    @last_reduce_id = 2 * num_items - 2
    @done_reducing, @output_id = false, nil
  end

  # Generates migrating actions for a SECpack type that are possible now.
  def migrate_actions(sec_type)
    actions = []
    return actions if @without[sec_type].length == 0
    free_tems = free_tems_with_sec sec_type
    free_tems.each do |source_tem|
      break if @without[sec_type].length == 0
      target_tem = @without[sec_type].min.first
      @without[sec_type].delete target_tem
      @free_tems.delete source_tem
      actions.push :action => :migrate, :secpack => sec_type,
                   :with => source_tem, :to => target_tem
    end
    actions
  end
  private :migrate_actions

  # Informs the planner that a SECpack migration has completed.
  def done_migrating(action)
    @free_tems[action[:with]] = true
    @with[action[:secpack]] << action[:to]
  end
  private :done_migrating

  # A sorted array of the free TEMs that have a SECpack type.
  def free_tems_with_sec(sec_type)
    tems = []
    @free_tems.each do |tem, true_value|
      tems << tem if @with[sec_type].include? tem
    end
    tems
  end

  # A unique output_id.
  def next_output_id
    next_id = @last_output_id
    @last_output_id += 1
    next_id
  end

  # Generates mapping actions possible right now.
  def map_actions
    actions = []
    return actions if @unmapped_items.empty?
    free_tems_with_sec(:mapper).each do |tem|
      break unless item = @unmapped_items.pop
      @free_tems.delete tem
      actions.push :action => :map, :item => item, :with => tem,
                   :output_id => next_output_id
    end
    actions
  end
  private :map_actions

  # Informs the planner that a data mapping has completed.
  def done_mapping(action)
    @free_tems[action[:with]] = true
    if action[:output_id] == @last_reduce_id
      # Only happens for a single-item job: the lone map output is already the
      # overall result, and no reduce will ever be issued to say so.
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_mapping

  # Generates reducing actions possible right now.
  def reduce_actions
    actions = []
    return actions if @reduce_queue.length <= 1
    free_tems_with_sec(:reducer).each do |tem|
      break if @reduce_queue.length <= 1
      # Take the two smallest pending output IDs off the queue.
      output1_id, output2_id = *[0, 1].map do |i|
        output_id = @reduce_queue.min.first
        @reduce_queue.delete output_id
        output_id
      end
      @free_tems.delete tem
      actions.push :action => :reduce, :with => tem, :output1_id => output1_id,
                   :output2_id => output2_id, :output_id => next_output_id
    end
    actions
  end
  private :reduce_actions

  # Informs the planner that a data reduction has completed.
  def done_reducing(action)
    @free_tems[action[:with]] = true
    if action[:output_id] == @last_reduce_id
      @done_reducing = true
      return
    end
    @reduce_queue[action[:output_id]] = true
  end
  private :done_reducing

  # Generates finalizing actions possible right now.
  def finalize_actions
    return [] unless @done_reducing && !@output_id && @free_tems[@root_tem]
    # Mark the root TEM busy, so the finalize action is not issued again while
    # it is still running.
    @free_tems.delete @root_tem
    [{ :action => :finalize, :with => @root_tem,
       :output_id => @last_reduce_id, :final_id => next_output_id }]
  end
  private :finalize_actions

  # Informs the planner that the finalizing action has completed.
  def done_finalizing(action)
    @free_tems[action[:with]] = true
    @output_id = action[:final_id]
  end
  private :done_finalizing

  # True when the Map-Reduce job is complete.
  def done?
    !@output_id.nil?
  end

  # The output ID of the Map-Reduce's final result.
  attr_reader :output_id

  # Informs the planner that an action issued by next_actions was completed.
  def action_done(action)
    dispatch = { :migrate => :done_migrating, :map => :done_mapping,
                 :reduce => :done_reducing, :finalize => :done_finalizing }
    self.send dispatch[action[:action]], action
  end

  # Issues a set of actions that can be performed right now.
  def next_actions!
    actions = migrate_actions :mapper
    actions += migrate_actions :reducer
    actions += map_actions
    actions += reduce_actions
    actions += finalize_actions
    actions
  end
end
|
168
|
+
|
169
|
+
end # namespace Tem::Mr::search
|
@@ -0,0 +1,167 @@
|
|
1
|
+
# :nodoc: namespace
|
2
|
+
module Tem::Mr::Search
|
3
|
+
|
4
|
+
class QueryBuilder
|
5
|
+
# Build a Query.
|
6
|
+
def self.query
|
7
|
+
builder = self.new
|
8
|
+
yield builder
|
9
|
+
builder.query
|
10
|
+
end
|
11
|
+
|
12
|
+
# Defines the object attributes imported into the map method.
|
13
|
+
def attributes(attributes)
|
14
|
+
@attributes = attributes.to_a.map do |k, v|
|
15
|
+
{ :name => k,
|
16
|
+
:type => v,
|
17
|
+
:length => Tem::Abi.send(:"#{v}_length")
|
18
|
+
}
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Defines the object attribute that's used as an object ID.
|
23
|
+
def id_attribute(id_attribute)
|
24
|
+
@id_attribute = id_attribute.to_sym
|
25
|
+
end
|
26
|
+
|
27
|
+
# Defines the query's map procedure.
|
28
|
+
def map
|
29
|
+
@map_secpack = Tem::Assembler.assemble do |s|
|
30
|
+
s.label :_secret
|
31
|
+
s.label :_key
|
32
|
+
s.zeros :tem_ubyte, 16
|
33
|
+
s.label :_check_bytes
|
34
|
+
s.data :tem_ubyte, @check_bytes
|
35
|
+
|
36
|
+
# User-provided ranking procedure (secret).
|
37
|
+
s.label :_ranking
|
38
|
+
yield s
|
39
|
+
s.ret
|
40
|
+
|
41
|
+
s.entry
|
42
|
+
s.ldbc 16
|
43
|
+
s.outnew
|
44
|
+
s.call :_ranking
|
45
|
+
s.ldbc 3
|
46
|
+
s.ldwc :_nonce
|
47
|
+
s.rnd
|
48
|
+
s.mcfxb :from => :_check_bytes, :to => :_check, :size => 3
|
49
|
+
# TODO(costan): encryption instead of plain dump
|
50
|
+
s.outfxb :from => :_id, :size => 16
|
51
|
+
s.halt
|
52
|
+
|
53
|
+
s.label :_plain
|
54
|
+
|
55
|
+
# Make room for query attributes.
|
56
|
+
@attributes.each do |attribute|
|
57
|
+
s.label attribute[:name]
|
58
|
+
s.zeros attribute[:type], 1
|
59
|
+
end
|
60
|
+
# Object ID.
|
61
|
+
s.label :_id
|
62
|
+
s.zeros :tem_ubyte, 8
|
63
|
+
# Object score.
|
64
|
+
s.label :score
|
65
|
+
s.zeros :tem_short, 1
|
66
|
+
# Random nonce to prevent matching map outputs.
|
67
|
+
s.label :_nonce
|
68
|
+
s.zeros :tem_ubyte, 3
|
69
|
+
# Check bytes to prevent malicious input corruption.
|
70
|
+
s.label :_check
|
71
|
+
s.zeros :tem_ubyte, 3
|
72
|
+
|
73
|
+
s.stack 64
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# Defines the query's reduce procedure.
|
78
|
+
def reduce
|
79
|
+
@reduce_secpack = Tem::Assembler.assemble do |s|
|
80
|
+
s.label :_secret
|
81
|
+
s.label :_key
|
82
|
+
s.zeros :tem_ubyte, 16
|
83
|
+
s.label :_check
|
84
|
+
s.data :tem_ubyte, @check_bytes
|
85
|
+
|
86
|
+
s.label :_signed
|
87
|
+
# User-provided comparison procedure (signed).
|
88
|
+
s.label :_comparison_proc
|
89
|
+
yield s
|
90
|
+
s.ret
|
91
|
+
|
92
|
+
s.entry
|
93
|
+
s.ldbc 16
|
94
|
+
s.outnew
|
95
|
+
# Decode inputs.
|
96
|
+
[1, 2].each do |i|
|
97
|
+
# TODO(costan): decrypt instead of copying
|
98
|
+
s.mcfxb :from => :"_output#{i}", :to => :"_id#{i}", :size => 16
|
99
|
+
|
100
|
+
# Compare the check bytes and abort if the inputs were tampered with.
|
101
|
+
s.mcmpfxb :op1 => :"_check#{i}", :op2 => :"_check", :size => 3
|
102
|
+
s.jz :"_check_#{i}_ok"
|
103
|
+
s.halt
|
104
|
+
s.label :"_check_#{i}_ok"
|
105
|
+
end
|
106
|
+
|
107
|
+
# Compare and output.
|
108
|
+
s.call :_comparison_proc
|
109
|
+
s.ldw :comparison
|
110
|
+
s.jae :_output1_wins
|
111
|
+
s.mcfxb :from => :_id2, :to => :_id1, :size => 16
|
112
|
+
s.jmp :_output
|
113
|
+
s.label :_output1_wins
|
114
|
+
# Still do a memcpy, to prevent timing attacks.
|
115
|
+
s.mcfxb :from => :_id2, :to => :_id2, :size => 16
|
116
|
+
s.jmp :_output
|
117
|
+
# Refresh the nonce to prevent learning about the comparison criteria.
|
118
|
+
s.label :_output
|
119
|
+
s.ldbc 3
|
120
|
+
s.ldwc :_nonce1
|
121
|
+
s.rnd
|
122
|
+
# TODO(costan): encrypt instead of copying
|
123
|
+
s.outfxb :from => :_id1, :size => 16
|
124
|
+
s.halt
|
125
|
+
|
126
|
+
s.label :_plain
|
127
|
+
# The comparison result produced by the user comparison procedure.
|
128
|
+
s.label :comparison
|
129
|
+
s.zeros :tem_short, 1
|
130
|
+
|
131
|
+
# The two inputs to reduce.
|
132
|
+
[1, 2].each do |i|
|
133
|
+
# Encrypted map/reduce output.
|
134
|
+
s.label :"_output#{i}"
|
135
|
+
s.zeros :tem_ubyte, 16
|
136
|
+
# Unencrypted input (decrypted inside TEM).
|
137
|
+
s.label :"_id#{i}"
|
138
|
+
s.zeros :tem_ubyte, 8
|
139
|
+
s.label :"score#{i}"
|
140
|
+
s.zeros :tem_short, 1
|
141
|
+
s.label :"_nonce#{i}"
|
142
|
+
s.zeros :tem_ubyte, 3
|
143
|
+
s.label :"_check#{i}"
|
144
|
+
s.zeros :tem_ubyte, 3
|
145
|
+
end
|
146
|
+
s.stack 8
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
def query
|
151
|
+
raise "Map procedure not specified" unless @map_secpack
|
152
|
+
raise "Reduce procedure not specified" unless @reduce_secpack
|
153
|
+
raise "ID attribute not specified" unless @id_attribute
|
154
|
+
|
155
|
+
ClientQuery.new :key => @query_key, :attributes => @attributes,
|
156
|
+
:map => @map_secpack, :reduce => @reduce_secpack,
|
157
|
+
:id_attribute => @id_attribute
|
158
|
+
end
|
159
|
+
|
160
|
+
def initialize
|
161
|
+
@check_bytes = [1, 2, 3]
|
162
|
+
# TODO(costan): generate query key
|
163
|
+
@query_key = nil
|
164
|
+
end
|
165
|
+
end # class QueryBuilder
|
166
|
+
|
167
|
+
end # namespace Tem::Mr::Search
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'tem_multi_proxy'
|
3
|
+
require 'tem_ruby'
|
4
|
+
|
5
|
+
# :nodoc: namespace
|
6
|
+
module Tem::Mr
|
7
|
+
end
|
8
|
+
# :nodoc: namespace
|
9
|
+
module Tem::Mr::Search
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'tem_mr_search/db.rb'
|
13
|
+
require 'tem_mr_search/map_reduce_executor.rb'
|
14
|
+
require 'tem_mr_search/map_reduce_job.rb'
|
15
|
+
require 'tem_mr_search/map_reduce_planner.rb'
|
16
|
+
require 'tem_mr_search/query_builder.rb'
|
17
|
+
require 'tem_mr_search/client_query.rb'
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{tem_mr_search}
|
5
|
+
s.version = "0.1"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Victor Costan"]
|
9
|
+
s.date = %q{2009-06-02}
|
10
|
+
s.description = %q{Tem Map-Reduce proof of concept: database search.}
|
11
|
+
s.email = %q{victor@costan.us}
|
12
|
+
s.extra_rdoc_files = ["CHANGELOG", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "README"]
|
13
|
+
s.files = ["CHANGELOG", "lib/tem_mr_search/client_query.rb", "lib/tem_mr_search/db.rb", "lib/tem_mr_search/map_reduce_executor.rb", "lib/tem_mr_search/map_reduce_job.rb", "lib/tem_mr_search/map_reduce_planner.rb", "lib/tem_mr_search/query_builder.rb", "lib/tem_mr_search.rb", "LICENSE", "Manifest", "Rakefile", "README", "tem_mr_search.gemspec", "test/mr_test_case.rb", "test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builder.rb", "testdata/fares.yml", "testdata/parallel_plan_431.yml", "testdata/parallel_plan_740.yml", "testdata/serial_plan_410.yml", "testdata/serial_plan_431.yml", "testdata/serial_plan_740.yml"]
|
14
|
+
s.homepage = %q{http://tem.rubyforge.org}
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Tem_mr_search", "--main", "README"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = %q{tem}
|
18
|
+
s.rubygems_version = %q{1.3.4}
|
19
|
+
s.summary = %q{Tem Map-Reduce proof of concept: database search.}
|
20
|
+
s.test_files = ["test/test_db.rb", "test/test_map_reduce_executor.rb", "test/test_map_reduce_job.rb", "test/test_map_reduce_planner.rb", "test/test_query_builder.rb"]
|
21
|
+
|
22
|
+
if s.respond_to? :specification_version then
|
23
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
24
|
+
s.specification_version = 3
|
25
|
+
|
26
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
27
|
+
s.add_runtime_dependency(%q<tem_ruby>, [">= 0.11.2"])
|
28
|
+
s.add_runtime_dependency(%q<tem_multi_proxy>, [">= 0.2"])
|
29
|
+
else
|
30
|
+
s.add_dependency(%q<tem_ruby>, [">= 0.11.2"])
|
31
|
+
s.add_dependency(%q<tem_multi_proxy>, [">= 0.2"])
|
32
|
+
end
|
33
|
+
else
|
34
|
+
s.add_dependency(%q<tem_ruby>, [">= 0.11.2"])
|
35
|
+
s.add_dependency(%q<tem_multi_proxy>, [">= 0.2"])
|
36
|
+
end
|
37
|
+
end
|