lorax 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,108 @@
1
module Lorax
  # Pairs nodes of one document with nodes of another and records every
  # pairing in a match set.
  #
  # Nodes whose signatures are equal are paired as perfect matches. Further,
  # non-perfect pairings are then derived for same-named ancestors of a perfect
  # match and for same-named, still unmatched children of paired nodes.
  class FastMatcher
    attr_accessor :match_set

    # doc1, doc2::            the documents to compare (each must respond to +root+).
    # dependency_injection::  optional Hash. The value stored under
    #                         +:matcher_match_set+ is used as the match set when
    #                         present; otherwise a MatchSet is built from both
    #                         documents and is handed this same Hash.
    def initialize(doc1, doc2, dependency_injection={})
      @document1 = doc1
      @document2 = doc2
      @match_set = dependency_injection[:matcher_match_set] || MatchSet.new(doc1, doc2, dependency_injection)
    end

    # Runs the matching, starting at the root of the first document.
    # Returns the match set, or nil when that root was already matched.
    def match
      match_node @document1.root
    end

    private

    # Matches +node1+ (a node of the first document) and, when it has no
    # identical counterpart, its descendants.
    # Returns the match set, or nil when +node1+ was already matched.
    def match_node(node1)
      return if match_set.match(node1)
      signature1 = match_set.signature1.signature(node1) # assumes node1 is in signature1
      # Filter with the non-destructive #reject: the list handed back by
      # signature2 is the signature's own index entry, and pruning it in place
      # would silently drop nodes from later lookups made by anyone else.
      candidates = (match_set.signature2.nodes(signature1) || []).reject do |node|
        match_set.match(node)
      end

      if candidates.empty?
        node1.children.each { |child| match_node(child) }
        # Matching a descendant may have paired node1 as one of its ancestors.
        match = match_set.match(node1)
        propagate_to_children(match.pair.first, match.pair.last) if match
      else
        match_candidate(node1, candidates)
      end
      propagate_to_parent(node1) unless match_set.match(node1)
      match_set
    end

    # Picks, among +candidates+ (nodes whose signature equals that of +node1+),
    # the one that yields the longest chain of same-named ancestors and records
    # every match of that chain. +candidates+ must not be empty.
    def match_candidate(node1, candidates)
      ancestral_matches = candidates.collect do |node2|
        ancestral_match(node1, node2, depth(node2, match_set.signature2))
      end
      longest_trail = ancestral_matches.max { |a, b| a.length <=> b.length }
      longest_trail.each do |ancestral_match|
        match_set.add ancestral_match
      end
    end

    # Builds the perfect match for +node1+/+node2+ followed by non-perfect
    # matches for their ancestors, walking up at most +max_depth+ levels and
    # stopping at the first pair whose names differ or at the document itself.
    def ancestral_match(node1, node2, max_depth)
      matches = [Match.new(node1, node2, :perfect => true)]
      curr1, curr2 = node1.parent, node2.parent
      1.upto(max_depth) do
        break unless curr1.name == curr2.name && ! curr1.is_a?(Nokogiri::XML::Document)
        matches << Match.new(curr1, curr2)
        curr1, curr2 = curr1.parent, curr2.parent
      end
      matches
    end

    # Pairs the unmatched +node1+ with the parent of one of its children's
    # partners, provided that parent has the same name as +node1+. Children are
    # consulted from heaviest to lightest and the first hit wins.
    def propagate_to_parent(node1)
      node1.children.sort_by { |child| match_set.signature1.weight(child) }.reverse.each do |child|
        next unless match = match_set.match(child)
        match_parent = match.pair.last.parent
        if match_parent.name == node1.name
          match_set.add Match.new(node1, match_parent)
          return
        end
      end
    end

    # Pairs the still unmatched children of the matched nodes +node1+ and
    # +node2+, then recurses into each new pair. Children sharing a name are
    # paired directly when each side has exactly one of them, and otherwise
    # only when they occupy the same position among all children.
    def propagate_to_children(node1, node2)
      children_set1 = collect_children_by_name(node1.children)
      children_set2 = collect_children_by_name(node2.children)

      children_set1.each do |name, children1|
        # Direct lookup instead of scanning every name of the second set.
        children2 = children_set2[name]
        next unless children2

        if children1.length == 1 && children2.length == 1
          pair_and_descend(children1.first, children2.first)
        else
          siblings1 = node1.children
          siblings2 = node2.children
          # Positions on the second side do not depend on child1: compute once.
          positioned2 = children2.collect { |child2| [siblings2.index(child2), child2] }
          children1.each do |child1|
            position1 = siblings1.index(child1)
            positioned2.each do |position2, child2|
              pair_and_descend(child1, child2) if position1 == position2
            end
          end
        end
      end
    end

    # Records a non-perfect match for the two children and continues with
    # their own children.
    def pair_and_descend(child1, child2)
      match_set.add Match.new(child1, child2)
      propagate_to_children child1, child2
    end

    # Number of ancestor levels that a perfect match on +node+ may force:
    # 1 + ln(sig.size) * weight(node) / weight(whole signature), truncated to
    # an Integer.
    def depth(node, sig)
      depth = 1 + Math.log(sig.size) * sig.weight(node) / sig.weight
      depth.to_i
    end

    # Groups the members of +node_set+ that have no match yet by node name.
    # Returns a Hash of name => [node, ...].
    def collect_children_by_name(node_set)
      collection = {}
      node_set.each do |child|
        next if match_set.match(child)
        (collection[child.name] ||= []) << child
      end
      collection
    end
  end
end
@@ -0,0 +1,22 @@
1
module Lorax
  # A pairing of two nodes, optionally flagged as perfect.
  class Match
    attr_accessor :pair

    # node1, node2:: the paired nodes, stored in this order in +pair+.
    # options::      Hash; a truthy +:perfect+ entry marks the match as perfect.
    def initialize(node1, node2, options={})
      @perfect = !!options[:perfect]
      @pair = [node1, node2]
    end

    # true when the match was created with a truthy +:perfect+ option,
    # false otherwise.
    def perfect?
      @perfect
    end

    # Returns the partner of +node+ within the pair, or nil when +node+ is
    # neither member. Membership is tested with +===+, the first member first.
    def other(node)
      return pair.last if pair.first === node
      return pair.first if pair.last === node
      nil
    end
  end
end
@@ -0,0 +1,30 @@
1
module Lorax
  # The matches found between two documents, indexed by node, together with
  # the signature of each document.
  class MatchSet
    attr_accessor :signature1, :signature2

    # doc1, doc2::            the two documents being compared.
    # dependency_injection::  optional Hash. +:match_set_signature1+ and
    #                         +:match_set_signature2+ replace the signatures
    #                         that would otherwise be computed from each
    #                         document's root.
    def initialize(doc1, doc2, dependency_injection={})
      @document1 = doc1
      @document2 = doc2
      @signature1 = dependency_injection[:match_set_signature1] || Lorax::Signature.new(@document1.root)
      @signature2 = dependency_injection[:match_set_signature2] || Lorax::Signature.new(@document2.root)
      @matches = {}
    end

    # Returns the match registered for +node+, or nil when there is none.
    def match(node)
      @matches[node]
    end

    # Debugging aid: a sorted list of [path1, path2, perfect?] triples, one per
    # distinct match.
    def matches
      # The reminder goes to stderr (Kernel#warn) rather than stdout, so that
      # calling this method cannot corrupt the output of the host program.
      warn "MIKE: #{__FILE__}:#{__LINE__} REMOVE ME THIS IS FOR DEBUGGING ONLY"
      @matches.values.uniq.collect {|m| [m.pair.first.path, m.pair.last.path, m.perfect?]}.sort
    end

    # Registers +match+ under both of its nodes, replacing any match
    # previously registered for either of them.
    def add(match)
      match.pair.each { |node| @matches[node] = match }
    end

    # Hands this match set to DeltaSetGenerator and returns its result.
    def to_delta_set
      DeltaSetGenerator.generate_delta_set(self)
    end
  end
end
@@ -0,0 +1,101 @@
1
+ require 'digest/sha1'
2
+
3
module Lorax
  # Computes and caches, for a node and all of its descendants, a SHA1
  # signature (covering the whole subtree), a monogram (the signature without
  # the children) and a weight.
  class Signature
    SEP = "\0"

    # node:: optional root node; when given, the signature of its whole
    #        subtree is computed right away.
    def initialize(node=nil)
      @signatures = {} # node => signature
      @monograms  = {} # node => monogram (signature not including children)
      @nodes      = {} # signature => [node, ...]
      @weights    = {} # node => weight
      @size       = 0
      @node       = node
      signature(node) if node
    end

    # The node this signature was created with (nil when none was given).
    def root
      @node
    end

    # With +sig+: the list of nodes carrying that signature, or nil when no
    # node does. Without: the root node.
    def nodes(sig=nil)
      sig ? @nodes[sig] : @node
    end

    # Number of nodes whose signature has been computed.
    def size
      @size
    end

    # Returns the signature of +node+, computing and caching it (together with
    # its monogram and weight, and those of its descendants) on first use.
    # Raises ArgumentError for anything but an element, text, CDATA, comment
    # or entity reference node.
    def signature(node=@node)
      return @signatures[node] if @signatures.key?(node)
      raise ArgumentError, "signature expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)

      if node.text? || node.cdata? || node.comment?
        monogram = full_signature = hashify(node.content)
      elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
        monogram = full_signature = hashify(node.to_html)
      elsif node.element?
        children_sig = hashify(node.children.collect { |child| signature(child) })
        # Attributes are sorted by name so that their order is irrelevant.
        attr_sig = hashify(node.attributes.sort.collect { |k, v| [k, v.value] }.flatten)
        monogram = hashify(node.name, attr_sig)
        full_signature = hashify(node.name, attr_sig, children_sig)
      else
        raise ArgumentError, "signature expects an element, text, cdata or comment node, but received #{node.class}"
      end

      @size += 1
      weight(node)

      (@nodes[full_signature] ||= []) << node
      @monograms[node] = monogram
      @signatures[node] = full_signature
    end

    # Returns the weight of +node+, computing and caching it on first use:
    # 1 + ln(content length) for text, CDATA and comment nodes, 1 for entity
    # references, and 1 plus the weights of all children for elements.
    # Raises ArgumentError for any other argument.
    def weight(node=@node)
      return @weights[node] if @weights.key?(node)
      raise ArgumentError, "weight expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)

      calculated_weight = \
        if node.text? || node.cdata? || node.comment?
          # Math.log(0) is -Infinity, which would poison the weight of every
          # ancestor; empty content therefore counts like a single character.
          1 + Math.log([node.content.length, 1].max)
        elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
          1
        elsif node.element?
          node.children.inject(1) { |sum, child| sum + weight(child) }
        else
          raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
        end

      @weights[node] = calculated_weight
    end

    # Returns the monogram of +node+, computing its signature first if needed.
    def monogram(node=@node)
      return @monograms[node] if @monograms.key?(node)
      signature(node)
      @monograms[node]
    end

    def set_signature(node, value) # :nodoc: for testing
      (@nodes[value] ||= []) << node
      @signatures[node] = value
    end

    def set_weight(node, value) # :nodoc: for testing
      @weights[node] = value
    end

    private

    # SHA1 hex digest of the arguments. A single Array argument, or several
    # arguments, are joined with SEP first; a single non-Array argument is
    # digested as it is.
    def hashify(*args)
      payload = args.length == 1 ? args.first : args
      payload = payload.join(SEP) if payload.is_a?(Array)
      Digest::SHA1.hexdigest payload
    end
  end
end
@@ -0,0 +1,400 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Specs for Lorax::FastMatcher. Most examples either let the matcher compute
# real signatures or inject Lorax::Signature objects whose signatures/weights
# were overridden with set_signature / set_weight.
describe Lorax::FastMatcher do
  describe ".new" do
    context "normal usage" do
      it "takes two arguments" do
        proc { Lorax::FastMatcher.new(xml{root}) }.should raise_error(ArgumentError)
        proc { Lorax::FastMatcher.new(xml{root}, xml{root}) }.should_not raise_error(ArgumentError)
      end

      it "builds a MatchSet for the documents" do
        doc1 = xml { root1 }
        doc2 = xml { root2 }
        mock.proxy(Lorax::MatchSet).new(doc1, doc2, anything)
        Lorax::FastMatcher.new(doc1, doc2)
      end
    end

    context "dependency injection" do
      it "takes an optional third argument for dependency injection" do
        proc { Lorax::FastMatcher.new(xml{root}, xml{root}, {:foo => :bar}) }.should_not raise_error(ArgumentError)
      end

      it "will use the value of ':matcher_match_set' for @match_set" do
        matcher = Lorax::FastMatcher.new(xml{root}, xml{root}, {:matcher_match_set => :foo})
        matcher.match_set.should == :foo
      end
    end
  end

  describe "basic node matching" do
    context "simple matches" do
      before do
        @doc1 = xml { root1 {
            a1
            b1
          } }
        @doc2 = xml { root2 {
            a1
            b2
          } }
        @signature1 = Lorax::Signature.new(@doc1.root)
        @signature1.set_signature(@doc1.at_css("root1"), "root1")
        @signature1.set_signature(@doc1.at_css("a1"),    "a1")
        @signature1.set_signature(@doc1.at_css("b1"),    "b1")
        @signature2 = Lorax::Signature.new(@doc2.root)
        @signature2.set_signature(@doc2.at_css("root2"), "root2")
        @signature2.set_signature(@doc2.at_css("a1"),    "a1")
        @signature2.set_signature(@doc2.at_css("b2"),    "b2")
      end

      it "matches identical nodes" do
        match_set = Lorax::FastMatcher.new(@doc1, @doc2,
          :match_set_signature1 => @signature1,
          :match_set_signature2 => @signature2).match
        assert_perfect_match_exists match_set, @doc1.at_css("a1"), @doc2.at_css("a1")
      end

      it "does not match different nodes" do
        match_set = Lorax::FastMatcher.new(@doc1, @doc2,
          :match_set_signature1 => @signature1,
          :match_set_signature2 => @signature2).match
        assert_no_match_exists match_set, @doc1.at_css("b1"), @doc2.at_css("b2")
      end
    end

    context "sibling matches" do
      it "matches all identical siblings" do
        doc1 = xml { root {
            a1_1 ; a1_3 ; a1_5
          } }
        doc2 = xml { root {
            a2_1 ; a2_2 ; a2_3 ; a2_4 ; a2_5
          } }
        signature1 = Lorax::Signature.new(doc1.root)
        signature1.set_signature(doc1.at_css("a1_1"), "a1")
        signature1.set_signature(doc1.at_css("a1_3"), "a3")
        signature1.set_signature(doc1.at_css("a1_5"), "a5")

        signature2 = Lorax::Signature.new(doc2.root)
        signature2.set_signature(doc2.at_css("a2_1"), "a1")
        signature2.set_signature(doc2.at_css("a2_3"), "a3")
        signature2.set_signature(doc2.at_css("a2_5"), "a5")

        match_set = Lorax::FastMatcher.new(doc1, doc2,
          :match_set_signature1 => signature1, :match_set_signature2 => signature2).match
        assert_perfect_match_exists match_set, doc1.at_css("a1_1"), doc2.at_css("a2_1")
        assert_perfect_match_exists match_set, doc1.at_css("a1_3"), doc2.at_css("a2_3")
        assert_perfect_match_exists match_set, doc1.at_css("a1_5"), doc2.at_css("a2_5")
      end
    end

    context "matching children of an unmatched node" do
      it "matches those children" do
        doc1 = xml { root {
            a1 {
              b1 ; b2
            }
          } }
        doc2 = xml { root {
            a2 {
              b1 ; b2
            }
          } }
        signature1 = Lorax::Signature.new(doc1.root)
        signature1.set_signature(doc1.at_css("a1"), "a1")
        signature1.set_signature(doc1.at_css("b1"), "b1")
        signature1.set_signature(doc1.at_css("b2"), "b2")

        signature2 = Lorax::Signature.new(doc2.root)
        # a2 belongs to doc2, so its signature must be registered on signature2.
        signature2.set_signature(doc2.at_css("a2"), "a2")
        signature2.set_signature(doc2.at_css("b1"), "b1")
        signature2.set_signature(doc2.at_css("b2"), "b2")

        match_set = Lorax::FastMatcher.new(doc1, doc2,
          :match_set_signature1 => signature1, :match_set_signature2 => signature2).match
        assert_perfect_match_exists match_set, doc1.at_css("b1"), doc2.at_css("b1")
        assert_perfect_match_exists match_set, doc1.at_css("b2"), doc2.at_css("b2")
      end
    end

    context "nested matches" do
      before do
        @doc1 = xml { root1 { a1 { b1 } } }
        @doc2 = xml { root2 { a1 { b1 } } }
        @signature1 = Lorax::Signature.new(@doc1.root)
        @signature1.set_signature(@doc1.at_css("a1"), "a1")
        @signature1.set_signature(@doc1.at_css("b1"), "b1")
        @signature2 = Lorax::Signature.new(@doc2.root)
        @signature2.set_signature(@doc2.at_css("a1"), "a1")
        @signature2.set_signature(@doc2.at_css("b1"), "b2")
      end

      it "matches the root nodes of the largest identical subtree" do
        match_set = Lorax::FastMatcher.new(@doc1, @doc2,
          :match_set_signature1 => @signature1, :match_set_signature2 => @signature2).match
        assert_perfect_match_exists match_set, @doc1.at_css("a1"), @doc2.at_css("a1")
      end

      it "does not match children of identical match nodes" do
        match_set = Lorax::FastMatcher.new(@doc1, @doc2,
          :match_set_signature1 => @signature1, :match_set_signature2 => @signature2).match
        assert_no_match_exists match_set, @doc1.at_css("b1"), @doc2.at_css("b1")
      end
    end
  end

  describe "forced parent matching" do
    before do
      stub.instance_of(Lorax::FastMatcher).propagate_to_parent # we're not testing propagation to parent
    end

    it "forces a match when parent names are the same but attributes are different" do
      doc1 = xml { root { a1(:foo => "bar") { b1 } } }
      doc2 = xml { root { a1(:bazz => "quux") { b1 } } }
      match_set = Lorax::FastMatcher.new(doc1, doc2).match
      assert_perfect_match_exists match_set, doc1.at_css("b1"), doc2.at_css("b1")
      assert_forced_match_exists  match_set, doc1.at_css("a1"), doc2.at_css("a1")
    end

    it "forces a match when parent names and attributes are the same but siblings are different" do
      doc1 = xml { root { a1(:foo => "bar") { b1 ; b2 } } }
      doc2 = xml { root { a1(:foo => "bar") { b1 ; b3 } } }
      match_set = Lorax::FastMatcher.new(doc1, doc2).match
      assert_perfect_match_exists match_set, doc1.at_css("b1"), doc2.at_css("b1")
      assert_forced_match_exists  match_set, doc1.at_css("a1"), doc2.at_css("a1")
    end

    describe "subsequent forced child matching" do
      it "force matches a uniquely-named sibling" do
        doc1 = xml { root { a1 {
              b2 "goodbye"
              b1 "hello"
              b3
              b4
            } } }
        doc2 = xml { root { a1 {
              b2 "good boy"
              b1 "hello"
              b3 "something"
              b4 { c1 }
            } } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_perfect_match_exists match_set, doc1.at_css("b1"), doc2.at_css("b1")
        assert_forced_match_exists  match_set, doc1.at_css("a1"), doc2.at_css("a1")
        assert_forced_match_exists  match_set, doc1.at_css("b2"), doc2.at_css("b2")
        assert_forced_match_exists  match_set, doc1.at_css("b3"), doc2.at_css("b3")
        assert_forced_match_exists  match_set, doc1.at_css("b4"), doc2.at_css("b4")
      end

      it "force matches recursively" do
        doc1 = xml { root { a1 ; a2 { b2 "hello" } } }
        doc2 = xml { root { a1 ; a2 { b2 "goodbye" } } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_perfect_match_exists match_set, doc1.at_css("a1"), doc2.at_css("a1")
        assert_forced_match_exists  match_set, doc1.at_css("a2"), doc2.at_css("a2")
        assert_forced_match_exists  match_set, doc1.at_css("b2"), doc2.at_css("b2")
        assert_forced_match_exists  match_set, doc1.at_xpath("//b2/text()"), doc2.at_xpath("//b2/text()")
      end

      it "should match uniquely-named unmatched children" do
        doc1 = xml { root {
            a1 "hello"
            a2 "goodbye"
            a3 "natch"
          } }
        doc2 = xml { root {
            a1 "hello"
            a3 "not"
          } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_perfect_match_exists match_set, doc1.at_css("a1"), doc2.at_css("a1")
        assert_forced_match_exists  match_set, doc1.at_css("a3"), doc2.at_css("a3")
      end

      it "should match same-named children in the same position, even if they are not uniquely named" do
        doc1 = xml { root {
            a1 {
              text "hello"
              b1 "foo"
              text "goodbye"
            }
          } }
        doc2 = xml { root {
            a1 {
              text "bonjour"
              b1 "foo"
              text "au revoir"
            }
          } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_forced_match_exists match_set, doc1.at_xpath("/root/a1/text()[1]"), doc2.at_xpath("/root/a1/text()[1]")
        assert_forced_match_exists match_set, doc1.at_xpath("/root/a1/text()[2]"), doc2.at_xpath("/root/a1/text()[2]")
      end

      it "large subtree matches force more parent matches than smaller subtree matches" do
        small_doc1 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1
                      f2
                    } } } } } } }
        small_doc2 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1
                      f3
                    } } } } } } }
        large_doc1 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1
                      f2
                    } } } } } } }
        large_doc2 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1
                      f3
                    } } } } } } }

        small_signature1 = Lorax::Signature.new(small_doc1.root)
        small_signature1.set_weight(small_doc1.at_css("f1"), 1)
        small_signature2 = Lorax::Signature.new(small_doc2.root)
        small_signature2.set_weight(small_doc2.at_css("f1"), 1)
        large_signature1 = Lorax::Signature.new(large_doc1.root)
        large_signature1.set_weight(large_doc1.at_css("f1"), 10)
        large_signature2 = Lorax::Signature.new(large_doc2.root)
        large_signature2.set_weight(large_doc2.at_css("f1"), 10)

        small_match_set = Lorax::FastMatcher.new(small_doc1, small_doc2,
          :match_set_signature1 => small_signature1, :match_set_signature2 => small_signature2).match
        large_match_set = Lorax::FastMatcher.new(large_doc1, large_doc2,
          :match_set_signature1 => large_signature1, :match_set_signature2 => large_signature2).match

        assert_forced_match_exists small_match_set, small_doc1.at_css("e1"), small_doc2.at_css("e1")
        assert_no_match_exists     small_match_set, small_doc1.at_css("d1"), small_doc2.at_css("d1")

        assert_forced_match_exists large_match_set, large_doc1.at_css("e1"), large_doc2.at_css("e1")
        assert_forced_match_exists large_match_set, large_doc1.at_css("d1"), large_doc2.at_css("d1")
        assert_forced_match_exists large_match_set, large_doc1.at_css("c1"), large_doc2.at_css("c1")
        assert_no_match_exists     large_match_set, large_doc1.at_css("b1"), large_doc2.at_css("b1")
      end
    end
  end

  describe "propagating matches to unmatched parents based on children's matches' parents" do
    context "when there is only one child" do
      it "should match parents all the way up the tree" do
        doc1 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1 "hello"
                      f2
                    } } } } } } }
        doc2 = xml { root { a1 { b1 { c1 { d1 { e1 {
                      f1 "hello"
                      f3
                    } } } } } } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_perfect_match_exists match_set, doc1.at_css("f1"), doc2.at_css("f1")
        %w[e1 d1 c1 b1 a1 root].each do |node_name|
          assert_forced_match_exists match_set, doc1.at_css(node_name), doc2.at_css(node_name)
        end
      end
    end

    context "there are many possible children" do
      it "should match via children with largest weight" do
        doc1 = xml { root {
            a1 { b1 ; b2 }
          } }
        doc2 = xml { root {
            a1 { b1 ; b3 }
            a1 { b2 ; b4 }
          } }
        signature1 = Lorax::Signature.new(doc1.root)
        signature2 = Lorax::Signature.new(doc2.root)
        signature1.set_weight(doc1.at_css("b1"), 10)
        signature1.set_weight(doc1.at_css("b2"), 100)
        signature2.set_weight(doc2.at_css("b1"), 10)
        signature2.set_weight(doc2.at_css("b2"), 100)

        match_set = Lorax::MatchSet.new(doc1, doc2, :match_set_signature1 => signature1, :match_set_signature2 => signature2)
        match_set.add Lorax::Match.new(doc1.at_css("b1"), doc2.at_css("b1"))
        match_set.add Lorax::Match.new(doc1.at_css("b2"), doc2.at_css("b2"))

        match_set = Lorax::FastMatcher.new(doc1, doc2, :matcher_match_set => match_set).match
        assert_forced_match_exists match_set, doc1.at_css("a1"), doc2.at_xpath("//a1[2]")
      end
    end
  end

  describe "choosing the best among multiple possible matches" do
    context "no match's parent is same-named" do
      it "we don't care which node we match, just pick one" do
        doc1 = xml { root {
            a1 { b1 }
          } }
        doc2 = xml { root {
            a2 { b1 }
            a3 { b1 }
          } }
        signature1 = Lorax::Signature.new(doc1.root)
        signature2 = Lorax::Signature.new(doc2.root)
        signature1.set_signature(doc1.at_xpath("//b1"), "b1")
        signature2.set_signature(doc2.at_xpath("//a2/b1"), "b1")
        signature2.set_signature(doc2.at_xpath("//a3/b1"), "b1")
        match_set = Lorax::FastMatcher.new(doc1, doc2,
          :match_set_signature1 => signature1, :match_set_signature2 => signature2).match
        match_set.match(doc1.at_css("b1")).other(doc1.at_css("b1")).name.should == "b1"
      end
    end

    context "one match's parent is same-named" do
      it "matches the node with the same-named parent" do
        doc1 = xml { root {
            a2 { b1 ; b2 }
          } }
        doc2 = xml { root {
            a1 { b1 }
            a2 { b1 }
            a3 { b1 }
          } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_forced_match_exists match_set, doc1.at_css("a2"), doc2.at_css("a2")
      end
    end

    context "multiple identical nodes exist in both documents" do
      it "should create one-to-one match relationships" do
        doc1 = xml { root1 {
            a1 ; a1 ; a1
          } }
        doc2 = xml { root2 {
            a1 ; a1
          } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        [doc1, doc2].each do |doc|
          others = doc.css("a1").collect do |node|
            m = match_set.match(node)
            m ? m.pair.last : nil
          end
          others.uniq.length.should == others.length
        end
      end
    end

    context "multiple matches' parents are same-named" do
      it "matches the node with the same-named grandparent" do
        doc1 = xml { root {
            wrap2 {
              a1 { b1 { 10.times { c1 "hello there" } } ; b2 }
            } } }
        doc2 = xml { root {
            wrap1 {
              a1 { b1 { 10.times { c1 "hello there" } } }
            }
            wrap2 {
              a1 { b1 { 10.times { c1 "hello there" } } }
            }
            wrap3 {
              a1 { b1 { 10.times { c1 "hello there" } } }
            } } }
        match_set = Lorax::FastMatcher.new(doc1, doc2).match
        assert_forced_match_exists match_set, doc1.at_css("wrap2"), doc2.at_css("wrap2")
      end
    end
  end
end