lorax 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Lorax::DeltaSetGenerator do
4
+ describe "#generate_delta_set" do
5
+     context "InsertDeltas" do # every example counts the Lorax::InsertDelta entries in the generated delta set
6
+       it "should be generated for an atomic node without a match" do
7
+         doc1 = xml { root1 } # xml is a helper defined outside this file (presumably spec_helper)
8
+         doc2 = xml { root2 }
9
+         match_set = Lorax::MatchSet.new doc1, doc2 # no matches are added to the set
10
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
11
+         delta_set.deltas.select { |d| d.is_a?(Lorax::InsertDelta) }.length.should == 1
12
+       end
13
+
14
+       it "should be generated for a subtree without a match" do
15
+         doc1 = xml { root1 }
16
+         doc2 = xml { root2 { a1 ; a2 "hello" } } # second document has children under its root
17
+         match_set = Lorax::MatchSet.new doc1, doc2
18
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
19
+         delta_set.deltas.select { |d| d.is_a?(Lorax::InsertDelta) }.length.should == 1 # one delta expected, not one per descendant
20
+       end
21
+
22
+       it "should not be generated for children of a perfect match" do
23
+         doc1 = xml { root { a1 { b1 "hello" } } }
24
+         doc2 = xml { root { a1 { b1 "hello" } ; a2 } }
25
+         match_set = Lorax::MatchSet.new doc1, doc2
26
+         match_set.add Lorax::Match.new(doc1.at_css("root"), doc2.at_css("root")) # roots matched without :perfect
27
+         match_set.add Lorax::Match.new(doc1.at_css("a1"), doc2.at_css("a1"), :perfect => true)
28
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
29
+         delta_set.deltas.select { |d| d.is_a?(Lorax::InsertDelta) }.length.should == 1 # a2 is the only insert expected
30
+       end
31
+
32
+       it "should be generated for siblings without a match" do
33
+         doc1 = xml { root {
34
+             a1 "hello"
35
+             a3 "goodbye"
36
+             a5 "again"
37
+           } }
38
+         doc2 = xml { root {
39
+             a1 "hello"
40
+             a2 "middleman"
41
+             a3 "goodbye"
42
+             a4 "good boy"
43
+             a5 "again"
44
+           } }
45
+         match_set = Lorax::MatchSet.new doc1, doc2
46
+         match_set.add Lorax::Match.new(doc1.at_css("a1"), doc2.at_css("a1"), :perfect => true)
47
+         match_set.add Lorax::Match.new(doc1.at_css("a3"), doc2.at_css("a3"), :perfect => true)
48
+         match_set.add Lorax::Match.new(doc1.at_css("a5"), doc2.at_css("a5"), :perfect => true)
49
+         match_set.add Lorax::Match.new(doc1.at_css("root"), doc2.at_css("root"))
50
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
51
+         delta_set.deltas.select { |d| d.is_a?(Lorax::InsertDelta) }.length.should == 2 # a2 and a4 exist only in doc2
52
+       end
53
+     end
54
+
55
+     context "ModifyDeltas" do # examples count Lorax::ModifyDelta entries in the generated delta set
56
+       it "should be generated for nodes that are imperfectly matched" do
57
+         doc1 = xml { root(:foo => :bar) } # same element name, different value for attribute foo
58
+         doc2 = xml { root(:foo => :quux) }
59
+         match_set = Lorax::MatchSet.new doc1, doc2
60
+         match_set.add Lorax::Match.new doc1.at_css("root"), doc2.at_css("root") # no :perfect option is passed
61
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
62
+         delta_set.deltas.select { |d| d.is_a?(Lorax::ModifyDelta) }.length.should == 1
63
+       end
64
+
65
+       context "imperfect self-same match with children" do
66
+         it "should handle children as expected" do
67
+           doc1 = xml { root {
68
+               a1
69
+               a2
70
+               a4(:foo => :bar)
71
+             } }
72
+           doc2 = xml { root {
73
+               a2
74
+               a3
75
+               a4(:foo => :quux)
76
+             } }
77
+           match_set = Lorax::MatchSet.new doc1, doc2
78
+           match_set.add Lorax::Match.new doc1.root, doc2.root
79
+           match_set.add Lorax::Match.new doc1.at_css("a2"), doc2.at_css("a2"), :perfect => true
80
+           match_set.add Lorax::Match.new doc1.at_css("a4"), doc2.at_css("a4")
81
+           delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
82
+           delta_set.deltas.select { |d| d.is_a?(Lorax::InsertDelta) }.length.should == 1 # a3 (only in doc2)
83
+           delta_set.deltas.select { |d| d.is_a?(Lorax::ModifyDelta) }.length.should == 1 # a4 (attribute value differs)
84
+           delta_set.deltas.select { |d| d.is_a?(Lorax::DeleteDelta) }.length.should == 1 # a1 (only in doc1)
85
+         end
86
+       end
87
+
88
+       it "should not be generated for nodes that are imperfectly matched but are self-same" do
89
+         doc1 = xml { root(:foo => :bar) { a1 } } # roots are identical apart from their children
90
+         doc2 = xml { root(:foo => :bar) { a2 } }
91
+         match_set = Lorax::MatchSet.new doc1, doc2
92
+         match_set.add Lorax::Match.new doc1.at_css("root"), doc2.at_css("root")
93
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
94
+         delta_set.deltas.select { |d| d.is_a?(Lorax::ModifyDelta) }.length.should == 0
95
+       end
96
+
97
+       it "should not be generated for nodes that are perfectly matched" do
98
+         doc1 = xml { root }
99
+         doc2 = xml { root }
100
+         match_set = Lorax::MatchSet.new doc1, doc2
101
+         match_set.add Lorax::Match.new doc1.at_css("root"), doc2.at_css("root"), :perfect => true
102
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
103
+         delta_set.deltas.select { |d| d.is_a?(Lorax::ModifyDelta) }.length.should == 0
104
+       end
105
+     end
106
+
107
+     context "DeleteDeltas" do # mirrors the InsertDeltas examples with the two documents swapped
108
+       it "should be generated for an atomic node without a match" do
109
+         doc1 = xml { root1 }
110
+         doc2 = xml { root2 }
111
+         match_set = Lorax::MatchSet.new doc1, doc2 # no matches are added to the set
112
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
113
+         delta_set.deltas.select { |d| d.is_a?(Lorax::DeleteDelta) }.length.should == 1
114
+       end
115
+
116
+       it "should be generated for a subtree without a match" do
117
+         doc1 = xml { root1 { a1 ; a2 "hello" } } # first document has children under its root
118
+         doc2 = xml { root2 }
119
+         match_set = Lorax::MatchSet.new doc1, doc2
120
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
121
+         delta_set.deltas.select { |d| d.is_a?(Lorax::DeleteDelta) }.length.should == 1 # one delta expected, not one per descendant
122
+       end
123
+
124
+       it "should not be generated for children of a deleted node" do
125
+         doc1 = xml { root { a1 { b1 "hello" } ; a2 } }
126
+         doc2 = xml { root { a2 } }
127
+         match_set = Lorax::MatchSet.new doc1, doc2
128
+         match_set.add Lorax::Match.new(doc1.at_css("root"), doc2.at_css("root"))
129
+         match_set.add Lorax::Match.new(doc1.at_css("a2"), doc2.at_css("a2"), :perfect => true)
130
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
131
+         delta_set.deltas.select { |d| d.is_a?(Lorax::DeleteDelta) }.length.should == 1 # a1 only; its child b1 gets no delta of its own
132
+       end
133
+
134
+       it "should be generated for siblings without a match" do
135
+         doc1 = xml { root {
136
+             a1 "hello"
137
+             a2 "middleman"
138
+             a3 "goodbye"
139
+             a4 "good boy"
140
+             a5 "again"
141
+           } }
142
+         doc2 = xml { root {
143
+             a1 "hello"
144
+             a3 "goodbye"
145
+             a5 "again"
146
+           } }
147
+         match_set = Lorax::MatchSet.new doc1, doc2
148
+         match_set.add Lorax::Match.new(doc1.at_css("a1"), doc2.at_css("a1"), :perfect => true)
149
+         match_set.add Lorax::Match.new(doc1.at_css("a3"), doc2.at_css("a3"), :perfect => true)
150
+         match_set.add Lorax::Match.new(doc1.at_css("a5"), doc2.at_css("a5"), :perfect => true)
151
+         match_set.add Lorax::Match.new(doc1.at_css("root"), doc2.at_css("root"))
152
+         delta_set = Lorax::DeltaSetGenerator.generate_delta_set(match_set)
153
+         delta_set.deltas.select { |d| d.is_a?(Lorax::DeleteDelta) }.length.should == 2 # a2 and a4 exist only in doc1
154
+       end
155
+     end
156
+ end
157
+ end
@@ -0,0 +1,40 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Lorax::DeltaSet do # covers add/deltas ordering, apply (on a copy) and apply! (in place)
4
+   describe "#add / #deltas" do
5
+     it "appends to and returns an ordered list of deltas" do
6
+       delta_set = Lorax::DeltaSet.new
7
+       delta_set.add :foo # plain symbols stand in for delta objects here
8
+       delta_set.add :bar
9
+       delta_set.deltas.should == [:foo, :bar]
10
+     end
11
+   end
12
+
13
+   describe "#apply" do
14
+     it "calls apply! on a duplicate document" do
15
+       delta_set = Lorax::DeltaSet.new
16
+       document = Nokogiri::XML::Document.new
17
+       mock(document).dup { :foo } # expect dup and make it return :foo; mock(...) syntax looks like RR - confirm in spec_helper
18
+       mock(delta_set).apply!(:foo) # apply! must receive the value returned by dup
19
+       delta_set.apply document
20
+     end
21
+   end
22
+
23
+   describe "#apply!" do
24
+     it "invokes apply! on each delta in order" do
25
+       doc = xml { root }
26
+       delta_set = Lorax::DeltaSet.new
27
+       delta1 = Lorax::InsertDelta.new(:foo, :bar, :quux) # placeholder arguments; apply! is mocked below
28
+       delta2 = Lorax::InsertDelta.new(:foo, :bar, :quux)
29
+       delta_set.add delta1
30
+       delta_set.add delta2
31
+
32
+       order_of_invocation = [] # each mocked apply! records its own label here
33
+       mock(delta1).apply!(doc) { order_of_invocation << :delta1 }
34
+       mock(delta2).apply!(doc) { order_of_invocation << :delta2 }
35
+       delta_set.apply!(doc)
36
+
37
+       order_of_invocation.should == [:delta1, :delta2]
38
+     end
39
+   end
40
+ end
@@ -0,0 +1,9 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Lorax do # placeholder spec for the top-level Lorax.diff entry point
4
+   describe ".diff" do
5
+     it "should accept an IO" # no block given: RSpec reports a block-less example as pending
6
+     it "should accept a string" # pending, as above
7
+     it "should accept a Nokogiri::XML::Document" # pending, as above
8
+   end
9
+ end
@@ -0,0 +1,93 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Lorax::MatchSet do
4
+ describe "#new" do
5
+ context "normal usage" do
6
+ it "takes two arguments" do
7
+ proc { Lorax::MatchSet.new(xml{root}) }.should raise_error(ArgumentError)
8
+ proc { Lorax::MatchSet.new(xml{root}, xml{root}) }.should_not raise_error(ArgumentError)
9
+ end
10
+
11
+ it "builds a Signature for each document root" do
12
+ doc1 = xml { root1 }
13
+ doc2 = xml { root2 }
14
+ mock.proxy(Lorax::Signature).new(doc1.root)
15
+ mock.proxy(Lorax::Signature).new(doc2.root)
16
+ Lorax::MatchSet.new(doc1, doc2)
17
+ end
18
+ end
19
+
20
+ context "with dependency injection" do
21
+ it "takes an optional third argument for dependency injection" do
22
+ proc { Lorax::MatchSet.new(xml{root}, xml{root}, {:foo => :bar}) }.should_not raise_error(ArgumentError)
23
+ end
24
+
25
+ it "will use the value of ':match_set_signature1' for @signature1" do
26
+ match_set = Lorax::MatchSet.new(xml{root}, xml{root}, {:match_set_signature1 => :foo})
27
+ match_set.signature1.should == :foo
28
+ end
29
+
30
+ it "will use the value of ':match_set_signature2' for @signature2" do
31
+ match_set = Lorax::MatchSet.new(xml{root}, xml{root}, {:match_set_signature2 => :foo})
32
+ match_set.signature2.should == :foo
33
+ end
34
+ end
35
+ end
36
+
37
+   describe "#signature1" do # reader for the Signature built from the first document
38
+     it "returns the Signature of the first document" do
39
+       doc1 = xml { root1 }
40
+       doc2 = xml { root2 }
41
+       match_set = Lorax::MatchSet.new(doc1, doc2)
42
+       match_set.signature1.should_not be_nil
43
+       match_set.signature1.root.should == doc1.root # the signature is rooted at doc1's root element
44
+     end
45
+   end
46
+
47
+   describe "#signature2" do # reader for the Signature built from the second document
48
+     it "returns the Signature of the second document" do
49
+       doc1 = xml { root1 }
50
+       doc2 = xml { root2 }
51
+       match_set = Lorax::MatchSet.new(doc1, doc2)
52
+       match_set.signature2.should_not be_nil
53
+       match_set.signature2.root.should == doc2.root # the signature is rooted at doc2's root element
54
+     end
55
+   end
56
+
57
+   describe "#match and #add" do # lookup of a registered Match from either of its two nodes
58
+     before do
59
+       @doc1 = xml { root1 { a1 } }
60
+       @doc2 = xml { root2 { a1 } }
61
+       @match_set = Lorax::MatchSet.new(@doc1, @doc2) # starts with no matches added
62
+     end
63
+
64
+     context "when there is a match for the node" do
65
+       before do
66
+         @match = Lorax::Match.new(@doc1.at_css("a1"), @doc2.at_css("a1"))
67
+         @match_set.add @match
68
+       end
69
+
70
+       it "returns the match" do
71
+         @match_set.match(@doc1.at_css("a1")).should == @match # lookup by the first document's node
72
+         @match_set.match(@doc2.at_css("a1")).should == @match # lookup by the second document's node
73
+       end
74
+     end
75
+
76
+     context "when there is no match" do
77
+       it "returns nil" do
78
+         @match_set.match(@doc1.at_css("a1")).should be_nil
79
+         @match_set.match(@doc2.at_css("a1")).should be_nil
80
+       end
81
+     end
82
+   end
83
+
84
+   describe "#to_delta_set" do # checks delegation to DeltaSetGenerator only, not the resulting deltas
85
+     it "invokes DeltaSetGenerator.generate_delta_set on itself" do
86
+       doc1 = xml { root1 }
87
+       doc2 = xml { root2 }
88
+       match_set = Lorax::MatchSet.new(doc1, doc2)
89
+       mock(Lorax::DeltaSetGenerator).generate_delta_set(match_set) # expectation: called with this very match set
90
+       match_set.to_delta_set
91
+     end
92
+   end
93
+ end
@@ -0,0 +1,473 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe Lorax::Signature do
4
+   def assert_node_signature_equal(node1, node2) # spec helper: expects both nodes to produce the same signature
5
+     Lorax::Signature.new(node1).signature.should == Lorax::Signature.new(node2).signature # a fresh Signature is built per node
6
+   end
7
+
8
+   def assert_node_signature_not_equal(node1, node2) # spec helper: expects the two nodes to produce different signatures
9
+     Lorax::Signature.new(node1).signature.should_not == Lorax::Signature.new(node2).signature # a fresh Signature is built per node
10
+   end
11
+
12
+   describe ".new" do # constructor behaviour with and without a node argument
13
+     it "accepts nil" do
14
+       proc { Lorax::Signature.new }.should_not raise_error # called with no argument at all; presumably the parameter defaults to nil
15
+     end
16
+
17
+     it "does not call signature if param is nil" do
18
+       mock.instance_of(Lorax::Signature).signature(42).never # expectation on any Signature instance
19
+       Lorax::Signature.new(nil)
20
+     end
21
+
22
+     it "calls signature if a param is non-nil" do
23
+       mock.instance_of(Lorax::Signature).signature(42).once # signature is mocked, so 42 need not be a real node
24
+       Lorax::Signature.new(42)
25
+     end
26
+   end
27
+
28
+   describe "#root" do # the node the Signature was constructed with
29
+     it "returns the subtree root" do
30
+       doc = xml { root { a1 "hello" } }
31
+       node = doc.at_css("a1") # a non-root element is used as the subtree root
32
+       sig = Lorax::Signature.new(node)
33
+       sig.root.should == node
34
+     end
35
+   end
36
+
37
+   describe "#nodes" do # reverse lookup from a signature value to the nodes that carry it
38
+     it "returns an array of nodes matching the signature" do
39
+       doc = xml { root {
40
+           a1 "hello"
41
+           a1 "hello"
42
+           a1 "hello"
43
+         } }
44
+       nodes = doc.css("a1") # three identical elements
45
+       doc_sig = Lorax::Signature.new(doc.root)
46
+       node_sig = Lorax::Signature.new(nodes.first)
47
+       doc_sig.nodes(node_sig.signature).should =~ nodes.to_a # =~ on arrays: same elements, order ignored
48
+     end
49
+   end
50
+
51
+   describe "#size" do # node count of the subtree a Signature covers
52
+     it "returns the total number of nodes in the subtree" do
53
+       doc = xml { root { a1 "hello" } }
54
+       node = doc.at_css("a1") # NOTE(review): node is never used in this example
55
+       doc_sig = Lorax::Signature.new(doc.root)
56
+       doc_sig.size.should == 3 # root, a1 and the "hello" text node
57
+     end
58
+   end
59
+
60
+   describe "#set_signature" do # manual assignment feeding both signature(node) and nodes(signature)
61
+     it "assigns values such that signature and nodes return the proper thing" do
62
+       signature = Lorax::Signature.new
63
+       signature.set_signature(:foo, "a") # symbols stand in for nodes; :foo and :bar share signature "a"
64
+       signature.set_signature(:bar, "a")
65
+       signature.set_signature(:bazz, "b")
66
+       signature.signature(:foo).should == "a"
67
+       signature.signature(:bar).should == "a"
68
+       signature.signature(:bazz).should == "b"
69
+       signature.nodes("a").should =~ [:foo, :bar] # order-insensitive comparison
70
+       signature.nodes("b").should == [:bazz]
71
+     end
72
+   end
73
+
74
+   describe "#set_weight" do # manual weight assignment read back through weight(node)
75
+     it "assigns values such that weight returns the proper thing" do
76
+       signature = Lorax::Signature.new
77
+       signature.set_weight(:foo, 2.2) # a symbol stands in for a node
78
+       signature.weight(:foo).should == 2.2
79
+     end
80
+   end
81
+
82
+   describe "#signature" do # argument handling, caching and the equality rules of node signatures
83
+     context "passed no argument" do
84
+       it "returns the subtree root's signature" do
85
+         doc = xml { root { a1 "hello" } }
86
+         sig = Lorax::Signature.new(doc.root)
87
+         sig.signature.should == sig.signature(doc.root)
88
+       end
89
+     end
90
+
91
+     context "passed a node" do
92
+       it "returns the node's signature" do
93
+         doc = xml { root { a1 "hello" } }
94
+         node = doc.at_css("a1")
95
+         doc_sig = Lorax::Signature.new(doc.root)
96
+         node_sig = Lorax::Signature.new(node)
97
+         doc_sig.signature(node).should == node_sig.signature # same value whether computed from the document or from the node
98
+       end
99
+     end
100
+
101
+     context "passed a non-Node" do
102
+       it "raises an error" do
103
+         proc { Lorax::Signature.new.signature(42) }.should raise_error(ArgumentError, /signature expects a Node/)
104
+       end
105
+     end
106
+
107
+     context "passed a cdata Node" do
108
+       it "treats it like a leaf text node" do
109
+         doc = xml { root { cdata "hello" } }
110
+         node = doc.root.children.first
111
+         doc_sig = Lorax::Signature.new(doc.root)
112
+         node_sig = Lorax::Signature.new(node)
113
+         doc_sig.signature(node).should == node_sig.signature
114
+       end
115
+     end
116
+
117
+     context "passed a comment Node" do
118
+       it "treats it like a leaf text node" do
119
+         doc = Nokogiri::XML "<root><!-- hello --></root>"
120
+         node = doc.root.children.first
121
+         doc_sig = Lorax::Signature.new(doc.root)
122
+         node_sig = Lorax::Signature.new(node)
123
+         doc_sig.signature(node).should == node_sig.signature
124
+       end
125
+     end
126
+
127
+     context "passed an entity reference Node" do
128
+       it "treats it like a leaf text node" do
129
+         doc = Nokogiri::XML %q(<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html><span>&nbsp;</span></html>)
130
+         node = doc.at_css("span").children.first
131
+         doc_sig = Lorax::Signature.new(doc.root)
132
+         node_sig = Lorax::Signature.new(node)
133
+         doc_sig.signature(node).should == node_sig.signature
134
+       end
135
+     end
136
+
137
+     context "passed an invalid Node" do
138
+       it "raises an error" do
139
+         doc = xml { root { a1("foo" => "bar") } }
140
+         attr = doc.at_css("a1").attributes.first.last # the attribute node for "foo"
141
+         proc { Lorax::Signature.new.signature(attr) }.should raise_error(ArgumentError, /signature expects an element/)
142
+       end
143
+     end
144
+
145
+     it "hashes each node only once" do
146
+       doc = xml { root { a1 { b1 { c1 "hello" } } } }
147
+       node = doc.at_css "c1" # NOTE(review): node is never used in this example
148
+       mock.proxy.instance_of(Lorax::Signature).signature(anything).times(5) # 5 nodes: root, a1, b1, c1 and the text node
149
+       Lorax::Signature.new.signature(doc.root)
150
+     end
151
+
152
+     it "caches signatures" do
153
+       doc = xml { root { a1 { b1 { c1 "hello" } } } }
154
+       node = doc.at_css "c1" # NOTE(review): node is never used in this example
155
+       mock.proxy.instance_of(Lorax::Signature).signature(anything).times(6) # presumably 5 calls for the first pass plus 1 for the repeated top-level call
156
+       sig = Lorax::Signature.new
157
+       sig.signature(doc.root)
158
+       sig.signature(doc.root)
159
+     end
160
+
161
+     it "calculates weights along the way" do
162
+       doc = xml { root { a1 } }
163
+       node = doc.at_css "a1"
164
+       sig = Lorax::Signature.new
165
+       mock(sig).weight(node) # computing the signature is expected to call weight for the same node
166
+       sig.signature(node)
167
+     end
168
+
169
+     context "identical text nodes" do
170
+       it "have equal signatures" do
171
+         doc = xml { root {
172
+             span "hello"
173
+             span "hello"
174
+           } }
175
+         assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first }) # compares the two text children, not the spans
176
+       end
177
+     end
178
+
179
+     context "different text nodes" do
180
+       it "have inequal signatures" do
181
+         doc = xml { root {
182
+             span "hello"
183
+             span "goodbye"
184
+           } }
185
+         assert_node_signature_not_equal(*doc.css("span").collect { |n| n.children.first })
186
+       end
187
+     end
188
+
189
+     context "elements with same name (with no attributes and no content)" do
190
+       it "have equal signatures" do
191
+         doc = xml { root { a1 ; a1 } }
192
+         assert_node_signature_equal(*doc.css("a1"))
193
+       end
194
+     end
195
+
196
+     context "elements with different names" do
197
+       it "have inequal signatures" do
198
+         doc = xml { root { a1 ; a2 } }
199
+         assert_node_signature_not_equal doc.at_css("a1"), doc.at_css("a2")
200
+       end
201
+     end
202
+
203
+     context "same elements in different docs" do
204
+       it "have equal signatures" do
205
+         doc1 = xml { root { a1 } }
206
+         doc2 = xml { root { a1 } }
207
+         assert_node_signature_equal doc1.at_css("a1"), doc2.at_css("a1")
208
+       end
209
+     end
210
+
211
+     context "elements with same name and content (with no attributes)" do
212
+       context "and content is the same" do
213
+         it "have equal signatures" do
214
+           doc = xml { root {
215
+               a1 "hello"
216
+               a1 "hello"
217
+             } }
218
+           assert_node_signature_equal(*doc.css("a1"))
219
+         end
220
+       end
221
+
222
+       context "and content is not the same" do
223
+         it "have inequal signatures" do
224
+           doc = xml { root {
225
+               a1 "hello"
226
+               a1 "goodbye"
227
+             } }
228
+           assert_node_signature_not_equal(*doc.css("a1"))
229
+         end
230
+       end
231
+     end
232
+
233
+     context "elements with same name and children (with no attributes)" do
234
+       context "and children are in the same order" do
235
+         it "have equal signatures" do
236
+           doc = xml { root {
237
+               a1 { b1 ; b2 }
238
+               a1 { b1 ; b2 }
239
+             } }
240
+           assert_node_signature_equal(*doc.css("a1"))
241
+         end
242
+       end
243
+
244
+       context "and children are not in the same order" do
245
+         it "have inequal signatures" do
246
+           doc = xml { root {
247
+               a1 { b1 ; b2 }
248
+               a1 { b2 ; b1 }
249
+             } }
250
+           assert_node_signature_not_equal(*doc.css("a1")) # child order is significant
251
+         end
252
+       end
253
+     end
254
+
255
+     context "elements with same name and same attributes (with no content)" do
256
+       it "have equal signatures" do
257
+         doc = xml { root {
258
+             a1("foo" => "bar", "bazz" => "quux")
259
+             a1("foo" => "bar", "bazz" => "quux")
260
+           } }
261
+         assert_node_signature_equal(*doc.css("a1"))
262
+       end
263
+     end
264
+
265
+     context "elements with same name and different attributes (with no content)" do
266
+       it "have inequal signatures" do
267
+         doc = xml { root {
268
+             a1("foo" => "bar", "bazz" => "quux")
269
+             a1("foo" => "123", "bazz" => "456")
270
+           } }
271
+         assert_node_signature_not_equal(*doc.css("a1"))
272
+       end
273
+     end
274
+
275
+     context "attributes reverse-engineered to be similar" do
276
+       it "have inequal signatures" do
277
+         doc = xml { root {
278
+             a1("foo" => "bar#{Lorax::Signature::SEP}quux") # SEP embedded in the attribute value
279
+             a1("foo#{Lorax::Signature::SEP}bar" => "quux") # SEP embedded in the attribute name
280
+           } }
281
+         assert_node_signature_not_equal(*doc.css("a1"))
282
+       end
283
+     end
284
+
285
+     context "HTML" do
286
+       it "should be case-insensitive" do
287
+         doc1 = Nokogiri::HTML <<-EOH
288
+           <html><body>
289
+             <DIV FOO="BAR">hello</DIV>
290
+           </body></html>
291
+         EOH
292
+         doc2 = Nokogiri::HTML <<-EOH
293
+           <html><body>
294
+             <div foo="BAR">hello</div>
295
+           </body></html>
296
+         EOH
297
+         assert_node_signature_equal(doc1.at_css("body").children.first,
298
+                                     doc2.at_css("body").children.first)
299
+       end
300
+     end
301
+   end
302
+
303
+   describe "#weight" do # argument handling, caching and the arithmetic of node weights
304
+     context "passed no argument" do
305
+       it "returns the subtree root's weight" do
306
+         doc = xml { root { a1 { b1 { c1 { d1 } } } } }
307
+         node = doc.at_css("a1") # NOTE(review): node is never used in this example
308
+         doc_sig = Lorax::Signature.new(doc.root)
309
+         doc_sig.weight.should == 5 # five nested elements: root, a1, b1, c1, d1
310
+       end
311
+     end
312
+
313
+     context "passed a node" do
314
+       it "returns the node's weight" do
315
+         doc = xml { root { a1 "hello" } }
316
+         node = doc.at_css("a1")
317
+         doc_sig = Lorax::Signature.new(doc.root)
318
+         node_sig = Lorax::Signature.new(node)
319
+         doc_sig.weight(node).should == node_sig.weight # same value whether computed from the document or from the node
320
+       end
321
+     end
322
+
323
+     context "passed a non-Node" do
324
+       it "raises an error" do
325
+         proc { Lorax::Signature.new.weight(42) }.should raise_error(ArgumentError, /weight expects a Node/)
326
+       end
327
+     end
328
+
329
+     context "passed a cdata Node" do
330
+       it "treats it like a leaf text node" do
331
+         doc = xml { root { cdata "hello" } }
332
+         node = doc.root.children.first
333
+         doc_sig = Lorax::Signature.new(doc.root)
334
+         node_sig = Lorax::Signature.new(node)
335
+         doc_sig.weight(node).should == node_sig.weight
336
+       end
337
+     end
338
+
339
+     context "passed a comment Node" do
340
+       it "treats it like a leaf text node" do
341
+         doc = Nokogiri::XML "<root><!-- hello --></root>"
342
+         node = doc.root.children.first
343
+         doc_sig = Lorax::Signature.new(doc.root)
344
+         node_sig = Lorax::Signature.new(node)
345
+         doc_sig.weight(node).should == node_sig.weight
346
+       end
347
+     end
348
+
349
+     context "passed an entity reference Node" do
350
+       it "treats it like a leaf text node" do
351
+         doc = Nokogiri::XML %q(<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html><span>&nbsp;</span></html>)
352
+         node = doc.at_css("span").children.first
353
+         doc_sig = Lorax::Signature.new(doc.root)
354
+         node_sig = Lorax::Signature.new(node)
355
+         doc_sig.weight(node).should == node_sig.weight
356
+       end
357
+     end
358
+
359
+     context "passed an invalid Node" do
360
+       it "raises an error" do
361
+         doc = xml { root { a1("foo" => "bar") } }
362
+         attr = doc.at_css("a1").attributes.first.last # the attribute node for "foo"
363
+         proc { Lorax::Signature.new.weight(attr) }.should raise_error(ArgumentError, /weight expects an element/)
364
+       end
365
+     end
366
+
367
+     it "weighs each node only once" do
368
+       doc = xml { root { a1 { b1 { c1 "hello" } } } }
369
+       node = doc.at_css "c1" # NOTE(review): node is never used in this example
370
+       mock.proxy.instance_of(Lorax::Signature).weight(anything).times(5) # 5 nodes: root, a1, b1, c1 and the text node
371
+       Lorax::Signature.new.weight(doc.root)
372
+     end
373
+
374
+     it "caches weights" do
375
+       doc = xml { root { a1 { b1 { c1 "hello" } } } }
376
+       node = doc.at_css "c1" # NOTE(review): node is never used in this example
377
+       mock.proxy.instance_of(Lorax::Signature).weight(anything).times(6) # presumably 5 calls for the first pass plus 1 for the repeated top-level call
378
+       sig = Lorax::Signature.new
379
+       sig.weight(doc.root)
380
+       sig.weight(doc.root)
381
+     end
382
+
383
+     it "weighs empty nodes with no children as 1" do
384
+       doc = xml { root { a1 } }
385
+       sig = Lorax::Signature.new(doc.root)
386
+       sig.weight(doc.at_css("a1")).should == 1
387
+     end
388
+
389
+     it "weighs nodes with children as 1 + sum(weight(children))" do
390
+       doc = xml { root {
391
+           a1 { b1 ; b2 }
392
+           a2 { b1 ; b2 ; b3 ; b4 }
393
+         } }
394
+       sig = Lorax::Signature.new(doc.root)
395
+       sig.weight(doc.at_css("a1")).should == 3 # 1 + two empty children
396
+       sig.weight(doc.at_css("a2")).should == 5 # 1 + four empty children
397
+       sig.weight.should == 9 # root: 1 + 3 + 5
398
+     end
399
+
400
+     describe "text nodes" do
401
+       it "scores as 1 + log(length)" do
402
+         doc = xml { root {
403
+             a1 "x"
404
+             a2("x" * 500)
405
+             a3("x" * 50_000)
406
+           } }
407
+         sig = Lorax::Signature.new(doc.root)
408
+         sig.weight(doc.at_css("a1")).should be_close(2, 0.0005) # element weight: 1 + text weight (1 + log(1))
409
+         sig.weight(doc.at_css("a2")).should be_close(2 + Math.log(500), 0.0005) # Math.log is the natural logarithm
410
+         sig.weight(doc.at_css("a3")).should be_close(2 + Math.log(50_000), 0.0005)
411
+       end
412
+     end
413
+   end
414
+
415
+   describe "#monogram" do # monogram: equal for elements sharing name and attributes, whatever their children
416
+     context "passed no argument" do
417
+       it "returns the subtree root's monogram" do
418
+         doc = xml { root { a1(:foo => :bar) } }
419
+         sig = Lorax::Signature.new(doc.root)
420
+         sig.monogram.should == sig.monogram(doc.root)
421
+       end
422
+     end
423
+
424
+     context "passed a node" do
425
+       it "returns the node's monogram" do
426
+         doc = xml { root { a1(:foo => :bar) } }
427
+         node = doc.at_css("a1")
428
+         doc_sig = Lorax::Signature.new(doc.root)
429
+         node_sig = Lorax::Signature.new(node)
430
+         doc_sig.monogram(node).should == node_sig.monogram # same value whether computed from the document or from the node
431
+       end
432
+     end
433
+
434
+     context "passed a non-Node" do
435
+       it "raises an error" do
436
+         proc { Lorax::Signature.new.monogram(42) }.should raise_error(ArgumentError, /signature expects a Node/) # NOTE(review): message names signature, not monogram - presumably monogram delegates; confirm
437
+       end
438
+     end
439
+
440
+     context "text nodes" do
441
+       it "returns the signature as the monogram" do
442
+         doc = xml { root { text "hello" } }
443
+         node = doc.root.children.first
444
+         sig = Lorax::Signature.new(doc.root)
445
+         sig.monogram(node).should == sig.signature(node)
446
+       end
447
+     end
448
+
449
+     context "element nodes" do
450
+       it "is equal for nodes with equal names and attributes" do
451
+         doc = xml { root {
452
+             a1(:foo => :bar, :bazz => :quux) { text "hello" }
453
+             a1(:foo => :bar, :bazz => :quux) { b1 }
454
+             a1(:foo => :bar, :bazz => :quux)
455
+           } }
456
+         nodes = doc.css("a1") # same name and attributes, three different contents
457
+         sig = Lorax::Signature.new(doc.root)
458
+         sig.monogram(nodes[0]).should == sig.monogram(nodes[1])
459
+         sig.monogram(nodes[1]).should == sig.monogram(nodes[2])
460
+       end
461
+
462
+       it "is inequal for nodes with different attributes" do
463
+         doc = xml { root {
464
+             a1(:foo => :bar)
465
+             a1(:foo => :bar, :bazz => :quux)
466
+           } }
467
+         nodes = doc.css("a1")
468
+         sig = Lorax::Signature.new(doc.root)
469
+         sig.monogram(nodes[0]).should_not == sig.monogram(nodes[1])
470
+       end
471
+     end
472
+   end
473
+ end