lorax 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/README.rdoc +2 -0
- data/Rakefile +1 -0
- data/TODO +1 -6
- data/lib/lorax.rb +1 -1
- data/lib/lorax/signature.rb +25 -12
- data/spec/unit/signature_spec.rb +41 -5
- metadata +47 -44
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/README.rdoc
CHANGED
@@ -15,6 +15,8 @@ generates deltas in less than O(n * log n) time, accepting some
|
|
15
15
|
tradeoffs in the size of the delta set. You can find his paper at
|
16
16
|
http://gregory.cobena.free.fr/www/Publications/thesis.html.
|
17
17
|
|
18
|
+
"I am the Lorax, I speak for the trees."
|
19
|
+
|
18
20
|
== Features / Problems
|
19
21
|
|
20
22
|
* Detect differences between documents, or tell whether two documents are the same.
|
data/Rakefile
CHANGED
data/TODO
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
# -*-org-*-
|
2
|
-
|
2
|
+
Lorax TODO
|
3
3
|
|
4
|
-
* gem
|
5
|
-
*** gemspec
|
6
|
-
*** license
|
7
|
-
*** gemcutter
|
8
4
|
* docs
|
9
5
|
*** rdocs
|
10
|
-
*** readme
|
11
6
|
*** class description notes
|
12
7
|
- Signature: calculate and persist signatures and weights for nodes in a single document
|
13
8
|
- Match: represents a match between two nodes
|
data/lib/lorax.rb
CHANGED
data/lib/lorax/signature.rb
CHANGED
@@ -19,7 +19,7 @@ module Lorax
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def nodes(sig=nil)
|
22
|
-
sig ? @nodes[sig] : @node
|
22
|
+
sig ? @nodes[sig] : [@node]
|
23
23
|
end
|
24
24
|
|
25
25
|
def size
|
@@ -30,12 +30,19 @@ module Lorax
|
|
30
30
|
return @signatures[node] if @signatures.key?(node)
|
31
31
|
raise ArgumentError, "signature expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
|
32
32
|
|
33
|
-
if node.text?
|
33
|
+
if node.text?
|
34
|
+
content = node.content.strip
|
35
|
+
if content.empty?
|
36
|
+
return nil
|
37
|
+
else
|
38
|
+
monogram = signature = hashify(content)
|
39
|
+
end
|
40
|
+
elsif node.cdata? || node.comment?
|
34
41
|
monogram = signature = hashify(node.content)
|
35
42
|
elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
|
36
43
|
monogram = signature = hashify(node.to_html)
|
37
44
|
elsif node.element?
|
38
|
-
children_sig = hashify(node.children .collect { |child| signature(child) })
|
45
|
+
children_sig = hashify(node.children .collect { |child| signature(child) }.compact)
|
39
46
|
attr_sig = hashify(node.attributes.sort.collect { |k,v| [k, v.value] }.flatten)
|
40
47
|
monogram = hashify(node.name, attr_sig)
|
41
48
|
signature = hashify(node.name, attr_sig, children_sig)
|
@@ -55,16 +62,22 @@ module Lorax
|
|
55
62
|
return @weights[node] if @weights.key?(node)
|
56
63
|
raise ArgumentError, "weight expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
node.children.inject(1) { |sum, child| sum += weight(child) }
|
65
|
-
else
|
66
|
-
raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
|
65
|
+
if node.text?
|
66
|
+
content = node.content.strip
|
67
|
+
if content.empty?
|
68
|
+
calculated_weight = 0
|
69
|
+
else
|
70
|
+
calculated_weight = 1 + Math.log(content.length)
|
67
71
|
end
|
72
|
+
elsif node.cdata? || node.comment?
|
73
|
+
calculated_weight = 1 + Math.log(node.content.length)
|
74
|
+
elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
|
75
|
+
calculated_weight = 1
|
76
|
+
elsif node.element?
|
77
|
+
calculated_weight = node.children.inject(1) { |sum, child| sum += weight(child) }
|
78
|
+
else
|
79
|
+
raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
|
80
|
+
end
|
68
81
|
|
69
82
|
@weights[node] = calculated_weight
|
70
83
|
end
|
data/spec/unit/signature_spec.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
2
|
|
3
3
|
describe Lorax::Signature do
|
4
|
+
WHITESPACES = ["\n"," ","\t","\r","\f"]
|
5
|
+
|
4
6
|
def assert_node_signature_equal(node1, node2)
|
5
7
|
Lorax::Signature.new(node1).signature.should == Lorax::Signature.new(node2).signature
|
6
8
|
end
|
@@ -46,6 +48,18 @@ describe Lorax::Signature do
|
|
46
48
|
node_sig = Lorax::Signature.new(nodes.first)
|
47
49
|
doc_sig.nodes(node_sig.signature).should =~ nodes.to_a
|
48
50
|
end
|
51
|
+
|
52
|
+
it "returns the node if I pass nil" do
|
53
|
+
doc = xml { root {
|
54
|
+
a1 "hello1"
|
55
|
+
a1 "hello2"
|
56
|
+
a1 "hello3"
|
57
|
+
} }
|
58
|
+
nodes = doc.css("a1")
|
59
|
+
doc_sig = Lorax::Signature.new(doc.root)
|
60
|
+
node_sig = Lorax::Signature.new(nodes.first)
|
61
|
+
doc_sig.nodes(nil).should == [doc.root]
|
62
|
+
end
|
49
63
|
end
|
50
64
|
|
51
65
|
describe "#size" do
|
@@ -166,24 +180,46 @@ describe Lorax::Signature do
|
|
166
180
|
sig.signature(node)
|
167
181
|
end
|
168
182
|
|
169
|
-
context "
|
170
|
-
it "
|
183
|
+
context "passed a text Node" do
|
184
|
+
it "returns equal signatures for identical text nodes" do
|
171
185
|
doc = xml { root {
|
172
186
|
span "hello"
|
173
187
|
span "hello"
|
174
188
|
} }
|
175
189
|
assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
|
176
190
|
end
|
177
|
-
end
|
178
191
|
|
179
|
-
|
180
|
-
it "have inequal signatures" do
|
192
|
+
it "returns inequal signatures for different text nodes" do
|
181
193
|
doc = xml { root {
|
182
194
|
span "hello"
|
183
195
|
span "goodbye"
|
184
196
|
} }
|
185
197
|
assert_node_signature_not_equal(*doc.css("span").collect { |n| n.children.first })
|
186
198
|
end
|
199
|
+
|
200
|
+
it "ignores leading whitespace" do
|
201
|
+
doc = xml { root {
|
202
|
+
span "hello"
|
203
|
+
span "#{WHITESPACES.join}hello"
|
204
|
+
} }
|
205
|
+
assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
|
206
|
+
end
|
207
|
+
|
208
|
+
it "ignores trailing whitespace" do
|
209
|
+
doc = xml { root {
|
210
|
+
span "hello"
|
211
|
+
span "hello#{WHITESPACES.join}"
|
212
|
+
} }
|
213
|
+
assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
|
214
|
+
end
|
215
|
+
|
216
|
+
it "treats empty text nodes the same as no text node" do
|
217
|
+
doc = xml { root {
|
218
|
+
span WHITESPACES.join
|
219
|
+
span
|
220
|
+
} }
|
221
|
+
assert_node_signature_equal(*doc.css("span"))
|
222
|
+
end
|
187
223
|
end
|
188
224
|
|
189
225
|
context "elements with same name (with no attributes and no content)" do
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lorax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
- 1
|
8
|
+
- 2
|
8
9
|
- 0
|
9
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Mike Dalessio
|
@@ -17,34 +18,36 @@ cert_chain:
|
|
17
18
|
-----BEGIN CERTIFICATE-----
|
18
19
|
MIIDPDCCAiSgAwIBAgIBADANBgkqhkiG9w0BAQUFADBEMRYwFAYDVQQDDA1taWtl
|
19
20
|
LmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJk/IsZAEZ
|
20
|
-
|
21
|
+
FgNjb20wHhcNMTAwOTMwMDYyNjQ3WhcNMTEwOTMwMDYyNjQ3WjBEMRYwFAYDVQQD
|
21
22
|
DA1taWtlLmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJ
|
22
|
-
k/
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
k/IsZAEZFgNjb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDLv4nl
|
24
|
+
BGRtliYy5s5MhlFO88UvkkETFcS79OCaGFKorxPTmcfDrR2/2x0mAySXJ6I1uPEU
|
25
|
+
WSAWaPb1at61NEOvp5kRNzUNdwGakBA/fd1vZ1N2rwHRtjk/8t6DX8yiflr6T761
|
26
|
+
9ZMYPE+t85NvlPt0/WpT778imNZXwGQNcQJwNESDiBTgyjN8bOWpvRrVADVdOCme
|
27
|
+
DW3AfJnF/kdMYuSiUuFMZpyOlULEbOsrvOfUoEKjoFaVNv7FJ28/kLH1UgmtucOD
|
28
|
+
m5bZ/qy5b2+CWzzsmUfysaGnLQ4LjvAFpmgZGAjIE9TnyjU0jw+2e7dq8uRjdnFJ
|
29
|
+
gfWQlnJuwAlZXR1nAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0G
|
30
|
+
A1UdDgQWBBRbc4XnK6it228clp2DjyqaVjKW+DANBgkqhkiG9w0BAQUFAAOCAQEA
|
31
|
+
xPtSPtMl9qsgNGcnSDLSTjwGouwsjOB19IbtdODFTabUpRPCk7OFHeYGdJik4iiZ
|
32
|
+
fk10t3vzr6uWMAyOfwpWWFRnEYAvy9ZaMGDIZPKD8xWxaRTLwmi+pQsS8Lo2IpDC
|
33
|
+
Lb+l0lUiRiYS3/Ez7tA6pS122cvuQroWfuqh5Mi3pNAi1nuBTlhCNJuR5XUaOjqs
|
34
|
+
DAoZLfYEEW+4bmkAb6ky2TPUslaln56PO3/JG+IfWZwCvTFFVdKRBKXqLaAxO9rv
|
35
|
+
7nflCv7xpUSUGGZ6hoPG8dil+Mp/kKV8cb1kxZz+C8660hC93dJ3FQ3adX30ylvZ
|
36
|
+
C4THW+6HEQDCdOkiArif8A==
|
36
37
|
-----END CERTIFICATE-----
|
37
38
|
|
38
|
-
date: 2010-
|
39
|
+
date: 2010-10-14 00:00:00 -04:00
|
39
40
|
default_executable:
|
40
41
|
dependencies:
|
41
42
|
- !ruby/object:Gem::Dependency
|
42
43
|
name: nokogiri
|
43
44
|
prerelease: false
|
44
45
|
requirement: &id001 !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
45
47
|
requirements:
|
46
48
|
- - ">="
|
47
49
|
- !ruby/object:Gem::Version
|
50
|
+
hash: 7
|
48
51
|
segments:
|
49
52
|
- 1
|
50
53
|
- 4
|
@@ -56,72 +59,66 @@ dependencies:
|
|
56
59
|
name: rubyforge
|
57
60
|
prerelease: false
|
58
61
|
requirement: &id002 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
59
63
|
requirements:
|
60
64
|
- - ">="
|
61
65
|
- !ruby/object:Gem::Version
|
66
|
+
hash: 7
|
62
67
|
segments:
|
63
68
|
- 2
|
64
69
|
- 0
|
65
|
-
-
|
66
|
-
version: 2.0.
|
70
|
+
- 4
|
71
|
+
version: 2.0.4
|
67
72
|
type: :development
|
68
73
|
version_requirements: *id002
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: gemcutter
|
71
|
-
prerelease: false
|
72
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - ">="
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
- 3
|
79
|
-
- 0
|
80
|
-
version: 0.3.0
|
81
|
-
type: :development
|
82
|
-
version_requirements: *id003
|
83
74
|
- !ruby/object:Gem::Dependency
|
84
75
|
name: rspec
|
85
76
|
prerelease: false
|
86
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
77
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
87
79
|
requirements:
|
88
80
|
- - ">="
|
89
81
|
- !ruby/object:Gem::Version
|
82
|
+
hash: 13
|
90
83
|
segments:
|
91
84
|
- 1
|
92
85
|
- 2
|
93
86
|
- 9
|
94
87
|
version: 1.2.9
|
95
88
|
type: :development
|
96
|
-
version_requirements: *id004
|
89
|
+
version_requirements: *id003
|
97
90
|
- !ruby/object:Gem::Dependency
|
98
91
|
name: rr
|
99
92
|
prerelease: false
|
100
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
93
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
101
95
|
requirements:
|
102
96
|
- - ">="
|
103
97
|
- !ruby/object:Gem::Version
|
98
|
+
hash: 63
|
104
99
|
segments:
|
105
100
|
- 0
|
106
101
|
- 10
|
107
102
|
- 4
|
108
103
|
version: 0.10.4
|
109
104
|
type: :development
|
110
|
-
version_requirements: *id005
|
105
|
+
version_requirements: *id004
|
111
106
|
- !ruby/object:Gem::Dependency
|
112
107
|
name: hoe
|
113
108
|
prerelease: false
|
114
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
109
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
110
|
+
none: false
|
115
111
|
requirements:
|
116
112
|
- - ">="
|
117
113
|
- !ruby/object:Gem::Version
|
114
|
+
hash: 21
|
118
115
|
segments:
|
119
116
|
- 2
|
120
|
-
-
|
121
|
-
-
|
122
|
-
version: 2.
|
117
|
+
- 6
|
118
|
+
- 1
|
119
|
+
version: 2.6.1
|
123
120
|
type: :development
|
124
|
-
version_requirements: *id006
|
121
|
+
version_requirements: *id005
|
125
122
|
description: |-
|
126
123
|
The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri.
|
127
124
|
|
@@ -133,6 +130,8 @@ description: |-
|
|
133
130
|
generates deltas in less than O(n * log n) time, accepting some
|
134
131
|
tradeoffs in the size of the delta set. You can find his paper at
|
135
132
|
http://gregory.cobena.free.fr/www/Publications/thesis.html.
|
133
|
+
|
134
|
+
"I am the Lorax, I speak for the trees."
|
136
135
|
email:
|
137
136
|
- mike.dalessio@gmail.com
|
138
137
|
executables:
|
@@ -192,23 +191,27 @@ rdoc_options:
|
|
192
191
|
require_paths:
|
193
192
|
- lib
|
194
193
|
required_ruby_version: !ruby/object:Gem::Requirement
|
194
|
+
none: false
|
195
195
|
requirements:
|
196
196
|
- - ">="
|
197
197
|
- !ruby/object:Gem::Version
|
198
|
+
hash: 3
|
198
199
|
segments:
|
199
200
|
- 0
|
200
201
|
version: "0"
|
201
202
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
203
|
+
none: false
|
202
204
|
requirements:
|
203
205
|
- - ">="
|
204
206
|
- !ruby/object:Gem::Version
|
207
|
+
hash: 3
|
205
208
|
segments:
|
206
209
|
- 0
|
207
210
|
version: "0"
|
208
211
|
requirements: []
|
209
212
|
|
210
213
|
rubyforge_project: lorax
|
211
|
-
rubygems_version: 1.3.
|
214
|
+
rubygems_version: 1.3.7
|
212
215
|
signing_key:
|
213
216
|
specification_version: 3
|
214
217
|
summary: The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri
|
metadata.gz.sig
CHANGED
Binary file
|