lorax 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
@@ -15,6 +15,8 @@ generates deltas in less than O(n * log n) time, accepting some
15
15
  tradeoffs in the size of the delta set. You can find his paper at
16
16
  http://gregory.cobena.free.fr/www/Publications/thesis.html.
17
17
 
18
+ "I am the Lorax, I speak for the trees."
19
+
18
20
  == Features / Problems
19
21
 
20
22
  * Detect differences between documents, or tell whether two documents are the same.
data/Rakefile CHANGED
@@ -5,6 +5,7 @@ gem 'hoe', '>= 2.5.0'
5
5
  require 'hoe'
6
6
 
7
7
  Hoe.plugin :git
8
+ Hoe.plugin :gemspec
8
9
 
9
10
  Hoe.spec 'lorax' do
10
11
  developer "Mike Dalessio", "mike.dalessio@gmail.com"
data/TODO CHANGED
@@ -1,13 +1,8 @@
1
1
  # -*-org-*-
2
- Diffaroo TODO
2
+ Lorax TODO
3
3
 
4
- * gem
5
- *** gemspec
6
- *** license
7
- *** gemcutter
8
4
  * docs
9
5
  *** rdocs
10
- *** readme
11
6
  *** class description notes
12
7
  - Signature: calculate and persist signatures and weights for nodes in a single document
13
8
  - Match: represents a match between two nodes
@@ -1,7 +1,7 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Lorax
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  REQUIRED_NOKOGIRI_VERSION = "1.4.0"
6
6
  raise LoadError, "lorax requires Nokogiri version #{REQUIRED_NOKOGIRI_VERSION} or higher" unless Nokogiri::VERSION >= REQUIRED_NOKOGIRI_VERSION
7
7
  end
@@ -19,7 +19,7 @@ module Lorax
19
19
  end
20
20
 
21
21
  def nodes(sig=nil)
22
- sig ? @nodes[sig] : @node
22
+ sig ? @nodes[sig] : [@node]
23
23
  end
24
24
 
25
25
  def size
@@ -30,12 +30,19 @@ module Lorax
30
30
  return @signatures[node] if @signatures.key?(node)
31
31
  raise ArgumentError, "signature expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
32
32
 
33
- if node.text? || node.cdata? || node.comment?
33
+ if node.text?
34
+ content = node.content.strip
35
+ if content.empty?
36
+ return nil
37
+ else
38
+ monogram = signature = hashify(content)
39
+ end
40
+ elsif node.cdata? || node.comment?
34
41
  monogram = signature = hashify(node.content)
35
42
  elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
36
43
  monogram = signature = hashify(node.to_html)
37
44
  elsif node.element?
38
- children_sig = hashify(node.children .collect { |child| signature(child) })
45
+ children_sig = hashify(node.children .collect { |child| signature(child) }.compact)
39
46
  attr_sig = hashify(node.attributes.sort.collect { |k,v| [k, v.value] }.flatten)
40
47
  monogram = hashify(node.name, attr_sig)
41
48
  signature = hashify(node.name, attr_sig, children_sig)
@@ -55,16 +62,22 @@ module Lorax
55
62
  return @weights[node] if @weights.key?(node)
56
63
  raise ArgumentError, "weight expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
57
64
 
58
- calculated_weight = \
59
- if node.text? || node.cdata? || node.comment?
60
- 1 + Math.log(node.content.length)
61
- elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
62
- 1
63
- elsif node.element?
64
- node.children.inject(1) { |sum, child| sum += weight(child) }
65
- else
66
- raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
65
+ if node.text?
66
+ content = node.content.strip
67
+ if content.empty?
68
+ calculated_weight = 0
69
+ else
70
+ calculated_weight = 1 + Math.log(content.length)
67
71
  end
72
+ elsif node.cdata? || node.comment?
73
+ calculated_weight = 1 + Math.log(node.content.length)
74
+ elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
75
+ calculated_weight = 1
76
+ elsif node.element?
77
+ calculated_weight = node.children.inject(1) { |sum, child| sum += weight(child) }
78
+ else
79
+ raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
80
+ end
68
81
 
69
82
  @weights[node] = calculated_weight
70
83
  end
@@ -1,6 +1,8 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
3
  describe Lorax::Signature do
4
+ WHITESPACES = ["\n"," ","\t","\r","\f"]
5
+
4
6
  def assert_node_signature_equal(node1, node2)
5
7
  Lorax::Signature.new(node1).signature.should == Lorax::Signature.new(node2).signature
6
8
  end
@@ -46,6 +48,18 @@ describe Lorax::Signature do
46
48
  node_sig = Lorax::Signature.new(nodes.first)
47
49
  doc_sig.nodes(node_sig.signature).should =~ nodes.to_a
48
50
  end
51
+
52
+ it "returns the node if I pass nil" do
53
+ doc = xml { root {
54
+ a1 "hello1"
55
+ a1 "hello2"
56
+ a1 "hello3"
57
+ } }
58
+ nodes = doc.css("a1")
59
+ doc_sig = Lorax::Signature.new(doc.root)
60
+ node_sig = Lorax::Signature.new(nodes.first)
61
+ doc_sig.nodes(nil).should == [doc.root]
62
+ end
49
63
  end
50
64
 
51
65
  describe "#size" do
@@ -166,24 +180,46 @@ describe Lorax::Signature do
166
180
  sig.signature(node)
167
181
  end
168
182
 
169
- context "identical text nodes" do
170
- it "have equal signatures" do
183
+ context "passed a text Node" do
184
+ it "returns equal signatures for identical text nodes" do
171
185
  doc = xml { root {
172
186
  span "hello"
173
187
  span "hello"
174
188
  } }
175
189
  assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
176
190
  end
177
- end
178
191
 
179
- context "different text nodes" do
180
- it "have inequal signatures" do
192
+ it "returns inequal signatures for different text nodes" do
181
193
  doc = xml { root {
182
194
  span "hello"
183
195
  span "goodbye"
184
196
  } }
185
197
  assert_node_signature_not_equal(*doc.css("span").collect { |n| n.children.first })
186
198
  end
199
+
200
+ it "ignores leading whitespace" do
201
+ doc = xml { root {
202
+ span "hello"
203
+ span "#{WHITESPACES.join}hello"
204
+ } }
205
+ assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
206
+ end
207
+
208
+ it "ignores trailing whitespace" do
209
+ doc = xml { root {
210
+ span "hello"
211
+ span "hello#{WHITESPACES.join}"
212
+ } }
213
+ assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
214
+ end
215
+
216
+ it "treats empty text nodes the same as no text node" do
217
+ doc = xml { root {
218
+ span WHITESPACES.join
219
+ span
220
+ } }
221
+ assert_node_signature_equal(*doc.css("span"))
222
+ end
187
223
  end
188
224
 
189
225
  context "elements with same name (with no attributes and no content)" do
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lorax
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 23
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 1
8
+ - 2
8
9
  - 0
9
- version: 0.1.0
10
+ version: 0.2.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Mike Dalessio
@@ -17,34 +18,36 @@ cert_chain:
17
18
  -----BEGIN CERTIFICATE-----
18
19
  MIIDPDCCAiSgAwIBAgIBADANBgkqhkiG9w0BAQUFADBEMRYwFAYDVQQDDA1taWtl
19
20
  LmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJk/IsZAEZ
20
- FgNjb20wHhcNMDkwODExMDU0MjQ5WhcNMTAwODExMDU0MjQ5WjBEMRYwFAYDVQQD
21
+ FgNjb20wHhcNMTAwOTMwMDYyNjQ3WhcNMTEwOTMwMDYyNjQ3WjBEMRYwFAYDVQQD
21
22
  DA1taWtlLmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJ
22
- k/IsZAEZFgNjb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDANjr7
23
- lZ1DKtK8YvNp+5kBzIpwrpClHRrosqo01qmWfGBxZckQUtrJUwGPxpzvIHVq1VKp
24
- a9FXU/QWYek/1S0vhkOf9XGmFBnVCtbJhwGeyzsQFFSoQIfs2hd5gO0dSRpuKdi3
25
- slfJAXzFKg1u/7OCVPgrY/mkdh34MzL5p0gSDzPt7vLPibctHg0GoepYT5Fh1tMQ
26
- luzgrN0weTw/QoEWTMQcNk6CyUpzv0pOe7d0qEPQ9Lx7Lz64gIym3f0pKFpWLfME
27
- l7PFLeR95zw2zsuZQwCR5ma5zjXD3mo2jk1mVqiI8qplOL1u30FU7hRhTV5n/Qe9
28
- elDQoZW9Xz0R5JGDAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0G
29
- A1UdDgQWBBRXWlUJZXcR1jkZPE24+mjUTCqNxjANBgkqhkiG9w0BAQUFAAOCAQEA
30
- jDh5M41sg1MZKG1DXzQmo/IADeWRmXyb3EZaED9lhFFpoQqaralgpgmvuc0GswvO
31
- QIZijh03tPQz8lgp1U1OFZod2ZwbEVTtVZpxs1ssjMraOA6KzlsNROH0XonIiy6j
32
- r2Q0UF35ax8pvr3D5Y6AKzIW1F3aeiREylUDJlb/i1dPQ2PVK0yRrSQoK2epwM9E
33
- zoczlHTTJc/tRvH5Up3Agcv9y+J0U9a1Af9NRsnHPVBdo2H32MsJ99x5NRDWJmJg
34
- ohH37UR7njcc6j4fo22IwTqXaaXJdtVdAWjXP/xs5B3cPYSP6uqFnR46Jf86Iqj1
35
- FlqnTjy13J3nD30uxy9a1g==
23
+ k/IsZAEZFgNjb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDLv4nl
24
+ BGRtliYy5s5MhlFO88UvkkETFcS79OCaGFKorxPTmcfDrR2/2x0mAySXJ6I1uPEU
25
+ WSAWaPb1at61NEOvp5kRNzUNdwGakBA/fd1vZ1N2rwHRtjk/8t6DX8yiflr6T761
26
+ 9ZMYPE+t85NvlPt0/WpT778imNZXwGQNcQJwNESDiBTgyjN8bOWpvRrVADVdOCme
27
+ DW3AfJnF/kdMYuSiUuFMZpyOlULEbOsrvOfUoEKjoFaVNv7FJ28/kLH1UgmtucOD
28
+ m5bZ/qy5b2+CWzzsmUfysaGnLQ4LjvAFpmgZGAjIE9TnyjU0jw+2e7dq8uRjdnFJ
29
+ gfWQlnJuwAlZXR1nAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0G
30
+ A1UdDgQWBBRbc4XnK6it228clp2DjyqaVjKW+DANBgkqhkiG9w0BAQUFAAOCAQEA
31
+ xPtSPtMl9qsgNGcnSDLSTjwGouwsjOB19IbtdODFTabUpRPCk7OFHeYGdJik4iiZ
32
+ fk10t3vzr6uWMAyOfwpWWFRnEYAvy9ZaMGDIZPKD8xWxaRTLwmi+pQsS8Lo2IpDC
33
+ Lb+l0lUiRiYS3/Ez7tA6pS122cvuQroWfuqh5Mi3pNAi1nuBTlhCNJuR5XUaOjqs
34
+ DAoZLfYEEW+4bmkAb6ky2TPUslaln56PO3/JG+IfWZwCvTFFVdKRBKXqLaAxO9rv
35
+ 7nflCv7xpUSUGGZ6hoPG8dil+Mp/kKV8cb1kxZz+C8660hC93dJ3FQ3adX30ylvZ
36
+ C4THW+6HEQDCdOkiArif8A==
36
37
  -----END CERTIFICATE-----
37
38
 
38
- date: 2010-03-09 00:00:00 -05:00
39
+ date: 2010-10-14 00:00:00 -04:00
39
40
  default_executable:
40
41
  dependencies:
41
42
  - !ruby/object:Gem::Dependency
42
43
  name: nokogiri
43
44
  prerelease: false
44
45
  requirement: &id001 !ruby/object:Gem::Requirement
46
+ none: false
45
47
  requirements:
46
48
  - - ">="
47
49
  - !ruby/object:Gem::Version
50
+ hash: 7
48
51
  segments:
49
52
  - 1
50
53
  - 4
@@ -56,72 +59,66 @@ dependencies:
56
59
  name: rubyforge
57
60
  prerelease: false
58
61
  requirement: &id002 !ruby/object:Gem::Requirement
62
+ none: false
59
63
  requirements:
60
64
  - - ">="
61
65
  - !ruby/object:Gem::Version
66
+ hash: 7
62
67
  segments:
63
68
  - 2
64
69
  - 0
65
- - 3
66
- version: 2.0.3
70
+ - 4
71
+ version: 2.0.4
67
72
  type: :development
68
73
  version_requirements: *id002
69
- - !ruby/object:Gem::Dependency
70
- name: gemcutter
71
- prerelease: false
72
- requirement: &id003 !ruby/object:Gem::Requirement
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- - 3
79
- - 0
80
- version: 0.3.0
81
- type: :development
82
- version_requirements: *id003
83
74
  - !ruby/object:Gem::Dependency
84
75
  name: rspec
85
76
  prerelease: false
86
- requirement: &id004 !ruby/object:Gem::Requirement
77
+ requirement: &id003 !ruby/object:Gem::Requirement
78
+ none: false
87
79
  requirements:
88
80
  - - ">="
89
81
  - !ruby/object:Gem::Version
82
+ hash: 13
90
83
  segments:
91
84
  - 1
92
85
  - 2
93
86
  - 9
94
87
  version: 1.2.9
95
88
  type: :development
96
- version_requirements: *id004
89
+ version_requirements: *id003
97
90
  - !ruby/object:Gem::Dependency
98
91
  name: rr
99
92
  prerelease: false
100
- requirement: &id005 !ruby/object:Gem::Requirement
93
+ requirement: &id004 !ruby/object:Gem::Requirement
94
+ none: false
101
95
  requirements:
102
96
  - - ">="
103
97
  - !ruby/object:Gem::Version
98
+ hash: 63
104
99
  segments:
105
100
  - 0
106
101
  - 10
107
102
  - 4
108
103
  version: 0.10.4
109
104
  type: :development
110
- version_requirements: *id005
105
+ version_requirements: *id004
111
106
  - !ruby/object:Gem::Dependency
112
107
  name: hoe
113
108
  prerelease: false
114
- requirement: &id006 !ruby/object:Gem::Requirement
109
+ requirement: &id005 !ruby/object:Gem::Requirement
110
+ none: false
115
111
  requirements:
116
112
  - - ">="
117
113
  - !ruby/object:Gem::Version
114
+ hash: 21
118
115
  segments:
119
116
  - 2
120
- - 5
121
- - 0
122
- version: 2.5.0
117
+ - 6
118
+ - 1
119
+ version: 2.6.1
123
120
  type: :development
124
- version_requirements: *id006
121
+ version_requirements: *id005
125
122
  description: |-
126
123
  The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri.
127
124
 
@@ -133,6 +130,8 @@ description: |-
133
130
  generates deltas in less than O(n * log n) time, accepting some
134
131
  tradeoffs in the size of the delta set. You can find his paper at
135
132
  http://gregory.cobena.free.fr/www/Publications/thesis.html.
133
+
134
+ "I am the Lorax, I speak for the trees."
136
135
  email:
137
136
  - mike.dalessio@gmail.com
138
137
  executables:
@@ -192,23 +191,27 @@ rdoc_options:
192
191
  require_paths:
193
192
  - lib
194
193
  required_ruby_version: !ruby/object:Gem::Requirement
194
+ none: false
195
195
  requirements:
196
196
  - - ">="
197
197
  - !ruby/object:Gem::Version
198
+ hash: 3
198
199
  segments:
199
200
  - 0
200
201
  version: "0"
201
202
  required_rubygems_version: !ruby/object:Gem::Requirement
203
+ none: false
202
204
  requirements:
203
205
  - - ">="
204
206
  - !ruby/object:Gem::Version
207
+ hash: 3
205
208
  segments:
206
209
  - 0
207
210
  version: "0"
208
211
  requirements: []
209
212
 
210
213
  rubyforge_project: lorax
211
- rubygems_version: 1.3.6
214
+ rubygems_version: 1.3.7
212
215
  signing_key:
213
216
  specification_version: 3
214
217
  summary: The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri
metadata.gz.sig CHANGED
Binary file