lorax 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
@@ -15,6 +15,8 @@ generates deltas in less than O(n * log n) time, accepting some
15
15
  tradeoffs in the size of the delta set. You can find his paper at
16
16
  http://gregory.cobena.free.fr/www/Publications/thesis.html.
17
17
 
18
+ "I am the Lorax, I speak for the trees."
19
+
18
20
  == Features / Problems
19
21
 
20
22
  * Detect differences between documents, or tell whether two documents are the same.
data/Rakefile CHANGED
@@ -5,6 +5,7 @@ gem 'hoe', '>= 2.5.0'
5
5
  require 'hoe'
6
6
 
7
7
  Hoe.plugin :git
8
+ Hoe.plugin :gemspec
8
9
 
9
10
  Hoe.spec 'lorax' do
10
11
  developer "Mike Dalessio", "mike.dalessio@gmail.com"
data/TODO CHANGED
@@ -1,13 +1,8 @@
1
1
  # -*-org-*-
2
- Diffaroo TODO
2
+ Lorax TODO
3
3
 
4
- * gem
5
- *** gemspec
6
- *** license
7
- *** gemcutter
8
4
  * docs
9
5
  *** rdocs
10
- *** readme
11
6
  *** class description notes
12
7
  - Signature: calculate and persist signatures and weights for nodes in a single document
13
8
  - Match: represents a match between two nodes
@@ -1,7 +1,7 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Lorax
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  REQUIRED_NOKOGIRI_VERSION = "1.4.0"
6
6
  raise LoadError, "lorax requires Nokogiri version #{REQUIRED_NOKOGIRI_VERSION} or higher" unless Nokogiri::VERSION >= REQUIRED_NOKOGIRI_VERSION
7
7
  end
@@ -19,7 +19,7 @@ module Lorax
19
19
  end
20
20
 
21
21
  def nodes(sig=nil)
22
- sig ? @nodes[sig] : @node
22
+ sig ? @nodes[sig] : [@node]
23
23
  end
24
24
 
25
25
  def size
@@ -30,12 +30,19 @@ module Lorax
30
30
  return @signatures[node] if @signatures.key?(node)
31
31
  raise ArgumentError, "signature expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
32
32
 
33
- if node.text? || node.cdata? || node.comment?
33
+ if node.text?
34
+ content = node.content.strip
35
+ if content.empty?
36
+ return nil
37
+ else
38
+ monogram = signature = hashify(content)
39
+ end
40
+ elsif node.cdata? || node.comment?
34
41
  monogram = signature = hashify(node.content)
35
42
  elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
36
43
  monogram = signature = hashify(node.to_html)
37
44
  elsif node.element?
38
- children_sig = hashify(node.children .collect { |child| signature(child) })
45
+ children_sig = hashify(node.children .collect { |child| signature(child) }.compact)
39
46
  attr_sig = hashify(node.attributes.sort.collect { |k,v| [k, v.value] }.flatten)
40
47
  monogram = hashify(node.name, attr_sig)
41
48
  signature = hashify(node.name, attr_sig, children_sig)
@@ -55,16 +62,22 @@ module Lorax
55
62
  return @weights[node] if @weights.key?(node)
56
63
  raise ArgumentError, "weight expects a Node, but received #{node.inspect}" unless node.is_a?(Nokogiri::XML::Node)
57
64
 
58
- calculated_weight = \
59
- if node.text? || node.cdata? || node.comment?
60
- 1 + Math.log(node.content.length)
61
- elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
62
- 1
63
- elsif node.element?
64
- node.children.inject(1) { |sum, child| sum += weight(child) }
65
- else
66
- raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
65
+ if node.text?
66
+ content = node.content.strip
67
+ if content.empty?
68
+ calculated_weight = 0
69
+ else
70
+ calculated_weight = 1 + Math.log(content.length)
67
71
  end
72
+ elsif node.cdata? || node.comment?
73
+ calculated_weight = 1 + Math.log(node.content.length)
74
+ elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
75
+ calculated_weight = 1
76
+ elsif node.element?
77
+ calculated_weight = node.children.inject(1) { |sum, child| sum += weight(child) }
78
+ else
79
+ raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
80
+ end
68
81
 
69
82
  @weights[node] = calculated_weight
70
83
  end
@@ -1,6 +1,8 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
3
  describe Lorax::Signature do
4
+ WHITESPACES = ["\n"," ","\t","\r","\f"]
5
+
4
6
  def assert_node_signature_equal(node1, node2)
5
7
  Lorax::Signature.new(node1).signature.should == Lorax::Signature.new(node2).signature
6
8
  end
@@ -46,6 +48,18 @@ describe Lorax::Signature do
46
48
  node_sig = Lorax::Signature.new(nodes.first)
47
49
  doc_sig.nodes(node_sig.signature).should =~ nodes.to_a
48
50
  end
51
+
52
+ it "returns the node if I pass nil" do
53
+ doc = xml { root {
54
+ a1 "hello1"
55
+ a1 "hello2"
56
+ a1 "hello3"
57
+ } }
58
+ nodes = doc.css("a1")
59
+ doc_sig = Lorax::Signature.new(doc.root)
60
+ node_sig = Lorax::Signature.new(nodes.first)
61
+ doc_sig.nodes(nil).should == [doc.root]
62
+ end
49
63
  end
50
64
 
51
65
  describe "#size" do
@@ -166,24 +180,46 @@ describe Lorax::Signature do
166
180
  sig.signature(node)
167
181
  end
168
182
 
169
- context "identical text nodes" do
170
- it "have equal signatures" do
183
+ context "passed a text Node" do
184
+ it "returns equal signatures for identical text nodes" do
171
185
  doc = xml { root {
172
186
  span "hello"
173
187
  span "hello"
174
188
  } }
175
189
  assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
176
190
  end
177
- end
178
191
 
179
- context "different text nodes" do
180
- it "have inequal signatures" do
192
+ it "returns inequal signatures for different text nodes" do
181
193
  doc = xml { root {
182
194
  span "hello"
183
195
  span "goodbye"
184
196
  } }
185
197
  assert_node_signature_not_equal(*doc.css("span").collect { |n| n.children.first })
186
198
  end
199
+
200
+ it "ignores leading whitespace" do
201
+ doc = xml { root {
202
+ span "hello"
203
+ span "#{WHITESPACES.join}hello"
204
+ } }
205
+ assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
206
+ end
207
+
208
+ it "ignores trailing whitespace" do
209
+ doc = xml { root {
210
+ span "hello"
211
+ span "hello#{WHITESPACES.join}"
212
+ } }
213
+ assert_node_signature_equal(*doc.css("span").collect { |n| n.children.first })
214
+ end
215
+
216
+ it "treats empty text nodes the same as no text node" do
217
+ doc = xml { root {
218
+ span WHITESPACES.join
219
+ span
220
+ } }
221
+ assert_node_signature_equal(*doc.css("span"))
222
+ end
187
223
  end
188
224
 
189
225
  context "elements with same name (with no attributes and no content)" do
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lorax
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 23
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 1
8
+ - 2
8
9
  - 0
9
- version: 0.1.0
10
+ version: 0.2.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Mike Dalessio
@@ -17,34 +18,36 @@ cert_chain:
17
18
  -----BEGIN CERTIFICATE-----
18
19
  MIIDPDCCAiSgAwIBAgIBADANBgkqhkiG9w0BAQUFADBEMRYwFAYDVQQDDA1taWtl
19
20
  LmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJk/IsZAEZ
20
- FgNjb20wHhcNMDkwODExMDU0MjQ5WhcNMTAwODExMDU0MjQ5WjBEMRYwFAYDVQQD
21
+ FgNjb20wHhcNMTAwOTMwMDYyNjQ3WhcNMTEwOTMwMDYyNjQ3WjBEMRYwFAYDVQQD
21
22
  DA1taWtlLmRhbGVzc2lvMRUwEwYKCZImiZPyLGQBGRYFZ21haWwxEzARBgoJkiaJ
22
- k/IsZAEZFgNjb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDANjr7
23
- lZ1DKtK8YvNp+5kBzIpwrpClHRrosqo01qmWfGBxZckQUtrJUwGPxpzvIHVq1VKp
24
- a9FXU/QWYek/1S0vhkOf9XGmFBnVCtbJhwGeyzsQFFSoQIfs2hd5gO0dSRpuKdi3
25
- slfJAXzFKg1u/7OCVPgrY/mkdh34MzL5p0gSDzPt7vLPibctHg0GoepYT5Fh1tMQ
26
- luzgrN0weTw/QoEWTMQcNk6CyUpzv0pOe7d0qEPQ9Lx7Lz64gIym3f0pKFpWLfME
27
- l7PFLeR95zw2zsuZQwCR5ma5zjXD3mo2jk1mVqiI8qplOL1u30FU7hRhTV5n/Qe9
28
- elDQoZW9Xz0R5JGDAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0G
29
- A1UdDgQWBBRXWlUJZXcR1jkZPE24+mjUTCqNxjANBgkqhkiG9w0BAQUFAAOCAQEA
30
- jDh5M41sg1MZKG1DXzQmo/IADeWRmXyb3EZaED9lhFFpoQqaralgpgmvuc0GswvO
31
- QIZijh03tPQz8lgp1U1OFZod2ZwbEVTtVZpxs1ssjMraOA6KzlsNROH0XonIiy6j
32
- r2Q0UF35ax8pvr3D5Y6AKzIW1F3aeiREylUDJlb/i1dPQ2PVK0yRrSQoK2epwM9E
33
- zoczlHTTJc/tRvH5Up3Agcv9y+J0U9a1Af9NRsnHPVBdo2H32MsJ99x5NRDWJmJg
34
- ohH37UR7njcc6j4fo22IwTqXaaXJdtVdAWjXP/xs5B3cPYSP6uqFnR46Jf86Iqj1
35
- FlqnTjy13J3nD30uxy9a1g==
23
+ k/IsZAEZFgNjb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDLv4nl
24
+ BGRtliYy5s5MhlFO88UvkkETFcS79OCaGFKorxPTmcfDrR2/2x0mAySXJ6I1uPEU
25
+ WSAWaPb1at61NEOvp5kRNzUNdwGakBA/fd1vZ1N2rwHRtjk/8t6DX8yiflr6T761
26
+ 9ZMYPE+t85NvlPt0/WpT778imNZXwGQNcQJwNESDiBTgyjN8bOWpvRrVADVdOCme
27
+ DW3AfJnF/kdMYuSiUuFMZpyOlULEbOsrvOfUoEKjoFaVNv7FJ28/kLH1UgmtucOD
28
+ m5bZ/qy5b2+CWzzsmUfysaGnLQ4LjvAFpmgZGAjIE9TnyjU0jw+2e7dq8uRjdnFJ
29
+ gfWQlnJuwAlZXR1nAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0G
30
+ A1UdDgQWBBRbc4XnK6it228clp2DjyqaVjKW+DANBgkqhkiG9w0BAQUFAAOCAQEA
31
+ xPtSPtMl9qsgNGcnSDLSTjwGouwsjOB19IbtdODFTabUpRPCk7OFHeYGdJik4iiZ
32
+ fk10t3vzr6uWMAyOfwpWWFRnEYAvy9ZaMGDIZPKD8xWxaRTLwmi+pQsS8Lo2IpDC
33
+ Lb+l0lUiRiYS3/Ez7tA6pS122cvuQroWfuqh5Mi3pNAi1nuBTlhCNJuR5XUaOjqs
34
+ DAoZLfYEEW+4bmkAb6ky2TPUslaln56PO3/JG+IfWZwCvTFFVdKRBKXqLaAxO9rv
35
+ 7nflCv7xpUSUGGZ6hoPG8dil+Mp/kKV8cb1kxZz+C8660hC93dJ3FQ3adX30ylvZ
36
+ C4THW+6HEQDCdOkiArif8A==
36
37
  -----END CERTIFICATE-----
37
38
 
38
- date: 2010-03-09 00:00:00 -05:00
39
+ date: 2010-10-14 00:00:00 -04:00
39
40
  default_executable:
40
41
  dependencies:
41
42
  - !ruby/object:Gem::Dependency
42
43
  name: nokogiri
43
44
  prerelease: false
44
45
  requirement: &id001 !ruby/object:Gem::Requirement
46
+ none: false
45
47
  requirements:
46
48
  - - ">="
47
49
  - !ruby/object:Gem::Version
50
+ hash: 7
48
51
  segments:
49
52
  - 1
50
53
  - 4
@@ -56,72 +59,66 @@ dependencies:
56
59
  name: rubyforge
57
60
  prerelease: false
58
61
  requirement: &id002 !ruby/object:Gem::Requirement
62
+ none: false
59
63
  requirements:
60
64
  - - ">="
61
65
  - !ruby/object:Gem::Version
66
+ hash: 7
62
67
  segments:
63
68
  - 2
64
69
  - 0
65
- - 3
66
- version: 2.0.3
70
+ - 4
71
+ version: 2.0.4
67
72
  type: :development
68
73
  version_requirements: *id002
69
- - !ruby/object:Gem::Dependency
70
- name: gemcutter
71
- prerelease: false
72
- requirement: &id003 !ruby/object:Gem::Requirement
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- - 3
79
- - 0
80
- version: 0.3.0
81
- type: :development
82
- version_requirements: *id003
83
74
  - !ruby/object:Gem::Dependency
84
75
  name: rspec
85
76
  prerelease: false
86
- requirement: &id004 !ruby/object:Gem::Requirement
77
+ requirement: &id003 !ruby/object:Gem::Requirement
78
+ none: false
87
79
  requirements:
88
80
  - - ">="
89
81
  - !ruby/object:Gem::Version
82
+ hash: 13
90
83
  segments:
91
84
  - 1
92
85
  - 2
93
86
  - 9
94
87
  version: 1.2.9
95
88
  type: :development
96
- version_requirements: *id004
89
+ version_requirements: *id003
97
90
  - !ruby/object:Gem::Dependency
98
91
  name: rr
99
92
  prerelease: false
100
- requirement: &id005 !ruby/object:Gem::Requirement
93
+ requirement: &id004 !ruby/object:Gem::Requirement
94
+ none: false
101
95
  requirements:
102
96
  - - ">="
103
97
  - !ruby/object:Gem::Version
98
+ hash: 63
104
99
  segments:
105
100
  - 0
106
101
  - 10
107
102
  - 4
108
103
  version: 0.10.4
109
104
  type: :development
110
- version_requirements: *id005
105
+ version_requirements: *id004
111
106
  - !ruby/object:Gem::Dependency
112
107
  name: hoe
113
108
  prerelease: false
114
- requirement: &id006 !ruby/object:Gem::Requirement
109
+ requirement: &id005 !ruby/object:Gem::Requirement
110
+ none: false
115
111
  requirements:
116
112
  - - ">="
117
113
  - !ruby/object:Gem::Version
114
+ hash: 21
118
115
  segments:
119
116
  - 2
120
- - 5
121
- - 0
122
- version: 2.5.0
117
+ - 6
118
+ - 1
119
+ version: 2.6.1
123
120
  type: :development
124
- version_requirements: *id006
121
+ version_requirements: *id005
125
122
  description: |-
126
123
  The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri.
127
124
 
@@ -133,6 +130,8 @@ description: |-
133
130
  generates deltas in less than O(n * log n) time, accepting some
134
131
  tradeoffs in the size of the delta set. You can find his paper at
135
132
  http://gregory.cobena.free.fr/www/Publications/thesis.html.
133
+
134
+ "I am the Lorax, I speak for the trees."
136
135
  email:
137
136
  - mike.dalessio@gmail.com
138
137
  executables:
@@ -192,23 +191,27 @@ rdoc_options:
192
191
  require_paths:
193
192
  - lib
194
193
  required_ruby_version: !ruby/object:Gem::Requirement
194
+ none: false
195
195
  requirements:
196
196
  - - ">="
197
197
  - !ruby/object:Gem::Version
198
+ hash: 3
198
199
  segments:
199
200
  - 0
200
201
  version: "0"
201
202
  required_rubygems_version: !ruby/object:Gem::Requirement
203
+ none: false
202
204
  requirements:
203
205
  - - ">="
204
206
  - !ruby/object:Gem::Version
207
+ hash: 3
205
208
  segments:
206
209
  - 0
207
210
  version: "0"
208
211
  requirements: []
209
212
 
210
213
  rubyforge_project: lorax
211
- rubygems_version: 1.3.6
214
+ rubygems_version: 1.3.7
212
215
  signing_key:
213
216
  specification_version: 3
214
217
  summary: The Lorax is a full diff and patch library for XML/HTML documents, based on Nokogiri
metadata.gz.sig CHANGED
Binary file