html-dom-diff 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c64fe7c87ae9884bf398b8c8699ee352deaa766ef92c078a1a8789badf0ad8ac
4
- data.tar.gz: cebe07112a9f6ec7b2e573d59e95037bbec4af6fc8937afac112124730fa9bbe
3
+ metadata.gz: 0d731a32d076bd3fc64436c74dff88fa653349a1dc35fd7dfe1598bb7210b001
4
+ data.tar.gz: 79e7466f13f022fd357dd4997d5f168aa6a68b56e9e4f78f494b645658d1fbb5
5
5
  SHA512:
6
- metadata.gz: 76f6659021fbdf482dcf640c9211fbd814a6c21cb92aba508ab24a20e6be79af5b9fd1dbe349a46dec00a198e8aeacb8ed74a5ec1e8362df1e8a16c4dc027c8a
7
- data.tar.gz: 97222b9c82eff526515ce3e2040478e0890202c3fb91d45678461ae58ebd331825109276b8dff33eec971032a00a5d8e6b21ea3ca7606becbff151890d2879b5
6
+ metadata.gz: ca5d8dcc3952fff8d1b4f03cad348b178f23b1b7c27de085117fce57eba4596fd8e05875f7fc84d9608cbb0c4d4c0ad7665d7d62bfdf1c513d336b0bfa7cdb2d
7
+ data.tar.gz: 1590f4c2fc8325396d07c2483d2f59f82348c4485132653ca966662a3e62456e9da93ebd687b65858365479b34313d26341dcb4e5edb2de325302e0a910afc3b
@@ -1,22 +1,55 @@
1
1
  module HTMLDOMDiff
2
2
  class DeltaTreeBuilder
3
3
  attr_reader :ldoc, :rdoc
4
- def initialize(ldoc, rdoc, weights, forward, backward)
4
+ def initialize(ldoc, rdoc)
5
5
  @ldoc = ldoc
6
6
  @rdoc = rdoc
7
- @weights = weights
8
- @forward = forward
9
- @backward = backward
7
+ @weights = {}
8
+ @forward = {}
9
+ @backward = {}
10
10
  end
11
11
 
12
- def build
12
+ def root
13
13
  wrap @rdoc
14
14
  end
15
15
 
16
+ def total_weight
17
+ @weights[ldoc].to_f + @weights[rdoc].to_f
18
+ end
19
+
20
+ def add_weight(element, weight)
21
+ @weights[element] = weight
22
+ end
23
+
24
+ def weight(element)
25
+ @weights[element]
26
+ end
27
+
28
+ def match(left, right)
29
+ @forward[left] = right
30
+ @backward[right] = left
31
+ end
32
+
33
+ def left_matches?(lnode, rnode)
34
+ @forward[lnode] == rnode
35
+ end
36
+
37
+ def left_match(lnode)
38
+ @forward[lnode]
39
+ end
40
+
41
+ def left_matched?(lnode)
42
+ @forward.has_key?(lnode)
43
+ end
44
+
45
+ def right_matched?(rnode)
46
+ @backward.has_key?(rnode)
47
+ end
48
+
16
49
  private
17
50
 
18
51
  def wrap(rnode, parent=nil)
19
- result = Node.new rnode, @backward[rnode], parent
52
+ result = Node.new rnode, @backward[rnode], @weights[rnode], parent
20
53
  rnode.children.each do |child|
21
54
  wrap child, result
22
55
  end
@@ -33,7 +66,7 @@ module HTMLDOMDiff
33
66
 
34
67
  def reverse_wrap(lnode, parent)
35
68
  return if @forward[lnode]
36
- result = Node.new nil, lnode
69
+ result = Node.new nil, lnode, @weights[lnode]
37
70
  lnode.children.each { |c| reverse_wrap c, result }
38
71
  parent.add_child result
39
72
  end
@@ -13,7 +13,7 @@ module HTMLDOMDiff
13
13
  end
14
14
 
15
15
  def diff(ldoc, rdoc)
16
- reset
16
+ reset ldoc, rdoc
17
17
 
18
18
  match_by_ids ldoc, rdoc
19
19
  prep_with @lsignatures, ldoc
@@ -27,11 +27,17 @@ module HTMLDOMDiff
27
27
  match_bottom_up ldoc
28
28
  match_top_down ldoc
29
29
 
30
- DeltaTreeBuilder.new(ldoc, rdoc, @weights, @forward, @backward).build
30
+ @builder
31
31
  end
32
32
 
33
33
  private
34
34
 
35
+ [:left_matches?, :left_match, :left_matched?, :right_matched?].each do |m|
36
+ define_method m do |*args|
37
+ @builder.send m, *args
38
+ end
39
+ end
40
+
35
41
  def parse(string)
36
42
  Nokogiri::HTML(string, nil, nil, (Nokogiri::XML::ParseOptions::DEFAULT_HTML & Nokogiri::XML::ParseOptions::NOBLANKS))
37
43
  end
@@ -40,14 +46,12 @@ module HTMLDOMDiff
40
46
  Nokogiri::HTML::DocumentFragment.parse(string)
41
47
  end
42
48
 
43
- def reset
44
- @forward = {}
45
- @backward = {}
46
- @weights = {}
49
+ def reset(ldoc, rdoc)
50
+ @builder = DeltaTreeBuilder.new(ldoc, rdoc)
47
51
  @depths = {}
48
52
  @lsignatures = {}
49
53
  @rsignatures = {}
50
- @matchqueue = PQueue.new() { |a, b| @weights[a] > @weights[b] }
54
+ @matchqueue = PQueue.new() { |a, b| @builder.weight(a) > @builder.weight(b) }
51
55
  end
52
56
 
53
57
  def match_by_ids(ldoc, rdoc)
@@ -70,11 +74,11 @@ module HTMLDOMDiff
70
74
  signatures << signature
71
75
  end
72
76
 
73
- @weights[element] = weights
77
+ @builder.add_weight(element, weights)
74
78
  sig_hash[element] = hash_for(signatures)
75
79
  @depths[element] = level
76
80
 
77
- [ @weights[element], sig_hash[element] ]
81
+ [ weights, sig_hash[element] ]
78
82
  end
79
83
 
80
84
  def weight_for(element)
@@ -98,8 +102,7 @@ module HTMLDOMDiff
98
102
  end
99
103
 
100
104
  def record_matching(left, right)
101
- @forward[left] = right
102
- @backward[right] = left
105
+ @builder.match(left, right)
103
106
  end
104
107
 
105
108
  def perform_initial_top_down_matching(lnodes, rnodes)
@@ -119,7 +122,7 @@ module HTMLDOMDiff
119
122
  def perform_initial_matching
120
123
  while @matchqueue.size > 0
121
124
  element = @matchqueue.pop
122
- if @backward[element].nil? && (match = find_best_match(element))
125
+ if !right_matched?(element) && (match = find_best_match(element))
123
126
  match_all_children match, element
124
127
  match_parents match, element
125
128
  else
@@ -131,7 +134,7 @@ module HTMLDOMDiff
131
134
  def find_best_match(element)
132
135
  candidates = []
133
136
  @lsignatures.each do |left, sig|
134
- if @forward[left].nil? && sig == @rsignatures[element]
137
+ if !left_matched?(left) && sig == @rsignatures[element]
135
138
  candidates << left
136
139
  end
137
140
  end
@@ -142,7 +145,7 @@ module HTMLDOMDiff
142
145
  return candidates.first
143
146
  else
144
147
  matching_parents = candidates.select do |left|
145
- @forward[left.parent] == element.parent
148
+ left_matches?(left.parent, element.parent)
146
149
  end
147
150
 
148
151
  if matching_parents.size == 1
@@ -162,9 +165,9 @@ module HTMLDOMDiff
162
165
 
163
166
  def match_parents(left, right)
164
167
  # TODO implement multi-ancestor matching
165
- return if @forward[left.parent] || @backward[right.parent]
168
+ return if left_matched?(left.parent) || right_matched?(right.parent)
166
169
  if left.parent.name == right.parent.name
167
- record_matching left, right
170
+ record_matching left.parent, right.parent
168
171
  end
169
172
  end
170
173
 
@@ -173,16 +176,17 @@ module HTMLDOMDiff
173
176
  match_bottom_up child
174
177
  end
175
178
 
176
- if element.respond_to?(:parent) && @forward[element.parent]
177
- match = @forward[element.parent].children.find { |c| @backward[c].nil? && c.name == element.name }
179
+ if !left_matched?(element) && element.respond_to?(:parent) && left_matched?(element.parent)
180
+ children = left_match(element.parent).children.reject { |c| right_matched?(c) }
181
+ match = children.find { |c| c.name == element.name }
178
182
  record_matching(element, match) if match
179
183
  end
180
184
  end
181
185
 
182
186
  def match_top_down(element)
183
- if @forward[element].nil?
184
- childmatches = element.children.map { |c| @forward[c] && @forward[c].parent }.compact.uniq
185
- childmatches.reject! { |e| @backward[e] }
187
+ unless left_matched?(element)
188
+ childmatches = element.children.select { |c| left_matched?(c) }.map { |c| left_match(c).parent }.uniq
189
+ childmatches.reject! { |e| right_matched?(e) }
186
190
  if childmatches.size == 1 && childmatches.first.name == element.name
187
191
  record_matching(element, childmatches.first)
188
192
  end
@@ -1,12 +1,13 @@
1
1
  module HTMLDOMDiff
2
2
  class Node
3
- attr_reader :parent, :children
3
+ attr_reader :parent, :children, :weight
4
4
 
5
5
  attr_reader :rnode
6
6
 
7
- def initialize(rnode, lnode, parent=nil)
7
+ def initialize(rnode, lnode, weight, parent=nil)
8
8
  @rnode = rnode
9
9
  @lnode = lnode
10
+ @weight = weight
10
11
  @parent = parent
11
12
  @children = []
12
13
  end
@@ -71,6 +72,7 @@ module HTMLDOMDiff
71
72
 
72
73
  # states
73
74
  def changed?
75
+ return false unless @rnode && @lnode
74
76
  if @rnode.text?
75
77
  text != original_text
76
78
  else
@@ -1,3 +1,3 @@
1
1
  module HTMLDOMDiff
2
- VERSION = "0.1"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-dom-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Frederik Fix
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-07 00:00:00.000000000 Z
11
+ date: 2018-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri