html-dom-diff 0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c64fe7c87ae9884bf398b8c8699ee352deaa766ef92c078a1a8789badf0ad8ac
4
- data.tar.gz: cebe07112a9f6ec7b2e573d59e95037bbec4af6fc8937afac112124730fa9bbe
3
+ metadata.gz: 0d731a32d076bd3fc64436c74dff88fa653349a1dc35fd7dfe1598bb7210b001
4
+ data.tar.gz: 79e7466f13f022fd357dd4997d5f168aa6a68b56e9e4f78f494b645658d1fbb5
5
5
  SHA512:
6
- metadata.gz: 76f6659021fbdf482dcf640c9211fbd814a6c21cb92aba508ab24a20e6be79af5b9fd1dbe349a46dec00a198e8aeacb8ed74a5ec1e8362df1e8a16c4dc027c8a
7
- data.tar.gz: 97222b9c82eff526515ce3e2040478e0890202c3fb91d45678461ae58ebd331825109276b8dff33eec971032a00a5d8e6b21ea3ca7606becbff151890d2879b5
6
+ metadata.gz: ca5d8dcc3952fff8d1b4f03cad348b178f23b1b7c27de085117fce57eba4596fd8e05875f7fc84d9608cbb0c4d4c0ad7665d7d62bfdf1c513d336b0bfa7cdb2d
7
+ data.tar.gz: 1590f4c2fc8325396d07c2483d2f59f82348c4485132653ca966662a3e62456e9da93ebd687b65858365479b34313d26341dcb4e5edb2de325302e0a910afc3b
@@ -1,22 +1,55 @@
1
1
  module HTMLDOMDiff
2
2
  class DeltaTreeBuilder
3
3
  attr_reader :ldoc, :rdoc
4
- def initialize(ldoc, rdoc, weights, forward, backward)
4
+ def initialize(ldoc, rdoc)
5
5
  @ldoc = ldoc
6
6
  @rdoc = rdoc
7
- @weights = weights
8
- @forward = forward
9
- @backward = backward
7
+ @weights = {}
8
+ @forward = {}
9
+ @backward = {}
10
10
  end
11
11
 
12
- def build
12
+ def root
13
13
  wrap @rdoc
14
14
  end
15
15
 
16
+ def total_weight
17
+ @weights[ldoc].to_f + @weights[rdoc].to_f
18
+ end
19
+
20
+ def add_weight(element, weight)
21
+ @weights[element] = weight
22
+ end
23
+
24
+ def weight(element)
25
+ @weights[element]
26
+ end
27
+
28
+ def match(left, right)
29
+ @forward[left] = right
30
+ @backward[right] = left
31
+ end
32
+
33
+ def left_matches?(lnode, rnode)
34
+ @forward[lnode] == rnode
35
+ end
36
+
37
+ def left_match(lnode)
38
+ @forward[lnode]
39
+ end
40
+
41
+ def left_matched?(lnode)
42
+ @forward.has_key?(lnode)
43
+ end
44
+
45
+ def right_matched?(rnode)
46
+ @backward.has_key?(rnode)
47
+ end
48
+
16
49
  private
17
50
 
18
51
  def wrap(rnode, parent=nil)
19
- result = Node.new rnode, @backward[rnode], parent
52
+ result = Node.new rnode, @backward[rnode], @weights[rnode], parent
20
53
  rnode.children.each do |child|
21
54
  wrap child, result
22
55
  end
@@ -33,7 +66,7 @@ module HTMLDOMDiff
33
66
 
34
67
  def reverse_wrap(lnode, parent)
35
68
  return if @forward[lnode]
36
- result = Node.new nil, lnode
69
+ result = Node.new nil, lnode, @weights[lnode]
37
70
  lnode.children.each { |c| reverse_wrap c, result }
38
71
  parent.add_child result
39
72
  end
@@ -13,7 +13,7 @@ module HTMLDOMDiff
13
13
  end
14
14
 
15
15
  def diff(ldoc, rdoc)
16
- reset
16
+ reset ldoc, rdoc
17
17
 
18
18
  match_by_ids ldoc, rdoc
19
19
  prep_with @lsignatures, ldoc
@@ -27,11 +27,17 @@ module HTMLDOMDiff
27
27
  match_bottom_up ldoc
28
28
  match_top_down ldoc
29
29
 
30
- DeltaTreeBuilder.new(ldoc, rdoc, @weights, @forward, @backward).build
30
+ @builder
31
31
  end
32
32
 
33
33
  private
34
34
 
35
+ [:left_matches?, :left_match, :left_matched?, :right_matched?].each do |m|
36
+ define_method m do |*args|
37
+ @builder.send m, *args
38
+ end
39
+ end
40
+
35
41
  def parse(string)
36
42
  Nokogiri::HTML(string, nil, nil, (Nokogiri::XML::ParseOptions::DEFAULT_HTML & Nokogiri::XML::ParseOptions::NOBLANKS))
37
43
  end
@@ -40,14 +46,12 @@ module HTMLDOMDiff
40
46
  Nokogiri::HTML::DocumentFragment.parse(string)
41
47
  end
42
48
 
43
- def reset
44
- @forward = {}
45
- @backward = {}
46
- @weights = {}
49
+ def reset(ldoc, rdoc)
50
+ @builder = DeltaTreeBuilder.new(ldoc, rdoc)
47
51
  @depths = {}
48
52
  @lsignatures = {}
49
53
  @rsignatures = {}
50
- @matchqueue = PQueue.new() { |a, b| @weights[a] > @weights[b] }
54
+ @matchqueue = PQueue.new() { |a, b| @builder.weight(a) > @builder.weight(b) }
51
55
  end
52
56
 
53
57
  def match_by_ids(ldoc, rdoc)
@@ -70,11 +74,11 @@ module HTMLDOMDiff
70
74
  signatures << signature
71
75
  end
72
76
 
73
- @weights[element] = weights
77
+ @builder.add_weight(element, weights)
74
78
  sig_hash[element] = hash_for(signatures)
75
79
  @depths[element] = level
76
80
 
77
- [ @weights[element], sig_hash[element] ]
81
+ [ weights, sig_hash[element] ]
78
82
  end
79
83
 
80
84
  def weight_for(element)
@@ -98,8 +102,7 @@ module HTMLDOMDiff
98
102
  end
99
103
 
100
104
  def record_matching(left, right)
101
- @forward[left] = right
102
- @backward[right] = left
105
+ @builder.match(left, right)
103
106
  end
104
107
 
105
108
  def perform_initial_top_down_matching(lnodes, rnodes)
@@ -119,7 +122,7 @@ module HTMLDOMDiff
119
122
  def perform_initial_matching
120
123
  while @matchqueue.size > 0
121
124
  element = @matchqueue.pop
122
- if @backward[element].nil? && (match = find_best_match(element))
125
+ if !right_matched?(element) && (match = find_best_match(element))
123
126
  match_all_children match, element
124
127
  match_parents match, element
125
128
  else
@@ -131,7 +134,7 @@ module HTMLDOMDiff
131
134
  def find_best_match(element)
132
135
  candidates = []
133
136
  @lsignatures.each do |left, sig|
134
- if @forward[left].nil? && sig == @rsignatures[element]
137
+ if !left_matched?(left) && sig == @rsignatures[element]
135
138
  candidates << left
136
139
  end
137
140
  end
@@ -142,7 +145,7 @@ module HTMLDOMDiff
142
145
  return candidates.first
143
146
  else
144
147
  matching_parents = candidates.select do |left|
145
- @forward[left.parent] == element.parent
148
+ left_matches?(left.parent, element.parent)
146
149
  end
147
150
 
148
151
  if matching_parents.size == 1
@@ -162,9 +165,9 @@ module HTMLDOMDiff
162
165
 
163
166
  def match_parents(left, right)
164
167
  # TODO implement multi-ancestor matching
165
- return if @forward[left.parent] || @backward[right.parent]
168
+ return if left_matched?(left.parent) || right_matched?(right.parent)
166
169
  if left.parent.name == right.parent.name
167
- record_matching left, right
170
+ record_matching left.parent, right.parent
168
171
  end
169
172
  end
170
173
 
@@ -173,16 +176,17 @@ module HTMLDOMDiff
173
176
  match_bottom_up child
174
177
  end
175
178
 
176
- if element.respond_to?(:parent) && @forward[element.parent]
177
- match = @forward[element.parent].children.find { |c| @backward[c].nil? && c.name == element.name }
179
+ if !left_matched?(element) && element.respond_to?(:parent) && left_matched?(element.parent)
180
+ children = left_match(element.parent).children.reject { |c| right_matched?(c) }
181
+ match = children.find { |c| c.name == element.name }
178
182
  record_matching(element, match) if match
179
183
  end
180
184
  end
181
185
 
182
186
  def match_top_down(element)
183
- if @forward[element].nil?
184
- childmatches = element.children.map { |c| @forward[c] && @forward[c].parent }.compact.uniq
185
- childmatches.reject! { |e| @backward[e] }
187
+ unless left_matched?(element)
188
+ childmatches = element.children.select { |c| left_matched?(c) }.map { |c| left_match(c).parent }.uniq
189
+ childmatches.reject! { |e| right_matched?(e) }
186
190
  if childmatches.size == 1 && childmatches.first.name == element.name
187
191
  record_matching(element, childmatches.first)
188
192
  end
@@ -1,12 +1,13 @@
1
1
  module HTMLDOMDiff
2
2
  class Node
3
- attr_reader :parent, :children
3
+ attr_reader :parent, :children, :weight
4
4
 
5
5
  attr_reader :rnode
6
6
 
7
- def initialize(rnode, lnode, parent=nil)
7
+ def initialize(rnode, lnode, weight, parent=nil)
8
8
  @rnode = rnode
9
9
  @lnode = lnode
10
+ @weight = weight
10
11
  @parent = parent
11
12
  @children = []
12
13
  end
@@ -71,6 +72,7 @@ module HTMLDOMDiff
71
72
 
72
73
  # states
73
74
  def changed?
75
+ return false unless @rnode && @lnode
74
76
  if @rnode.text?
75
77
  text != original_text
76
78
  else
@@ -1,3 +1,3 @@
1
1
  module HTMLDOMDiff
2
- VERSION = "0.1"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-dom-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Frederik Fix
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-07 00:00:00.000000000 Z
11
+ date: 2018-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri