html-dom-diff 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html-dom-diff/delta_tree_builder.rb +40 -7
- data/lib/html-dom-diff/differ.rb +25 -21
- data/lib/html-dom-diff/node.rb +4 -2
- data/lib/html-dom-diff/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d731a32d076bd3fc64436c74dff88fa653349a1dc35fd7dfe1598bb7210b001
|
4
|
+
data.tar.gz: 79e7466f13f022fd357dd4997d5f168aa6a68b56e9e4f78f494b645658d1fbb5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca5d8dcc3952fff8d1b4f03cad348b178f23b1b7c27de085117fce57eba4596fd8e05875f7fc84d9608cbb0c4d4c0ad7665d7d62bfdf1c513d336b0bfa7cdb2d
|
7
|
+
data.tar.gz: 1590f4c2fc8325396d07c2483d2f59f82348c4485132653ca966662a3e62456e9da93ebd687b65858365479b34313d26341dcb4e5edb2de325302e0a910afc3b
|
data/lib/html-dom-diff/delta_tree_builder.rb
CHANGED
@@ -1,22 +1,55 @@
|
|
1
1
|
module HTMLDOMDiff
|
2
2
|
class DeltaTreeBuilder
|
3
3
|
attr_reader :ldoc, :rdoc
|
4
|
-
def initialize(ldoc, rdoc
|
4
|
+
def initialize(ldoc, rdoc)
|
5
5
|
@ldoc = ldoc
|
6
6
|
@rdoc = rdoc
|
7
|
-
@weights =
|
8
|
-
@forward =
|
9
|
-
@backward =
|
7
|
+
@weights = {}
|
8
|
+
@forward = {}
|
9
|
+
@backward = {}
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def root
|
13
13
|
wrap @rdoc
|
14
14
|
end
|
15
15
|
|
16
|
+
def total_weight
|
17
|
+
@weights[ldoc].to_f + @weights[rdoc].to_f
|
18
|
+
end
|
19
|
+
|
20
|
+
def add_weight(element, weight)
|
21
|
+
@weights[element] = weight
|
22
|
+
end
|
23
|
+
|
24
|
+
def weight(element)
|
25
|
+
@weights[element]
|
26
|
+
end
|
27
|
+
|
28
|
+
def match(left, right)
|
29
|
+
@forward[left] = right
|
30
|
+
@backward[right] = left
|
31
|
+
end
|
32
|
+
|
33
|
+
def left_matches?(lnode, rnode)
|
34
|
+
@forward[lnode] == rnode
|
35
|
+
end
|
36
|
+
|
37
|
+
def left_match(lnode)
|
38
|
+
@forward[lnode]
|
39
|
+
end
|
40
|
+
|
41
|
+
def left_matched?(lnode)
|
42
|
+
@forward.has_key?(lnode)
|
43
|
+
end
|
44
|
+
|
45
|
+
def right_matched?(rnode)
|
46
|
+
@backward.has_key?(rnode)
|
47
|
+
end
|
48
|
+
|
16
49
|
private
|
17
50
|
|
18
51
|
def wrap(rnode, parent=nil)
|
19
|
-
result = Node.new rnode, @backward[rnode], parent
|
52
|
+
result = Node.new rnode, @backward[rnode], @weights[rnode], parent
|
20
53
|
rnode.children.each do |child|
|
21
54
|
wrap child, result
|
22
55
|
end
|
@@ -33,7 +66,7 @@ module HTMLDOMDiff
|
|
33
66
|
|
34
67
|
def reverse_wrap(lnode, parent)
|
35
68
|
return if @forward[lnode]
|
36
|
-
result = Node.new nil, lnode
|
69
|
+
result = Node.new nil, lnode, @weights[lnode]
|
37
70
|
lnode.children.each { |c| reverse_wrap c, result }
|
38
71
|
parent.add_child result
|
39
72
|
end
|
data/lib/html-dom-diff/differ.rb
CHANGED
@@ -13,7 +13,7 @@ module HTMLDOMDiff
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def diff(ldoc, rdoc)
|
16
|
-
reset
|
16
|
+
reset ldoc, rdoc
|
17
17
|
|
18
18
|
match_by_ids ldoc, rdoc
|
19
19
|
prep_with @lsignatures, ldoc
|
@@ -27,11 +27,17 @@ module HTMLDOMDiff
|
|
27
27
|
match_bottom_up ldoc
|
28
28
|
match_top_down ldoc
|
29
29
|
|
30
|
-
|
30
|
+
@builder
|
31
31
|
end
|
32
32
|
|
33
33
|
private
|
34
34
|
|
35
|
+
[:left_matches?, :left_match, :left_matched?, :right_matched?].each do |m|
|
36
|
+
define_method m do |*args|
|
37
|
+
@builder.send m, *args
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
35
41
|
def parse(string)
|
36
42
|
Nokogiri::HTML(string, nil, nil, (Nokogiri::XML::ParseOptions::DEFAULT_HTML & Nokogiri::XML::ParseOptions::NOBLANKS))
|
37
43
|
end
|
@@ -40,14 +46,12 @@ module HTMLDOMDiff
|
|
40
46
|
Nokogiri::HTML::DocumentFragment.parse(string)
|
41
47
|
end
|
42
48
|
|
43
|
-
def reset
|
44
|
-
@
|
45
|
-
@backward = {}
|
46
|
-
@weights = {}
|
49
|
+
def reset(ldoc, rdoc)
|
50
|
+
@builder = DeltaTreeBuilder.new(ldoc, rdoc)
|
47
51
|
@depths = {}
|
48
52
|
@lsignatures = {}
|
49
53
|
@rsignatures = {}
|
50
|
-
@matchqueue = PQueue.new() { |a, b| @
|
54
|
+
@matchqueue = PQueue.new() { |a, b| @builder.weight(a) > @builder.weight(b) }
|
51
55
|
end
|
52
56
|
|
53
57
|
def match_by_ids(ldoc, rdoc)
|
@@ -70,11 +74,11 @@ module HTMLDOMDiff
|
|
70
74
|
signatures << signature
|
71
75
|
end
|
72
76
|
|
73
|
-
@
|
77
|
+
@builder.add_weight(element, weights)
|
74
78
|
sig_hash[element] = hash_for(signatures)
|
75
79
|
@depths[element] = level
|
76
80
|
|
77
|
-
[
|
81
|
+
[ weights, sig_hash[element] ]
|
78
82
|
end
|
79
83
|
|
80
84
|
def weight_for(element)
|
@@ -98,8 +102,7 @@ module HTMLDOMDiff
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def record_matching(left, right)
|
101
|
-
@
|
102
|
-
@backward[right] = left
|
105
|
+
@builder.match(left, right)
|
103
106
|
end
|
104
107
|
|
105
108
|
def perform_initial_top_down_matching(lnodes, rnodes)
|
@@ -119,7 +122,7 @@ module HTMLDOMDiff
|
|
119
122
|
def perform_initial_matching
|
120
123
|
while @matchqueue.size > 0
|
121
124
|
element = @matchqueue.pop
|
122
|
-
if
|
125
|
+
if !right_matched?(element) && (match = find_best_match(element))
|
123
126
|
match_all_children match, element
|
124
127
|
match_parents match, element
|
125
128
|
else
|
@@ -131,7 +134,7 @@ module HTMLDOMDiff
|
|
131
134
|
def find_best_match(element)
|
132
135
|
candidates = []
|
133
136
|
@lsignatures.each do |left, sig|
|
134
|
-
if
|
137
|
+
if !left_matched?(left) && sig == @rsignatures[element]
|
135
138
|
candidates << left
|
136
139
|
end
|
137
140
|
end
|
@@ -142,7 +145,7 @@ module HTMLDOMDiff
|
|
142
145
|
return candidates.first
|
143
146
|
else
|
144
147
|
matching_parents = candidates.select do |left|
|
145
|
-
|
148
|
+
left_matches?(left.parent, element.parent)
|
146
149
|
end
|
147
150
|
|
148
151
|
if matching_parents.size == 1
|
@@ -162,9 +165,9 @@ module HTMLDOMDiff
|
|
162
165
|
|
163
166
|
def match_parents(left, right)
|
164
167
|
# TODO implement multi-ancestor matching
|
165
|
-
return if
|
168
|
+
return if left_matched?(left.parent) || right_matched?(right.parent)
|
166
169
|
if left.parent.name == right.parent.name
|
167
|
-
record_matching left, right
|
170
|
+
record_matching left.parent, right.parent
|
168
171
|
end
|
169
172
|
end
|
170
173
|
|
@@ -173,16 +176,17 @@ module HTMLDOMDiff
|
|
173
176
|
match_bottom_up child
|
174
177
|
end
|
175
178
|
|
176
|
-
if element.respond_to?(:parent) &&
|
177
|
-
|
179
|
+
if !left_matched?(element) && element.respond_to?(:parent) && left_matched?(element.parent)
|
180
|
+
children = left_match(element.parent).children.reject { |c| right_matched?(c) }
|
181
|
+
match = children.find { |c| c.name == element.name }
|
178
182
|
record_matching(element, match) if match
|
179
183
|
end
|
180
184
|
end
|
181
185
|
|
182
186
|
def match_top_down(element)
|
183
|
-
|
184
|
-
childmatches = element.children.
|
185
|
-
childmatches.reject! { |e|
|
187
|
+
unless left_matched?(element)
|
188
|
+
childmatches = element.children.select { |c| left_matched?(c) }.map { |c| left_match(c).parent }.uniq
|
189
|
+
childmatches.reject! { |e| right_matched?(e) }
|
186
190
|
if childmatches.size == 1 && childmatches.first.name == element.name
|
187
191
|
record_matching(element, childmatches.first)
|
188
192
|
end
|
data/lib/html-dom-diff/node.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
module HTMLDOMDiff
|
2
2
|
class Node
|
3
|
-
attr_reader :parent, :children
|
3
|
+
attr_reader :parent, :children, :weight
|
4
4
|
|
5
5
|
attr_reader :rnode
|
6
6
|
|
7
|
-
def initialize(rnode, lnode, parent=nil)
|
7
|
+
def initialize(rnode, lnode, weight, parent=nil)
|
8
8
|
@rnode = rnode
|
9
9
|
@lnode = lnode
|
10
|
+
@weight = weight
|
10
11
|
@parent = parent
|
11
12
|
@children = []
|
12
13
|
end
|
@@ -71,6 +72,7 @@ module HTMLDOMDiff
|
|
71
72
|
|
72
73
|
# states
|
73
74
|
def changed?
|
75
|
+
return false unless @rnode && @lnode
|
74
76
|
if @rnode.text?
|
75
77
|
text != original_text
|
76
78
|
else
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-dom-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Frederik Fix
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|