fast_html_diff 0.8 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/fast_html_diff.rb +43 -9
- data/lib/fast_html_diff/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmI5NmQwZjQyNDNjMDVmODhmZjk0YzgxZDA1MzViNzE5NWRiOTQ4Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzAzZWM0NmYyYWY2NmFhOGIyYWE2MmJkNjg2ZTM3OWNmMTdkYmQyOA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWQwMjc5NjM4YTU4NzliYWMzNmJkMDk0OGZhNjYzZDhkNDE0Y2VhZWFjN2I2
|
10
|
+
OTM3Zjc4ZjcxODdjYTJhZmZhYmI3MmVmNmExNzBiMzM2ZTYyYzU1ZGMyZjQ2
|
11
|
+
YTQ5ZGUwMDM3MjE1OTI4ZGRhY2RmZDI3OTYxMWMyMjBhMjQ2N2I=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MTAwMjNkMDM1ZjE4NTQ3ZGJjMmNiMmExZDFmZTllZDM3YTY0NjMyMzIxZTZm
|
14
|
+
N2VhNTVjOThkOTNhY2Q1N2NlM2ZmODRlYzYwYWU2ZmUxY2NhMzQ0ZGRmMWVm
|
15
|
+
YTBjNjkxMzliNmQ4N2RjODFkOGM4MjJhN2IwM2E5OThiNTkwMzQ=
|
data/lib/fast_html_diff.rb
CHANGED
@@ -4,9 +4,7 @@ require 'nokogiri'
|
|
4
4
|
module FastHtmlDiff
|
5
5
|
class DiffBuilder
|
6
6
|
def initialize(html_str_a,html_str_b,config={})
|
7
|
-
|
8
|
-
@b = html_str_b
|
9
|
-
|
7
|
+
# merge specified config with defaults
|
10
8
|
@config = default_config.merge(config)
|
11
9
|
if config[:tokenizer_regexp].nil?
|
12
10
|
if @config[:ignore_punctuation]
|
@@ -19,14 +17,13 @@ module FastHtmlDiff
|
|
19
17
|
@word_list = {}
|
20
18
|
@insertions = []
|
21
19
|
@deletions = []
|
20
|
+
@matches = []
|
22
21
|
@split_nodes = Hash.new
|
23
22
|
@insertion_nodes = Hash.new
|
24
|
-
end
|
25
23
|
|
26
|
-
|
27
|
-
|
28
|
-
@a = Nokogiri::HTML(@a)
|
29
|
-
@b = Nokogiri::HTML(@b)
|
24
|
+
# parse, tokenize and index the input documents
|
25
|
+
@a = Nokogiri::HTML(html_str_a)
|
26
|
+
@b = Nokogiri::HTML(html_str_b)
|
30
27
|
if @config[:simplify_html]
|
31
28
|
simplify_html(@a)
|
32
29
|
simplify_html(@b)
|
@@ -36,12 +33,40 @@ module FastHtmlDiff
|
|
36
33
|
|
37
34
|
# find the insertions and deletions
|
38
35
|
diff_words
|
36
|
+
end
|
39
37
|
|
40
|
-
|
38
|
+
# build output HTML
|
39
|
+
def build
|
40
|
+
# update doc_a with tags for the insertions and deletions
|
41
41
|
update_dom
|
42
42
|
@a.to_html
|
43
43
|
end
|
44
44
|
|
45
|
+
# output statistics on insertions and deletions
|
46
|
+
def statistics
|
47
|
+
result = {
|
48
|
+
insertions: { segments: 0, words: 0, chars: 0 },
|
49
|
+
deletions: { segments: 0, words: 0, chars: 0 },
|
50
|
+
matches: { segments: 0, words: 0, chars: 0}
|
51
|
+
}
|
52
|
+
@insertions.each do |i|
|
53
|
+
result[:insertions][:segments] += 1
|
54
|
+
result[:insertions][:words] += i[:b_end] - i[:b_start] + 1
|
55
|
+
result[:insertions][:chars] += @word_list[:b][i[:b_end]][:end_pos] - @word_list[:b][i[:b_start]][:start_pos]
|
56
|
+
end
|
57
|
+
@deletions.each do |i|
|
58
|
+
result[:deletions][:segments] += 1
|
59
|
+
result[:deletions][:words] += i[:a_end] - i[:a_start] + 1
|
60
|
+
result[:deletions][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
|
61
|
+
end
|
62
|
+
@matches.each do |i|
|
63
|
+
result[:matches][:segments] += 1
|
64
|
+
result[:matches][:words] += i[:a_end] - i[:a_start] + 1
|
65
|
+
result[:matches][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
|
66
|
+
end
|
67
|
+
result
|
68
|
+
end
|
69
|
+
|
45
70
|
private
|
46
71
|
|
47
72
|
# index the words in the document
|
@@ -140,6 +165,9 @@ module FastHtmlDiff
|
|
140
165
|
prev_operation = :deletion
|
141
166
|
end
|
142
167
|
doca_i += 1
|
168
|
+
else
|
169
|
+
if prev_operation == :match
|
170
|
+
@matches.last[:a_end] = doca_i
|
143
171
|
else
|
144
172
|
if prev_operation == :insertion
|
145
173
|
@insertions.last[:next_operation] = :match
|
@@ -147,6 +175,12 @@ module FastHtmlDiff
|
|
147
175
|
@deletions.last[:next_operation] = :match
|
148
176
|
end
|
149
177
|
|
178
|
+
@matches << {
|
179
|
+
a_start: doca_i,
|
180
|
+
a_end: doca_i
|
181
|
+
}
|
182
|
+
end
|
183
|
+
|
150
184
|
prev_operation = :match
|
151
185
|
doca_i += 1
|
152
186
|
docb_i += 1
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fast_html_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.8'
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kent Mewhort
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|