fast_html_diff 0.8 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/fast_html_diff.rb +43 -9
- data/lib/fast_html_diff/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmI5NmQwZjQyNDNjMDVmODhmZjk0YzgxZDA1MzViNzE5NWRiOTQ4Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzAzZWM0NmYyYWY2NmFhOGIyYWE2MmJkNjg2ZTM3OWNmMTdkYmQyOA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWQwMjc5NjM4YTU4NzliYWMzNmJkMDk0OGZhNjYzZDhkNDE0Y2VhZWFjN2I2
|
10
|
+
OTM3Zjc4ZjcxODdjYTJhZmZhYmI3MmVmNmExNzBiMzM2ZTYyYzU1ZGMyZjQ2
|
11
|
+
YTQ5ZGUwMDM3MjE1OTI4ZGRhY2RmZDI3OTYxMWMyMjBhMjQ2N2I=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MTAwMjNkMDM1ZjE4NTQ3ZGJjMmNiMmExZDFmZTllZDM3YTY0NjMyMzIxZTZm
|
14
|
+
N2VhNTVjOThkOTNhY2Q1N2NlM2ZmODRlYzYwYWU2ZmUxY2NhMzQ0ZGRmMWVm
|
15
|
+
YTBjNjkxMzliNmQ4N2RjODFkOGM4MjJhN2IwM2E5OThiNTkwMzQ=
|
data/lib/fast_html_diff.rb
CHANGED
@@ -4,9 +4,7 @@ require 'nokogiri'
|
|
4
4
|
module FastHtmlDiff
|
5
5
|
class DiffBuilder
|
6
6
|
def initialize(html_str_a,html_str_b,config={})
|
7
|
-
|
8
|
-
@b = html_str_b
|
9
|
-
|
7
|
+
# merge specified config with defaults
|
10
8
|
@config = default_config.merge(config)
|
11
9
|
if config[:tokenizer_regexp].nil?
|
12
10
|
if @config[:ignore_punctuation]
|
@@ -19,14 +17,13 @@ module FastHtmlDiff
|
|
19
17
|
@word_list = {}
|
20
18
|
@insertions = []
|
21
19
|
@deletions = []
|
20
|
+
@matches = []
|
22
21
|
@split_nodes = Hash.new
|
23
22
|
@insertion_nodes = Hash.new
|
24
|
-
end
|
25
23
|
|
26
|
-
|
27
|
-
|
28
|
-
@
|
29
|
-
@b = Nokogiri::HTML(@b)
|
24
|
+
# parse, tokenize and index the input documents
|
25
|
+
@a = Nokogiri::HTML(html_str_a)
|
26
|
+
@b = Nokogiri::HTML(html_str_b)
|
30
27
|
if @config[:simplify_html]
|
31
28
|
simplify_html(@a)
|
32
29
|
simplify_html(@b)
|
@@ -36,12 +33,40 @@ module FastHtmlDiff
|
|
36
33
|
|
37
34
|
# find the insertions and deletions
|
38
35
|
diff_words
|
36
|
+
end
|
39
37
|
|
40
|
-
|
38
|
+
# build output HTML
|
39
|
+
def build
|
40
|
+
# update doc_a with tags for the insertions and deletions
|
41
41
|
update_dom
|
42
42
|
@a.to_html
|
43
43
|
end
|
44
44
|
|
45
|
+
# output statistics on insertions and deletions
|
46
|
+
def statistics
|
47
|
+
result = {
|
48
|
+
insertions: { segments: 0, words: 0, chars: 0 },
|
49
|
+
deletions: { segments: 0, words: 0, chars: 0 },
|
50
|
+
matches: { segments: 0, words: 0, chars: 0}
|
51
|
+
}
|
52
|
+
@insertions.each do |i|
|
53
|
+
result[:insertions][:segments] += 1
|
54
|
+
result[:insertions][:words] += i[:b_end] - i[:b_start] + 1
|
55
|
+
result[:insertions][:chars] += @word_list[:b][i[:b_end]][:end_pos] - @word_list[:b][i[:b_start]][:start_pos]
|
56
|
+
end
|
57
|
+
@deletions.each do |i|
|
58
|
+
result[:deletions][:segments] += 1
|
59
|
+
result[:deletions][:words] += i[:a_end] - i[:a_start] + 1
|
60
|
+
result[:deletions][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
|
61
|
+
end
|
62
|
+
@matches.each do |i|
|
63
|
+
result[:matches][:segments] += 1
|
64
|
+
result[:matches][:words] += i[:a_end] - i[:a_start] + 1
|
65
|
+
result[:matches][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
|
66
|
+
end
|
67
|
+
result
|
68
|
+
end
|
69
|
+
|
45
70
|
private
|
46
71
|
|
47
72
|
# index the words in the document
|
@@ -140,6 +165,9 @@ module FastHtmlDiff
|
|
140
165
|
prev_operation = :deletion
|
141
166
|
end
|
142
167
|
doca_i += 1
|
168
|
+
else
|
169
|
+
if prev_operation == :match
|
170
|
+
@matches.last[:a_end] = doca_i
|
143
171
|
else
|
144
172
|
if prev_operation == :insertion
|
145
173
|
@insertions.last[:next_operation] = :match
|
@@ -147,6 +175,12 @@ module FastHtmlDiff
|
|
147
175
|
@deletions.last[:next_operation] = :match
|
148
176
|
end
|
149
177
|
|
178
|
+
@matches << {
|
179
|
+
a_start: doca_i,
|
180
|
+
a_end: doca_i
|
181
|
+
}
|
182
|
+
end
|
183
|
+
|
150
184
|
prev_operation = :match
|
151
185
|
doca_i += 1
|
152
186
|
docb_i += 1
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fast_html_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kent Mewhort
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|