fast_html_diff 0.8 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGI5NjhiMjI5ODFlNjJhM2NhODJhNjAxMDRlMWM2Y2ExNGJmYzlkNg==
4
+ MmI5NmQwZjQyNDNjMDVmODhmZjk0YzgxZDA1MzViNzE5NWRiOTQ4Yw==
5
5
  data.tar.gz: !binary |-
6
- OTgzNjEwMWQ5NTJiNGMxNzljODljYzJmYmY5ZGEzMjg1NDA5ZjA0OA==
6
+ YzAzZWM0NmYyYWY2NmFhOGIyYWE2MmJkNjg2ZTM3OWNmMTdkYmQyOA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZTAxYmU5MzEwMGY4ZTYwYmYwYjVhNTIxM2UzM2RjMTg2MDZkZWZjYzYwNjQ1
10
- NjMzOGY1YThjMzY1M2E4MjU4ZTZhODBhMmM3MmRlYzA0ZmQ2MDdkZWI5ZjE0
11
- NzI0NGVmNjMwMjYxMGM5ZmE0MDdiMWU3ZTAwNmRjZWMyNGIxMWQ=
9
+ YWQwMjc5NjM4YTU4NzliYWMzNmJkMDk0OGZhNjYzZDhkNDE0Y2VhZWFjN2I2
10
+ OTM3Zjc4ZjcxODdjYTJhZmZhYmI3MmVmNmExNzBiMzM2ZTYyYzU1ZGMyZjQ2
11
+ YTQ5ZGUwMDM3MjE1OTI4ZGRhY2RmZDI3OTYxMWMyMjBhMjQ2N2I=
12
12
  data.tar.gz: !binary |-
13
- ZWYyOWMwMDZkN2JiMjZjMWExNDA3OWNlMGI4ZDZiNDg3OTMxN2YwZjc0NDFk
14
- OGE0OGZkYTU0ZTk4MjkwOGJlYmZlZmRiYjZiMmE2MzhjODU3ZjZlOWZkMmIy
15
- ODU1NGQ1ZGY3MjU1ZDZhNDMzOTNmNGI5YzNkZjVjOTg0ZDIyMTc=
13
+ MTAwMjNkMDM1ZjE4NTQ3ZGJjMmNiMmExZDFmZTllZDM3YTY0NjMyMzIxZTZm
14
+ N2VhNTVjOThkOTNhY2Q1N2NlM2ZmODRlYzYwYWU2ZmUxY2NhMzQ0ZGRmMWVm
15
+ YTBjNjkxMzliNmQ4N2RjODFkOGM4MjJhN2IwM2E5OThiNTkwMzQ=
@@ -4,9 +4,7 @@ require 'nokogiri'
4
4
  module FastHtmlDiff
5
5
  class DiffBuilder
6
6
  def initialize(html_str_a,html_str_b,config={})
7
- @a = html_str_a
8
- @b = html_str_b
9
-
7
+ # merge specified config with defaults
10
8
  @config = default_config.merge(config)
11
9
  if config[:tokenizer_regexp].nil?
12
10
  if @config[:ignore_punctuation]
@@ -19,14 +17,13 @@ module FastHtmlDiff
19
17
  @word_list = {}
20
18
  @insertions = []
21
19
  @deletions = []
20
+ @matches = []
22
21
  @split_nodes = Hash.new
23
22
  @insertion_nodes = Hash.new
24
- end
25
23
 
26
- def build
27
- # parse, tokenize and index
28
- @a = Nokogiri::HTML(@a)
29
- @b = Nokogiri::HTML(@b)
24
+ # parse, tokenize and index the input documents
25
+ @a = Nokogiri::HTML(html_str_a)
26
+ @b = Nokogiri::HTML(html_str_b)
30
27
  if @config[:simplify_html]
31
28
  simplify_html(@a)
32
29
  simplify_html(@b)
@@ -36,12 +33,40 @@ module FastHtmlDiff
36
33
 
37
34
  # find the insertions and deletions
38
35
  diff_words
36
+ end
39
37
 
40
- # update doc a with tags for the insertions and deletions
38
+ # build output HTML
39
+ def build
40
+ # update doc_a with tags for the insertions and deletions
41
41
  update_dom
42
42
  @a.to_html
43
43
  end
44
44
 
45
+ # output statistics on insertions and deletions
46
+ def statistics
47
+ result = {
48
+ insertions: { segments: 0, words: 0, chars: 0 },
49
+ deletions: { segments: 0, words: 0, chars: 0 },
50
+ matches: { segments: 0, words: 0, chars: 0}
51
+ }
52
+ @insertions.each do |i|
53
+ result[:insertions][:segments] += 1
54
+ result[:insertions][:words] += i[:b_end] - i[:b_start] + 1
55
+ result[:insertions][:chars] += @word_list[:b][i[:b_end]][:end_pos] - @word_list[:b][i[:b_start]][:start_pos]
56
+ end
57
+ @deletions.each do |i|
58
+ result[:deletions][:segments] += 1
59
+ result[:deletions][:words] += i[:a_end] - i[:a_start] + 1
60
+ result[:deletions][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
61
+ end
62
+ @matches.each do |i|
63
+ result[:matches][:segments] += 1
64
+ result[:matches][:words] += i[:a_end] - i[:a_start] + 1
65
+ result[:matches][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
66
+ end
67
+ result
68
+ end
69
+
45
70
  private
46
71
 
47
72
  # index the words in the document
@@ -140,6 +165,9 @@ module FastHtmlDiff
140
165
  prev_operation = :deletion
141
166
  end
142
167
  doca_i += 1
168
+ else
169
+ if prev_operation == :match
170
+ @matches.last[:a_end] = doca_i
143
171
  else
144
172
  if prev_operation == :insertion
145
173
  @insertions.last[:next_operation] = :match
@@ -147,6 +175,12 @@ module FastHtmlDiff
147
175
  @deletions.last[:next_operation] = :match
148
176
  end
149
177
 
178
+ @matches << {
179
+ a_start: doca_i,
180
+ a_end: doca_i
181
+ }
182
+ end
183
+
150
184
  prev_operation = :match
151
185
  doca_i += 1
152
186
  docb_i += 1
@@ -1,3 +1,3 @@
1
1
  module FastHtmlDiff
2
- VERSION = "0.8"
2
+ VERSION = "0.8.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fast_html_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kent Mewhort
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-30 00:00:00.000000000 Z
11
+ date: 2013-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler