fast_html_diff 0.8 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGI5NjhiMjI5ODFlNjJhM2NhODJhNjAxMDRlMWM2Y2ExNGJmYzlkNg==
4
+ MmI5NmQwZjQyNDNjMDVmODhmZjk0YzgxZDA1MzViNzE5NWRiOTQ4Yw==
5
5
  data.tar.gz: !binary |-
6
- OTgzNjEwMWQ5NTJiNGMxNzljODljYzJmYmY5ZGEzMjg1NDA5ZjA0OA==
6
+ YzAzZWM0NmYyYWY2NmFhOGIyYWE2MmJkNjg2ZTM3OWNmMTdkYmQyOA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZTAxYmU5MzEwMGY4ZTYwYmYwYjVhNTIxM2UzM2RjMTg2MDZkZWZjYzYwNjQ1
10
- NjMzOGY1YThjMzY1M2E4MjU4ZTZhODBhMmM3MmRlYzA0ZmQ2MDdkZWI5ZjE0
11
- NzI0NGVmNjMwMjYxMGM5ZmE0MDdiMWU3ZTAwNmRjZWMyNGIxMWQ=
9
+ YWQwMjc5NjM4YTU4NzliYWMzNmJkMDk0OGZhNjYzZDhkNDE0Y2VhZWFjN2I2
10
+ OTM3Zjc4ZjcxODdjYTJhZmZhYmI3MmVmNmExNzBiMzM2ZTYyYzU1ZGMyZjQ2
11
+ YTQ5ZGUwMDM3MjE1OTI4ZGRhY2RmZDI3OTYxMWMyMjBhMjQ2N2I=
12
12
  data.tar.gz: !binary |-
13
- ZWYyOWMwMDZkN2JiMjZjMWExNDA3OWNlMGI4ZDZiNDg3OTMxN2YwZjc0NDFk
14
- OGE0OGZkYTU0ZTk4MjkwOGJlYmZlZmRiYjZiMmE2MzhjODU3ZjZlOWZkMmIy
15
- ODU1NGQ1ZGY3MjU1ZDZhNDMzOTNmNGI5YzNkZjVjOTg0ZDIyMTc=
13
+ MTAwMjNkMDM1ZjE4NTQ3ZGJjMmNiMmExZDFmZTllZDM3YTY0NjMyMzIxZTZm
14
+ N2VhNTVjOThkOTNhY2Q1N2NlM2ZmODRlYzYwYWU2ZmUxY2NhMzQ0ZGRmMWVm
15
+ YTBjNjkxMzliNmQ4N2RjODFkOGM4MjJhN2IwM2E5OThiNTkwMzQ=
@@ -4,9 +4,7 @@ require 'nokogiri'
4
4
  module FastHtmlDiff
5
5
  class DiffBuilder
6
6
  def initialize(html_str_a,html_str_b,config={})
7
- @a = html_str_a
8
- @b = html_str_b
9
-
7
+ # merge specified config with defaults
10
8
  @config = default_config.merge(config)
11
9
  if config[:tokenizer_regexp].nil?
12
10
  if @config[:ignore_punctuation]
@@ -19,14 +17,13 @@ module FastHtmlDiff
19
17
  @word_list = {}
20
18
  @insertions = []
21
19
  @deletions = []
20
+ @matches = []
22
21
  @split_nodes = Hash.new
23
22
  @insertion_nodes = Hash.new
24
- end
25
23
 
26
- def build
27
- # parse, tokenize and index
28
- @a = Nokogiri::HTML(@a)
29
- @b = Nokogiri::HTML(@b)
24
+ # parse, tokenize and index the input documents
25
+ @a = Nokogiri::HTML(html_str_a)
26
+ @b = Nokogiri::HTML(html_str_b)
30
27
  if @config[:simplify_html]
31
28
  simplify_html(@a)
32
29
  simplify_html(@b)
@@ -36,12 +33,40 @@ module FastHtmlDiff
36
33
 
37
34
  # find the insertions and deletions
38
35
  diff_words
36
+ end
39
37
 
40
- # update doc a with tags for the insertions and deletions
38
+ # build output HTML
39
+ def build
40
+ # update doc_a with tags for the insertions and deletions
41
41
  update_dom
42
42
  @a.to_html
43
43
  end
44
44
 
45
+ # output statistics on insertions and deletions
46
+ def statistics
47
+ result = {
48
+ insertions: { segments: 0, words: 0, chars: 0 },
49
+ deletions: { segments: 0, words: 0, chars: 0 },
50
+ matches: { segments: 0, words: 0, chars: 0}
51
+ }
52
+ @insertions.each do |i|
53
+ result[:insertions][:segments] += 1
54
+ result[:insertions][:words] += i[:b_end] - i[:b_start] + 1
55
+ result[:insertions][:chars] += @word_list[:b][i[:b_end]][:end_pos] - @word_list[:b][i[:b_start]][:start_pos]
56
+ end
57
+ @deletions.each do |i|
58
+ result[:deletions][:segments] += 1
59
+ result[:deletions][:words] += i[:a_end] - i[:a_start] + 1
60
+ result[:deletions][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
61
+ end
62
+ @matches.each do |i|
63
+ result[:matches][:segments] += 1
64
+ result[:matches][:words] += i[:a_end] - i[:a_start] + 1
65
+ result[:matches][:chars] += @word_list[:a][i[:a_end]][:end_pos] - @word_list[:a][i[:a_start]][:start_pos]
66
+ end
67
+ result
68
+ end
69
+
45
70
  private
46
71
 
47
72
  # index the words in the document
@@ -140,6 +165,9 @@ module FastHtmlDiff
140
165
  prev_operation = :deletion
141
166
  end
142
167
  doca_i += 1
168
+ else
169
+ if prev_operation == :match
170
+ @matches.last[:a_end] = doca_i
143
171
  else
144
172
  if prev_operation == :insertion
145
173
  @insertions.last[:next_operation] = :match
@@ -147,6 +175,12 @@ module FastHtmlDiff
147
175
  @deletions.last[:next_operation] = :match
148
176
  end
149
177
 
178
+ @matches << {
179
+ a_start: doca_i,
180
+ a_end: doca_i
181
+ }
182
+ end
183
+
150
184
  prev_operation = :match
151
185
  doca_i += 1
152
186
  docb_i += 1
@@ -1,3 +1,3 @@
1
1
  module FastHtmlDiff
2
- VERSION = "0.8"
2
+ VERSION = "0.8.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fast_html_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kent Mewhort
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-30 00:00:00.000000000 Z
11
+ date: 2013-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler