fast_html_diff 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MGI5NjhiMjI5ODFlNjJhM2NhODJhNjAxMDRlMWM2Y2ExNGJmYzlkNg==
5
+ data.tar.gz: !binary |-
6
+ OTgzNjEwMWQ5NTJiNGMxNzljODljYzJmYmY5ZGEzMjg1NDA5ZjA0OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZTAxYmU5MzEwMGY4ZTYwYmYwYjVhNTIxM2UzM2RjMTg2MDZkZWZjYzYwNjQ1
10
+ NjMzOGY1YThjMzY1M2E4MjU4ZTZhODBhMmM3MmRlYzA0ZmQ2MDdkZWI5ZjE0
11
+ NzI0NGVmNjMwMjYxMGM5ZmE0MDdiMWU3ZTAwNmRjZWMyNGIxMWQ=
12
+ data.tar.gz: !binary |-
13
+ ZWYyOWMwMDZkN2JiMjZjMWExNDA3OWNlMGI4ZDZiNDg3OTMxN2YwZjc0NDFk
14
+ OGE0OGZkYTU0ZTk4MjkwOGJlYmZlZmRiYjZiMmE2MzhjODU3ZjZlOWZkMmIy
15
+ ODU1NGQ1ZGY3MjU1ZDZhNDMzOTNmNGI5YzNkZjVjOTg0ZDIyMTc=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org' # gem index that dependencies are resolved against
2
+
3
+ # Specify your gem's dependencies in fast_html_diff.gemspec
4
+ gemspec # take the dependency list from the gemspec instead of repeating it here
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ BSD License for FastHtmlDiff (http://github.com/kmewhort/fast_html_diff)
2
+
3
+ Copyright (c) 2013, Kent Mewhort
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
7
+ following conditions are met:
8
+
9
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10
+ disclaimer.
11
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
12
+ disclaimer in the documentation and/or other materials provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
15
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
17
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
19
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
20
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # FastHtmlDiff
2
+
3
+ This gem performs a diff on two input HTML files (outputting the result in HTML as well). It's built for speed, using
4
+ tried-and-true UNIX diff as the LCS algorithm. The implementation works directly on the DOM to ensure the output
5
+ always remains valid.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'fast_html_diff'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install fast_html_diff
20
+
21
+ ## Usage
22
+
23
+ Basic usage:
24
+
25
+ result_html_str = FastHtmlDiff::DiffBuilder.new(string_a,string_b).build
26
+
27
+ With options (see below for details):
28
+
29
+ result_html_str = FastHtmlDiff::DiffBuilder.new(string_a,string_b,
30
+ simplify_html: true, try_hard: true).build
31
+
32
+ ## Options
33
+
34
+ * **ignore_punctuation:** boolean [default: true]
35
+ * **case_insensitive:** boolean [default: true]
36
+ * **tokenizer_regexp:** regexp [default: %r{([^A-Za-z0-9]+)}]. Make sure to include the outer parentheses. This option overrides any "ignore_punctuation" setting.
37
+ * **diff_cmd:** str [default: 'diff']. May be useful if you only have diff available through cygwin or a Windows port.
38
+ * **try_hard:** boolean [default: false]. Try hard to find smaller-length matches (at a bit of a performance cost).
39
+ * **simplify_html:** boolean [default: false]. Strips HTML to only the permitted tags, giving better output format where the structure of the two inputs differ greatly.
40
+ * **simplified_html_tags:** array of strings [default: %w(html body p strong em ul ol li)]
41
+
42
+ ## Styling
43
+
44
+ Insertions are wrapped in **<ins>**; deletions are wrapped in **<del>**. Add the following CSS for much nicer looking output:
45
+
46
+ ins {
47
+ text-decoration: none;
48
+ background-color: #a3ffad;
49
+ }
50
+ del {
51
+ color: #ff5d5a;
52
+ background-color: #b4ecff;
53
+ }
54
+
55
+ ## License
56
+
57
+ (c) 2013, Kent Mewhort, licensed under BSD. See LICENSE.txt for details.
58
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks" # Bundler's gem packaging rake tasks (build/install/release per the Bundler docs)
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__) # absolute path of the lib directory that sits next to this gemspec
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib requirable so the version file below can be loaded
4
+ require 'fast_html_diff/version'
5
+ # Gem specification for fast_html_diff; the version number comes from FastHtmlDiff::VERSION.
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "fast_html_diff"
8
+ spec.version = FastHtmlDiff::VERSION
9
+ spec.authors = ["Kent Mewhort"]
10
+ spec.email = ["kent@openissues.ca"]
11
+ spec.description = %q{Performs a diff on two HTML inputs, outputting the result as HTML.}
12
+ spec.summary = %q{Performs a diff on two HTML inputs, outputting the result as HTML.}
13
+ spec.homepage = "https://github.com/kmewhort/fast_html_diff"
14
+ spec.license = "BSD"
15
+ # The file lists below are derived from git, so the gem has to be built from inside a git checkout.
16
+ spec.files = `git ls-files`.split($/) # one entry per line of `git ls-files` output
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # basenames of everything under bin/
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) # files under test/, spec/ or features/
19
+ spec.require_paths = ["lib"]
20
+ # bundler and rake are only needed for development; nokogiri is needed at runtime
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_runtime_dependency "nokogiri"
24
+ end
@@ -0,0 +1,443 @@
1
require 'tempfile'

require 'nokogiri'

require "fast_html_diff/version"
3
+
4
+ module FastHtmlDiff
5
+ class DiffBuilder
6
# Stores the two HTML inputs and resolves the effective configuration.
#
# @param html_str_a [String] the original HTML
# @param html_str_b [String] the revised HTML
# @param config [Hash] option overrides, merged over default_config
#
# When the caller does not pass :tokenizer_regexp, the tokenizer is picked
# from the merged :ignore_punctuation flag: runs of non-alphanumeric
# characters when it is set, runs of whitespace otherwise.
def initialize(html_str_a,html_str_b,config={})
  @a, @b = html_str_a, html_str_b

  # bookkeeping that is filled in while the diff is being built
  @word_list = {}
  @insertions = []
  @deletions = []
  @split_nodes = {}
  @insertion_nodes = {}

  @config = default_config.merge(config)
  return unless config[:tokenizer_regexp].nil?

  @config[:tokenizer_regexp] = @config[:ignore_punctuation] ? %r{([^A-Za-z0-9]+)} : %r{(\s+)}
end
25
+
26
# Runs the whole pipeline and returns the marked-up HTML.
#
# Both inputs are parsed with Nokogiri (replacing the strings held in @a and
# @b), optionally simplified, and indexed word by word. The word lists are
# then diffed and document a is annotated with the insertions and deletions.
#
# @return [String] document a serialized with to_html
def build
  @a = Nokogiri::HTML(@a)
  @b = Nokogiri::HTML(@b)
  documents = { a: @a, b: @b }

  documents.each_value { |doc| simplify_html(doc) } if @config[:simplify_html]
  documents.each { |doc_name, doc| index_document(doc, doc_name) }

  # find the insertions and deletions, then tag them in document a
  diff_words
  update_dom

  @a.to_html
end
44
+
45
+ private
46
+
47
# Builds the word index of one document and stores it in @word_list[doc_name].
#
# Every text node is split with @config[:tokenizer_regexp]. Each word becomes
# a hash holding its text node, the (optionally downcased) word used for
# comparison, its start/end character offsets within the node, and the
# separator characters seen just before it. A separator that is not followed
# by a word in its own node is carried over to the first word of a later node.
#
# @param doc [#xpath] parsed document
# @param doc_name [Symbol] key under which the index is stored (:a or :b)
def index_document(doc, doc_name)
  splitter = @config[:tokenizer_regexp]
  entries = @word_list[doc_name] = []
  pending_separator = ""

  doc.xpath('//text()').each do |text_node|
    offset = 0
    word_expected = true

    text_node.content.split(splitter).each_with_index do |token, idx|
      # the first two tokens are classified by inspection (the text may start
      # with either a word or a separator); after that the kinds alternate
      word_expected =
        if idx < 2
          !token.empty? && !token.match(splitter)
        else
          !word_expected
        end

      if word_expected
        entries << {
          node: text_node,
          index_word: (@config[:case_insensitive] ? token.downcase : token),
          start_pos: offset,
          end_pos: offset + token.length,
          preceding_chars: pending_separator
        }
        pending_separator = ""
      elsif !token.empty?
        pending_separator = token
      end

      offset += token.length
    end
  end
end
80
+
81
# Computes the word-level insertions and deletions that turn document a into
# document b, recording them in @insertions and @deletions.
#
# The external diff command is used as a fast LCS implementation: the indexed
# words of each document are written one per line to temporary files and the
# unified diff output is mapped back onto word indices.
#
# Each insertion is a hash with :a_position (index of the word of a that the
# insertion follows, -1 for "before the first word"), :b_start/:b_end
# (inclusive word range in b) and :prev_operation. Each deletion has
# :a_start/:a_end (inclusive word range in a) and :prev_operation. Both gain
# :next_operation once the following operation is known.
def diff_words
  diff_result = run_word_diff

  # remap output back to the indexed word list
  doca_i = 0
  docb_i = 0
  prev_operation = :none
  in_hunk = false
  diff_result.each_line do |line|
    # The "---"/"+++" file headers only occur before the first hunk header.
    # Inside a hunk every line starts with ' ', '+', '-' or '\', so a line
    # such as "---" there is a deleted word "--" and must not be skipped.
    if line.start_with?('@@')
      in_hunk = true
      next
    end
    next unless in_hunk
    next if line.start_with?('\\') # "\ No newline at end of file" marker

    case line[0]
    when '+'
      if prev_operation == :insertion
        @insertions.last[:b_end] = docb_i
      else
        @deletions.last[:next_operation] = :insertion if prev_operation == :deletion

        @insertions << {
          a_position: doca_i - 1, # insert after the previous word of a
          b_start: docb_i,
          b_end: docb_i,
          prev_operation: prev_operation
        }
        prev_operation = :insertion
      end
      docb_i += 1
    when '-'
      if prev_operation == :deletion
        @deletions.last[:a_end] = doca_i
      else
        @insertions.last[:next_operation] = :deletion if prev_operation == :insertion

        @deletions << {
          a_start: doca_i,
          a_end: doca_i,
          prev_operation: prev_operation
        }
        prev_operation = :deletion
      end
      doca_i += 1
    else
      if prev_operation == :insertion
        @insertions.last[:next_operation] = :match
      elsif prev_operation == :deletion
        @deletions.last[:next_operation] = :match
      end

      prev_operation = :match
      doca_i += 1
      docb_i += 1
    end
    # The counters are deliberately not clamped to the last index: once every
    # word of a has been consumed only insertions can follow, and those must
    # be anchored after the last word (a_position == length - 1).
  end
end

# Writes both word lists to temporary files (one word per line), runs the
# configured diff command on them and returns its output. The temporary files
# are always removed, even when writing or running the command fails.
def run_word_diff
  files = []
  [:a, :b].each do |doc_name|
    file = Tempfile.new("fast_html_diff_#{doc_name}")
    files << file
    # terminate every word with a newline; an empty list yields an empty file
    @word_list[doc_name].each { |w| file.write("#{w[:index_word]}\n") }
    file.close
  end

  # a huge context keeps the whole document in a single hunk
  diff_args = "-U 100000" + (@config[:try_hard] ? ' -d' : '')
  `#{@config[:diff_cmd]} #{diff_args} #{files[0].path} #{files[1].path}`
ensure
  files.each { |file| file.close! }
end
159
+
160
# Marks the recorded insertions and deletions in document a.
#
# All insertion subtrees are prepared first (they are cut out of document b
# and anchored by word position in a), then the insertions are spliced in,
# and finally the deleted word ranges are wrapped in <del> elements.
def update_dom
  # prepare the nodes to insert before making any modifications
  @insertions.map! { |insertion| prepare_insertion(insertion) }

  @insertions.each { |insertion| apply_insertion(insertion) }
  @deletions.each { |deletion| apply_deletion(deletion) }
end

# Splices one prepared insertion into document a, wrapping every inserted
# text node in an <ins> element.
def apply_insertion(insertion)
  b_words = @word_list[:b]
  first_word = b_words[insertion[:b_start]]
  last_word = b_words[insertion[:b_end]]
  # Array#[] with -1 would wrap around to the last word, so the first word of
  # the document is handled explicitly
  word_before = insertion[:b_start] > 0 ? b_words[insertion[:b_start] - 1] : nil
  word_after = b_words[insertion[:b_end] + 1]

  touches_node_start = word_before.nil? || (word_before[:node] != first_word[:node])
  touches_node_end = word_after.nil? || (word_after[:node] != last_word[:node])

  # add whole nodes as-is and wrap partial nodes in a span
  if touches_node_start && touches_node_end
    additional_node = insertion[:new_nodes]

    # bump the end char past whitespace/punctuation; there is nothing to skip
    # when the insertion goes ahead of the first word of a (a_position < 0)
    if word_after && insertion[:a_position] >= 0
      insertion[:insertion_char_index] += word_after[:preceding_chars].length
    end
  else
    additional_node = Nokogiri::XML::Node.new('span', @a)
    if insertion[:new_nodes].children.length > 0
      insertion[:new_nodes].children.each { |c| additional_node.add_child(c) }
    else
      additional_node.add_child(insertion[:new_nodes])
    end
  end
  @insertion_nodes[additional_node] = true

  # insertions need to wrap around the text nodes; the wrapper has to take the
  # text node's own position, otherwise text followed by sibling elements
  # would be moved to the end of its parent
  additional_node.search('text()').each do |text_node|
    parent = text_node.parent
    following = text_node.next_sibling
    wrapper = Nokogiri::XML::Node.new('ins', @a)
    wrapper.add_child(text_node)
    if following
      following.add_previous_sibling(wrapper)
    else
      parent.add_child(wrapper)
    end
  end

  # split the insertion point node (if necessary) and insert the new nodes
  modify_each_node_between(insertion[:insertion_point_node],
                           insertion[:insertion_char_index],
                           insertion[:insertion_char_index]) { |_n| additional_node }
end

# Wraps the words of one deletion in <del> elements, one wrapper per affected
# text node so that the original formatting around the text is preserved.
def apply_deletion(deletion)
  a_words = @word_list[:a]
  start_node = a_words[deletion[:a_start]][:node]
  start_char = a_words[deletion[:a_start]][:start_pos]
  end_node = a_words[deletion[:a_end]][:node]
  end_char = a_words[deletion[:a_end]][:end_pos]

  prev_node = nil
  (deletion[:a_start]..deletion[:a_end]).each do |word_i|
    cur_node = a_words[word_i][:node]
    if cur_node != prev_node
      # only the first and last node of the range are wrapped partially
      first = (cur_node == start_node) ? start_char : 0
      last = (cur_node == end_node) ? end_char : cur_node.content.length
      modify_each_node_between(cur_node, first, last) do |n|
        wrapper = Nokogiri::XML::Node.new('del', @a)
        wrapper.add_child(n)
        wrapper
      end
    end
    prev_node = cur_node
  end
end
236
+
237
# Builds the exact DOM subtree for one insertion and works out where it goes.
#
# The closest common ancestor of the first and last inserted word in document
# b is cloned, and the clone is trimmed down to just the inserted words.
#
# @param insertion [Hash] an entry of @insertions as produced by diff_words
# @return [Hash] the insertion merged with :new_nodes (the trimmed clone),
#   :insertion_point_node (text node of document a) and
#   :insertion_char_index (character offset within that node)
def prepare_insertion(insertion)
  b_words = @word_list[:b]
  start_word = b_words[insertion[:b_start]]
  end_word = b_words[insertion[:b_end]]
  start_char = start_word[:start_pos]
  end_char = end_word[:end_pos]

  # find the closest common ancestor of the start and end, and clone this portion
  cca = (start_word[:node].ancestors & end_word[:node].ancestors).first
  cca_clone = cca.dup

  # both lookups rely on child indices, so they must happen before any trimming
  start_node = counterpart_in_clone(start_word[:node], cca, cca_clone)
  end_node = counterpart_in_clone(end_word[:node], cca, cca_clone)

  # trim away NODES up the tree that fall to the left of the start
  # or to the right of the end
  left_node = start_node
  while left_node != cca_clone
    siblings = left_node.parent.children
    self_index = siblings.index(left_node)
    unless self_index == 0
      left_of_self = siblings.slice(0..(self_index - 1))
      left_of_self.each { |n| n.remove } unless left_of_self.nil?
    end
    left_node = left_node.parent
  end

  right_node = end_node
  while right_node != cca_clone
    siblings = right_node.parent.children
    self_index = siblings.index(right_node)
    right_of_self = siblings.slice((self_index + 1)..-1)
    right_of_self.each { |n| n.remove } unless right_of_self.nil?
    right_node = right_node.parent
  end

  # trim away the TEXT that falls to the left of the start or to the right of
  # the end (end first, so the offsets stay valid when both are the same node)
  end_node.content = end_node.content[0..(end_char - 1)]
  start_node.content = start_node.content[start_char..-1]

  # unless there's a deletion immediately before, include the preceding chars in the insertion
  unless (insertion[:prev_operation] == :deletion) || (insertion[:b_start] <= 0)
    start_node.content = start_word[:preceding_chars] + start_node.content
  end

  a_words = @word_list[:a]
  if insertion[:a_position] < 0
    # insertion ahead of the first word of a: anchor it at the start of that
    # word (indexing with -1 would wrap around to the last word instead)
    anchor = a_words.first
    char_index = anchor[:start_pos]
  else
    anchor = a_words[insertion[:a_position]]
    char_index = anchor[:end_pos]
  end

  insertion.merge(
    new_nodes: cca_clone,
    insertion_point_node: anchor[:node],
    insertion_char_index: char_index
  )
end

# Finds the node inside ancestor_clone that corresponds to node inside
# ancestor, by retracing the child indices on the way up to ancestor.
def counterpart_in_clone(node, ancestor, ancestor_clone)
  path = []
  until node == ancestor
    path.unshift(node.parent.children.index(node))
    node = node.parent
  end
  path.inject(ancestor_clone) { |current, child_index| current.children[child_index] }
end
310
+
311
+ # splits the text node (or the pieces it was split into by earlier calls) at start_char and end_char,
312
+ # yields each piece lying between the two positions and places the block's return value in the DOM; returns the updated list of pieces
313
+ def modify_each_node_between(node, start_char, end_char)
314
+ prev_node_set = nil
315
+ if @split_nodes[node].nil?
316
+ prev_node_set = [node]
317
+ else
318
+ prev_node_set = @split_nodes[node] # pieces recorded by an earlier call for this node
319
+ end
320
+
321
+ # skip over inserted nodes, as they're not included in the character
322
+ # counts (and there's no further operations on them)
323
+ prev_node_set.delete_if {|n| @insertion_nodes[n] }
324
+
325
+ new_node_set = []
326
+ inside_nodes = [] # pieces that fall inside the start..end range and get yielded
327
+ insertion_queue = Hash.new # new piece => node it has to be attached after
328
+ cur_char = 0 # character offset of the current piece, counted across all pieces
329
+ start_trimmed = false
330
+ end_trimmed = false
331
+ prev_node_set.each do |n|
332
+ cur_node = n
333
+ new_node_set << cur_node
334
+ node_end_char = cur_char + cur_node.content.length
335
+
336
+ # split node at the start_char
337
+ unless start_trimmed
338
+ if start_char > node_end_char # the range starts after this piece
339
+ cur_char = node_end_char
340
+ next
341
+ else
342
+ if start_char == cur_char
343
+ start_trimmed = true
344
+ else # start_char between cur_char and node_end_char
345
+ after_node = cur_node.dup
346
+ cur_node.content = after_node.content[0..(start_char-cur_char-1)]
347
+ after_node.content = after_node.content[(start_char-cur_char)..-1]
348
+ insertion_queue[after_node] = cur_node # don't actually add_next_sibling yet, as Nokogiri will merge them
349
+ start_trimmed = true
350
+
351
+ cur_char += cur_node.content.length
352
+ cur_node = after_node
353
+ new_node_set << cur_node
354
+ end
355
+ end
356
+ end
357
+
358
+ # split node at the end_char
359
+ unless end_trimmed || !start_trimmed # only once the start has been found and the end has not
360
+ inside_nodes << cur_node
361
+ if end_char > node_end_char
362
+ cur_char = node_end_char
363
+ next
364
+ elsif end_char == node_end_char
365
+ end_trimmed = true
366
+ cur_char = node_end_char
367
+ next
368
+ else # end_char < node_end_char
369
+ after_node = cur_node.dup
370
+ if (end_char-cur_char) > 0
371
+ cur_node.content = after_node.content[0..(end_char-cur_char-1)]
372
+ after_node.content = after_node.content[(end_char-cur_char)..-1]
373
+ else
374
+ cur_node.content = "" # nothing of this piece lies before end_char: the yielded piece is emptied, after_node keeps the text
375
+ end
376
+ insertion_queue[after_node] = cur_node
377
+ end_trimmed = true
378
+
379
+ new_node_set << after_node
380
+ end
381
+ end
382
+ cur_char = node_end_char
383
+ end
384
+ new_node_set.map! do |node_in_set| # second pass: attach the queued pieces and apply the block
385
+ insert_after = insertion_queue[node_in_set]
386
+ if inside_nodes.include?(node_in_set) && block_given?
387
+ modified_node = nil
388
+ if !insert_after.nil?
389
+ modified_node = yield node_in_set
390
+ insert_after.add_next_sibling(modified_node)
391
+ else
392
+ node_parent = node_in_set.parent
393
+ node_position = node_parent.children.index(node_in_set)
394
+ modified_node = yield node_in_set
395
+
396
+ # if the actual node has changed, need to rehook to parent (assume the original has been removed)
397
+ if modified_node != node_in_set
398
+ if node_parent.children.length > node_position
399
+ node_parent.children[node_position].add_previous_sibling(modified_node)
400
+ else
401
+ node_parent.add_child(modified_node)
402
+ end
403
+ end
404
+ end
405
+
406
+ # also need to update the insertion queue if a node referenced by
407
+ # another has changed
408
+ if modified_node != node_in_set
409
+ insertion_queue.each do |floating_node, target_node|
410
+ if target_node == node_in_set
411
+ insertion_queue[floating_node] = modified_node
412
+ end
413
+ end
414
+ end
415
+ modified_node
416
+ else
417
+ insert_after.add_next_sibling(node_in_set) unless insert_after.nil?
418
+ node_in_set
419
+ end
420
+ end
421
+
422
+ @split_nodes[node] = new_node_set # remembered for later calls on the same node; also the return value
423
+ end
424
+
425
# Strips the document down to the permitted tags.
#
# Every element whose tag is not listed in @config[:simplified_html_tags] is
# replaced by its own children, so the content is kept and only the
# surrounding tag disappears.
#
# @param html [#css] parsed document, modified in place
def simplify_html(html)
  permitted_selector = @config[:simplified_html_tags].join(',')
  every_element = html.css('*')
  permitted = html.css(permitted_selector)

  disallowed = every_element - permitted
  disallowed.each { |node| node.replace(node.children) }
end
430
+
431
# Returns a fresh hash with the default value of every supported option.
# A caller-supplied :tokenizer_regexp takes precedence over the
# :ignore_punctuation setting.
def default_config
  defaults = {}
  defaults[:ignore_punctuation] = true
  defaults[:case_insensitive] = true
  defaults[:tokenizer_regexp] = %r{([^A-Za-z0-9]+)}
  defaults[:diff_cmd] = 'diff'
  defaults[:try_hard] = false
  defaults[:simplify_html] = false
  defaults[:simplified_html_tags] = %w(html body p strong em ul ol li)
  defaults
end
442
+ end
443
+ end
@@ -0,0 +1,3 @@
1
module FastHtmlDiff
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.8".freeze
end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fast_html_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.8'
5
+ platform: ruby
6
+ authors:
7
+ - Kent Mewhort
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Performs a diff on two HTML inputs, outputting the result as HTML.
56
+ email:
57
+ - kent@openissues.ca
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - fast_html_diff.gemspec
68
+ - lib/fast_html_diff.rb
69
+ - lib/fast_html_diff/version.rb
70
+ homepage: https://github.com/kmewhort/fast_html_diff
71
+ licenses:
72
+ - BSD
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.0.3
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Performs a diff on two HTML inputs, outputting the result as HTML.
94
+ test_files: []