sms-htmldiff 0.0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +24 -0
- data/LICENSE +20 -0
- data/README.md +35 -0
- data/Rakefile +54 -0
- data/htmldiff.gemspec +25 -0
- data/lib/htmldiff/diff_builder.rb +156 -0
- data/lib/htmldiff/list_of_words.rb +182 -0
- data/lib/htmldiff/match.rb +17 -0
- data/lib/htmldiff/match_finder.rb +238 -0
- data/lib/htmldiff/operation.rb +38 -0
- data/lib/htmldiff/word.rb +57 -0
- data/lib/htmldiff.rb +14 -0
- data/spec/diffing_output/block_tag_spec.rb +11 -0
- data/spec/diffing_output/iframes_spec.rb +33 -0
- data/spec/diffing_output/img_tags_spec.rb +49 -0
- data/spec/diffing_output/paragraph_tags_spec.rb +60 -0
- data/spec/diffing_output/tables_spec.rb +47 -0
- data/spec/diffing_output/text_spec.rb +48 -0
- data/spec/list_of_words_spec.rb +53 -0
- data/spec/operation_spec.rb +45 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/word_spec.rb +31 -0
- metadata +93 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
module HTMLDiff
  # This class is responsible for comparing the list of old and new words and
  # coming up with a list of insert, delete, replace and equal operations,
  # which the builder will turn into presentable HTML output.
  class MatchFinder
    attr_accessor :old_words, :new_words

    # @param old_words word list of the old document
    # @param new_words word list of the new document
    #
    # Words are compared through their #to_s value.
    def initialize(old_words, new_words)
      @old_words = old_words
      @new_words = new_words
      @matching_blocks = []
      @new_word_indices = Hash.new { |h, word| h[word] = [] }
    end

    # Computes the operations that turn @old_words into @new_words.
    #
    # The word index and the list of matching blocks are rebuilt on every
    # call, so calling this more than once (or after assigning new word lists
    # through the accessors) always yields a consistent result.
    #
    # @return [Array<Operation>]
    def operations
      index_new_words
      locate_matching_blocks
      define_operations
      @operations
    end

    # Records, for every word, the positions at which it appears in
    # @new_words.
    #
    # %w(ant bat cat ant) => { 'ant' => [0, 3], 'bat' => [1], 'cat' => [2] }
    def index_new_words
      # Start from an empty index; appending to a previous run's index would
      # duplicate every position.
      @new_word_indices.clear
      @new_words.each_with_index { |word, i| @new_word_indices[word.to_s] << i }
    end

    # Walks along the matching blocks and creates an Operation for each match
    # and for each non-matching stretch in front of a match (insert, delete
    # or replace). The result is stored in @operations.
    def define_operations
      # Starting point of potential difference (end of last match, or start
      # of string)
      @position_in_old = @position_in_new = 0
      @operations = []

      @matching_blocks.each do |match|
        create_operation_from(match)
      end
    end

    # Fills @matching_blocks with the matches in the order in which they
    # appear in the word lists. Each item is an instance of Match, which
    # contains the start index of the match in @old_words, the start index in
    # @new_words, and the length in number of words.
    def locate_matching_blocks
      # Discard the blocks of any previous run before searching again.
      @matching_blocks = []
      recursively_find_matching_blocks_in_range(0, @old_words.count,
                                                0, @new_words.count)

      # An empty match at the end forces define_operations to create the
      # operations that handle the unmatched tails.
      @matching_blocks << HTMLDiff::Match.new(@old_words.count,
                                              @new_words.count, 0)
    end

    # Finds the longest match within the given ranges, then recursively
    # searches the gaps left either side of it, until no further matches are
    # found. Matches are appended to @matching_blocks in document order.
    #
    # The end indices are exclusive.
    def recursively_find_matching_blocks_in_range(start_in_old,
                                                  end_in_old,
                                                  start_in_new,
                                                  end_in_new)
      longest_match = find_longest_match_between_ranges(start_in_old,
                                                        end_in_old,
                                                        start_in_new,
                                                        end_in_new)
      return unless longest_match.size > 0

      if start_in_old < longest_match.start_in_old &&
         start_in_new < longest_match.start_in_new
        # The match is not at the start of either range: search the gap
        # before it and add any smaller matches from there.
        recursively_find_matching_blocks_in_range(start_in_old,
                                                  longest_match.start_in_old,
                                                  start_in_new,
                                                  longest_match.start_in_new)
      end

      @matching_blocks << longest_match

      if longest_match.end_in_old < end_in_old &&
         longest_match.end_in_new < end_in_new
        # The match is not at the end of either range: search the gap after
        # it and add any smaller matches from there.
        recursively_find_matching_blocks_in_range(longest_match.end_in_old,
                                                  end_in_old,
                                                  longest_match.end_in_new,
                                                  end_in_new)
      end
    end

    # Finds the longest run of words that appears in both the given range of
    # @old_words and the given range of @new_words. The end indices are
    # exclusive. This method is used recursively, so its working state is
    # kept in local variables.
    #
    # If there are two matches of the same size, the first one found wins.
    #
    # @return [HTMLDiff::Match] a match of size 0 when nothing matches
    def find_longest_match_between_ranges(start_in_old, end_in_old,
                                          start_in_new, end_in_new)
      best_match = HTMLDiff::Match.new 0, 0, 0

      # Length of the match that ends at each index of @new_words, as of the
      # previous word of @old_words. e.g. a length of 3 stored at index 4
      # means that the word at index 4 of @new_words is the end of a
      # 3-word-long match.
      lengths_at_previous_word = Hash.new(0)

      start_in_old.upto(end_in_old - 1) do |index_in_old|
        # A fresh hash per old word: matches that the current word does not
        # continue are dropped simply by not being carried over.
        lengths_at_current_word = Hash.new(0)

        current_word_in_old = @old_words[index_in_old].to_s
        @new_word_indices[current_word_in_old].each do |index_in_new|
          # Skip occurrences before the start of the range we're checking.
          next if index_in_new < start_in_new
          # Occurrences are stored in ascending order, so once we are past
          # the end of the range all later ones can be ignored.
          break if index_in_new >= end_in_new

          # Extend the match that ended on the previous word of @new_words
          # (length 0 if that word was not part of a match).
          new_match_length = lengths_at_previous_word[index_in_new - 1] + 1
          lengths_at_current_word[index_in_new] = new_match_length

          # Keep track of the longest match so we can return it.
          next unless new_match_length > best_match.size

          best_match = HTMLDiff::Match.new(index_in_old - new_match_length + 1,
                                           index_in_new - new_match_length + 1,
                                           new_match_length)
        end

        lengths_at_previous_word = lengths_at_current_word
      end

      best_match
    end

    # Appends to @operations the operation for the non-matching words in
    # front of the given match (if any), followed by an :equal operation for
    # the match itself (if it is not empty), then moves the current positions
    # to the end of the match.
    #
    # @param [HTMLDiff::Match] match
    def create_operation_from(match)
      # A match that is just a single space, found in between words which are
      # otherwise different, is ignored. Returning before the positions are
      # advanced makes the changes before and after it merge together.
      old_text = @old_words[match.start_in_old...match.end_in_old].join
      new_text = @new_words[match.start_in_new...match.end_in_new].join
      return if old_text == ' ' && old_text == new_text

      match_starts_at_current_position_in_old = (@position_in_old == match.start_in_old)
      match_starts_at_current_position_in_new = (@position_in_new == match.start_in_new)

      # Based on where the match starts, work out what the preceding
      # non-matching bit represents.
      action_upto_match_positions =
        case [match_starts_at_current_position_in_old,
              match_starts_at_current_position_in_new]
        when [false, false]
          :replace
        when [true, false]
          :insert
        when [false, true]
          :delete
        else
          # Nothing lies between the current positions and the match.
          :none
        end

      if action_upto_match_positions != :none
        @operations << Operation.new(action_upto_match_positions,
                                     @old_words[@position_in_old...match.start_in_old],
                                     @new_words[@position_in_new...match.start_in_new])
      end

      if match.size != 0
        @operations << Operation.new(:equal,
                                     @old_words[match.start_in_old...match.end_in_old],
                                     @new_words[match.start_in_new...match.end_in_new])
      end

      # Move to the end of the match (start of next difference).
      @position_in_old = match.end_in_old
      @position_in_new = match.end_in_new
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module HTMLDiff
  # An operation represents one difference between the old HTML and the new
  # HTML (e.g. adding three letters), or one stretch that did not change.
  #
  # Members:
  #   action    - what happened; MatchFinder creates operations with :insert,
  #               :delete, :replace or :equal
  #   old_words - the affected words of the old document
  #   new_words - the affected words of the new document
  Operation = Struct.new(:action, :old_words, :new_words)

  class Operation
    # Captures the name of the first tag in a piece of text: everything
    # between "<" and the first whitespace or ">".
    TAG_NAME = /<([^>\s]+)[\s>]/

    # Ignores any attributes and tells us if the tag is the same e.g. <p> and
    # <p style="margin: 2px;"> are the same.
    #
    # @return [Boolean] false when the old text contains no tag at all
    def same_tag?
      first_tagname = tag_name(old_text) # nil means it is not a tag
      !first_tagname.nil? && first_tagname == tag_name(new_text)
    end

    # @return [String] the old words joined together ('' when there are none)
    def old_text
      (old_words || []).join
    end

    # @return [String] the new words joined together ('' when there are none)
    def new_text
      (new_words || []).join
    end

    private

    # @return [String, nil] name of the first tag found in the text, if any
    def tag_name(text)
      text[TAG_NAME, 1]
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module HTMLDiff
  # This class is responsible for representing one word in one of the HTML
  # strings. Once the HTML has been transformed into words by the ListOfWords
  # class, the diff algorithm then looks for what has changed. The idea is that
  # rather than the standard diff which looks character by character, this will
  # work around the HTML tags so that the output looks only at the text inside
  # them.
  class Word
    # @param word [String] initial text of the word. It is copied, so that
    #   appending with #<< never modifies the string the caller passed in.
    def initialize(word = '')
      @word = word.dup
    end

    # Appends the given character(s) to the word.
    #
    # @return [String] the text of the word after appending
    def <<(character)
      @word << character
    end

    def empty?
      @word.empty?
    end

    # Truthy when the text contains the start of an img, hr or br tag
    # (case-insensitive).
    def standalone_tag?
      @word.downcase =~ /<(img|hr|br)/
    end

    # Truthy when the word starts with an opening or closing iframe tag.
    def iframe_tag?
      (@word[0..7].downcase =~ %r{^<\/?iframe ?})
    end

    def tag?
      opening_tag? || closing_tag? || standalone_tag?
    end

    def opening_tag?
      @word =~ %r{[\s]*<[^\/]{1}[^>]*>\s*$}
    end

    def closing_tag?
      @word =~ %r{^\s*</[^>]+>\s*$}
    end

    # Truthy when the word starts with a div whose class attribute contains
    # the block tag class.
    def block_tag?
      @word =~ /^<div[^<]*class="[^"]*#{block_tag_class}[^"]*"/
    end

    def to_s
      @word
    end

    # Compares by text. The other side may be another Word or anything a
    # String can be compared with.
    def ==(other)
      # String#== is false for any non-String, so unwrap Words first.
      other = other.to_s if other.is_a?(Word)
      @word == other
    end

    def block_tag_class
      @block_tag_class ||= 'block_tag'
    end
  end
end
|
data/lib/htmldiff.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'htmldiff/diff_builder'
|
3
|
+
require_relative 'htmldiff/match'
|
4
|
+
require_relative 'htmldiff/operation'
|
5
|
+
require_relative 'htmldiff/word'
|
6
|
+
require_relative 'htmldiff/list_of_words'
|
7
|
+
require_relative 'htmldiff/match_finder'
|
8
|
+
|
9
|
+
# Main module for namespacing the gem.
module HTMLDiff
  # Public entry point: passes the old text, the new text and the options to
  # a DiffBuilder and returns whatever its #build returns.
  def self.diff(old, new, options = {})
    builder = DiffBuilder.new(old, new, options)
    builder.build
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# A div carrying the block tag class is expected to be wrapped as one insert.
describe 'Treating a block tag as a single item' do
  it 'shows the whole div as an insert' do
    old_html = '<p>text</p>'
    new_html = '<p>text<div class="block_tag"><img src="something" /></div></p>'
    expected = '<p>text<ins class="diffins"><div class="block_tag"><img src="something" /></div></ins></p>'

    result = HTMLDiff.diff(old_html, new_html, {block_tag_classes: ['inserted']})

    expect(result).to eq(expected)
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Expected diff output when iframes are inserted.
#
# NOTE(review): the multi-line fixtures below are reproduced as they appear
# in this listing; any leading whitespace on their continuation lines could
# not be recovered - confirm against the original file.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'iframes' do
      it 'wraps iframe inserts' do
        old_html = 'a b c'
        new_html = 'a b <iframe src="some_url"></iframe> c'
        expected = 'a b <ins class="diffins"><iframe src="some_url"></iframe></ins><ins class="diffins"> </ins>c'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'wraps iframe inserts with extra stuff' do
        old_html = ''
        new_html = '
<div class="iframe-wrap scribd">
<div class="iframe-aspect-ratio">
</div>
<iframe src="url"></iframe>
</div>
'
        expected = '<ins class="diffins">
</ins><ins class="diffins"><div class="iframe-wrap scribd"><ins class="diffins">
</ins><div class="iframe-aspect-ratio"><ins class="diffins">
</ins></div><ins class="diffins">
</ins><ins class="diffins"><iframe src="url"></iframe></ins><ins class="diffins">
</ins></div><ins class="diffins">
</ins></ins>'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Expected diff output when img tags are inserted or deleted.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'img tags' do
      it 'should support img tags insertion' do
        old_html = 'a b c'
        new_html = 'a b <img src="some_url" /> c'
        expected = 'a b <ins class="diffins"><img src="some_url" /></ins><ins class="diffins"> </ins>c'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'wraps img tags inside other tags' do
        old_html = '<p>text</p>'
        new_html = '<p>text<img src="something" /></p>'
        expected = '<p>text<ins class="diffins"><img src="something" /></ins></p>'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'wraps img tags inserted with other tags' do
        old_html = 'text'
        new_html = 'text<p><img src="something" /></p>'
        expected = 'text<ins class="diffins"><p><ins class="diffins"><img src="something" /></ins></p></ins>'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'wraps img tags inserted with other tags and new lines' do
        old_html = 'text'
        new_html = %(text<p>\r\n<img src="something" />\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something" />\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'wraps badly terminated img tags inserted with other tags and new lines' do
        old_html = 'text'
        new_html = %(text<p>\r\n<img src="something">\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something">\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end

      it 'supports img tags deletion' do
        old_html = 'a b <img src="some_url" /> c'
        new_html = 'a b c'
        expected = 'a b <del class="diffdel"><img src="some_url" /></del><del class="diffdel"> </del>c'

        expect(HTMLDiff.diff(old_html, new_html)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Expected diff output when paragraphs are added, removed or given new
# attributes.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'simple tags' do
      it 'wraps deleted tags' do
        original = '<p> Test Paragraph </p><p>More Stuff</p>'
        revised = '<p>Nothing!</p>'
        expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>'\
                   '<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps inserted tags' do
        original = '<p>Nothing!</p>'
        revised = '<p> Test Paragraph </p><p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>'\
                   '<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      describe 'wrapping deleted tags even with text around them' do
        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          revised = '<p>Nothing!</p>weee'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee'\
                     '<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, plus deleted consecutive text' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          revised = '<p>Nothing!</p>'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>'\
                     '<del class="diffdel">weee</del>'\
                     '<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards with some extra text' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>asd'
          revised = '<p>Nothing!</p>weee asd'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee'\
                     '<del class="diffmod"><p><del class="diffmod">More Stuff</del></p></del>'\
                     '<ins class="diffmod"> </ins>asd'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end
      end

      it 'wraps inserted tags even with text around' do
        original = '<p>Nothing!</p>weee'
        revised = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>weee'\
                   '<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      describe 'changing the attributes of tags' do
        it 'ignores a tag with new attributes' do
          original = 'text <p>Nothing!</p> text'
          revised = 'text <p style="margin-left: 20px">Nothing!</p> text'

          expect(HTMLDiff.diff(original, revised)).to eq('text <p style="margin-left: 20px">Nothing!</p> text')
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Expected diff output when tables or table rows are deleted.
#
# NOTE(review): the multi-line fixtures below are reproduced as they appear
# in this listing; any leading whitespace on their continuation lines could
# not be recovered - confirm against the original file.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'tables' do
      it 'wraps deleted table tags' do
        original = '<p> Test Paragraph </p>
<p> </p>
<table><tbody><tr><td>hello</td><td>bye</td></tr></tbody></table>
<p> </p>
'
        revised = '<p>Nothing!</p>'
        expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">
</del><del class="diffdel"><p><del class="diffdel"> </del></p><del class="diffdel">
</del><table><tbody><tr><td><del class="diffdel">hello</del></td><td><del class="diffdel">bye</del></td></tr></tbody></table><del class="diffdel">
</del><p><del class="diffdel"> </del></p><del class="diffdel">
</del></del>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'should wrap deleted table rows' do
        original = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
<tr><td>remove</td><td>me</td></tr>
</tbody>
</table>'
        revised = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
</tbody>
</table>'
        expected = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
<del class="diffdel"><tr><td><del class="diffdel">remove</del></td>'\
'<td><del class="diffdel">me</del></td></tr><del class="diffdel">
</del></del></tbody>
</table>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Expected diff output for plain text without any tags.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'text' do
      it 'should diff text' do
        expected = 'a<ins class="diffins"> nother</ins> word is '\
                   '<del class="diffmod">here</del><ins class="diffmod">there</ins>'

        result = HTMLDiff.diff('a word is here', 'a nother word is there')

        expect(result).to eq(expected)
      end

      it 'should insert a letter and a space' do
        result = HTMLDiff.diff('a c', 'a b c')

        expect(result).to eq('a <ins class="diffins">b </ins>c')
      end

      it 'should remove a letter and a space' do
        result = HTMLDiff.diff('a b c', 'a c')

        expect(result).to eq('a <del class="diffdel">b </del>c')
      end

      it 'should change a letter' do
        expected = 'a <del class="diffmod">b</del><ins class="diffmod">d</ins> c'

        result = HTMLDiff.diff('a b c', 'a d c')

        expect(result).to eq(expected)
      end

      it 'supports Chinese' do
        old_text = '这个是中文内容, Ruby is the bast'
        new_text = '这是中国语内容,Ruby is the best language.'
        expected = '这<del class="diffdel">个</del>是中'\
                   '<del class="diffmod">文</del><ins class="diffmod">国语</ins>内容'\
                   '<del class="diffmod">, Ruby</del><ins class="diffmod">,Ruby</ins> is the '\
                   '<del class="diffmod">bast</del><ins class="diffmod">best language.</ins>'

        expect(HTMLDiff.diff(old_text, new_text)).to eq(expected)
      end

      it 'puts long bit of replaced text together, rather than breaking on word boundaries' do
        result = HTMLDiff.diff('a long bit of text', 'some totally different text')

        expect(result).to eq('<del class="diffmod">a long bit of</del>'\
                             '<ins class="diffmod">some totally different</ins> text')
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Checks how ListOfWords splits HTML into words and how it reports unclosed
# tags.
describe HTMLDiff::ListOfWords do
  describe 'breaking tags up correctly' do
    it 'separates tags' do
      words = HTMLDiff::ListOfWords.new('<p>input</p>').to_a

      expect(words.map(&:to_s)).to eq %w(<p> input </p>)
    end

    it 'separates block tags' do
      html = '<p>text<div class="block_tag"><img src="something" /></div></p>'
      expected = ['<p>', 'text', '<div class="block_tag"><img src="something" /></div>', '</p>']

      words = HTMLDiff::ListOfWords.new(html, {block_tag_class: 'inserted'}).to_a

      expect(words.map(&:to_s)).to eq expected
    end
  end

  describe 'contains_unclosed_tag?' do
    it 'returns true with an open <p> tag' do
      list = described_class.new('<p>')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute' do
      list = described_class.new('<p style="margin: 20px">')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute that contains stuff' do
      list = described_class.new('<p style="margin: 20px">blah')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns false with a properly closed <p> tag' do
      list = described_class.new('<p></p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute' do
      list = described_class.new('<p style="margin: 20px"></p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute that contains stuff' do
      list = described_class.new('<p style="margin: 20px">blah</p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a self closing tag' do
      list = described_class.new('<img>')

      expect(list.contains_unclosed_tag?).to be_false
    end
  end
end
|