sms-htmldiff 0.0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +24 -0
- data/LICENSE +20 -0
- data/README.md +35 -0
- data/Rakefile +54 -0
- data/htmldiff.gemspec +25 -0
- data/lib/htmldiff/diff_builder.rb +156 -0
- data/lib/htmldiff/list_of_words.rb +182 -0
- data/lib/htmldiff/match.rb +17 -0
- data/lib/htmldiff/match_finder.rb +238 -0
- data/lib/htmldiff/operation.rb +38 -0
- data/lib/htmldiff/word.rb +57 -0
- data/lib/htmldiff.rb +14 -0
- data/spec/diffing_output/block_tag_spec.rb +11 -0
- data/spec/diffing_output/iframes_spec.rb +33 -0
- data/spec/diffing_output/img_tags_spec.rb +49 -0
- data/spec/diffing_output/paragraph_tags_spec.rb +60 -0
- data/spec/diffing_output/tables_spec.rb +47 -0
- data/spec/diffing_output/text_spec.rb +48 -0
- data/spec/list_of_words_spec.rb +53 -0
- data/spec/operation_spec.rb +45 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/word_spec.rb +31 -0
- metadata +93 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
module HTMLDiff
  # Compares the list of old words with the list of new words and comes up
  # with a list of :insert, :delete, :replace and :equal operations, which the
  # builder turns into presentable HTML output.
  #
  # Words are compared through their #to_s representation.
  class MatchFinder
    attr_accessor :old_words, :new_words

    # @param old_words list of words of the old version (indexed, sliced and
    #   joined below, so presumably an Array of HTMLDiff::Word or String)
    # @param new_words list of words of the new version (same expectations)
    def initialize(old_words, new_words)
      @old_words = old_words
      @new_words = new_words
      @matching_blocks = []
      @new_word_indices = Hash.new { |h, word| h[word] = [] }
    end

    # Runs the whole pipeline: index the new words, locate the blocks that
    # match in both lists, then turn the gaps and the matches into operations.
    #
    # Every step rebuilds its own state from scratch, so calling this more
    # than once (or after assigning new word lists) gives a consistent result.
    #
    # @return [Array<HTMLDiff::Operation>]
    def operations
      index_new_words
      locate_matching_blocks
      define_operations
      @operations
    end

    # Builds a lookup that tells us at which positions in @new_words each
    # word appears, e.g.
    #
    #   %w(ant bat cat ant) => { 'ant' => [0, 3], 'bat' => [1], 'cat' => [2] }
    #
    # The index is rebuilt on every call so that repeated calls do not
    # accumulate duplicate positions.
    def index_new_words
      @new_word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @new_word_indices[word.to_s] << i }
    end

    # Walks along the matching blocks and records, for each one, the
    # operation needed for the non-matching gap before it (insert, delete or
    # replace) followed by an :equal operation for the match itself. The
    # operations are collected in @operations.
    def define_operations
      # Starting point of the potential difference (end of the last match, or
      # the start of the word lists).
      @position_in_old = @position_in_new = 0
      @operations = []

      @matching_blocks.each do |match|
        create_operation_from(match)
      end
    end

    # Fills @matching_blocks with the matches in the order in which they
    # appear in the word lists. Each item is an instance of Match, which is
    # built from the start index of the match in @old_words, the start index
    # in @new_words, and the length in number of words.
    def locate_matching_blocks
      # Start from a clean slate so that repeated calls do not append the
      # same blocks a second time.
      @matching_blocks = []

      recursively_find_matching_blocks_in_range(0, @old_words.count,
                                                0, @new_words.count)

      # An empty match at the very end forces define_operations to emit an
      # operation for any unmatched tail of either list.
      @matching_blocks << HTMLDiff::Match.new(@old_words.count,
                                              @new_words.count, 0)
    end

    # The first time this is called, it checks the whole of the two word
    # lists and finds the longest match between them. It then recursively
    # checks the gaps that are left either side of the longest match, until
    # there are no smaller matches. Matches are appended to @matching_blocks
    # in positional order.
    def recursively_find_matching_blocks_in_range(start_in_old,
                                                  end_in_old,
                                                  start_in_new,
                                                  end_in_new)
      longest_match = find_longest_match_between_ranges(start_in_old,
                                                        end_in_old,
                                                        start_in_new,
                                                        end_in_new)
      return unless longest_match.size > 0

      if start_in_old < longest_match.start_in_old &&
         start_in_new < longest_match.start_in_new
        # The match is not at the start of either range: search the gap
        # before it and add any smaller matches from there first.
        recursively_find_matching_blocks_in_range(start_in_old,
                                                  longest_match.start_in_old,
                                                  start_in_new,
                                                  longest_match.start_in_new)
      end

      @matching_blocks << longest_match

      if longest_match.end_in_old < end_in_old &&
         longest_match.end_in_new < end_in_new
        # The match is not at the end of either range: search the gap after
        # it and add any smaller matches from there.
        recursively_find_matching_blocks_in_range(longest_match.end_in_old,
                                                  end_in_old,
                                                  longest_match.end_in_new,
                                                  end_in_new)
      end
    end

    # Finds the longest run of words that appears both in
    # @old_words[start_in_old...end_in_old] and in
    # @new_words[start_in_new...end_in_new]. This method is used recursively,
    # so all of its working state is kept in local variables.
    #
    # If there are two matches of the same size, the first one found wins.
    #
    # @return [HTMLDiff::Match] a match of size 0 when nothing matches
    def find_longest_match_between_ranges(start_in_old, end_in_old,
                                          start_in_new, end_in_new)
      best_match = HTMLDiff::Match.new 0, 0, 0

      # Length of the match ending at each index position in @new_words, as
      # of the previous word in @old_words. E.g. if the length stored for
      # index 4 is 3, then the word at index 4 of @new_words is the end of a
      # 3-word-long match. Missing entries count as 0.
      previous_lengths = Hash.new(0)

      # Move along @old_words one word at a time.
      start_in_old.upto(end_in_old - 1) do |index_in_old|
        # A fresh hash per old word: matches that were not extended by the
        # current word are dropped simply by not being copied over.
        current_lengths = Hash.new(0)

        # Take the word at this position in @old_words, then visit each
        # position at which it occurs within @new_words.
        current_word_in_old = @old_words[index_in_old].to_s
        @new_word_indices[current_word_in_old].each do |index_in_new|
          # Skip occurrences before the start of the range being checked.
          next if index_in_new < start_in_new
          # Occurrences are stored in ascending order, so once we are past
          # the end of the range all later ones can be ignored.
          break if index_in_new >= end_in_new

          # Extend the match that ended on the previous word of @new_words;
          # this is 0 + 1 when that word was not part of a match.
          new_match_length = previous_lengths[index_in_new - 1] + 1
          current_lengths[index_in_new] = new_match_length

          # Strictly greater, so the first of several equally long matches
          # is the one that is kept.
          next unless new_match_length > best_match.size

          best_match = HTMLDiff::Match.new(index_in_old - new_match_length + 1,
                                           index_in_new - new_match_length + 1,
                                           new_match_length)
        end

        previous_lengths = current_lengths
      end

      best_match
    end

    # Appends to @operations the operation for the gap between the current
    # position and the given match (if any), followed by an :equal operation
    # for the match itself (if it is not empty), then moves the current
    # position to the end of the match.
    #
    # @param [HTMLDiff::Match] match
    def create_operation_from(match)
      # Single space matches found in between words which are otherwise
      # different are ignored, so that the changes before and after the space
      # merge together into one operation. The current position is
      # deliberately left untouched in that case.
      old_text = @old_words[match.start_in_old...match.end_in_old].join
      new_text = @new_words[match.start_in_new...match.end_in_new].join
      return if old_text == ' ' && old_text == new_text

      match_starts_at_current_position_in_old = (@position_in_old == match.start_in_old)
      match_starts_at_current_position_in_new = (@position_in_new == match.start_in_new)

      # Based on where the match starts, work out what the preceding
      # non-matching bit represents.
      action_upto_match_positions =
        case [match_starts_at_current_position_in_old,
              match_starts_at_current_position_in_new]
        when [false, false]
          :replace
        when [true, false]
          :insert
        when [false, true]
          :delete
        else
          # No gap on either side, e.g. the first few words are the same in
          # both versions.
          :none
        end

      # This operation carries the content that has changed.
      if action_upto_match_positions != :none
        @operations << Operation.new(action_upto_match_positions,
                                     @old_words[@position_in_old...match.start_in_old],
                                     @new_words[@position_in_new...match.start_in_new])
      end

      if match.size != 0
        @operations << Operation.new(:equal,
                                     @old_words[match.start_in_old...match.end_in_old],
                                     @new_words[match.start_in_new...match.end_in_new])
      end

      # Move to the end of the match (start of the next difference).
      @position_in_old = match.end_in_old
      @position_in_new = match.end_in_new
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module HTMLDiff
  # An operation represents one difference between the old HTML and the new
  # HTML (e.g. adding three letters), or one run of words that is the same in
  # both.
  #
  # @!attribute action
  #   @return [Symbol] :insert, :delete, :replace or :equal
  # @!attribute old_words
  #   @return [Array] the words taken from the old version
  # @!attribute new_words
  #   @return [Array] the words taken from the new version
  Operation = Struct.new(:action, :old_words, :new_words)

  class Operation
    # Captures the name of the first tag found in a piece of text, i.e.
    # everything between "<" and the first whitespace or ">".
    TAG_NAME_PATTERN = /<([^>\s]+)[\s>]/

    # Ignores any attributes and tells us if the tag is the same e.g. <p> and
    # <p style="margin: 2px;"> are the same.
    #
    # @return [Boolean] true when both texts contain a tag and the first tag
    #   name found in each is identical; false otherwise (including when the
    #   old text contains no tag at all)
    def same_tag?
      first_tagname = old_text[TAG_NAME_PATTERN, 1] # nil means it is not a tag
      second_tagname = new_text[TAG_NAME_PATTERN, 1]

      !first_tagname.nil? && first_tagname == second_tagname
    end

    # @return [String] the old words joined together without a separator
    def old_text
      old_words.join
    end

    # @return [String] the new words joined together without a separator
    def new_text
      new_words.join
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module HTMLDiff
  # This class is responsible for representing one word in one of the HTML
  # strings. Once the HTML has been transformed into words by the ListOfWords
  # class, the diff algorithm then looks for what has changed. The idea is that
  # rather than the standard diff which looks character by character, this will
  # work around the HTML tags so that the output looks only at the text inside
  # them.
  #
  # The predicate methods below return the result of a regexp match (a match
  # position or nil), so they should be used for their truthiness only.
  class Word
    # @param word [String] the initial text of the word; it is stored as
    #   given and appended to in place by #<<
    def initialize(word = '')
      @word = word
    end

    # Appends the given text to the end of the word.
    def <<(character)
      @word << character
    end

    def empty?
      @word.empty?
    end

    # Truthy when the word contains the start of an img, hr or br tag
    # (case-insensitive).
    def standalone_tag?
      @word.downcase =~ /<(img|hr|br)/
    end

    # Truthy when the word starts with an opening or closing iframe tag
    # (case-insensitive). Only the first eight characters are inspected.
    def iframe_tag?
      (@word[0..7].downcase =~ %r{^<\/?iframe ?})
    end

    def tag?
      opening_tag? || closing_tag? || standalone_tag?
    end

    # Truthy when the word ends with a tag that does not begin with "</",
    # optionally followed by whitespace.
    def opening_tag?
      @word =~ %r{[\s]*<[^\/]{1}[^>]*>\s*$}
    end

    # Truthy when the word consists of a closing tag, optionally surrounded by
    # whitespace.
    def closing_tag?
      @word =~ %r{^\s*</[^>]+>\s*$}
    end

    # Truthy when the word starts with a div whose class attribute contains
    # the block tag class name.
    def block_tag?
      # The class name is escaped so that it is always matched literally, even
      # if it ever contains regexp metacharacters.
      @word =~ /^<div[^<]*class="[^"]*#{Regexp.escape(block_tag_class)}[^"]*"/
    end

    def to_s
      @word
    end

    # Compares the text of this word with a String or with another Word.
    # Another Word is unwrapped first; comparing the String directly against
    # a Word object would never be equal, even when both hold the same text.
    def ==(other)
      @word == (other.is_a?(Word) ? other.to_s : other)
    end

    # Name of the CSS class that marks a div as a block tag.
    def block_tag_class
      @block_tag_class ||= 'block_tag'
    end
  end
end
|
data/lib/htmldiff.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'htmldiff/diff_builder'
|
3
|
+
require_relative 'htmldiff/match'
|
4
|
+
require_relative 'htmldiff/operation'
|
5
|
+
require_relative 'htmldiff/word'
|
6
|
+
require_relative 'htmldiff/list_of_words'
|
7
|
+
require_relative 'htmldiff/match_finder'
|
8
|
+
|
9
|
+
# Top-level namespace of the gem; also exposes its single entry point.
module HTMLDiff
  class << self
    # Builds the diff between two versions of a piece of HTML by handing
    # everything over to DiffBuilder.
    #
    # @param old the old version
    # @param new the new version
    # @param options [Hash] passed through to DiffBuilder unchanged
    # @return whatever DiffBuilder#build returns
    def diff(old, new, options = {})
      builder = DiffBuilder.new(old, new, options)
      builder.build
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Treating a block tag as a single item' do
  # A newly added block-tag div is expected to be wrapped in one <ins> as a
  # whole, rather than being diffed tag by tag.
  it 'shows the whole div as an insert' do
    before_html = '<p>text</p>'
    after_html = '<p>text<div class="block_tag"><img src="something" /></div></p>'
    options = { block_tag_classes: ['inserted'] }

    result = HTMLDiff.diff(before_html, after_html, options)

    expect(result).to eq('<p>text<ins class="diffins"><div class="block_tag"><img src="something" /></div></ins></p>')
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'iframes' do
      # An iframe added between existing words is wrapped in its own <ins>.
      it 'wraps iframe inserts' do
        before_html = 'a b c'
        after_html = 'a b <iframe src="some_url"></iframe> c'
        expected = 'a b <ins class="diffins"><iframe src="some_url"></iframe></ins><ins class="diffins"> </ins>c'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      # The multi-line literals below are left unindented on purpose: every
      # character between the quotes, line breaks included, is part of the
      # HTML being diffed.
      it 'wraps iframe inserts with extra stuff' do
        before_html = ''
        after_html = '
<div class="iframe-wrap scribd">
<div class="iframe-aspect-ratio">
</div>
<iframe src="url"></iframe>
</div>
'
        expected = '<ins class="diffins">
</ins><ins class="diffins"><div class="iframe-wrap scribd"><ins class="diffins">
</ins><div class="iframe-aspect-ratio"><ins class="diffins">
</ins></div><ins class="diffins">
</ins><ins class="diffins"><iframe src="url"></iframe></ins><ins class="diffins">
</ins></div><ins class="diffins">
</ins></ins>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'img tags' do
      # An image added between existing words gets its own <ins> wrapper.
      it 'should support img tags insertion' do
        before_html = 'a b c'
        after_html = 'a b <img src="some_url" /> c'
        expected = 'a b <ins class="diffins"><img src="some_url" /></ins><ins class="diffins"> </ins>c'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      it 'wraps img tags inside other tags' do
        before_html = '<p>text</p>'
        after_html = '<p>text<img src="something" /></p>'
        expected = '<p>text<ins class="diffins"><img src="something" /></ins></p>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      # Both the new paragraph and the image inside it are wrapped.
      it 'wraps img tags inserted with other tags' do
        before_html = 'text'
        after_html = 'text<p><img src="something" /></p>'
        expected = 'text<ins class="diffins"><p><ins class="diffins"><img src="something" /></ins></p></ins>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      # %() literals are used so that \r\n is interpreted as a line break.
      it 'wraps img tags inserted with other tags and new lines' do
        before_html = 'text'
        after_html = %(text<p>\r\n<img src="something" />\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something" />\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      it 'wraps badly terminated img tags inserted with other tags and new lines' do
        before_html = 'text'
        after_html = %(text<p>\r\n<img src="something">\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something">\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      it 'supports img tags deletion' do
        before_html = 'a b <img src="some_url" /> c'
        after_html = 'a b c'
        expected = 'a b <del class="diffdel"><img src="some_url" /></del><del class="diffdel"> </del>c'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'simple tags' do
      # A paragraph that disappears is wrapped in <del>, text and tags alike.
      it 'wraps deleted tags' do
        before_html = '<p> Test Paragraph </p><p>More Stuff</p>'
        after_html = '<p>Nothing!</p>'
        expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      # The mirror image of the case above: a paragraph that appears.
      it 'wraps inserted tags' do
        before_html = '<p>Nothing!</p>'
        after_html = '<p> Test Paragraph </p><p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p><ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      describe 'wrapping deleted tags even with text around them' do
        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards' do
          before_html = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          after_html = '<p>Nothing!</p>weee'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, plus deleted consecutive text' do
          before_html = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          after_html = '<p>Nothing!</p>'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">weee</del><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards with some extra text' do
          before_html = '<p> Test Paragraph </p>weee<p>More Stuff</p>asd'
          after_html = '<p>Nothing!</p>weee asd'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffmod"><p><del class="diffmod">More Stuff</del></p></del><ins class="diffmod"> </ins>asd'

          expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
        end
      end

      it 'wraps inserted tags even with text around' do
        before_html = '<p>Nothing!</p>weee'
        after_html = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>weee<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      describe 'changing the attributes of tags' do
        # Only the attributes differ, so the new tag is expected in the
        # output without any <ins>/<del> markup.
        it 'ignores a tag with new attributes' do
          before_html = 'text <p>Nothing!</p> text'
          after_html = 'text <p style="margin-left: 20px">Nothing!</p> text'
          expected = 'text <p style="margin-left: 20px">Nothing!</p> text'

          expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe 'HTMLDiff' do
  describe 'diff' do
    # The multi-line literals in these examples are left unindented on
    # purpose: every character between the quotes, line breaks included, is
    # part of the HTML being diffed.
    describe 'tables' do
      it 'wraps deleted table tags' do
        before_html = '<p> Test Paragraph </p>
<p> </p>
<table><tbody><tr><td>hello</td><td>bye</td></tr></tbody></table>
<p> </p>
'
        after_html = '<p>Nothing!</p>'
        expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">
</del><del class="diffdel"><p><del class="diffdel"> </del></p><del class="diffdel">
</del><table><tbody><tr><td><del class="diffdel">hello</del></td><td><del class="diffdel">bye</del></td></tr></tbody></table><del class="diffdel">
</del><p><del class="diffdel"> </del></p><del class="diffdel">
</del></del>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end

      it 'should wrap deleted table rows' do
        before_html = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
<tr><td>remove</td><td>me</td></tr>
</tbody>
</table>'
        after_html = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
</tbody>
</table>'
        # The expected value is two adjacent literals joined by the trailing
        # backslash; no line break is added at the join.
        expected = '<p>my table</p>
<table>
<tbody>
<tr><td>hello</td><td>bye</td></tr>
<del class="diffdel"><tr><td><del class="diffdel">remove</del></td>'\
'<td><del class="diffdel">me</del></td></tr><del class="diffdel">
</del></del></tbody>
</table>'

        expect(HTMLDiff.diff(before_html, after_html)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'text' do
      # One inserted word plus one replaced word in the same sentence.
      it 'should diff text' do
        result = HTMLDiff.diff('a word is here', 'a nother word is there')
        expected = 'a<ins class="diffins"> nother</ins> word is ' \
                   '<del class="diffmod">here</del><ins class="diffmod">there</ins>'

        expect(result).to eq(expected)
      end

      it 'should insert a letter and a space' do
        result = HTMLDiff.diff('a c', 'a b c')

        expect(result).to eq('a <ins class="diffins">b </ins>c')
      end

      it 'should remove a letter and a space' do
        result = HTMLDiff.diff('a b c', 'a c')

        expect(result).to eq('a <del class="diffdel">b </del>c')
      end

      it 'should change a letter' do
        result = HTMLDiff.diff('a b c', 'a d c')
        expected = 'a <del class="diffmod">b</del>' \
                   '<ins class="diffmod">d</ins> c'

        expect(result).to eq(expected)
      end

      # Chinese text is expected to be diffed character by character, while
      # the Latin words keep being diffed as whole words.
      it 'supports Chinese' do
        before_text = '这个是中文内容, Ruby is the bast'
        after_text = '这是中国语内容,Ruby is the best language.'
        expected = '这<del class="diffdel">个</del>是中' \
                   '<del class="diffmod">文</del><ins class="diffmod">国语</ins>内容' \
                   '<del class="diffmod">, Ruby</del><ins class="diffmod">,Ruby</ins> is the ' \
                   '<del class="diffmod">bast</del><ins class="diffmod">best language.</ins>'

        expect(HTMLDiff.diff(before_text, after_text)).to eq(expected)
      end

      it 'puts long bit of replaced text together, rather than '\
         'breaking on word boundaries' do
        before_text = 'a long bit of text'
        after_text = 'some totally different text'
        expected = '<del class="diffmod">a long bit of</del>'\
                   '<ins class="diffmod">some totally different</ins> text'

        expect(HTMLDiff.diff(before_text, after_text)).to eq(expected)
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# NOTE(review): be_true / be_false are the RSpec 2 spellings; RSpec 3 renamed
# them to be_truthy / be_falsey. Confirm against the RSpec version pinned in
# the Gemfile.lock before upgrading.
describe HTMLDiff::ListOfWords do
  describe 'breaking tags up correctly' do
    # Tags and the text between them each become a word of their own.
    it 'separates tags' do
      html = '<p>input</p>'
      words = HTMLDiff::ListOfWords.new(html).to_a.map(&:to_s)

      expect(words).to eq ['<p>', 'input', '</p>']
    end

    # A block-tag div is expected to stay together as one single word,
    # including everything nested inside it.
    it 'separates block tags' do
      html = '<p>text<div class="block_tag"><img src="something" /></div></p>'
      list = HTMLDiff::ListOfWords.new(html, { block_tag_class: 'inserted' })
      words = list.to_a.map(&:to_s)

      expect(words).to eq ['<p>', 'text', '<div class="block_tag"><img src="something" /></div>', '</p>']
    end
  end

  describe 'contains_unclosed_tag?' do
    it 'returns true with an open <p> tag' do
      list = described_class.new('<p>')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute' do
      list = described_class.new('<p style="margin: 20px">')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute '\
       'that contains stuff' do
      list = described_class.new('<p style="margin: 20px">blah')

      expect(list.contains_unclosed_tag?).to be_true
    end

    it 'returns false with a properly closed <p> tag' do
      list = described_class.new('<p></p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute' do
      list = described_class.new('<p style="margin: 20px"></p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute '\
       'that contains stuff' do
      list = described_class.new('<p style="margin: 20px">blah</p>')

      expect(list.contains_unclosed_tag?).to be_false
    end

    it 'returns false with a self closing tag' do
      list = described_class.new('<img>')

      expect(list.contains_unclosed_tag?).to be_false
    end
  end
end
|