sms-htmldiff 0.0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,238 @@
1
module HTMLDiff
  # Compares the list of old words with the list of new words and produces the
  # ordered list of :equal, :insert, :delete and :replace operations that
  # describes how to get from the old list to the new one. The builder turns
  # these operations into presentable HTML output.
  class MatchFinder
    # Stand-in index list for a word of @old_words that never occurs in
    # @new_words, so that looking it up does not add a key to the index.
    NO_INDICES = [].freeze
    private_constant :NO_INDICES

    attr_accessor :old_words, :new_words

    # @param old_words [Array] words of the old version; each is compared by
    #   its #to_s value
    # @param new_words [Array] words of the new version; each is compared by
    #   its #to_s value
    def initialize(old_words, new_words)
      @old_words = old_words
      @new_words = new_words
      @matching_blocks = []
      @new_word_indices = {}
    end

    # Runs the whole comparison. All intermediate state is rebuilt on every
    # call, so calling this repeatedly (or after assigning new word lists)
    # gives a fresh, consistent result.
    #
    # @return [Array<HTMLDiff::Operation>] operations in document order
    def operations
      index_new_words
      locate_matching_blocks
      define_operations
      @operations
    end

    # Rebuilds the index that tells us where in @new_words each word appears.
    #
    #   %w(ant bat cat ant) => { 'ant' => [0, 3], 'bat' => [1], 'cat' => [2] }
    #
    # The positions for each word are stored in ascending order.
    def index_new_words
      @new_word_indices = {}
      @new_words.each_with_index do |word, index|
        (@new_word_indices[word.to_s] ||= []) << index
      end
    end

    # Walks the matching blocks in order and records, for each one, the
    # operation covering the unmatched gap before it followed by the :equal
    # operation for the match itself. Requires locate_matching_blocks to have
    # been run first.
    def define_operations
      # Start of the next potential difference: the end of the last match, or
      # the start of the word lists.
      @position_in_old = @position_in_new = 0
      @operations = []

      @matching_blocks.each { |match| create_operation_from(match) }
    end

    # Rebuilds @matching_blocks: the matches in the order in which they appear
    # in the word lists. Each item is an HTMLDiff::Match built from the start
    # index in @old_words, the start index in @new_words and the length in
    # number of words.
    def locate_matching_blocks
      @matching_blocks = []
      recursively_find_matching_blocks_in_range(0, @old_words.count,
                                                0, @new_words.count)

      # A zero-length match at the very end makes define_operations emit an
      # operation for whatever unmatched tail is left after the last match.
      @matching_blocks << HTMLDiff::Match.new(@old_words.count,
                                              @new_words.count, 0)
    end

    # Finds the longest match inside the given ranges, then recursively
    # searches the gaps left on either side of it, appending every match found
    # to @matching_blocks in document order. End indices are exclusive.
    def recursively_find_matching_blocks_in_range(start_in_old,
                                                  end_in_old,
                                                  start_in_new,
                                                  end_in_new)
      longest_match = find_longest_match_between_ranges(start_in_old,
                                                        end_in_old,
                                                        start_in_new,
                                                        end_in_new)
      return unless longest_match.size > 0

      # Only search before the match when it leaves a gap at the start of both
      # ranges.
      if start_in_old < longest_match.start_in_old &&
         start_in_new < longest_match.start_in_new
        recursively_find_matching_blocks_in_range(start_in_old,
                                                  longest_match.start_in_old,
                                                  start_in_new,
                                                  longest_match.start_in_new)
      end

      @matching_blocks << longest_match

      # Only search after the match when it leaves a gap at the end of both
      # ranges.
      if longest_match.end_in_old < end_in_old &&
         longest_match.end_in_new < end_in_new
        recursively_find_matching_blocks_in_range(longest_match.end_in_old,
                                                  end_in_old,
                                                  longest_match.end_in_new,
                                                  end_in_new)
      end
    end

    # Finds the longest run of consecutive words that appears in both the given
    # range of @old_words and the given range of @new_words. End indices are
    # exclusive. If several runs share the greatest length, the first one
    # encountered is returned. All working state is local because this method
    # is called from a recursive search.
    #
    # @return [HTMLDiff::Match] a match of size 0 when nothing matches
    def find_longest_match_between_ranges(start_in_old, end_in_old,
                                          start_in_new, end_in_new)
      best_match = HTMLDiff::Match.new(0, 0, 0)

      # Maps an index in @new_words to the length of the match that ENDS at
      # that index, as of the previous word of @old_words. Missing entries
      # read as 0 without being stored.
      previous_lengths = Hash.new(0)

      start_in_old.upto(end_in_old - 1) do |index_in_old|
        # Built from scratch for every old word, so that matches which were
        # not extended by this word are dropped.
        current_lengths = Hash.new(0)
        word = @old_words[index_in_old].to_s

        @new_word_indices.fetch(word, NO_INDICES).each do |index_in_new|
          # Occurrence lies before the range being checked.
          next if index_in_new < start_in_new
          # Occurrences are in ascending order, so once one is past the range
          # all later ones are too.
          break if index_in_new >= end_in_new

          # Extend whatever match ended on the previous word of @new_words
          # (length 0 if there was none).
          length = previous_lengths[index_in_new - 1] + 1
          current_lengths[index_in_new] = length
          next unless length > best_match.size

          best_match = HTMLDiff::Match.new(index_in_old - length + 1,
                                           index_in_new - length + 1,
                                           length)
        end

        previous_lengths = current_lengths
      end

      best_match
    end

    # Appends to @operations the operation for the unmatched words between the
    # current position and the start of +match+ (if any), then the :equal
    # operation for the match itself (if it is not empty), and advances the
    # current position to the end of the match.
    #
    # @param [HTMLDiff::Match] match
    def create_operation_from(match)
      matched_old = @old_words[match.start_in_old...match.end_in_old]
      matched_new = @new_words[match.start_in_new...match.end_in_new]

      # A match that is nothing but a single space between otherwise
      # different words is ignored, so that the changes before and after it
      # merge into one operation. The position is deliberately not advanced.
      old_text = matched_old.join
      return if old_text == ' ' && old_text == matched_new.join

      gap_in_old = @position_in_old != match.start_in_old
      gap_in_new = @position_in_new != match.start_in_new

      # Unmatched words on both sides are a replacement, on the new side only
      # an insertion, on the old side only a deletion. No gap at all (e.g. the
      # first words are the same in both versions) needs no operation.
      action =
        if gap_in_old && gap_in_new
          :replace
        elsif gap_in_new
          :insert
        elsif gap_in_old
          :delete
        end

      if action
        @operations << Operation.new(
          action,
          @old_words[@position_in_old...match.start_in_old],
          @new_words[@position_in_new...match.start_in_new]
        )
      end

      if match.size != 0
        @operations << Operation.new(:equal, matched_old, matched_new)
      end

      # Move to the end of the match (start of the next difference).
      @position_in_old = match.end_in_old
      @position_in_new = match.end_in_new
    end
  end
end
@@ -0,0 +1,38 @@
1
module HTMLDiff
  # An operation represents one difference (or one unchanged stretch) between
  # the old HTML and the new HTML, e.g. adding three letters.
  #
  # Members:
  # * action    - symbol naming the kind of change; MatchFinder creates
  #               operations with :equal, :insert, :delete and :replace
  # * old_words - the words of the old version covered by this operation
  # * new_words - the words of the new version covered by this operation
  Operation = Struct.new(:action, :old_words, :new_words) do
    # Ignores any attributes and tells us if both sides are the same tag, e.g.
    # <p> and <p style="margin: 2px;"> are the same.
    #
    # @return [Boolean, nil] nil when the old text contains no tag at all
    def same_tag?
      old_tagname = tagname_in(old_text)
      old_tagname && (old_tagname == tagname_in(new_text))
    end

    # @return [String] the old words concatenated without a separator
    def old_text
      old_words.join
    end

    # @return [String] the new words concatenated without a separator
    def new_text
      new_words.join
    end

    private

    # Name of the first tag found in +text+ (everything between "<" and the
    # first whitespace or ">"), or nil when +text+ has no tag.
    def tagname_in(text)
      text[/<([^>\s]+)[\s>].*/, 1]
    end
  end
end
@@ -0,0 +1,57 @@
1
module HTMLDiff
  # Represents one word in one of the HTML strings. Once the HTML has been
  # split into words, the diff algorithm looks for what has changed between the
  # two lists. Rather than a standard diff which looks character by character,
  # this lets the diff work around the HTML tags so that the output looks only
  # at the text inside them.
  class Word
    # @param word [String] the initial text; this same string object is kept
    #   and is appended to by #<<
    def initialize(word = '')
      @word = word
    end

    # Appends +character+ to the word in place.
    def <<(character)
      @word << character
    end

    # @return [Boolean] true when the word has no characters
    def empty?
      @word.empty?
    end

    # Truthy when the lower-cased word contains "<img", "<hr" or "<br".
    #
    # @return [Integer, nil] position of the match, or nil
    def standalone_tag?
      @word.downcase =~ /<(img|hr|br)/
    end

    # Truthy when the word starts with an opening or closing iframe tag. Only
    # the first eight characters are inspected.
    #
    # @return [Integer, nil] position of the match, or nil
    def iframe_tag?
      (@word[0..7].downcase =~ %r{^<\/?iframe ?})
    end

    # Truthy when the word is an opening, closing or standalone tag.
    def tag?
      opening_tag? || closing_tag? || standalone_tag?
    end

    # Truthy when the word ends with a tag whose name does not start with "/".
    #
    # @return [Integer, nil] position of the match, or nil
    def opening_tag?
      @word =~ %r{[\s]*<[^\/]{1}[^>]*>\s*$}
    end

    # Truthy when the word is a single closing tag, optionally surrounded by
    # whitespace.
    #
    # @return [Integer, nil] position of the match, or nil
    def closing_tag?
      @word =~ %r{^\s*</[^>]+>\s*$}
    end

    # Truthy when the word starts with a <div> whose class attribute contains
    # block_tag_class.
    #
    # @return [Integer, nil] position of the match, or nil
    def block_tag?
      # The class name is escaped so that it is always matched literally.
      @word =~ /^<div[^<]*class="[^"]*#{Regexp.escape(block_tag_class)}[^"]*"/
    end

    # @return [String] the underlying text
    def to_s
      @word
    end

    # Compares by text. +other+ may be another Word or anything a String can
    # be compared with.
    #
    # @return [Boolean]
    def ==(other)
      # String#== does not know how to read a Word, so unwrap it first;
      # otherwise two words with the same text would never be equal.
      @word == (other.is_a?(Word) ? other.to_s : other)
    end

    # @return [String] the CSS class that marks a <div> as a block tag
    def block_tag_class
      @block_tag_class ||= 'block_tag'
    end
  end
end
data/lib/htmldiff.rb ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+ require_relative 'htmldiff/diff_builder'
3
+ require_relative 'htmldiff/match'
4
+ require_relative 'htmldiff/operation'
5
+ require_relative 'htmldiff/word'
6
+ require_relative 'htmldiff/list_of_words'
7
+ require_relative 'htmldiff/match_finder'
8
+
9
# Top-level namespace for the gem, and the entry point for producing a diff.
module HTMLDiff
  class << self
    # Builds a DiffBuilder for the two versions and returns the result of
    # its #build.
    #
    # @param old the old version
    # @param new the new version
    # @param options [Hash] handed to DiffBuilder.new unchanged
    def diff(old, new, options = {})
      builder = DiffBuilder.new(old, new, options)
      builder.build
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
# A <div> carrying the block tag class is expected to be reported as one
# inserted item rather than being diffed tag by tag.
describe 'Treating a block tag as a single item' do
  it 'shows the whole div as an insert' do
    original = '<p>text</p>'
    revised = '<p>text<div class="block_tag"><img src="something" /></div></p>'
    expected = '<p>text<ins class="diffins"><div class="block_tag"><img src="something" /></div></ins></p>'

    result = HTMLDiff.diff(original, revised, { block_tag_classes: ['inserted'] })

    expect(result).to eq(expected)
  end
end
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for how HTMLDiff.diff reports inserted <iframe> elements.
#
# The first example inserts an iframe between plain words and expects it to
# be wrapped in its own <ins class="diffins">. The second starts from an empty
# old document and inserts an iframe nested inside two <div> wrappers.
#
# NOTE(review): the multi-line string literals below are whitespace-sensitive
# (every newline and indent is part of the compared value). The indentation
# shown here may not match the repository copy - confirm before editing.
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'iframes' do
6
+ it 'wraps iframe inserts' do
7
+ oldv = 'a b c'
8
+ newv = 'a b <iframe src="some_url"></iframe> c'
9
+ diff = HTMLDiff.diff(oldv, newv)
10
+ expect(diff).to eq('a b <ins class="diffins"><iframe src="some_url"></iframe></ins><ins class="diffins"> </ins>c')
11
+ end
12
+
13
+ it 'wraps iframe inserts with extra stuff' do
14
+ oldv = ''
15
+ newv = '
16
+ <div class="iframe-wrap scribd">
17
+ <div class="iframe-aspect-ratio">
18
+ </div>
19
+ <iframe src="url"></iframe>
20
+ </div>
21
+ '
22
+ diff = HTMLDiff.diff(oldv, newv)
23
+ expect(diff).to eq('<ins class="diffins">
24
+ </ins><ins class="diffins"><div class="iframe-wrap scribd"><ins class="diffins">
25
+ </ins><div class="iframe-aspect-ratio"><ins class="diffins">
26
+ </ins></div><ins class="diffins">
27
+ </ins><ins class="diffins"><iframe src="url"></iframe></ins><ins class="diffins">
28
+ </ins></div><ins class="diffins">
29
+ </ins></ins>')
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,49 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for how HTMLDiff.diff reports inserted and deleted <img> tags, on
# their own and together with surrounding tags and line breaks.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'img tags' do
      it 'should support img tags insertion' do
        original = 'a b c'
        revised = 'a b <img src="some_url" /> c'
        expected = 'a b <ins class="diffins"><img src="some_url" /></ins><ins class="diffins"> </ins>c'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps img tags inside other tags' do
        original = '<p>text</p>'
        revised = '<p>text<img src="something" /></p>'
        expected = '<p>text<ins class="diffins"><img src="something" /></ins></p>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps img tags inserted with other tags' do
        original = 'text'
        revised = 'text<p><img src="something" /></p>'
        expected = 'text<ins class="diffins"><p><ins class="diffins"><img src="something" /></ins></p></ins>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps img tags inserted with other tags and new lines' do
        original = 'text'
        revised = %(text<p>\r\n<img src="something" />\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something" />\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps badly terminated img tags inserted with other tags and new lines' do
        original = 'text'
        revised = %(text<p>\r\n<img src="something">\r\n</p>)
        expected = %(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something">\r\n</ins></p></ins>)

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'supports img tags deletion' do
        original = 'a b <img src="some_url" /> c'
        revised = 'a b c'
        expected = 'a b <del class="diffdel"><img src="some_url" /></del><del class="diffdel"> </del>c'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for how HTMLDiff.diff reports whole paragraphs that were added or
# removed, with and without surrounding text, and tags whose attributes
# changed.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'simple tags' do
      it 'wraps deleted tags' do
        original = '<p> Test Paragraph </p><p>More Stuff</p>'
        revised = '<p>Nothing!</p>'
        expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'wraps inserted tags' do
        original = '<p>Nothing!</p>'
        revised = '<p> Test Paragraph </p><p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p><ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      describe 'wrapping deleted tags even with text around them' do
        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          revised = '<p>Nothing!</p>weee'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, plus deleted consecutive text' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
          revised = '<p>Nothing!</p>'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">weee</del><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end

        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards with some extra text' do
          original = '<p> Test Paragraph </p>weee<p>More Stuff</p>asd'
          revised = '<p>Nothing!</p>weee asd'
          expected = '<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffmod"><p><del class="diffmod">More Stuff</del></p></del><ins class="diffmod"> </ins>asd'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end
      end

      it 'wraps inserted tags even with text around' do
        original = '<p>Nothing!</p>weee'
        revised = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
        expected = '<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>weee<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      describe 'changing the attributes of tags' do
        it 'ignores a tag with new attributes' do
          original = 'text <p>Nothing!</p> text'
          revised = 'text <p style="margin-left: 20px">Nothing!</p> text'
          expected = 'text <p style="margin-left: 20px">Nothing!</p> text'

          expect(HTMLDiff.diff(original, revised)).to eq(expected)
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for how HTMLDiff.diff reports deleted tables and deleted table rows.
#
# The first example removes a paragraph, a table and a trailing paragraph in
# one go; the second removes a single row from an otherwise unchanged table.
#
# NOTE(review): the multi-line string literals below are whitespace-sensitive
# (every newline and indent is part of the compared value). The indentation
# shown here may not match the repository copy - confirm before editing.
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'tables' do
6
+ it 'wraps deleted table tags' do
7
+ doc_a = '<p> Test Paragraph </p>
8
+ <p> </p>
9
+ <table><tbody><tr><td>hello</td><td>bye</td></tr></tbody></table>
10
+ <p>&nbsp;</p>
11
+ '
12
+ doc_b = '<p>Nothing!</p>'
13
+ diff = HTMLDiff.diff(doc_a, doc_b)
14
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">
15
+ </del><del class="diffdel"><p><del class="diffdel"> </del></p><del class="diffdel">
16
+ </del><table><tbody><tr><td><del class="diffdel">hello</del></td><td><del class="diffdel">bye</del></td></tr></tbody></table><del class="diffdel">
17
+ </del><p><del class="diffdel">&nbsp;</del></p><del class="diffdel">
18
+ </del></del>')
19
+ end
20
+
21
+ it 'should wrap deleted table rows' do
22
+ doc_a = '<p>my table</p>
23
+ <table>
24
+ <tbody>
25
+ <tr><td>hello</td><td>bye</td></tr>
26
+ <tr><td>remove</td><td>me</td></tr>
27
+ </tbody>
28
+ </table>'
29
+ doc_b = '<p>my table</p>
30
+ <table>
31
+ <tbody>
32
+ <tr><td>hello</td><td>bye</td></tr>
33
+ </tbody>
34
+ </table>'
35
+ diff = HTMLDiff.diff(doc_a, doc_b)
36
+ expect(diff).to eq('<p>my table</p>
37
+ <table>
38
+ <tbody>
39
+ <tr><td>hello</td><td>bye</td></tr>
40
+ <del class="diffdel"><tr><td><del class="diffdel">remove</del></td>'\
41
+ '<td><del class="diffdel">me</del></td></tr><del class="diffdel">
42
+ </del></del></tbody>
43
+ </table>')
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for how HTMLDiff.diff reports changes in plain text: insertions,
# deletions, replacements, Chinese text and long replaced stretches.
describe 'HTMLDiff' do
  describe 'diff' do
    describe 'text' do
      it 'should diff text' do
        expected = 'a<ins class="diffins"> nother</ins> word is ' \
                   '<del class="diffmod">here</del><ins class="diffmod">there</ins>'

        expect(HTMLDiff.diff('a word is here', 'a nother word is there')).to eq(expected)
      end

      it 'should insert a letter and a space' do
        expected = 'a <ins class="diffins">b </ins>c'

        expect(HTMLDiff.diff('a c', 'a b c')).to eq(expected)
      end

      it 'should remove a letter and a space' do
        expected = 'a <del class="diffdel">b </del>c'

        expect(HTMLDiff.diff('a b c', 'a c')).to eq(expected)
      end

      it 'should change a letter' do
        expected = 'a <del class="diffmod">b</del><ins class="diffmod">d</ins> c'

        expect(HTMLDiff.diff('a b c', 'a d c')).to eq(expected)
      end

      it 'supports Chinese' do
        original = '这个是中文内容, Ruby is the bast'
        revised = '这是中国语内容,Ruby is the best language.'
        expected = [
          '这<del class="diffdel">个</del>是中',
          '<del class="diffmod">文</del><ins class="diffmod">国语</ins>内容',
          '<del class="diffmod">, Ruby</del><ins class="diffmod">,Ruby</ins> is the ',
          '<del class="diffmod">bast</del><ins class="diffmod">best language.</ins>'
        ].join

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end

      it 'puts long bit of replaced text together, rather than breaking on word boundaries' do
        original = 'a long bit of text'
        revised = 'some totally different text'
        expected = '<del class="diffmod">a long bit of</del><ins class="diffmod">some totally different</ins> text'

        expect(HTMLDiff.diff(original, revised)).to eq(expected)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for HTMLDiff::ListOfWords: how HTML is split into words, and the
# detection of tags that are opened but not closed.
describe HTMLDiff::ListOfWords do
  describe 'breaking tags up correctly' do
    it 'separates tags' do
      words = HTMLDiff::ListOfWords.new('<p>input</p>').to_a.map(&:to_s)

      expect(words).to eq %w[<p> input </p>]
    end

    it 'separates block tags' do
      html = '<p>text<div class="block_tag"><img src="something" /></div></p>'
      expected = ['<p>', 'text', '<div class="block_tag"><img src="something" /></div>', '</p>']

      words = HTMLDiff::ListOfWords.new(html, { block_tag_class: 'inserted' }).to_a.map(&:to_s)

      expect(words).to eq expected
    end
  end

  describe 'contains_unclosed_tag?' do
    # Result of contains_unclosed_tag? for a list of words built from +html+.
    def unclosed_tag_in?(html)
      described_class.new(html).contains_unclosed_tag?
    end

    it 'returns true with an open <p> tag' do
      expect(unclosed_tag_in?('<p>')).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute' do
      expect(unclosed_tag_in?('<p style="margin: 20px">')).to be_true
    end

    it 'returns true with an unclosed closed <p> tag with an attribute that contains stuff' do
      expect(unclosed_tag_in?('<p style="margin: 20px">blah')).to be_true
    end

    it 'returns false with a properly closed <p> tag' do
      expect(unclosed_tag_in?('<p></p>')).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute' do
      expect(unclosed_tag_in?('<p style="margin: 20px"></p>')).to be_false
    end

    it 'returns false with a properly closed <p> tag with an attribute that contains stuff' do
      expect(unclosed_tag_in?('<p style="margin: 20px">blah</p>')).to be_false
    end

    it 'returns false with a self closing tag' do
      expect(unclosed_tag_in?('<img>')).to be_false
    end
  end
end