sms-htmldiff 0.0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ module HTMLDiff
2
+ # This class is responsible for comparing the list of old and new words and
3
+ # coming up with a list of insert, delete and replace operations, which the
4
+ # builder will turn into presentable HTML output.
5
+ class MatchFinder
6
+ attr_accessor :old_words, :new_words
7
+
8
+ def initialize(old_words, new_words)
9
+ @old_words = old_words
10
+ @new_words = new_words
11
+ @matching_blocks = []
12
+ @new_word_indices = Hash.new { |h, word| h[word] = [] }
13
+ end
14
+
15
+ def operations
16
+ index_new_words
17
+ locate_matching_blocks
18
+ define_operations
19
+ @operations
20
+ end
21
+
22
+ # This leaves us with { 'first' => [1], 'second' => [2, 3] } to tell us where
23
+ # in @new_words each word appears.
24
+ #
25
+ # %w(ant bat cat ant) => { 'ant' => [0, 3], 'bat' => [1], 'cat' => [2] }
26
+ def index_new_words
27
+ @new_words.each_with_index { |word, i| @new_word_indices[word.to_s] << i }
28
+ end
29
+
30
+ # This gets an array of the sections of the two strings that match, then
31
+ # returns an array of operations that need to be performed in order to
32
+ # build the HTML output that will show the diff.
33
+ #
34
+ # The method is to move along the old and new strings, marking the bits
35
+ # between the matched portions as insert, delete or replace by creating an
36
+ # instance of Operation for each one.
37
+ def define_operations
38
+ # Starting point of potential difference (end of last match, or start
39
+ # of string)
40
+ @position_in_old = @position_in_new = 0
41
+ @operations = []
42
+
43
+ @matching_blocks.each do |match|
44
+ create_operation_from(match)
45
+ end
46
+ end
47
+
48
+ # The returned array is of matches in the order in which they appear in the
49
+ # strings. Each array item is an instance of Match, which contains the
50
+ # start index of the match in @old_words, the start index in @new_words,
51
+ # and the length in number of words.
52
+ def locate_matching_blocks
53
+ recursively_find_matching_blocks_in_range(0, @old_words.count,
54
+ 0, @new_words.count)
55
+
56
+ # An empty match at the end forces the loop to make operations to handle
57
+ # the unmatched tails. I'm sure it can be done more gracefully, but not at
58
+ # 23:52.
59
+ @matching_blocks << HTMLDiff::Match.new(@old_words.count,
60
+ @new_words.count, 0)
61
+ end
62
+
63
+ # The first time this is called, it checks the whole of the two strings and
64
+ # finds the longest match between them.
65
+ # It then recursively checks the gaps that are left either side of the
66
+ # longest match, until there are no smaller matches.
67
+ def recursively_find_matching_blocks_in_range(start_in_old,
68
+ end_in_old,
69
+ start_in_new,
70
+ end_in_new)
71
+ # Longest match in the given range.
72
+ longest_match = find_longest_match_between_ranges(start_in_old,
73
+ end_in_old,
74
+ start_in_new,
75
+ end_in_new)
76
+ return unless longest_match.size > 0
77
+
78
+
79
+ if start_in_old < longest_match.start_in_old &&
80
+ start_in_new < longest_match.start_in_new
81
+ # The match is not at the start of either range.
82
+ # Search the gap before the longest match and add any smaller matches
83
+ # from there.
84
+ recursively_find_matching_blocks_in_range(start_in_old,
85
+ longest_match.start_in_old,
86
+ start_in_new,
87
+ longest_match.start_in_new)
88
+ end
89
+
90
+ # Add the longest match
91
+ @matching_blocks << longest_match
92
+
93
+ if longest_match.end_in_old < end_in_old &&
94
+ longest_match.end_in_new < end_in_new
95
+ # The match is not at the end of either range.
96
+ # Search the gap after the longest match and add any smaller matches
97
+ # from there
98
+ recursively_find_matching_blocks_in_range(longest_match.end_in_old,
99
+ end_in_old,
100
+ longest_match.end_in_new,
101
+ end_in_new)
102
+ end
103
+ end
104
+
105
+ # This will find the longest matching set of words when comparing the given
106
+ # ranges in @old_words and @new_words. This function is used recursively, so
107
+ # the variables should not be class variables.
108
+ #
109
+ # @return [HTMLDiff::Match]
110
+ def find_longest_match_between_ranges(start_in_old, end_in_old,
111
+ start_in_new, end_in_new)
112
+ best_match = HTMLDiff::Match.new 0, 0, 0
113
+ matches = []
114
+
115
+ # A match is a string of words which is in both @old_words and @new_words
116
+ # at a certain position. Keep track of the length of matches ending at
117
+ # each index position in @new_words. e.g. if the match length at index
118
+ # 4 = 3, then that means that the word at index 4 in @new_words is the
119
+ # end of a 3-word-long match.
120
+ #
121
+ # If there are two matches of the same size, it'll get the first one.
122
+ match_lengths_at_previous_index_positions_in_new = Hash.new { |h, index| h[index] = 0 }
123
+
124
+ # Start at the beginning position in @old_words and move forwards one
125
+ # word at a time.
126
+ start_in_old.upto(end_in_old - 1) do |index_in_old|
127
+ # This will store the match lengths for all words so far up to the
128
+ # current word. Just looking at this word, the lengths will all be 1,
129
+ # so we check the match length for the preceding word in @new_words.
130
+ # If that is non-zero, it means that a previous match happened up to
131
+ # this point.
132
+ #
133
+ # If the current word is a continuation of a match, then we will
134
+ # increment the match length and store it for the current index
135
+ # position in @new_words. We replace the old hash because then we
136
+ # ignore the previous match that has now been extended and any that have
137
+ # stopped.
138
+ match_lengths_at_current_index_positions_in_new = Hash.new { |h, index| h[index] = 0 }
139
+
140
+ # Take the word which is at this position in @old_words,
141
+ # then for each position it occurs in within @new_words...
142
+ current_word_in_old = @old_words[index_in_old].to_s
143
+ @new_word_indices[current_word_in_old].each do |index_in_new|
144
+ # Skip if this position is before the start of the range we're
145
+ # checking.
146
+ next if index_in_new < start_in_new
147
+ # Since the indices in @new_words start at the earliest occurrence
148
+ # and are in order, if we are now after the end of the range we are
149
+ # checking, then all later occurrences can be ignored.
150
+ break if index_in_new >= end_in_new
151
+
152
+ # Add 1 to the length of the match we have for the previous word
153
+ # position in @new_words. i.e. we are moving along @old_words,
154
+ # ticking off the words in @new_words as we go.
155
+ #
156
+ # Will be zero if the previous word in @new_words has not been marked
157
+ # as a match.
158
+
159
+ new_match_length = match_lengths_at_previous_index_positions_in_new[index_in_new - 1] + 1
160
+ match_lengths_at_current_index_positions_in_new[index_in_new] = new_match_length
161
+
162
+ # Keep track of the longest match so we can return it.
163
+ if new_match_length > best_match.size
164
+ start_of_best_match_in_old = index_in_old - new_match_length + 1
165
+ start_of_best_match_in_new = index_in_new - new_match_length + 1
166
+
167
+ best_match = HTMLDiff::Match.new(start_of_best_match_in_old,
168
+ start_of_best_match_in_new,
169
+ new_match_length)
170
+
171
+ # best_match = HTMLDiff::NewMatch.new(index_in_old - new_match_length + 1, index_in_old,
172
+ # index_in_new - new_match_length + 1, index_in_new
173
+ # )
174
+ end
175
+ end
176
+
177
+ # We have now added the current word to all the matches we had so far,
178
+ # making some of them longer by 1. Any matches that are shorter (didn't
179
+ # have the current word as the next word) are discarded.
180
+ match_lengths_at_previous_index_positions_in_new = match_lengths_at_current_index_positions_in_new
181
+ end
182
+
183
+ best_match
184
+ end
185
+
186
+ # @param [HTMLDiff::Match] match
187
+ def create_operation_from(match)
188
+ # A match consisting of a single space, sitting between words which are
189
+ # otherwise different, is ignored: returning here leaves the current
190
+ # positions untouched, so the space is absorbed into the next gap and the
191
+ # changes before and after it merge together.
192
+ old_text = @old_words[match.start_in_old...match.end_in_old].join
193
+ new_text = @new_words[match.start_in_new...match.end_in_new].join
194
+ return if old_text == ' ' && old_text == new_text
195
+
196
+ match_starts_at_current_position_in_old = (@position_in_old == match.start_in_old)
197
+ match_starts_at_current_position_in_new = (@position_in_new == match.start_in_new)
198
+
199
+ # Based on whether there are unmatched words before this match in the old
200
+ # and/or the new list, work out what that preceding gap represents.
201
+ action_upto_match_positions =
202
+ case [match_starts_at_current_position_in_old,
203
+ match_starts_at_current_position_in_new]
204
+ when [false, false]
205
+ :replace
206
+ when [true, false]
207
+ :insert
208
+ when [false, true]
209
+ :delete
210
+ else
211
+ # no gap before this match in either version, e.g. identical opening words
212
+ :none
213
+ end
214
+
215
+ # Record the gap before the match as an operation holding the old and
216
+ # new words that have changed.
217
+ if action_upto_match_positions != :none
218
+ operation_upto_match_positions =
219
+ Operation.new(action_upto_match_positions,
220
+ @old_words[@position_in_old...match.start_in_old],
221
+ @new_words[@position_in_new...match.start_in_new]
222
+ )
223
+ @operations << operation_upto_match_positions
224
+ end
225
+ if match.size != 0
226
+ match_operation = Operation.new(:equal,
227
+ @old_words[match.start_in_old...match.end_in_old],
228
+ @new_words[match.start_in_new...match.end_in_new]
229
+ )
230
+ @operations << match_operation
231
+ end
232
+
233
+ # Move to the end of the match (start of next difference).
234
+ @position_in_old = match.end_in_old
235
+ @position_in_new = match.end_in_new
236
+ end
237
+ end
238
+ end
@@ -0,0 +1,38 @@
1
+ module HTMLDiff
2
+ # An operation represents one difference between the old HTML and the new
3
+ # HTML. e.g. adding three letters.
4
+ # @param action can be :insert, :delete, :replace or :equal
5
+
6
+ Operation = Struct.new(:action, :old_words, :new_words)
7
+
8
+ class Operation
9
+ # Members provided by the Struct definition:
10
+ # @!method action
11
+ #   the kind of change (a Symbol such as :insert or :equal)
12
+ # @!method old_words
13
+ #   the words taken from the old version for this operation
14
+ # @!method new_words
15
+ #   the words taken from the new version for this operation
16
+
17
+ # Ignores any attributes and tells us if the tag is the same e.g. <p> and
18
+ # <p style="margin: 2px;"> are the same.
19
+ def same_tag?
20
+ pattern = /<([^>\s]+)[\s>].*/
21
+ first_tagname = pattern.match(old_text) # nil means they are not tags
22
+ first_tagname = first_tagname[1] if first_tagname
23
+
24
+ second_tagname = pattern.match(new_text)
25
+ second_tagname = second_tagname[1] if second_tagname
26
+
27
+ first_tagname && (first_tagname == second_tagname)
28
+ end
29
+
30
+ def old_text
31
+ old_words.join
32
+ end
33
+
34
+ def new_text
35
+ new_words.join
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,57 @@
1
+ module HTMLDiff
2
+ # This class is responsible for representing one word in one of the HTML
3
+ # strings. Once the HTML has been transformed into words by the ListOfWords
4
+ # class, the diff algorithm then looks for what has changed. The idea is that
5
+ # rather than the standard diff which looks character by character, this will
6
+ # work around the HTML tags so that the output looks only at the text inside
7
+ # them.
8
+ class Word
9
+ def initialize(word = '')
10
+ @word = word
11
+ end
12
+
13
+ def <<(character)
14
+ @word << character
15
+ end
16
+
17
+ def empty?
18
+ @word.empty?
19
+ end
20
+
21
+ def standalone_tag?
22
+ @word.downcase =~ /<(img|hr|br)/
23
+ end
24
+
25
+ def iframe_tag?
26
+ (@word[0..7].downcase =~ %r{^<\/?iframe ?})
27
+ end
28
+
29
+ def tag?
30
+ opening_tag? || closing_tag? || standalone_tag?
31
+ end
32
+
33
+ def opening_tag?
34
+ @word =~ %r{[\s]*<[^\/]{1}[^>]*>\s*$}
35
+ end
36
+
37
+ def closing_tag?
38
+ @word =~ %r{^\s*</[^>]+>\s*$}
39
+ end
40
+
41
+ def block_tag?
42
+ @word =~ /^<div[^<]*class="[^"]*#{block_tag_class}[^"]*"/
43
+ end
44
+
45
+ def to_s
46
+ @word
47
+ end
48
+
49
+ def ==(other)
50
+ @word == other
51
+ end
52
+
53
+ def block_tag_class
54
+ @block_tag_class ||= 'block_tag'
55
+ end
56
+ end
57
+ end
data/lib/htmldiff.rb ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+ require_relative 'htmldiff/diff_builder'
3
+ require_relative 'htmldiff/match'
4
+ require_relative 'htmldiff/operation'
5
+ require_relative 'htmldiff/word'
6
+ require_relative 'htmldiff/list_of_words'
7
+ require_relative 'htmldiff/match_finder'
8
+
9
+ # Main module for namespacing the gem.
10
+ module HTMLDiff
11
+ def self.diff(old, new, options = {})
12
+ DiffBuilder.new(old, new, options).build
13
+ end
14
+ end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Treating a block tag as a single item' do
4
+
5
+ it 'shows the whole div as an insert' do
6
+ oldv = '<p>text</p>'
7
+ newv = '<p>text<div class="block_tag"><img src="something" /></div></p>'
8
+ diff = HTMLDiff.diff(oldv, newv, {block_tag_classes: ['inserted']})
9
+ expect(diff).to eq('<p>text<ins class="diffins"><div class="block_tag"><img src="something" /></div></ins></p>')
10
+ end
11
+ end
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'iframes' do
6
+ it 'wraps iframe inserts' do
7
+ oldv = 'a b c'
8
+ newv = 'a b <iframe src="some_url"></iframe> c'
9
+ diff = HTMLDiff.diff(oldv, newv)
10
+ expect(diff).to eq('a b <ins class="diffins"><iframe src="some_url"></iframe></ins><ins class="diffins"> </ins>c')
11
+ end
12
+
13
+ it 'wraps iframe inserts with extra stuff' do
14
+ oldv = ''
15
+ newv = '
16
+ <div class="iframe-wrap scribd">
17
+ <div class="iframe-aspect-ratio">
18
+ </div>
19
+ <iframe src="url"></iframe>
20
+ </div>
21
+ '
22
+ diff = HTMLDiff.diff(oldv, newv)
23
+ expect(diff).to eq('<ins class="diffins">
24
+ </ins><ins class="diffins"><div class="iframe-wrap scribd"><ins class="diffins">
25
+ </ins><div class="iframe-aspect-ratio"><ins class="diffins">
26
+ </ins></div><ins class="diffins">
27
+ </ins><ins class="diffins"><iframe src="url"></iframe></ins><ins class="diffins">
28
+ </ins></div><ins class="diffins">
29
+ </ins></ins>')
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,49 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'img tags' do
6
+ it 'should support img tags insertion' do
7
+ oldv = 'a b c'
8
+ newv = 'a b <img src="some_url" /> c'
9
+ diff = HTMLDiff.diff(oldv, newv)
10
+ expect(diff).to eq('a b <ins class="diffins"><img src="some_url" /></ins><ins class="diffins"> </ins>c')
11
+ end
12
+
13
+ it 'wraps img tags inside other tags' do
14
+ oldv = '<p>text</p>'
15
+ newv = '<p>text<img src="something" /></p>'
16
+ diff = HTMLDiff.diff(oldv, newv)
17
+ expect(diff).to eq('<p>text<ins class="diffins"><img src="something" /></ins></p>')
18
+ end
19
+
20
+ it 'wraps img tags inserted with other tags' do
21
+ oldv = 'text'
22
+ newv = 'text<p><img src="something" /></p>'
23
+ diff = HTMLDiff.diff(oldv, newv)
24
+ expect(diff).to eq('text<ins class="diffins"><p><ins class="diffins"><img src="something" /></ins></p></ins>')
25
+ end
26
+
27
+ it 'wraps img tags inserted with other tags and new lines' do
28
+ oldv = 'text'
29
+ newv = %(text<p>\r\n<img src="something" />\r\n</p>)
30
+ diff = HTMLDiff.diff(oldv, newv)
31
+ expect(diff).to eq(%(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something" />\r\n</ins></p></ins>))
32
+ end
33
+
34
+ it 'wraps badly terminated img tags inserted with other tags and new lines' do
35
+ oldv = 'text'
36
+ newv = %(text<p>\r\n<img src="something">\r\n</p>)
37
+ diff = HTMLDiff.diff(oldv, newv)
38
+ expect(diff).to eq(%(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something">\r\n</ins></p></ins>))
39
+ end
40
+
41
+ it 'supports img tags deletion' do
42
+ oldv = 'a b <img src="some_url" /> c'
43
+ newv = 'a b c'
44
+ diff = HTMLDiff.diff(oldv, newv)
45
+ expect(diff).to eq('a b <del class="diffdel"><img src="some_url" /></del><del class="diffdel"> </del>c')
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,60 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'simple tags' do
6
+ it 'wraps deleted tags' do
7
+ doc_a = '<p> Test Paragraph </p><p>More Stuff</p>'
8
+ doc_b = '<p>Nothing!</p>'
9
+ diff = HTMLDiff.diff(doc_a, doc_b)
10
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
11
+ end
12
+
13
+ it 'wraps inserted tags' do
14
+ doc_a = '<p>Nothing!</p>'
15
+ doc_b = '<p> Test Paragraph </p><p>More Stuff</p>'
16
+ diff = HTMLDiff.diff(doc_a, doc_b)
17
+ expect(diff).to eq('<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p><ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>')
18
+ end
19
+
20
+ describe 'wrapping deleted tags even with text around them' do
21
+ it 'changes inside plus deleted consecutive paragraph, leaving text afterwards' do
22
+ doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
23
+ doc_b = '<p>Nothing!</p>weee'
24
+ diff = HTMLDiff.diff(doc_a, doc_b)
25
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
26
+ end
27
+
28
+ it 'changes inside plus deleted consecutive paragraph, plus deleted consecutive text' do
29
+ doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
30
+ doc_b = '<p>Nothing!</p>'
31
+ diff = HTMLDiff.diff(doc_a, doc_b)
32
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">weee</del><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
33
+ end
34
+
35
+ it 'changes inside plus deleted consecutive paragraph, leaving text afterwards with some extra text' do
36
+ doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>asd'
37
+ doc_b = '<p>Nothing!</p>weee asd'
38
+ diff = HTMLDiff.diff(doc_a, doc_b)
39
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffmod"><p><del class="diffmod">More Stuff</del></p></del><ins class="diffmod"> </ins>asd')
40
+ end
41
+ end
42
+
43
+ it 'wraps inserted tags even with text around' do
44
+ doc_a = '<p>Nothing!</p>weee'
45
+ doc_b = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
46
+ diff = HTMLDiff.diff(doc_a, doc_b)
47
+ expect(diff).to eq('<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>weee<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>')
48
+ end
49
+
50
+ describe 'changing the attributes of tags' do
51
+ it 'ignores a tag with new attributes' do
52
+ doc_a = 'text <p>Nothing!</p> text'
53
+ doc_b = 'text <p style="margin-left: 20px">Nothing!</p> text'
54
+ diff = HTMLDiff.diff(doc_a, doc_b)
55
+ expect(diff).to eq('text <p style="margin-left: 20px">Nothing!</p> text')
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,47 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'tables' do
6
+ it 'wraps deleted table tags' do
7
+ doc_a = '<p> Test Paragraph </p>
8
+ <p> </p>
9
+ <table><tbody><tr><td>hello</td><td>bye</td></tr></tbody></table>
10
+ <p>&nbsp;</p>
11
+ '
12
+ doc_b = '<p>Nothing!</p>'
13
+ diff = HTMLDiff.diff(doc_a, doc_b)
14
+ expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">
15
+ </del><del class="diffdel"><p><del class="diffdel"> </del></p><del class="diffdel">
16
+ </del><table><tbody><tr><td><del class="diffdel">hello</del></td><td><del class="diffdel">bye</del></td></tr></tbody></table><del class="diffdel">
17
+ </del><p><del class="diffdel">&nbsp;</del></p><del class="diffdel">
18
+ </del></del>')
19
+ end
20
+
21
+ it 'should wrap deleted table rows' do
22
+ doc_a = '<p>my table</p>
23
+ <table>
24
+ <tbody>
25
+ <tr><td>hello</td><td>bye</td></tr>
26
+ <tr><td>remove</td><td>me</td></tr>
27
+ </tbody>
28
+ </table>'
29
+ doc_b = '<p>my table</p>
30
+ <table>
31
+ <tbody>
32
+ <tr><td>hello</td><td>bye</td></tr>
33
+ </tbody>
34
+ </table>'
35
+ diff = HTMLDiff.diff(doc_a, doc_b)
36
+ expect(diff).to eq('<p>my table</p>
37
+ <table>
38
+ <tbody>
39
+ <tr><td>hello</td><td>bye</td></tr>
40
+ <del class="diffdel"><tr><td><del class="diffdel">remove</del></td>'\
41
+ '<td><del class="diffdel">me</del></td></tr><del class="diffdel">
42
+ </del></del></tbody>
43
+ </table>')
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe 'HTMLDiff' do
4
+ describe 'diff' do
5
+ describe 'text' do
6
+ it 'should diff text' do
7
+ diff = HTMLDiff.diff('a word is here', 'a nother word is there')
8
+ expect(diff).to eq("a<ins class=\"diffins\"> nother</ins> word is "\
9
+ "<del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>")
10
+ end
11
+
12
+ it 'should insert a letter and a space' do
13
+ diff = HTMLDiff.diff('a c', 'a b c')
14
+ expect(diff).to eq("a <ins class=\"diffins\">b </ins>c")
15
+ end
16
+
17
+ it 'should remove a letter and a space' do
18
+ diff = HTMLDiff.diff('a b c', 'a c')
19
+ expect(diff).to eq("a <del class=\"diffdel\">b </del>c")
20
+ end
21
+
22
+ it 'should change a letter' do
23
+ diff = HTMLDiff.diff('a b c', 'a d c')
24
+ expect(diff).to eq("a <del class=\"diffmod\">b</del><ins "\
25
+ "class=\"diffmod\">d</ins> c")
26
+ end
27
+
28
+ it 'supports Chinese' do
29
+ diff = HTMLDiff.diff('这个是中文内容, Ruby is the bast',
30
+ '这是中国语内容,Ruby is the best language.')
31
+ expect(diff).to eq("这<del class=\"diffdel\">个</del>是中<del "\
32
+ "class=\"diffmod\">文</del><ins class=\"diffmod\">国语</ins>内容<del "\
33
+ "class=\"diffmod\">, Ruby</del><ins class=\"diffmod\">,Ruby</ins> is "\
34
+ "the <del class=\"diffmod\">bast</del><ins class=\"diffmod\">best "\
35
+ 'language.</ins>')
36
+ end
37
+
38
+ it 'puts long bit of replaced text together, rather than '\
39
+ 'breaking on word boundaries' do
40
+ diff = HTMLDiff.diff('a long bit of text',
41
+ 'some totally different text')
42
+ expected = '<del class="diffmod">a long bit of</del>'\
43
+ '<ins class="diffmod">some totally different</ins> text'
44
+ expect(diff).to eq(expected)
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,53 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe HTMLDiff::ListOfWords do
4
+ describe 'breaking tags up correctly' do
5
+ it 'separates tags' do
6
+ input = '<p>input</p>'
7
+ words_as_array = HTMLDiff::ListOfWords.new(input).to_a.map(&:to_s)
8
+ expect(words_as_array).to eq %w(<p> input </p>)
9
+ end
10
+
11
+ it 'separates block tags' do
12
+ input = '<p>text<div class="block_tag"><img src="something" /></div></p>'
13
+ words_as_array = HTMLDiff::ListOfWords.new(input, {block_tag_class: 'inserted'}).to_a.map(&:to_s)
14
+ expect(words_as_array).to eq ['<p>', 'text', '<div class="block_tag"><img src="something" /></div>', '</p>']
15
+ end
16
+ end
17
+
18
+ describe 'contains_unclosed_tag?' do
19
+ it 'returns true with an open <p> tag' do
20
+ expect(described_class.new('<p>').contains_unclosed_tag?).to be_true
21
+ end
22
+
23
+ it 'returns true with an unclosed closed <p> tag with an attribute' do
24
+ html = '<p style="margin: 20px">'
25
+ expect(described_class.new(html).contains_unclosed_tag?).to be_true
26
+ end
27
+
28
+ it 'returns true with an unclosed closed <p> tag with an attribute '\
29
+ 'that contains stuff' do
30
+ html = '<p style="margin: 20px">blah'
31
+ expect(described_class.new(html).contains_unclosed_tag?).to be_true
32
+ end
33
+
34
+ it 'returns false with a properly closed <p> tag' do
35
+ expect(described_class.new('<p></p>').contains_unclosed_tag?).to be_false
36
+ end
37
+
38
+ it 'returns false with a properly closed <p> tag with an attribute' do
39
+ html = '<p style="margin: 20px"></p>'
40
+ expect(described_class.new(html).contains_unclosed_tag?).to be_false
41
+ end
42
+
43
+ it 'returns false with a properly closed <p> tag with an attribute '\
44
+ 'that contains stuff' do
45
+ html = '<p style="margin: 20px">blah</p>'
46
+ expect(described_class.new(html).contains_unclosed_tag?).to be_false
47
+ end
48
+
49
+ it 'returns false with a self closing tag' do
50
+ expect(described_class.new('<img>').contains_unclosed_tag?).to be_false
51
+ end
52
+ end
53
+ end