RubyGems - sms-htmldiff - Versions diffs - 0.0.1.1 - Mend

sms-htmldiff 0.0.1.1

Files changed (25) hide show

checksums.yaml +7 -0
data/Gemfile +6 -0
data/Gemfile.lock +24 -0
data/LICENSE +20 -0
data/README.md +35 -0
data/Rakefile +54 -0
data/htmldiff.gemspec +25 -0
data/lib/htmldiff/diff_builder.rb +156 -0
data/lib/htmldiff/list_of_words.rb +182 -0
data/lib/htmldiff/match.rb +17 -0
data/lib/htmldiff/match_finder.rb +238 -0
data/lib/htmldiff/operation.rb +38 -0
data/lib/htmldiff/word.rb +57 -0
data/lib/htmldiff.rb +14 -0
data/spec/diffing_output/block_tag_spec.rb +11 -0
data/spec/diffing_output/iframes_spec.rb +33 -0
data/spec/diffing_output/img_tags_spec.rb +49 -0
data/spec/diffing_output/paragraph_tags_spec.rb +60 -0
data/spec/diffing_output/tables_spec.rb +47 -0
data/spec/diffing_output/text_spec.rb +48 -0
data/spec/list_of_words_spec.rb +53 -0
data/spec/operation_spec.rb +45 -0
data/spec/spec_helper.rb +3 -0
data/spec/word_spec.rb +31 -0
metadata +93 -0

data/lib/htmldiff/match_finder.rb ADDED Viewed

@@ -0,0 +1,238 @@
+module HTMLDiff
+  # This class is responsible for comparing the list of old and new words and
+  # coming up with a list of insert, delete and replace operations, which the
+  # builder will turn into presentable HTML output.
+  class MatchFinder
+    attr_accessor :old_words, :new_words
+    def initialize(old_words, new_words)
+      @old_words = old_words
+      @new_words = new_words
+      @matching_blocks = []
+      @new_word_indices = Hash.new { |h, word| h[word] = [] }
+    end
+    def operations
+      index_new_words
+      locate_matching_blocks
+      define_operations
+      @operations
+    end
+    # This leaves us with { first => [1], 'second' => [2, 3] } to tell us where
+    # in @new_words each word appears.
+    #
+    # %w(ant bat cat ant) => { ant => [0, 3], bat => 1, cat => 2}
+    def index_new_words
+      @new_words.each_with_index { |word, i| @new_word_indices[word.to_s] << i }
+    end
+    # This gets an array of the sections of the two strings that match, then
+    # returns an array of operations that need to be performed in order to
+    # build the HTML output that will show the diff.
+    #
+    # The method is to move along the old and new strings, marking the bits
+    # between the matched portions as insert, delete or replace by creating an
+    # instance of Operation for each one.
+    def define_operations
+      # Starting point of potential difference (end of last match, or start
+      # of string)
+      @position_in_old = @position_in_new = 0
+      @operations = []
+      @matching_blocks.each do |match|
+        create_operation_from(match)
+      end
+    end
+    # The returned array is of matches in the order in which they appear in the
+    # strings. Each array item is an instance of Match, which contains the
+    # start index of the match in @old_words, the start index in @new_words,
+    # and the length in number of words.
+    def locate_matching_blocks
+      recursively_find_matching_blocks_in_range(0, @old_words.count,
+                                                0, @new_words.count)
+      # an empty match at the end forces the loop to make operations to handle
+      # the unmatched tails I'm sure it can be done more gracefully, but not at
+      # 23:52
+      @matching_blocks << HTMLDiff::Match.new(@old_words.count,
+                                              @new_words.count, 0)
+    end
+    # The first time this is called, it checks the whole of the two strings and
+    # finds the longest match between them.
+    # It then recursively checks the gaps that are left either side of the
+    # longest match, until there are no smaller matches.
+    def recursively_find_matching_blocks_in_range(start_in_old,
+                                                  end_in_old,
+                                                  start_in_new,
+                                                  end_in_new)
+      # Longest match in the given range.
+      longest_match = find_longest_match_between_ranges(start_in_old,
+                                                        end_in_old,
+                                                        start_in_new,
+                                                        end_in_new)
+      return unless longest_match.size > 0
+      if start_in_old < longest_match.start_in_old &&
+         start_in_new < longest_match.start_in_new
+        # The match is not at the start of either range.
+        # Search the gap before the longest match and add any smaller matches
+        # from there.
+        recursively_find_matching_blocks_in_range(start_in_old,
+                                                  longest_match.start_in_old,
+                                                  start_in_new,
+                                                  longest_match.start_in_new)
+      end
+      # Add the longest match
+      @matching_blocks << longest_match
+      if longest_match.end_in_old < end_in_old &&
+         longest_match.end_in_new < end_in_new
+        # The match is not at the end of either range.
+        # Search the gap after the longest match and add any smaller matches
+        # from there
+        recursively_find_matching_blocks_in_range(longest_match.end_in_old,
+                                                  end_in_old,
+                                                  longest_match.end_in_new,
+                                                  end_in_new)
+      end
+    end
+    # This will find the longest matching set of words when comparing the given
+    # ranges in @old_words and @new_words. This function is used recursively, so
+    # the variables should not be class variables.
+    #
+    # @return [HTMLDiff::Match]
+    def find_longest_match_between_ranges(start_in_old, end_in_old,
+                                          start_in_new, end_in_new)
+      best_match = HTMLDiff::Match.new 0, 0, 0
+      matches = []
+      # A match is a string of words which is in both @old_words and @new words
+      # at a certain position. Keep track of the length of matches starting at
+      # each index position in @new_words. e.g. if the match length at index
+      # 4 = 3, then that means that the fourth word in @new_words is the
+      # end of a 3-word-long match.
+      #
+      # If there are two matches of the same size, it'll get the first one.
+      match_lengths_at_previous_index_positions_in_new = Hash.new { |h, index| h[index] = 0 }
+      # Start at the beginning position in @old_words and move forwards one
+      # word at a time.
+      start_in_old.upto(end_in_old - 1) do |index_in_old|
+        # This will store the match lengths for all words so far up to the
+        # current word. Just looking at this word, the lengths will all be 1,
+        # so we check the match length for the preceding word in @new_words.
+        # If that is non-zero, it means that a previous match happened up to
+        # this point.
+        #
+        # If the current word is a continuation of a match, then we will
+        # increment the match length and store it for the current index
+        # position in @new_words. We replace the old hash because then we
+        # ignore the previous match that has now been extended and any that have
+        # stopped.
+        match_lengths_at_current_index_positions_in_new = Hash.new { |h, index| h[index] = 0 }
+        # Take the word which is at this position in @old_words,
+        # then for each position it occurs in within @new_words...
+        current_word_in_old = @old_words[index_in_old].to_s
+        @new_word_indices[current_word_in_old].each do |index_in_new|
+          # Skip if this position is before the start of the range we're
+          # checking.
+          next if index_in_new < start_in_new
+          # Since the indices in @new_words start at the earliest occurrence
+          # and are in order, if we are now after the end of the range we are
+          # checking, then all later occurrences can be ignored.
+          break if index_in_new >= end_in_new
+          # Add 1 to the length of the match we have for the previous word
+          # position in @new_words. i.e. we are moving along @old words,
+          # ticking off the words in @new_words as we go.
+          #
+          # Will be zero if the previous word in @new_words has not been marked
+          # as a match.
+          new_match_length = match_lengths_at_previous_index_positions_in_new[index_in_new - 1] + 1
+          match_lengths_at_current_index_positions_in_new[index_in_new] = new_match_length
+          # Keep track of the longest match so we can return it.
+          if new_match_length > best_match.size
+            start_of_best_match_in_old = index_in_old - new_match_length + 1
+            start_of_best_match_in_new = index_in_new - new_match_length + 1
+            best_match = HTMLDiff::Match.new(start_of_best_match_in_old,
+                                             start_of_best_match_in_new,
+                                             new_match_length)
+            # best_match = HTMLDiff::NewMatch.new(index_in_old - new_match_length + 1, index_in_old,
+            #                                     index_in_new - new_match_length + 1, index_in_new
+            #                                  )
+          end
+        end
+        # We have now added the current word to all the matches we had so far,
+        # making some of them longer by 1. Any matches that are shorter (didn't
+        # have the current word as the next word) are discarded.
+        match_lengths_at_previous_index_positions_in_new = match_lengths_at_current_index_positions_in_new
+      end
+      best_match
+    end
+    # @param [HTMLDiff::Match] match
+    def create_operation_from(match)
+      # We have a problem with single space matches found in between words
+      # which are otherwise different. If we find a match that is just a
+      # single space, then we should ignore it so that the # changes before
+      # and after it merge together.
+      old_text = @old_words[match.start_in_old...match.end_in_old].join
+      new_text = @new_words[match.start_in_new...match.end_in_new].join
+      return if old_text == ' ' && old_text == new_text
+      match_starts_at_current_position_in_old = (@position_in_old == match.start_in_old)
+      match_starts_at_current_position_in_new = (@position_in_new == match.start_in_new)
+      # Based on where the match starts and ends, work out what the preceding
+      # non-matching bit represents.
+      action_upto_match_positions =
+        case [match_starts_at_current_position_in_old,
+              match_starts_at_current_position_in_new]
+        when [false, false]
+          :replace
+        when [true, false]
+          :insert
+        when [false, true]
+          :delete
+        else
+          # this happens if the first few words are same in both versions
+          :none
+        end
+      # This operation will add the <ins> or <del> tag, plus the content
+      # that has changed.
+      if action_upto_match_positions != :none
+        operation_upto_match_positions =
+          Operation.new(action_upto_match_positions,
+                        @old_words[@position_in_old...match.start_in_old],
+                        @new_words[@position_in_new...match.start_in_new]
+          )
+        @operations << operation_upto_match_positions
+      end
+      if match.size != 0
+        match_operation = Operation.new(:equal,
+                                        @old_words[match.start_in_old...match.end_in_old],
+                                        @new_words[match.start_in_new...match.end_in_new]
+        )
+        @operations << match_operation
+      end
+      # Move to the end of the match (start of next difference).
+      @position_in_old = match.end_in_old
+      @position_in_new = match.end_in_new
+    end
+  end
+end

data/lib/htmldiff/operation.rb ADDED Viewed

@@ -0,0 +1,38 @@
+module HTMLDiff
+  # An operation represents one difference between the old HTML and the new
+  # HTML. e.g. adding three letters.
+  # @param operation can be :insert, :delete or :equal
+  Operation = Struct.new(:action, :old_words, :new_words)
+  class Operation
+    # @!method action
+    # @!method start_in_old
+    # @!method end_in_old
+    # @!method start_in_new
+    # @!method end_in_new
+    # @!method old_words
+    # @!method new_words
+    # Ignores any attributes and tells us if the tag is the same e.g. <p> and
+    # <p style="margin: 2px;"> are the same.
+    def same_tag?
+      pattern = /<([^>\s]+)[\s>].*/
+      first_tagname = pattern.match(old_text) # nil means they are not tags
+      first_tagname = first_tagname[1] if first_tagname
+      second_tagname = pattern.match(new_text)
+      second_tagname = second_tagname[1] if second_tagname
+      first_tagname && (first_tagname == second_tagname)
+    end
+    def old_text
+      old_words.join
+    end
+    def new_text
+      new_words.join
+    end
+  end
+end

data/lib/htmldiff/word.rb ADDED Viewed

@@ -0,0 +1,57 @@
+module HTMLDiff
+  # This class is responsible for representing one word in one of the HTML
+  # strings. Once the HTML has been transformed into words by the ListOfWords
+  # class, the diff algorithm then looks for what has changed. The idea is that
+  # rather than the standard diff which looks character by character, this will
+  # work around the HTML tags so that the output looks only at the text inside
+  # them.
+  class Word
+    def initialize(word = '')
+      @word = word
+    end
+    def <<(character)
+      @word << character
+    end
+    def empty?
+      @word.empty?
+    end
+    def standalone_tag?
+      @word.downcase =~ /<(img|hr|br)/
+    end
+    def iframe_tag?
+      (@word[0..7].downcase =~ %r{^<\/?iframe ?})
+    end
+    def tag?
+      opening_tag? || closing_tag? || standalone_tag?
+    end
+    def opening_tag?
+      @word =~ %r{[\s]*<[^\/]{1}[^>]*>\s*$}
+    end
+    def closing_tag?
+      @word =~ %r{^\s*</[^>]+>\s*$}
+    end
+    def block_tag?
+      @word =~ /^<div[^<]*class="[^"]*#{block_tag_class}[^"]*"/
+    end
+    def to_s
+      @word
+    end
+    def ==(other)
+      @word == other
+    end
+    def block_tag_class
+      @block_tag_class ||= 'block_tag'
+    end
+  end
+end

data/lib/htmldiff.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# encoding: utf-8
+require_relative 'htmldiff/diff_builder'
+require_relative 'htmldiff/match'
+require_relative 'htmldiff/operation'
+require_relative 'htmldiff/word'
+require_relative 'htmldiff/list_of_words'
+require_relative 'htmldiff/match_finder'
+# Main module for namespacing the gem.
+module HTMLDiff
+  def self.diff(old, new, options = {})
+    DiffBuilder.new(old, new, options).build
+  end
+end

data/spec/diffing_output/block_tag_spec.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require 'spec_helper'
+# Checks that a <div> carrying the block tag class is reported as a single
+# insert instead of being diffed piece by piece.
+# NOTE(review): the option key used here is block_tag_classes (plural, an
+# array) while spec/list_of_words_spec.rb passes block_tag_class (singular, a
+# string), and the markup uses class="block_tag" rather than 'inserted' --
+# confirm which option DiffBuilder actually reads.
+describe 'Treating a block tag as a single item' do
+  it 'shows the whole div as an insert' do
+    oldv = '<p>text</p>'
+    newv = '<p>text<div class="block_tag"><img src="something" /></div></p>'
+    diff = HTMLDiff.diff(oldv, newv, {block_tag_classes: ['inserted']})
+    expect(diff).to eq('<p>text<ins class="diffins"><div class="block_tag"><img src="something" /></div></ins></p>')
+  end
+end

data/spec/diffing_output/iframes_spec.rb ADDED Viewed

@@ -0,0 +1,33 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Covers how inserted <iframe> elements are wrapped in the diff output.
+describe 'HTMLDiff' do
+  describe 'diff' do
+    describe 'iframes' do
+      # The iframe and the space that follows it are expected in two separate
+      # <ins> elements.
+      it 'wraps iframe inserts' do
+        oldv = 'a b c'
+        newv = 'a b <iframe src="some_url"></iframe> c'
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('a b <ins class="diffins"><iframe src="some_url"></iframe></ins><ins class="diffins"> </ins>c')
+      end
+      # The old document is empty, so all of the new markup is expected to be
+      # marked as inserted. The whitespace inside the string literals below is
+      # significant.
+      it 'wraps iframe inserts with extra stuff' do
+        oldv = ''
+        newv = '
+      <div class="iframe-wrap scribd">
+      <div class="iframe-aspect-ratio">
+      </div>
+      <iframe src="url"></iframe>
+      </div>
+  '
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('<ins class="diffins">
+      </ins><ins class="diffins"><div class="iframe-wrap scribd"><ins class="diffins">
+      </ins><div class="iframe-aspect-ratio"><ins class="diffins">
+      </ins></div><ins class="diffins">
+      </ins><ins class="diffins"><iframe src="url"></iframe></ins><ins class="diffins">
+      </ins></div><ins class="diffins">
+  </ins></ins>')
+      end
+    end
+  end
+end

data/spec/diffing_output/img_tags_spec.rb ADDED Viewed

@@ -0,0 +1,49 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Covers insertion and deletion of <img> tags, both on their own and nested
+# inside other tags.
+describe 'HTMLDiff' do
+  describe 'diff' do
+    describe 'img tags' do
+      it 'should support img tags insertion' do
+        oldv = 'a b c'
+        newv = 'a b <img src="some_url" /> c'
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('a b <ins class="diffins"><img src="some_url" /></ins><ins class="diffins"> </ins>c')
+      end
+      it 'wraps img tags inside other tags' do
+        oldv = '<p>text</p>'
+        newv = '<p>text<img src="something" /></p>'
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('<p>text<ins class="diffins"><img src="something" /></ins></p>')
+      end
+      # When the surrounding <p> is new as well, the expected output nests one
+      # <ins> around the paragraph and another around the image.
+      it 'wraps img tags inserted with other tags' do
+        oldv = 'text'
+        newv = 'text<p><img src="something" /></p>'
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('text<ins class="diffins"><p><ins class="diffins"><img src="something" /></ins></p></ins>')
+      end
+      it 'wraps img tags inserted with other tags and new lines' do
+        oldv = 'text'
+        newv = %(text<p>\r\n<img src="something" />\r\n</p>)
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq(%(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something" />\r\n</ins></p></ins>))
+      end
+      # Same as above, but the <img> is written without the self-closing slash.
+      it 'wraps badly terminated img tags inserted with other tags and new lines' do
+        oldv = 'text'
+        newv = %(text<p>\r\n<img src="something">\r\n</p>)
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq(%(text<ins class="diffins"><p><ins class="diffins">\r\n<img src="something">\r\n</ins></p></ins>))
+      end
+      it 'supports img tags deletion' do
+        oldv = 'a b <img src="some_url" /> c'
+        newv = 'a b c'
+        diff = HTMLDiff.diff(oldv, newv)
+        expect(diff).to eq('a b <del class="diffdel"><img src="some_url" /></del><del class="diffdel"> </del>c')
+      end
+    end
+  end
+end

data/spec/diffing_output/paragraph_tags_spec.rb ADDED Viewed

@@ -0,0 +1,60 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Covers <p> tags being added, removed or given new attributes, with and
+# without loose text around them.
+describe 'HTMLDiff' do
+  describe 'diff' do
+    describe 'simple tags' do
+      # In the expected strings, replaced text carries the class "diffmod",
+      # while purely deleted or inserted content carries "diffdel" or
+      # "diffins".
+      it 'wraps deleted tags' do
+        doc_a = '<p> Test Paragraph </p><p>More Stuff</p>'
+        doc_b = '<p>Nothing!</p>'
+        diff = HTMLDiff.diff(doc_a, doc_b)
+        expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
+      end
+      it 'wraps inserted tags' do
+        doc_a = '<p>Nothing!</p>'
+        doc_b = '<p> Test Paragraph </p><p>More Stuff</p>'
+        diff = HTMLDiff.diff(doc_a, doc_b)
+        expect(diff).to eq('<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p><ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>')
+      end
+      describe 'wrapping deleted tags even with text around them' do
+        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards' do
+          doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
+          doc_b = '<p>Nothing!</p>weee'
+          diff = HTMLDiff.diff(doc_a, doc_b)
+          expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
+        end
+        it 'changes inside plus deleted consecutive paragraph, plus deleted consecutive text' do
+          doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
+          doc_b = '<p>Nothing!</p>'
+          diff = HTMLDiff.diff(doc_a, doc_b)
+          expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">weee</del><del class="diffdel"><p><del class="diffdel">More Stuff</del></p></del>')
+        end
+        it 'changes inside plus deleted consecutive paragraph, leaving text afterwards with some extra text' do
+          doc_a = '<p> Test Paragraph </p>weee<p>More Stuff</p>asd'
+          doc_b = '<p>Nothing!</p>weee asd'
+          diff = HTMLDiff.diff(doc_a, doc_b)
+          expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p>weee<del class="diffmod"><p><del class="diffmod">More Stuff</del></p></del><ins class="diffmod"> </ins>asd')
+        end
+      end
+      it 'wraps inserted tags even with text around' do
+        doc_a = '<p>Nothing!</p>weee'
+        doc_b = '<p> Test Paragraph </p>weee<p>More Stuff</p>'
+        diff = HTMLDiff.diff(doc_a, doc_b)
+        expect(diff).to eq('<p><del class="diffmod">Nothing!</del><ins class="diffmod"> Test Paragraph </ins></p>weee<ins class="diffins"><p><ins class="diffins">More Stuff</ins></p></ins>')
+      end
+      describe 'changing the attributes of tags' do
+        # A tag that only gained attributes is expected to appear in its new
+        # form with no <ins> or <del> markup around it.
+        it 'ignores a tag with new attributes' do
+          doc_a = 'text <p>Nothing!</p> text'
+          doc_b = 'text <p style="margin-left: 20px">Nothing!</p> text'
+          diff = HTMLDiff.diff(doc_a, doc_b)
+          expect(diff).to eq('text <p style="margin-left: 20px">Nothing!</p> text')
+        end
+      end
+    end
+  end
+end

data/spec/diffing_output/tables_spec.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Covers deleted tables and deleted table rows. The documents are multi-line
+# string literals, so the line breaks and indentation inside them are part of
+# the input and of the expected output.
+describe 'HTMLDiff' do
+  describe 'diff' do
+    describe 'tables' do
+      it 'wraps deleted table tags' do
+        doc_a = '<p> Test Paragraph </p>
+        <p> </p>
+        <table><tbody><tr><td>hello</td><td>bye</td></tr></tbody></table>
+        <p>&nbsp;</p>
+        '
+        doc_b = '<p>Nothing!</p>'
+        diff = HTMLDiff.diff(doc_a, doc_b)
+        expect(diff).to eq('<p><del class="diffmod"> Test Paragraph </del><ins class="diffmod">Nothing!</ins></p><del class="diffdel">
+        </del><del class="diffdel"><p><del class="diffdel"> </del></p><del class="diffdel">
+        </del><table><tbody><tr><td><del class="diffdel">hello</del></td><td><del class="diffdel">bye</del></td></tr></tbody></table><del class="diffdel">
+        </del><p><del class="diffdel">&nbsp;</del></p><del class="diffdel">
+        </del></del>')
+      end
+      # Only the second row differs between the two documents; the first row
+      # is expected to come through unchanged.
+      it 'should wrap deleted table rows' do
+        doc_a = '<p>my table</p>
+        <table>
+        <tbody>
+        <tr><td>hello</td><td>bye</td></tr>
+        <tr><td>remove</td><td>me</td></tr>
+        </tbody>
+        </table>'
+        doc_b = '<p>my table</p>
+        <table>
+        <tbody>
+        <tr><td>hello</td><td>bye</td></tr>
+        </tbody>
+        </table>'
+        diff = HTMLDiff.diff(doc_a, doc_b)
+        expect(diff).to eq('<p>my table</p>
+        <table>
+        <tbody>
+        <tr><td>hello</td><td>bye</td></tr>
+        <del class="diffdel"><tr><td><del class="diffdel">remove</del></td>'\
+        '<td><del class="diffdel">me</del></td></tr><del class="diffdel">
+        </del></del></tbody>
+        </table>')
+      end
+    end
+  end
+end

data/spec/diffing_output/text_spec.rb ADDED Viewed

@@ -0,0 +1,48 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Covers diffs of plain text with no HTML tags involved.
+describe 'HTMLDiff' do
+  describe 'diff' do
+    describe 'text' do
+      it 'should diff text' do
+        diff = HTMLDiff.diff('a word is here', 'a nother word is there')
+        expect(diff).to eq("a<ins class=\"diffins\"> nother</ins> word is "\
+        "<del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>")
+      end
+      it 'should insert a letter and a space' do
+        diff = HTMLDiff.diff('a c', 'a b c')
+        expect(diff).to eq("a <ins class=\"diffins\">b </ins>c")
+      end
+      it 'should remove a letter and a space' do
+        diff = HTMLDiff.diff('a b c', 'a c')
+        expect(diff).to eq("a <del class=\"diffdel\">b </del>c")
+      end
+      it 'should change a letter' do
+        diff = HTMLDiff.diff('a b c', 'a d c')
+        expect(diff).to eq("a <del class=\"diffmod\">b</del><ins "\
+        "class=\"diffmod\">d</ins> c")
+      end
+      # The expected output marks individual Chinese characters as deleted or
+      # replaced. The new text uses a full-width comma where the old text has
+      # an ASCII comma followed by a space.
+      it 'supports Chinese' do
+        diff = HTMLDiff.diff('这个是中文内容, Ruby is the bast',
+                             '这是中国语内容，Ruby is the best language.')
+        expect(diff).to eq("这<del class=\"diffdel\">个</del>是中<del "\
+        "class=\"diffmod\">文</del><ins class=\"diffmod\">国语</ins>内容<del "\
+        "class=\"diffmod\">, Ruby</del><ins class=\"diffmod\">，Ruby</ins> is "\
+        "the <del class=\"diffmod\">bast</del><ins class=\"diffmod\">best "\
+        'language.</ins>')
+      end
+      # The single-space matches between the differing words must not split
+      # the replacement into one <del>/<ins> pair per word.
+      it 'puts long bit of replaced text together, rather than '\
+      'breaking on word boundaries' do
+        diff = HTMLDiff.diff('a long bit of text',
+                             'some totally different text')
+        expected = '<del class="diffmod">a long bit of</del>'\
+        '<ins class="diffmod">some totally different</ins> text'
+        expect(diff).to eq(expected)
+      end
+    end
+  end
+end

data/spec/list_of_words_spec.rb ADDED Viewed

@@ -0,0 +1,53 @@
+require File.dirname(__FILE__) + '/spec_helper'
+# Covers how ListOfWords splits HTML into words and how it detects tags that
+# have been opened but not closed.
+# NOTE(review): be_true / be_false are RSpec 2 matchers; RSpec 3 replaced them
+# with be_truthy / be_falsey -- confirm the RSpec version the gem is locked to
+# before upgrading.
+describe HTMLDiff::ListOfWords do
+  describe 'breaking tags up correctly' do
+    it 'separates tags' do
+      input = '<p>input</p>'
+      words_as_array = HTMLDiff::ListOfWords.new(input).to_a.map(&:to_s)
+      expect(words_as_array).to eq %w(<p> input </p>)
+    end
+    # The whole <div class="block_tag"> element, including its contents, is
+    # expected to come back as one word.
+    # NOTE(review): the option passed is block_tag_class: 'inserted' while the
+    # markup uses class="block_tag" -- confirm whether the option is meant to
+    # take effect here.
+    it 'separates block tags' do
+      input = '<p>text<div class="block_tag"><img src="something" /></div></p>'
+      words_as_array = HTMLDiff::ListOfWords.new(input, {block_tag_class: 'inserted'}).to_a.map(&:to_s)
+      expect(words_as_array).to eq ['<p>', 'text', '<div class="block_tag"><img src="something" /></div>', '</p>']
+    end
+  end
+  describe 'contains_unclosed_tag?' do
+    it 'returns true with an open <p> tag' do
+      expect(described_class.new('<p>').contains_unclosed_tag?).to be_true
+    end
+    it 'returns true with an unclosed closed <p> tag with an attribute' do
+      html = '<p style="margin: 20px">'
+      expect(described_class.new(html).contains_unclosed_tag?).to be_true
+    end
+    it 'returns true with an unclosed closed <p> tag with an attribute '\
+    'that contains stuff' do
+      html = '<p style="margin: 20px">blah'
+      expect(described_class.new(html).contains_unclosed_tag?).to be_true
+    end
+    it 'returns false with a properly closed <p> tag' do
+      expect(described_class.new('<p></p>').contains_unclosed_tag?).to be_false
+    end
+    it 'returns false with a properly closed <p> tag with an attribute' do
+      html = '<p style="margin: 20px"></p>'
+      expect(described_class.new(html).contains_unclosed_tag?).to be_false
+    end
+    it 'returns false with a properly closed <p> tag with an attribute '\
+    'that contains stuff' do
+      html = '<p style="margin: 20px">blah</p>'
+      expect(described_class.new(html).contains_unclosed_tag?).to be_false
+    end
+    # An <img> written without a closing slash is not counted as unclosed.
+    it 'returns false with a self closing tag' do
+      expect(described_class.new('<img>').contains_unclosed_tag?).to be_false
+    end
+  end
+end