RubyGems - sms-htmldiff - Versions diffs - 0.0.1.1 - Mend

sms-htmldiff 0.0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +7 -0
data/Gemfile +6 -0
data/Gemfile.lock +24 -0
data/LICENSE +20 -0
data/README.md +35 -0
data/Rakefile +54 -0
data/htmldiff.gemspec +25 -0
data/lib/htmldiff/diff_builder.rb +156 -0
data/lib/htmldiff/list_of_words.rb +182 -0
data/lib/htmldiff/match.rb +17 -0
data/lib/htmldiff/match_finder.rb +238 -0
data/lib/htmldiff/operation.rb +38 -0
data/lib/htmldiff/word.rb +57 -0
data/lib/htmldiff.rb +14 -0
data/spec/diffing_output/block_tag_spec.rb +11 -0
data/spec/diffing_output/iframes_spec.rb +33 -0
data/spec/diffing_output/img_tags_spec.rb +49 -0
data/spec/diffing_output/paragraph_tags_spec.rb +60 -0
data/spec/diffing_output/tables_spec.rb +47 -0
data/spec/diffing_output/text_spec.rb +48 -0
data/spec/list_of_words_spec.rb +53 -0
data/spec/operation_spec.rb +45 -0
data/spec/spec_helper.rb +3 -0
data/spec/word_spec.rb +31 -0
metadata +93 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 1e1961c6bcec22268da84f5306440c707016b105cc76e9bed199a3c0579144dc
+  data.tar.gz: 3a77483261d5a2240e1c36e4a83c3038628ba30b3776c95a9c6dea59a126124c
+SHA512:
+  metadata.gz: cd6156d5e248ef46485acdc90672ad2dee39b6abe57bbddb80882539a23cbcfd168fba3415f8aa00848682c4363c25e5f101e7c02d23b24b0a99a23b07b8a56d
+  data.tar.gz: e999a7613321792df4a8830e9108ef9d8ad6bc3a67a735da9682fecc84928e1aed2bdf316f1e3c04bd826d8fdef106fc2590b5a00d0dcb48832d53725ec4029d

data/Gemfile ADDED Viewed

@@ -0,0 +1,6 @@
+# Bundler manifest for working on the gem: every gem below is fetched from
+# rubygems.org. No version constraints are given here; the resolved versions
+# are pinned in Gemfile.lock.
+source 'https://rubygems.org'
+# Test framework used by the files under spec/.
+gem 'rspec'
+# Runs the tasks defined in the Rakefile.
+gem 'rake'
+# HTML parser; also declared as the gem's runtime dependency in
+# htmldiff.gemspec.
+gem 'nokogiri'

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,24 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    diff-lcs (1.2.5)
+    mini_portile (0.6.2)
+    nokogiri (1.6.6.2)
+      mini_portile (~> 0.6.0)
+    rake (10.4.2)
+    rspec (2.14.1)
+      rspec-core (~> 2.14.0)
+      rspec-expectations (~> 2.14.0)
+      rspec-mocks (~> 2.14.0)
+    rspec-core (2.14.7)
+    rspec-expectations (2.14.4)
+      diff-lcs (>= 1.1.3, < 2.0)
+    rspec-mocks (2.14.4)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  nokogiri
+  rake
+  rspec

data/LICENSE ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2008 Nathan Herald
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,35 @@
+# HTMLdiff
+This gem generates nice diff outputs (in HTML) from two supplied bits of HTML
+which are (presumably) partially different. It is aimed at the limited HTML
+that one would expect a WYSIWYG editor to output.
+It is not foolproof and only gives good results with a limited (and not fully
+documented) range of HTML tags. See the specs for stuff that is known to work.
+Beyond that you're on your own!
+## Usage
+```ruby
+require 'htmldiff'
+doc_a = 'a word is here'
+doc_b = 'a nother word is there'
+HTMLDiff.diff(doc_a, doc_b)
+# => "a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>"
+```
+## Block tags
+If you have some things which need to be treated as one unified lump of HTML
+that has been added or taken away e.g. an embedded document composed of a div
+with some images in it, then add `class="block_tag"` to the opening div tag.
+The diff will then surround the whole thing with an `<ins>` or `<del>` tag,
+as well as showing the whole thing as replaced with a new version if any of
+the content changes.
+## Credits
+This is a fork of the htmldiff gem at https://github.com/myobie/htmldiff/,
+which appears to be no longer maintained. It has been refactored internally and
+made more useful for diffing the HTML generated by WYSIWYG editors.

data/Rakefile ADDED Viewed

@@ -0,0 +1,54 @@
+require 'rubygems'
+require 'rubygems/package_task'
+require 'rubygems/specification'
+require 'date'
+require 'rspec'
+require 'rspec/core/rake_task'
+GEM = 'htmldiff'
+GEM_VERSION = '0.0.1'
+AUTHOR = 'Nathan Herald'
+EMAIL = 'nathan@myobie.com'
+HOMEPAGE = 'http://github.com/myobie/htmldiff'
+SUMMARY = 'HTML diffs of text (borrowed from a wiki software I '\
+'no longer remember)'
+spec = Gem::Specification.new do |s|
+  s.name = GEM
+  s.version = GEM_VERSION
+  s.platform = Gem::Platform::RUBY
+  s.has_rdoc = true
+  s.extra_rdoc_files = %w(README LICENSE TODO)
+  s.summary = SUMMARY
+  s.description = s.summary
+  s.author = AUTHOR
+  s.email = EMAIL
+  s.homepage = HOMEPAGE
+  s.require_path = 'lib'
+  s.autorequire = GEM
+  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob('{lib,spec}/**/*')
+end
+task default: :spec
+desc 'Run specs'
+RSpec::Core::RakeTask.new do |t|
+  t.spec_files = FileList['spec/**/*_spec.rb']
+  t.spec_opts = %w(-fs --color)
+end
+Gem::PackageTask.new(spec) do |pkg|
+  pkg.gem_spec = spec
+end
+desc 'install the gem locally'
+task install: [:package] do
+  sh %(sudo gem install pkg/#{GEM}-#{GEM_VERSION})
+end
+desc 'create a gemspec file'
+task :make_spec do
+  File.open("#{GEM}.gemspec", 'w') do |file|
+    file.puts spec.to_ruby
+  end
+end

data/htmldiff.gemspec ADDED Viewed

@@ -0,0 +1,25 @@
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name             = 'sms-htmldiff'
+  s.version          = '0.0.1.1'
+  s.homepage         = 'http://github.com/stackmystack/htmldiff'
+  s.summary          = 'HTML diffs of text (borrowed from a wiki software I no longer remember)'
+  s.license          = 'MIT'
+  s.authors          = ['Nathan Herald']
+  s.email            = 'nathan@myobie.com'
+  s.date             = '2008-11-21'
+  s.rdoc_options     = ['--main', 'README.md']
+  s.extra_rdoc_files = ['README.md', 'LICENSE']
+  s.require_paths    = ['lib']
+  # Manifest
+  s.files            = `git ls-files`.split("\n")
+  s.test_files       = `git ls-files -- {test,spec,features}/*`.split("\n")
+  # Dependencies
+  s.add_runtime_dependency('nokogiri', '>= 1.6.5')
+end

data/lib/htmldiff/diff_builder.rb ADDED Viewed

@@ -0,0 +1,156 @@
+require 'nokogiri'
+module HTMLDiff
+  # Main class for building the diff output between two strings. Other classes
+  # find out where the differences actually are, then this class turns that into
+  # HTML.
+  class DiffBuilder
+    attr_reader :content
+    def initialize(old_version, new_version, options = {})
+      @options = default_options.merge options
+      @old_words = ListOfWords.new old_version, @options
+      @new_words = ListOfWords.new new_version, @options
+      @content = []
+    end
+    def default_options
+      {
+        block_tag_classes: []
+      }
+    end
+    def build
+      perform_operations
+      content.join
+    end
+    # These operations are a list of things that changed between the two
+    # versions, which now need to be turned into valid HTML that shows things
+    # with ins and del tags.
+    def operations
+      HTMLDiff::MatchFinder.new(@old_words, @new_words).operations
+    end
+    def perform_operations
+      operations.each { |op| perform_operation(op) }
+    end
+    def perform_operation(operation)
+      send operation.action, operation
+    end
+    # This is for when a chunk of text has been replaced with a different bit.
+    # We want to ignore tags that are the same e.g.
+    # '<p>' replaced by
+    # '<p class="highlight">'
+    # will come back from the diff algorithm as a replacement (tags are treated
+    # as words in their entirety), but we don't have any use for seeing this
+    # represented visually.
+    #
+    # @param operation [HTMLDiff::Operation]
+    def replace(operation)
+      # Special case: a tag has been altered so that an attribute has been
+      # added e.g. <p> becomes <p style="margin: 2px"> due to an editor button
+      # press. For this, we just show the new version, otherwise it gets messy
+      # trying to find the closing tag.
+      if operation.same_tag?
+        equal(operation)
+      else
+        delete(operation, 'diffmod')
+        insert(operation, 'diffmod')
+      end
+    end
+    # @param operation [HTMLDiff::Operation]
+    def insert(operation, tagclass = 'diffins')
+      insert_tag('ins', tagclass, operation.new_words)
+    end
+    # @param operation [HTMLDiff::Operation]
+    def delete(operation, tagclass = 'diffdel')
+      insert_tag('del', tagclass, operation.old_words)
+    end
+    # No difference between these parts of the text. No tags to insert, simply
+    # copy the matching words from one of the versions.
+    #
+    # @param operation [HTMLDiff::Operation]
+    def equal(operation)
+      @content << operation.new_text
+    end
+    # This method encloses words within a specified tag (ins or del), and adds
+    # this into @content, with a twist: if there are words contain tags, it
+    # actually creates multiple ins or del, so that they don't include any ins
+    # or del tags that are not properly nested. This handles cases like
+    # old: '<p>a</p>'
+    # new: '<p>ab</p><p>c</p>'
+    # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
+    # This still doesn't guarantee valid HTML (hint: think about diffing a text
+    # containing ins or del tags), but handles correctly more cases than the
+    # earlier version.
+    #
+    # P.S.: Spare a thought for people who write HTML browsers. They live in
+    # this... every day.
+    def insert_tag(tagname, cssclass, words)
+      wrapped = false
+      loop do
+        break if words.empty?
+        if words.first.standalone_tag?
+          tag_words = words.extract_consecutive_words! do |word|
+            word.standalone_tag?
+          end
+          @content << wrap_text_in_diff_tag(tag_words.join, tagname, cssclass)
+        elsif words.first.iframe_tag?
+          tag_words = words.extract_consecutive_words! { |word| word.iframe_tag? }
+          @content << wrap_text_in_diff_tag(tag_words.join, tagname, cssclass)
+        elsif words.first.block_tag?
+          tag_words = words.extract_consecutive_words! { |word| word.block_tag? }
+          @content << wrap_text_in_diff_tag(tag_words.join, tagname, cssclass)
+        elsif words.first.tag?
+          # If this chunk of text contains unclosed tags, then wrapping it will
+          # cause weirdness. This would be the case if we have e.g. a style
+          # applied to a paragraph tag, which will change the opening tag, but
+          # not the closing tag.
+          #
+          #
+          if !wrapped && !words.contains_unclosed_tag?
+            @content << diff_tag_start(tagname, cssclass)
+            wrapped = true
+          end
+          @content += words.extract_consecutive_words! do |word|
+            word.tag? && !word.standalone_tag? && !word.iframe_tag?
+          end
+        else
+          non_tags = words.extract_consecutive_words! do |word|
+            (word.standalone_tag? || !word.tag?)
+          end
+          unless non_tags.join.empty?
+            @content << wrap_text_in_diff_tag(non_tags.join, tagname, cssclass)
+          end
+          break if words.empty?
+        end
+      end
+      @content << diff_tag_end(tagname) if wrapped
+    end
+    def wrap_text_in_diff_tag(text, tagname, cssclass)
+      diff_tag_start(tagname, cssclass) + text + diff_tag_end(tagname)
+    end
+    def diff_tag_start(tagname, cssclass)
+      %(<#{tagname} class="#{cssclass}">)
+    end
+    def diff_tag_end(tagname)
+      %(</#{tagname}>)
+    end
+  end
+end

data/lib/htmldiff/list_of_words.rb ADDED Viewed

@@ -0,0 +1,182 @@
+module HTMLDiff
+  class ListOfWords
+    attr_reader :options
+    include Enumerable
+    def initialize(string, options = {})
+      @options = options
+      @block_tag_class = options[:block_tag_class]
+      if string.respond_to?(:all?) && string.all? { |i| i.is_a?(Word) }
+        @words = string
+      else
+        convert_html_to_list_of_words string.chars
+      end
+    end
+    def each(&block)
+      @words.each { |word| block.call(word) }
+    end
+    def [](index)
+      if index.is_a?(Range)
+        self.class.new @words[index]
+      else
+        @words[index]
+      end
+    end
+    def join(&args)
+      @words.join(args)
+    end
+    def empty?
+      count == 0
+    end
+    def extract_consecutive_words!(&condition)
+      index_of_first_tag = nil
+      @words.each_with_index do |word, i|
+        unless condition.call(word)
+          index_of_first_tag = i
+          break
+        end
+      end
+      if index_of_first_tag
+        @words.slice!(0...index_of_first_tag)
+      else
+        @words.slice!(0..@words.length)
+      end
+    end
+    def contains_unclosed_tag?
+      tags = 0
+      temp_words = @words.dup
+      while temp_words.count > 0
+        current_word = temp_words.shift
+        if current_word.standalone_tag?
+          next
+        elsif current_word.opening_tag?
+          tags += 1
+        elsif current_word.closing_tag?
+          tags -= 1
+        end
+      end
+      tags != 0
+    end
+    private
+    def convert_html_to_list_of_words(character_array)
+      @mode = :char
+      @current_word = Word.new
+      @words = []
+      @block_tags = 0
+      while character_array.length > 0
+        char = character_array.first
+        case @mode
+          when :tag
+            if end_of_tag? char
+              @current_word << '>'
+              @words << @current_word
+              @current_word = Word.new
+              if whitespace? char
+                @mode = :whitespace
+              else
+                @mode = :char
+              end
+            else
+              @current_word << char
+            end
+          when :block_tag
+            if start_of_div_tag? character_array
+              @block_tags += 1
+            elsif end_of_div_tag? character_array
+              @block_tags -= 1
+              if @block_tags == 0
+                @mode = :tag
+              end
+            end
+            @current_word << char
+          when :char
+            if start_of_tag? char
+              @words << @current_word unless @current_word.empty?
+              @current_word = Word.new('<')
+              if starts_with_block_tag character_array
+                @mode = :block_tag
+                @block_tags = 1
+              else
+                @mode = :tag
+              end
+            elsif whitespace? char
+              @words << @current_word unless @current_word.empty?
+              @current_word = Word.new char
+              @mode = :whitespace
+            elsif char? char
+              @current_word << char
+            else
+              @words << @current_word unless @current_word.empty?
+              @current_word = Word.new char
+            end
+          when :whitespace
+            if start_of_tag? char
+              @words << @current_word unless @current_word.empty?
+              @current_word = Word.new('<')
+              @mode = :tag
+            elsif whitespace? char
+              @current_word << char
+            else
+              @words << @current_word unless @current_word.empty?
+              @current_word = Word.new char
+              @mode = :char
+            end
+          else
+            fail "Unknown mode #{@mode.inspect}"
+        end
+        character_array.shift # Remove this character now we are done
+      end
+      @words << @current_word unless @current_word.empty?
+    end
+    def start_of_tag?(char)
+      char == '<'
+    end
+    def start_of_div_tag?(character_array)
+      character_array.join =~ /^<div/
+    end
+    def end_of_div_tag?(character_array)
+      character_array.join =~ /^<\/div>/
+    end
+    def whitespace?(char)
+      char =~ /\s/
+    end
+    def end_of_tag?(char)
+      char == '>'
+    end
+    def char?(char)
+      char =~ /[\w\#@]+/i
+    end
+    def standalone_tag?(item)
+      item.downcase =~ /<(img|hr|br)/
+    end
+    def starts_with_block_tag(character_array)
+      Word.new(character_array.join).block_tag?
+    end
+  end
+end

data/lib/htmldiff/match.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module HTMLDiff
+  Match = Struct.new(:start_in_old, :start_in_new, :size) do
+    def end_in_old
+      start_in_old + size
+    end
+    def end_in_new
+      start_in_new + size
+    end
+  end
+  class Match
+    # @!method start_in_old
+    # @!method start_in_new
+    # @!method size
+  end
+end