sms-htmldiff 0.0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1e1961c6bcec22268da84f5306440c707016b105cc76e9bed199a3c0579144dc
4
+ data.tar.gz: 3a77483261d5a2240e1c36e4a83c3038628ba30b3776c95a9c6dea59a126124c
5
+ SHA512:
6
+ metadata.gz: cd6156d5e248ef46485acdc90672ad2dee39b6abe57bbddb80882539a23cbcfd168fba3415f8aa00848682c4363c25e5f101e7c02d23b24b0a99a23b07b8a56d
7
+ data.tar.gz: e999a7613321792df4a8830e9108ef9d8ad6bc3a67a735da9682fecc84928e1aed2bdf316f1e3c04bd826d8fdef106fc2590b5a00d0dcb48832d53725ec4029d
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Gems are resolved against the public RubyGems index.
source 'https://rubygems.org'

# HTML parsing library required by the diff code.
gem 'nokogiri'

# Tooling for running tasks and specs.
gem 'rake'
gem 'rspec'
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.2.5)
5
+ mini_portile (0.6.2)
6
+ nokogiri (1.6.6.2)
7
+ mini_portile (~> 0.6.0)
8
+ rake (10.4.2)
9
+ rspec (2.14.1)
10
+ rspec-core (~> 2.14.0)
11
+ rspec-expectations (~> 2.14.0)
12
+ rspec-mocks (~> 2.14.0)
13
+ rspec-core (2.14.7)
14
+ rspec-expectations (2.14.4)
15
+ diff-lcs (>= 1.1.3, < 2.0)
16
+ rspec-mocks (2.14.4)
17
+
18
+ PLATFORMS
19
+ ruby
20
+
21
+ DEPENDENCIES
22
+ nokogiri
23
+ rake
24
+ rspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # HTMLdiff
2
+
3
+ This gem generates nice diff outputs (in HTML) from two supplied bits of HTML
4
+ which are (presumably) partially different. It is aimed at the limited HTML
5
+ that one would expect to be output by a WYSIWYG editor.
6
+
7
+ It is not foolproof and only gives good results with a limited (and not fully
8
+ documented) range of HTML tags. See the specs for stuff that is known to work.
9
+ Beyond that you're on your own!
10
+
11
+ ## Usage
12
+
13
+ ```
14
+ doc_a = 'a word is here'
15
+ doc_b = 'a nother word is there'
16
+
17
+ HTMLDiff.diff(doc_a, doc_b)
18
+
19
+ # => 'a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>'
20
+ ```
21
+
22
+ ## Block tags
23
+
24
+ If you have some things which need to be treated as one unified lump of HTML
25
+ that has been added or taken away e.g. an embedded document composed of a div
26
+ with some images in it, then add `class="block_tag"` to the opening div tag.
27
+ The diff will then surround the whole thing with an `<ins>` or `<del>` tag,
28
+ as well as showing the whole thing as replaced with a new version if any of
29
+ the content changes.
30
+
31
+ ## Credits
32
+
33
+ This is a fork of the htmldiff gem here https://github.com/myobie/htmldiff/
34
+ which appears to no longer be maintained. It has been refactored internally and
35
+ made to be more useful when comparing the diff generated by HTML editors.
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems'
2
+ require 'rubygems/package_task'
3
+ require 'rubygems/specification'
4
+ require 'date'
5
+ require 'rspec'
6
+ require 'rspec/core/rake_task'
7
+
8
# Packaging metadata shared by the tasks below.
# NOTE(review): htmldiff.gemspec in this package publishes the code as
# 'sms-htmldiff' 0.0.1.1, while these constants still carry a different
# name/version -- confirm which of the two is authoritative.
GEM = 'htmldiff'
GEM_VERSION = '0.0.1'
AUTHOR = 'Nathan Herald'
EMAIL = 'nathan@myobie.com'
HOMEPAGE = 'http://github.com/myobie/htmldiff'
SUMMARY = 'HTML diffs of text (borrowed from a wiki software I '\
          'no longer remember)'

# Top-level files to ship. Only files that actually exist are listed, because
# a missing entry makes the package task fail.
doc_files = %w(README.md LICENSE).select { |path| File.exist?(path) }
root_files = %w(LICENSE README.md Rakefile).select { |path| File.exist?(path) }

spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.extra_rdoc_files = doc_files
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = root_files + Dir.glob('{lib,spec}/**/*')
end

task default: :spec

desc 'Run specs'
RSpec::Core::RakeTask.new do |t|
  # RSpec 2 task API: `pattern` / `rspec_opts` replace the RSpec 1
  # `spec_files` / `spec_opts` setters, which this task class does not define.
  t.pattern = 'spec/**/*_spec.rb'
  t.rspec_opts = %w(--format documentation --color)
end

Gem::PackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc 'install the gem locally'
task install: [:package] do
  # The package task writes pkg/<name>-<version>.gem; the extension is needed
  # so that `gem install` is given the path of the built file.
  sh %(sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem)
end

desc 'create a gemspec file'
task :make_spec do
  # NOTE(review): this writes to "htmldiff.gemspec", the same filename as the
  # gemspec shipped with the package -- confirm overwriting it is intended.
  File.open("#{GEM}.gemspec", 'w') do |file|
    file.puts spec.to_ruby
  end
end
data/htmldiff.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for the sms-htmldiff package.
Gem::Specification.new do |s|
  s.name = 'sms-htmldiff'
  s.version = '0.0.1.1'
  s.homepage = 'http://github.com/stackmystack/htmldiff'
  s.summary = 'HTML diffs of text (borrowed from a wiki software I no longer remember)'
  s.license = 'MIT'

  s.authors = ['Nathan Herald']
  s.email = 'nathan@myobie.com'
  s.date = '2008-11-21'

  s.rdoc_options = ['--main', 'README.md']
  s.extra_rdoc_files = ['README.md', 'LICENSE']

  s.require_paths = ['lib']

  # Manifest: every file tracked by git.
  s.files = `git ls-files`.split("\n")
  # Test files are filtered out of the manifest in Ruby. The previous
  # `git ls-files -- {test,spec,features}/*` relied on shell brace expansion,
  # which backticks do not get when /bin/sh is a plain POSIX shell.
  s.test_files = s.files.grep(%r{\A(?:test|spec|features)/})

  # Dependencies
  s.add_runtime_dependency('nokogiri', '>= 1.6.5')
end
@@ -0,0 +1,156 @@
1
+ require 'nokogiri'
2
+
3
module HTMLDiff
  # Main class for building the diff output between two strings. Other classes
  # find out where the differences actually are, then this class turns that
  # into HTML.
  class DiffBuilder
    # Predicates identifying words that are wrapped whole in their own ins/del
    # tag. They are checked in this order against the first remaining word.
    ATOMIC_WORD_PREDICATES = [:standalone_tag?, :iframe_tag?, :block_tag?].freeze

    # @return [Array] the output fragments collected so far
    attr_reader :content

    # @param old_version [String] the original HTML
    # @param new_version [String] the changed HTML
    # @param options [Hash] merged over {#default_options} and handed to both
    #   word lists
    def initialize(old_version, new_version, options = {})
      @options = default_options.merge options
      @old_words = ListOfWords.new old_version, @options
      @new_words = ListOfWords.new new_version, @options
      @content = []
    end

    # @return [Hash] the options used when the caller supplies none
    def default_options
      { block_tag_classes: [] }
    end

    # Runs every operation and returns the collected fragments as one string.
    #
    # @return [String]
    def build
      perform_operations
      content.join
    end

    # These operations are a list of things that changed between the two
    # versions, which now need to be turned into valid HTML that shows things
    # with ins and del tags.
    def operations
      HTMLDiff::MatchFinder.new(@old_words, @new_words).operations
    end

    def perform_operations
      operations.each { |op| perform_operation(op) }
    end

    # Dispatches to the method named by the operation's action.
    # `public_send` keeps the dispatch restricted to this class's public
    # methods, so an unexpected action name cannot reach private methods.
    #
    # @param operation [HTMLDiff::Operation]
    def perform_operation(operation)
      public_send operation.action, operation
    end

    # This is for when a chunk of text has been replaced with a different bit.
    # We want to ignore tags that are the same e.g.
    # '<p>' replaced by
    # '<p class="highlight">'
    # will come back from the diff algorithm as a replacement (tags are treated
    # as words in their entirety), but we don't have any use for seeing this
    # represented visually.
    #
    # @param operation [HTMLDiff::Operation]
    def replace(operation)
      # Special case: a tag has been altered so that an attribute has been
      # added e.g. <p> becomes <p style="margin: 2px"> due to an editor button
      # press. For this, we just show the new version, otherwise it gets messy
      # trying to find the closing tag.
      return equal(operation) if operation.same_tag?

      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # @param operation [HTMLDiff::Operation]
    # @param tagclass [String] CSS class put on the ins tag
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, operation.new_words)
    end

    # @param operation [HTMLDiff::Operation]
    # @param tagclass [String] CSS class put on the del tag
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, operation.old_words)
    end

    # No difference between these parts of the text. No tags to insert, simply
    # copy the matching words from one of the versions.
    #
    # @param operation [HTMLDiff::Operation]
    def equal(operation)
      @content << operation.new_text
    end

    # This method encloses words within a specified tag (ins or del), and adds
    # this into @content, with a twist: if the words contain tags, it actually
    # creates multiple ins or del, so that they don't include any ins or del
    # tags that are not properly nested. This still doesn't guarantee valid
    # HTML (hint: think about diffing a text containing ins or del tags), but
    # handles correctly more cases than the earlier version.
    #
    # The words are consumed from the front of `words` as they are emitted, so
    # the list is empty when this method returns.
    #
    # @param tagname [String] 'ins' or 'del'
    # @param cssclass [String] CSS class for the generated tags
    # @param words [HTMLDiff::ListOfWords] words to emit; mutated
    def insert_tag(tagname, cssclass, words)
      wrapped = false

      until words.empty?
        first_word = words.first
        atomic = ATOMIC_WORD_PREDICATES.find { |name| first_word.public_send(name) }

        if atomic
          # A run of standalone / iframe / block words is wrapped as one unit.
          run = words.extract_consecutive_words!(&atomic)
          @content << wrap_text_in_diff_tag(run.join, tagname, cssclass)
        elsif first_word.tag?
          # If this chunk of text contains unclosed tags, then wrapping it will
          # cause weirdness. This would be the case if we have e.g. a style
          # applied to a paragraph tag, which will change the opening tag, but
          # not the closing tag.
          if !wrapped && !words.contains_unclosed_tag?
            @content << diff_tag_start(tagname, cssclass)
            wrapped = true
          end
          # The tags themselves are copied through unwrapped.
          @content.concat(
            words.extract_consecutive_words! do |word|
              word.tag? && !word.standalone_tag? && !word.iframe_tag?
            end
          )
        else
          text = words.extract_consecutive_words! do |word|
            word.standalone_tag? || !word.tag?
          end.join
          @content << wrap_text_in_diff_tag(text, tagname, cssclass) unless text.empty?
        end
      end

      @content << diff_tag_end(tagname) if wrapped
    end

    # @return [String] `text` enclosed in an opening and closing diff tag
    def wrap_text_in_diff_tag(text, tagname, cssclass)
      diff_tag_start(tagname, cssclass) + text + diff_tag_end(tagname)
    end

    # @return [String] opening tag carrying the CSS class
    def diff_tag_start(tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">)
    end

    # @return [String] closing tag
    def diff_tag_end(tagname)
      %(</#{tagname}>)
    end
  end
end
@@ -0,0 +1,182 @@
1
module HTMLDiff
  # An ordered list of Word objects produced by splitting a piece of HTML into
  # tags, runs of whitespace, runs of word characters and single other
  # characters.
  class ListOfWords
    include Enumerable

    # @return [Hash] the options this list was created with
    attr_reader :options

    # @param string [String, Array<Word>] HTML to split into words, or an
    #   already-split collection of Word objects, which is used as-is
    # @param options [Hash]
    def initialize(string, options = {})
      @options = options
      # NOTE(review): DiffBuilder's default options use the key
      # :block_tag_classes (plural), and this variable is not read anywhere in
      # this class -- confirm whether it is still needed.
      @block_tag_class = options[:block_tag_class]

      if string.respond_to?(:all?) && string.all? { |i| i.is_a?(Word) }
        @words = string
      else
        convert_html_to_list_of_words string.chars
      end
    end

    # Yields each word in order; without a block, returns an Enumerator.
    def each(&block)
      return enum_for(:each) unless block_given?

      @words.each(&block)
    end

    # @param index [Integer, Range]
    # @return [Word, nil, ListOfWords] a single word for an Integer; a new
    #   list carrying the same options for a Range (empty when the range
    #   starts beyond the end of the list)
    def [](index)
      if index.is_a?(Range)
        self.class.new(@words[index] || [], @options)
      else
        @words[index]
      end
    end

    # @param separator [String, nil] placed between words, as in Array#join
    # @return [String]
    def join(separator = nil)
      @words.join(separator)
    end

    def empty?
      @words.empty?
    end

    # Removes and returns the leading run of words for which the block is
    # truthy. The run stops at the first word that fails the condition.
    #
    # @return [Array<Word>] the removed words (possibly empty)
    def extract_consecutive_words!(&condition)
      run_length = @words.take_while(&condition).length
      @words.slice!(0, run_length)
    end

    # @return [Boolean] true when the number of opening tags differs from the
    #   number of closing tags; standalone tags are not counted
    def contains_unclosed_tag?
      balance = @words.reduce(0) do |open, word|
        if word.standalone_tag?
          open
        elsif word.opening_tag?
          open + 1
        elsif word.closing_tag?
          open - 1
        else
          open
        end
      end

      balance != 0
    end

    private

    # Character-by-character state machine filling @words. The current mode
    # decides how the character at the front of `character_array` is handled;
    # the character is removed once it has been handled.
    #
    # @param character_array [Array<String>] consumed by this method
    def convert_html_to_list_of_words(character_array)
      @mode = :char
      @current_word = Word.new
      @words = []
      @block_tags = 0

      until character_array.empty?
        char = character_array.first

        case @mode
        when :tag then consume_tag_char(char)
        when :block_tag then consume_block_tag_char(char, character_array)
        when :char then consume_text_char(char, character_array)
        when :whitespace then consume_whitespace_char(char)
        else
          fail "Unknown mode #{@mode.inspect}"
        end

        character_array.shift # Remove this character now we are done
      end
      @words << @current_word unless @current_word.empty?
    end

    # Stores the word built so far (unless it is empty) and starts the next
    # one with `initial`.
    def start_new_word(initial)
      @words << @current_word unless @current_word.empty?
      @current_word = Word.new(initial)
    end

    # Inside an ordinary tag: collect characters until the closing '>'.
    def consume_tag_char(char)
      if end_of_tag? char
        @current_word << '>'
        @words << @current_word
        @current_word = Word.new
        @mode = :char
      else
        @current_word << char
      end
    end

    # Inside a block tag: everything up to the matching closing div is
    # collected into one word. @block_tags tracks the div nesting depth; when
    # it returns to zero the rest of the closing tag is read in :tag mode.
    def consume_block_tag_char(char, character_array)
      if start_of_div_tag? character_array
        @block_tags += 1
      elsif end_of_div_tag? character_array
        @block_tags -= 1
        @mode = :tag if @block_tags == 0
      end
      @current_word << char
    end

    # Inside a run of word characters.
    def consume_text_char(char, character_array)
      if start_of_tag? char
        start_new_word('<')

        if starts_with_block_tag character_array
          @mode = :block_tag
          @block_tags = 1
        else
          @mode = :tag
        end
      elsif whitespace? char
        start_new_word(char)
        @mode = :whitespace
      elsif char? char
        @current_word << char
      else
        # Any other character becomes a word of its own.
        start_new_word(char)
      end
    end

    # Inside a run of whitespace.
    # NOTE(review): unlike the word-character mode, a tag that follows
    # whitespace is never checked for being a block tag -- confirm intended.
    def consume_whitespace_char(char)
      if start_of_tag? char
        start_new_word('<')
        @mode = :tag
      elsif whitespace? char
        @current_word << char
      else
        start_new_word(char)
        @mode = :char
      end
    end

    def start_of_tag?(char)
      char == '<'
    end

    # Only the leading characters are inspected, so a "<div" that starts a
    # later line of the remaining input is not mistaken for one at the current
    # position.
    def start_of_div_tag?(character_array)
      character_array.first(4).join == '<div'
    end

    # See #start_of_div_tag? for why only the leading characters are compared.
    def end_of_div_tag?(character_array)
      character_array.first(6).join == '</div>'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    def end_of_tag?(char)
      char == '>'
    end

    def char?(char)
      char =~ /[\w\#@]+/i
    end

    def standalone_tag?(item)
      item.downcase =~ /<(img|hr|br)/
    end

    # Asks Word whether the remaining input begins with a block tag.
    def starts_with_block_tag(character_array)
      Word.new(character_array.join).block_tag?
    end
  end
end
@@ -0,0 +1,17 @@
1
module HTMLDiff
  # A matching run found in both versions: where it starts in the old word
  # list, where it starts in the new word list, and how many words it covers.
  Match = Struct.new(:start_in_old, :start_in_new, :size)

  class Match
    # @!method start_in_old
    # @!method start_in_new
    # @!method size

    # @return the start position in the old version plus the size
    def end_in_old
      start_in_old + size
    end

    # @return the start position in the new version plus the size
    def end_in_new
      start_in_new + size
    end
  end
end