sms-htmldiff 0.0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1e1961c6bcec22268da84f5306440c707016b105cc76e9bed199a3c0579144dc
4
+ data.tar.gz: 3a77483261d5a2240e1c36e4a83c3038628ba30b3776c95a9c6dea59a126124c
5
+ SHA512:
6
+ metadata.gz: cd6156d5e248ef46485acdc90672ad2dee39b6abe57bbddb80882539a23cbcfd168fba3415f8aa00848682c4363c25e5f101e7c02d23b24b0a99a23b07b8a56d
7
+ data.tar.gz: e999a7613321792df4a8830e9108ef9d8ad6bc3a67a735da9682fecc84928e1aed2bdf316f1e3c04bd826d8fdef106fc2590b5a00d0dcb48832d53725ec4029d
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'rspec'
4
+ gem 'rake'
5
+
6
+ gem 'nokogiri'
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.2.5)
5
+ mini_portile (0.6.2)
6
+ nokogiri (1.6.6.2)
7
+ mini_portile (~> 0.6.0)
8
+ rake (10.4.2)
9
+ rspec (2.14.1)
10
+ rspec-core (~> 2.14.0)
11
+ rspec-expectations (~> 2.14.0)
12
+ rspec-mocks (~> 2.14.0)
13
+ rspec-core (2.14.7)
14
+ rspec-expectations (2.14.4)
15
+ diff-lcs (>= 1.1.3, < 2.0)
16
+ rspec-mocks (2.14.4)
17
+
18
+ PLATFORMS
19
+ ruby
20
+
21
+ DEPENDENCIES
22
+ nokogiri
23
+ rake
24
+ rspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # HTMLdiff
2
+
3
+ This gem generates nice diff outputs (in HTML) from two supplied bits of HTML
4
+ which are (presumably) partially different. It is aimed at the limited HTML
5
+ that one would expect a WYSIWYG editor to produce.
6
+
7
+ It is not foolproof and only gives good results with a limited (and not fully
8
+ documented) range of HTML tags. See the specs for stuff that is known to work.
9
+ Beyond that you're on your own!
10
+
11
+ ## Usage
12
+
13
+ ```
14
+ doc_a = 'a word is here'
15
+ doc_b = 'a nother word is there'
16
+
17
+ HTMLDiff.diff(doc_a, doc_b)
18
+
19
+ # => 'a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>'
20
+ ```
21
+
22
+ ## Block tags
23
+
24
+ If you have some things which need to be treated as one unified lump of HTML
25
+ that has been added or taken away e.g. an embedded document composed of a div
26
+ with some images in it, then add `class="block_tag"` to the opening div tag.
27
+ The diff will then surround the whole thing with an `<ins>` or `<del>` tag,
28
+ as well as showing the whole thing as replaced with a new version if any of
29
+ the content changes.
30
+
31
+ ## Credits
32
+
33
+ This is a fork of the htmldiff gem here https://github.com/myobie/htmldiff/
34
+ which appears to no longer be maintained. It has been refactored internally and
35
+ made to be more useful when comparing the diff generated by HTML editors.
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems'
2
+ require 'rubygems/package_task'
3
+ require 'rubygems/specification'
4
+ require 'date'
5
+ require 'rspec'
6
+ require 'rspec/core/rake_task'
7
+
8
# Build metadata used by the packaging tasks below.
# NOTE(review): htmldiff.gemspec publishes this code as 'sms-htmldiff'
# 0.0.1.1, while these constants still carry the upstream name and version —
# confirm which one is meant to be authoritative before running `make_spec`,
# which writes "#{GEM}.gemspec".
GEM = 'htmldiff'
GEM_VERSION = '0.0.1'
AUTHOR = 'Nathan Herald'
EMAIL = 'nathan@myobie.com'
HOMEPAGE = 'http://github.com/myobie/htmldiff'
SUMMARY = 'HTML diffs of text (borrowed from a wiki software I '\
          'no longer remember)'

# Specification used by the package/install/make_spec tasks.
spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  # The readme ships as README.md (matching htmldiff.gemspec); listing files
  # that are not in the gem makes packaging fail.
  s.extra_rdoc_files = %w(README.md LICENSE)
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = %w(LICENSE README.md Rakefile) + Dir.glob('{lib,spec}/**/*')
end

task default: :spec

desc 'Run specs'
RSpec::Core::RakeTask.new do |t|
  # RSpec 2+ task API: `pattern`/`rspec_opts` replace the RSpec 1
  # `spec_files`/`spec_opts` accessors.
  t.pattern = 'spec/**/*_spec.rb'
  t.rspec_opts = %w(--format documentation --color)
end

Gem::PackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc 'install the gem locally'
task install: [:package] do
  # The package task writes pkg/<name>-<version>.gem.
  sh %(sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem)
end

desc 'create a gemspec file'
task :make_spec do
  File.open("#{GEM}.gemspec", 'w') do |file|
    file.puts spec.to_ruby
  end
end
data/htmldiff.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for the sms-htmldiff fork of htmldiff.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'sms-htmldiff'
  spec.version  = '0.0.1.1'
  spec.summary  = 'HTML diffs of text (borrowed from a wiki software I no longer remember)'
  spec.homepage = 'http://github.com/stackmystack/htmldiff'
  spec.license  = 'MIT'

  # Authorship
  spec.authors = ['Nathan Herald']
  spec.email   = 'nathan@myobie.com'
  spec.date    = '2008-11-21'

  # Documentation
  spec.rdoc_options     = ['--main', 'README.md']
  spec.extra_rdoc_files = ['README.md', 'LICENSE']

  # Load path
  spec.require_paths = ['lib']

  # Manifest: taken from the files git tracks, so building requires a git
  # checkout.
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`
  spec.files      = tracked_files.split("\n")
  spec.test_files = tracked_tests.split("\n")

  # Dependencies
  spec.add_runtime_dependency('nokogiri', '>= 1.6.5')
end
@@ -0,0 +1,156 @@
1
+ require 'nokogiri'
2
+
3
module HTMLDiff
  # Renders the differences between two versions of an HTML fragment as HTML.
  # Finding the differences is delegated to HTMLDiff::MatchFinder; this class
  # turns each resulting operation into output, marking changes with ins and
  # del tags.
  class DiffBuilder
    # @return [Array] the output fragments accumulated so far
    attr_reader :content

    # @param old_version [String] the original HTML
    # @param new_version [String] the changed HTML
    # @param options [Hash] overrides merged on top of #default_options
    def initialize(old_version, new_version, options = {})
      @options = default_options.merge(options)
      @old_words = ListOfWords.new(old_version, @options)
      @new_words = ListOfWords.new(new_version, @options)
      @content = []
    end

    # @return [Hash] the options used when the caller supplies none
    def default_options
      { block_tag_classes: [] }
    end

    # Runs every operation and returns the joined output.
    #
    # @return [String]
    def build
      perform_operations
      content.join
    end

    # The list of things that changed between the two versions, as reported by
    # the match finder. Each one still has to be rendered as HTML.
    def operations
      HTMLDiff::MatchFinder.new(@old_words, @new_words).operations
    end

    # Renders each operation in turn.
    def perform_operations
      operations.each(&method(:perform_operation))
    end

    # Dispatches to the method named by the operation's action.
    #
    # @param operation [HTMLDiff::Operation]
    def perform_operation(operation)
      send(operation.action, operation)
    end

    # A chunk of text was replaced by a different chunk. Tags count as whole
    # words in the diff, so '<p>' becoming '<p class="highlight">' (or gaining
    # a style attribute after an editor button press) is reported as a
    # replacement. There is nothing useful to show for that, and finding the
    # matching closing tag would get messy, so in that case only the new
    # version is emitted. Otherwise the old words are shown as deleted and the
    # new words as inserted.
    #
    # @param operation [HTMLDiff::Operation]
    def replace(operation)
      return equal(operation) if operation.same_tag?

      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # @param operation [HTMLDiff::Operation]
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, operation.new_words)
    end

    # @param operation [HTMLDiff::Operation]
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, operation.old_words)
    end

    # Both versions agree here: nothing to mark, just copy the new text.
    #
    # @param operation [HTMLDiff::Operation]
    def equal(operation)
      @content << operation.new_text
    end

    # Encloses words in the given tag (ins or del) and appends the result to
    # @content, with a twist: when the words contain tags, several ins or del
    # elements are produced so that none of them ends up improperly nested.
    # This handles cases like
    #   old: '<p>a</p>'
    #   new: '<p>ab</p><p>c</p>'
    #   diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
    # Valid HTML is still not guaranteed (think about diffing text that itself
    # contains ins or del tags), but more cases come out right than with the
    # earlier version.
    #
    # The words list is consumed as it is processed.
    #
    # P.S.: Spare a thought for people who write HTML browsers. They live in
    # this... every day.
    def insert_tag(tagname, cssclass, words)
      enclosed = false

      until words.empty?
        lead = words.first

        if lead.standalone_tag?
          append_wrapped_run(words, tagname, cssclass, &:standalone_tag?)
        elsif lead.iframe_tag?
          append_wrapped_run(words, tagname, cssclass, &:iframe_tag?)
        elsif lead.block_tag?
          append_wrapped_run(words, tagname, cssclass, &:block_tag?)
        elsif lead.tag?
          # If the remaining words contain unclosed tags, wrapping them would
          # cause weirdness. That happens when e.g. a style is applied to a
          # paragraph: the opening tag changes but the closing tag does not.
          unless enclosed || words.contains_unclosed_tag?
            @content << diff_tag_start(tagname, cssclass)
            enclosed = true
          end

          @content += words.extract_consecutive_words! do |word|
            word.tag? && !(word.standalone_tag? || word.iframe_tag?)
          end
        else
          plain = words.extract_consecutive_words! do |word|
            word.standalone_tag? || !word.tag?
          end
          text = plain.join
          @content << wrap_text_in_diff_tag(text, tagname, cssclass) unless text.empty?
        end
      end

      @content << diff_tag_end(tagname) if enclosed
    end

    # @return [String] text enclosed in an opening and closing diff tag
    def wrap_text_in_diff_tag(text, tagname, cssclass)
      "#{diff_tag_start(tagname, cssclass)}#{text}#{diff_tag_end(tagname)}"
    end

    # @return [String] the opening diff tag carrying the CSS class
    def diff_tag_start(tagname, cssclass)
      "<#{tagname} class=\"#{cssclass}\">"
    end

    # @return [String] the closing diff tag
    def diff_tag_end(tagname)
      "</#{tagname}>"
    end

    private

    # Takes the leading run of words that satisfy the predicate off the list
    # and appends it to @content as one wrapped unit.
    def append_wrapped_run(words, tagname, cssclass, &predicate)
      run = words.extract_consecutive_words!(&predicate)
      @content << wrap_text_in_diff_tag(run.join, tagname, cssclass)
    end
  end
end
@@ -0,0 +1,182 @@
1
module HTMLDiff
  # An ordered list of Word tokens. It is built either from an existing
  # collection of Word objects or by tokenizing an HTML string into tags, runs
  # of whitespace, runs of word characters and other characters.
  class ListOfWords
    include Enumerable

    # @return [Hash] the options this list was created with
    attr_reader :options

    # @param string [String, #all?] HTML to tokenize, or a collection whose
    #   elements are all Word objects (stored as-is, not copied)
    # @param options [Hash]
    def initialize(string, options = {})
      @options = options
      # NOTE(review): DiffBuilder#default_options uses the plural key
      # :block_tag_classes, and this value is not read anywhere in this class —
      # confirm which key is intended.
      @block_tag_class = options[:block_tag_class]

      if string.respond_to?(:all?) && string.all? { |i| i.is_a?(Word) }
        @words = string
      else
        convert_html_to_list_of_words string.chars
      end
    end

    # Yields each word in order. Without a block an Enumerator is returned.
    def each(&block)
      return enum_for(:each) unless block_given?

      @words.each(&block)
    end

    # @param index [Integer, Range]
    # @return [Word, nil] the word at an Integer index
    # @return [ListOfWords] a new list for a Range, created with this list's
    #   options; it is empty when the range lies outside the list
    def [](index)
      return @words[index] unless index.is_a?(Range)

      self.class.new(@words[index] || [], @options)
    end

    # @param separator [String, nil] optional text placed between the words
    # @return [String] the words concatenated
    def join(separator = nil)
      @words.join(separator)
    end

    # @return [Boolean] true when the list holds no words
    def empty?
      @words.empty?
    end

    # Removes and returns the leading run of words for which the block returns
    # a truthy value. The list itself is modified.
    #
    # @return [Array<Word>] the removed words (empty if the first word fails)
    def extract_consecutive_words!(&condition)
      stop = @words.index { |word| !condition.call(word) }
      @words.slice!(0...(stop || @words.length))
    end

    # @return [Boolean] true when the count of opening tags differs from the
    #   count of closing tags; standalone tags are not counted
    def contains_unclosed_tag?
      depth = 0

      @words.each do |word|
        next if word.standalone_tag?

        if word.opening_tag?
          depth += 1
        elsif word.closing_tag?
          depth -= 1
        end
      end

      depth != 0
    end

    private

    # Tokenizes the characters into @words with a small state machine whose
    # modes are :char, :whitespace, :tag and :block_tag. The array is consumed.
    def convert_html_to_list_of_words(character_array)
      @mode = :char
      @current_word = Word.new
      @words = []
      @block_tags = 0

      until character_array.empty?
        char = character_array.first

        case @mode
        when :tag then consume_tag_char(char)
        when :block_tag then consume_block_tag_char(char, character_array)
        when :char then consume_text_char(char, character_array)
        when :whitespace then consume_whitespace_char(char)
        else
          raise "Unknown mode #{@mode.inspect}"
        end

        character_array.shift # Remove this character now we are done
      end

      flush_current_word
    end

    # Inside an ordinary tag: collect characters up to and including '>'.
    def consume_tag_char(char)
      @current_word << char
      return unless end_of_tag?(char)

      flush_current_word
      @current_word = Word.new
      @mode = :char
    end

    # Inside a block tag: keep everything in one word while tracking nested
    # divs. When the outermost closing div starts, its '<' is appended here and
    # :tag mode collects the rest of that closing tag.
    def consume_block_tag_char(char, character_array)
      if start_of_div_tag?(character_array)
        @block_tags += 1
      elsif end_of_div_tag?(character_array)
        @block_tags -= 1
        @mode = :tag if @block_tags == 0
      end
      @current_word << char
    end

    # Inside a word (or at the very start of the input).
    def consume_text_char(char, character_array)
      if start_of_tag?(char)
        flush_current_word
        @current_word = Word.new('<')

        if starts_with_block_tag(character_array)
          @mode = :block_tag
          @block_tags = 1
        else
          @mode = :tag
        end
      elsif whitespace?(char)
        begin_word(char, :whitespace)
      elsif char?(char)
        @current_word << char
      else
        # Any other character starts a word of its own.
        begin_word(char, :char)
      end
    end

    # Inside a run of whitespace.
    # NOTE(review): unlike :char mode, a tag that follows whitespace is never
    # checked with starts_with_block_tag — confirm whether that is intended.
    def consume_whitespace_char(char)
      if start_of_tag?(char)
        begin_word('<', :tag)
      elsif whitespace?(char)
        @current_word << char
      else
        begin_word(char, :char)
      end
    end

    # Stores the word in progress, then starts a new one in the given mode.
    def begin_word(initial, mode)
      flush_current_word
      @current_word = Word.new(initial)
      @mode = mode
    end

    # Appends the word in progress to the list unless it is empty.
    def flush_current_word
      @words << @current_word unless @current_word.empty?
    end

    def start_of_tag?(char)
      char == '<'
    end

    # True when the remaining characters begin with '<div'. Only the current
    # position is examined: matching '^' against the joined remainder would
    # also match at the start of any later line.
    def start_of_div_tag?(character_array)
      character_array.first(4).join == '<div'
    end

    # True when the remaining characters begin with '</div>'.
    def end_of_div_tag?(character_array)
      character_array.first(6).join == '</div>'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    def end_of_tag?(char)
      char == '>'
    end

    def char?(char)
      char =~ /[\w\#@]+/i
    end

    def standalone_tag?(item)
      item.downcase =~ /<(img|hr|br)/
    end

    # Asks Word whether the remaining input, taken as a whole, is a block tag.
    def starts_with_block_tag(character_array)
      Word.new(character_array.join).block_tag?
    end
  end
end
@@ -0,0 +1,17 @@
1
module HTMLDiff
  # A run of +size+ consecutive words that is the same in both versions,
  # starting at +start_in_old+ in the old list and +start_in_new+ in the new.
  Match = Struct.new(:start_in_old, :start_in_new, :size)

  class Match
    # @!method start_in_old
    # @!method start_in_new
    # @!method size

    # @return the position just past the match in the old version
    def end_in_old
      start_in_old + size
    end

    # @return the position just past the match in the new version
    def end_in_new
      start_in_new + size
    end
  end
end