hyp_diff 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem "pry"
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Kristian Hanekamp
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # HypDiff
2
+
3
+ HypDiff compares HTML snippets. It generates a diff between two input snippets. The diff is a new HTML snippet that highlights textual changes. The tag structure and formatting of the input snippets are preserved. The generated diff snippet is valid, well-formed HTML and suitable for presentation inside a WYSIWYG environment.
4
+
5
+ ## Usage
6
+
7
+ HypDiff.compare("<p>byebye world</p>", "<p>hello world</p>")
8
+ # '<p><del>byebye</del><ins>hello</ins> world</p> '
9
+
10
+ For more examples, take a look at the [specs](spec/hyp_diff_spec.rb).
11
+
12
+ ## Why another diff tool?
13
+
14
+ Many existing tools simply create a diff of the HTML source code. Unfortunately, a diff of the source code can only be viewed as source code; it cannot be rendered in a browser. While that is fine for developers, it is not suitable for an audience that prefers not to be exposed to HTML source code, for example users of WYSIWYG editors.
15
+
16
+ There are other tools that try to generate an HTML diff that can be rendered by a browser. But many of these tools simply try to "work around" the HTML document structure by using regular expressions. This simplistic approach only works for a small subset of HTML snippets. These tools often output incomprehensible diffs or even invalid HTML.
17
+
18
+ ## How is HypDiff different?
19
+
20
+ HypDiff takes two HTML snippets and generates a comparison that is again a valid HTML snippet that can be viewed inside a browser.
21
+
22
+ HypDiff does not rely on regular expressions, but actually parses the input snippets using Nokogiri. It extracts the textual content of the documents and compares them with a state-of-the-art diff algorithm provided by the diff-lcs gem. It then inserts `<ins>` and `<del>` tags into the HTML snippet to highlight changes, but leaves all other HTML tags intact.
23
+
24
+ ## Limitations
25
+
26
+ HypDiff does not compare the HTML source code or the DOM tree; it compares only the visible text. Changes that do not affect visible text (for example, changes to tags or attributes alone) are ignored.
27
+
28
+ ## Installation
29
+
30
+ Add this line to your application's Gemfile:
31
+
32
+ gem 'hyp_diff'
33
+
34
+ And then execute:
35
+
36
+ $ bundle
37
+
38
+ Or install it yourself as:
39
+
40
+ $ gem install hyp_diff
41
+
42
+ ## Contributing
43
+
44
+ 1. Fork it ( http://github.com/krishan/hyp_diff/fork )
45
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
46
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
47
+ 4. Push to the branch (`git push origin my-new-feature`)
48
+ 5. Create a new Pull Request
49
+
50
+ Copyright 2014 Kristian Hanekamp
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/hyp_diff.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'hyp_diff/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "hyp_diff"
8
+ spec.version = HypDiff::VERSION
9
+ spec.authors = ["Kristian Hanekamp"]
10
+ spec.email = ["kris.hanekamp@gmail.com"]
11
+ spec.summary = %q{HypDiff compares html snippets}
12
+ spec.description = %q{
13
+ HypDiff compares HTML snippets. It generates a diff between two input snippets. The diff is a new HTML snippet that highlights textual changes. The tag structure and formatting of the input snippets is preserved. The generated diff snippet is valid, well-formed HTML and suitable for presentation inside a WYSIWYG environment.
14
+ }
15
+ spec.homepage = "https://github.com/krishan/hyp_diff"
16
+ spec.license = "MIT"
17
+
18
+ spec.files = `git ls-files -z`.split("\x0")
19
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
21
+ spec.require_paths = ["lib"]
22
+
23
+ spec.add_dependency "nokogiri", "~> 1.5.6"
24
+ spec.add_dependency "diff-lcs", "~> 1.2.5"
25
+
26
+ spec.add_development_dependency "bundler", "~> 1.5"
27
+ spec.add_development_dependency "rspec", "~> 2.14.1"
28
+ spec.add_development_dependency "rake", "~> 10.1"
29
+ end
data/lib/hyp_diff.rb ADDED
@@ -0,0 +1,163 @@
1
+ require "nokogiri"
2
+ require "diff-lcs"
3
+
4
+ require "text_from_node"
5
+
6
+ module HypDiff; class << self
7
+
8
+ def compare(before, after)
9
+ parsed_after = parse(after)
10
+ parsed_before = parse(before)
11
+
12
+ text_changes = Diff::LCS.sdiff(extract_text(parsed_before), extract_text(parsed_after))
13
+
14
+ NodeMap.for(text_changes).each do |node, changes|
15
+ node.replace(ChangeRenderer.render(changes))
16
+ end
17
+
18
+ parsed_after.to_html
19
+ end
20
+
21
+ private
22
+
23
+ class NodeMap
24
+ def self.for(changes)
25
+ new.build(changes).map
26
+ end
27
+
28
+ attr_reader :map
29
+
30
+ def initialize
31
+ @map = {}
32
+ @stashed = []
33
+ end
34
+
35
+ def build(changes)
36
+ changes.each do |change|
37
+ if change.new_element
38
+ node = change.new_element.node
39
+
40
+ if @stashed.length > 0
41
+ @stashed.each do |stashed_change|
42
+ append_to_node(node, stashed_change)
43
+ end
44
+ @stashed = []
45
+ end
46
+
47
+ append_to_node(node, change)
48
+
49
+ @last_processed_node = node
50
+ else
51
+ if @last_processed_node
52
+ append_to_node(@last_processed_node, change)
53
+ else
54
+ @stashed << change
55
+ end
56
+ end
57
+ end
58
+
59
+ self
60
+ end
61
+
62
+ def append_to_node(node, change)
63
+ list = (@map[node] ||= [])
64
+ list << change
65
+ end
66
+ end
67
+
68
+ class ChangeRenderer
69
+ def self.render(changes)
70
+ renderer = new.render(changes).rendered_text
71
+ end
72
+
73
+ def initialize
74
+ @new_text = []
75
+ end
76
+
77
+ def render(changes)
78
+ @insertions = []
79
+ @deletions = []
80
+
81
+ changes.each do |change|
82
+ case change.action
83
+ when "!" then
84
+ deletions << change.old_element.text
85
+ insertions << change.new_element.text
86
+ when "=" then
87
+ apply_insertions_and_deletions
88
+ new_text << change.new_element.text
89
+ when "+" then
90
+ insertions << change.new_element.text
91
+ when "-" then
92
+ deletions << change.old_element.text
93
+ else
94
+ raise "unexpected change.action #{change.action}"
95
+ end
96
+ end
97
+
98
+ apply_insertions_and_deletions
99
+
100
+ self
101
+ end
102
+
103
+ def rendered_text
104
+ new_text.join
105
+ end
106
+
107
+ private
108
+
109
+ attr_reader :insertions, :deletions, :new_text
110
+
111
+ def apply_insertions_and_deletions
112
+ if deletions.length > 0
113
+ @new_text << deletion_tag(deletions.join)
114
+ end
115
+ if insertions.length > 0
116
+ @new_text << insertion_tag(insertions.join)
117
+ end
118
+
119
+ @insertions = []
120
+ @deletions = []
121
+ end
122
+
123
+ def insertion_tag(text)
124
+ "<ins>#{text}</ins>"
125
+ end
126
+
127
+ def deletion_tag(text)
128
+ "<del>#{text}</del>"
129
+ end
130
+
131
+ end
132
+
133
+ def parse(text)
134
+ Nokogiri::HTML.fragment(text)
135
+ end
136
+
137
+ def extract_text(node)
138
+ filter_whitespace(text_fragments(node))
139
+ end
140
+
141
+ def text_fragments(node)
142
+ if node.is_a?(Nokogiri::XML::Text)
143
+ node.text.split(/(?=[.!, ])|\b/).map { |token| TextFromNode.new(token, node) }
144
+ else
145
+ node.children.map { |c| text_fragments(c) }.flatten
146
+ end
147
+ end
148
+
149
+ def filter_whitespace(node_list)
150
+ result = []
151
+ last_node_whitespace = false
152
+ node_list.each do |node|
153
+ node_whitespace = node.whitespace?
154
+ result << node unless last_node_whitespace && node_whitespace
155
+
156
+ last_node_whitespace = node_whitespace
157
+ end
158
+
159
+ result
160
+ end
161
+
162
+ end; end
163
+
@@ -0,0 +1,3 @@
1
+ module HypDiff
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,31 @@
1
+ class TextFromNode
2
+ def initialize(raw_text, node)
3
+ @text = raw_text.strip == "" ? " " : raw_text
4
+ @node = node
5
+ end
6
+
7
+ def ==(other)
8
+ text == other.text
9
+ end
10
+
11
+ def eql?(other)
12
+ text == other.text
13
+ end
14
+
15
+ def hash
16
+ text.hash
17
+ end
18
+
19
+ def whitespace?
20
+ @text == " "
21
+ end
22
+
23
+ def text
24
+ @text
25
+ end
26
+
27
+ def node
28
+ @node
29
+ end
30
+ end
31
+
@@ -0,0 +1,141 @@
1
+ require "hyp_diff"
2
+
3
+ describe HypDiff do
4
+
5
+ def expect_diff(old, new, expected)
6
+ HypDiff.compare(old, new).should == expected
7
+ end
8
+
9
+ it "diffs two texts, applying tags to indicate changes" do
10
+ expect_diff("byebye", "hello", '<del>byebye</del><ins>hello</ins>')
11
+ end
12
+
13
+ it "extracts text to diff from input markup, reapplying the (after-)markup to the diff" do
14
+ expect_diff(
15
+ "<b>byebye</b> world",
16
+ "<i>hello</i> world",
17
+ '<i><del>byebye</del><ins>hello</ins></i> world'
18
+ )
19
+ end
20
+
21
+ it "diffs word-by-word" do
22
+ expect_diff("byebye world", "hello world", '<del>byebye</del><ins>hello</ins> world')
23
+ end
24
+
25
+ it "handles pure additions" do
26
+ expect_diff("hello ", "hello world", 'hello <ins>world</ins>')
27
+ end
28
+
29
+ it "handles pure deletions" do
30
+ expect_diff("hello world", "hello ", 'hello <del>world</del>')
31
+ end
32
+
33
+ it "handles pure deletions at the beginning" do
34
+ expect_diff("hello world", " world", '<del>hello</del> world')
35
+ end
36
+
37
+ it "handles several pure deletions at the beginning" do
38
+ expect_diff("hello beautiful world", "world", '<del>hello beautiful </del>world')
39
+ end
40
+
41
+ it "merges consecutive additions into a single tag" do
42
+ expect_diff(
43
+ "hello world",
44
+ "why hello beautiful world",
45
+ "<ins>why </ins>hello<ins> beautiful</ins> world"
46
+ )
47
+ end
48
+
49
+ it "merges consecutive deletions into a single tag" do
50
+ expect_diff("hello beautiful world", "hello world", "hello <del>beautiful </del>world")
51
+ end
52
+
53
+ it "merge consecutive additions and edits into single tags" do
54
+ expect_diff(
55
+ "hello world",
56
+ "hello my beautiful",
57
+ "hello <del>world</del><ins>my beautiful</ins>"
58
+ )
59
+ end
60
+
61
+ it "merge consecutive deletions and edits into single tags" do
62
+ expect_diff(
63
+ "hello my beautiful",
64
+ "hello world",
65
+ "hello <del>my beautiful</del><ins>world</ins>"
66
+ )
67
+ end
68
+
69
+ describe "handling whitespace" do
70
+ it "treats consecutive whitespace as a single whitespace" do
71
+ expect_diff("hello world", "hello world", "hello world")
72
+ end
73
+
74
+ it "treats consecutive whitespace as a single whitespace across tags" do
75
+ expect_diff(
76
+ "<span>hello </span> <span> world</span>",
77
+ "hello world",
78
+ "hello world"
79
+ )
80
+ expect_diff(
81
+ "<span>hello </span>world",
82
+ "hello<span> world</span>",
83
+ "hello<span> world</span>"
84
+ )
85
+ end
86
+
87
+ it "considers trailing and leading whitespace for insertions and deletions" do
88
+ expect_diff("hello", "hello world", "hello<ins> world</ins>")
89
+ expect_diff("hello world", "hello", "hello<del> world</del>")
90
+ expect_diff("world", "hello world", "<ins>hello </ins>world")
91
+ expect_diff("hello world", "world", "<del>hello </del>world")
92
+ expect_diff(" world", "hello world", "<ins>hello</ins> world")
93
+ expect_diff("hello world", " world", "<del>hello</del> world")
94
+ expect_diff("hello ", "hello world", "hello <ins>world</ins>")
95
+ expect_diff("hello world", "hello ", "hello <del>world</del>")
96
+ end
97
+
98
+ it "considers trailing and leading whitespace changes" do
99
+ expect_diff("hello ", "hello", "hello<del> </del>")
100
+ expect_diff("hello", "hello ", "hello<ins> </ins>")
101
+ expect_diff(" hello", "hello", "<del> </del>hello")
102
+ expect_diff("hello", " hello", "<ins> </ins>hello")
103
+ end
104
+
105
+ it "considers changes of text and whitespace" do
106
+ expect_diff("hello world ", "hello friend", "hello <del>world </del><ins>friend</ins>")
107
+ expect_diff(" bye world", "hello world", "<del> bye</del><ins>hello</ins> world")
108
+ expect_diff("hello friend", "hello world ", "hello <del>friend</del><ins>world </ins>")
109
+ expect_diff("hello world", " bye world", "<del>hello</del><ins> bye</ins> world")
110
+ end
111
+ end
112
+
113
+ it "diffs punctuation signs as single tokens when followed by whitespace" do
114
+ expect_diff("hello world", "hello, world", "hello<ins>,</ins> world")
115
+ end
116
+
117
+ it "diffs changes of punctuation to words" do
118
+ expect_diff(
119
+ "hello, world",
120
+ "hello beautiful world",
121
+ "hello<del>,</del><ins> beautiful</ins> world"
122
+ )
123
+ expect_diff(
124
+ "hello beautiful world",
125
+ "hello, world",
126
+ "hello<del> beautiful</del><ins>,</ins> world"
127
+ )
128
+ end
129
+
130
+ it "diffs changes of punctuation to leading and trailing spaces" do
131
+ expect_diff("hello.", "hello ", "hello<del>.</del><ins> </ins>")
132
+ expect_diff("hello ", "hello.", "hello<del> </del><ins>.</ins>")
133
+ expect_diff(" hello", ".hello", "<del> </del><ins>.</ins>hello")
134
+ expect_diff(".hello", " hello", "<del>.</del><ins> </ins>hello")
135
+ end
136
+
137
+ it "diffs punctuation signs as single tokens when at end of string" do
138
+ expect_diff("hello world", "hello world.", "hello world<ins>.</ins>")
139
+ end
140
+
141
+ end
@@ -0,0 +1 @@
1
+ $LOAD_PATH.unshift(File.expand_path("../lib", File.dirname(__FILE__)))
@@ -0,0 +1,26 @@
1
+ require "text_from_node"
2
+
3
+ describe TextFromNode do
4
+
5
+ let(:node) { double }
6
+ let(:other_node) { double }
7
+
8
+ let(:subject) { TextFromNode.new("spam", node) }
9
+ let(:same_text_other_node) { TextFromNode.new("spam", other_node) }
10
+ let(:other_text_same_node) { TextFromNode.new("eggs", node) }
11
+
12
+ it "equals other instance when text is equal" do
13
+ subject.should == same_text_other_node
14
+ subject.should be_eql(same_text_other_node)
15
+
16
+ subject.should_not == other_text_same_node
17
+ subject.should_not be_eql(other_text_same_node)
18
+ end
19
+
20
+ it "provides a sane hash implementation" do
21
+ subject.hash.should == same_text_other_node.hash
22
+ subject.hash.should_not == other_text_same_node.hash
23
+ end
24
+
25
+ end
26
+
metadata ADDED
@@ -0,0 +1,151 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hyp_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kristian Hanekamp
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-03-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.6
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.6
30
+ - !ruby/object:Gem::Dependency
31
+ name: diff-lcs
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.2.5
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.2.5
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.5'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.5'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 2.14.1
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 2.14.1
78
+ - !ruby/object:Gem::Dependency
79
+ name: rake
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: '10.1'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '10.1'
94
+ description: ! "\nHypDiff compares HTML snippets. It generates a diff between two
95
+ input snippets. The diff is a new HTML snippet that highlights textual changes.
96
+ The tag structure and formatting of the input snippets is preserved. The generated
97
+ diff snippet is valid, well-formed HTML and suitable for presentation inside a WYSIWYG
98
+ environment.\n "
99
+ email:
100
+ - kris.hanekamp@gmail.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - .gitignore
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - hyp_diff.gemspec
111
+ - lib/hyp_diff.rb
112
+ - lib/hyp_diff/version.rb
113
+ - lib/text_from_node.rb
114
+ - spec/hyp_diff_spec.rb
115
+ - spec/spec_helper.rb
116
+ - spec/text_from_node_spec.rb
117
+ homepage: https://github.com/krishan/hyp_diff
118
+ licenses:
119
+ - MIT
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ! '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ segments:
131
+ - 0
132
+ hash: 4061769782701953502
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ segments:
140
+ - 0
141
+ hash: 4061769782701953502
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 1.8.23
145
+ signing_key:
146
+ specification_version: 3
147
+ summary: HypDiff compares html snippets
148
+ test_files:
149
+ - spec/hyp_diff_spec.rb
150
+ - spec/spec_helper.rb
151
+ - spec/text_from_node_spec.rb