edouard-htmldiff 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,15 @@
1
+ class Stuff
2
+
3
+ class << self
4
+ include HTMLDiff
5
+ end
6
+
7
+ # or extend HTMLDiff ?
8
+
9
+ end
10
+
11
+ Stuff.diff('a word is here', 'a nother word is there')
12
+
13
+ # => 'a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>'
14
+
15
+ Check out the specs for good examples.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
# Build, test and packaging tasks for the htmldiff gem.
require 'rubygems'
require 'rake/gempackagetask'
require 'rubygems/specification'
require 'date'
require 'spec/rake/spectask'

# Gem identity, shared by the specification and the tasks below.
GEM = "htmldiff"
GEM_VERSION = "0.0.1"
AUTHOR = "Nathan Herald"
EMAIL = "nathan@myobie.com"
HOMEPAGE = "http://github.com/myobie/htmldiff"
SUMMARY = "HTML diffs of text (borrowed from a wiki software I no longer remember)"

# The gem specification that both :package and :make_spec work from.
gemspec = Gem::Specification.new do |s|
  s.name             = GEM
  s.version          = GEM_VERSION
  s.platform         = Gem::Platform::RUBY
  s.has_rdoc         = true
  s.extra_rdoc_files = %w(README LICENSE TODO)
  s.summary          = SUMMARY
  s.description      = s.summary
  s.author           = AUTHOR
  s.email            = EMAIL
  s.homepage         = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "foo"

  s.require_path = 'lib'
  s.autorequire  = GEM
  s.files        = %w(LICENSE README Rakefile TODO) + Dir["{lib,spec}/**/*"]
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |spec_task|
  spec_task.spec_files = FileList['spec/**/*_spec.rb']
  spec_task.spec_opts  = ['-fs', '--color']
end

# Defines the :package task (and friends) for the specification above.
Rake::GemPackageTask.new(gemspec) { |pkg| pkg.gem_spec = gemspec }

desc "install the gem locally"
task :install => :package do
  sh "sudo gem install pkg/#{GEM}-#{GEM_VERSION}"
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") { |file| file.puts gemspec.to_ruby }
end
data/TODO ADDED
File without changes
data/lib/htmldiff.rb ADDED
@@ -0,0 +1,340 @@
# Word-level diffing of HTML (or plain text) fragments.
#
# Mix the module in with +include+ or +extend+ and call #diff. The result is
# the new text with insertions wrapped in <ins> and deletions wrapped in <del>.
module HTMLDiff

  # A run of +size+ identical words that starts at +start_in_old+ in the old
  # word list and at +start_in_new+ in the new word list.
  Match = Struct.new(:start_in_old, :start_in_new, :size) do
    # Index just past the last matched word in the old word list.
    def end_in_old
      start_in_old + size
    end

    # Index just past the last matched word in the new word list.
    def end_in_new
      start_in_new + size
    end
  end

  # One step of the diff. +action+ is one of DiffBuilder::VALID_METHODS; the
  # remaining members are half-open word ranges in the old and new word lists.
  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)

  # Builds the marked-up diff of two versions of a text.
  class DiffBuilder

    # Actions that an Operation may carry; each one names a method below.
    VALID_METHODS = [:replace, :insert, :delete, :equal].freeze

    # old_version / new_version: Strings (or pre-split Arrays of characters).
    # dual: when true, #build returns two separate renderings
    #       (old text with deletions, new text with insertions) instead of one.
    def initialize(old_version, new_version, dual = false)
      @old_version, @new_version = old_version, new_version
      @dual = dual
      if @dual
        @new_content = []
        @old_content = []
      else
        @content = []
      end
    end

    # Runs the whole diff.
    # Returns a String, or [old_html, new_html] when built in dual mode.
    def build
      split_inputs_to_words
      index_new_words
      operations.each { |op| perform_operation(op) }
      @dual ? [@old_content.join, @new_content.join] : @content.join
    end

    # Tokenizes both versions into @old_words and @new_words.
    def split_inputs_to_words
      @old_words = convert_html_to_list_of_words(explode(@old_version))
      @new_words = convert_html_to_list_of_words(explode(@new_version))
    end

    # Builds @word_indices: word => ascending list of its positions in @new_words.
    def index_new_words
      @word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @word_indices[word] << i }
    end

    # Turns the matching blocks into an ordered list of Operations that
    # covers both word lists completely.
    def operations
      position_in_old = position_in_new = 0
      operations = []

      matches = matching_blocks
      # An empty match at the very end forces the loop below to also emit an
      # operation for whatever unmatched tail is left in either version.
      matches << Match.new(@old_words.length, @new_words.length, 0)

      matches.each do |match|
        at_old = (position_in_old == match.start_in_old)
        at_new = (position_in_new == match.start_in_new)

        action_upto_match_positions =
          if !at_old && !at_new
            :replace
          elsif at_old && !at_new
            :insert
          elsif !at_old && at_new
            :delete
          else
            # Nothing lies between the previous match and this one.
            :none
          end

        if action_upto_match_positions != :none
          operations << Operation.new(action_upto_match_positions,
                                      position_in_old, match.start_in_old,
                                      position_in_new, match.start_in_new)
        end

        if match.size != 0
          operations << Operation.new(:equal,
                                      match.start_in_old, match.end_in_old,
                                      match.start_in_new, match.end_in_new)
        end

        position_in_old = match.end_in_old
        position_in_new = match.end_in_new
      end

      operations
    end

    # Returns all matching blocks between the two word lists, in order.
    def matching_blocks
      matching_blocks = []
      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
      matching_blocks
    end

    # Finds the longest match inside the given window, then recurses into the
    # windows to its left and right. Matches are appended to +matching_blocks+
    # in left-to-right order.
    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      return unless match

      if start_in_old < match.start_in_old && start_in_new < match.start_in_new
        recursively_find_matching_blocks(
          start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
      end

      matching_blocks << match

      if match.end_in_old < end_in_old && match.end_in_new < end_in_new
        recursively_find_matching_blocks(
          match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
      end
    end

    # Returns the longest run of identical consecutive words inside the
    # half-open windows [start_in_old, end_in_old) and [start_in_new, end_in_new),
    # or nil when the windows share no word. Requires #index_new_words to have run.
    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      best_match_in_old = start_in_old
      best_match_in_new = start_in_new
      best_match_size = 0

      # match_length_at[j] is the length of the match that ends at the
      # previous old word and at new word j.
      match_length_at = Hash.new(0)

      (start_in_old...end_in_old).each do |index_in_old|
        new_match_length_at = Hash.new(0)

        @word_indices[@old_words[index_in_old]].each do |index_in_new|
          next if index_in_new < start_in_new
          break if index_in_new >= end_in_new # positions are ascending

          new_match_length = match_length_at[index_in_new - 1] + 1
          new_match_length_at[index_in_new] = new_match_length

          if new_match_length > best_match_size
            best_match_in_old = index_in_old - new_match_length + 1
            best_match_in_new = index_in_new - new_match_length + 1
            best_match_size = new_match_length
          end
        end

        match_length_at = new_match_length_at
      end

      # add_matching_words_left / add_matching_words_right are intentionally
      # not applied here; they are kept below as standalone helpers.
      best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil
    end

    # Extends a match to the left while the preceding words are equal and the
    # window start is not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
      while match_in_old > start_in_old &&
            match_in_new > start_in_new &&
            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
        match_in_old -= 1
        match_in_new -= 1
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Extends a match to the right while the following words are equal and the
    # window end is not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
      while match_in_old + match_size < end_in_old &&
            match_in_new + match_size < end_in_new &&
            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Dispatches an Operation to the method named by its action.
    # Raises ArgumentError for an action outside VALID_METHODS, so that
    # +send+ can never be steered to an arbitrary method.
    def perform_operation(operation)
      unless VALID_METHODS.include?(operation.action)
        raise ArgumentError, "Unknown diff action #{operation.action.inspect}"
      end
      @operation = operation
      send(operation.action, operation)
    end

    # A replacement is rendered as a deletion followed by an insertion,
    # both carrying the 'diffmod' class.
    def replace(operation)
      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # Emits the new words of the operation wrapped in <ins>.
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, @new_words[operation.start_in_new...operation.end_in_new], @dual ? @new_content : @content)
    end

    # Emits the old words of the operation wrapped in <del>.
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, @old_words[operation.start_in_old...operation.end_in_old], @dual ? @old_content : @content)
    end

    # No tags to insert: copies the matching words through unchanged.
    def equal(operation)
      if @dual
        @old_content.concat(@old_words[operation.start_in_old...operation.end_in_old])
        @new_content.concat(@new_words[operation.start_in_new...operation.end_in_new])
      else
        @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
      end
    end

    # Truthy when +item+ is a single tag that is not a closing tag.
    # Closing tags are excluded explicitly, because the tag pattern itself
    # also matches '</p>'.
    def opening_tag?(item)
      !closing_tag?(item) && item =~ %r!^\s*<[^>]+>\s*$!
    end

    # Truthy when +item+ is a single closing tag such as '</p>'.
    def closing_tag?(item)
      item =~ %r!^\s*</[^>]+>\s*$!
    end

    # Truthy when +item+ is a single tag of either kind.
    def tag?(item)
      opening_tag?(item) || closing_tag?(item)
    end

    # Removes from the front of +words+ (in place) the leading run of words
    # for which the block is truthy, and returns that run.
    def extract_consecutive_words(words, &condition)
      index_of_first_rejected = words.index { |word| !condition.call(word) }
      words.slice!(0...(index_of_first_rejected || words.length))
    end

    # Returns a copy of +opening_tag+ with a difftype="<type>" attribute added.
    # For self-closing tags the attribute goes before the trailing slash,
    # e.g. '<br/>' becomes '<br difftype="diffins"/>'.
    def add_special_attribute(opening_tag, type)
      opening_tag.sub(%r{(\s*/)?>}) { %( difftype="#{type}"#{Regexp.last_match(1)}>) }
    end

    # This method encloses words within a specified tag (ins or del), and adds the result to
    # +content+, with a twist: if the words contain tags, it creates multiple ins or del
    # elements so that none of them wraps a tag. This handles cases like
    # old: '<p>a</p>'
    # new: '<p>ab</p><p>c</p>'
    # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
    # This still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
    # del tags), but handles correctly more cases than the earlier version.
    #
    # Opening tags that are part of the change get a difftype attribute; closing tags are
    # copied through untouched. +words+ is consumed in the process.
    def insert_tag(tagname, cssclass, words, content)
      until words.empty?
        non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
        content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

        # words.first is nil once the list is exhausted, which is not a tag.
        while tag?(words.first)
          tag = words.shift
          content << (opening_tag?(tag) ? add_special_attribute(tag, cssclass) : tag)
        end
      end
    end

    # Wraps +text+ in <tagname class="cssclass">...</tagname>.
    def wrap_text(text, tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
    end

    # Splits a String into its characters; anything else is returned as is.
    def explode(sequence)
      sequence.is_a?(String) ? sequence.split(//) : sequence
    end

    def end_of_tag?(char)
      char == '>'
    end

    def start_of_tag?(char)
      char == '<'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    # Tokenizes +x+ (a String or an Array of characters) into "words":
    # whole tags, runs of whitespace, and runs of other characters.
    # With +use_brackets+ the emitted tags are delimited by [ ] instead of < >.
    # The input is never modified.
    def convert_html_to_list_of_words(x, use_brackets = false)
      opening_delimiter = use_brackets ? '[' : '<'
      closing_delimiter = use_brackets ? ']' : '>'

      mode = :char
      current_word = ''.dup
      words = []

      explode(x).each do |char|
        case mode
        when :tag
          if end_of_tag?(char)
            current_word << closing_delimiter
            words << current_word
            current_word = ''.dup
            mode = :char
          else
            current_word << char
          end
        when :char
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = opening_delimiter.dup
            mode = :tag
          elsif whitespace?(char)
            words << current_word unless current_word.empty?
            # dup: the word is appended to later and must not alias the input
            current_word = char.dup
            mode = :whitespace
          else
            current_word << char
          end
        when :whitespace
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = opening_delimiter.dup
            mode = :tag
          elsif whitespace?(char)
            current_word << char
          else
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :char
          end
        else
          raise "Unknown mode #{mode.inspect}"
        end
      end

      words << current_word unless current_word.empty?
      words
    end

  end # of class DiffBuilder

  # Diffs +a+ (old) against +b+ (new).
  # Returns the marked-up String, or [old_html, new_html] when +dual+ is true.
  def diff(a, b, dual = false)
    DiffBuilder.new(a, b, dual).build
  end
end
@@ -0,0 +1,32 @@
require File.join(File.dirname(__FILE__), 'spec_helper')
require 'htmldiff'

# Host class that exposes HTMLDiff#diff as a class-level method for the examples.
class TestDiff
  extend HTMLDiff
end

describe "htmldiff" do

  it "should diff text" do
    expected = 'a<ins class="diffins"> nother</ins> word is ' \
               '<del class="diffmod">here</del><ins class="diffmod">there</ins>'
    TestDiff.diff('a word is here', 'a nother word is there').should == expected
  end

  it "should insert a letter and a space" do
    TestDiff.diff('a c', 'a b c').should == 'a <ins class="diffins">b </ins>c'
  end

  it "should remove a letter and a space" do
    TestDiff.diff('a b c', 'a c').should == 'a <del class="diffdel">b </del>c'
  end

  it "should change a letter" do
    expected = 'a <del class="diffmod">b</del><ins class="diffmod">d</ins> c'
    TestDiff.diff('a b c', 'a d c').should == expected
  end

end
@@ -0,0 +1,2 @@
# Flag that the code is running under the spec suite.
$TESTING = true
# Make lib/ requirable from the specs.
$LOAD_PATH.push(File.join(File.dirname(__FILE__), '..', 'lib'))
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: edouard-htmldiff
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
10
+ platform: ruby
11
+ authors:
12
+ - Nathan Herald
13
+ autorequire: htmldiff
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-12-23 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description:
22
+ email: nathan@myobie.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README
29
+ - LICENSE
30
+ - TODO
31
+ files:
32
+ - LICENSE
33
+ - README
34
+ - Rakefile
35
+ - TODO
36
+ - lib/htmldiff.rb
37
+ - spec/htmldiff_spec.rb
38
+ - spec/spec_helper.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/edouard/htmldiff
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options: []
45
+
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ segments:
54
+ - 0
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.3.7
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: HTML diffs of text
71
+ test_files: []
72
+