jacobat-htmldiff 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,9 @@
1
+ HTMLDiff.diff('a word is here', 'a nother word is there')
2
+ # => "a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>"
3
+
4
+
5
+ HTMLDiff.textdiff('a word is here', 'a nother word is there')
6
+ # => a[++ nother++] word is [--here--][++there++]
7
+
8
+
9
+ Check out the crappy specs for good examples.
@@ -0,0 +1,57 @@
1
# Rakefile for the jacobat-htmldiff gem: declares the gem specification and
# the spec, package, install and make_spec tasks.
require 'rubygems'
require 'rake/gempackagetask'
require 'rubygems/specification'
require 'date'
require 'spec/rake/spectask'

GEM = "jacobat-htmldiff"
GEM_VERSION = "0.0.2"
AUTHOR = "Nathan Herald"
EMAIL = "nathan@myobie.com"
HOMEPAGE = "http://github.com/myobie/htmldiff"
SUMMARY = "HTML diffs of text (borrowed from a wiki software I no longer remember)"

spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "foo"

  s.require_path = 'lib'
  # The library file shipped under lib/ is htmldiff.rb, so that is the name
  # that can actually be required; the gem name itself is not a requirable file.
  s.autorequire = 'htmldiff'
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = %w(-fs --color)
end


Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc "install the gem locally"
task :install => [:package] do
  # Point at the built .gem file explicitly; without the extension the
  # argument is looked up as a gem name instead of a local package file.
  sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem}
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end
data/TODO ADDED
File without changes
@@ -0,0 +1,341 @@
1
# Word-level diff of two HTML or plain-text inputs.
#
#   HTMLDiff.diff('a c', 'a b c')        # => 'a <ins class="diffins">b </ins>c'
#   HTMLDiff.textdiff('a b c', 'a d c')  # => 'a [--b--][++d++] c'
class HTMLDiff

  # A run of +size+ identical words starting at +start_in_old+ in the old
  # word list and at +start_in_new+ in the new word list.
  Match = Struct.new(:start_in_old, :start_in_new, :size)
  class Match
    # Exclusive end index of the match in the old word list.
    def end_in_old
      start_in_old + size
    end

    # Exclusive end index of the match in the new word list.
    def end_in_new
      start_in_new + size
    end
  end

  # One edit step. +action+ is one of DiffBuilder::VALID_METHODS; the index
  # pairs are half-open ranges into the old and new word lists.
  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)

  # Splits both versions into words, finds the matching blocks and renders
  # the result either as HTML (ins/del tags) or as bracketed text.
  class DiffBuilder

    # Actions that perform_operation is allowed to dispatch to.
    VALID_METHODS = [:replace, :insert, :delete, :equal].freeze

    # old_version / new_version: a String, an Array of single characters, or nil.
    # mode: :html renders ins/del tags; any other value renders [--..--]/[++..++].
    def initialize(old_version, new_version, mode = :html)
      @old_version, @new_version = old_version, new_version
      @content = []
      @mode = mode
    end

    # Runs the whole diff and returns the rendered String.
    def build
      # Start from an empty buffer so calling build twice yields the same result.
      @content = []
      split_inputs_to_words
      index_new_words
      operations.each { |op| perform_operation(op) }
      @content.join
    end

    # Tokenizes both inputs into @old_words and @new_words.
    def split_inputs_to_words
      @old_words = convert_html_to_list_of_words(explode(@old_version))
      @new_words = convert_html_to_list_of_words(explode(@new_version))
    end

    # Builds @word_indices: word => ascending list of its positions in @new_words.
    def index_new_words
      @word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @word_indices[word] << i }
    end

    # Converts the matching blocks into an ordered list of Operation structs
    # that covers both word lists from start to end.
    def operations
      position_in_old = position_in_new = 0
      ops = []

      matches = matching_blocks
      # An empty match at the very end forces the loop below to emit an
      # operation for whatever unmatched tail remains in either version.
      matches << Match.new(@old_words.length, @new_words.length, 0)

      matches.each do |match|
        at_match_in_old = (position_in_old == match.start_in_old)
        at_match_in_new = (position_in_new == match.start_in_new)

        # What has to happen to get from the current positions to the match.
        action =
          if at_match_in_old && at_match_in_new
            :none # nothing lies between the previous match and this one
          elsif at_match_in_old
            :insert
          elsif at_match_in_new
            :delete
          else
            :replace
          end

        unless action == :none
          ops << Operation.new(action,
                               position_in_old, match.start_in_old,
                               position_in_new, match.start_in_new)
        end

        unless match.size.zero?
          ops << Operation.new(:equal,
                               match.start_in_old, match.end_in_old,
                               match.start_in_new, match.end_in_new)
        end

        position_in_old = match.end_in_old
        position_in_new = match.end_in_new
      end

      ops
    end

    # Returns all matching blocks between the two word lists, in order.
    def matching_blocks
      blocks = []
      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, blocks)
      blocks
    end

    # Finds the best match inside the given window, then recurses into the
    # windows to its left and right, appending matches to +matching_blocks+
    # in left-to-right order.
    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      return unless match

      if start_in_old < match.start_in_old && start_in_new < match.start_in_new
        recursively_find_matching_blocks(
          start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
      end
      matching_blocks << match
      if match.end_in_old < end_in_old && match.end_in_new < end_in_new
        recursively_find_matching_blocks(
          match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
      end
    end

    # Returns the longest run of identical consecutive words between
    # old[start_in_old...end_in_old] and new[start_in_new...end_in_new] as a
    # Match, or nil when the windows share no word. On ties the run found
    # first wins, because only a strictly longer run replaces the best one.
    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      best_match_in_old = start_in_old
      best_match_in_new = start_in_new
      best_match_size = 0

      # match_length_at[j]: length of the common run ending at the previous
      # old word and at new word j.
      match_length_at = Hash.new(0)

      start_in_old.upto(end_in_old - 1) do |index_in_old|
        new_match_length_at = Hash.new(0)

        @word_indices[@old_words[index_in_old]].each do |index_in_new|
          next if index_in_new < start_in_new
          # Positions are stored in ascending order, so nothing further can be in range.
          break if index_in_new >= end_in_new

          new_match_length = match_length_at[index_in_new - 1] + 1
          new_match_length_at[index_in_new] = new_match_length

          if new_match_length > best_match_size
            best_match_in_old = index_in_old - new_match_length + 1
            best_match_in_new = index_in_new - new_match_length + 1
            best_match_size = new_match_length
          end
        end
        match_length_at = new_match_length_at
      end

      # add_matching_words_left / add_matching_words_right are not applied
      # here; the calls were disabled in the original implementation.
      best_match_size.zero? ? nil : Match.new(best_match_in_old, best_match_in_new, best_match_size)
    end

    # Extends a match to the left while the preceding words are equal and the
    # window start is not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
      while match_in_old > start_in_old &&
            match_in_new > start_in_new &&
            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
        match_in_old -= 1
        match_in_new -= 1
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Extends a match to the right while the following words are equal and the
    # window end is not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
      while match_in_old + match_size < end_in_old &&
            match_in_new + match_size < end_in_new &&
            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Dispatches +operation+ to the method named by its action.
    # Raises ArgumentError for an action outside VALID_METHODS so that send
    # can never reach an unrelated method.
    def perform_operation(operation)
      unless VALID_METHODS.include?(operation.action)
        raise ArgumentError, "unknown diff action #{operation.action.inspect}"
      end
      @operation = operation
      send(operation.action, operation)
    end

    # A replacement is rendered as a deletion followed by an insertion,
    # both marked with the 'diffmod' class.
    def replace(operation)
      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # Appends the new-version words of +operation+ as inserted content.
    def insert(operation, tagclass = 'diffins')
      words = @new_words[operation.start_in_new...operation.end_in_new]
      if @mode == :html
        insert_tag('ins', tagclass, words)
      else
        @content << '[++'
        @content.concat(words)
        @content << '++]'
      end
    end

    # Appends the old-version words of +operation+ as deleted content.
    def delete(operation, tagclass = 'diffdel')
      words = @old_words[operation.start_in_old...operation.end_in_old]
      if @mode == :html
        insert_tag('del', tagclass, words)
      else
        @content << '[--'
        @content.concat(words)
        @content << '--]'
      end
    end

    # No tags to insert: copies the matching words from the new version.
    def equal(operation)
      @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
    end

    # Truthy when +item+ is a single tag, optionally surrounded by whitespace.
    # Note that the pattern accepts closing tags as well.
    def opening_tag?(item)
      item =~ %r!^\s*<[^>]+>\s*$!
    end

    # Truthy when +item+ is a single closing tag, optionally surrounded by whitespace.
    def closing_tag?(item)
      item =~ %r!^\s*</[^>]+>\s*$!
    end

    # Truthy when +item+ is any kind of tag.
    def tag?(item)
      opening_tag?(item) || closing_tag?(item)
    end

    # Removes and returns the leading words of +words+ for which the block is
    # truthy. +words+ is modified in place; it keeps everything from the first
    # word that fails the condition onwards.
    def extract_consecutive_words(words, &condition)
      cut = words.index { |word| !condition.call(word) } || words.length
      words.slice!(0...cut)
    end

    # Encloses words within the given tag (ins or del) and appends the result
    # to @content, with a twist: if the words contain tags, the text runs
    # between them are wrapped separately and the tags are copied through
    # unwrapped, so an ins or del never encloses a tag. This handles cases like
    #   old: '<p>a</p>'
    #   new: '<p>a b</p><p>c</p>'
    #   diff result: '<p>a<ins> b</ins></p><p><ins>c</ins></p>'
    # This still doesn't guarantee valid HTML (hint: think about diffing a text
    # containing ins or del tags), but it handles more cases correctly than
    # wrapping everything at once.
    #
    # +words+ is consumed (emptied) by this method.
    def insert_tag(tagname, cssclass, words)
      until words.empty?
        non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
        @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

        break if words.empty?
        @content.concat(extract_consecutive_words(words) { |word| tag?(word) })
      end
    end

    # Returns +text+ wrapped in <tagname class="cssclass">...</tagname>.
    def wrap_text(text, tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
    end

    # Returns the characters of a String as an Array, an empty Array for nil,
    # and any other sequence unchanged.
    def explode(sequence)
      return [] if sequence.nil?
      sequence.is_a?(String) ? sequence.split(//) : sequence
    end

    def end_of_tag?(char)
      char == '>'
    end

    def start_of_tag?(char)
      char == '<'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    # Splits +x+ (String or Array of characters) into words. A word is either
    # a whole tag ('<...>'), a run of whitespace, or a run of other characters.
    # With +use_brackets+ the tag delimiters are emitted as '[' and ']'.
    # The input is never modified: every word is built in a fresh String.
    def convert_html_to_list_of_words(x, use_brackets = false)
      tag_open = use_brackets ? '[' : '<'
      tag_close = use_brackets ? ']' : '>'

      mode = :char
      current_word = ''.dup
      words = []

      explode(x).each do |char|
        case mode
        when :tag
          if end_of_tag?(char)
            current_word << tag_close
            words << current_word
            current_word = ''.dup
            # The character that closes a tag is never whitespace, so the
            # tokenizer always resumes in :char mode.
            mode = :char
          else
            current_word << char
          end
        when :char
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :whitespace
          else
            current_word << char
          end
        when :whitespace
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            current_word << char
          else
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :char
          end
        else
          raise "Unknown mode #{mode.inspect}"
        end
      end
      words << current_word unless current_word.empty?
      words
    end

  end # of class DiffBuilder

  # Returns the HTML diff between +a+ and +b+.
  def self.diff(a, b)
    new.diff(a, b)
  end

  # Returns the bracketed plain-text diff between +a+ and +b+.
  def self.textdiff(a, b)
    new.textdiff(a, b)
  end

  # Instance form of HTMLDiff.diff.
  def diff(a, b)
    DiffBuilder.new(a, b).build
  end

  # Instance form of HTMLDiff.textdiff.
  def textdiff(a, b)
    DiffBuilder.new(a, b, :text).build
  end

end
@@ -0,0 +1,33 @@
1
require File.dirname(__FILE__) + '/spec_helper'
require 'htmldiff'

# Examples for HTMLDiff.diff (HTML output) and HTMLDiff.textdiff (bracketed
# text output).
describe "htmldiff" do

  it "should diff text" do
    diff = HTMLDiff.diff('a word is here', 'a nother word is there')
    diff.should == "a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>"
  end

  it "should insert a letter and a space" do
    diff = HTMLDiff.diff('a c', 'a b c')
    diff.should == "a <ins class=\"diffins\">b </ins>c"
  end

  it "should remove a letter and a space" do
    diff = HTMLDiff.diff('a b c', 'a c')
    diff.should == "a <del class=\"diffdel\">b </del>c"
  end

  it "should change a letter" do
    diff = HTMLDiff.diff('a b c', 'a d c')
    diff.should == "a <del class=\"diffmod\">b</del><ins class=\"diffmod\">d</ins> c"
  end

  it "should provide a text output format" do
    diff = HTMLDiff.textdiff('a b c', 'a d c')
    diff.should == "a [--b--][++d++] c"
  end

end
@@ -0,0 +1,2 @@
1
# Global flag set before the specs run.
# NOTE(review): nothing in the visible code reads $TESTING — confirm it is still needed.
$TESTING = true
# Put the gem's lib directory on the load path so specs can require 'htmldiff'.
$LOAD_PATH.push File.join(File.dirname(__FILE__), '..', 'lib')
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jacobat-htmldiff
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Nathan Herald
14
+ autorequire: jacobat-htmldiff
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-03 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: HTML diffs of text (borrowed from a wiki software I no longer remember)
23
+ email: nathan@myobie.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - README
30
+ - LICENSE
31
+ - TODO
32
+ files:
33
+ - LICENSE
34
+ - README
35
+ - Rakefile
36
+ - TODO
37
+ - lib/htmldiff.rb
38
+ - spec/htmldiff_spec.rb
39
+ - spec/spec_helper.rb
40
+ has_rdoc: true
41
+ homepage: http://github.com/myobie/htmldiff
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ hash: 3
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.7
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: HTML diffs of text (borrowed from a wiki software I no longer remember)
74
+ test_files: []
75
+