jacobat-htmldiff 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,9 @@
1
+ HTMLDiff.diff('a word is here', 'a nother word is there')
2
+ # => 'a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>'
3
+
4
+
5
+ HTMLDiff.textdiff('a word is here', 'a nother word is there')
6
+ # => a[++ nother++] word is [--here--][++there++]
7
+
8
+
9
+ Check out the crappy specs for good examples.
@@ -0,0 +1,57 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'rubygems/specification'
4
+ require 'date'
5
+ require 'spec/rake/spectask'
6
+
7
# Gem identity and metadata, shared by the gemspec and the tasks below.
GEM = "jacobat-htmldiff"
GEM_VERSION = "0.0.2"
AUTHOR = "Nathan Herald"
EMAIL = "nathan@myobie.com"
HOMEPAGE = "http://github.com/myobie/htmldiff"
SUMMARY = "HTML diffs of text (borrowed from a wiki software I no longer remember)"

# Specification used both to package the gem and to generate the .gemspec file.
spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "foo"

  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = %w(-fs --color)
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc "install the gem locally"
task :install => [:package] do
  # Name the built file explicitly (with its .gem extension) so that
  # `gem install` installs the local package rather than treating the
  # argument as a gem name to look up.
  sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem}
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end
data/TODO ADDED
File without changes
@@ -0,0 +1,341 @@
1
# Word-level diff of two strings that may contain HTML markup.
#
# HTMLDiff.diff returns the new text with inserted words wrapped in <ins> and
# removed words wrapped in <del>; HTMLDiff.textdiff marks them with
# [++...++] and [--...--] instead.
class HTMLDiff

  # A run of +size+ identical words that starts at +start_in_old+ in the old
  # word list and at +start_in_new+ in the new word list.
  Match = Struct.new(:start_in_old, :start_in_new, :size) do
    # Index just past the last matched word in the old word list.
    def end_in_old
      start_in_old + size
    end

    # Index just past the last matched word in the new word list.
    def end_in_new
      start_in_new + size
    end
  end

  # One step of the diff: +action+ is one of DiffBuilder::VALID_METHODS and the
  # four offsets are half-open word ranges in the old and new word lists.
  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)

  # Splits both versions into words, finds the matching blocks and renders the
  # resulting operations either as HTML or as plain-text markers.
  class DiffBuilder

    # The only actions perform_operation is willing to dispatch.
    VALID_METHODS = [:replace, :insert, :delete, :equal].freeze

    # Shared empty result for words that do not occur in the new version.
    NO_INDICES = [].freeze

    # old_version / new_version: a String, or an already exploded Array of
    # single-character strings. mode: :html (default) renders <ins>/<del>
    # tags; any other value renders the text markers.
    def initialize(old_version, new_version, mode = :html)
      @old_version, @new_version = old_version, new_version
      @content = []
      @mode = mode
    end

    # Runs the whole diff and returns the rendered result as one String.
    def build
      split_inputs_to_words
      index_new_words
      operations.each { |op| perform_operation(op) }
      @content.join
    end

    # Tokenizes both versions into @old_words and @new_words.
    def split_inputs_to_words
      @old_words = convert_html_to_list_of_words(explode(@old_version))
      @new_words = convert_html_to_list_of_words(explode(@new_version))
    end

    # Builds @word_indices: word => ascending list of its positions in @new_words.
    def index_new_words
      @word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @word_indices[word] << i }
    end

    # Turns the matching blocks into an ordered list of Operations
    # (:equal for matches, :insert / :delete / :replace for the gaps).
    def operations
      position_in_old = position_in_new = 0
      ops = []

      matches = matching_blocks
      # An empty match at the very end forces the loop below to also emit an
      # operation for the unmatched tails of both versions.
      matches << Match.new(@old_words.length, @new_words.length, 0)

      matches.each do |match|
        at_match_in_old = (position_in_old == match.start_in_old)
        at_match_in_new = (position_in_new == match.start_in_new)

        action_upto_match =
          if at_match_in_old && at_match_in_new
            # nothing lies between the previous match and this one
            :none
          elsif at_match_in_old
            :insert
          elsif at_match_in_new
            :delete
          else
            :replace
          end

        if action_upto_match != :none
          ops << Operation.new(action_upto_match,
                               position_in_old, match.start_in_old,
                               position_in_new, match.start_in_new)
        end

        if match.size != 0
          ops << Operation.new(:equal,
                               match.start_in_old, match.end_in_old,
                               match.start_in_new, match.end_in_new)
        end

        position_in_old = match.end_in_old
        position_in_new = match.end_in_new
      end

      ops
    end

    # Returns all matching blocks between @old_words and @new_words, in order.
    def matching_blocks
      matching_blocks = []
      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
      matching_blocks
    end

    # Finds the best match inside the given window, then recurses into the
    # windows to its left and right; matches are appended to +matching_blocks+
    # in left-to-right order.
    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      return unless match

      if start_in_old < match.start_in_old && start_in_new < match.start_in_new
        recursively_find_matching_blocks(
          start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
      end
      matching_blocks << match
      if match.end_in_old < end_in_old && match.end_in_new < end_in_new
        recursively_find_matching_blocks(
          match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
      end
    end

    # Returns the longest run of equal consecutive words within the half-open
    # windows [start_in_old, end_in_old) and [start_in_new, end_in_new) as a
    # Match, or nil when the windows share no word. On ties the first run
    # found wins.
    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      best_match_in_old = start_in_old
      best_match_in_new = start_in_new
      best_match_size = 0

      # match_length_at[j] is the length of the run that ends at the previous
      # old word and at new word j.
      match_length_at = Hash.new(0)

      start_in_old.upto(end_in_old - 1) do |index_in_old|
        new_match_length_at = Hash.new(0)

        # fetch (rather than []) so that words missing from the new version do
        # not add empty entries to the index.
        @word_indices.fetch(@old_words[index_in_old], NO_INDICES).each do |index_in_new|
          next if index_in_new < start_in_new
          # indices are stored in ascending order, so nothing further can be in range
          break if index_in_new >= end_in_new

          new_match_length = match_length_at[index_in_new - 1] + 1
          new_match_length_at[index_in_new] = new_match_length

          if new_match_length > best_match_size
            best_match_in_old = index_in_old - new_match_length + 1
            best_match_in_new = index_in_new - new_match_length + 1
            best_match_size = new_match_length
          end
        end
        match_length_at = new_match_length_at
      end

      # add_matching_words_left / add_matching_words_right are deliberately
      # not applied here; they are kept below as standalone helpers.
      return nil if best_match_size.zero?

      Match.new(best_match_in_old, best_match_in_new, best_match_size)
    end

    # Extends a match to the left while the preceding words are equal and the
    # window starts are not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
      while match_in_old > start_in_old &&
            match_in_new > start_in_new &&
            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
        match_in_old -= 1
        match_in_new -= 1
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Extends a match to the right while the following words are equal and the
    # window ends are not crossed. Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
      while match_in_old + match_size < end_in_old &&
            match_in_new + match_size < end_in_new &&
            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Dispatches +operation+ to the method named by its action.
    # Raises ArgumentError when the action is not listed in VALID_METHODS, so
    # that no other method can be invoked through this entry point.
    def perform_operation(operation)
      unless VALID_METHODS.include?(operation.action)
        raise ArgumentError, "Unknown operation #{operation.action.inspect}"
      end

      @operation = operation
      send(operation.action, operation)
    end

    # Renders a replacement as a deletion followed by an insertion, both
    # tagged with the 'diffmod' class.
    def replace(operation)
      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # Appends the new words of +operation+ as an insertion.
    def insert(operation, tagclass = 'diffins')
      inserted = @new_words[operation.start_in_new...operation.end_in_new]
      if @mode == :html
        insert_tag('ins', tagclass, inserted)
      else
        @content << '[++'
        @content.concat(inserted)
        @content << '++]'
      end
    end

    # Appends the old words of +operation+ as a deletion.
    def delete(operation, tagclass = 'diffdel')
      deleted = @old_words[operation.start_in_old...operation.end_in_old]
      if @mode == :html
        insert_tag('del', tagclass, deleted)
      else
        @content << '[--'
        @content.concat(deleted)
        @content << '--]'
      end
    end

    # No markup needed: copies the matching words from the new version.
    def equal(operation)
      @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
    end

    # Truthy when +item+ consists of a single <...> tag (optionally padded
    # with whitespace). Note that this pattern also accepts closing tags.
    def opening_tag?(item)
      item =~ %r!^\s*<[^>]+>\s*$!
    end

    # Truthy when +item+ consists of a single </...> tag (optionally padded
    # with whitespace).
    def closing_tag?(item)
      item =~ %r!^\s*</[^>]+>\s*$!
    end

    # Truthy when +item+ is an opening or a closing tag.
    def tag?(item)
      opening_tag?(item) || closing_tag?(item)
    end

    # Removes and returns the leading words of +words+ for which the block is
    # truthy; stops at the first word for which it is not. Mutates +words+.
    def extract_consecutive_words(words, &condition)
      cut = words.index { |word| !condition.call(word) } || words.length
      words.slice!(0...cut)
    end

    # Encloses +words+ in the given tag (ins or del) and appends the result to
    # @content, with a twist: HTML tags among the words are copied through
    # unwrapped, so several ins/del elements may be produced and none of them
    # contains a tag. For example
    #   old: 'a'
    #   new: 'a<b>c</b>'
    #   diff result: 'a<b><ins class="diffins">c</ins></b>'
    # This still doesn't guarantee valid HTML (hint: think about diffing a text
    # that itself contains ins or del tags).
    #
    # +words+ is consumed (emptied) in the process.
    def insert_tag(tagname, cssclass, words)
      until words.empty?
        non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
        @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

        break if words.empty?
        @content.concat(extract_consecutive_words(words) { |word| tag?(word) })
      end
    end

    # Returns +text+ wrapped in <tagname class="cssclass">...</tagname>.
    def wrap_text(text, tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
    end

    # Splits a String into single characters; anything else is returned as is.
    def explode(sequence)
      sequence.is_a?(String) ? sequence.split(//) : sequence
    end

    def end_of_tag?(char)
      char == '>'
    end

    def start_of_tag?(char)
      char == '<'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    # Tokenizes +x+ into a list of words. A word is either a complete tag
    # (from '<' up to and including the next '>'), a run of whitespace, or a
    # run of any other characters. With +use_brackets+ the tag delimiters are
    # written as '[' and ']' in the output words.
    #
    # The characters of +x+ are never modified; words are built from copies.
    def convert_html_to_list_of_words(x, use_brackets = false)
      tag_open  = use_brackets ? '[' : '<'
      tag_close = use_brackets ? ']' : '>'

      mode = :char
      current_word = ''.dup
      words = []

      explode(x).each do |char|
        case mode
        when :tag
          if end_of_tag?(char)
            current_word << tag_close
            words << current_word
            current_word = ''.dup
            # whatever follows a tag is classified again from :char mode
            mode = :char
          else
            current_word << char
          end
        when :char
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :whitespace
          else
            current_word << char
          end
        when :whitespace
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            current_word << char
          else
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :char
          end
        else
          raise "Unknown mode #{mode.inspect}"
        end
      end
      words << current_word unless current_word.empty?
      words
    end

  end # of class DiffBuilder

  # Convenience wrapper: HTML diff of +a+ (old) against +b+ (new).
  def self.diff(a, b)
    new.diff(a, b)
  end

  # Convenience wrapper: text-marker diff of +a+ (old) against +b+ (new).
  def self.textdiff(a, b)
    new.textdiff(a, b)
  end

  # Returns +b+ with its differences from +a+ marked up with <ins>/<del> tags.
  def diff(a, b)
    DiffBuilder.new(a, b).build
  end

  # Returns +b+ with its differences from +a+ marked as [++...++] / [--...--].
  def textdiff(a, b)
    DiffBuilder.new(a, b, :text).build
  end

end
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'htmldiff'
3
+
4
# Examples pinning the rendered output of HTMLDiff.diff and HTMLDiff.textdiff.
describe "htmldiff" do

  it "should diff text" do
    HTMLDiff.diff('a word is here', 'a nother word is there').should ==
      'a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>'
  end

  it "should insert a letter and a space" do
    HTMLDiff.diff('a c', 'a b c').should == 'a <ins class="diffins">b </ins>c'
  end

  it "should remove a letter and a space" do
    HTMLDiff.diff('a b c', 'a c').should == 'a <del class="diffdel">b </del>c'
  end

  it "should change a letter" do
    HTMLDiff.diff('a b c', 'a d c').should ==
      'a <del class="diffmod">b</del><ins class="diffmod">d</ins> c'
  end

  it "should provide a text output format" do
    HTMLDiff.textdiff('a b c', 'a d c').should == 'a [--b--][++d++] c'
  end

end
@@ -0,0 +1,2 @@
1
# Set the global $TESTING flag and append the sibling lib/ directory
# (relative to this file) to the load path.
$TESTING = true
$LOAD_PATH.push(File.join(File.dirname(__FILE__), '..', 'lib'))
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jacobat-htmldiff
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Nathan Herald
14
+ autorequire: jacobat-htmldiff
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-03 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: HTML diffs of text (borrowed from a wiki software I no longer remember)
23
+ email: nathan@myobie.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - README
30
+ - LICENSE
31
+ - TODO
32
+ files:
33
+ - LICENSE
34
+ - README
35
+ - Rakefile
36
+ - TODO
37
+ - lib/htmldiff.rb
38
+ - spec/htmldiff_spec.rb
39
+ - spec/spec_helper.rb
40
+ has_rdoc: true
41
+ homepage: http://github.com/myobie/htmldiff
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ hash: 3
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.7
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: HTML diffs of text (borrowed from a wiki software I no longer remember)
74
+ test_files: []
75
+