htmldiff 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,15 @@
1
+ class Stuff
2
+
3
+ class << self
4
+ include HTMLDiff
5
+ end
6
+
7
+ # or extend HTMLDiff ?
8
+
9
+ end
10
+
11
+ Stuff.diff('a word is here', 'a nother word is there')
12
+
13
+ # => "a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>"
14
+
15
+ Check out the crappy specs for good examples.
@@ -0,0 +1,57 @@
1
# Rakefile for the htmldiff gem: declares the gem specification and the
# tasks for running the specs, packaging, installing and writing a gemspec.
require 'rubygems'
require 'rake/gempackagetask'
require 'rubygems/specification'
require 'date'
require 'spec/rake/spectask'

GEM = "htmldiff"
GEM_VERSION = "0.0.1"
AUTHOR = "Nathan Herald"
EMAIL = "nathan@myobie.com"
HOMEPAGE = "http://github.com/myobie/htmldiff"
SUMMARY = "HTML diffs of text (borrowed from a wiki software I no longer remember)"

# Gem specification shared by the packaging and gemspec tasks below.
spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "foo"

  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = %w(-fs --color)
end

# The spec is handed to the constructor, so it does not need to be assigned
# again inside a block.
Rake::GemPackageTask.new(spec)

desc "install the gem locally"
task :install => [:package] do
  # Name the built file explicitly (with its .gem extension) so the local
  # package is installed instead of relying on RubyGems resolving a bare
  # "pkg/name-version" argument.
  sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem}
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end
data/TODO ADDED
File without changes
@@ -0,0 +1,316 @@
1
# Word-level diffing of HTML fragments.
#
# Mix this module into an object (+include+ or +extend+) and call
# +diff(old, new)+ to get the new text with insertions wrapped in
# <ins> tags and deletions wrapped in <del> tags.
module HTMLDiff

  # A run of +size+ identical consecutive words, starting at +start_in_old+
  # in the old word list and at +start_in_new+ in the new word list.
  Match = Struct.new(:start_in_old, :start_in_new, :size)
  class Match
    # Index just past the last matched word in the old word list.
    def end_in_old
      start_in_old + size
    end

    # Index just past the last matched word in the new word list.
    def end_in_new
      start_in_new + size
    end
  end

  # One step of the diff. +action+ is :equal, :insert, :delete or :replace;
  # the remaining fields are half-open index ranges into the old and new
  # word lists.
  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)

  # Builds the marked-up diff of two versions of a text.
  class DiffBuilder

    # Actions that perform_operation is willing to dispatch to.
    VALID_METHODS = [:replace, :insert, :delete, :equal].freeze

    # old_version / new_version: Strings, or sequences of one-character
    # strings. nil is treated as an empty document.
    def initialize(old_version, new_version)
      @old_version, @new_version = old_version, new_version
      @content = []
    end

    # Runs the whole diff and returns the resulting markup as one String.
    def build
      split_inputs_to_words
      index_new_words
      operations.each { |op| perform_operation(op) }
      @content.join
    end

    # Tokenizes both versions into @old_words and @new_words.
    def split_inputs_to_words
      @old_words = convert_html_to_list_of_words(explode(@old_version))
      @new_words = convert_html_to_list_of_words(explode(@new_version))
    end

    # Builds @word_indices: word => ascending list of its positions in
    # @new_words. find_match relies on the ascending order.
    def index_new_words
      @word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @word_indices[word] << i }
    end

    # Turns the matching blocks into an ordered list of Operations that
    # covers both word lists from start to end.
    def operations
      position_in_old = position_in_new = 0
      operations = []

      matches = matching_blocks
      # An empty match at the very end forces the loop below to emit an
      # operation for the unmatched tails of both versions.
      matches << Match.new(@old_words.length, @new_words.length, 0)

      matches.each do |match|
        old_at_match = (position_in_old == match.start_in_old)
        new_at_match = (position_in_new == match.start_in_new)

        # What has to happen to the words between the current positions and
        # the start of this match.
        action_upto_match_positions =
          if old_at_match && new_at_match
            # nothing lies between the previous match and this one
            :none
          elsif old_at_match
            :insert
          elsif new_at_match
            :delete
          else
            :replace
          end

        if action_upto_match_positions != :none
          operations << Operation.new(action_upto_match_positions,
                                      position_in_old, match.start_in_old,
                                      position_in_new, match.start_in_new)
        end

        if match.size != 0
          operations << Operation.new(:equal,
                                      match.start_in_old, match.end_in_old,
                                      match.start_in_new, match.end_in_new)
        end

        position_in_old = match.end_in_old
        position_in_new = match.end_in_new
      end

      operations
    end

    # Returns the Matches between the two word lists, in document order.
    def matching_blocks
      matching_blocks = []
      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
      matching_blocks
    end

    # Finds the best match inside the given ranges, then recurses into the
    # regions to its left and to its right. Matches are appended to
    # +matching_blocks+ in document order.
    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      return unless match

      if start_in_old < match.start_in_old && start_in_new < match.start_in_new
        recursively_find_matching_blocks(
          start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
      end
      matching_blocks << match
      if match.end_in_old < end_in_old && match.end_in_new < end_in_new
        recursively_find_matching_blocks(
          match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
      end
    end

    # Returns the longest run of identical consecutive words lying within
    # old[start_in_old...end_in_old] and new[start_in_new...end_in_new] as a
    # Match, or nil when the ranges share no word. On ties the run found
    # first wins (the comparison below is strict).
    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      best_match_in_old = start_in_old
      best_match_in_new = start_in_new
      best_match_size = 0

      # match_length_at[j]: length of the common run that ends at the
      # previously visited old word and at new word j.
      match_length_at = Hash.new(0)

      start_in_old.upto(end_in_old - 1) do |index_in_old|
        new_match_length_at = Hash.new(0)

        @word_indices[@old_words[index_in_old]].each do |index_in_new|
          next if index_in_new < start_in_new
          # indices are ascending, so nothing further can be in range
          break if index_in_new >= end_in_new

          new_match_length = match_length_at[index_in_new - 1] + 1
          new_match_length_at[index_in_new] = new_match_length

          if new_match_length > best_match_size
            best_match_in_old = index_in_old - new_match_length + 1
            best_match_in_new = index_in_new - new_match_length + 1
            best_match_size = new_match_length
          end
        end
        match_length_at = new_match_length_at
      end

      # NOTE: add_matching_words_left / add_matching_words_right are not
      # applied here; they are kept below as public helpers only.

      best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil
    end

    # Extends a match to the left while the preceding words are equal and
    # the match stays inside the given lower bounds.
    # Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
      while match_in_old > start_in_old &&
            match_in_new > start_in_new &&
            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
        match_in_old -= 1
        match_in_new -= 1
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Extends a match to the right while the following words are equal and
    # the match stays inside the given upper bounds.
    # Returns [match_in_old, match_in_new, match_size].
    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
      while match_in_old + match_size < end_in_old &&
            match_in_new + match_size < end_in_new &&
            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end

    # Dispatches +operation+ to the method named by its action.
    # Raises ArgumentError for an action that is not in VALID_METHODS, so an
    # arbitrary method can never be invoked through +send+.
    def perform_operation(operation)
      unless VALID_METHODS.include?(operation.action)
        raise ArgumentError, "Unknown operation #{operation.action.inspect}"
      end
      @operation = operation
      send(operation.action, operation)
    end

    # A replacement is rendered as a deletion followed by an insertion, both
    # carrying the 'diffmod' class.
    def replace(operation)
      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end

    # Emits the operation's range of new words wrapped in <ins>.
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, @new_words[operation.start_in_new...operation.end_in_new])
    end

    # Emits the operation's range of old words wrapped in <del>.
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, @old_words[operation.start_in_old...operation.end_in_old])
    end

    # No tags to insert: simply copy the matching words from the new version.
    def equal(operation)
      @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
    end

    # True when +item+ consists of a single <...> token, optionally
    # surrounded by whitespace. Closing tags match this pattern as well.
    def opening_tag?(item)
      item =~ %r!^\s*<[^>]+>\s*$!
    end

    # True when +item+ consists of a single </...> token, optionally
    # surrounded by whitespace.
    def closing_tag?(item)
      item =~ %r!^\s*</[^>]+>\s*$!
    end

    # True when +item+ is an opening or a closing tag.
    def tag?(item)
      opening_tag?(item) || closing_tag?(item)
    end

    # Removes and returns the leading run of +words+ for which the block
    # returns true. +words+ is modified in place.
    def extract_consecutive_words(words, &condition)
      index_of_first_failure = nil
      words.each_with_index do |word, i|
        unless condition.call(word)
          index_of_first_failure = i
          break
        end
      end
      words.slice!(0...(index_of_first_failure || words.length))
    end

    # Encloses words within the given tag (ins or del) and appends the result
    # to @content, with a twist: if the words contain HTML tags, several ins
    # or del elements are created so that none of them encloses a tag.
    # This handles cases like
    #   old: '<p>a</p>'
    #   new: '<p>a b</p><p>c</p>'
    #   diff result: '<p>a<ins> b</ins></p><p><ins>c</ins></p>'
    # This still doesn't guarantee valid HTML (hint: think about diffing a
    # text that itself contains ins or del tags), but it handles more cases
    # correctly than wrapping everything in one element.
    #
    # +words+ is consumed (emptied) by this method.
    def insert_tag(tagname, cssclass, words)
      until words.empty?
        non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
        @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

        break if words.empty?
        @content.concat(extract_consecutive_words(words) { |word| tag?(word) })
      end
    end

    # Returns +text+ wrapped in <tagname class="cssclass">...</tagname>.
    def wrap_text(text, tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
    end

    # Splits a String into single characters. nil becomes an empty list and
    # any other object is returned unchanged.
    def explode(sequence)
      return [] if sequence.nil?
      sequence.is_a?(String) ? sequence.split(//) : sequence
    end

    def end_of_tag?(char)
      char == '>'
    end

    def start_of_tag?(char)
      char == '<'
    end

    def whitespace?(char)
      char =~ /\s/
    end

    # Tokenizes +x+ into a list of "words". Each word is a complete tag, a
    # run of whitespace, or a run of other characters. With +use_brackets+
    # the tag delimiters are emitted as '[' and ']' instead of '<' and '>'.
    #
    # The input is never modified: every word is built in a fresh String, so
    # frozen or caller-owned character arrays are safe to pass in.
    def convert_html_to_list_of_words(x, use_brackets = false)
      tag_opener = use_brackets ? '[' : '<'
      tag_closer = use_brackets ? ']' : '>'

      mode = :char
      current_word = ''.dup
      words = []

      explode(x).each do |char|
        case mode
        when :tag
          if end_of_tag?(char)
            current_word << tag_closer
            words << current_word
            current_word = ''.dup
            # The character that closed the tag is '>', so whatever follows
            # is classified starting from :char mode.
            mode = :char
          else
            current_word << char
          end
        when :char
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_opener.dup
            mode = :tag
          elsif whitespace?(char)
            words << current_word unless current_word.empty?
            # dup: later appends must not touch the caller's element
            current_word = char.dup
            mode = :whitespace
          else
            current_word << char
          end
        when :whitespace
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_opener.dup
            mode = :tag
          elsif whitespace?(char)
            current_word << char
          else
            words << current_word unless current_word.empty?
            current_word = char.dup
            mode = :char
          end
        else
          raise "Unknown mode #{mode.inspect}"
        end
      end
      words << current_word unless current_word.empty?
      words
    end

  end # of class DiffBuilder

  # Returns the HTML diff that turns +a+ into +b+.
  def diff(a, b)
    DiffBuilder.new(a, b).build
  end

end
@@ -0,0 +1,32 @@
1
require File.dirname(__FILE__) + '/spec_helper'
require 'htmldiff'

# Host object for the specs: extending HTMLDiff exposes +diff+ as a
# class-level method.
class TestDiff
  extend HTMLDiff
end

describe "htmldiff" do

  it "should diff text" do
    expected = %(a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>)
    TestDiff.diff('a word is here', 'a nother word is there').should == expected
  end

  it "should insert a letter and a space" do
    expected = %(a <ins class="diffins">b </ins>c)
    TestDiff.diff('a c', 'a b c').should == expected
  end

  it "should remove a letter and a space" do
    expected = %(a <del class="diffdel">b </del>c)
    TestDiff.diff('a b c', 'a c').should == expected
  end

  it "should change a letter" do
    expected = %(a <del class="diffmod">b</del><ins class="diffmod">d</ins> c)
    TestDiff.diff('a b c', 'a d c').should == expected
  end

end
@@ -0,0 +1,2 @@
1
# Set the global $TESTING flag for the duration of the spec run.
$TESTING = true

# Put the sibling lib/ directory on the load path so the specs can
# `require 'htmldiff'` directly.
$LOAD_PATH.push(File.join(File.dirname(__FILE__), '..', 'lib'))
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmldiff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nathan Herald
8
+ autorequire: htmldiff
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-21 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: HTML diffs of text (borrowed from a wiki software I no longer remember)
17
+ email: nathan@myobie.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - LICENSE
25
+ - TODO
26
+ files:
27
+ - LICENSE
28
+ - README
29
+ - Rakefile
30
+ - TODO
31
+ - lib/htmldiff.rb
32
+ - spec/htmldiff_spec.rb
33
+ - spec/spec_helper.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/myobie/htmldiff
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ requirements: []
56
+
57
+ rubyforge_project:
58
+ rubygems_version: 1.3.5
59
+ signing_key:
60
+ specification_version: 2
61
+ summary: HTML diffs of text (borrowed from a wiki software I no longer remember)
62
+ test_files: []
63
+