htmldiff 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nathan Herald
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,15 @@
1
+ class Stuff
2
+
3
+ class << self
4
+ include HTMLDiff
5
+ end
6
+
7
+ # or extend HTMLDiff ?
8
+
9
+ end
10
+
11
+ Stuff.diff('a word is here', 'a nother word is there')
12
+
13
+ # => "a<ins class=\"diffins\"> nother</ins> word is <del class=\"diffmod\">here</del><ins class=\"diffmod\">there</ins>"
14
+
15
+ Check out the crappy specs for good examples.
@@ -0,0 +1,57 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'rubygems/specification'
4
+ require 'date'
5
+ require 'spec/rake/spectask'
6
+
7
# Gem identity, shared by the gemspec and the packaging/install tasks below.
GEM = "htmldiff"
GEM_VERSION = "0.0.1"
AUTHOR = "Nathan Herald"
EMAIL = "nathan@myobie.com"
HOMEPAGE = "http://github.com/myobie/htmldiff"
SUMMARY = "HTML diffs of text (borrowed from a wiki software I no longer remember)"

# The gem specification; also serialized to disk by the :make_spec task.
spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "foo"

  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = %w(-fs --color)
end


Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc "install the gem locally"
task :install => [:package] do
  # Name the built file explicitly (with its .gem extension) rather than
  # relying on RubyGems to guess the package from a path prefix.
  sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem}
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end
data/TODO ADDED
File without changes
@@ -0,0 +1,316 @@
1
+ module HTMLDiff
2
+
3
+ Match = Struct.new(:start_in_old, :start_in_new, :size)
4
+ class Match
5
+ def end_in_old
6
+ self.start_in_old + self.size
7
+ end
8
+
9
+ def end_in_new
10
+ self.start_in_new + self.size
11
+ end
12
+ end
13
+
14
+ Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
15
+
16
+ class DiffBuilder
17
+
18
+ def initialize(old_version, new_version)
19
+ @old_version, @new_version = old_version, new_version
20
+ @content = []
21
+ end
22
+
23
+ def build
24
+ split_inputs_to_words
25
+ index_new_words
26
+ operations.each { |op| perform_operation(op) }
27
+ return @content.join
28
+ end
29
+
30
+ def split_inputs_to_words
31
+ @old_words = convert_html_to_list_of_words(explode(@old_version))
32
+ @new_words = convert_html_to_list_of_words(explode(@new_version))
33
+ end
34
+
35
+ def index_new_words
36
+ @word_indices = Hash.new { |h, word| h[word] = [] }
37
+ @new_words.each_with_index { |word, i| @word_indices[word] << i }
38
+ end
39
+
40
+ def operations
41
+ position_in_old = position_in_new = 0
42
+ operations = []
43
+
44
+ matches = matching_blocks
45
+ # an empty match at the end forces the loop below to handle the unmatched tails
46
+ # I'm sure it can be done more gracefully, but not at 23:52
47
+ matches << Match.new(@old_words.length, @new_words.length, 0)
48
+
49
+ matches.each_with_index do |match, i|
50
+ match_starts_at_current_position_in_old = (position_in_old == match.start_in_old)
51
+ match_starts_at_current_position_in_new = (position_in_new == match.start_in_new)
52
+
53
+ action_upto_match_positions =
54
+ case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new]
55
+ when [false, false]
56
+ :replace
57
+ when [true, false]
58
+ :insert
59
+ when [false, true]
60
+ :delete
61
+ else
62
+ # this happens if the first few words are same in both versions
63
+ :none
64
+ end
65
+
66
+ if action_upto_match_positions != :none
67
+ operation_upto_match_positions =
68
+ Operation.new(action_upto_match_positions,
69
+ position_in_old, match.start_in_old,
70
+ position_in_new, match.start_in_new)
71
+ operations << operation_upto_match_positions
72
+ end
73
+ if match.size != 0
74
+ match_operation = Operation.new(:equal,
75
+ match.start_in_old, match.end_in_old,
76
+ match.start_in_new, match.end_in_new)
77
+ operations << match_operation
78
+ end
79
+
80
+ position_in_old = match.end_in_old
81
+ position_in_new = match.end_in_new
82
+ end
83
+
84
+ operations
85
+ end
86
+
87
+ def matching_blocks
88
+ matching_blocks = []
89
+ recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
90
+ matching_blocks
91
+ end
92
+
93
+ def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
94
+ match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
95
+ if match
96
+ if start_in_old < match.start_in_old and start_in_new < match.start_in_new
97
+ recursively_find_matching_blocks(
98
+ start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
99
+ end
100
+ matching_blocks << match
101
+ if match.end_in_old < end_in_old and match.end_in_new < end_in_new
102
+ recursively_find_matching_blocks(
103
+ match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
104
+ end
105
+ end
106
+ end
107
+
108
+ def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
109
+
110
+ best_match_in_old = start_in_old
111
+ best_match_in_new = start_in_new
112
+ best_match_size = 0
113
+
114
+ match_length_at = Hash.new { |h, index| h[index] = 0 }
115
+
116
+ start_in_old.upto(end_in_old - 1) do |index_in_old|
117
+
118
+ new_match_length_at = Hash.new { |h, index| h[index] = 0 }
119
+
120
+ @word_indices[@old_words[index_in_old]].each do |index_in_new|
121
+ next if index_in_new < start_in_new
122
+ break if index_in_new >= end_in_new
123
+
124
+ new_match_length = match_length_at[index_in_new - 1] + 1
125
+ new_match_length_at[index_in_new] = new_match_length
126
+
127
+ if new_match_length > best_match_size
128
+ best_match_in_old = index_in_old - new_match_length + 1
129
+ best_match_in_new = index_in_new - new_match_length + 1
130
+ best_match_size = new_match_length
131
+ end
132
+ end
133
+ match_length_at = new_match_length_at
134
+ end
135
+
136
+ # best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
137
+ # best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
138
+ # best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
139
+ # best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)
140
+
141
+ return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil)
142
+ end
143
+
144
+ def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
145
+ while match_in_old > start_in_old and
146
+ match_in_new > start_in_new and
147
+ @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
148
+ match_in_old -= 1
149
+ match_in_new -= 1
150
+ match_size += 1
151
+ end
152
+ [match_in_old, match_in_new, match_size]
153
+ end
154
+
155
+ def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
156
+ while match_in_old + match_size < end_in_old and
157
+ match_in_new + match_size < end_in_new and
158
+ @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
159
+ match_size += 1
160
+ end
161
+ [match_in_old, match_in_new, match_size]
162
+ end
163
+
164
+ VALID_METHODS = [:replace, :insert, :delete, :equal]
165
+
166
+ def perform_operation(operation)
167
+ @operation = operation
168
+ self.send operation.action, operation
169
+ end
170
+
171
+ def replace(operation)
172
+ delete(operation, 'diffmod')
173
+ insert(operation, 'diffmod')
174
+ end
175
+
176
+ def insert(operation, tagclass = 'diffins')
177
+ insert_tag('ins', tagclass, @new_words[operation.start_in_new...operation.end_in_new])
178
+ end
179
+
180
+ def delete(operation, tagclass = 'diffdel')
181
+ insert_tag('del', tagclass, @old_words[operation.start_in_old...operation.end_in_old])
182
+ end
183
+
184
+ def equal(operation)
185
+ # no tags to insert, simply copy the matching words from one of the versions
186
+ @content += @new_words[operation.start_in_new...operation.end_in_new]
187
+ end
188
+
189
+ def opening_tag?(item)
190
+ item =~ %r!^\s*<[^>]+>\s*$!
191
+ end
192
+
193
+ def closing_tag?(item)
194
+ item =~ %r!^\s*</[^>]+>\s*$!
195
+ end
196
+
197
+ def tag?(item)
198
+ opening_tag?(item) or closing_tag?(item)
199
+ end
200
+
201
+ def extract_consecutive_words(words, &condition)
202
+ index_of_first_tag = nil
203
+ words.each_with_index do |word, i|
204
+ if !condition.call(word)
205
+ index_of_first_tag = i
206
+ break
207
+ end
208
+ end
209
+ if index_of_first_tag
210
+ return words.slice!(0...index_of_first_tag)
211
+ else
212
+ return words.slice!(0..words.length)
213
+ end
214
+ end
215
+
216
+ # This method encloses words within a specified tag (ins or del), and adds this into @content,
217
+ # with a twist: if there are words contain tags, it actually creates multiple ins or del,
218
+ # so that they don't include any ins or del. This handles cases like
219
+ # old: '<p>a</p>'
220
+ # new: '<p>ab</p><p>c</b>'
221
+ # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
222
+ # this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
223
+ # del tags), but handles correctly more cases than the earlier version.
224
+ #
225
+ # P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
226
+
227
+ def insert_tag(tagname, cssclass, words)
228
+ loop do
229
+ break if words.empty?
230
+ non_tags = extract_consecutive_words(words) { |word| not tag?(word) }
231
+ @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?
232
+
233
+ break if words.empty?
234
+ @content += extract_consecutive_words(words) { |word| tag?(word) }
235
+ end
236
+ end
237
+
238
+ def wrap_text(text, tagname, cssclass)
239
+ %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
240
+ end
241
+
242
+ def explode(sequence)
243
+ sequence.is_a?(String) ? sequence.split(//) : sequence
244
+ end
245
+
246
+ def end_of_tag?(char)
247
+ char == '>'
248
+ end
249
+
250
+ def start_of_tag?(char)
251
+ char == '<'
252
+ end
253
+
254
+ def whitespace?(char)
255
+ char =~ /\s/
256
+ end
257
+
258
+ def convert_html_to_list_of_words(x, use_brackets = false)
259
+ mode = :char
260
+ current_word = ''
261
+ words = []
262
+
263
+ explode(x).each do |char|
264
+ case mode
265
+ when :tag
266
+ if end_of_tag? char
267
+ current_word << (use_brackets ? ']' : '>')
268
+ words << current_word
269
+ current_word = ''
270
+ if whitespace?(char)
271
+ mode = :whitespace
272
+ else
273
+ mode = :char
274
+ end
275
+ else
276
+ current_word << char
277
+ end
278
+ when :char
279
+ if start_of_tag? char
280
+ words << current_word unless current_word.empty?
281
+ current_word = (use_brackets ? '[' : '<')
282
+ mode = :tag
283
+ elsif /\s/.match char
284
+ words << current_word unless current_word.empty?
285
+ current_word = char
286
+ mode = :whitespace
287
+ else
288
+ current_word << char
289
+ end
290
+ when :whitespace
291
+ if start_of_tag? char
292
+ words << current_word unless current_word.empty?
293
+ current_word = (use_brackets ? '[' : '<')
294
+ mode = :tag
295
+ elsif /\s/.match char
296
+ current_word << char
297
+ else
298
+ words << current_word unless current_word.empty?
299
+ current_word = char
300
+ mode = :char
301
+ end
302
+ else
303
+ raise "Unknown mode #{mode.inspect}"
304
+ end
305
+ end
306
+ words << current_word unless current_word.empty?
307
+ words
308
+ end
309
+
310
+ end # of class Diff Builder
311
+
312
+ def diff(a, b)
313
+ DiffBuilder.new(a, b).build
314
+ end
315
+
316
+ end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'htmldiff'
3
+
4
# Host class that exposes HTMLDiff#diff as a class-level method for the examples.
class TestDiff
  extend HTMLDiff
end

describe "htmldiff" do

  it "should diff text" do
    expected = %(a<ins class="diffins"> nother</ins> word is <del class="diffmod">here</del><ins class="diffmod">there</ins>)
    actual = TestDiff.diff('a word is here', 'a nother word is there')
    actual.should == expected
  end

  it "should insert a letter and a space" do
    expected = %(a <ins class="diffins">b </ins>c)
    actual = TestDiff.diff('a c', 'a b c')
    actual.should == expected
  end

  it "should remove a letter and a space" do
    expected = %(a <del class="diffdel">b </del>c)
    actual = TestDiff.diff('a b c', 'a c')
    actual.should == expected
  end

  it "should change a letter" do
    expected = %(a <del class="diffmod">b</del><ins class="diffmod">d</ins> c)
    actual = TestDiff.diff('a b c', 'a d c')
    actual.should == expected
  end

end
@@ -0,0 +1,2 @@
1
# Signal that the code is being loaded by the spec suite.
$TESTING = true
# Put the gem's lib/ directory on the load path so the specs can require it.
$LOAD_PATH.push(File.join(File.dirname(__FILE__), '..', 'lib'))
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmldiff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nathan Herald
8
+ autorequire: htmldiff
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-21 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: HTML diffs of text (borrowed from a wiki software I no longer remember)
17
+ email: nathan@myobie.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - LICENSE
25
+ - TODO
26
+ files:
27
+ - LICENSE
28
+ - README
29
+ - Rakefile
30
+ - TODO
31
+ - lib/htmldiff.rb
32
+ - spec/htmldiff_spec.rb
33
+ - spec/spec_helper.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/myobie/htmldiff
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ requirements: []
56
+
57
+ rubyforge_project:
58
+ rubygems_version: 1.3.5
59
+ signing_key:
60
+ specification_version: 2
61
+ summary: HTML diffs of text (borrowed from a wiki software I no longer remember)
62
+ test_files: []
63
+