diff-lcs 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +42 -0
- data/Install +6 -0
- data/README +77 -0
- data/Rakefile +103 -0
- data/bin/htmldiff +111 -0
- data/bin/ldiff +45 -0
- data/lib/diff/lcs.rb +1105 -0
- data/lib/diff/lcs/array.rb +21 -0
- data/lib/diff/lcs/block.rb +51 -0
- data/lib/diff/lcs/callbacks.rb +322 -0
- data/lib/diff/lcs/change.rb +169 -0
- data/lib/diff/lcs/hunk.rb +257 -0
- data/lib/diff/lcs/ldiff.rb +226 -0
- data/lib/diff/lcs/string.rb +19 -0
- data/tests/00test.rb +626 -0
- metadata +66 -0
@@ -0,0 +1,257 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#--
|
3
|
+
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
4
|
+
# adapted from:
|
5
|
+
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
6
|
+
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
7
|
+
# implements McIlroy-Hunt diff algorithm
|
8
|
+
#
|
9
|
+
# This program is free software. It may be redistributed and/or modified under
|
10
|
+
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
11
|
+
# Ruby licence.
|
12
|
+
#
|
13
|
+
# $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
|
14
|
+
#++
|
15
|
+
# Contains Diff::LCS::Hunk for bin/ldiff.
|
16
|
+
|
17
|
+
require 'diff/lcs/block'
|
18
|
+
|
19
|
+
# A Hunk is a group of Blocks which overlap because of the context
# surrounding each block. (So if we're not using context, every hunk will
# contain one block.) Used in the diff program (bin/ldiff).
class Diff::LCS::Hunk
  # Maps a block operation to the single-letter command used by the
  # old-style and ed-style output formats.
  OP_ACT = { "+" => 'a', "-" => 'd', "!" => "c" }.freeze

  # Create a hunk using references to both the old and new data, as well as
  # the piece of data.
  #
  # data_old, data_new::      The sequences being compared; they are indexed
  #                           by the positions of the changes in +piece+.
  # piece::                   The changes used to build the first Block.
  # context::                 Number of context items to add around the
  #                           changes (+nil+ or 0 for none).
  # file_length_difference::  Difference in length between the two
  #                           sequences before this hunk. The updated value
  #                           is available from #file_length_difference.
  def initialize(data_old, data_new, piece, context, file_length_difference)
    # At first, a hunk will have just one Block in it
    @blocks = [Diff::LCS::Block.new(piece)]
    @data_old = data_old
    @data_new = data_new

    first = @blocks[0]

    before = after = file_length_difference
    after += first.diff_size
    @file_length_difference = after # The caller must get this manually

    # Save the start & end of each array. If the array doesn't exist
    # (e.g., we're only adding items in this block), then figure out the
    # line number based on the line number of the other file and the
    # current difference in file lengths.
    a1 = a2 = nil
    unless first.remove.empty?
      a1 = first.remove[0].position
      a2 = first.remove[-1].position
    end

    b1 = b2 = nil
    unless first.insert.empty?
      b1 = first.insert[0].position
      b2 = first.insert[-1].position
    end

    @start_old = a1 || (b1 - before)
    @start_new = b1 || (a1 + before)
    @end_old = a2 || (b2 - after)
    @end_new = b2 || (a2 + after)

    self.flag_context = context
  end

  attr_reader :blocks
  attr_reader :start_old, :start_new
  attr_reader :end_old, :end_new
  attr_reader :file_length_difference

  # The context value most recently assigned through #flag_context=.
  attr_reader :flag_context

  # Change the "start" and "end" fields to note that context should be added
  # to this hunk. The amount of context is clamped so that the hunk never
  # extends before the first or past the last item of the old data.
  def flag_context=(context) #:nodoc:
    @flag_context = context
    return if context.nil? || context.zero?

    add_start = (context > @start_old) ? @start_old : context
    @start_old -= add_start
    @start_new -= add_start

    # @end_old is a zero-based index, so the last value it may take is
    # size - 1; clamping against size would let the range run one item past
    # the end of the data.
    room = (@data_old.size - 1) - @end_old
    room = 0 if room < 0
    add_end = (context > room) ? room : context
    @end_old += add_end
    @end_new += add_end
  end

  # Prepend +hunk+ (an earlier hunk) to this one: this hunk takes over the
  # earlier hunk's start positions and its blocks are placed before ours.
  def unshift(hunk)
    @start_old = hunk.start_old
    @start_new = hunk.start_new
    blocks.unshift(*hunk.blocks)
  end

  # Is there an overlap between this hunk and the earlier +hunk+? Note: if
  # the end of the earlier hunk is one less than the beginning of this one,
  # they overlap. Returns +nil+ when no hunk is given.
  def overlaps?(hunk = nil)
    return nil if hunk.nil?

    a = (@start_old - hunk.end_old) <= 1
    b = (@start_new - hunk.end_new) <= 1
    a || b
  end

  # Render this hunk in the requested +format+ (+:old+, +:unified+,
  # +:context+, +:reverse_ed+ or +:ed_finish+). For +:ed+ the hunk itself is
  # returned so that the caller can render it later with +:ed_finish+.
  # Raises a RuntimeError for any other format.
  def diff(format)
    case format
    when :old
      old_diff
    when :unified
      unified_diff
    when :context
      context_diff
    when :ed
      self
    when :reverse_ed, :ed_finish
      ed_diff(format)
    else
      raise "Unknown diff format #{format}."
    end
  end

  # Yields each item of the old data covered by this hunk. The positional
  # argument is ignored; it is still accepted so existing callers that pass
  # one keep working.
  def each_old(block = nil)
    @data_old[@start_old..@end_old].each { |e| yield e }
  end

  private

  # Note that an old diff can't have any context. Therefore, we know that
  # there's only one block in the hunk.
  def old_diff
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1

    block = @blocks[0]

    # Calculate item number range. Old diff range is just like a context
    # diff range, except the ranges are on one line with the action between
    # them.
    s = "#{context_range(:old)}#{OP_ACT[block.op]}#{context_range(:new)}\n"
    # If removing anything, just print out all the remove lines in the hunk
    # which is just all the remove lines in the block.
    @data_old[@start_old..@end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
    s << "---\n" if block.op == "!"
    @data_new[@start_new..@end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
    s
  end

  # Render the hunk as a unified diff. The result has no trailing newline.
  def unified_diff
    # Calculate item number range.
    s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"

    # Outlist starts containing the hunk of the old file. Removing an item
    # just means putting a '-' in front of it. Inserting an item requires
    # getting it from the new file and splicing it in. We splice in
    # +num_added+ items. Remove blocks use +num_added+ because splicing
    # changed the length of outlist.
    #
    # We remove +num_removed+ items. Insert blocks use +num_removed+
    # because their item numbers -- corresponding to positions in the NEW
    # file -- don't take removed items into account.
    lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0

    outlist = @data_old[lo..hi].map { |e| e.gsub(/^/, ' ') }

    @blocks.each do |block|
      block.remove.each do |item|
        op = item.action.to_s # -
        offset = item.position - lo + num_added
        outlist[offset].gsub!(/^ /, op)
        num_removed += 1
      end
      block.insert.each do |item|
        op = item.action.to_s # +
        offset = item.position - @start_new + num_removed
        outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
        num_added += 1
      end
    end

    s << outlist.join("\n")
  end

  # Render the hunk as a context diff. The result has no trailing newline.
  # The body of a side (old or new) is printed only when at least one block
  # changes that side; otherwise just its range header is printed.
  def context_diff
    s = "***************\n".dup
    s << "*** #{context_range(:old)} ****\n"
    r = context_range(:new)

    # Print out file 1 part for each block in context diff format if there
    # are any blocks that remove items
    lo, hi = @start_old, @end_old
    removes = @blocks.reject { |e| e.remove.empty? }
    unless removes.empty?
      outlist = @data_old[lo..hi].map { |e| e.gsub(/^/, ' ') }
      removes.each do |block|
        block.remove.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
        end
      end
      s << outlist.join("\n") << "\n"
    end

    # The newline after this header is added together with the body below,
    # so that an omitted body does not leave a blank line behind.
    s << "--- #{r} ----"
    lo, hi = @start_new, @end_new
    inserts = @blocks.reject { |e| e.insert.empty? }
    unless inserts.empty?
      outlist = @data_new[lo..hi].map { |e| e.gsub(/^/, ' ') }
      inserts.each do |block|
        block.insert.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
        end
      end
      s << "\n" << outlist.join("\n")
    end
    s
  end

  # Render the hunk as an ed command. +format+ is +:reverse_ed+ (command
  # letter first) or +:ed_finish+ (range first). Like an old diff, an ed
  # diff has no context, so only the first block is used.
  def ed_diff(format)
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1

    block = @blocks[0]

    if format == :reverse_ed
      s = "#{OP_ACT[block.op]}#{context_range(:old)}\n"
    else
      s = "#{context_range(:old).gsub(/,/, ' ')}#{OP_ACT[block.op]}\n"
    end

    unless block.insert.empty?
      @data_new[@start_new..@end_new].each { |e| s << "#{e}\n" }
      s << ".\n"
    end
    s
  end

  # Generate a range of item numbers to print. Only print 1 number if the
  # range has only one item in it. Otherwise, it's 'start,end'
  def context_range(mode)
    case mode
    when :old
      s, e = (@start_old + 1), (@end_old + 1)
    when :new
      s, e = (@start_new + 1), (@end_new + 1)
    end

    (s < e) ? "#{s},#{e}" : "#{e}"
  end

  # Generate a range of item numbers to print for unified diff. Print
  # number where block starts, followed by number of lines in the block
  # (don't print number of lines if it's 1)
  def unified_range(mode)
    case mode
    when :old
      s, e = (@start_old + 1), (@end_old + 1)
    when :new
      s, e = (@start_new + 1), (@end_new + 1)
    end

    length = e - s + 1
    first = (length < 2) ? e : s # "strange, but correct"
    (length == 1) ? "#{first}" : "#{first},#{length}"
  end
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ostruct'
|
5
|
+
require 'diff/lcs/hunk'
|
6
|
+
|
7
|
+
# == ldiff Usage
|
8
|
+
# ldiff [options] oldfile newfile
|
9
|
+
#
|
10
|
+
# -c:: Displays a context diff with 3 lines of context.
|
11
|
+
# -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
|
12
|
+
# -u:: Displays a unified diff with 3 lines of context.
|
13
|
+
# -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
|
14
|
+
# -e:: Creates an 'ed' script to change oldfile to newfile.
|
15
|
+
# -f:: Creates an 'ed' script to change oldfile to newfile in reverse order.
|
16
|
+
# -a, --text:: Treats the files as text and compares them line-by-line, even if they do not seem to be text.
|
17
|
+
# --binary:: Treats the files as binary.
|
18
|
+
# -q, --brief:: Reports only whether or not the files differ, not the details.
|
19
|
+
# --help:: Shows the command-line help.
|
20
|
+
# --version:: Shows the version of Diff::LCS.
|
21
|
+
#
|
22
|
+
# By default, ldiff produces an "old-style" diff, with output like UNIX diff.
|
23
|
+
#
|
24
|
+
# == Copyright
|
25
|
+
# Copyright © 2004 Austin Ziegler
|
26
|
+
#
|
27
|
+
# Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
|
28
|
+
# Austin Ziegler <diff-lcs@halostatue.ca>
|
29
|
+
#
|
30
|
+
# This program is free software. It may be redistributed and/or modified under
|
31
|
+
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
32
|
+
# Ruby licence.
|
33
|
+
# Implementation of the ldiff command-line program. The entry point is
# Diff::LCS::Ldiff.run.
module Diff::LCS::Ldiff
  # Version and copyright text shown by --version.
  BANNER = <<-COPYRIGHT
ldiff #{Diff::LCS::VERSION}
Copyright © 2004 Austin Ziegler

Part of Diff::LCS.
http://rubyforge.org/projects/ruwiki/

Austin Ziegler <diff-lcs@halostatue.ca>

This program is free software. It may be redistributed and/or modified under
the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
Ruby licence.

$Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
  COPYRIGHT

  class << self
    attr_reader :format, :lines #:nodoc:
    attr_reader :file_old, :file_new #:nodoc:
    attr_reader :data_old, :data_new #:nodoc:

    # Parses +args+, compares the two named files and writes the result to
    # +output+.
    #
    # args::    Command-line arguments (options followed by oldfile and
    #           newfile). ARGV works as is; any other Array is extended with
    #           OptionParser::Arguable so it can be parsed the same way.
    # input::   Accepted for symmetry with the other streams; not read.
    # output::  Receives the diff (anything responding to <tt><<</tt>).
    # error::   Receives usage, help and version text.
    #
    # Returns 0 when the files are identical (or after --help/--version),
    # 1 when they differ and 127 when the arguments are invalid.
    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
      # The option state lives on the module; clear it so that one call
      # cannot inherit the options of a previous one.
      @format = @lines = @binary = nil
      @file_old = @file_new = @data_old = @data_new = nil

      # optparse only extends ARGV itself with #options.
      args.extend(OptionParser::Arguable) unless args.respond_to?(:options)

      # The +return+ statements in the option blocks below leave #run, which
      # is why the option definitions cannot be moved into a helper method.
      args.options do |o|
        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
        o.separator ""
        o.on('-c',
             'Displays a context diff with 3 lines of',
             'context.') do
          @format = :context
          @lines = 3
        end
        o.on('-C', '--context [LINES]', Numeric,
             'Displays a context diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :context
          @lines = ctx || 3
        end
        o.on('-u',
             'Displays a unified diff with 3 lines of',
             'context.') do
          @format = :unified
          @lines = 3
        end
        o.on('-U', '--unified [LINES]', Numeric,
             'Displays a unified diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :unified
          @lines = ctx || 3
        end
        o.on('-e',
             'Creates an \'ed\' script to change',
             'oldfile to newfile.') do
          @format = :ed
        end
        o.on('-f',
             'Creates an \'ed\' script to change',
             'oldfile to newfile in reverse order.') do
          @format = :reverse_ed
        end
        o.on('-a', '--text',
             'Treat the files as text and compare them',
             'line-by-line, even if they do not seem',
             'to be text.') do
          @binary = false
        end
        o.on('--binary',
             'Treats the files as binary.') do
          @binary = true
        end
        o.on('-q', '--brief',
             'Report only whether or not the files',
             'differ, not the details.') do
          @format = :report
        end
        o.on_tail('--help', 'Shows this text.') do
          error << o
          return 0
        end
        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
          error << BANNER
          return 0
        end
        o.on_tail ""
        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
        o.parse!
      end

      unless args.size == 2
        error << args.options
        return 127
      end

      # Defaults are for old-style diff
      @format ||= :old
      @lines ||= 0

      # Take the file names from the arguments we were given, not from the
      # process-wide ARGV.
      file_old, file_new = *args
      @file_old, @file_new = file_old, file_new

      case @format
      when :context
        char_old = '*' * 3
        char_new = '-' * 3
      when :unified
        char_old = '-' * 3
        char_new = '+' * 3
      end

      # After we've read up to a certain point in each file, the number of
      # items we've read from each file will differ by FLD (could be 0).
      file_length_difference = 0

      # File.read / File.readlines are used rather than the IO class methods
      # so that a file name starting with "|" is never run as a command.
      if @binary.nil? || @binary
        data_old = File.read(file_old)
        data_new = File.read(file_new)

        # Test binary status: a NUL within the first 4096 characters of
        # either file marks the pair as binary.
        if @binary.nil?
          @binary = data_old[0...4096].include?("\0") ||
                    data_new[0...4096].include?("\0")
        end

        unless @binary
          data_old = data_old.split(/\n/).map! { |e| e.chomp }
          data_new = data_new.split(/\n/).map! { |e| e.chomp }
        end
      else
        data_old = File.readlines(file_old).map! { |e| e.chomp }
        data_new = File.readlines(file_new).map! { |e| e.chomp }
      end
      @data_old, @data_new = data_old, data_new

      # Binary files are only compared for equality; there are no hunks to
      # print for them.
      if @binary
        return 0 if data_old == data_new

        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end

      # diff yields lots of pieces, each of which is basically a Block object
      diffs = Diff::LCS.diff(data_old, data_new)
      return 0 if diffs.empty?

      if @format == :report
        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end

      if (@format == :unified) || (@format == :context)
        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_old} #{file_old}\t#{ft}\n"
        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_new} #{file_new}\t#{ft}\n"
      end

      # Loop over hunks. If a hunk overlaps with the last hunk, join them.
      # Otherwise, print out the old one.
      oldhunk = nil
      # An ed script is written last hunk first, so for :ed the hunks are
      # collected here and rendered once all of them are known.
      ed_hunks = []

      diffs.each do |piece|
        hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
                                   file_length_difference)
        file_length_difference = hunk.file_length_difference

        if oldhunk
          if (@lines > 0) && hunk.overlaps?(oldhunk)
            hunk.unshift(oldhunk)
          else
            emit_hunk(oldhunk, output, ed_hunks)
          end
        end

        oldhunk = hunk
      end

      emit_hunk(oldhunk, output, ed_hunks)

      ed_hunks.reverse_each { |e| output << e.diff(:ed_finish) }

      1
    end

    private

    # Writes one finished hunk. For :ed the hunk is only queued in
    # +ed_hunks+; for every other format its text is appended to +output+,
    # followed by a newline unless the text already ends with one.
    def emit_hunk(hunk, output, ed_hunks)
      if @format == :ed
        ed_hunks << hunk
        return
      end

      text = hunk.diff(@format)
      output << text
      output << "\n" unless text[-1, 1] == "\n"
    end
  end
end
|