merge3 0.8 → 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/merge3.rb.~1.2.~ +765 -0
- metadata +8 -4
@@ -0,0 +1,765 @@
|
|
1
|
+
#
|
2
|
+
# merge3.rb - a 3 way text merging tool
|
3
|
+
#
|
4
|
+
# Copyright 2004 Helsinki Institute for Information Technology (HIIT)
|
5
|
+
# and the authors. All rights reserved.
|
6
|
+
#
|
7
|
+
# Authors: Torsten Rueger <torsten@lightning.nu>
|
8
|
+
#
|
9
|
+
|
10
|
+
# Permission is hereby granted, free of charge, to any person
|
11
|
+
# obtaining a copy of this software and associated documentation files
|
12
|
+
# (the "Software"), to deal in the Software without restriction,
|
13
|
+
# including without limitation the rights to use, copy, modify, merge,
|
14
|
+
# publish, distribute, sublicense, and/or sell copies of the Software,
|
15
|
+
# and to permit persons to whom the Software is furnished to do so,
|
16
|
+
# subject to the following conditions:
|
17
|
+
#
|
18
|
+
# The above copyright notice and this permission notice shall be
|
19
|
+
# included in all copies or substantial portions of the Software.
|
20
|
+
#
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
22
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
23
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
24
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
25
|
+
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
26
|
+
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
27
|
+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
28
|
+
# SOFTWARE.
|
29
|
+
#
|
30
|
+
# start : last week it was really nice weather
|
31
|
+
# one (+end) : last weekend it was really nice weather
|
32
|
+
# two (order) : it was really nice weather last week
|
33
|
+
#
|
34
|
+
# merged : it was really nice weather last weekend
|
35
|
+
|
36
|
+
# It's a simple example, but getting that merged result is not.
|
37
|
+
|
38
|
+
# it seems the standard diff/patch does not use move or copy as a command
|
39
|
+
# as a consequence, simple avoidable conflicts are produced as soon as
|
40
|
+
# move + edit touch the same data.
|
41
|
+
|
42
|
+
# this is not only surprising as diff/patch have been around for so long
|
43
|
+
# also the algorithms for detecting moves have been around for long:
|
44
|
+
# basically there are the two approaches of using add/delete
|
45
|
+
# or add/copy (or just copy). Diff/Patch just use the first, whereas the second
|
46
|
+
# would allow better patching.
|
47
|
+
|
48
|
+
# now the really interesting thing is that modern diff algorithms I know
|
49
|
+
# (xdelta + vcdiff) use copy. As these algorithms are used to work on
|
50
|
+
# 2 versions, A,B , producing a diff ab that then may be applied to A and
|
51
|
+
# A only, the algorithms use copy not because it's better for 3-way merging,
|
52
|
+
# but because it's more efficient.
|
53
|
+
|
54
|
+
# This algorithm is a 3 way merge on text files.
|
55
|
+
# It turns out that it merges xml just fine though.
|
56
|
+
|
57
|
+
# A very rough sketch is:
|
58
|
+
# -- find the common parts of two files . We do this by a hashing technique
|
59
|
+
# once we have the common (copied) parts we add the added/deleted ones
|
60
|
+
# -- common parts are sorted according to both original and edited file order
|
61
|
+
# -- merge by going through the list of parts. Start at one file and always
|
62
|
+
# swap to the other when there is a change in the other, a change from
|
63
|
+
# the original files order
|
64
|
+
|
65
|
+
# Note: While this does not concentrate on performance, it is quite reasonable,
|
66
|
+
# meaning linear in file size and quadratic in number of edits.
|
67
|
+
# Performance may be tested with the speedy.rb file in the test directory
|
68
|
+
#
|
69
|
+
|
70
|
+
DEBUG = false #toggle this to true to get a complete trace of what happens
|
71
|
+
|
72
|
+
|
73
|
+
# so here comes the main algorithm. We have the class to carry the original file
|
74
|
+
# and its hash codes we need for the matching of the files
|
75
|
+
|
76
|
+
class Merge3

  # Make a three way merge, passing in strings and returning a string.
  # Strings are treated as character arrays (not line arrays as in diff).
  #
  # start_file         - the common ancestor text
  # one_file, two_file - the two edited versions
  # ignore_white_space - when true, runs of whitespace are squeezed before
  #                      matching and expanded again on output
  #
  # Raises RuntimeError ("conflict at ...") when both files changed the
  # same place.
  def self.three_way( start_file , one_file , two_file , ignore_white_space = false )
    # We create chunks of the whole files; Chunk#subchunk carries the
    # whitespace table along when whitespace handling is on.
    start_chunk = Chunk.new( 0 , start_file.length , start_file )
    one_chunk = Chunk.new( 0 , one_file.length , one_file )
    two_chunk = Chunk.new( 0 , two_file.length , two_file )
    if ignore_white_space
      # all consecutive whitespace is squeezed into one character and a
      # table is created to unsqueeze later. Yes, this is expensive.
      start_chunk.squeeze
      one_chunk.squeeze
      two_chunk.squeeze
    end
    # only create the matches hash once for the start file
    start_matches = all_matches( start_chunk )
    # create the chunks (moves first, then added and deleted)
    one_seq = Sequence.new( one_chunk , start_chunk , start_matches )
    two_seq = Sequence.new( two_chunk , start_chunk , start_matches )
    puts "Diff original 0" , one_seq if DEBUG
    puts "Diff original 1" , two_seq if DEBUG
    # break all chunks into non overlapping regions (in the original file order)
    one_seq.no_overlap( two_seq )
    two_seq.no_overlap( one_seq )
    puts "Diff non-overlapping 0" , one_seq if DEBUG
    puts "Diff non-overlapping 1" , two_seq if DEBUG
    # and finally (and most simply) do the merge
    merge( one_seq , two_seq )
  end

  # Given the Sequences of both files, attempt merging them.
  # The chunks in the sequences are non-overlapping, so we can always work
  # on whole chunks. In a sentence: follow the line of change. We walk in
  # edited order and switch to the other sequence whenever that one departs
  # from the original file's order.
  #
  # Returns the merged string; raises RuntimeError on a conflict.
  # Note: changes at the very beginning of the file are not considered.
  def self.merge( one_seq , two_seq )
    out = [] # the pieces of the result, joined at the end
    one_chunk = one_seq.chunks.first
    two_chunk = two_seq.chunks.first
    last_choosen = one_chunk
    next_choosen = nil
    break_flag = nil
    while break_flag.nil?
      if DEBUG && one_chunk.from != two_chunk.from
        # chunks are non overlapping, so they must be the same
        raise "CHUNKS differ #{one_chunk}\n#{two_chunk}"
      end
      # we start with the next in the edited file order and then check
      # where the change is
      next_one = one_seq.get_next( one_chunk )
      next_two = two_seq.get_next( two_chunk )
      break_flag = true if next_one.nil? || next_two.nil? # eof
      if break_flag.nil? && one_chunk.org_stop != next_one.from
        # the change is in the first file; if the second changed here
        # as well we cannot decide
        raise "conflict at #{one_chunk.org_stop}" if two_chunk.org_stop != next_two.from
        # find the chunk in file two that starts where one continues
        next_two = two_seq.find_from( next_one.from )
        puts "ONE #{one_chunk}\n #{two_chunk}\nNEXT #{next_one}\n #{next_two}" if DEBUG
        next_choosen = next_one
      elsif break_flag.nil?
        # otherwise the change must be in file two, so we look for the
        # chunk in one that starts where two continues
        next_one = one_seq.find_from( next_two.from )
        puts "TWO #{two_chunk}\n #{one_chunk}\nNEXT #{next_two}\n #{next_one}" if DEBUG
        next_choosen = next_two
      else
        next_one , next_two = nil , nil
      end
      this_del = ( one_chunk.kind_of?( Deleted ) || two_chunk.kind_of?( Deleted ) )
      # output the associated (real == whitespace expanded) string if not deleted
      out << last_choosen.real_string unless this_del
      puts "OUT #{Chunk::short_string(last_choosen.real_string)}" if DEBUG && !this_del
      # we ignore adds (from one file) that fall in the middle of a delete in the other
      one_del = ( one_chunk.kind_of?( Deleted ) && next_one.kind_of?( Deleted ) )
      two_del = ( two_chunk.kind_of?( Deleted ) && next_two.kind_of?( Deleted ) )
      unless one_del || two_del
        # otherwise we output added text from both files; if both have
        # added text the result is asymmetric (one first, then two)
        puts "ADD one #{one_chunk.added}" if DEBUG && !one_chunk.added.nil?
        out << one_chunk.added.real_string unless one_chunk.added.nil?
        puts "ADD two #{ two_chunk.added}" if DEBUG && !two_chunk.added.nil?
        out << two_chunk.added.real_string unless two_chunk.added.nil?
      end
      one_chunk = next_one
      two_chunk = next_two
      last_choosen = next_choosen
    end
    merged = out.join
    puts "RESULT" , merged , "END" if DEBUG
    merged
  end

  # Build a hash of chunks (the chunk's string against the chunk).
  # This is the backbone of the matching algorithm, so that finding a match
  # is basically a hash lookup.
  # size is the approximate piece length (16, 32 and 48 are used) or -1 for
  # line based matching.
  def self.matches( chunk , size )
    return match_newlines( chunk ) if size == -1
    match( chunk , size )
  end

  # Collect all matches (for the start file, so it doesn't have to be done twice).
  def self.all_matches( chunk )
    # order counts: later updates overwrite earlier entries with the same key
    found = match( chunk , 16 )
    found.update( match( chunk , 32 ) )
    found.update( match( chunk , 48 ) )
    found.update( match_newlines( chunk ) )
    found
  end

  # Find matches according to newline separation.
  # Returns a hash of stripped line => chunk (no collision detection).
  def self.match_newlines( chunk )
    found = {}
    index = 0 # position of the current line within the chunk's string
    if chunk.whitespace.empty? # whitespace compression on or not ?
      # there are only newlines left if whitespace compression is off
      chunk.str.split( "\n" ).each do |line|
        # strip the whitespace off both sides of the line, in an effort to
        # match things like changed indentation
        stripped = line.strip
        at = index + ( line.length - line.lstrip.length )
        cop = chunk.subchunk( at , stripped.length , at )
        found[cop.str] = cop
        # advance by the *unstripped* length so that trailing whitespace
        # (or a "\r") does not shift all following lines; 1 is the newline
        index += line.length + 1
      end
    else
      # whitespace compression is on, so the whitespace table contains
      # the newlines, we just have to find them
      # NOTE(review): the first piece starts at index 1, and `from` is one
      # less than `start` for every piece - kept as is, confirm intent.
      chunk.whitespace.each do | at , str |
        next unless str.include?( "\n" )
        cop = chunk.subchunk( index + 1 , at - index , index )
        index = at
        found[cop.str] = cop
      end
    end
    found
  end

  # Find matches of around the given length. Exactly that length when no
  # break is found, but if there is a non-word character between 70% and
  # 150% of the given value we break there, hoping this avoids alignment
  # issues for text.
  # Returns a hash of string => chunk (no collision detection).
  def self.match( chunk , around )
    found = {}
    min_len = ( 0.7 * around ).to_i
    max_len = ( 1.5 * around ).to_i
    index = 0
    while index < chunk.length
      left = chunk.length - index
      if left > 0.7 * around && left <= 1.5 * around
        len = left # we are at the end, take all that is left
      else
        sub = chunk.str[ index , max_len ] # the maximum length we want
        white = sub.index( /\W/ , min_len ) # look for a non word character
        len = white.nil? ? around : white
        len = left if len > left # never claim more than the string holds
        len = 1 if len < 1       # always make progress
      end
      cop = chunk.subchunk( index , len , index )
      found[cop.str] = cop
      index += len
    end
    # now we take one piece from the back, because that's often a match.
    # Inputs shorter than that piece are already covered by the loop above
    # (a negative index would otherwise wrap around or yield no string).
    at = ( chunk.length - 0.7 * around ).to_i
    if at >= 0
      cop = chunk.subchunk( at , min_len , at )
      found[cop.str] = cop
    end
    found
  end
end
|
252
|
+
|
253
|
+
# A Sequence represents a sequence of chunks , in other words the modified file
|
254
|
+
# broken into the chunks that were found
|
255
|
+
|
256
|
+
# the sequence keeps the chunks as lists, one in edited file order
|
257
|
+
# and one in original file order
|
258
|
+
class Sequence
  attr_reader :chunks     # the list of chunks, sorted in order of the edited file
  attr_reader :org_chunks # the list of chunks, sorted in order of the original file

  # Output both lists of chunks, edited and original, each entry prefixed
  # with its index (chunk strings are shortened by Chunk#to_s).
  def to_s
    ret = ""
    @chunks.each_with_index do | chunk , index |
      ret += index.to_s + " - " + chunk.to_s + "\n"
    end
    @org_chunks.each_with_index do | chunk , index |
      ret += index.to_s + " - " + chunk.to_s + "\n"
    end
    ret
  end

  # Find the difference from too_chunk (the edited file) to start_chunk
  # (the original file); start_matches are the precalculated matches of the
  # original (see Merge3::all_matches).
  # Collects the differences as chunks (copies or adds) and marks deletes.
  # The algorithm is "greedy", meaning if it finds a match (using the
  # matches) it tries to extend it in both directions.
  def initialize( too_chunk , start_chunk , start_matches )
    @chunks = []
    @org_chunks = []
    # run by length to get big (sure) hits first (-1 == newlines)
    [ -1 , 48 , 32 , 16 ].each do |c_size|
      Merge3::matches( too_chunk , c_size ).each do | _key , two |
        start = start_matches[two.str]
        next if start.nil?
        raise "Keys collide :#{start}" if start.str != two.str
        next if done?( two , start )
        # here comes the greedy part, first left then right
        two_start , start_start , len = two.start , start.start , two.length
        puts two , "MINUS" if DEBUG && len <= 0
        start_rim , edit_rim = find_left_rim( start_start , two_start )
        while ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) &&
              ( edit_rim < two_start ) && ( start_rim < start_start )
          two_start -= 1
          start_start -= 1
          len += 1 # to keep the right end where it was
        end
        start_rim , edit_rim = find_right_rim( start_start + len , two_start + len ,
                                               start_chunk.length , too_chunk.length )
        while ( start_chunk.str[start_start + len] == too_chunk.str[two_start + len] ) &&
              ( ( len + start_start ) < start_rim ) && ( ( two_start + len ) < edit_rim )
          len += 1
        end
        chunk = too_chunk.subchunk( two_start , len , start_start )
        puts "Matched #{chunk}" if DEBUG
        add_chunk( chunk )
      end
    end
    # now find the parts that were deleted
    # (gaps in the matching of the original file)
    each_org_pair do | org , next_org |
      if org.org_stop < next_org.from
        del_str = start_chunk.str[org.org_stop , next_org.from - org.org_stop]
        puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
        add_chunk( Deleted.new( org.stop , org.org_stop , del_str ) )
      end
    end
    # now find the parts that were added (gaps in the matching of the edited file)
    each_pair do | chunk , next_chunk |
      next unless chunk.stop < next_chunk.start
      add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 )
      puts "ADDING #{add} " if DEBUG
      chunk.added = add # hang the add onto the chunk
      next if chunk.kind_of?( Deleted ) || next_chunk.kind_of?( Deleted )
      # The following logic fixes cases where the matching had been somewhat
      # unintuitive. Though correct it produced unnecessary conflicts in
      # conjunction with deletes: while the add starts with the same
      # character as the following copy, shift that character over.
      # Characters are compared as one character strings so the test for
      # spaces works on every ruby version (str[0] is only a number on 1.8).
      while add.str[0 , 1] == next_chunk.str[0 , 1] &&
            add.str[0 , 1] != " " && # not spaces, avoid whitespace headaches
            chunk.org_stop == next_chunk.from
        puts "before: #{chunk} \nNext:#{next_chunk}" if DEBUG
        add.rotate # put the first to the end
        remove_chunk( next_chunk )
        # move the first from the next to the end of the last
        chunk.push( next_chunk.pop )
        puts "after: #{chunk} \nNext:#{next_chunk}" if DEBUG
        add_chunk( next_chunk )
      end
    end
  end

  # helper to iterate over each neighbouring pair in the edited file order
  def each_pair
    idx = 1
    while idx < @chunks.length # index based, the block may reorder the list
      yield( @chunks[idx - 1] , @chunks[idx] )
      idx += 1
    end
  end

  # helper to iterate over each neighbouring pair in the original file order
  def each_org_pair
    idx = 1
    while idx < @org_chunks.length # index based, the block may add chunks
      yield( @org_chunks[idx - 1] , @org_chunks[idx] )
      idx += 1
    end
  end

  # get the chunk following prev in edited file order,
  # nil if prev is the last chunk or not part of this sequence
  def get_next( prev )
    idx = @chunks.index( prev )
    return nil unless idx
    return nil if idx == @chunks.length - 1
    @chunks[idx + 1]
  end

  # Add this chunk. This implementation keeps the order of both
  # arrays correct all the time.
  def add_chunk( chunk )
    raise "Nil added " if not chunk
    @chunks.push( chunk )
    @chunks.sort! do | a , b |
      ret = a.start <=> b.start
      # this sorts the Deleted chunks into the edited file order too
      if ret == 0
        ret = -1 if a.kind_of? Deleted
        ret = 1 if b.kind_of? Deleted
      end
      ret
    end
    @org_chunks.push( chunk )
    @org_chunks.sort! { | a , b | a.from <=> b.from }
  end

  # take the chunk out of both lists (for rotation, it will be readded)
  def remove_chunk( chunk )
    @chunks.delete( chunk )
    @org_chunks.delete( chunk )
  end

  # find the chunk that starts at the given position in the original file,
  # raises if there is none
  def find_from( pos )
    each_org do | chunk |
      return chunk if chunk.from == pos
    end
    raise "Not found #{pos} in\n#{self}"
  end

  # Find the chunks left of these positions (original and edited) and return
  # the bounds a new chunk can expand to, in both coordinates.
  def find_left_rim( org , pos )
    start_rim , two_rim = 0 , 0
    each_org do | two |
      start_rim = two.org_stop if start_rim <= two.org_stop && two.org_stop <= org
    end
    each do | one |
      two_rim = one.stop if two_rim <= one.stop && one.stop <= pos
    end
    return start_rim , two_rim
  end

  # Find the chunks right of these positions (original and edited) and return
  # the bounds a new chunk can expand to, in both coordinates.
  # start_start and two_start are the initial (maximal) bounds.
  def find_right_rim( start , two , start_start , two_start )
    start_rim , two_rim = start_start , two_start
    each_org do | tw |
      start_rim = tw.from if start <= tw.from && tw.from <= start_rim
    end
    each do | one |
      two_rim = one.start if two <= one.start && one.start <= two_rim
    end
    return start_rim , two_rim
  end

  # A part is done if its area in the edited file overlaps a chunk we
  # already have, or its area in the original overlaps the original area of
  # a chunk we already have.
  def done?( two , org )
    each do |chunk|
      return true if chunk.overlaps? two
    end
    each_org do |chunk|
      return true if chunk.org_overlaps? org
    end
    false
  end

  # yields all chunks in the sequence in edited file order
  def each
    @chunks.each do |chunk|
      yield chunk if chunk
    end
  end

  # yields all chunks in original file order
  def each_org
    @org_chunks.each do |chunk|
      yield chunk if chunk
    end
  end

  # Change all chunks in this sequence so that they don't have overlaps with
  # any of the chunks in the given sequence. Done both ways this results in
  # non-overlapping sequences.
  def no_overlap( sequence )
    # chunks created by break_at are inserted right behind the current one,
    # so the iteration visits them too
    each_org do |one|
      sequence.each_org do |two|
        break_at( one , two.from ) if one.org_includes?( two.from )
        break_at( one , two.org_stop ) if one.org_includes?( two.org_stop )
      end
    end
  end

  # Split the chunk in two at pos (original file coordinates, assumed to be
  # inside the chunk) and insert the right part behind it in both lists.
  def break_at( chunk , pos )
    return if pos == chunk.from || pos == chunk.org_stop
    new_chunk = chunk.split_at_org!( pos )
    unless new_chunk.kind_of? Deleted
      # added text follows the chunk, so it now belongs to the right part
      new_chunk.added = chunk.added
      chunk.added = nil
    end
    idx = @chunks.index( chunk )
    @chunks[idx + 1 , 0] = new_chunk
    idx = @org_chunks.index( chunk )
    @org_chunks[idx + 1 , 0] = new_chunk
  end

end
|
500
|
+
|
501
|
+
# this is a little holder class (struct?) that represents a piece of text in
|
502
|
+
# a file.
|
503
|
+
|
504
|
+
class Chunk
  # the index into the (edited) file where the string starts
  attr_reader :start
  # length of the string (somewhat redundant with the string below)
  attr_reader :length
  # the actual string. This is just here for convenience, it's in the file
  attr_reader :str
  # the index into the original file where the string starts, -1 for adds
  attr_reader :from
  # a chunk that was added behind this one (if there is such a one)
  attr_accessor :added
  # the table for whitespace: [ position , original whitespace ] pairs,
  # positions in the same coordinates as start
  attr_accessor :whitespace

  # cut the string to readable size for debug output: strings of 71 or more
  # characters are reduced to their first and last 20 characters
  def self.short_string( s )
    return "--" + s.to_s + "--" if s.length < 71
    "--" + s[0 , 20] + "...Cut #{s.length - 40} chars..." + s[s.length - 20 , 20] + "--"
  end

  # start  - index of the string in the edited file
  # length - length of the string
  # string - the text itself, must not be nil
  # from   - index of the string in the original file, -1 for added text
  def initialize( start , length , string , from = -1 )
    raise "Error nil string passed start=#{start} length=#{length} str=-#{string}=" if string.nil?
    @start , @length , @str , @from = start , length , string , from
    @whitespace = []
  end

  # outputs all attributes and a readable version of the string,
  # also (at most six entries of) the whitespace table if present
  def to_s # for debug output
    st = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} "
    st += Chunk::short_string( real_string )
    st += " \n added=#{added} " unless added.nil?
    st += " \n whitespace=#{@whitespace.length} " unless @whitespace.empty?
    @whitespace.first( 6 ).each do | pos , s |
      st += "-" + pos.to_s + " " + s.length.to_s + " "
      st += "n " if s.include?( "\n" )
    end
    st
  end

  # put the first character at the end (and move the start along)
  def rotate
    @str = @str[1..-1] + @str[0 , 1]
    @start += 1
  end

  # puts the char at the end of the string, adjusting the length.
  # arg is a [ char , whitespace entry ] pair as returned by pop, the
  # whitespace entry is added to the table if it's not nil
  def push( arg )
    char , space = arg
    @str << char
    @length += 1
    @whitespace << space unless space.nil?
  end

  # return the first char, and remove it from the string, adjusting
  # length, start and from.
  # Also returns the whitespace entry of that position, if there is one:
  # the result is a [ char , entry or nil ] pair.
  def pop
    white = nil
    if !@whitespace.empty? && @whitespace.first[0] == @start
      white = @whitespace.delete_at( 0 )
      puts "White --#{white}--#{@start}" if DEBUG # trace only, not for normal runs
    end
    char = @str[0 , 1]
    @str = @str[1..-1]
    @length -= 1
    @start += 1
    @from += 1 if @from != -1
    return [ char , white ]
  end

  # squeeze the whitespace, ie replace every sequence of whitespace
  # (space, tab, newline) other than a single space with a single space
  # and record the change in the whitespace table.
  # Works on a copy, the string passed to the constructor is left alone.
  def squeeze
    original = @str
    squeezed = @str.dup
    table = []
    at = 0
    while ( index = squeezed.index( /\s+/ , at ) )
      run = Regexp.last_match( 0 )
      # the run is one character wide after squeezing, so that is where we
      # continue (a following run must not be skipped)
      at = index + 1
      next if run == ' ' # do nothing for single spaces
      table << [ @start + index , run ]
      squeezed[index , run.length] = ' '
    end
    @str = squeezed
    @whitespace = table
    @length = @str.length
    raise "squeeze can not be reversed for #{original}" if DEBUG && original != real_string
  end

  # the string with the squeezed whitespace expanded again (if present)
  def real_string
    return @str if @whitespace.empty?
    expanded = @str.dup
    begin
      # back to front, so the positions of the earlier entries stay valid
      @whitespace.reverse_each do | index , run |
        expanded[index - @start , 1] = run
      end
    rescue
      # report the state of the chunk (on stderr) before passing the error on
      report = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} " + @str
      report += " \n whitespace=#{@whitespace.length} "
      @whitespace.each do | pos , s |
        report += "-" + pos.to_s + " " + s.length.to_s + " "
        report += "n " if s.include?( "\n" )
      end
      warn report
      raise
    end
    expanded
  end

  # get a substring from the chunk and carry the whitespace table across.
  # startt is in the file's coordinates (not the string's of the chunk),
  # fromm is the position in the original file for the new chunk
  def subchunk( startt , len , fromm )
    stri = @str[ startt - @start , len ]
    table = @whitespace.select do | entry |
      entry[0] >= startt && entry[0] < ( startt + len )
    end
    chunk = Chunk.new( startt , len , stri , fromm )
    chunk.whitespace = table
    chunk
  end

  # index of where the string ends.
  # this should be called end, but that's a keyword.
  def stop
    @start + @length
  end

  # index of where the string ends in the original file (nonsense for adds)
  def org_stop
    @from + @length
  end

  # adds are not copied, hence have no from attribute.
  # a copy carries a non negative number as offset into the original
  def add?
    @from == -1
  end

  # is the point (char index) in the original copied string
  def org_includes?( point )
    ( @from != -1 ) && ( @from <= point ) && ( point < org_stop )
  end

  # is the copy chunk fully inside this one (edited file coordinates)
  def includes_copy?( copy )
    ( copy.start >= @start ) && ( copy.stop <= stop )
  end

  # return the substring between fro and to (exclusive) in the original
  # file's coordinates, to == -1 means up to the end of the chunk.
  # The chunk itself is not changed.
  def rstring( fro , to )
    to = org_stop if to == -1
    raise "error #{fro} #{to} #{self}" if fro < @from || to > org_stop
    @str[ fro - @from , to - fro ]
  end

  # does some part of two overlap this chunk in the edited file
  # (a drawing helps)
  def overlaps?( two )
    start > two.start ? start < two.stop : stop > two.start
  end

  # does any part of the original overlap (from - org_stop)
  def org_overlaps?( two )
    from > two.from ? from < two.org_stop : org_stop > two.from
  end

  # this joins two chunks, which are presumed to be adds (so no fiddling
  # with from). self is changed and the argument untouched (to be deleted)
  def join_adds( add )
    @str += add.str
    @length = @str.length
  end

  # split the chunk into two at the given position (original file
  # coordinates), change this one into the left part and return the right one
  def split_at_org!( pos )
    raise "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
    left_length = pos - @from
    right_length = org_stop - pos
    right = Chunk.new( @start + left_length , right_length , @str[left_length , right_length] , pos )
    # the whitespace table is divided at the start of the right part
    right.whitespace = @whitespace.reject { | index , _s | index < right.start }
    raise "Right split error #{self} , #{right} :#{pos}" if right.length != right_length
    @str = @str[0 , left_length]
    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if @str.nil?
    @length = @str.length
    @whitespace.delete_if { | index , _s | index >= right.start }
    raise "Left split error #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
    right
  end

end
|
708
|
+
|
709
|
+
# a separate class for deleted chunks (for distinction)
|
710
|
+
class Deleted < Chunk

  # start  - position in the edited file where the text used to be
  # from   - position of the text in the original file
  # string - the deleted text, must not be nil
  def initialize( start , from , string )
    # check first, string.length below would hide the real problem
    raise "Error nil string =#{start} from=#{from}" if string.nil?
    super( start , string.length , string , from )
  end

  # a deleted chunk has no length in the modified file
  def stop
    @start
  end

  # split the chunk into two at the given position (original file
  # coordinates), change this one into the left part and return the right
  # one (also a Deleted)
  def split_at_org!( pos )
    raise "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
    left_length = pos - @from
    right_length = org_stop - pos
    # NOTE(review): the right part's start is moved by left_length although
    # deleted text has no width in the edited file - kept as is, confirm.
    right = Deleted.new( @start + left_length , pos , @str[left_length , @length] )
    raise "Back to the right drawingboard #{self} , #{right} :#{pos}" if right.length != right_length
    @str = @str[0 , left_length]
    @length = @str.length
    raise "Back to the left drawingboard #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
    right
  end

  # the chunk's debug output, marked as deleted
  def to_s
    super + " deleted"
  end

end
|
743
|
+
|
744
|
+
# Command line entry point:  merge3.rb [-w] start_file one_file two_file
# Merges the two edited files against their common start file and prints
# the result to stdout. -w turns on whitespace ignoring.
def main()
  args = ARGV.dup
  ignore_whitespace = false
  if args[0] == "-w"
    ignore_whitespace = true
    args.shift
  end
  if args.length < 3
    $stderr.puts "usage: #{$0} [-w] start_file one_file two_file"
    exit 1
  end
  # the block form closes each file again as soon as it has been read
  start , one , two = args[0 , 3].map do |path|
    File.open( path ) { |file| file.read }
  end
  puts Merge3::three_way( start , one , two , ignore_whitespace )
end
|
763
|
+
|
764
|
+
main() if $0 == __FILE__
|
765
|
+
|
metadata
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.
|
2
|
+
rubygems_version: 0.8.11
|
3
3
|
specification_version: 1
|
4
4
|
name: merge3
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "0.
|
7
|
-
date:
|
8
|
-
summary:
|
6
|
+
version: "0.9"
|
7
|
+
date: 2006-05-03 00:00:00 +03:00
|
8
|
+
summary: "Three way merge, including recognition of moves and edit in moves, unlike
|
9
|
+
traditional tools"
|
9
10
|
require_paths:
|
10
11
|
- lib
|
11
12
|
email:
|
@@ -24,9 +25,12 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
24
25
|
version: "1.6"
|
25
26
|
version:
|
26
27
|
platform: ruby
|
28
|
+
signing_key:
|
29
|
+
cert_chain:
|
27
30
|
authors: []
|
28
31
|
files:
|
29
32
|
- lib/merge3.rb
|
33
|
+
- lib/merge3.rb.~1.2.~
|
30
34
|
test_files: []
|
31
35
|
rdoc_options: []
|
32
36
|
extra_rdoc_files: []
|